;;; learning/agents/active-adp-learner.lisp
;;; NOTE(review): the path above says "adp", but every definition in
;;; this file is a "qi" (Q-iteration) learner -- confirm the file name.
;;;
;;; Reinforcement learning agent that uses dynamic
;;; programming to solve the Markov decision process
;;; that it learns from its experience.  Thus, the
;;; main job is to update the model over time.

(defun make-random-qi-learner (actions)
  "Return the learner that MAKE-ACTIVE-QI-LEARNER builds for ACTIONS
when Q-RANDOM-CHOICE is used as the choice function."
  (make-active-qi-learner actions #'q-random-choice))

(defun make-maximizing-qi-learner (actions)
  "Return the learner that MAKE-ACTIVE-QI-LEARNER builds for ACTIONS
when Q-MAX-CHOICE is used as the choice function."
  (make-active-qi-learner actions #'q-max-choice))

(defun make-active-qi-learner (actions choice-function)
  "Return a closure of one argument (a percept) that learns and acts.

ACTIONS is the list of actions for which per-state entries are created.
CHOICE-FUNCTION is called with the current state and the Q table; its
result is remembered as the last action and returned by the closure.

The closure owns four EQUAL hash tables keyed by state: the Q table,
a visit counter, a model table and a reward table.  The latter two are
handed to MAKE-MDP as :MODEL and :REWARDS.  On every call it:
  1. pushes the percept onto its percept history;
  2. creates table entries for a state that has no visit count yet;
  3. increments that state's visit count;
  4. calls UPDATE-ACTIVE-MODEL with the mdp, the history and the last action;
  5. replaces the Q table with the result of Q-ITERATION, but only when
     MDP-TERMINAL-STATES of the mdp is non-NIL;
  6. clears the percept history when the percept is terminal;
  7. picks, stores and returns the next action."
  (let* ((percept-history nil)
         (prior-action nil)
         (q-table (make-hash-table :test #'equal))
         (visit-counts (make-hash-table :test #'equal))
         (model-table (make-hash-table :test #'equal))
         (reward-table (make-hash-table :test #'equal))
         (mdp (make-mdp :model model-table :rewards reward-table)))
    (lambda (percept)
      (push percept percept-history)
      (let ((state (mdp-percept-state percept)))
        ;; A state without a visit count is new: give it a zero count,
        ;; one Q entry and one action model per action, and its reward.
        (when (null (gethash state visit-counts))
          (setf (gethash state visit-counts) 0
                (gethash state q-table)
                (loop for action in actions
                      collect (cons action (mdp-percept-reward percept)))
                (gethash state model-table)
                (loop for action in actions
                      collect (cons action (make-mdp-action-model)))
                (gethash state reward-table)
                (mdp-percept-reward percept)))
        (incf (gethash state visit-counts))
        (update-active-model mdp percept-history prior-action)
        ;; Only run the DP step once terminal states are known, to make
        ;; sure the DP algorithm terminates.
        (when (mdp-terminal-states mdp)
          (setf q-table (q-iteration mdp q-table)))
        ;; A terminal percept resets the percept history.
        (when (mdp-percept-terminalp percept)
          (setf percept-history nil))
        ;; The value of this SETF -- the chosen action -- is the closure's
        ;; return value.
        (setf prior-action (funcall choice-function state q-table))))))