(defstruct (spider-mdp (:include mdp (reward-type 'sas) (gamme 1.0d0)))
problem ; The underlying partially observable problem in the environment
)
(defmethod actions ((mdp spider-mdp) state)
(actions (spider-mdp-problem mdp) state))
(defmethod terminal? ((mdp spider-mdp) state)
(goal-test (spider-mdp-problem mdp) state))
(defmethod state-hash-key ((mdp spider-mdp) state)
"Return a canonical form for a state; can omit hidden cards
since they are fixed by the visible cards. [[Fix for multisuit completed]]"
(cons (spider-state-stacks state) (length (spider-state-completed state))))
(defmethod sas-reward ((mdp spider-mdp) state1 action state2)
(step-cost (spider-mdp-problem mdp) state1 action state2))
(defmethod results ((mdp spider-mdp) move state)
"Return enumerated distribution over outcome from move in state. Uses the deterministic
result function for the underlying problem, which returns a new state
possibly with some *unknown* cards exposed. These unknown cards
are then chosen in all possible ways from the set of all hidden cards."
(let* ((problem (spider-mdp-problem mdp))
(new (result problem move state))
(stacks (spider-state-stacks new))
(unknowns nil))
(loop for i from 0 to (1- (length stacks)) do
(when (and (aref stacks i) (card-unknown? (first (aref stacks i)))) (push i unknowns)))
(let* ((outcomes (all-spider-outcomes new unknowns))
(p (/ 1.0d0 (length outcomes))))
(mapcar #'(lambda (s) (cons s p)) outcomes))))
(defun all-spider-outcomes (state unknowns)
(if (null unknowns) (list state)
(let ((all-hidden-cards (spider-state-all-hidden-cards state)))
(mapcan #'(lambda (card)
(let ((new (copy-state state)))
(setf (first (aref (spider-state-stacks new) (first unknowns))) (unhide card))
(setf (spider-state-all-hidden-cards new) (remove card all-hidden-cards))
(all-spider-outcomes new (rest unknowns))))
all-hidden-cards))))
(defmethod num-results ((mdp spider-mdp) move state)
"Return the number of possible outcomes from move in state. Uses the deterministic
result function for the underlying problem, which returns a new state
possibly with some *unknown* cards exposed. Count the ways these could be chosen."
(let* ((problem (spider-mdp-problem mdp))
(new (result problem move state))
(stacks (spider-state-stacks new))
(num-unknowns 0)
(count 1)
(num-hidden-cards (length (spider-state-all-hidden-cards state))))
(loop for i from 0 to (1- (length stacks)) do
(when (and (aref stacks i) (card-unknown? (first (aref stacks i)))) (incf num-unknowns)))
(loop for j from (1+ (- num-hidden-cards num-unknowns)) to num-hidden-cards do
(setf count (* count j)))
count))
(defmethod random-result ((mdp spider-mdp) move state)
"Return a random outcome from move in state. Uses the deterministic
result function for the underlying problem, which returns a new state
possibly with some *unknown* cards exposed. These unknown cards
are then sampled from the set of all hidden cards."
(let* ((problem (spider-mdp-problem mdp))
(new (result problem move state))
(stacks (spider-state-stacks new))
(unknowns nil)
(all-hidden-cards (spider-state-all-hidden-cards state)))
(loop for i from 0 to (1- (length stacks)) do
(when (and (aref stacks i) (card-unknown? (first (aref stacks i)))) (push i unknowns)))
(let ((new-cards (if (= (length all-hidden-cards) (length unknowns))
(shuffle all-hidden-cards)
(sample-without-replacement (length unknowns) all-hidden-cards))))
(mapc #'(lambda (i card) (setf (first (aref stacks i)) (unhide card)))
unknowns new-cards))
new))