(defstruct (spider-mdp (:include mdp (reward-type 'sas) (gamme 1.0d0)))
  problem   ; The underlying partially observable problem in the environment
  )

(defmethod actions ((mdp spider-mdp) state)
  (actions (spider-mdp-problem mdp) state))

(defmethod terminal? ((mdp spider-mdp) state)
  (goal-test (spider-mdp-problem mdp) state))

(defmethod state-hash-key ((mdp spider-mdp) state)
  "Return a canonical form for a state; can omit hidden cards
   since they are fixed by the visible cards. [[Fix for multisuit completed]]"
  (cons (spider-state-stacks state) (length (spider-state-completed state))))

(defmethod sas-reward ((mdp spider-mdp) state1 action state2)
  (step-cost (spider-mdp-problem mdp) state1 action state2))

(defmethod results ((mdp spider-mdp) move state)
  "Return enumerated distribution over outcome from move in state. Uses the deterministic
   result function for the underlying problem, which returns a new state
   possibly with some *unknown* cards exposed. These unknown cards
   are then chosen in all possible ways from the set of all hidden cards."
  (let* ((problem (spider-mdp-problem mdp))
	 (new (result problem move state))
	 (stacks (spider-state-stacks new))
	 (unknowns nil))
    (loop for i from 0 to (1- (length stacks)) do
	  (when (and (aref stacks i) (card-unknown? (first (aref stacks i)))) (push i unknowns)))
    (let* ((outcomes (all-spider-outcomes new unknowns))
	   (p (/ 1.0d0 (length outcomes))))
      (mapcar #'(lambda (s) (cons s p)) outcomes))))

(defun all-spider-outcomes (state unknowns)
  (if (null unknowns) (list state)
    (let ((all-hidden-cards (spider-state-all-hidden-cards state)))
      (mapcan #'(lambda (card)
		  (let ((new (copy-state state)))
		    (setf (first (aref (spider-state-stacks new) (first unknowns))) (unhide card))
		    (setf (spider-state-all-hidden-cards new) (remove card all-hidden-cards))
		    (all-spider-outcomes new (rest unknowns))))
	      all-hidden-cards))))

(defmethod num-results ((mdp spider-mdp) move state)
  "Return the number of possible outcomes from move in state. Uses the deterministic
   result function for the underlying problem, which returns a new state
   possibly with some *unknown* cards exposed. Count the ways these could be chosen."
  (let* ((problem (spider-mdp-problem mdp))
	 (new (result problem move state))
	 (stacks (spider-state-stacks new))
	 (num-unknowns 0)
	 (count 1)
	 (num-hidden-cards (length (spider-state-all-hidden-cards state))))
    (loop for i from 0 to (1- (length stacks)) do
	  (when (and (aref stacks i) (card-unknown? (first (aref stacks i)))) (incf num-unknowns)))
      (loop for j from (1+ (- num-hidden-cards num-unknowns)) to num-hidden-cards do
	    (setf count (* count j)))
    count))

(defmethod random-result ((mdp spider-mdp) move state)
  "Return a random outcome from move in state. Uses the deterministic
   result function for the underlying problem, which returns a new state
   possibly with some *unknown* cards exposed. These unknown cards
   are then sampled from the set of all hidden cards."
  (let* ((problem (spider-mdp-problem mdp))
	 (new (result problem move state))
	 (stacks (spider-state-stacks new))
	 (unknowns nil)
	 (all-hidden-cards (spider-state-all-hidden-cards state)))
    (loop for i from 0 to (1- (length stacks)) do
	  (when (and (aref stacks i) (card-unknown? (first (aref stacks i)))) (push i unknowns)))
    (let ((new-cards (if (= (length all-hidden-cards) (length unknowns))
			 (shuffle all-hidden-cards)
		       (sample-without-replacement (length unknowns) all-hidden-cards))))
      (mapc #'(lambda (i card) (setf (first (aref stacks i)) (unhide card)))
	    unknowns new-cards))
    new))