;; fast yes or no functions.  goal: a function which, when called,
;; returns nil or t with about equal probability in a random
;; sequence. (pseudorandom, anyway).  the cycle could be on the order
;; of 16k. It would be nice to be both fast and small.

;; semantically, this would do it.
(defun g()(= 0 (random 2)))

;; First serious attempt: fast random  t or nil.  Pick a number, like 397

(defparameter yesno (let ((h (loop for i from 1 to 397 collect (> (random 2) 0))))
		      (nconc h h))) ;; this is an infinite loop of 397 t or nils.

;;then to get a random t , do  (pop yesno)
(defmacro nextyn0()  `(pop yesno))

;; if you don't see how this works, maybe study it a bit.

;; problem:  access to / change of/ global variable may be more expensive than
;; local/lexically bound.

;; THEREFORE, try this. define (gen0)  as above, to return t or nil..

(let* ((h (loop for i from 1 to 397 collect (> (random 2) 0)))
       (yn (nconc h h)))
    (defun gen0()
         (declare (optimize (speed 3)(safety 0)))
	 (pop yn))
    (defun test0(n)   (time (dotimes (i n)(declare (fixnum i))(gen0)))))


;; problem:  397 items is not really enough, and it takes 3176 bytes.
;; the method below has 16384 items, and takes only 2048 bytes.
;; using a BIT ARRAY

(let* ( ;yes-no-bit-array-count
	 (yl  #.(expt 2 16))		;yes-no-bit-array-length
	 (ybc (1- yl)) ;counter: which element are we looking at?
	 (yb (make-array yl  :element-type '(mod 2)))) ;yes-no-bit-array
    (declare (fixnum ybc yl)
	     (type (simple-array (mod 2) (*)) yb))
    ;; set up the lexically closed array of bits
    (loop for i from 0 to (1-(length yb)) do(setf (aref yb i)(random 2)))
    
  (defun gen1()
         (declare (optimize (speed 3)(safety 0)))
	 (if (= 0 ybc)(setf ybc yl)) ;loop around when you exhaust the bits
	 (not (= 0 (aref yb (decf ybc))))))

;;;;; concern... testing/ if/  is a branch, which might be slow
;;;;; we found  (not (= 0 ..))  is slightly faster than (= 1 ...)
;;;;; how about using memory instead of branching?

(let* ((ybc 0) ;yes-no-bit-array-count
	 (yl  #.(expt 2 16)) ;yes-no-bit-array-length
	 (yb (make-array #.(expt 2 16) :element-type '(mod 2))) ;yes-no-bit-array
	 (truenil (make-array 2 :initial-contents '(nil t))))
    (declare (fixnum ybc yl)
	     (type (simple-array (mod 2) (*)) yb)
	     (type (simple-array t (2)) truenil))
    ;; set up the lexically closed array of bits
    (loop for i from 0 to (1-(length yb)) do(setf (aref yb i)(random 2)))
    
 (defun gen2()
         (declare (optimize (speed 3)(safety 0)))
	 (if (= ybc yl)(setf ybc -1)) ;loop around when you exhaust the bits
	 (aref truenil (aref yb (incf ybc))))) ;; this is faster than (if ...)

;;suggested by Daniel Herring 8/7/08

(let* ((count #.(expt 2 16))
       (mask (1- count))
       (table (make-array (list count)
                          ;; this type spec helps
                          :element-type 'boolean))
       (index 0))
  (declare (optimize (speed 3)(safety 0)))
  (dotimes (n count)
    (setf (svref table n) (= (random 2) 0)))
  (defun gen3 ()
    (declare (optimize (speed 3)(safety 0)) (fixnum index mask))
    (setf index (logand (1+ index) mask))
    (svref table index)))

(defun dum();; dummy function for testing
  (declare (optimize (speed 3)(safety 0)))
  nil)
(defun testdum(n)   (time (dotimes (i n)(declare (fixnum i))(dum))))

(defun testyn(n f) ;; test yes no functions by funcalling them n times
  (declare (optimize (speed 3)(safety 0)) (fixnum n))
  (time (dotimes (i n)(declare (fixnum i))(funcall f))))

#|  (testyn 10000000 #'g)     969 ms  ;actually using (random 2)
    (testyn 10000000 #'gen0)  281 ms
    (testyn 10000000 #'gen1)  406 ms
    (testyn 10000000 #'gen2)  297 ms
    (testyn 10000000 #'gen3)  375 ms
    (testyn 10000000 #'dum)   140 ms

    using International Allegro CL Enterprise Edition
    8.0 [Windows] Pentium D 3Ghz .

    Subtracting off the empty loop, we get this:
    g    829 ms.
    gen0 141 ms.  That is, 1.41e-8 seconds per call, or 42 instructions if they are run at 3GHz.
    gen1 266 ms.
    gen2 157 ms.
    gen3 235 ms.

    note: gen0 uses more storage (and cache) and is far less random than gen2, so gen2 may be
    preferable. Also note that by using (test0 10000000) rather than (testyn 10000000 'gen0) 
    we can eliminate the funcall overhead from the test.
    This reduces the time to 281. A similar change to testdum reduces that time to 94 ms.
    Subtracting off this loop overhead we get 191 ms. rather than 141 ms. 
        


    Using SBCL 1.0.13, same machine

    g    0.469 sec
    gen0 0.223
    gen1 0.328
    gen2 0.250
    dum  0.217

It seems that a much higher percentage of time is taken for the empty loop in SBCL, though
#'dum is only 5 instructions in SBCL or 4 in Allegro.
    |#