;;; multiplying polynomials
;;; managing exponents with a heap, like sorting X+Y
;;;  This works.

;;; How to speed up further??
;;; would have to make percolate-down faster (takes 48%), or avoid using it.
;;; percolate-up takes 5%
;;; coefficient multiplication takes 3%. [generic calls can do rational, complex, float etc]

(eval-when (compile load) (declaim   (optimize (speed 3)(safety 0))))

;; this is the 5 part structure to make hash-table entries.

(defstruct h-ent (expon 0 :type fixnum) xexps yexps xcoefs ycoefs)

(defmacro makehbox(e x y c d)
  `(make-h-ent :expon ,e :xexps ,x :yexps ,y :xcoefs ,c :ycoefs ,d))

(defmacro re-use-hbox(a e x y c d)
  `(let()(setf (h-ent-expon ,a) ,e 
	       (h-ent-xexps ,a) ,x 
	       (h-ent-yexps ,a) ,y
	       (h-ent-xcoefs ,a) ,c
	       (h-ent-ycoefs ,a) ,d)
       ,a))

(defvar *debug* nil)
(defun deb (&optional (val nil))(setf *debug* val))  ;; (deb) turns off. (deb t) turns on debugging printouts

(let ((*fill-pointer* 0)) ;; make lexical context (not global) for *fill-pointer*, makes for faster access.
  (declare (fixnum *fill-pointer*))
  
  ;;; HEAP stuff
  
  (defmacro hlessfun (r s)
    `(let ((cr (h-ent-expon ,r))(cs (h-ent-expon ,s)))
       (declare (integer cr cs))
       (cond ((< cr cs) t)
	     ((= cr cs) (< (the integer (car (h-ent-xexps ,r))) 
			   (the integer (car (h-ent-xexps ,s)))
			   )))))

  (defmacro lesser-child (a parent)
    "Return the index of the lesser child.  If there's one child,
 return its index.  If there are no children, return 
 (*FILL-POINTER* (HEAP-A HEAP))."
					; (declare (type (simple-array t (*)) a)  (fixnum parent) )
    `(let* ((left (+ ,parent ,parent))
	    (right (1+ left))
	    (fp *fill-pointer*))
       (declare (fixnum left fp right))
       (cond ((>= (the fixnum left)(the fixnum fp)) fp)
	     ((= (the fixnum right)(the fixnum  fp)) left)
	     ((hlessfun (aref ,a left) (aref ,a right)) left)
	     (t right))))

  (defun percolate-down (a hole x)
    "Move the HOLE down until it's in a location suitable for X.
Return the new index of the hole."
    (declare (fixnum hole)(type (simple-array t (*)) a))
	
    (do ((child (lesser-child a hole) (lesser-child a hole)))
	((or  (hlessfun x (aref a child)) (>= child *fill-pointer*))
	 hole)
      (declare (fixnum child *fill-pointer*))
      (setf (aref a hole) (aref a child)
	    hole child)))

  (defun percolate-up (a hole x)
    "Private.  Moves the HOLE until it's in a location suitable for holding
X.  Does not actually bind X to the HOLE.  Returns the new
index of the HOLE.  The hole itself percolates down; it's the X
that percolates up."
    (declare (fixnum hole)  (type (simple-array t (*)) a)	)
    (setf (aref a 0) x)
    (do ((i hole parent)
	 (parent (ash hole -1) (ash parent -1) ))
	((not (hlessfun x (aref a parent))) i)
      (declare (fixnum i parent))
      (setf (aref a i) (aref a parent))))

  (defun create-heap-ordered
    (initial-contents)
    ;; used only if initial-contents is already sorted list.
    ;; important: initial-contents must have the first element duplicated.
    ;; e.g.  (3 5 8)  should be (3 3 5 8)
  
    "Initialize the indicated heap.  If INITIAL-CONTENTS is a non-empty
list, the heap's contents are initialized to the values in that
list; they are assumed already ordered according to hlessfun.  INITIAL-CONTENTS must
be a list or NIL."
    (let* ((n (length initial-contents))
	   (heap
	    (make-array  n
			 :initial-contents initial-contents
			 :element-type t)))
      (setf *fill-pointer* n)		;global value for this heap
      heap))

  (defmacro heap-insert (a xin)
    "Insert a new element into the heap.  Returns the heap." 
    ;; Append a hole for the new element.
    ;; assume enough room...
  
    `(let((x ,xin))
       (incf *fill-pointer*)
       ;; Move the hole from the end towards the front of the
       ;; queue until it is in the right position for the new
       ;; element.
       (setf (aref ,a (percolate-up ,a (1- *fill-pointer*) x)) x)))

  (defmacro heap-remove(a) 
    ;; assumes non-empty!! if empty next answer is bogus.
    ;; answer after that is an error (fill pointer can't pop)
 
    ;;(format t "-")
    `(let* ((x (aref ,a 1))
	    (last-object		
	     (progn (decf *fill-pointer*)(aref ,a *fill-pointer*) ) ))
       (setf (aref ,a (percolate-down ,a 1 last-object)) last-object)
       x))

;;; --- end of heap stuff ---

  ;; multiply 2 polynomials each  ( list_of_expons .  list_of_coefs) 
  ;; the coefficients may be integers, double-floats, single-floats, bignums, complex, rationals
  ;; or a mixture of them.
  
  (defun mm (xin yin);; assume #x <=#y, or it will be slower
    (let*  ((x (car xin))
	    (y (car yin))
	    (xc (cdr xin))
	    (yc (cdr yin))
	    (x1 (car x))		; lead exponent
	    ;; the following initialization puts #x items into the heap. 
	    ;; to follow M&P, we would have to put just 1 item in the heap
	    ;; and let it grow up to #x if necessary.
	    ;; I'm tired of fiddling with the code for now.
	    (m (maplist #'(lambda (r )(makehbox (+ (car r) x1) r x yc xc))
			(cons (car y) y);; add one element to heap initialization this way..
			))
	    (h (create-heap-ordered m))
	    (anse (list -1))		;will contain expons in answer 
					;sentinel value -1 is less than any valid exponent, removed later.
	    (ansc nil)			;will contain coefs in answer.
	    (r nil)
	    (nexti nil)
	    (nextj nil))
 
      (while (> *fill-pointer* 1)
	(setf r (heap-remove h))	;remove the smallest entry on heap.

	;; if expon different from previous one, we push result into answer
	;; otherwise we combine coefficients.
	(cond ((/= (h-ent-expon r)(car anse))
	       (push (h-ent-expon r) anse)
	       #+ignore;; fixnum-only version next line. (double-floats could be done similarly)
	       (push (the fixnum (* (the fixnum (car (h-ent-xcoefs r)))(the fixnum (car(h-ent-ycoefs r))))) ansc)
	       (push (* (car (h-ent-xcoefs r))(car(h-ent-ycoefs r))) ansc))
	      (t
	       ;; another, equal exponent term to combine with this output
	       #+ignore;;fixnum version next line
	       (incf (the fixnum (car ansc))(the fixnum (* (the fixnum (car (h-ent-xcoefs r)))(the fixnum (car(h-ent-ycoefs r))))))
	       (incf (car ansc)(* (car (h-ent-xcoefs r))(car(h-ent-ycoefs r)))) ))
      	
	(cond ((and (setf nexti (h-ent-xexps r))
		    (cdr (setf nextj (h-ent-yexps r))))
	       (heap-insert h 
			    ;; we recycle an h-ent structure
			    (re-use-hbox r 
					 (+ (car nexti)(cadr nextj))
					 nexti (cdr  nextj)
					 (h-ent-xcoefs r)
					 (cdr (h-ent-ycoefs r))
					 ))))
	  
	); end of while
      ;; at this point anse is a list of exponents in reverse order, trailed by a (-1)
      ;; also,  ansc is a list of coefficients in reverse order
      (cons  (cdr(nreverse anse))(nreverse ansc)))) ; end mm

  )					;end lex context 

;; if we need answers in the format of ( array_of_expons . array_of_coeffs)  consider this

(defun l2a(x)(let ((c (length(car x))))
		   (cons (make-array c :element-type 'fixnum :initial-contents (car x))
			 (make-array c  :initial-contents (cdr x)))))

;; a pre-conversion of the input to mm, from arrays to lists could be done this way:

(defun a2l(x)(cons (coerce (car x) 'list)(coerce (cdr x)'list)))
;; and then a program consistent with the others I wrote, with respect to input and output:
;; (but which uses extra intermediate storage -- enough to store the answer as a list..)

(defun mul-heap (x y)(l2a (mm (a2l x)(a2l y))))  




#|  examples

;; a pair of lists

(setf x (cons '(4 5 6 10 15) '(1 1 1 1 1)))  ;polynomial x = 1*x^4+1*x^5+ ..
(setf y (cons '(3 4 9 12 14 19) '(1 1 1 1 1 1)))

;;; this differs from representation used by other programs here which would use
(setf xv (cons #(4 5 6 10 15) #(1 1 1 1 1))) ; a pair of vectors
;;; it seems that writing (and probably running) the heap program is simpler
;;; with terms in lists, so we can hand around (cdr list)  or "rest" easily.


;; another easy test could be to square examples like this
(setf x1 '((0 1 2)  1 1 1))

(setf x200 (cons (loop for i from 0 to 199 collect i) 
		 (loop repeat 200 collect 1)))

(setf xd200 (cons (coerce (car x200) '(simple-array 'fixnum)
		  (coerce (cdr x200) 'vector)))

;; space out exponents. should make no difference as long
;; as exponents remain under fixnum limit.

(setf xr200 (cons (sort (loop for i from 1 to 200 collect (random (* 500 i) )) #'<)
		  (loop repeat 200 collect 1)))

(setf xdr200 (cons (coerce (car xr200) '(simple-array 'fixnum))
		   (coerce (cdr xr200) 'vector)))  ;; usual rep for mul-dense

;; mul-dense is 4.3 times faster than mm for this example above
;; test:  (time (mul-dense xdr200 xdr200))    vs (time (mm xr200 xr200))

(setf xr2000 (cons (sort (loop for i from 1 to 200 collect (random (* 2000 i) )) #'<)
		   (loop repeat 200 collect 1)))
(setf xdr2000 (cons (coerce (car xr2000) '(simple-array 'fixnum))
		    (coerce (cdr xr2000) 'vector)))

;; mul-dense is  1.8 times faster than mm for this example above

(setf xr1000 (cons (sort (loop for i from 1 to 2000 collect (random (* 1000 i) )) #'<)
		   (loop repeat 2000 collect 1)))
(setf xdr1000 (cons (coerce (car xr1000) '(simple-array 'fixnum))
		    (coerce (cdr xr1000) 'vector)))

;; mul-dense is  6 times faster than mm for this example above.  The result has 1,301,415 terms

;;  if we want mm to be faster, here's an example where it works great.

(setf xr20 (cons (sort (loop for i from 1 to 20 collect (random (* 20000 i) )) #'<)
		 (loop repeat 20 collect 1)))
(setf xr21 (cons (sort (loop for i from 1 to 20 collect (random (* 20000 i) )) #'<)
		 (loop repeat 20 collect 1)))
(setf xdr20 (cons (coerce (car xr20) '(simple-array 'fixnum))
		  (coerce (cdr xr20) 'vector)))

mul-dense is 95X slower, on multiplying two 20-term polynomials in this case.
However, we have to space out the exponents so the answer is degree 677,000.

Is M&R chaining going to help?  If percolate-up and percolate-down could be made free,
we would speed mm up by a factor of 2 on squaring xr1000.
|#