;;; multiplying polynomials ;;; managing exponents with a heap, like sorting X+Y ;;; This works. ;;; How to speed up further?? ;;; would have to make percolate-down faster (takes 48%), or avoid using it. ;;; percolate-up takes 5% ;;; coefficient multiplication takes 3%. [generic calls can do rational, complex, float etc] (eval-when (compile load) (declaim (optimize (speed 3)(safety 0)))) ;; this is the 5 part structure to make hash-table entries. (defstruct h-ent (expon 0 :type fixnum) xexps yexps xcoefs ycoefs) (defmacro makehbox(e x y c d) `(make-h-ent :expon ,e :xexps ,x :yexps ,y :xcoefs ,c :ycoefs ,d)) (defmacro re-use-hbox(a e x y c d) `(let()(setf (h-ent-expon ,a) ,e (h-ent-xexps ,a) ,x (h-ent-yexps ,a) ,y (h-ent-xcoefs ,a) ,c (h-ent-ycoefs ,a) ,d) ,a)) (defvar *debug* nil) (defun deb (&optional (val nil))(setf *debug* val)) ;; (deb) turns off. (deb t) turns on debugging printouts (let ((*fill-pointer* 0)) ;; make lexical context (not global) for *fill-pointer*, makes for faster access. (declare (fixnum *fill-pointer*)) ;;; HEAP stuff (defmacro hlessfun (r s) `(let ((cr (h-ent-expon ,r))(cs (h-ent-expon ,s))) (declare (integer cr cs)) (cond ((< cr cs) t) ((= cr cs) (< (the integer (car (h-ent-xexps ,r))) (the integer (car (h-ent-xexps ,s))) ))))) (defmacro lesser-child (a parent) "Return the index of the lesser child. If there's one child, return its index. If there are no children, return (*FILL-POINTER* (HEAP-A HEAP))." ; (declare (type (simple-array t (*)) a) (fixnum parent) ) `(let* ((left (+ ,parent ,parent)) (right (1+ left)) (fp *fill-pointer*)) (declare (fixnum left fp right)) (cond ((>= (the fixnum left)(the fixnum fp)) fp) ((= (the fixnum right)(the fixnum fp)) left) ((hlessfun (aref ,a left) (aref ,a right)) left) (t right)))) (defun percolate-down (a hole x) "Move the HOLE down until it's in a location suitable for X. Return the new index of the hole." (declare (fixnum hole)(type (simple-array t (*)) a)) (do ((child (lesser-child a hole) (lesser-child a hole))) ((or (hlessfun x (aref a child)) (>= child *fill-pointer*)) hole) (declare (fixnum child *fill-pointer*)) (setf (aref a hole) (aref a child) hole child))) (defun percolate-up (a hole x) "Private. Moves the HOLE until it's in a location suitable for holding X. Does not actually bind X to the HOLE. Returns the new index of the HOLE. The hole itself percolates down; it's the X that percolates up." (declare (fixnum hole) (type (simple-array t (*)) a) ) (setf (aref a 0) x) (do ((i hole parent) (parent (ash hole -1) (ash parent -1) )) ((not (hlessfun x (aref a parent))) i) (declare (fixnum i parent)) (setf (aref a i) (aref a parent)))) (defun create-heap-ordered (initial-contents) ;; used only if initial-contents is already sorted list. ;; important: initial-contents must have the first element duplicated. ;; e.g. (3 5 8) should be (3 3 5 8) "Initialize the indicated heap. If INITIAL-CONTENTS is a non-empty list, the heap's contents are initialized to the values in that list; they are assumed already ordered according to hlessfun. INITIAL-CONTENTS must be a list or NIL." (let* ((n (length initial-contents)) (heap (make-array n :initial-contents initial-contents :element-type t))) (setf *fill-pointer* n) ;global value for this heap heap)) (defmacro heap-insert (a xin) "Insert a new element into the heap. Returns the heap." ;; Append a hole for the new element. ;; assume enough room... `(let((x ,xin)) (incf *fill-pointer*) ;; Move the hole from the end towards the front of the ;; queue until it is in the right position for the new ;; element. (setf (aref ,a (percolate-up ,a (1- *fill-pointer*) x)) x))) (defmacro heap-remove(a) ;; assumes non-empty!! if empty next answer is bogus. ;; answer after that is an error (fill pointer can't pop) ;;(format t "-") `(let* ((x (aref ,a 1)) (last-object (progn (decf *fill-pointer*)(aref ,a *fill-pointer*) ) )) (setf (aref ,a (percolate-down ,a 1 last-object)) last-object) x)) ;;; --- end of heap stuff --- ;; multiply 2 polynomials each ( list_of_expons . list_of_coefs) ;; the coefficients may be integers, double-floats, single-floats, bignums, complex, rationals ;; or a mixture of them. (defun mm (xin yin);; assume #x <=#y, or it will be slower (let* ((x (car xin)) (y (car yin)) (xc (cdr xin)) (yc (cdr yin)) (x1 (car x)) ; lead exponent ;; the following initialization puts #x items into the heap. ;; to follow M&P, we would have to put just 1 item in the heap ;; and let it grow up to #x if necessary. ;; I'm tired of fiddling with the code for now. (m (maplist #'(lambda (r )(makehbox (+ (car r) x1) r x yc xc)) (cons (car y) y);; add one element to heap initialization this way.. )) (h (create-heap-ordered m)) (anse (list -1)) ;will contain expons in answer ;sentinel value -1 is less than any valid exponent, removed later. (ansc nil) ;will contain coefs in answer. (r nil) (nexti nil) (nextj nil)) (while (> *fill-pointer* 1) (setf r (heap-remove h)) ;remove the smallest entry on heap. ;; if expon different from previous one, we push result into answer ;; otherwise we combine coefficients. (cond ((/= (h-ent-expon r)(car anse)) (push (h-ent-expon r) anse) #+ignore;; fixnum-only version next line. (double-floats could be done similarly) (push (the fixnum (* (the fixnum (car (h-ent-xcoefs r)))(the fixnum (car(h-ent-ycoefs r))))) ansc) (push (* (car (h-ent-xcoefs r))(car(h-ent-ycoefs r))) ansc)) (t ;; another, equal exponent term to combine with this output #+ignore;;fixnum version next line (incf (the fixnum (car ansc))(the fixnum (* (the fixnum (car (h-ent-xcoefs r)))(the fixnum (car(h-ent-ycoefs r)))))) (incf (car ansc)(* (car (h-ent-xcoefs r))(car(h-ent-ycoefs r)))) )) (cond ((and (setf nexti (h-ent-xexps r)) (cdr (setf nextj (h-ent-yexps r)))) (heap-insert h ;; we recycle an h-ent structure (re-use-hbox r (+ (car nexti)(cadr nextj)) nexti (cdr nextj) (h-ent-xcoefs r) (cdr (h-ent-ycoefs r)) )))) ); end of while ;; at this point anse is a list of exponents in reverse order, trailed by a (-1) ;; also, ansc is a list of coefficients in reverse order (cons (cdr(nreverse anse))(nreverse ansc)))) ; end mm ) ;end lex context ;; if we need answers in the format of ( array_of_expons . array_of_coeffs) consider this (defun l2a(x)(let ((c (length(car x)))) (cons (make-array c :element-type 'fixnum :initial-contents (car x)) (make-array c :initial-contents (cdr x))))) ;; a pre-conversion of the input to mm, from arrays to lists could be done this way: (defun a2l(x)(cons (coerce (car x) 'list)(coerce (cdr x)'list))) ;; and then a program consistent with the others I wrote, with respect to input and output: ;; (but which uses extra intermediate storage -- enough to store the answer as a list..) (defun mul-heap (x y)(l2a (mm (a2l x)(a2l y)))) #| examples ;; a pair of lists (setf x (cons '(4 5 6 10 15) '(1 1 1 1 1))) ;polynomial x = 1*x^4+1*x^5+ .. (setf y (cons '(3 4 9 12 14 19) '(1 1 1 1 1 1))) ;;; this differs from representation used by other programs here which would use (setf xv (cons #(4 5 6 10 15) #(1 1 1 1 1))) ; a pair of vectors ;;; it seems that writing (and probably running) the heap program is simpler ;;; with terms in lists, so we can hand around (cdr list) or "rest" easily. ;; another easy test could be to square examples like this (setf x1 '((0 1 2) 1 1 1)) (setf x200 (cons (loop for i from 0 to 199 collect i) (loop repeat 200 collect 1))) (setf xd200 (cons (coerce (car x200) '(simple-array 'fixnum) (coerce (cdr x200) 'vector))) ;; space out exponents. should make no difference as long ;; as exponents remain under fixnum limit. (setf xr200 (cons (sort (loop for i from 1 to 200 collect (random (* 500 i) )) #'<) (loop repeat 200 collect 1))) (setf xdr200 (cons (coerce (car xr200) '(simple-array 'fixnum)) (coerce (cdr xr200) 'vector))) ;; usual rep for mul-dense ;; mul-dense is 4.3 times faster than mm for this example above ;; test: (time (mul-dense xdr200 xdr200)) vs (time (mm xr200 xr200)) (setf xr2000 (cons (sort (loop for i from 1 to 200 collect (random (* 2000 i) )) #'<) (loop repeat 200 collect 1))) (setf xdr2000 (cons (coerce (car xr2000) '(simple-array 'fixnum)) (coerce (cdr xr2000) 'vector))) ;; mul-dense is 1.8 times faster than mm for this example above (setf xr1000 (cons (sort (loop for i from 1 to 2000 collect (random (* 1000 i) )) #'<) (loop repeat 2000 collect 1))) (setf xdr1000 (cons (coerce (car xr1000) '(simple-array 'fixnum)) (coerce (cdr xr1000) 'vector))) ;; mul-dense is 6 times faster than mm for this example above. The result has 1,301,415 terms ;; if we want mm to be faster, here's an example where it works great. (setf xr20 (cons (sort (loop for i from 1 to 20 collect (random (* 20000 i) )) #'<) (loop repeat 20 collect 1))) (setf xr21 (cons (sort (loop for i from 1 to 20 collect (random (* 20000 i) )) #'<) (loop repeat 20 collect 1))) (setf xdr20 (cons (coerce (car xr20) '(simple-array 'fixnum)) (coerce (cdr xr20) 'vector))) mul-dense is 95X slower, on multiplying two 20-term polynomials in this case. However, we have to space out the exponents so the answer is degree 677,000. Is M&R chaining going to help? If percolate-up and percolate-down could be made free, we would speed mm up by a factor of 2 on squaring xr1000. |#