(eval-when (compile load) (declaim (optimize (speed 3)(safety 0)))) (load "fftwdsimple.fasl") ;; polynomial multiplication by FFT: pack 2 input arrays into one array ;; for a complex fft. ;;; BROKEN (defvar *fftbufsize* 8) (defvar *fftbuf* (make-array *fftbufsize* :element-type 'double-float :allocation :static-reclaimable )) (defvar *fftbuf2*(make-array *fftbufsize* :element-type 'double-float :allocation :static-reclaimable )) (defun fftpoly2(poly1 poly2 len) ;; poly1, poly2 inputs are pairs of arrays: ;; of exponents in increasing order, ;; and of coefs corresponding to those exponents. ;; len is desired length of transform, which is at least as large as degree of poly1,2 ;; but may be rounded up to next power of 2, or some other convenient FFT length ;; This program returns a transformed sequence suitable to ;; send into untangle to get the ;; fourier transforms of poly1 and poly2l of length len (declare (fixnum len)) (let*((exps1 (car poly1)) (coefs1 (cdr poly1)) (exps2 (car poly2)) (coefs2 (cdr poly2)) (e 0) ;(c 0.0d0) (r *fftbuf*) ;; this is known to be at least 2*len in size big enough ;; we could do this in r, for this program.. ;;(ans(make-array (+ len len) :element-type 'double-float :displaced-to r)) ;;(ans *fftbuf*) (plan (fftw_plan len r r -1 64)) ) (declare (fixnum len e ); (double-float c) (type (array double-float (*)) r) (optimize (speed 3)(safety 0))) ;; array is twice the length of the (real) input to make room for real & imag parts ;; copy real/imag parts of input into adjacent array locations and pre-normalize. ;; Fill with zeros first. (loop for i fixnum from 0 to (+ len len -1) do (setf (aref r i) 0.0d0)) ;; next, copy real numbers from poly1 (loop for i fixnum from 0 to (1- (length exps1)) do (setf e (aref exps1 i)) ;; assume coefs are already double-floats ... (setf (aref r (+ e e)) (float (aref coefs1 i) 1.0d0) )) (loop for i fixnum from 0 to (1- (length exps2)) do (setf e (aref exps2 i)) (setf (aref r (+ 1 e e)) (float (aref coefs2 i) 1.0d0) )) ;; (if *debug* (format t "~%len=~s" len)) ;;(if *debug* (format t "~%r=~s" (loop for i from 0 to len collect (aref r i)))) ;; we can do the fft plan here since estimate only will be cheap and not clobber r? (fftw_execute_dft plan r r) ;; do the FFT (fftw_destroy_plan plan) r )) ;; faster yet? (defvar *debug* t) (defun untangleprod2(f NN ) ;;; given an array f, of real/imag/real/imag doubles, untangle to a pair of ;;; fourier transforms, and multiply them pointwise ;;; according to http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM ;;; with these interleaved data r,s. fft(k,z) is the kth element of ;;; the array of fft coefs. (but we use 2 spots for real/ imag ;;; r= fft(k,r) = (fft(k,z)+conj(fft(N-k,z))/2 ;;; s= fft(k,s) =-i*(fft(k,z)-conj(fft(N-k,z))/2 ;;; they leave out what to do when k=0, since fft is defined from 0 to N-1. ;;; treat fft(N,z) == fft(0,z). ;;; so ans[q]=r[q]*s[q] ;;; returned as real/imag real/image etc. (declare (type (array double-float (*)) f) (fixnum NN) (optimize (speed 3)(safety 0))) ;; (format t "~% NN=~s" NN) ;; (format t "~%f=~s" f) ;; (if *debug* (format t "~%unt=~s" (subseq f 0 (* 2 NN)))) (let*( ;;(ans2 *fftbuf*) ;; re-use buffer?? ;;(ans2 (make-array (+ NN NN) :element-type 'double-float :initial-element 0.0d0)) (ans2 *fftbuf2*) (ar 0.0d0)(ai 0.0d0)(br 0.0d0)(bi 0.0d0)(idx1 0)(idx2 0) (sr 0.0d0)(si 0.0d0)(rr 0.0d0)(ri 0.0d0)) (declare (type (array double-float (*)) ans2) (fixnum idx1 idx2) (double-float ar ai br bi sr si rr ri)) (setf (aref ans2 0) (* (aref f 0)(aref f 1))) (setf (aref ans2 1) 0.0d0) (loop for k fixnum from 1 to (1- NN) do (setf idx1 (+ k k)) (setf idx2 (* (- NN k) 2)) (setf ar (aref f idx1) ai (aref f (1+ idx1))) (setf br (aref f idx2) bi (aref f (+ idx2 1))) (setf rr (+ ar br) ri (- ai bi)) (setf sr (+ ai bi) si (- br ar)) ; (format t "r= ~s ~s, s= ~s ~s" rr ri sr si) (setf (aref ans2 idx1) (* 0.25d0 (- (* rr sr) (* ri si)))) (setf (aref ans2 (1+ idx1))(* 0.25d0 (+ (* rr si)(* ri sr))))) ;; this all accomplishes complex multiplication of relevant parts. ;; pieces unraveled for speed, instead of consing up complex numbers. ;; (if *debug* (format t "~%ans2=~s" (subseq ans2 0 (* 2 NN)))) ans2)) (defun mulfft3(r s) ;; multiply polynomials r,s using floating FFT ;;compute the size Z of the answer. Z is a power of 2. ;; compute complex array r, increased to size S ;; compute complex array r, increased to size S ;; compute two FFTs ;; multiply pointwise ;; compute inverse FFT * 1/n ;; convert back to array and return answer, coefficients rounded to integers. ;; The answer will be only approximate if the coefficients are too large or ;; too corrupted by round-off. (let* ((lr (length (car r))) (ls (length (car s))) ;; compute deg of r + degree of s +1 (lans (+ (aref (car r) (1- lr)) (aref (car s) (1- ls)) 1)) (z (ash 1 (ceiling (log lans 2))))) ; round up to power of 2 ;;; check the size of fftbuf here.. allocate more if necessary (if (< *fftbufsize* (+ z z)) ;; enough for real and imag part (setf *fftbuf* (make-array (setf *fftbufsize* (+ z z) ) ;; real and imag parts :element-type 'double-float :allocation :static-reclaimable ) *fftbuf2* ;; need another one, or could overlap ops in *fftbuf* with some fancier progs. (make-array *fftbufsize* ;; real and imag parts :element-type 'double-float :allocation :static-reclaimable ))) ;; this next line does the whole job. ;; (format t "~%z=~s" z) (fftiR3(untangleprod2 (fftpoly2 r s z) z) z))) (defun fftiR3(input len) ;; this is the inverse FFT, retaining only REAL part of answer ;; input is array of real, imag, real, imag, ... ;; returns array of REAL parts of answer, double-float. ;; computes a plan, if there is not one. (declare (type (array double-float (*)) input)) (let*(;(len (ash (length input) -1)) (r input) (anse (make-array len :element-type 'fixnum :fill-pointer 0)) (ansc (make-array len :fill-pointer 0)) (q len) (plan (fftw_plan len r r 1 64)) (k 0)) (declare (fixnum len k q) (type (array double-float (*)) ansc) (type (array fixnum (*)) anse) (optimize (speed 3)(safety 0))) (fftw_execute_dft plan r r) ;; do the FFT ; (if *debug* (print "!")) (fftw_destroy_plan plan) (loop for i fixnum from 0 to (1- (* 2 len)) by 2 do ;; copy real parts into answer (setf k (round (aref r i) q)) ;(setf k (aref r i)) ;; (format t "~% r[i]=~s k=~s " (aref r i) k) (cond ((= k 0) nil) (t (vector-push (ash i -1) anse) (vector-push k ansc)))) (cons anse ansc))) ;;maxima version of pieces of this.. #| ;; normalization is 1, 1/n (slowFFT(x):= block([n:length(x)], map(lambda([k], sum(x[j]*exp((-2*%i*%pi*(j-1)*k)/n),j,1,n)), makelist(i,i,0,n-1))), slowFFTi(x):= block([n:length(x)], map(lambda([k], 1/n*sum(x[j]*exp((2*%i*%pi*(j-1)*k)/n),j,1,n)), makelist(i,i,0,n-1))), untangle(v):=block([n:length(v)], local(a,b,r), r[i]:=v[i+1], a[0]:realpart(r[0]), b[0]:imagpart(r[0]), a[i]:=1/2*(r[i]+conjugate(r[n-i])), b[i]:= -%i/2*(r[i]-conjugate(r[n-i])), [makelist(a[i],i,0,n-1), makelist(b[i],i,0,n-1)])) |# #| (defvar xv (cons (coerce (loop for i from 0 to 1999 collect i) 'vector) (coerce (loop for i from 0 to 1999 collect 1) 'vector))) (defvar xd (cons (coerce (loop for i from 0 to 1999 collect i) 'vector) (coerce (loop for i from 0 to 1999 collect 1.0d0) 'vector))) (defvar xs (cons (coerce (loop for i from 0 to 511 collect i) 'vector) (coerce (loop for i from 0 to 511 collect 1.0d0) 'vector))) (defvar xss (cons (coerce (loop for i from 0 to 511 by 2 collect i) 'vector) ;sparse (coerce (loop for i from 0 to 511 by 2 collect 1.0d0) 'vector))) (defvar xt2 (cons (coerce (loop for i from 0 to 511 by 8 collect i) 'vector) ;sparse (coerce (loop for i from 0 to 511 by 8 collect 1.0d0) 'vector))) (defvar xt16 (cons (coerce (loop for i from 0 to 511 by 16 collect i) 'vector) ;sparse (coerce (loop for i from 0 to 511 by 16 collect 1.0d0) 'vector))) (defvar xt100 (cons (coerce (loop for i from 0 to 99 collect i) '(array fixnum (*))) (coerce (loop for i from 0 to 99 collect 1.0d0) '(array double-float (*))))) |# ;; for comparison purposes we might need a pure fft multiplication routine that ;; assumes as input two double-float dense arrays; exponents implicit. Here it is (defun mulfftd(p q) (declare(optimize (speed 3)(safety 0)) (type (array double-float (*)) p q)) (assert (arrayp p)) (assert (arrayp q)) (let* ((plim (length p));; degree is plim-1 (qlim (length q)) (lans (+ plim qlim -1)) (z (ash 1 (ceiling (log lans 2)))) ;; don't round up to power of 2. ;; fftw can handle any length, and usually saves time. ;;(z lans) (zflo (/ 1.0d0 (float z 1.0d0))) (ppqq (make-array (+ z z) :element-type 'double-float :initial-element 0.0d0)) ;; (ppqq *fftbuf*) ;; (ans (make-array lans :element-type 'double-float)) ;; could use ppqq again.. ) ; round up to power of 2 (declare (fixnum qlim plim) (double-float zflo) ) (loop for i fixnum from 0 below plim do (setf (aref ppqq (+ i i)) ; (aref p i) (* zflo (aref p i)) )) ;; normalize? (loop for i fixnum from 0 below qlim do (setf (aref ppqq (+ 1 i i))(aref q i))) (fftw_execute_dft (fftw_plan z ppqq ppqq -1 64) ppqq ppqq) (setf ppqq (untangleprod2 ppqq z)) (fftw_execute_dft (fftw_plan z ppqq ppqq 1 64) ppqq ppqq) (loop for i fixnum from 0 below lans do (setf (aref ppqq i)(aref ppqq (+ i i)))) (subseq ppqq 0 lans) ;;ppqq )) (defvar x0 (cons #(2 6 8 10 12 14 16 17 21 23 27 31 34 38 41 45 47 51 52 53 56 60 64 66 67 70 72 74 78 79) #(930 19 942 816 510 1005 397 1007 279 503 969 449 173 268 820 949 136 60 251 774 712 289 875 722 937 786 1015 526 157 471)))