(eval-when (compile load) (declaim (optimize (speed 3)(safety 0)))) (load "fftwdsimple.fasl") ;; based on fft7.lisp, but trying to pack 2 input arrays into one array ;; for a complex fft. ;;; mulffts2 WORKS. slow. in file 2fft.lisp ;;; try to make it fast, now. (defmacro mref(A i) `(aref ,A ,i)) ;; much of the futzing around here is to help re-use plans for fft. ;; much ot that has been tossed out.. we recompute plans on the fly. (defvar *fftbufsize* 8) (defvar *fftbuf* (make-array *fftbufsize* :element-type 'double-float :allocation :static-reclaimable )) (defun fftpoly2(poly1 poly2 len) ;;poly1, poly2 inputs are pairs of arrays: ;; of exponents in increasing order, ;; and of coefs corresponding to those exponents. ;; len is desired length of transform, which is at least as large as degree of poly1,2 ;; but may be rounded up to next power of 2, or some other convenient FFT length ;; This program returns a transformed sequence suitable to ;; send into untangle to get the ;; fourier transforms of poly1 and poly2l of length len (let*((exps1 (car poly1)) (coefs1 (cdr poly1)) (exps2 (car poly2)) (coefs2 (cdr poly2)) (e 0) ;(c 0.0d0) (r *fftbuf*) ;; this is known to be at least 2*len in size big enough ;; we could do this in r, for this program.. (ans(make-array (+ len len) :element-type 'double-float :displaced-to r)) ) (declare (fixnum len e ) (double-float c) (type (array double-float (*)) ans r) (optimize (speed 3)(safety 0))) ;; array is twice the length of the (real) input to make room for real & imag parts ;; copy real/imag parts of input into adjacent array locations and pre-normalize. ;; Fill with zeros first. (loop for i fixnum from 0 to (+ len len -1) do (setf (mref r i) 0.0d0)) ;; next, copy real numbers from poly1 (loop for i fixnum from 0 to (1- (length exps1)) do (setf e (aref exps1 i)) (setf (mref r (+ e e)) (float (mref coefs1 i) 1.0d0))) (loop for i fixnum from 0 to (1- (length exps2)) do (setf e (aref exps2 i)) (setf (mref r (+ 1 e e)) (float (mref coefs2 i) 1.0d0))) ;; we can do the fft plan here since estimate only will be cheap and not clobber r? (fftw_execute_dft (fftw_plan len r r -1 64) r r) ;; do the FFT ans )) ;; faster yet? (defun untangleprod2(f NN ) ;;; given an array *f*, of complex doubles, untangle to a pair of ;;; fourier transforms. ;;; according to http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM ;;; with these interleaved data r,s. fft(k,z) is the kth element of ;;; the array of fft coefs. (but we use 2 spots for real/ imag ;;; fft(k,r) = (fft(k,z)+conj(fft(N-k,z))/2 ;;; fft(k,s) =-i*(fft(k,z)-conj(fft(N-k,z))/2 ;;; so ans[q]=r[q]*s[q] is a complex number that looks like (declare (type (array double-float (*)) f) (optimize (speed 3)(safety 0))) (let*(;(ans (make-array NN :initial-element #c(0.0d0 0.0d0) )) (ans2 (make-array (+ NN NN) :element-type 'double-float :initial-element 0.0d0)) (a 0.0d0)(b 0.0d0) (r 0.0d0)(s 0.0d0) ) (declare (type (complex double-float) ans2)) ; (format t"~%f=~s" *f*) (setf r (aref f 0) s (aref f 1)) (setf (aref ans2 0) (* r s)) (setf (aref ans2 1) 0.0d0) (loop for k from 1 to (1- NN ) do (setf a ;(aref *f* k) (complex (aref f (* 2 k))(aref f (1+ (* 2 k))))) ;; ok (setf b (complex (aref f (*(- NN k) 2)) (- (aref f (+(* 2 (- NN k)) 1)))) ;ok ) (setf r (+ a b)) (setf s (* #.(complex 0.0d0 -1) (- a b))) (setf (aref ans2 (* 2 k)) (realpart (* 0.25d0 r s))) (setf (aref ans2 (1+ (* 2 k))) (imagpart (* 0.25d0 r s)))) ans2 )) (defun mulfft3(r s) ;; multiply polynomials r,s using floating FFT ;;compute the size Z of the answer. Z is a power of 2. ;; compute complex array r, increased to size S ;; compute complex array r, increased to size S ;; compute two FFTs ;; multiply pointwise ;; compute inverse FFT * 1/n ;; convert back to array and return answer, coefficients rounded to integers ;; the answer will be only approximate if the coefficients are too large or ;; corrupted by round-off. (let* ((lr (length (car r))) (ls (length (car s))) ;; compute deg of r + degree of s +1 (lans (+ (aref (car r) (1- lr)) (aref (car s) (1- ls)) 1)) (z (ash 1 (ceiling (log lans 2))))) ; round up to power of 2 ;;; check the size of fftbuf here.. (if (< *fftbufsize* (+ z z)) ;; enough for real and imag part (setf *fftbuf* (make-array (setf *fftbufsize* (+ z z) ) ;; real and imag parts :element-type 'double-float :allocation :static-reclaimable ))) (fftiR3(untangleprod2 (fftpoly2 r s z) z)))) (defun fftiR3(input) ;; this is the inverse FFT, retaining only REAL part of answer ;; input is array of real, imag, real, imag, ... ;; returns array of REAL parts of answer, double-float. ;; computes a plan, if there is not one. (let*( ;(len (ash (length input) -1) ) (len (ash (length input) -1)) ; (r (flatten input)) ;;(r (make-array (length input) :element-type 'double-float :initial-contents input)) (r input) (anse (make-array len :element-type 'fixnum :fill-pointer 0)) (ansc (make-array len :fill-pointer 0)) (q len) (k 0)) (declare (fixnum len k) (type (array double-float (*)) r) (type (array fixnum (*)) anse) (optimize (speed 3)(safety 0))) (fftw_execute_dft (fftw_plan len r r 1 64) r r) ;; do the FFT ;; (loop for i fixnum from 0 to (1- len) do (print (aref r i))) ;debug (loop for i fixnum from 0 to (1- (* 2 len)) by 2 do ;; copy real parts into answer (setf k (round (mref r i) q)) (cond((= k 0) nil) (t (vector-push (ash i -1) anse) (vector-push k ansc)))) (cons anse ansc))) ;;maxima version ;; normalization should be such that n1*n2 = 1/n. ;; here it is 1/n^2 ?? #| slowFFT(x):= block([n:length(x)], map(lambda([k], 1/n*sum(x[j]*exp((-2*%i*%pi*(j-1)*k)/n),j,1,n)), makelist(i,i,0,n-1)))$ slowFFTi(x):= block([n:length(x)], map(lambda([k], 1/n*sum(x[j]*exp((2*%i*%pi*(j-1)*k)/n),j,1,n)), makelist(i,i,0,n-1)))$ ;; normalization is 1, 1/n (slowFFT(x):= block([n:length(x)], map(lambda([k], sum(x[j]*exp((-2*%i*%pi*(j-1)*k)/n),j,1,n)), makelist(i,i,0,n-1))), slowFFTi(x):= block([n:length(x)], map(lambda([k], 1/n*sum(x[j]*exp((2*%i*%pi*(j-1)*k)/n),j,1,n)), makelist(i,i,0,n-1))), a1: slowFFT([1+%i,2+3*%i,3+6*%i, 4+7*%i]), a2: slowFFT([1,2,3,4]), a3: slowFFT([1,3,6,7]), untangle(v):=block([n:length(v)], local(a,b,r), r[i]:=v[i+1], a[0]:realpart(r[0]), b[0]:imagpart(r[0]), a[i]:=1/2*(r[i]+conjugate(r[n-i])), b[i]:= -%i/2*(r[i]-conjugate(r[n-i])), [makelist(a[i],i,0,n-1), makelist(b[i],i,0,n-1)])) |#