function m1Y % ************************************************************** % * Matrix-vector multiplication b=A*x * % * x is a column vector with n elements * % * A is a n times n matrix with zeros on the diagonal * % * and off-diagonal elements A_{lj}=p_j/(z_j-z_l)^nv * % * where nv is an integer * % * z_j are n complex points in the unit * % * square [-0.5 0.5 -0.5 0.5] * % * p_j, is the product -i*zp_j*dt_j * % * where zp_j and dt_j are numbers and * % * i is the imaginary unit * % * Comparison is carried out between * % * Direct calculation in O(n^2) FLOPs and storage * % * Fast Multipole Method caluclation in * % * O(n*log(n)) FLOPs and O(n) storage * % * See L. Greengard and V. Rokhlin, * % * A fast algorithm for particle simulations, * % * J. Comput. Phys. {\bf 73}(2), pp. 325--348 (1987), * % * doi:10.1016/0021-9991(87)90140-9 * % ************************************************************** format compact rand('state',0) randn('state',0) tol=10*eps; nv=2; levmax=12; np=3001; x=randn(np,1); [z,zp,dt]=zinit(np); % disp('Setup of matrix for direct multiplication.') disp('Please wait...') disp(' ') A=M1init(z,zp,dt,np,nv); tic b=A*x; disp(['Direct multiplication takes ',num2str(toc),' seconds']) disp(' ') % tic [bfmm,ilev]=MatvecMainY(z,-i*zp.*dt.*x,tol,levmax,nv); ilev disp(['Fast-M multiplication takes ',num2str(toc),' seconds']) disp(' ') relerr=norm(bfmm-b)/norm(b) % function M1=M1init(z,zp,dt,np,nv) M1=zeros(np); for n=1:np M1( 1:n-1,n)=-i*zp(n)*dt(n)./(z(n)-z( 1:n-1)).^nv; M1(n+1:np ,n)=-i*zp(n)*dt(n)./(z(n)-z(n+1:np )).^nv; end % function [z,zp,dt]=zinit(np) z =rand(np,1)-0.5+i*(rand(np,1)-0.5); zp=rand(np,1)-0.5+i*(rand(np,1)-0.5); dt=randn(np,1)/2/pi; % function [M1,ILEV]=MatvecMainY(ZSRC,QA,TOL,LEVMAX,NV) if (sum(abs(floor(0.999*ZSRC+0.5+0.5i)))>0) error('Points outside computational box') end NATOMS = length(ZSRC); M1 = zeros(NATOMS,1); NTERMS = -round(log(TOL)/log(2)); NT2 = fix(NTERMS/2); C = zeros(2*NTERMS+NV-2); C(:,1)=1; for I=2:2*NTERMS+NV-2 C(I,2:I)=C(I-1,(2:I)-1)+C(I-1,2:I); end C1=(-1)^NV*C(NV:NTERMS+NV-1,NV).'; CC=zeros(NTERMS); for L=1:NTERMS CC(L,:)=-(-1)^L*C(NV-1+L:NTERMS+NV-2+L,L); end ILEV = 1; NINBOX = NTERMS + 1; IJ = Intlist; while (ILEV < LEVMAX) && (max(NINBOX) > NTERMS) ILEV = ILEV + 1; NSIDE = 2^ILEV; [IADR,IOFF,ICORO,NINBOX,INDA]=Assign(ZSRC,NATOMS,NSIDE); M1=Mpoles(M1,ZSRC,QA,CC,C1,IADR,IOFF,ICORO,NINBOX,INDA,IJ, ... NTERMS,NSIDE,NT2,NV); end M1=Direct(M1,ZSRC,QA,IADR,IOFF,ICORO,NINBOX,INDA,NSIDE,NV); % function M1=Mpoles(M1,ZSRC,QA,CC,C1,IADR,IOFF,ICORO,NINBOX,INDA,IJ, ... NTERMS,NSIDE,NT2,NV); INDB = find(NINBOX > NT2); NTBOX = length(INDB); ILOC = zeros(1,NSIDE*NSIDE); ILOC(INDB) = 1:NTBOX; LOCEXP = zeros(NTERMS,NTBOX); MPOLE = zeros(NTERMS,1); ZCENT = (ICORO - (NSIDE-1)/2*(1+i))/NSIDE; IFLAG = 2*mod(real(ICORO),2)+mod(imag(ICORO),2)+1; BOX0 = imag(IJ)*NSIDE+real(IJ); for I = INDA LVEC = IADR(IOFF(I)+1:IOFF(I+1)); ZSHIFT = ZSRC(LVEC)-ZCENT(I); ZPOW = QA(LVEC); MPOLE(1) = C1(1)*sum(ZPOW); for J = 2:NTERMS ZPOW = ZPOW.*ZSHIFT; MPOLE(J) = C1(J)*sum(ZPOW); end JCORO = ICORO(I) + IJ(:,IFLAG(I)); for J = find(abs(floor(JCORO/NSIDE)) == 0).' JBOX = I+BOX0(J,IFLAG(I)); if (NINBOX(JBOX) > NT2) ZSHIFT = NSIDE/IJ(J,IFLAG(I)); Z0P = ZSHIFT.^(0:NTERMS-1).'; B1 = CC*(MPOLE.*Z0P).*Z0P*ZSHIFT^NV; LOCEXP(:,ILOC(JBOX))=LOCEXP(:,ILOC(JBOX))+B1; elseif (NINBOX(JBOX) > 0) JTVEC = IADR(IOFF(JBOX)+1:IOFF(JBOX+1)); ZSHIFT = 1./(ZSRC(JTVEC) - ZCENT(I)); PHI = MPOLE(NTERMS); for K = NTERMS-1:-1:1 PHI = MPOLE(K)+PHI.*ZSHIFT; end M1(JTVEC) = M1(JTVEC) + PHI.*ZSHIFT.^NV; end end end for I = 1:NTBOX JTVEC= IADR(IOFF(INDB(I))+1:IOFF(INDB(I)+1)); ZSHIFT = ZSRC(JTVEC) - ZCENT(INDB(I)); PHI = LOCEXP(NTERMS,I); for J = NTERMS-1:-1:1 PHI = LOCEXP(J,I) + PHI.*ZSHIFT; end M1(JTVEC) = M1(JTVEC) + PHI; end % function [IADR,IOFF,ICORO,NINBOX,INDA]=Assign(ZSRC,NATOMS,NSIDE) NBOXES = NSIDE*NSIDE; IBOX = 0:NBOXES-1; ICORO = mod(IBOX,NSIDE) + i*fix(IBOX/NSIDE); IADR = zeros(1,NATOMS); IH = floor(NSIDE*([real(ZSRC) imag(ZSRC)]+0.5)); IH(IH<0)=0; IH(IH>=NSIDE)=NSIDE-1; IBOX = IH(:,2)*NSIDE + IH(:,1) + 1; NINBOX = accumarray(IBOX,1,[NBOXES 1]).'; INDA = find(NINBOX>0); IOFF(2:NBOXES+1) = cumsum(NINBOX); ICNT = IOFF; for J = 1:NATOMS ICNT(IBOX(J)) = ICNT(IBOX(J))+1; IADR(ICNT(IBOX(J))) = J; end % function M1=Direct(M1,ZSRC,QA,IADR,IOFF,ICORO,NINBOX,INDA,NSIDE,NV) IJ = [-1 -1 -1 0 0 1 1 1]+[-i 0 i -i i -i 0 i]; BOX0 = imag(IJ)*NSIDE+real(IJ); for I = INDA KVEC = IADR(IOFF(I)+1:IOFF(I+1)); QA2 = QA(KVEC); ZSRC2 = ZSRC(KVEC); for KK = 1:NINBOX(I) MYIND = [1:KK-1,KK+1:NINBOX(I)]; K = KVEC(KK); M1(K) = M1(K) + sum(QA2(MYIND)./(ZSRC2(MYIND)-ZSRC(K)).^NV); end JCORO = ICORO(I) + IJ; for JBOX = I+BOX0(find(abs(floor(JCORO/NSIDE))==0)) KVEC = IADR(IOFF(JBOX)+1:IOFF(JBOX+1)); for KK = 1:NINBOX(JBOX) K = KVEC(KK) ; M1(K) = M1(K) + sum(QA2./(ZSRC2-ZSRC(K)).^NV); end end end % function IJ=Intlist IJ = zeros(27,4); IJ([1 5 9 12 16],:)= 2; IJ([3 7 10 11 14],:)=-2; IJ([6 15],:) =-1; IJ([8 13],:) = 1; IJ([2 6 9 10 13],:) = IJ([2 6 9 10 13],:)+2i; IJ([4 8 11 12 15],:) = IJ([4 8 11 12 15],:)-2i; IJ([5 14],:) = IJ([5 14],:)+i; IJ([7 16],:) = IJ([7 16],:)-i; IJ(17,:) = [ 3 -2 2 -3]+[-2 -3 3 2]*i; IJ(18,:) = [ 2 3 -3 -2]+[ 3 -2 2 -3]*i; IJ(19,:) = [ 3 -1 1 -3]+[-1 -3 3 1]*i; IJ(20,:) = [ 1 3 -3 -1]+[ 3 -1 1 -3]*i; IJ(21,:) = [ 3 0 0 -3]+[ 0 -3 3 0]*i; IJ(22,:) = [ 0 3 -3 0]+[ 3 0 0 -3]*i; IJ(23,:) = [ 3 1 -1 -3]+[ 1 -3 3 -1]*i; IJ(24,:) = [-1 3 -3 1]+[ 3 1 -1 -3]*i; IJ(25,:) = [ 3 2 -2 -3]+[ 2 -3 3 -2]*i; IJ(26,:) = [-2 3 -3 2]+[ 3 2 -2 -3]*i; IJ(27,:) = [ 3 3 -3 -3]+[ 3 -3 3 -3]*i;