#include #include #include #include #define N 1048576 #define N2 N/2 main() { /* Example of Apple Altivec coded binary radix FFT using intrinsics from Petersen and Arbenz "Intro. to Parallel Computing," Section 3.6 This is an expanded version of a generic work-space FFT: steps are in-line. cfft2(n,x,y,w,sign) takes complex n-array "x" (Fortran real,aimag,real,aimag,... order) and writes its DFT in "y". Both input "x" and the original contents of "y" are destroyed. Initialization for array "w" (size n/2 complex of twiddle factors (exp(twopi*i*k/n), for k=0..n/2-1)) is computed once by cffti(n,w). WPP, SAM. Math. ETHZ, 1 June, 2002 */ int first,i,icase,it,ln2,n; int nits=1000000; static float seed = 331.0; float error,fnm1,sign,z0,z1,ggl(); float *x,*y,*z,*w; double t1,mflops; void cffti(),cfft2(); /* allocate storage for x,y,z,w on 4-word bndr. */ x = (float *) malloc(8*N); y = (float *) malloc(8*N); z = (float *) malloc(8*N); w = (float *) malloc(4*N); n = 2; for(ln2=1;ln2<21;ln2++){ first = 1; for(icase=0;icase<2;icase++){ if(first){ for(i=0;i<2*n;i+=2){ z0 = ggl(&seed); /* real part of array */ z1 = ggl(&seed); /* imaginary part of array */ x[i] = z0; z[i] = z0; /* copy of initial real data */ x[i+1] = z1; z[i+1] = z1; /* copy of initial imag. data */ } } else { for(i=0;i<2*n;i+=2){ z0 = 0; /* real part of array */ z1 = 0; /* imaginary part of array */ x[i] = z0; z[i] = z0; /* copy of initial real data */ x[i+1] = z1; z[i+1] = z1; /* copy of initial imag. data */ } } /* initialize sine/cosine tables */ cffti(n,w); /* transform forward, back */ if(first){ sign = 1.0; cfft2(n,x,y,w,sign); sign = -1.0; cfft2(n,y,x,w,sign); /* results should be same as initial multiplied by n */ fnm1 = 1.0/((float) n); error = 0.0; for(i=0;i<2*n;i+=2){ error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) + (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]); } error = sqrt(fnm1*error); printf(" for n=%d, fwd/bck error=%e\n",n,error); first = 0; } else { t1 = ((double)clock())/((double) CLOCKS_PER_SEC); for(it=0;it y */ for(j=0;j float ggl(float *ds) { /* generate u(0,1) distributed random numbers. Seed ds must be saved between calls. ggl is essentially the same as the IMSL routine RNUM. W. Petersen and M. Troyer, 24 Oct. 2002, ETHZ: a modification of a fortran version from I. Vattulainen, Tampere Univ. of Technology, Finland, 1992 */ double t,d2=0.2147483647e10; t = (float) *ds; t = fmod(0.16807e5*t,d2); *ds = (float) t; return((float) ((t-1.0e0)/(d2-1.0e0))); }