#include #include #include "mpi.h" #define am(p,q) *(a+2*cols*p+q) #define bm(p,q) *(buff+2*(cols-rows)*p+q) int main(int argc, char **argv ) { /* 2-D binary radix FFT using message passing. Data are in strips. Given "size" processors, there are "size" strips containing MAX_ROWS/size vectors in each. Complex element A(i,j) is in strip i mod (MAX_ROWS/size) and has location &A(i%(MAX_ROWS/size),j) = &A(mod(i,MAX_ROWS/size),j). From Sections 5.8 and 5.9 of Arbenz and Petersen, "Intro. to Parallel Computing," Oxford Univ. Press, 2004. W. Petersen, 22 Oct. 2003 */ int MAX_ROWS=1024; int MAX_COLS=1024; int ierr,master,rank,size,rows,col,cols,i,j,ij,ip,offset; int skip; static float seed; float *a,*b,*w,*buff; /* checksum to test results */ float *acopy,sign,err,fnn; float ggl(float*); void cffti(int cols, float *w); void Checkres(float *a, float *acopy, int rows, int cols); void FFT2D(float *a,float *w,float sign,int rows,int cols); MPI_Status stat; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); master = 0; cols = MAX_COLS; rows = cols/size; a = (float *) malloc(2*rows*cols*sizeof(float)); acopy = (float *) malloc(2*cols*cols*sizeof(float)); w = (float *) malloc(2*cols*sizeof(float)); buff = (float *) malloc(2*rows*cols*sizeof(float)); /* initialize sine/cosine tables */ cffti(cols,w); if(rank==master){ /* send section of A to each proccessor */ seed = 331.0; /* random seed */ for(ip=0;ip0){ ierr=MPI_Send(buff,2*rows*cols,MPI_FLOAT,ip,ip, MPI_COMM_WORLD); } } } else { /* slave parts */ ierr=MPI_Recv(buff,2*rows*cols,MPI_FLOAT,master,MPI_ANY_TAG, MPI_COMM_WORLD,&stat); if(stat.MPI_TAG!=0){ ij = 0; for(i=0;i0){ ierr=MPI_Send(a,2*rows*cols,MPI_FLOAT,0,0, MPI_COMM_WORLD); } else { /* master */ fnm2 = 1.0/((float) (cols*cols)); /* rank > 0 part of check */ for(is=1;is 0.){ #pragma ivdep for(k=0;k 0.){ for(i=j;i 0.){ wku = w[kw][1]; } else { wku = -w[kw][1]; } for(i=0;i float ggl(float *ds) { /* generate u(0,1) distributed random numbers. Seed ds must be saved between calls. ggl is essentially the same as the IMSL routine RNUM. W. Petersen and M. Troyer, 24 Oct. 2002, ETHZ: a modification of a fortran version from I. Vattulainen, Tampere Univ. of Technology, Finland, 1992 */ double t,d2=0.2147483647e10; t = (float) *ds; t = fmod(0.16807e5*t,d2); *ds = (float) t; return((float) ((t-1.0e0)/(d2-1.0e0))); }