/*********************************************************** * mv_pblas.c * * -- parallel matrix - vector multiplication * * * * Compute y = A * x by means of the * * Parallel Basic Linear Algebra Subroutines (PBLAS) * * * ***********************************************************/ #include #include #include /* Matrix size */ int m = 16; int n = 20; /* Block size */ int mb = 4; int nb = 4; /* Max submatrix size */ int mA = 8; int nA = 12; int zero = 0; int one=1; /***************************************************************************** * Header files for Blacs and Pblas are not available from Intel Mkl */ void Cblacs_pinfo(int*, int*); void Cblacs_get(int, int, int*); void Cblacs_gridinit(int*, char*, int, int); void Cblacs_gridinfo(int, int*, int*, int*, int*); void Cblacs_barrier(int , char*); void Cblacs_gridexit(int); void descinit_(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*); void pdgemv_(char*,int*,int*,double*,double*,int*,int*,int*,double*, int*,int*,int*,int*,double*,double*,int*,int*,int*,int*); /***************************************************************************** * Local functions */ /* * Fills the matrix block (ib, jb), 0 <= ib, jb <= 4 with the transposed block */ void block_A (int ib, int jb, double* a) { int i; int j; for (i = 0; i < mb; i++) { for (j = 0; j < nb; j++) { /* Exchange i and j for the value */ a[nb * i + j] = ...; } } }; /* * Fills the vector block (1, ib), ib in {0, 1, ..., 4} */ void block_x (int ib, double* x) { int j; for (j = 0; j < nb; j++) { /* Index plus offset */ x[j] = ...; } }; /***************************************************************************** * The main program to perfom matrix-vector multiplication on DMM */ int main(int argc, char* argv[]) { /* BLACS stuff: Context, grid size */ int ctxt; int nprow = 2; int npcol = 2; int myrow; int mycol; /* BLACS stuff: descriptors */ int descA[9]; int descx[9]; int descy[9]; int info; /* Local storage for the data of the matrix and vectors * Remember: A transposed! */ double A[nA][mA]; double x[nA]; double y[mA]; /* Temporary storage for generating blocks */ double aux[nb * mb]; double alpha = 1; double beta = 0; int iam; int nprocs; int i; int j; int ib; int jb; int iad; int jad; int nbr_loc; int nbc_loc; /* Start MPI */ MPI_Init(&argc, &argv); /* Initialize blacs and the process grid */ Cblacs_pinfo(&iam, &nprocs); if (iam == 0 && nprocs < 4) { printf("Number of processors should be at least 4\n"); return 0; } /* Define the process grid */ Cblacs_get(0, 0, &ctxt); Cblacs_gridinit(&ctxt, "Row-major", nprow, npcol); /* Find who I am in the grid */ Cblacs_gridinfo(ctxt, &nprow, &npcol, &myrow, &mycol); /* Initialize BLACS descriptors */ descinit_(descA, &m, &n, &mb, &nb, &zero, &zero, &ctxt, &mA,&info); descinit_(descx, &one, &n, &one, &nb, &zero, &zero, &ctxt, &one, &info); descinit_(descy, &m, &one, &mb, &one, &zero, &zero, &ctxt, &mA, &info); /* Initialize matrix A with A[i,j] = |i-j| * Because of the Fortan storage each processor receives the transpose * of its submatrix! */ /* Number of blocks in a row in the subblock of the curent process */ nbr_loc = ...; /* Number of blocks in a row in the subblock of the curent process */ nbc_loc = ...; /* Row offset of the first block of the current process */ bro = ...; /* Column offset of the first block of the current process */ bco = ...; ... ... ... /* Initialize vector x = [0, 1, ..., N_1] */ if (myrow == 0) { /* Offset of the first vector block of the current process */ xco = ...; ... ... } /* Wait for all processes to finish */ Cblacs_barrier(ctxt,"A"); /*if (myrow == 0) { if (mycol == 0) { printf("x for processor (%d, %d)\n", myrow, mycol); for(i = 0; i < nA; i++) { printf("%9g\n", x[i]); } printf("\n"); } else if (mycol == 1) { printf("x for processor (%d, %d)\n", myrow, mycol); for(i = 0; i < mA; i++) { printf("%9g\n",x[i]); } printf("\n"); } }*/ /* Multiplication: y = alpha*A*x + beta*y (alpha=1, beta=0) */ pdgemv_("No Transpose", &m, &n, &alpha, &A[0][0], &one, &one, descA, x, &one, &one, descx, &one, &beta, y, &one, &one, descy, &one); /* Print the result*/ if (mycol == 0) { if (myrow == 0) { printf("Solution: blocks y0:\n"); for(i = 0; i < mA / 2; i++) { printf("%12.2f\n", y[i]); } printf("\nSolution: blocks y2:\n"); for(; i < mA; i++) { printf("%12.2f\n", y[i]); } printf("\n"); } else if (myrow == 1) { printf("Solution: blocks y1:\n"); for(i=0; i < mA / 2; i++) { printf("%12.2f\n", y[i]); } printf("\nSolution: blocks y3:\n"); for(; i < mA; i++) { printf("%12.2f\n", y[i]); } printf("\n"); } } /* Release process grid */ Cblacs_gridexit(ctxt); /* Shut down MPI */ MPI_Finalize(); return 0; }