/********************************************************************* * Multiplication banded-matrix x vector: Data structure and Blas * Compile with: * gcc -O3 gbmv.c /usr/lib/libblas.so.3 *********************************************************************/ #include #include #include #define N 50 #define KB 2 #define REP 5000 /* Structure for the bands of the matrix */ typedef struct { /* the size of the matrix */ int nA; /* 1 + 2kb bands are not null */ int kb; /* the column indices of the nonzero bands*/ int ib[KB+1]; /* the bands themselves */ double ab[1 + 2 * KB][N * N]; } Matb; int main() { int i; int j; int k; int r; int n = N; int nn = N*N; int kb = KB; int inc = 1; int lda = 2 * N + 1; int ku = n; double alpha = 1; double beta = 0; double x[N*N]; double y[N * N]; Matb A; double A1[N * N][2 * N + 1]; char trans = 'N'; clock_t clock0; clock_t clock1; /* Initializationon of the matrix and vectors */ A.nA = nn; A.kb = kb; A.ib[0] = n; A.ib[1] = 1; A.ib[2] = 0; for (i = 0; i < A.nA; i++) { x[i] = 1; y[i] = 0; } memset(A.ab, 0, sizeof(double) * (1 + 2 * KB)*(N * N)); /* We set exactly only the nonzero fields. */ for (j = n; j < A.nA; j++) { A.ab[0][j] = -1; } for (j = 1; j < A.nA; j++) { A.ab[1][j] = -1; } for (j = 0; j < A.nA; j++) { A.ab[2][j] = 4; } for (j = 0; j < A.nA - 1; j++) { A.ab[3][j] = -1; } for (j = 0; j < A.nA - n; j++) { A.ab[4][j] = -1; } /* If the array is small - print it*/ if (N < 6) { for (i = 0; i < 5; i++) { for (j = 0; j < A.nA; j++) { printf("%2g ", A.ab[i][j]); } printf("\n"); } printf("\n"); } /* Bandwise multiplication *************************/ clock0 = clock(); /* Do the multiplication REP times*/ for (r = 0; r < REP; r++) { /* Clear y - this adds some overhead */ memset(y, 0, sizeof(double) * (N * N)); /* ... */ } clock1 = clock(); /* If the result is small - print it*/ if (N < 10) { printf("y = "); for (i = 0; i < A.nA; i++) { printf(" %g ", y[i]); } } printf("\nband multiplication CPU-time: %g miliseconds\n", (1000 * (double)(clock1 - clock0)) / CLOCKS_PER_SEC); /* Blas *********/ /* Fill A1 with 0 */ memset(A1, 0, sizeof(double) * (2 * N + 1)*(N * N)); /* Set the nonzero components */ for (j = n; j < A.nA; j++) { A1[j][0] = -1; } for (j = 1; j < A.nA; j++) { A1[j][ku - 1] = -1; } for (j = 0; j < A.nA; j++) { A1[j][ku] = 4; } for (j = 0; j < A.nA - 1; j++) { A1[j][ku + 1] = -1; } for (j = 0; j < A.nA - n; j++) { A1[j][lda - 1] = -1; } /* If the array is small - print it*/ if (N < 6) { for (i = 0; i < lda; i++) { for (j = 0; j < A.nA; j++) { printf("%2g ", A1[j][i]); } printf("\n"); } printf("\n"); } memset(y, 0, sizeof(double) * (N * N)); /* Do the multiplication REP times*/ clock0 = clock(); for (r = 0; r < REP; r++) { /* Sparse matrix multiplication function: for type "N": alpha*A*x + beta*y is stored in y */ dgbmv_(&trans, /* operation type, "N" for multiplication */ &nn, /* number of rows of the original matrix */ &nn, /* number of columns of the original matrix */ &ku, /* number of sub-diagonals (kl = ku) */ &ku, /* number of super-diagonals */ &alpha, /* the alpha parameter */ &(A1[0][0]), /* pointer to first melement lf the array of bands, see function definition for layout */ &lda, /* number of rows in A; at least (in our case exactly) kl + ku + 1) */ x, /* the vector */ &inc, /* increment in memory between components of x (1) */ &beta, /* the beta parameter */ y, /* the resultvector */ &inc /* increment in memory between components of y (1) */ ); } clock1 = clock(); /* If the result is small - print it*/ if (N < 10) { printf("y = "); for (i = 0; i < A.nA; i++) { printf(" %g ", y[i]); } } printf("\nblas CPU-time: %g miliseconds\n", (1000 * (double)(clock1 - clock0)) / CLOCKS_PER_SEC); return 0; }