/* iccbin -tpp2 -openmp -L/opt/intel/mkl/10.1.1.019/lib/64 -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lpthread -lm */ #include #include #include #include #include #define N 1240000 #define NUM_NON_0 5 #define ITMAX 100 /* * Compares two integers. Needed for sorting. */ int compare_f(const void *f1, const void *f2) { return *((int *)f1) > *((int *)f2); } /* * Initializes the data structures. */ void init(float b[N], float x1[N], float x2[N], float M[N], float Nb[NUM_NON_0 * N], int jc[NUM_NON_0 * N], int j_ad[N + 1]) { int i; int j; int k; int counter; int diff = 0; int ind; int l = 0; int indices[N]; memset(indices, -1, sizeof(int) * N); for(i = 0; i < N; i++) { x1[i] = 0; b[i] = 1; /* The diagonal is all 10 */ M[i]= 10.0; /* Each row has NUM_NON_0 non-zero entries outside the diagonal */ j_ad[i] = NUM_NON_0 * i; /* Choose NUM_NON_0 random locations not on diagonal */ indices[i] = i; for(j = 0; j < NUM_NON_0; j++) { do { /* Choose a random index in row not on diagonal */ ind = rand() % N; /* Check if the index is not already used for this row */ diff = 0; if(indices[ind] < i) { indices[ind] = i; diff = 1; } } while(diff != 1); /* Remember the index */ jc[j + NUM_NON_0 * i] = ind; /* Set the -1 */ Nb[j + NUM_NON_0 * i] = -1.0; } qsort(&jc[NUM_NON_0 * i], NUM_NON_0, sizeof(float), compare_f); } j_ad[N] = NUM_NON_0 * N; /* If the array is small enough - print it */ if(N < 11) { counter = 0; for(i = 0; i < N; ++i) { for(j = 0; j < N; ++j) { if(i == j) { printf("%4g ", M[i]); } else if(jc[counter] == j) { printf("%4g ", Nb[counter++]); } else { printf(" 0 "); } } printf("\n"); } printf("Nb = "); for(i = 0; i < NUM_NON_0 * N; ++i) { printf("%2g ", Nb[i]); } printf("\njc =\n"); for(i = 0; i < NUM_NON_0 * N; ++i) { printf("%d ", jc[i]); if(jc[i] == i / NUM_NON_0) { printf("%d <- error, diag ", jc[i]); } if(i % NUM_NON_0 == NUM_NON_0 - 1) { printf("\n"); } } printf("j_ad = "); for(i = 0; i < N + 1; ++i) { printf("%d ", j_ad[i]); } printf("\n"); } } /****************************************************************************/ float sdot_(int* , float* , int* , float* , int* ); void scopy_(int* , float* , int* , float* , int* ); /****************************************************************************/ int main() { /* right-hand-side + 2 vectors for the iterations */ float b[N]; float x1[N]; float x2[N]; /* Data structure for the matrix */ float M[N]; float Nb[NUM_NON_0 * N]; int jc[NUM_NON_0 * N]; int j_ad[N + 1]; clock_t clock0; clock_t clock1; int i; int j; int it = 0; float norm = N; int n = N; int inc = 1; /* Seed the random generator to get different matrices eqach time */ /*srand(time(NULL));*/ /* Seed with constsnt to get the same results each time */ srand(3456); int num_threads = omp_get_max_threads(); printf("number of processors: %d\n", num_threads); /* Initializations */ init(b, x1, x2, M, Nb, jc, j_ad); clock0 = clock(); /* Jacobi */ while (it < ITMAX && norm > 1.e-6) { /* Perform iteration */ /* ... */ scopy_(&n, x2, &inc, x1, &inc); /* Computing the residual */ /* ... */ it++; /* printf("norm=%g it=%d\n", norm, it); */ } clock1 = clock(); printf("norm of the residual: %g Number of iterations: %d \n", norm, it); printf("x[N / 2] = %g\n", x1[N / 2]); printf("CPU-time : %g miliseconds\n\n", 1000.0 * (clock1 - clock0) / (double)CLOCKS_PER_SEC); return 0; }