////////////////////////////////////////////////////////////////////// // $Id: driver.c,v 1.9 1998/02/24 22:55:33 dmartin Exp $ ////////////////////////////////////////////////////////////////////// // // Matrix Multiply Contest Test Driver // U.C. Berkeley, Department of EECS, Computer Science Division // CS 267, Spring 1998 // Based on code from Chad Yoshikawa // Extended by David Martin // ////////////////////////////////////////////////////////////////////// #include #include #include #include #include #include #define ABS(val) ((val) > 0 ? (val) : -(val)) #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) #define SQR(a) ((a) * (a)) #define CUBE(a) ((a) * (a) * (a)) ////////////////////////////////////////////////////////////////////// // BEGIN CONFIGURATION #define NUM_CORRECTNESS_CHECKS 10 #define RANDOM_TESTS 0 #define MAX_ERROR 2.0 #define TEST_RUNS 30 #define CALC_ITERS(n) (10 + 1e8 / CUBE (n)) int qtest_sizes[] = { 16, 24, 32, 48, 64, 96, 128, 192, 256 }; // aligned sizes int atest_sizes[] = { 23, 31, 47, 73, 97, 127, 163, 191, 211, 229, 251 }; // odd sizes /* primes from 16 to 256: 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 101 103 107 109 113 127 131 137 139 149 151 157 163 167 173 179 181 191 193 197 199 211 223 227 229 233 239 241 251 */ // END CONFIGURATION ////////////////////////////////////////////////////////////////////// #define NUM_QTESTS (sizeof(qtest_sizes) / sizeof(int)) #define NUM_ATESTS (sizeof(atest_sizes) / sizeof(int)) int* test_sizes[2] = { qtest_sizes, atest_sizes }; int num_tests[2] = { NUM_QTESTS, NUM_ATESTS }; extern void matmul (int i_matdim, const double* pd_A, const double* pd_B, double* pd_C); #define MUL_MFMF_MF(size,A,B,C) matmul(size,A,B,C) extern double drand48(); extern unsigned short* seed48(); extern int getrussage(int,struct rusage*); struct rusage rus; /* starting time */ struct rusage rue; /* ending time */ #define START_TIMING getrusage(RUSAGE_SELF,&rus); #define STOP_TIMING getrusage(RUSAGE_SELF,&rue); double reportTiming() { struct timeval utime; utime.tv_sec = rue.ru_utime.tv_sec - rus.ru_utime.tv_sec ; if ( rue.ru_utime.tv_usec < rus.ru_utime.tv_usec ) { utime.tv_sec--; utime.tv_usec = 1000000l - rus.ru_utime.tv_usec + rue.ru_utime.tv_usec; } else utime.tv_usec = rue.ru_utime.tv_usec - rus.ru_utime.tv_usec ; return ((double)utime.tv_sec + (double)utime.tv_usec*1e-6); } void myseed() { int i; unsigned short seed16v[3]; for (i=0;i<3;i++) seed16v[i] = time(0); seed48(seed16v); } /* ** A naive matrix multiply routine. ** Used to test for correctness. */ void naive_mm(int Sm,int Sk,int Sn, const double *A,const double *B,double *C) { int i,j,k; for (i=0;i MAX_ERROR) { fprintf (stderr, "Error for test case %dx%d is %f > %f. DISQUALIFIED!!!\n", matdim, matdim, err, MAX_ERROR); //exit (0); } } fprintf (stderr,"\n"); free (A); free (B); free (C); free (cA); free (cB); free (cC); } void timeIt () { double *A, *B, *C; double *oA[TEST_RUNS], *oB[TEST_RUNS], *oC[TEST_RUNS]; int i, j, k; int test; for (k = 0; k < 2; k++) { if (k > 0) printf ("\n"); for (test = 0; test < num_tests[k]; test++) { int matdim = test_sizes[k][test]; const int num_iters = CALC_ITERS (matdim); double max_mflops = 0.0; int run; /* make sure these are quad-word (i.e., 16-byte) aligned */ #if 0 A = oA = (double*) malloc ((SQR(matdim)+1) * sizeof(double)); B = oB = (double*) malloc ((SQR(matdim)+1) * sizeof(double)); C = oC = (double*) malloc ((SQR(matdim)+1) * sizeof(double)); #endif for (run = 0; run < TEST_RUNS; run++) { int iter; double mflops; double utime; /* use different matricies for each trial so that the OS page mapping */ /* won't affect the results... */ A = oA[run] = (double*) malloc ((SQR(matdim)+rrand(1,10)) * sizeof(double)); B = oB[run] = (double*) malloc ((SQR(matdim)+rrand(1,10)) * sizeof(double)); C = oC[run] = (double*) malloc ((SQR(matdim)+rrand(1,10)) * sizeof(double)); if (((unsigned)A) & 0x8) A = (double*)(((unsigned)A)+0x8); if (((unsigned)B) & 0x8) B = (double*)(((unsigned)B)+0x8); if (((unsigned)C) & 0x8) C = (double*)(((unsigned)C)+0x8); mat_init (A, matdim, matdim); mat_init (B, matdim, matdim); mat_init (C, matdim, matdim); START_TIMING; for (iter=0;iter