"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "STREAM/stream.c" between
hpcc-1.5.0b.tar.gz and hpcc-1.5.0.tar.gz

About: The HPCC (HPC Challenge) benchmark consists of 7 tests: HPL, STREAM, RandomAccess, PTRANS, FFTE, DGEMM, and b_eff Latency/Bandwidth.

stream.c  (hpcc-1.5.0b):stream.c  (hpcc-1.5.0)
skipping to change at line 389 skipping to change at line 389
} }
*aAvgErr = aSumErr / (double) array_elements; *aAvgErr = aSumErr / (double) array_elements;
*bAvgErr = bSumErr / (double) array_elements; *bAvgErr = bSumErr / (double) array_elements;
*cAvgErr = cSumErr / (double) array_elements; *cAvgErr = cSumErr / (double) array_elements;
} }
int int
HPCC_Stream(HPCC_Params *params, int doIO, MPI_Comm comm, int world_rank, HPCC_Stream(HPCC_Params *params, int doIO, MPI_Comm comm, int world_rank,
double *copyGBs, double *scaleGBs, double *addGBs, double *triadGBs, int *failure) { double *copyGBs, double *scaleGBs, double *addGBs, double *triadGBs, int *failure) {
int quantum, BytesPerWord, numranks, myrank; int quantum, BytesPerWord, numranks, myrank;
int i, j, k; int j, k;
double scalar, t, t0, t1, tmin, times[4][NTIMES]; double scalar, t, t0, t1, times[4][NTIMES], times_copy[4][NTIMES];
FILE *outFile; FILE *outFile;
double GiBs = 1024.0 * 1024.0 * 1024.0, curGBs; double GiBs = 1024.0 * 1024.0 * 1024.0, curGBs;
double AvgError[3] = {0.0,0.0,0.0}; double AvgError[3] = {0.0,0.0,0.0};
double *AvgErrByRank; double *AvgErrByRank;
double *TimesByRank;
if (doIO) { if (doIO) {
outFile = fopen( params->outFname, "a" ); outFile = fopen( params->outFname, "a" );
if (! outFile) { if (! outFile) {
outFile = stderr; outFile = stderr;
fprintf( outFile, "Cannot open output file.\n" ); fprintf( outFile, "Cannot open output file.\n" );
return 1; return 1;
} }
} }
skipping to change at line 481 skipping to change at line 480
c[j] = 0.0; c[j] = 0.0;
} }
/* Rank 0 needs to allocate arrays to hold error data and timing data from /* Rank 0 needs to allocate arrays to hold error data and timing data from
all ranks for analysis and output. all ranks for analysis and output.
Allocate and instantiate the arrays here -- after the primary arrays Allocate and instantiate the arrays here -- after the primary arrays
have been instantiated -- so there is no possibility of having these have been instantiated -- so there is no possibility of having these
auxiliary arrays mess up the NUMA placement of the primary arrays. */ auxiliary arrays mess up the NUMA placement of the primary arrays. */
/* There are 3 average error values for each rank (using double). */ /* There are 3 average error values for each rank (using double). */
AvgErrByRank = (double *) malloc(3 * sizeof(double) * numranks); AvgErrByRank = HPCC_XMALLOC( double, 3 * numranks );
/* There are 4*NTIMES timing values for each rank (always doubles) */ /* There are 4*NTIMES timing values for each rank (always doubles) */
TimesByRank = (double *) malloc(4 * NTIMES * sizeof(double) * numranks); if (AvgErrByRank == NULL) {
if (AvgErrByRank == NULL || TimesByRank == NULL) {
if (AvgErrByRank != NULL) free( AvgErrByRank );
if (TimesByRank != NULL) free( TimesByRank );
if (doIO) if (doIO)
fprintf( outFile, "Ooops -- allocation of arrays to collect timing data on MPI rank %d failed\n", world_rank); fprintf( outFile, "Ooops -- allocation of arrays to collect timing data on MPI rank %d failed\n", world_rank);
MPI_Abort(comm, 3); /* FIXME: handle failure more gracefully */ MPI_Abort(comm, 3); /* FIXME: handle failure more gracefully */
} }
/* FIXME: replace with loop to use floating-point data */ /* FIXME: replace with loop to use floating-point data */
memset(AvgErrByRank,0,3*sizeof(double)*numranks); memset(AvgErrByRank,0,3*sizeof(double)*numranks);
memset(TimesByRank,0,4*NTIMES*sizeof(double)*numranks);
if (doIO) fprintf( outFile, HLINE); if (doIO) fprintf( outFile, HLINE);
if ( (quantum = checktick()) >= 1) { if ( (quantum = checktick()) >= 1) {
if (doIO) fprintf( outFile, "Your clock granularity/precision appears to be " if (doIO) fprintf( outFile, "Your clock granularity/precision appears to be "
"%d microseconds.\n", quantum); "%d microseconds.\n", quantum);
} else { } else {
if (doIO) fprintf( outFile, "Your clock granularity appears to be " if (doIO) fprintf( outFile, "Your clock granularity appears to be "
"less than one microsecond.\n"); "less than one microsecond.\n");
} }
skipping to change at line 613 skipping to change at line 608
} }
t0 = MPI_Wtime(); t0 = MPI_Wtime();
/* --- SUMMARY --- */ /* --- SUMMARY --- */
/* Because of the MPI_Barrier() calls, the timings from any thread are equally valid. /* Because of the MPI_Barrier() calls, the timings from any thread are equally valid.
The best estimate of the maximum performance is the minimum of the "outside the barrier" The best estimate of the maximum performance is the minimum of the "outside the barrier"
timings across all the MPI ranks. */ timings across all the MPI ranks. */
/* Gather all timing data to MPI rank 0 */ memcpy(times_copy, times, sizeof times_copy );
MPI_Gather(times, 4*NTIMES, MPI_DOUBLE, TimesByRank, 4*NTIMES, MPI_DOUBLE, 0
, comm);
/* Rank 0 processes all timing data */ /* for each iteration and each kernel, collect the minimum time across all M
if (myrank == 0) { PI ranks */
/* for each iteration and each kernel, collect the minimum time across all MPI_Allreduce( times_copy, times, 4*NTIMES, MPI_DOUBLE, MPI_MIN, comm );
MPI ranks
and overwrite the rank 0 "times" variable with the minimum so the origi
nal post-
processing code can still be used. */
for (k=0; k<NTIMES; k++) {
for (j=0; j<4; j++) {
tmin = 1.0e36;
for (i=0; i<numranks; i++) {
tmin = Mmin(tmin, TimesByRank[4*NTIMES*i+j*NTIMES+k]);
}
times[j][k] = tmin;
}
}
/* Back to the original code, but now using the minimum global timing acro /* Back to the original code, but now using the minimum global timing across
ss all ranks */ all ranks */
for (k=1; k<NTIMES; k++) /* note -- skip first iteration */ for (k=1; k<NTIMES; k++) /* note -- skip first iteration */
{ {
for (j=0; j<4; j++) for (j=0; j<4; j++)
{ {
avgtime[j] = avgtime[j] + times[j][k]; avgtime[j] = avgtime[j] + times[j][k];
mintime[j] = Mmin(mintime[j], times[j][k]); mintime[j] = Mmin(mintime[j], times[j][k]);
maxtime[j] = Mmax(maxtime[j], times[j][k]); maxtime[j] = Mmax(maxtime[j], times[j][k]);
} }
} }
if (doIO) if (doIO)
fprintf( outFile, "Function Rate (GB/s) Avg time Min time Max time\n"); fprintf( outFile, "Function Rate (GB/s) Avg time Min time Max time\n");
for (j=0; j<4; j++) { for (j=0; j<4; j++) {
avgtime[j] /= (double)(NTIMES - 1); /* note -- skip first iteration */ avgtime[j] /= (double)(NTIMES - 1); /* note -- skip first iteration */
/* make sure no division by zero */ /* make sure no division by zero */
curGBs = (mintime[j] > 0.0 ? 1.0 / mintime[j] : -1.0); curGBs = (mintime[j] > 0.0 ? 1.0 / mintime[j] : -1.0);
curGBs *= 1e-9 * bytes[j] * array_elements; curGBs *= 1e-9 * bytes[j] * array_elements;
if (doIO) if (doIO)
fprintf( outFile, "%s%11.4f %11.4f %11.4f %11.4f\n", label[j], fprintf( outFile, "%s%11.4f %11.4f %11.4f %11.4f\n", label[j],
curGBs, curGBs,
avgtime[j], avgtime[j],
mintime[j], mintime[j],
maxtime[j]); maxtime[j]);
switch (j) { switch (j) {
case 0: *copyGBs = curGBs; break; case 0: *copyGBs = curGBs; break;
case 1: *scaleGBs = curGBs; break; case 1: *scaleGBs = curGBs; break;
case 2: *addGBs = curGBs; break; case 2: *addGBs = curGBs; break;
case 3: *triadGBs = curGBs; break; case 3: *triadGBs = curGBs; break;
} }
}
if (doIO) fprintf( outFile, HLINE);
} }
if (doIO)
fprintf( outFile, HLINE);
/* --- Every Rank Checks its Results --- */ /* --- Every Rank Checks its Results --- */
computeSTREAMerrors(&AvgError[0], &AvgError[1], &AvgError[2]); computeSTREAMerrors(&AvgError[0], &AvgError[1], &AvgError[2]);
/* --- Collect the Average Errors for Each Array on Rank 0 --- */ /* --- Collect the Average Errors for Each Array on Rank 0 --- */
MPI_Gather(AvgError, 3, MPI_DOUBLE, AvgErrByRank, 3, MPI_DOUBLE, 0, comm); MPI_Gather(AvgError, 3, MPI_DOUBLE, AvgErrByRank, 3, MPI_DOUBLE, 0, comm);
/* -- Combined averaged errors and report on Rank 0 only --- */ /* -- Combined averaged errors and report on Rank 0 only --- */
if (myrank == 0) { if (myrank == 0) {
checkSTREAMresults( outFile, doIO, AvgErrByRank, numranks, failure ); checkSTREAMresults( outFile, doIO, AvgErrByRank, numranks, failure );
if (doIO) fprintf( outFile, HLINE); if (doIO) fprintf( outFile, HLINE);
} }
HPCC_free(AvgErrByRank);
HPCC_free(c); HPCC_free(c);
HPCC_free(b); HPCC_free(b);
HPCC_free(a); HPCC_free(a);
if (doIO) { if (doIO) {
fflush( outFile ); fflush( outFile );
fclose( outFile ); fclose( outFile );
} }
return 0; return 0;
 End of changes. 12 change blocks. 
47 lines changed or deleted 29 lines changed or added

Home  |  About  |  All  |  Newest  |  Fossies Dox  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTPS