/usr/share/doc/papi-examples/ctests/matrix-hl.c is in papi-examples 5.4.3-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/****************************************************************************
*C
*C matrix-hl.f
*C An example of matrix-matrix multiplication and using PAPI high level
*C to look at the performance. written by Kevin London
*C March 2000
*C Added to c tests to check stop
*C****************************************************************************
*/
#include "papi_test.h"
#include <stdlib.h>
/*
 * Report whether a measured counter value misses the expected value by
 * more than 5% of the expected value.
 *
 * measured: counter value obtained from PAPI.
 * expected: operation count predicted for the computation.
 *
 * Returns nonzero when |measured - expected| > 0.05 * expected.
 */
static int
count_outside_tolerance( long long measured, long long expected )
{
	/* Stay in long long: narrowing the counters to int would truncate
	 * large counts and could overflow in the subtraction. */
	return llabs( measured - expected ) > ( double ) expected * 0.05;
}

/*
 * Multiply two fixed-size matrices while two PAPI high-level counters are
 * running, print the counter values unless TESTS_QUIET is set, and, when
 * the first event is PAPI_FP_INS, check the measured count against the
 * expected number of floating point operations (or half of it, in case
 * fused multiply-adds are being counted).
 *
 * argc, argv: forwarded to tests_quiet().
 *
 * The outcome is reported through test_skip()/test_fail()/test_pass().
 * NOTE(review): those helpers presumably terminate the process, which
 * would make the final return unreachable -- confirm against papi_test.h.
 */
int
main( int argc, char **argv )
{
#define NROWS1 175
#define NCOLS1 225
#define NROWS2 NCOLS1
#define NCOLS2 150
	double p[NROWS1][NCOLS1], q[NROWS2][NCOLS2], r[NROWS1][NCOLS2];
	int i, j, k, num_events, retval;
	/* PAPI standardized event to be monitored */
	int event[2];
	/* PAPI values of the counters */
	long long values[2], tmp;
	extern int TESTS_QUIET;

	tests_quiet( argc, argv );

	/* See how many hardware events at one time are supported
	 * This also initializes the PAPI library */
	num_events = PAPI_num_counters(  );
	if ( num_events < 2 ) {
		printf( "This example program requries the architecture to "
				"support 2 simultaneous hardware events...shutting down.\n" );
		test_skip( __FILE__, __LINE__, "PAPI_num_counters", 1 );
	}

	if ( !TESTS_QUIET )
		printf( "Number of hardware counters supported: %d\n", num_events );

	/* Prefer a floating point event; fall back to total instructions */
	if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK )
		event[0] = PAPI_FP_OPS;
	else if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK )
		event[0] = PAPI_FP_INS;
	else
		event[0] = PAPI_TOT_INS;

	/* Time used */
	event[1] = PAPI_TOT_CYC;

	/* matrix 1: read in the matrix values */
	for ( i = 0; i < NROWS1; i++ )
		for ( j = 0; j < NCOLS1; j++ )
			p[i][j] = i * j * 1.0;

	/* matrix 2 */
	for ( i = 0; i < NROWS2; i++ )
		for ( j = 0; j < NCOLS2; j++ )
			q[i][j] = i * j * 1.0;

	/* result matrix starts from the same i*j pattern, not from zero */
	for ( i = 0; i < NROWS1; i++ )
		for ( j = 0; j < NCOLS2; j++ )
			r[i][j] = i * j * 1.0;

	/* Set up the counters */
	num_events = 2;
	retval = PAPI_start_counters( event, num_events );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval );

	/* Clear the counter values */
	retval = PAPI_read_counters( values, num_events );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval );

	/* Compute the matrix-matrix multiplication */
	for ( i = 0; i < NROWS1; i++ )
		for ( j = 0; j < NCOLS2; j++ )
			for ( k = 0; k < NCOLS1; k++ )
				r[i][j] = r[i][j] + p[i][k] * q[k][j];

	/* Stop the counters and put the results in the array values */
	retval = PAPI_stop_counters( values, num_events );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval );

	/* Make sure the compiler does not optimize away the multiplication
	 * with dummy(r);
	 */
	dummy( r );

	if ( !TESTS_QUIET ) {
		/* The "FP Instructions:" label covers both PAPI_FP_OPS and
		 * PAPI_FP_INS. */
		if ( event[0] == PAPI_TOT_INS ) {
			printf( TAB1, "TOT Instructions:", values[0] );
		} else {
			printf( TAB1, "FP Instructions:", values[0] );
		}
		printf( TAB1, "Cycles:", values[1] );
	}

	/*
	 * Intel Core overreports flops by 50% when using -O
	 * Use -O2 or -O3 to produce the expected # of flops
	 */
	if ( event[0] == PAPI_FP_INS ) {
		/* Compare measured FLOPS to expected value:
		 * one multiply and one add per innermost iteration */
		tmp =
			2 * ( long long ) ( NROWS1 ) * ( long long ) ( NCOLS2 ) *
			( long long ) ( NCOLS1 );
		if ( count_outside_tolerance( values[0], tmp ) ) {
			/* Maybe we are counting FMAs? */
			tmp = tmp / 2;
			if ( count_outside_tolerance( values[0], tmp ) ) {
				printf( "\n" TAB1, "Expected operation count: ", 2 * tmp );
				printf( TAB1, "Or possibly (using FMA):   ", tmp );
				printf( TAB1, "Instead I got:             ", values[0] );
				test_fail( __FILE__, __LINE__,
						   "Unexpected FLOP count (check vector operations)",
						   1 );
			}
		}
	}

	test_pass( __FILE__, 0, 0 );

	return ( PAPI_EMISC );
}
|