SGI GL matrix performance
James Price
jamie at archone.tamu.edu
Sat Apr 27 06:10:33 AEST 1991
Has anyone done any benchmarking of the SGI matrix functions? I was curious
and wrote the program included below. It does a number of 4x4 matrix
multiplies, first using software, and then using the geometry pipeline
functions (loadmatrix(), multmatrix(), getmatrix()).
Here are some typical results:
10000 iterations on fritz, with GL version: GL4DGT-3.3
Software - no optimization: 3.349 sec.
Software - some optimization: 1.130 sec.
Software - more optimization: 0.910 sec.
Hardware - preserve CTM: 2.379 sec.
Hardware - destroy CTM: 2.289 sec.
Hardware - abandon results: 0.580 sec.
The actual hardware multiplication is fast (0.580 sec/10000 multiplies)
but if we call getmatrix() to access the results, it slows things down
by a factor of about four (to 2.379 sec/10000 multiplies). I was hoping to use the
speed of the hardware for my own matrix needs, but it looks like the
getmatrix() call is simply too costly. Is there a better way?
Jim Price
jamie at archone.tamu.edu
Visualization Laboratory
Texas A&M University
/**************************************************************************/
/* */
/* matperf.c - SGI GL matrix performance checker */
/* */
/* to compile: cc -o matperf matperf.c -lgl_s -lm */
/* */
/* to run: matperf n */
/* where n = number of matrix multiplies to perform */
/* */
/**************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/param.h>
#include <gl.h>
/* A 4x4 array of floats; every routine below takes a pointer to one. */
typedef float MAT44[4][4];

/* Utility routines. */
void Print44(MAT44 *pMat);      /* print the 16 elements, one row per line */
void Identity(MAT44 *pMat);     /* overwrite *pMat with the identity matrix */

/* Difference between two gettimeofday() samples, as a double in seconds. */
double Duration(struct timeval *ptv1, struct timeval *ptv2);

/*
 * Software multiplies, from least to most hand-optimized.
 * Versions 2 and 3 write the 16 products through a flat float pointer
 * (row by row) instead of through a MAT44 pointer.
 */
void SoftMult44_1(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2);
void SoftMult44_2(float pResult[], MAT44 *pm1, MAT44 *pm2);
void SoftMult44_3(float pResult[], MAT44 *pm1, MAT44 *pm2);

/*
 * Multiplies performed through the GL matrix calls:
 *   _1 brackets the work with pushmatrix()/popmatrix() and reads the result
 *      back with getmatrix();
 *   _2 reads the result back but does not push/pop;
 *   _3 pushes/pops but never reads the result back.
 */
void HardMult44_1(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2);
void HardMult44_2(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2);
void HardMult44_3(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2);
/*
 * Benchmark driver.
 *
 * Usage: matperf n
 *   n - number of matrix multiplies to time for each of the six variants
 *       (three software, three GL-based).
 *
 * Prints the wall-clock time taken by each variant.  Returns EXIT_SUCCESS
 * after a complete run, EXIT_FAILURE on a bad command line or when the GL
 * window cannot be opened.
 */
int main(int argc, char *argv[])
{
    register long i;
    long iter;
    char *end;
    MAT44 m1, m2, result;
    char hwver[13], hostname[MAXHOSTNAMELEN+1];
    struct timeval tv1, tv2;
    struct timezone tz;

    if (argc != 2)
    {
        printf("Usage: matperf n\n");
        return EXIT_FAILURE;
    }

    /* strtol() lets us reject non-numeric or trailing-garbage arguments,
       which atoi() would silently turn into 0 */
    iter = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || iter < 0)
    {
        printf("Usage: matperf n\n");
        return EXIT_FAILURE;
    }

    /* put in some numbers */
    Identity(&m1);
    m1[0][1] = 1.0;
    m1[0][2] = 1.0;
    m1[0][3] = 1.0;
    Identity(&m2);
    m2[0][0] = 5;
    m2[1][1] = 6;
    m2[2][2] = 7;
    m2[3][0] = 10;
    m2[3][1] = 20;
    m2[3][2] = 30;

    /* on failure print an empty name rather than uninitialized bytes */
    if (gethostname(hostname, MAXHOSTNAMELEN) != 0)
        hostname[0] = '\0';
    /* a truncated name is not guaranteed to be NUL-terminated */
    hostname[MAXHOSTNAMELEN] = '\0';
    gversion(hwver);

    /* winopen() necessary to use geometry pipeline */
    prefposition(500,600,500,600);
    noport();
    if (winopen("perf") == -1)
    {
        fprintf(stderr, "matperf: cannot open a GL window\n");
        return EXIT_FAILURE;
    }

    /* give window processes a chance to get up and running */
    sleep(5);

    printf("\n%ld iterations on %s, with GL version: %s\n",iter,hostname,hwver);

    /* The MAT44 objects are passed by address (&m1), and the flat-output
       routines get a pointer to the first float, so every argument has
       exactly the type the callee declares. */
    gettimeofday(&tv1,&tz);
    for (i=0; i<iter; i++)
        SoftMult44_1(&result,&m1,&m2);
    gettimeofday(&tv2,&tz);
    printf("\nSoftware - no optimization: %7.3f sec.\n",Duration(&tv1,&tv2));

    gettimeofday(&tv1,&tz);
    for (i=0; i<iter; i++)
        SoftMult44_2(&result[0][0],&m1,&m2);
    gettimeofday(&tv2,&tz);
    printf("\nSoftware - some optimization: %7.3f sec.\n",Duration(&tv1,&tv2));

    gettimeofday(&tv1,&tz);
    for (i=0; i<iter; i++)
        SoftMult44_3(&result[0][0],&m1,&m2);
    gettimeofday(&tv2,&tz);
    printf("\nSoftware - more optimization: %7.3f sec.\n",Duration(&tv1,&tv2));

    gettimeofday(&tv1,&tz);
    for (i=0; i<iter; i++)
        HardMult44_1(&result,&m1,&m2);
    gettimeofday(&tv2,&tz);
    printf("\nHardware - preserve CTM: %7.3f sec.\n",Duration(&tv1,&tv2));

    gettimeofday(&tv1,&tz);
    for (i=0; i<iter; i++)
        HardMult44_2(&result,&m1,&m2);
    gettimeofday(&tv2,&tz);
    printf("\nHardware - destroy CTM: %7.3f sec.\n",Duration(&tv1,&tv2));

    gettimeofday(&tv1,&tz);
    for (i=0; i<iter; i++)
        HardMult44_3(&result,&m1,&m2);
    gettimeofday(&tv2,&tz);
    printf("\nHardware - abandon results: %7.3f sec.\n",Duration(&tv1,&tv2));

    printf("\nDone.");
    return EXIT_SUCCESS;
}
/*
 * Elapsed time between two gettimeofday() samples, as a real number.
 *
 * ptv1 - earlier sample
 * ptv2 - later sample
 *
 * Returns (*ptv2 - *ptv1) in seconds; negative if ptv2 precedes ptv1.
 *
 * The seconds and microseconds fields are subtracted as integers before
 * converting to double.  Converting each full timestamp first would spend
 * most of the double's mantissa on the epoch-sized tv_sec value and round
 * away the low microseconds of the difference.
 */
double Duration(struct timeval *ptv1, struct timeval *ptv2)
{
    double whole_sec = (double)(ptv2->tv_sec - ptv1->tv_sec);
    double frac_sec = (double)(ptv2->tv_usec - ptv1->tv_usec) / 1000000.0;

    return whole_sec + frac_sec;
}
/*
 * 4x4 multiply, baseline version: *pResult = (*pm1) x (*pm2).
 *
 * Straightforward triple loop.  Each output element is zeroed and then
 * accumulated in place, one product at a time, through full [][] indexing.
 */
void SoftMult44_1(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2)
{
    int row, col, term;

    for (row = 0; row < 4; row++)
    {
        for (col = 0; col < 4; col++)
        {
            (*pResult)[row][col] = 0.0;
            for (term = 0; term < 4; term++)
            {
                (*pResult)[row][col] =
                    (*pResult)[row][col] + (*pm1)[row][term] * (*pm2)[term][col];
            }
        }
    }
}
/*
 * 4x4 multiply with the innermost loop unrolled.
 *
 * pResult - receives the 16 products as a flat run of floats, row by row
 * pm1     - left operand
 * pm2     - right operand
 *
 * Each element is computed as a single four-term sum and stored through
 * the advancing output pointer.
 */
void SoftMult44_2(float pResult[], MAT44 *pm1, MAT44 *pm2)
{
    register int row, col;

    for (row = 0; row < 4; row++)
    {
        for (col = 0; col < 4; col++)
        {
            *pResult++ = (*pm1)[row][0] * (*pm2)[0][col]
                       + (*pm1)[row][1] * (*pm2)[1][col]
                       + (*pm1)[row][2] * (*pm2)[2][col]
                       + (*pm1)[row][3] * (*pm2)[3][col];
        }
    }
}
/*
 * 4x4 multiply with both inner loops unrolled.
 *
 * pResult - receives the 16 products as a flat run of floats, row by row
 * pm1     - left operand
 * pm2     - right operand
 *
 * Only the row loop remains; the four elements of each output row are
 * written out explicitly, then the output pointer steps to the next row.
 */
void SoftMult44_3(float pResult[], MAT44 *pm1, MAT44 *pm2)
{
    register int row;

    for (row = 0; row < 4; row++)
    {
        pResult[0] = (*pm1)[row][0] * (*pm2)[0][0]
                   + (*pm1)[row][1] * (*pm2)[1][0]
                   + (*pm1)[row][2] * (*pm2)[2][0]
                   + (*pm1)[row][3] * (*pm2)[3][0];

        pResult[1] = (*pm1)[row][0] * (*pm2)[0][1]
                   + (*pm1)[row][1] * (*pm2)[1][1]
                   + (*pm1)[row][2] * (*pm2)[2][1]
                   + (*pm1)[row][3] * (*pm2)[3][1];

        pResult[2] = (*pm1)[row][0] * (*pm2)[0][2]
                   + (*pm1)[row][1] * (*pm2)[1][2]
                   + (*pm1)[row][2] * (*pm2)[2][2]
                   + (*pm1)[row][3] * (*pm2)[3][2];

        pResult[3] = (*pm1)[row][0] * (*pm2)[0][3]
                   + (*pm1)[row][1] * (*pm2)[1][3]
                   + (*pm1)[row][2] * (*pm2)[2][3]
                   + (*pm1)[row][3] * (*pm2)[3][3];

        pResult += 4;
    }
}
/*
 * preserve CTM
 *
 * Multiply through the GL matrix calls and read the product back.
 *
 * pResult - receives the matrix returned by getmatrix()
 * pm1     - matrix handed to multmatrix()
 * pm2     - matrix handed to loadmatrix()
 *
 * The work is bracketed by pushmatrix()/popmatrix() so the caller's
 * current transformation matrix is restored afterwards.
 *
 * The pointers are dereferenced before being passed on: the GL calls take
 * a Matrix (a float[4][4], which decays to a pointer to its first row),
 * not a pointer to a whole MAT44.
 *
 * NOTE(review): per the GL documentation multmatrix() premultiplies the
 * current matrix, which would make the result (*pm1) x (*pm2) like the
 * software versions - confirm against the man page.
 */
void HardMult44_1(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2)
{
    pushmatrix();
    loadmatrix(*pm2);
    multmatrix(*pm1);
    getmatrix(*pResult);
    popmatrix();
}
/*
 * destroy CTM
 *
 * Multiply through the GL matrix calls and read the product back, without
 * saving the caller's current transformation matrix: loadmatrix()
 * overwrites it and nothing restores it.
 *
 * pResult - receives the matrix returned by getmatrix()
 * pm1     - matrix handed to multmatrix()
 * pm2     - matrix handed to loadmatrix()
 *
 * The pointers are dereferenced before being passed on: the GL calls take
 * a Matrix (a float[4][4], which decays to a pointer to its first row),
 * not a pointer to a whole MAT44.
 */
void HardMult44_2(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2)
{
    loadmatrix(*pm2);
    multmatrix(*pm1);
    getmatrix(*pResult);
}
/*
 * preserve CTM, abandon results
 *
 * Issue the GL load and multiply but never read the product back, so only
 * the cost of pushmatrix()/loadmatrix()/multmatrix()/popmatrix() is paid.
 *
 * pResult - unused; present so the signature matches the other HardMult44
 *           variants
 * pm1     - matrix handed to multmatrix()
 * pm2     - matrix handed to loadmatrix()
 *
 * The pointers are dereferenced before being passed on: the GL calls take
 * a Matrix (a float[4][4], which decays to a pointer to its first row),
 * not a pointer to a whole MAT44.
 */
void HardMult44_3(MAT44 *pResult, MAT44 *pm1, MAT44 *pm2)
{
    (void)pResult;  /* intentionally ignored */

    pushmatrix();
    loadmatrix(*pm2);
    multmatrix(*pm1);
    popmatrix();
}
/*
 * Write the matrix to stdout.  Each row starts on a new line (the newline
 * is emitted before the row, not after) and every element is printed with
 * three decimals followed by a space.
 */
void Print44(MAT44 *pMat)
{
    int row, col;

    for (row = 0; row < 4; row++)
    {
        const float *cells = (*pMat)[row];

        printf("\n");
        for (col = 0; col < 4; col++)
        {
            printf("%5.3f ", cells[col]);
        }
    }
}
/*
 * Overwrite *pMat with the 4x4 identity matrix: ones on the main diagonal,
 * zeros everywhere else.
 */
void Identity(MAT44 *pMat)
{
    int row, col;

    for (row = 0; row < 4; row++)
    {
        /* clear the whole row, then drop in the diagonal element */
        for (col = 0; col < 4; col++)
        {
            (*pMat)[row][col] = 0.0;
        }
        (*pMat)[row][row] = 1.0;
    }
}
More information about the Comp.sys.sgi
mailing list