count of bits in a long
David Moews
moews at math.berkeley.edu
Wed Oct 17 17:45:48 AEST 1990
I took Gene Olson's test program and stuck Torek's routines into it, as well
as adding routines to count bits by using a table with a 16-bit index, and
a slightly sped-up version of HAKMEM 169 (`hackmemallfold'). The 16-bit
table lookup appears to be fastest, at least on a Sun-3/50 and a
Sparcstation 1. `hackmemallfold' and `sumbits' both use a total of 16
arithmetic, logical, and shift operations. However, if gcc is used
`hackmemallfold' is apparently faster (because gcc refuses to put constants
in registers?)
name technique used
---- --------------
hackmemmod HACKMEM 169, using % operator
hackmemloop HACKMEM 169, using loop to implement %
hackmemunrolled HACKMEM 169, with 5-step % (loop unrolled)
hackmemallfold HACKMEM 169, no %, fold to the bitter end
rmlsbsub remove lsb with `n -= (n & -n)'
rmlsbmask remove lsb with `n &= n - 1'
testlsb test n&1, then n>>=1
testmsb test n&MSB, then n+=n (rather than n<<=1)
testsignandshift test n<0, then n<<=1
testeachbit test n&mask, then mask+=mask
testeachbit1shl test n&(1<<bit) for bit in [0..32)
tableshift nbits[n>>24] + nbits[(n>>16)&255] ...
tableuchar set p=&n, nbits[p[0]] + nbits[p[1]] ...
tableshiftcast nbits[n>>24] + nbits[(u_char)(n>>16)] ...
itableshift same as tableshift, but table datatype is int
itableuchar ditto
itableshiftcast ditto (note, nbits table is `char')
16tableshift nbits16[n>>16] + nbits16[n&65535]
16tableushort set p=&n, nbits16[p[0]] + nbits16[p[1]]
16tableshiftcast nbits16[n>>16] + nbits16[(u_short)(n)]
16itableshift same as 16tableshift, but table datatype is int
16itableushort ditto
16itableshiftcast ditto
sumbits Gene Olson's function (see code for comments)
Sun-3/50, SunOS 4.1, cc -O
function time ratio
hackmemmod 1.0166168e-05 1.931
hackmemloop 1.2187958e-05 2.315
hackmemunrolled 1.3732910e-05 2.609
hackmemallfold 8.2588196e-06 1.569
rmlsbsub 1.9512177e-05 3.707
rmlsbmask 1.9512177e-05 3.707
testlsb 4.6901703e-05 8.909
testmsb 4.8561096e-05 9.225
testsignandshift 4.0779114e-05 7.746
testeachbit 4.6749115e-05 8.880
testeachbit1shl 6.6032410e-05 12.543
tableshift 1.6460419e-05 3.127
tableuchar 1.3256073e-05 2.518
tableshiftcast 1.1730194e-05 2.228
itableshift 1.6918182e-05 3.214
itableuchar 9.5176697e-06 1.808
itableshiftcast 1.2359619e-05 2.348
16tableshift 1.0337830e-05 1.964
16tableushort 6.0081482e-06 1.141
16tableshiftcast 6.1416626e-06 1.167
16itableshift 5.2642822e-06 1.000
16itableushort 6.6184998e-06 1.257
16itableshiftcast 1.1196136e-05 2.127
sumbits 7.1525574e-06 1.359
Sun-3/50, SunOS 4.1, gcc -O, ver. 1.37.1
function time ratio
hackmemmod 1.0070801e-05 3.259
hackmemloop 1.1653900e-05 3.772
hackmemunrolled 1.0719299e-05 3.469
hackmemallfold 7.9727173e-06 2.580
rmlsbsub 1.9207001e-05 6.216
rmlsbmask 1.9226074e-05 6.222
testlsb 4.4364929e-05 14.358
testmsb 3.7155151e-05 12.025
testsignandshift 4.1427612e-05 13.407
testeachbit 4.8522949e-05 15.704
testeachbit1shl 5.4588318e-05 17.667
tableshift 1.1863708e-05 3.840
tableuchar 8.7928772e-06 2.846
tableshiftcast 1.6403198e-05 5.309
itableshift 9.8037720e-06 3.173
itableuchar 6.3896179e-06 2.068
itableshiftcast 1.3904572e-05 4.500
16tableshift 5.7983398e-06 1.877
16tableushort 4.4250488e-06 1.432
16tableshiftcast 5.3215027e-06 1.722
16itableshift 4.9591064e-06 1.605
16itableushort 3.0899048e-06 1.000
16itableshiftcast 9.1934204e-06 2.975
sumbits 9.6511841e-06 3.123
SparcStation 1, SunOS 4.0.3c, cc -O
function time ratio
hackmemmod 1.4197826e-05 18.551
hackmemloop 2.2125244e-06 2.891
hackmemunrolled 1.4662743e-06 1.916
hackmemallfold 1.0657310e-06 1.393
rmlsbsub 5.1355362e-06 6.710
rmlsbmask 4.2915344e-06 5.607
testlsb 1.0557175e-05 13.794
testmsb 1.0647774e-05 13.913
testsignandshift 1.0533333e-05 13.763
testeachbit 1.0833740e-05 14.156
testeachbit1shl 1.3933182e-05 18.206
tableshift 9.0837479e-07 1.187
tableuchar 1.3136864e-06 1.717
tableshiftcast 9.1314316e-07 1.193
itableshift 1.1110306e-06 1.452
itableuchar 1.5211105e-06 1.988
itableshiftcast 1.1157990e-06 1.458
16tableshift 8.2015991e-07 1.072
16tableushort 1.1849403e-06 1.548
16tableshiftcast 7.6532364e-07 1.000
16itableshift 1.6880035e-06 2.206
16itableushort 2.0956993e-06 2.738
16itableshiftcast 1.6403198e-06 2.143
sumbits 1.0561943e-06 1.380
SparcStation 1, SunOS 4.0.3c, gcc -O, ver. 1.37
function time ratio
hackmemmod 1.1713505e-05 14.241
hackmemloop 2.4437904e-06 2.971
hackmemunrolled 1.4662743e-06 1.783
hackmemallfold 1.0633469e-06 1.293
rmlsbsub 5.1999092e-06 6.322
rmlsbmask 4.3869019e-06 5.333
testlsb 1.2927055e-05 15.716
testmsb 1.6040802e-05 19.501
testsignandshift 1.2907982e-05 15.693
testeachbit 1.1508465e-05 13.991
testeachbit1shl 1.4796257e-05 17.988
tableshift 1.0609627e-06 1.290
tableuchar 1.5187263e-06 1.846
tableshiftcast 1.0633469e-06 1.293
itableshift 1.2660027e-06 1.539
itableuchar 1.7237663e-06 2.096
itableshiftcast 1.2612343e-06 1.533
16tableshift 8.7499619e-07 1.064
16tableushort 1.1301041e-06 1.374
16tableshiftcast 8.2254410e-07 1.000
16itableshift 1.7356873e-06 2.110
16itableushort 1.9979477e-06 2.429
16itableshiftcast 1.6951561e-06 2.061
sumbits 1.3136864e-06 1.597
--
David Moews moews at math.berkeley.edu
-----------------------------cut here for program-------------------------------
#include <stdio.h>
#include <sys/types.h>
#ifdef FD_SETSIZE
# define USE_GETRUSAGE
# include <sys/time.h>
# include <sys/resource.h>
#else
# include <sys/times.h>
#endif
#define SIZEOF(a) (sizeof(a)/sizeof(a[0]))
#define NUMRAND 16384
unsigned long rr[NUMRAND];
/*
* This function is used to calibrate the timing mechanism.
* This way we can subtract the loop and call overheads.
*/
int
calibrate(n)
register unsigned long n;
{
return 0;
}
/*
* This function counts the number of bits in a long.
* It is limited to 63 bit longs, but a minor mod can cope with 511 bits.
*
* It is so magic, an explanation is required:
* Consider a 3 bit number as being
* 4a+2b+c
* if we shift it right 1 bit, we have
* 2a+b
* subtracting this from the original gives
* 2a+b+c
* if we shift the original 2 bits right we get
* a
* and so with another subtraction we have
* a+b+c
* which is the number of bits in the original number.
* Suitable masking allows the sums of the octal digits in a
* 32 bit number to appear in each octal digit. This isn't much help
* unless we can get all of them summed together.
* This can be done by modulo arithmetic (sum the digits in a number by molulo
* the base of the number minus one) the old "casting out nines" trick they
* taught in school before calculators were invented.
* Now, using mod 7 wont help us, because our number will very likely have
* more than 7 bits set. So add the octal digits together to get base64
* digits, and use modulo 63.
* (Those of you with 64 bit machines need to add 3 octal digits together to
* get base512 digits, and use mod 511.)
*
* This is HACKMEM 169, as used in X11 sources.
*/
int
t0_hackmemmod(n)
register unsigned long n;
{
register unsigned long tmp;
tmp = n - ((n >> 1) & 033333333333) - ((n >> 2) & 011111111111);
return ((tmp + (tmp >> 3)) & 030707070707) % 63;
}
/*
* This is the same as the above, but does not use the % operator.
* Most modern machines have clockless division, and so the modulo is as
* expensive as, say, [sic] an addition.
*/
int
t1_hackmemloop(n)
register unsigned long n;
{
register unsigned long tmp;
tmp = n - ((n >> 1) & 033333333333) - ((n >> 2) & 011111111111);
tmp = (tmp + (tmp >> 3)) & 030707070707;
while (tmp > 63)
tmp = (tmp & 63) + (tmp >> 6);
return tmp;
}
/*
* Above, without using while loop. It takes at most 5 iterations of the
* loop, so we just do all 5 in-line. The final result is never 63
* (this is assumed above as well).
*/
int
t2_hackmemunrolled(n)
register unsigned long n;
{
register unsigned long tmp;
tmp = n - ((n >> 1) & 033333333333) - ((n >> 2) & 011111111111);
tmp = (tmp + (tmp >> 3)) & 030707070707;
tmp = (tmp & 63) + (tmp >> 6);
tmp = (tmp & 63) + (tmp >> 6);
tmp = (tmp & 63) + (tmp >> 6);
tmp = (tmp & 63) + (tmp >> 6);
tmp = (tmp & 63) + (tmp >> 6);
return (tmp);
}
/*
* As above, but no modulus nor simulation thereof. Instead,
* we continue shifting and masking to the bitter end.
*/
int
t3_hackmemallfold(n)
register unsigned long n;
{
register unsigned long tmp;
tmp = n - ((n >> 1) & 033333333333) - ((n >> 2) & 011111111111);
tmp = (tmp + (tmp >> 3)) & 030707070707;
tmp += tmp >> 6;
return ((tmp + (tmp >> 12) + (tmp >> 24)) & 077);
}
/*
* This function counts the bits in a long.
*
* It removes the lsb and counting the number of times round the loop.
* The expression (n & -n) yields the lsb of a number,
* but it only works on 2's compliment machines.
*/
int
t4_rmlsbsub(n)
register unsigned long n;
{
register int count;
for (count = 0; n; n -= (n & -n))
count++;
return count;
}
int
t5_rmlsbmask(n)
register unsigned long n;
{
register int count;
for (count = 0; n; count++)
n &= n - 1; /* take away lsb */
return (count);
}
/*
* This function counts the bits in a long.
*
* It works by shifting the number down and testing the bottom bit.
*/
int
t6_testlsb(n)
register unsigned long n;
{
register int count;
for (count = 0; n; n >>= 1)
if (n & 1)
count++;
return count;
}
/*
* This function counts the bits in a long.
*
* It works by shifting the number left and testing the top bit.
* On many machines shift is expensive, so it uses a cheap addition instead.
*/
int
t7_testmsb(n)
register unsigned long n;
{
register int count;
for (count = 0; n; n += n)
if (n & ~(~(unsigned long)0 >> 1))
count++;
return count;
}
int
t8_testsignandshift(n)
register unsigned long n;
{
register int count;
for (count = 0; n; n <<= 1)
if ((long)n < 0)
count++;
return (count);
}
/*
* This function counts the bits in a long.
*
* It works by masking each bit.
* This is the second most intuitively obvious method,
* and is independent of the number of bits in the long.
*/
int
t9_testeachbit(n)
register unsigned long n;
{
register int count;
register unsigned long mask;
count = 0;
for (mask = 1; mask; mask += mask)
if (n & mask)
count++;
return count;
}
/*
* This function counts the bits in a long.
*
* It works by masking each bit.
* This is the most intuitively obvious method,
* but how do you a priori know how many bits in the long?
* (except for ''sizeof(long) * CHAR_BITS'' expression)
*/
int
tA_testeachbit1shl(n)
register unsigned long n;
{
register int count;
register int bit;
count = 0;
for (bit = 0; bit < 32; ++bit)
if (n & ((unsigned long)1 << bit))
count++;
return count;
}
static char nbits[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
static int inbits[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
int
tB_tableshift(n)
register unsigned long n;
{
return (nbits[n & 0xff] + nbits[(n >> 8) & 0xff] +
nbits[(n >> 16) & 0xff] + nbits[n >> 24]);
}
int
tC_tableuchar(n)
unsigned long n;
{
register unsigned char *p = (unsigned char *)&n;
return (nbits[p[0]] + nbits[p[1]] + nbits[p[2]] + nbits[p[3]]);
}
int
tD_tableshiftcast(n)
register unsigned long n;
{
return nbits[(unsigned char) n] +
nbits[(unsigned char) (n >> 8)] +
nbits[(unsigned char) (n >> 16)] +
nbits[n >> 24];
}
int
tE_itableshift(n)
register unsigned long n;
{
return (inbits[n & 0xff] + inbits[(n >> 8) & 0xff] +
inbits[(n >> 16) & 0xff] + inbits[n >> 24]);
}
int
tF_itableuchar(n)
unsigned long n;
{
register unsigned char *p = (unsigned char *)&n;
return (inbits[p[0]] + inbits[p[1]] + inbits[p[2]] + inbits[p[3]]);
}
int
tG_itableshiftcast(n)
register unsigned long n;
{
return inbits[(unsigned char) n] +
inbits[(unsigned char) (n >> 8)] +
inbits[(unsigned char) (n >> 16)] +
inbits[n >> 24];
}
static char nbits16[65536];
static int inbits16[65536];
int
tH_16tableshift(n)
register unsigned long n;
{
return (nbits16[n & 0xffff] + nbits16[n >> 16]);
}
int
tI_16tableushort(n)
unsigned long n;
{
register unsigned short *p = (unsigned short *)&n;
return (nbits16[p[0]] + nbits16[p[1]]);
}
int
tJ_16tableshiftcast(n)
register unsigned long n;
{
return nbits16[(unsigned short) n] +
nbits16[n >> 16];
}
int
tK_16itableshift(n)
register unsigned long n;
{
return (inbits16[n & 0xffff] + inbits16[n >> 16]);
}
int
tL_16itableushort(n)
unsigned long n;
{
register unsigned short *p = (unsigned short *)&n;
return (inbits16[p[0]] + inbits16[p[1]]);
}
int
tM_16itableshiftcast(n)
register unsigned long n;
{
return inbits16[(unsigned short) n] +
inbits16[n >> 16];
}
/*
* Explanation:
* First we add 32 1-bit fields to get 16 2-bit fields.
* Each 2-bit field is one of 00, 01, or 10 (binary).
* We then add all the two-bit fields to get 8 4-bit fields.
* These are all one of 0000, 0001, 0010, 0011, or 0100.
*
* Now we can do something different, becuase for the first
* time the value in each k-bit field (k now being 4) is small
* enough that adding two k-bit fields results in a value that
* still fits in the k-bit field. The result is four 4-bit
* fields containing one of {0000,0001,...,0111,1000} and four
* more 4-bit fields containing junk (sums that are uninteresting).
* Pictorially:
* n = 0aaa0bbb0ccc0ddd0eee0fff0ggg0hhh
* n>>4 = 00000aaa0bbb0ccc0ddd0eee0fff0ggg
* sum = 0aaaWWWWiiiiXXXXjjjjYYYYkkkkZZZZ
* where W, X, Y, and Z are the interesting sums (each at most 1000,
* or 8 decimal). Masking with 0x0f0f0f0f extracts these.
*
* Now we can change tactics yet again, because now we have:
* n = 0000WWWW0000XXXX0000YYYY0000ZZZZ
* n>>8 = 000000000000WWWW0000XXXX0000YYYY
* so sum = 0000WWWW000ppppp000qqqqq000rrrrr
* where p and r are the interesting sums (and each is at most
* 10000, or 16 decimal). The sum `q' is junk, like i, j, and
* k above; but it is not necessarry to discard it this time.
* One more fold, this time by sixteen bits, gives
* n = 0000WWWW000ppppp000qqqqq000rrrrr
* n>>16 = 00000000000000000000WWWW000ppppp
* so sum = 0000WWWW000ppppp000sssss00tttttt
* where s is at most 11000 and t is it most 100000 (32 decimal).
*
* Now we have t = r+p = (Z+Y)+(X+W) = ((h+g)+(f+e))+((d+c)+(b+a)),
* or in other words, t is the number of bits set in the original
* 32-bit longword. So all we have to do is return the low byte
* (or low 6 bits, but `low byte' is typically just as easy if not
* easier).
*
* This technique is also applicable to 64 and 128 bit words, but
* 256 bit or larger word sizes require at least one more masking
* step.
*/
int
tN_sumbits(n)
register unsigned long n;
{
n = (n & 0x55555555) + ((n >> 1) & 0x55555555);
n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
n = (n + (n >> 4)) & 0x0f0f0f0f;
n += n >> 8;
n += n >> 16;
return (n & 0xff);
}
/*
* build a long random number.
* The standard rand() returns at least a 15 bit number.
* We use the top 9 of 15 (since the lower N bits of the usual rand()
* repeat with a period of 2^N).
*/
unsigned long
bigrand()
{
#define randbits() ((unsigned long)((rand() >> 6) & 0777))
register int r;
r = randbits() << 27;
r |= randbits() << 18;
r |= randbits() << 9;
r |= randbits();
return(r) ;
}
/*
* Run the test many times.
* You will need a running time in the 10s of seconds
* for an accurate result.
*
* To give a fair comparison, the random number generator
* is seeded the same for each measurement.
*
* Return value is seconds per iteration.
*/
#if defined(mips) || defined(sparc)
#define REPEAT (1L<<22)
#else
#define REPEAT (1L<<20)
#endif
double
measure(func)
register int (*func)();
{
#ifdef USE_GETRUSAGE
struct rusage ru0, ru1;
register long j;
(void) getrusage(RUSAGE_SELF, &ru0);
for (j = 0; j < REPEAT; ++j)
func(rr[j & (NUMRAND-1)]);
(void) getrusage(RUSAGE_SELF, &ru1);
ru1.ru_utime.tv_sec -= ru0.ru_utime.tv_sec;
if ((ru1.ru_utime.tv_usec -= ru0.ru_utime.tv_usec) < 0) {
ru1.ru_utime.tv_usec += 1000000;
ru1.ru_utime.tv_sec--;
}
return ((ru1.ru_utime.tv_sec + (ru1.ru_utime.tv_usec / 1000000.0)) /
(double)REPEAT);
#else
register long j;
struct tms start;
struct tms finish;
times(&start);
for (j = 0; j < REPEAT; ++j)
func(rr[j & (NUMRAND-1)]);
times(&finish);
return ((finish.tms_utime - start.tms_utime) / (double)REPEAT);
#endif
}
struct table {
char *name;
int (*func)();
double measurement;
int trial;
};
struct table table[] =
{
{ "hackmemmod", t0_hackmemmod },
{ "hackmemloop", t1_hackmemloop },
{ "hackmemunrolled", t2_hackmemunrolled },
{ "hackmemallfold", t3_hackmemallfold },
{ "rmlsbsub", t4_rmlsbsub },
{ "rmlsbmask", t5_rmlsbmask },
{ "testlsb", t6_testlsb },
{ "testmsb", t7_testmsb },
{ "testsignandshift", t8_testsignandshift },
{ "testeachbit", t9_testeachbit },
{ "testeachbit1shl", tA_testeachbit1shl },
{ "tableshift", tB_tableshift },
{ "tableuchar", tC_tableuchar },
{ "tableshiftcast", tD_tableshiftcast },
{ "itableshift", tE_itableshift },
{ "itableuchar", tF_itableuchar },
{ "itableshiftcast", tG_itableshiftcast },
{ "16tableshift", tH_16tableshift },
{ "16tableushort", tI_16tableushort },
{ "16tableshiftcast", tJ_16tableshiftcast },
{ "16itableshift", tK_16itableshift },
{ "16itableushort", tL_16itableushort },
{ "16itableshiftcast", tM_16itableshiftcast },
{ "sumbits", tN_sumbits }
};
main(argc, argv)
int argc;
char **argv;
{
double calibration, v, best;
int j, k, m, verbose;
/* Initialize the 16-bit lookup table. */
for (j = 0; j < 65536; ++j) {
nbits16[j] = inbits16[j] = tN_sumbits((unsigned long)j);
}
verbose = argc > 1;
/*
* run a few tests to make sure they all agree
*/
srand(getpid());
for (j = 0; j < 10; ++j) {
unsigned long n;
int bad;
n = bigrand();
for (k = 0; k < SIZEOF(table); ++k)
table[k].trial = table[k].func(n);
bad = 0;
for (k = 0; k < SIZEOF(table); ++k)
for (m = 0; m < SIZEOF(table); ++m)
if (table[k].trial != table[m].trial)
bad = 1;
if (bad) {
printf("wrong: %08lX", n);
for (k = 0; k < SIZEOF(table); ++k)
printf(" %3d", table[k].trial);
printf("\n");
}
}
for (k = 0 ; k < NUMRAND ; k++) rr[k] = bigrand() ;
/*
* calibrate the timing mechanism
*/
calibration = measure(calibrate);
if (verbose)
printf("calibration: %g\n", calibration);
/*
* time them all, keeping track of the best (smallest)
*/
for (j = 0; j < SIZEOF(table); ++j) {
v = measure(table[j].func) - calibration;
if (verbose)
printf("%s: %g\n", table[j].name, v);
table[j].measurement = v;
if (!j || v < best)
best = v;
}
(void) printf("%-24s %-14sratio\n", "function", "time");
for (j = 0; j < SIZEOF(table); ++j) {
(void) printf("%-24s %#10.8g %6.3f\n",
table[j].name,
table[j].measurement,
table[j].measurement / best);
}
exit(0);
}
More information about the Comp.lang.c
mailing list