count of bits in a long
Peter Miller
peter at neccan.oz
Fri Oct 5 12:17:33 AEST 1990
/*
* A while back I posted a solution to the "count of bits set in a
* long" question, suggesting that HACKMEM 169 was a very fast way to do it.
* This program demonstrates the technique.
*
* The HACKMEM 169 method uses a modulo to sum the digits.
* A number of people have asked me "how can a division be faster than a loop?"
* This program demonstrates how. The modulo is a special case,
* and can be unwound into a tight loop if your machine does not
* have a fast divide opcode (although many modern machines have a 1 cycle
* divide opcode, except for we-really-believe-our-press-hype risc
* manufacturers).
*
* This program was run on a 386/20 under unix with the following results:
* Func sec/iter scaled
* nbits 0.00018310547 1.000 hackmem 169
* nbits1 0.0006980896 3.812 hackmem 169 no % operator
* nbits2 0.0016822815 9.188 count lsb's loop
* nbits3 0.0037384033 20.417
* nbits4 0.0034370422 18.771
* nbits5 0.0037765503 20.625
* nbits6 0.0035247803 19.250
*/
#include <stdio.h>
#include <sys/types.h>
#include <sys/times.h>
#define SIZEOF(a) (sizeof(a)/sizeof(a[0]))
/*
* This function is used to calibrate the timing mechanism.
* This way we can subtract the loop and call overheads.
*/
int
calibrate(n)
register unsigned long n;
{
return 0;
}
/*
* This function counts the number of bits in a long.
* It is limited to 63 bit longs, but a minor mod can cope with 511 bits.
*
* It is so magic, an explanation is required:
* Consider a 3 bit number as being
* 4a+2b+c
* if we shift it right 1 bit, we have
* 2a+b
* subtracting this from the original gives
* 2a+b+c
* if we shift the original 2 bits right we get
* a
* and so with another subtraction we have
* a+b+c
* which is the number of bits in the original number.
* Suitable masking allows the sums of the octal digits in a
* 32 bit bumber to appear in each octal digit. This isn't much help
* unless we can get all of them summed together.
* This can be done by modulo arithmetic (sum the digits in a number by molulo
* the base of the number minus one) the old "cating out nines" trick they
* taught in school before calculators were invented.
* Now, using mod 7 wont help us, because our number will very likely have
* more than 7 bits set. So add the octal digits together to get base64
* digits, and use modulo 63.
* (Those of you with 64 bit machines need to add 3 octal digits together to
* get base512 digits, and use mod 511.)
*
* This is HACKMEM 169, as used in X11 sources.
*/
int
nbits(n)
register unsigned long n;
{
register unsigned long tmp;
tmp = n - ((n >> 1) & 033333333333) - ((n >> 2) & 011111111111);
return ((tmp + (tmp >> 3)) & 030707070707) % 63;
}
/*
* This is the same as the above, but does not use the % operator.
* Most modern machines have clockless division, and so the modulo is as
* expensive as, say, an addition.
*/
int
nbits1(n)
register unsigned long n;
{
register unsigned long tmp;
tmp = n - ((n >> 1) & 033333333333) - ((n >> 2) & 011111111111);
tmp = (tmp + (tmp >> 3)) & 030707070707;
while (tmp > 63)
tmp = (tmp & 63) + (tmp >> 6);
return tmp;
}
/*
* This function counts the bits in a long.
*
* It removes the lsb and counting the number of times round the loop.
* The expression (n & -n) yields the lsb of a number,
* but it only works on 2's compliment machines.
*/
int
nbits2(n)
register unsigned long n;
{
register int count;
for (count = 0; n; n -= (n & -n))
count++;
return count;
}
/*
* This function counts the bits in a long.
*
* It works by shifting the number left and testing the top bit.
* On many machines shift is expensive, so it uses a cheap addition instead.
*/
int
nbits3(n)
register unsigned long n;
{
register int count;
for (count = 0; n; n += n)
if (n & ~(~(unsigned long)0 >> 1))
count++;
return count;
}
/*
* This function counts the bits in a long.
*
* It works by shifting the number down and testing the bottom bit.
*/
int
nbits4(n)
register unsigned long n;
{
register int count;
for (count = 0; n; n >>= 1)
if (n & 1)
count++;
return count;
}
/*
* This function counts the bits in a long.
*
* It works by masking each bit.
* This is the second most intuitively obvious method,
* and is independent of the number of bits in the long.
*/
int
nbits5(n)
register unsigned long n;
{
register int count;
register unsigned long mask;
count = 0;
for (mask = 1; mask; mask += mask)
if (n & mask)
count++;
return count;
}
/*
* This function counts the bits in a long.
*
* It works by masking each bit.
* This is the most intuitively obvious method,
* but how do you a priori know how many bits in the long?
* (except for ''sizeof(long) * CHAR_BITS'' expression)
*/
int
nbits6(n)
register unsigned long n;
{
register int count;
register int bit;
count = 0;
for (bit = 0; bit < 32; ++bit)
if (n & ((unsigned long)1 << bit))
count++;
return count;
}
/*
* build a long random number.
* The standard rand() returns a 15bit number.
*/
unsigned long
bigrand()
{
return
(
((unsigned long)(rand() & 0xFFF) << 24)
|
((unsigned long)(rand() & 0xFFF) << 12)
|
(unsigned long)(rand() & 0xFFF)
);
}
/*
* Run the test many times.
* You will need a running time in the 10s of seconds
* for an accurate result.
*
* To give a fair comparison, the random number generator
* is seeded the same for each measurement.
*
* Return value is seconds per iteration.
*/
#define REPEAT (1L<<18)
double
measure(func)
int (*func)();
{
long j;
struct tms start;
struct tms finish;
srand(1);
times(&start);
for (j = 0; j < REPEAT; ++j)
func(bigrand());
times(&finish);
return ((finish.tms_utime - start.tms_utime) / (double)REPEAT);
}
struct table
{
char *name;
int (*func)();
double measurement;
int trial;
};
struct table table[] =
{
{ "nbits", nbits },
{ "nbits1", nbits1 },
{ "nbits2", nbits2 },
{ "nbits3", nbits3 },
{ "nbits4", nbits4 },
{ "nbits5", nbits3 },
{ "nbits6", nbits4 },
};
main()
{
double calibration;
double best;
int j, k, m;
/*
* run a few tests to make sure they all agree
*/
srand(getpid());
for (j = 0; j < 10; ++j)
{
unsigned long n;
int bad;
n = bigrand();
for (k = 0; k < SIZEOF(table); ++k)
table[k].trial = table[k].func(n);
bad = 0;
for (k = 0; k < SIZEOF(table); ++k)
for (m = 0; m < SIZEOF(table); ++m)
if (table[k].trial != table[m].trial)
bad = 1;
if (bad)
{
printf("wrong:\t%08lX", n);
for (k = 0; k < SIZEOF(table); ++k)
printf("\t%d", table[k].trial);
printf("\n");
}
}
/*
* calibrate the timing mechanism
*/
calibration = measure(calibrate);
/*
* time them all, keeping track of the best (smallest)
*/
for (j = 0; j < SIZEOF(table); ++j)
{
table[j].measurement = measure(table[j].func) - calibration;
if (!j || table[j].measurement < best)
best = table[j].measurement;
}
for (j = 0; j < SIZEOF(table); ++j)
{
printf
(
"%s\t%10.8g\t%6.3f\n",
table[j].name,
table[j].measurement,
table[j].measurement / best
);
}
exit(0);
}
Regards
Peter Miller UUCP uunet!munnari!pdact.pd.necisa.oz!pmiller
ARPA pmiller%pdact.pd.necisa.oz at australia
/\/\* CSNET pmiller%pdact.pd.necisa.oz at au
ACSnet pmiller at pdact.pd.necisa.oz
Disclaimer: The views expressed here are personal and do not necessarily
reflect the view of my employer or the views or my colleagues.
D
More information about the Comp.lang.c
mailing list