[comp.lang.c] Re: hash function for text in C
Chris Torek
chris at mimsy.umd.edu
Fri Oct 19 04:44:51 AEST 1990
Archive-name: hash/18-Oct-90
Original-posting-by: chris at mimsy.umd.edu (Chris Torek)
Original-subject: Re: hash function for text in C
Reposted-by: emv at math.lsa.umich.edu (Edward Vielmetti)
[Reposted from comp.lang.c.
Comments on this service to emv at math.lsa.umich.edu (Edward Vielmetti).]
The following implements expanding chained hash tables for strings and
numbers. This is unmodified from a special-purpose program (having to
do with reading a large number of files with name=id pairs and doing
various operations on them). Chain lengths are not monitored; instead,
the table is invariably expanded whenever it becomes 2/3 full. Entries
may be neither removed nor modified.
An essentially-unrelated function called `string' maintains a string
pool such that string(x) == string(y) iff strcmp(x,y)==0 (the program
typically stores each name several times, and often needs to test for
name equality, so this helps there).
: Run this shell script with "sh" not "csh"
PATH=/bin:/usr/bin:/usr/ucb:/etc:$PATH
export PATH
all=false
if [ x$1 = x-a ]; then
all=true
fi
echo Extracting hash.h
sed 's/^X//' <<'//go.sysin dd *' >hash.h
X/*
X * $Id: hash.h,v 1.1 90/09/24 23:58:38 chris Exp $
X *
X * Hash table entries keep track of name (or id) = value pairs.
X * Values are simply pointers. Hash tables themselves are named
X * (for debugging).
X */
X
Xstruct hashtab;
X
X/*
X * The `ins' functions return nonzero if the old value existed,
X * without changing the value.
X * The iterate functions calls a given function with name,value
X * or id,value pairs.
X */
Xstruct hashtab *ht_new(); /* given name, create new hash table */
Xchar *ht_nget(); /* given table and name, get value */
Xchar *ht_iget(); /* given table and ID, get value */
Xint ht_nins(); /* given table and name, insert new value */
Xint ht_iins(); /* given table and id, insert new value */
Xvoid ht_niterate(); /* given table and function, iterate */
Xvoid ht_iiterate(); /* given table and function, iterate */
X
X/*
X * Some things that ought not to be here, but are anyway.
X * goodnumber() takes a number of objects and a size and returns
X * a new number of objects, such that malloc(goodnumber(n,size)*size)
X * calls malloc with a `good' size value (resulting in less wasted
X * memory). emalloc is malloc with program-abort on out-of-memory.
X * string() makes a `read-only' copy of a string, reusing the previous
X * copy if any.
X */
Xint goodnumber(); /* given n & sizeof, return new n */
Xchar *emalloc(); /* malloc, exit on error */
Xchar *string(); /* make an `ideal' copy of a string */
//go.sysin dd *
if [ `wc -c < hash.h` != 1415 ]; then
made=false
echo error transmitting hash.h --
echo length should be 1415, not `wc -c < hash.h`
else
made=true
fi
if $all; then
echo ' Changing owner to bin'
chown bin hash.h
else
echo ' Original owner was bin'
fi
if $made; then
chmod 444 hash.h
echo -n ' '; ls -ld hash.h
fi
echo Extracting hash.c
sed 's/^X//' <<'//go.sysin dd *' >hash.c
X#ifndef lint
Xstatic char rcsid[] = "$Id: hash.c,v 1.2 90/10/02 13:32:23 chris Exp $";
X#endif
X
X/*
X * Hash table routines.
X */
X
X#include <stdio.h>
X#include <stdlib.h>
X#include <string.h>
X#include "hash.h"
X
X/*
X * Hash table entries keep track of name=value pairs.
X * The names may be numeric IDs instead (by having a null name).
X */
Xstruct hashent {
X struct hashent *h_next;/* next in chain */
X int h_hash; /* hash value or ID */
X char *h_name; /* name (null if from numeric ID) */
X char *h_value; /* value */
X};
X
Xstruct hashtab {
X int ht_size; /* size (power of 2) */
X int ht_mask; /* == ht_size - 1 */
X int ht_used; /* number of entries used */
X int ht_lim; /* when to expand */
X struct hashent **ht_tab;/* base of table */
X char ht_name[1]; /* table name; actually larger */
X};
X
Xextern char *progname;
X
Xchar *
Xemalloc(n)
X size_t n;
X{
X register char *p = malloc(n);
X
X if (p == NULL) {
X (void) fprintf(stderr, "%s: out of memory\n", progname);
X exit(1);
X }
X return (p);
X}
X
X/* round up to next multiple of y, where y is a power of 2 */
X#define ROUND(x, y) (((x) + (y) - 1) & ~((y) - 1))
X
X/* compute a `good' number of objects to allocate via malloc */
Xint
Xgoodnumber(n, s)
X int n;
X size_t s;
X{
X
X /* 16384 is a guess at a good page size for malloc */
X /* 32 is a guess at malloc's overhead */
X return ((int)((ROUND(n * s + 32, 16384) - 32) / s));
X}
X
X/*
X * Make a new hash table.
X */
Xstruct hashtab *
Xht_new(name)
X char *name;
X{
X register struct hashtab *ht;
X register struct hashent **h;
X register int n;
X
X ht = (struct hashtab *)emalloc(sizeof *ht + strlen(name));
X ht->ht_tab = h = (struct hashent **)emalloc(128 * sizeof *h);
X ht->ht_size = 128;
X ht->ht_mask = 127;
X for (n = 128; --n >= 0;)
X *h++ = NULL;
X ht->ht_used = 0;
X ht->ht_lim = (128 * 2) / 3;
X (void) strcpy(ht->ht_name, name);
X return (ht);
X}
X
X/*
X * Expand an existing hash table.
X */
Xstatic void
Xht_expand(ht)
X register struct hashtab *ht;
X{
X register int n = ht->ht_size * 2, i;
X register struct hashent *p, **h, **oldh, *q;
X
X h = (struct hashent **)emalloc(n * sizeof *h);
X for (i = n; --i >= 0;)
X *h++ = NULL;
X h -= n;
X oldh = ht->ht_tab;
X n--;
X for (i = ht->ht_size; --i >= 0;) {
X for (p = *oldh++; p != NULL; p = q) {
X q = p->h_next;
X p->h_next = h[p->h_hash & n];
X h[p->h_hash & n] = p;
X }
X }
X free((char *)ht->ht_tab);
X ht->ht_tab = h;
X ht->ht_mask = n;
X ht->ht_size = ++n;
X ht->ht_lim = (n * 2) / 3;
X}
X
X/*
X * Make a new hash entry. Its h_next will be NULL.
X */
Xstatic struct hashent *
Xnewhashent(hash, name, value)
X int hash;
X char *name, *value;
X{
X static struct hashent *hfree;
X register struct hashent *h;
X register int n, nalloc;
X
X if ((h = hfree) != NULL)
X hfree = h->h_next;
X else {
X nalloc = goodnumber(2, sizeof *h); /* need at least 2 */
X hfree = h = (struct hashent *)emalloc(nalloc * sizeof *h) + 1;
X for (n = nalloc - 2; --n >= 0; h++)
X h->h_next = h + 1;
X h->h_next = NULL;
X h -= nalloc - 1;
X }
X h->h_next = NULL;
X h->h_hash = hash;
X h->h_name = name;
X h->h_value = value;
X return (h);
X}
X
X#define HASH(str, h, p) \
X for (p = str, h = 0; *p;) h = (h << 5) - h + *p++
X
X/*
X * Look up a name=value.
X */
Xchar *
Xht_nget(ht, name)
X register struct hashtab *ht;
X char *name;
X{
X register char *p;
X register int hash;
X register struct hashent *h;
X
X HASH(name, hash, p);
X p = name;
X for (h = ht->ht_tab[hash & ht->ht_mask]; h != NULL; h = h->h_next)
X if (h->h_hash == hash && h->h_name != NULL &&
X strcmp(h->h_name, p) == 0)
X return (h->h_value);
X return (NULL);
X}
X
X/*
X * Look up an ID=value.
X */
Xchar *
Xht_iget(ht, id)
X register struct hashtab *ht;
X register int id;
X{
X register struct hashent *h;
X
X for (h = ht->ht_tab[id & ht->ht_mask]; h != NULL; h = h->h_next)
X if (h->h_hash == id && h->h_name == NULL)
X return (h->h_value);
X return (NULL);
X}
X
X/*
X * Insert (do not clobber) a name=value.
X * Return zero on success.
X */
Xint
Xht_nins(ht, name, value)
X register struct hashtab *ht;
X char *name, *value;
X{
X register char *p;
X register int hash;
X register struct hashent *h, **hp;
X
X HASH(name, hash, p);
X p = name;
X for (hp = &ht->ht_tab[hash & ht->ht_mask]; (h = *hp) != NULL;
X hp = &h->h_next)
X if (h->h_hash == hash && h->h_name != NULL &&
X strcmp(h->h_name, p) == 0)
X return (-1);
X *hp = newhashent(hash, name, value);
X if (++ht->ht_used > ht->ht_lim)
X ht_expand(ht);
X return (0);
X}
X
X/*
X * Insert (do not clobber) an ID=value.
X * Return zero on success.
X */
Xint
Xht_iins(ht, id, value)
X register struct hashtab *ht;
X register int id;
X char *value;
X{
X register struct hashent *h, **hp;
X
X for (hp = &ht->ht_tab[id & ht->ht_mask]; (h = *hp) != NULL;
X hp = &h->h_next)
X if (h->h_hash == id && h->h_name == NULL)
X return (-1);
X *hp = newhashent(id, (char *)NULL, value);
X if (++ht->ht_used > ht->ht_lim)
X ht_expand(ht);
X return (0);
X}
X
X/*
X * Stash a copy of a string away; it will never be freed.
X */
Xstatic char *
Xpoolstr(s)
X char *s;
X{
X register char *p;
X register size_t l = strlen(s) + 1;
X static char *poolp;
X static size_t nleft;
X
X if (nleft < l)
X poolp = emalloc(nleft = goodnumber(l, 1));
X bcopy(s, p = poolp, l);
X poolp += l;
X return (p);
X}
X
X/*
X * Generate a single unique copy of the given string.
X */
Xchar *
Xstring(s)
X char *s;
X{
X register char *p;
X register int hash;
X register struct hashent *h, **hp;
X static struct hashtab *ht;
X
X if (ht == NULL)
X ht = ht_new("strings");
X HASH(s, hash, p);
X p = s;
X for (hp = &ht->ht_tab[hash & ht->ht_mask]; (h = *hp) != NULL;
X hp = &h->h_next)
X if (h->h_hash == hash && strcmp(h->h_name, p) == 0)
X return (h->h_name);
X *hp = h = newhashent(hash, poolstr(s), (char *)NULL);
X if (++ht->ht_used > ht->ht_lim)
X ht_expand(ht);
X return (h->h_name);
X}
X
X/*
X * Call fn on all the name=value pairs.
X */
Xvoid
Xht_niterate(ht, fn)
X struct hashtab *ht;
X register void (*fn)();
X{
X register struct hashent *h, **hp;
X register int n;
X
X for (n = ht->ht_size, hp = ht->ht_tab; --n >= 0;)
X for (h = *hp++; h != NULL; h = h->h_next)
X if (h->h_name != NULL)
X (*fn)(h->h_name, h->h_value);
X}
X
X/*
X * Call fn on all the id=value pairs.
X */
Xvoid
Xht_iiterate(ht, fn)
X struct hashtab *ht;
X register void (*fn)();
X{
X register struct hashent *h, **hp;
X register int n;
X
X for (n = ht->ht_size, hp = ht->ht_tab; --n >= 0;)
X for (h = *hp++; h != NULL; h = h->h_next)
X if (h->h_name == NULL)
X (*fn)(h->h_hash, h->h_value);
X}
//go.sysin dd *
if [ `wc -c < hash.c` != 6291 ]; then
made=false
echo error transmitting hash.c --
echo length should be 6291, not `wc -c < hash.c`
else
made=true
fi
if $all; then
echo ' Changing owner to bin'
chown bin hash.c
else
echo ' Original owner was bin'
fi
if $made; then
chmod 444 hash.c
echo -n ' '; ls -ld hash.c
fi
--
In-Real-Life: Chris Torek, Univ of MD Comp Sci Dept (+1 301 405 2750)
Domain: chris at cs.umd.edu Path: uunet!mimsy!chris
More information about the Alt.sources
mailing list