Sets in C (like Pascal) **source included**
Graham Toal
gtoal at tharr.UUCP
Fri Jun 29 15:55:16 AEST 1990
Archive-name: setdemo.c
/*
File: setdemo.c
Author: Graham Toal <gtoal%uk.ac.ed at nsfnet-relay.ac.uk>
Created: 27/06/90
Bryon Lape <wozniak at utkux1.utk.edu> recently asked for C code to implement
pascal-like sets. By coincidence I had written this just a few days
earlier, as a proof-of-concept test program. Slight lack of comments
but should be fathomable.
The code in fact implements something stronger than pascal sets; it allows
arbitrarily sized sets, and it lets you construct expressions out of
those sets *without* using any scratch workspace to evaluate intermediates.
(The real use of the code is in free text indexing, where the sets are
lists of addresses of words in a large file, and we are performing
queries on the file)
(c) Graham Toal 1990.
I retain copyright on this only to stop anyone else copyrighting it and
restricting my use of my own code! Otherwise it is entirely in the
public domain. Use it as you wish, commercially or otherwise.
Note: the sets are stored as lists of elements; if the input data
is always unique and sorted, the output data will be too; a simple
proof by induction can show this. So when constructing sets as
input to this code, remember to validate them.
For example, list1 = [ 1 2 38 3456 100006 ] is ok, but
list2 = [ 1 38 2 3456 100006 ] is bad (order), and
list3 = [ 1 1 1 2 38 3456 100006 ] is also bad (not unique).
*/
/*#define DEBUG*/
#include <stdio.h>
#define TRUE (0==0)
#define FALSE (0!=0)
#define DATA int
typedef struct PIPEstruct PIPE;
typedef int (*UNARYFN)(PIPE *, DATA *);
typedef int (*BINFN)(PIPE *, PIPE *, DATA *);
/* easily extended to ternary functions for things such as range-checking */
#define MONO 1
#define BINARY 2
#define PENDING (EOF+1)
#define UNDEF (PENDING+1)
struct PIPEstruct {
int fntype;
UNARYFN monofun;
BINFN bifun;
struct PIPEstruct *op1, *op2;
int state; /* EOF, PENDING, UNDEF */
DATA lookahead;
int *array; /* These two represent data stream for now */
int index;
/* DATA offset1, offset2; */ /* These two would be used in real life
to point into a file of word indexes */
};
void unread(PIPE *p, DATA d) {
p->state = PENDING; p->lookahead = d;
}
int and(PIPE *left, PIPE *right, DATA *value) {
DATA d1, d2;
/* Perform an AND operation on two streams */
for (;;) {
if (!eval(left, &d1) || !eval(right, &d2)) return(FALSE);
while ((d1 < d2) && eval(left, &d1));
while ((d2 < d1) && eval(right, &d2));
if (d1 == d2) {*value = d1; return(TRUE);}
}
}
int or(PIPE *left, PIPE *right, DATA *value) {
DATA d1, d2;
/* Perform an OR operation on two streams */
if (!eval(left, &d1)) return(eval(right, value)); *value = d1;
if (eval(right, &d2)) {
if (d1 < d2) unread(right, d2);
else if (d2 < d1) {unread(left, d1); *value = d2;}
}
return(TRUE);
}
int eval(PIPE *expr, DATA *value) {
if (expr->state == PENDING) {
expr->state = UNDEF; *value = expr->lookahead;
return(TRUE);
}
switch (expr->fntype) {
case MONO: return(expr->monofun(expr->op1, value));
case BINARY: return(expr->bifun(expr->op1, expr->op2, value));
}
}
int data(PIPE *d, DATA *value) {
if (d->state == PENDING) {
d->state = UNDEF; *value = d->lookahead;
return(TRUE);
}
if (d->array[d->index] == -1) d->state = EOF; /* Hack for demo --
real code would compare offset1 to offset2 */
if (d->state == EOF) return(FALSE);
*value = d->array[d->index++]; d->state = UNDEF;
return(TRUE);
}
PIPE *make_data_stream(int O_List[]) {
PIPE *tmp;
tmp = (PIPE *)malloc(sizeof(PIPE));
tmp->fntype = MONO;
tmp->monofun = data;
tmp->array = O_List;
tmp->index = 0;
tmp->op1 = tmp; /* Actually op1 should be a structure containing
array and index from above; this is just laziness */
tmp->state = UNDEF;
return(tmp);
}
PIPE *make_pipe_binop(int (*fn)(PIPE *left, PIPE *right, DATA *value),
PIPE *arg1, PIPE *arg2) {
PIPE *tmp;
tmp = (PIPE *)malloc(sizeof(PIPE));
tmp->bifun = fn;
tmp->fntype = BINARY;
tmp->op1 = arg1;
tmp->op2 = arg2;
tmp->state = UNDEF;
return(tmp);
}
int main(int argc, char **argv) {
/* Simulated Search Engine -- effectively performing set operations
on arbitrarily large sets. */
static int O_List1[16] = {
1, 3, 5, 6,
7, 8, 10, 12,
14, 16, 17, 18,
20, 21, 22, -1};
static int O_List2[7] = {1, 2, 4, 5, 6, 7, -1};
static int O_List3[9] = {14, 15, 16, 17, 18, 19, 20, 21, -1};
static int O_List4[4] = {12, 13, 14, -1};
/*
The problem is a free-text database lookup. We have an index
which when presented with a keyword, returns a list of identifiers
of records which contain that keyword. We wish to use and and or
operations on several keywords, eg 'Give me all the records which
contain "programmer" and "poor", or "manager" and "rich"'
Here is a worked example of the problem. There is a very
easy solution possible *iff* all the sets are kept in memory;
however they can grow to be excessively large, so the chosen
algorithm has to work on small sections of these sets at any
one time.
Conceptually the simplest way to do this is to define a mechanism
where the operator is akin to a unix filter or process, receiving its
input data from a stream, and writing its results to a stream. Then
the expression evaluation becomes a dataflow problem of connecting these
streams together, and reading the results coming out of the topmost filter.
One minor implementation note: accessing the elements of each of
these lists off CD Rom turn-about would cause lots of disk-head movement;
we can optimise this to any degree necessary by having the bottom-level
functions buffer their data in blocks in Ram.
List1 <- 1 3 5 6 7 8 10 12 14 16 17 18 20 21 22
/
OR
/ \
/ List2 <- 1 2 4 5 6 7
Result <- AND
\ List3 <- 12 13 14 15 16 17 18 19
\ /
OR
\
List4 <- 12 13 14
<- 1 3 4 5 6 7 8 10 12 14 16 17 18 20 21 22
/
/
Result <- AND
\
\
<- 12 13 14 15 16 17 18 19 20 21
Result <- 12 14 16 17 18 20 21
*/
PIPE *data1, *data2, *data3, *data4, *or1, *or2, *and1;
DATA value;
/* The Object-lists are simulated by a C array; in real life they
would be found in a large index file. (They are actually found
by following a 'trie' from a master index) */
data1 = make_data_stream(O_List1);
data2 = make_data_stream(O_List2);
data3 = make_data_stream(O_List3);
data4 = make_data_stream(O_List4);
/* Compile our expression */
or1 = make_pipe_binop(or, data1, data2);
or2 = make_pipe_binop(or, data3, data4);
and1 = make_pipe_binop(and, or1, or2);
/* Perform the evaluation of the expression; if you want, you can
store the results in a new list; often it is easier to do things
with them one at a time as the members of the list are presented */
fprintf(stderr, "The result of list1|list2 & list3|list4 is:\n");
while (eval(and1, &value)) {
fprintf(stderr, " %d", value);
}
fprintf(stderr, "\n");
}
More information about the Comp.lang.c
mailing list