v13i042: Brute force measurement selection
Rich Salz
rsalz at bbn.com
Wed Feb 17 11:27:44 AEST 1988
Submitted-by: elsie!ado (Arthur David Olson)
Posting-number: Volume 13, Issue 42
Archive-name: measures
[ Truly how to save work by using a computer. --r$ ]
Measures reads one or more files containing the values of measurable
traits for a set of items, and lists which traits may be measured to learn
which one of the items an unknown item is.
: To unbundle, sh this file
echo 'measures.1' 1>&2
cat >'measures.1' <<'End of measures.1'
.LC @(#)measures.1 1.13
.TH MEASURES 1E \*(eH
.SH NAME
measures \- list measurements to take
.SH SYNOPSIS
.B measures
[ file ... ]
.= measures
.SH DESCRIPTION
.I Measures
reads one or more files containing the values of measurable traits
for a set of items, and lists which traits may be measured to learn which one of
the items an unknown item is.
.PP
The file(s) to read may be named on the command line;
the standard input is read if there are no
.IR file (s)
on the command line or if
.B `-'
is used.
Sharp signs ('#'s) and characters that follow them on lines are ignored.
Lines with only spaces, tabs, and colons in them are ignored.
Each remaining line gives the values for the item
whose name is given first on the line;
values are separated by any number of spaces, tabs, or colons.
A non-numeric string of characters
(for example,
.B `--'
or
.BR `unknown' )
may be used for any trait whose value is unknown for a given item.
Two values separated by a `-' may be used to give a range in which
the value for the item is known to fall.
.PP
If a line starts with one of the words listed below,
it gets the special treatment indicated.
.TP
.B Filed
Other entries on the line are used as names of the traits when the traits are
listed. In the absence of a
.B `Filed'
line, the first trait is called `1', the second is called `2', and so forth.
.TP
.B Errors
Other entries on the line give the `experimental error' involved in measuring
each trait. The number may end with a percent sign (`%') to signify a
percentage error figure.
The range of values that might be measured for a trait
(given experimental error)
for two items must not overlap
for the two items to be told apart by measuring that trait.
In the absence of a
.B `Errors'
line, each trait's experimental error is taken to be zero.
.TP
.B Costs
Other entries on the line give the cost of measuring each trait.
In the absence of a
.B `Costs'
line, each trait's cost is taken to be one.
.PP
The listed set or sets of traits to be measured are those whose totaled costs
are lowest and provide as much information as you could get by measuring
all the traits.
.SH EXAMPLE
Given the input
.in +.5i
.nf
.ft B
.ta \w'Errors\0\0'u +\w'10%\0\0'u +\w'10%\0\0'u
Filed A B C
Errors 10% 10% 10%
p 1 6 5
q 1 3 5
r 4 6 2
.br
.in -.5i
.fi
.ft P
the
.I measures
program produces the two lines of output
.in +.5i
.nf
.ft B
A B
B C
.br
.in -.5i
.fi
.ft P
to indicate that either A and B or B and C may be measured to identify an
unknown sample as being one of p, q, or r.
.SH LIMITATION
There's a limit (usually thirty-two)
on the number of traits that may be worked with.
Lines may be at most one thousand characters long.
.SH SEE ALSO
.IR "Journal of Theoretical Biology" ,
(1987)
.BR 128 ,
1-9.
End of measures.1
echo 'Makefile' 1>&2
cat >'Makefile' <<'End of Makefile'
# @(#)Makefile 1.1
SRCS= ealloc.c ialloc.c measures.c substrings.c wild.c wildexit.c
OBJS= ealloc.o ialloc.o measures.o substrings.o wild.o wildexit.o
CFLAGS= -s -O
LINT= lint
LINTFLAGS= -phbaaxc
all: measures
sure:
$(LINT) $(LINTFLAGS) $(SRCS)
clean:
rm -f *.o *.out core ,*
measures: $(OBJS)
$(CC) $(CFLAGS) $(OBJS) -o $@
End of Makefile
echo 'ealloc.c' 1>&2
cat >'ealloc.c' <<'End of ealloc.c'
#
/*LINTLIBRARY*/
#include "stdio.h"
#if !defined lint && !defined NOID
static char elsieid[] = "@(#)ealloc.c 8.2";
#endif /* !defined lint && !defined NOID */
extern char * icalloc();
extern char * icatalloc();
extern char * icpyalloc();
extern char * imalloc();
extern char * irealloc();
static char *
check(pointer)
char * pointer;
{
if (pointer == NULL)
wildrexit("allocating memory");
return pointer;
}
char *
emalloc(size)
{
return check(imalloc(size));
}
char *
ecalloc(nelem, elsize)
{
return check(icalloc(nelem, elsize));
}
char *
erealloc(ptr, size)
char * ptr;
{
return check(irealloc(ptr, size));
}
char *
ecatalloc(old, new)
char * old;
char * new;
{
return check(icatalloc(old, new));
}
char *
ecpyalloc(string)
char * string;
{
return check(icpyalloc(string));
}
efree(p)
char * p;
{
ifree(p);
}
ecfree(p)
char * p;
{
icfree(p);
}
End of ealloc.c
echo 'ialloc.c' 1>&2
cat >'ialloc.c' <<'End of ialloc.c'
#
/*LINTLIBRARY*/
#include "stdio.h"
#if !defined lint && !defined NOID
static char elsieid[] = "@(#)ialloc.c 8.3";
#endif /* !defined lint && !defined NOID */
#if !defined alloc_t
#define alloc_t unsigned
#endif /* !defined alloc_t */
#if defined MAL
#define NULLMAL(x) ((x) == NULL || (x) == MAL)
#else /* !defined MAL */
#define NULLMAL(x) ((x) == NULL)
#endif /* !defined MAL */
extern char * calloc();
extern char * malloc();
extern char * realloc();
extern char * strcpy();
char *
imalloc(n)
{
#if defined MAL
register char * result;
if (n == 0)
n = 1;
result = malloc((alloc_t) n);
return (result == MAL) ? NULL : result;
#else /* !defined MAL */
if (n == 0)
n = 1;
#if defined __TURBOC__
/*
** Beat a TURBOC bug.
*/
if ((n & 1) != 0)
++n;
#endif /* defined __TURBOC__ */
return malloc((alloc_t) n);
#endif /* !defined MAL */
}
char *
icalloc(nelem, elsize)
{
if (nelem == 0 || elsize == 0)
nelem = elsize = 1;
#if defined __TURBOC__
if ((nelem & 1) != 0 && (elsize & 1) != 0)
++nelem;
#endif /* defined __TURBOC__ */
return calloc((alloc_t) nelem, (alloc_t) elsize);
}
char *
irealloc(pointer, size)
char * pointer;
{
if (NULLMAL(pointer))
return imalloc(size);
if (size == 0)
size = 1;
#if defined __TURBOC__
if ((size & 1) != 0)
++size;
#endif /* defined __TURBOC__ */
return realloc(pointer, (alloc_t) size);
}
char *
icatalloc(old, new)
char * old;
char * new;
{
register char * result;
register oldsize, newsize;
oldsize = NULLMAL(old) ? 0 : strlen(old);
newsize = NULLMAL(new) ? 0 : strlen(new);
if ((result = irealloc(old, oldsize + newsize + 1)) != NULL)
if (!NULLMAL(new))
(void) strcpy(result + oldsize, new);
return result;
}
char *
icpyalloc(string)
char * string;
{
return icatalloc((char *) NULL, string);
}
ifree(p)
char * p;
{
if (!NULLMAL(p))
free(p);
}
icfree(p)
char * p;
{
if (!NULLMAL(p))
free(p);
}
End of ialloc.c
echo 'measures.c' 1>&2
cat >'measures.c' <<'End of measures.c'
#
#include "stdio.h"
#if !defined lint && !defined NOID
static char elsieid[] = "@(#)measures.c 1.23";
#endif /* !defined lint && !defined NOID */
#include "math.h"
#if !defined NBPI
#define NBPI (8 * sizeof (int)) /* Number of Bits Per Int */
#endif /* !defined NBPI */
#if !defined MAXLINE
#define MAXLINE 1000 /* MAXimum LINE length */
#endif /* !defined MAXLINE */
#define SKIPPED '\0'
#if defined __TURBOC__
#define HUGE HUGE_VAL
#define float double
#undef SKIPPED
#define SKIPPED (-1)
#endif /* defined __TURBOC__ */
extern char * ecpyalloc();
extern char * erealloc();
extern char * strchr();
extern char ** substrings();
extern char * wildname();
struct range {
float lo;
float hi;
};
struct range ** ranges; /* measured ranges of traits, -/+ errors */
int itemcount; /* number of items with traits */
int traitcount; /* number of traits of each item */
struct trait {
char * name;
float error;
char errtype;
float cost;
};
struct trait traits[NBPI];
apartbits(i, j)
{
register struct range * rp1;
register struct range * rp2;
register int n, result;
result = 0;
n = traitcount;
rp1 = ranges[i + 1];
rp2 = ranges[j + 1];
while (--n >= 0) {
--rp1;
--rp2;
if (rp1->lo > rp2->hi || rp1->hi < rp2->lo)
result |= 1 << n;
}
return result;
}
char * oopsname;
long oopsline;
oops(message)
char * message;
{
(void) fprintf(stderr, "\"%s\", line %ld: wild %s\n",
oopsname, oopsline, message);
for ( ; ; )
wildexit(message);
}
dofiled(innames)
char ** innames;
{
register int i;
for (i = 0; i < traitcount; ++i)
if (traits[i].name != NULL)
if (strcmp(innames[i], traits[i].name) == 0)
continue;
else oops("mismatched `Filed' lines");
else traits[i].name = ecpyalloc(innames[i]);
}
doerrors(inerrors)
char ** inerrors;
{
register int i;
float f;
char c;
for (i = 0; i < traitcount; ++i) {
c = '\0';
if ((sscanf(inerrors[i], "%f%c", &f, &c) == 1 &&
c != SKIPPED) || (c != SKIPPED && c != '%'))
oops("`Errors' line");
if (f < 0)
oops("negative `Errors' value");
if (c == '%' && f > 100)
oops("Error percentage > 100");
if (traits[i].error != 0 &&
(f != traits[i].error || c != traits[i].errtype))
oops("mismatched `Errors' lines");
traits[i].error = f;
traits[i].errtype = c;
}
}
docosts(incosts)
char ** incosts;
{
register int i;
float f;
char c;
for (i = 0; i < traitcount; ++i) {
c = '\0';
if (sscanf(incosts[i], "%f%c", &f, &c) != 1 || c != SKIPPED)
oops("`Costs' line");
if (f <= 0)
oops("non-positive `Costs' value");
if (traits[i].cost != 0 && f != traits[i].cost)
oops("mismatched `Costs' lines");
traits[i].cost = f;
}
}
getanum(string, address)
char * string;
float * address;
{
double d;
char c;
/*
** Some buggy sscanfs think that "-" is a number.
*/
if (strcmp(string, "-") == 0)
return 0;
c = '\0';
if (sscanf(string, "%f%c", &d, &c) == 1 && c == SKIPPED) {
*address = d;
return 1;
}
if (*string == '-') {
*address = -HUGE;
++string;
} else *address = HUGE;
return strcmp(string, "HUGE") == 0 || strcmp(string, "huge") == 0 ||
strcmp(string, "Huge") == 0;
}
#define ATATIME 500
doranges(inranges)
register char ** inranges;
{
register char * cp;
register int i;
register int ok;
if ((itemcount % ATATIME) == 0) {
ranges = (struct range **) erealloc((char *) ranges,
(itemcount + ATATIME) * sizeof *ranges);
ranges[0] = (struct range *) erealloc((char *) ranges[0],
(itemcount + ATATIME) * traitcount * sizeof *ranges[0]);
for (i = 1; i < itemcount + ATATIME; ++i)
ranges[i] = ranges[i - 1] + traitcount;
}
for (i = 0; i < traitcount; ++i) {
if (getanum(inranges[i], &ranges[itemcount][i].lo)) {
ranges[itemcount][i].hi = ranges[itemcount][i].lo;
continue;
}
cp = inranges[i];
for ( ; ; ) {
cp = strchr(cp + 1, '-');
if (cp == 0) {
ranges[itemcount][i].lo = -HUGE;
ranges[itemcount][i].hi = HUGE;
break;
}
*cp = '\0';
ok = getanum(inranges[i], &ranges[itemcount][i].lo) &&
getanum(cp + 1, &ranges[itemcount][i].hi);
*cp++ = '-';
if (!ok)
continue;
if (ranges[itemcount][i].lo > ranges[itemcount][i].hi)
oops("range");
break;
}
}
++itemcount;
}
infile(filename)
char * filename;
{
register FILE * fp;
register char * cp;
register char ** cpp;
register int i;
char buf[MAXLINE + 2]; /* +2 for "\n\0" */
if (strcmp(filename, "-") == 0) {
fp = stdin;
filename = "standard input";
}
else if ((fp = fopen(filename, "r")) == NULL)
for ( ; ; )
wild2exit("result opening", filename);
oopsname = filename;
for (oopsline = 1; fgets(buf, sizeof buf, fp) == buf; ++oopsline) {
cp = strchr(buf, '\n');
if (cp == 0)
oops("long line");
*cp = '\0'; /* Zap the trailing newline */
if (strchr(buf, '#') != 0)
*strchr(buf, '#') = '\0'; /* Zap comments */
cpp = substrings(buf, ": \t");
if (cpp == NULL)
for ( ; ; )
wildrexit("substrings");
for (i = 0; cpp[i] != NULL; ++i)
;
if (i == 0) {
free((char *) cpp);
continue;
}
if (traitcount == 0) {
traitcount = i - 1;
if (traitcount <= 0)
oops("lack of traits");
if (traitcount > NBPI)
oops("large number of traits");
} else if (traitcount != i - 1)
oops("number of traits");
cp = cpp[0];
if (strcmp(cp, "Filed") == 0)
dofiled(&cpp[1]);
else if (strcmp(cp, "Errors") == 0)
doerrors(&cpp[1]);
else if (strcmp(cp, "Costs") == 0)
docosts(&cpp[1]);
else doranges(&cpp[1]);
(void) free((char *) cpp);
}
if (ferror(fp) || !feof(fp))
for ( ; ; )
wild2exit("result reading", filename);
if (fp != stdin)
(void) fclose(fp);
}
int * aparts; /* traits to measure to tell two items apart */
int apartcount; /* number of elements in above table */
int mustbits; /* tells which traits MUST be measured */
int mustcount; /* tells how many traits MUST be measured */
main(argc, argv)
int argc;
char * argv[];
{
register int argind;
register int i, j;
float lo, hi;
char buf[NBPI]; /* Wildly generous! */
argv[0] = wildname(argv[0]);
argind = 1;
if (strcmp(argv[argind], "--") == 0)
++argind;
if (argind == (argc - 1) && strcmp(argv[argind], "=") == 0) {
(void) fprintf(stderr, "%s: usage is %s [file...]\n",
argv[0], argv[0]);
for ( ; ; )
tameexit();
}
if (argind == argc)
infile("-");
else for (i = argind; i < argc; ++i)
infile(argv[i]);
if (itemcount == 0)
for ( ; ; )
wildrexit("lack of items");
/*
** Set all costs to one if no costs were given.
*/
for (i = 0; i < traitcount; ++i)
if (traits[i].cost != 0)
break;
if (i >= traitcount)
for (i = 0; i < traitcount; ++i)
traits[i].cost = 1;
/*
** Set all labels if no labels were given.
*/
if (traits[0].name == NULL)
for (i = 0; i < traitcount; ++i) {
(void) sprintf(buf, "%d", i + 1);
traits[i].name = ecpyalloc(buf);
}
/*
** Adjust the ranges to reflect the errors.
*/
for (i = 0; i < itemcount; ++i)
for (j = 0; j < traitcount; ++j) {
if (traits[j].error == 0)
continue;
lo = ranges[i][j].lo;
if (lo > -HUGE && lo < HUGE) {
if (traits[j].errtype == '\0')
lo -= traits[j].error;
else lo -= lo * (traits[j].error / 100.);
ranges[i][j].lo = lo;
}
hi = ranges[i][j].hi;
if (hi > -HUGE && hi < HUGE) {
if (traits[j].errtype == '\0')
hi += traits[j].error;
else hi += hi * (traits[j].error / 100.);
ranges[i][j].hi = hi;
}
}
/*
** Build the "tell apart" table.
*/
for (i = 0; i < itemcount - 1; ++i) {
aparts = (int *) erealloc((char *) aparts,
(apartcount + itemcount + 1 - i) * sizeof *aparts);
for (j = i + 1; j < itemcount; ++j)
newbits(apartbits(i, j));
}
if (mustcount + apartcount == 0)
for ( ; ; )
wildexit("indistinguishable items (given errors)");
finish();
for ( ; ; )
tameexit();
}
newbits(new)
register int new;
{
register int * ip;
register int i;
if ((mustbits & new) != 0)
return; /* A test we must do can tell these two apart */
if (new == 0)
return; /* We can't tell these two apart! */
/*
** Is the new case (or a harder one) already in the table?
*/
ip = aparts;
ip[apartcount] = new;
while ((*ip++ & ~new) != 0)
;
if (ip <= &aparts[apartcount])
return;
/*
** Get rid of any old entries that this new one is harder than.
*/
ip = aparts;
for ( ; ; )
if ((new & ~*ip++) == 0) {
if (*--ip == new)
break;
*ip = aparts[--apartcount];
aparts[apartcount] = new;
}
/*
** Is more than one test involved?
*/
i = 1;
while ((new & i) == 0)
i <<= 1;
if (new != i) {
++apartcount;
return;
}
/*
** A single test is involved.
*/
mustbits |= i;
if (++mustcount < traitcount)
return;
/*
** Oh well. . .
*/
finish();
for ( ; ; )
tameexit();
}
float lowcost;
int * lowbits;
int lowcount;
finish()
{
register int i, j;
lowcost = HUGE;
aparts[apartcount] = 0;
dobits(mustbits, 0, 0., aparts);
for (i = 0; i < lowcount; ++i) {
for (j = 0; j < traitcount; ++j)
if ((lowbits[i] & (1 << j)) != 0)
(void) printf("%s ", traits[j].name);
printf("\n");
}
}
dobits(todo, done, cost, ip)
register int todo, done;
register float cost;
register int * ip;
{
register int i, nextbit;
while((todo & *ip++) != 0)
;
if (*--ip == 0) {
if (cost < lowcost) {
lowcost = cost;
lowcount = 0;
}
lowbits = (int *) erealloc((char *) lowbits,
(lowcount + 1) * sizeof *lowbits);
lowbits[lowcount++] = todo;
return;
}
if (cost >= lowcost)
return; /* cost can't get smaller! */
i = *ip++ & ~(todo | done);
for (nextbit = 0; nextbit < traitcount; ++nextbit) {
if ((i & (1 << nextbit)) == 0 ||
cost + traits[nextbit].cost > lowcost)
continue;
dobits(todo | ( 1 << nextbit), done,
cost + traits[nextbit].cost, ip);
done |= 1 << nextbit;
}
}
End of measures.c
echo 'substrings.c' 1>&2
cat >'substrings.c' <<'End of substrings.c'
#
/*LINTLIBRARY*/
#include "stdio.h"
#if !defined lint && !defined NOID
static char elsieid[] = "@(#)substrings.c 8.2";
#endif /* !defined lint && !defined NOID */
#include "ctype.h"
#if !defined TRUE
#define TRUE 1
#define FALSE 0
#endif /* !defined TRUE */
extern char * imalloc();
extern char * strchr();
char **
substrings(string, separators)
char * string;
char * separators;
{
register char ** array;
register char * cp;
register int nsubs;
register int lastsepprint;
if (string == NULL || separators == NULL || *separators == '\0')
return NULL;
array = (char **) imalloc((strlen(string) + 2) * sizeof *array);
if (array == NULL)
return NULL;
if (*string == '\0') {
array[0] = NULL;
return array;
}
nsubs = 0;
lastsepprint = TRUE;
for (cp = string; *cp != '\0'; ++cp)
if (strchr(separators, *cp) != 0) {
if (isascii(*cp) && isprint(*cp) && *cp != ' ' &&
lastsepprint)
array[nsubs++] = cp;
lastsepprint = isascii(*cp) && isprint(*cp) &&
*cp != ' ';
*cp = '\0';
} else {
lastsepprint = FALSE;
if (cp == string || *(cp - 1) == '\0')
array[nsubs++] = cp;
}
if (lastsepprint && *(cp - 1) == '\0')
array[nsubs++] = cp;
array[nsubs] = NULL;
return array;
}
End of substrings.c
echo 'wild.c' 1>&2
cat >'wild.c' <<'End of wild.c'
#
/*LINTLIBRARY*/
#include "stdio.h"
#if !defined lint && !defined NOID
static char elsieid[] = "@(#)wild.c 8.3";
#endif /* !defined lint && !defined NOID */
char *
wildname(name)
char * name;
{
register int i;
static char saved[15];
if (name != NULL && *name != '\0')
for (i = 0; (saved[i] = *name) != '\0'; ++name)
if ((*name == '/' || *name == '\\') &&
*(name + 1) != '/' && *(name + 1) != '\\' &&
*(name + 1) != '\0')
i = 0;
else if (i < sizeof saved - 1)
++i;
return (saved[0] == '\0') ? "?" : saved;
}
wild2(part1, part2)
char * part1;
char * part2;
{
(void) fputs(wildname(""), stderr);
/*
** One space after the colon matches what perror does
** (although your typing teacher may want a second space).
*/
(void) fputs(": wild ", stderr);
if (part1 == NULL)
part1 = "";
if (part2 == NULL)
part2 = "";
(void) fputs(part1, stderr);
if (*part1 != '\0' && *part2 != '\0')
(void) fputs(" ", stderr);
(void) fputs(part2, stderr);
(void) fputs("\n", stderr);
}
wild(string)
char * string;
{
wild2("", string);
}
wildcall(string)
char * string;
{
wild2("call of", string);
}
wildret(string)
char * string;
{
wild2("result from", string);
}
End of wild.c
echo 'wildexit.c' 1>&2
cat >'wildexit.c' <<'End of wildexit.c'
#
/*LINTLIBRARY*/
#include "stdio.h"
#if !defined lint && !defined NOID
static char elsieid[] = "@(#)wildexit.c 8.2";
#endif /* !defined lint && !defined NOID */
#if !defined TAMEEXITVAL
#define TAMEEXITVAL 0
#endif /* !defined TAMEEXITVAL */
#if !defined WILDEXITVAL
#define WILDEXITVAL 1
#endif /* !defined WILDEXITVAL */
wildexit(string)
char * string;
{
wild(string);
for ( ; ; )
exit(WILDEXITVAL);
}
wildcexit(string)
char * string;
{
wildcall(string);
for ( ; ; )
exit(WILDEXITVAL);
}
wildrexit(string)
char * string;
{
wildret(string);
for ( ; ; )
exit(WILDEXITVAL);
}
wild2exit(part1, part2)
char * part1;
char * part2;
{
wild2(part1, part2);
for ( ; ; )
exit(WILDEXITVAL);
}
tameexit()
{
for ( ; ; )
exit(TAMEEXITVAL);
}
End of wildexit.c
exit
--
For comp.sources.unix stuff, mail to sources at uunet.uu.net.
More information about the Comp.sources.unix
mailing list