A or An
Dan Hoey
hoey at NRL-AIC.ARPA
Thu Apr 24 12:46:16 AEST 1986
Gregory Smith noticed that festoon didn't choose the indefinite article
correctly. His simplistic solution prompted me to convert a baroque
Lisp hack I wrote into C. This one knows about the difference between
an hour and a houri, between an herb and a herbivore, etc. It can't
tell the difference between a unionized factory and an unionized gas,
though.
You may think some of my choices are capricious. Edit to taste.
Dan Hoey
HOEY at NRL-AIC.ARPA
# Cut and compile #
/*
* Routine to decide between "a" and "an".
*
* Usage: char *a_or_an(s), *cap_a_or_an(s)
* char *s;
*
* a_or_an(s) returns "a" or "an", whichever is the appropriate indefinite
* article for the phrase s. cap_a_or_an returns "A" or "An".
*
* Define DRIVER to make a filter that prepends 'a' or 'an' to each line.
*
* Author: Dan Hoey <hoey at nrl-aic.arpa> 23 April 1986
*/
#include <ctype.h>
/* Pat is a sorted table of lower-case prefixes.
 * If Pat contains an odd number of prefixes of a given word,
 * the word takes "an"; otherwise (including no match at all) the word
 * takes "a".  Each nested "except" in the comments below adds one more
 * matching prefix and so flips the answer.
 *
 * The entries must stay in ascending byte order: an_phrase_p
 * binary-searches this table.
 */
static const char *const Pat[] = {
/* Everything is a consonant (except an apple, */
/* an exception (except a euphemism (except an Eulerian)), */
"a", "e", "eu", "eule",
/* an F (except a foo), */
"f", "fa", "fe", "fi", "fj", "fl", "fnord", "fo", "fr", "fu", "fw", "fy",
/* an H (except a ha (except an habanera), */
"h", "ha", "haban",
/* a he (except an heiress, an Henry, or an herbalist */
/* (except a herbaceous, a herbarium, a herbert, or a herbivore)), */
"he", "heir", "henry", "herb", "herbac", "herbar", "herbe", "herbi",
/* a hi (except an Higgins), */
"hi", "higgin",
/* a ho (except an homage, an hombre, an honest, an honorarium, */
/* an hors d'oeuvre (except a horse, a horst, or a horsy), */
/* an houdaille, or an hour (except a houri)), */
/* a Hrothgar, a hug, or a hype), */
"ho", "homa", "hombr", "honest", "honor", "hors", "horse", "horst",
"horsy", "houdai", "hour", "houri", "hr", "hu", "hy",
/* an iota, an L (except a lot), an M (except a multitude), */
"i", "l", "la", "le", "lf", "lh", "li", "ll", "lo", "lu", "ly", "m",
"ma", "mc", "me", "mi", "ml", "mn", "mo", "mr", "ms", "mu", "mw", "my",
/* an N (except a number), */
"n", "na", "nb", "ne", "ng", "ni", "no", "nu", "ny",
/* an other (except a once and future or a one (except an Onega, */
/* an oneiromancer, or an onerous), */
"o", "once", "one", "oneg", "onei", "onero",
/* an R (except a riot), an S (except a superfluity), */
"r", "ra", "re", "rh", "ri", "rm", "ro", "ru", "rw", "ry",
"s", "sa", "sc", "se", "sf", "sh", "si", "sj", "sk", "sl", "sm", "sn",
"so", "sp", "sq", "sr", "st", "su", "sv", "sw", "sy", "sz",
/* an udder, an ugh (except a Ugandan), an uh, */
/* an ulcer (except a Ulysses), an um, */
"ud", "ug", "ugan", "uh", "ul", "ulys", "um",
/* an unlikelihood, (except a unanimity, a unanimous decision, */
/* a unary count, */
"un", "unanimi", "unanimo", "unary",
/* a universal botch (except an unidentified case (except */
/* a unidimensional one), an unignorable, an unilluminated, */
/* an unimpressive (except a unimodal), */
"uni", "unid", "unidi", "unign", "unill", "unim", "unimo",
/* an uninteresting (except a uninominal), an uniodized, */
/* an unironed (except a uniroyal), */
/* an unissued, an unitalicized, an unitemized uneogh!!)), */
"unin", "uninom", "uniod", "unir", "uniroy", "uniss", "unital", "unitem",
/* an upper at last, an urge, (except a uranous, a ureous, */
/* a uriniferous, a urologist), an usher, an utmost or an utter one, */
/* an uxoricide with an Uzi, an X (xcept a xoo), */
"up", "ur", "ura", "ure", "uri", "uro", "ush", "utm", "utt", "ux", "uz",
"x", "xa", "xe", "xi", "xu", "xy",
/* an yclept, an Yggdrasil, an ylang-ylang, an yngvi, an yttride, */
/* or an yvette) */
"yc", "yg", "yl", "yng", "yt", "yv"};

/* Count how many entries of Pat are prefixes of s; return 1 if that
 * count is odd (the phrase takes "an"), 0 if it is even (takes "a").
 *
 * Only the leading run of ASCII letters of s is examined, compared
 * case-insensitively.  An empty string, or one that does not start
 * with an ASCII letter, yields 0.
 *
 * For each initial alphabetic substring s[0..slen], binary search Pat
 * for an exact match with the active region Pat[bot]..Pat[top].  A
 * second region, Pat[pbot]..Pat[ptop], is carried from one prefix
 * length to the next: an entry that differs from the current prefix at
 * some character can never match a longer prefix either, so it is
 * excluded for good.  An entry that merely extends the current prefix
 * is excluded only from the current search.
 */
static int an_phrase_p(const char *s)
{
    int slen;                   /* index of last char of current prefix */
    int pbot = 0;               /* persistent bounds on Pat */
    int ptop = (int)(sizeof Pat / sizeof Pat[0]) - 1;
    int nfound = 0;             /* number of prefixes of s found */
    int sc;                     /* input char - pat char */
    int si;                     /* char index */
    int mid, bot, top;          /* binary search parameters */

    /* The explicit < 0x80 test restricts matching to ASCII letters in
     * any locale; the unsigned char casts keep <ctype.h> calls defined
     * when plain char is signed. */
    for (slen = 0;
         (unsigned char)s[slen] < 0x80 && isalpha((unsigned char)s[slen]);
         ++slen) {
        /* For each alphabetic prefix s[0..slen] */
        bot = pbot;
        top = ptop;
        while (bot <= top) {    /* binary search for the prefix */
            mid = bot + (top - bot) / 2;
            for (si = 0; si <= slen; ++si) {
                /* A pattern shorter than the prefix hits its NUL here,
                 * which compares as string > pat, so we never read
                 * past the end of a pattern. */
                sc = tolower((unsigned char)s[si]) - Pat[mid][si];
                if (sc > 0) {   /* mismatch: string > pat */
                    pbot = bot = mid + 1;
                    break;
                }
                if (sc < 0) {   /* mismatch: string < pat */
                    ptop = top = mid - 1;
                    break;
                }
            }
            if (si > slen) {    /* every prefix char matched */
                if (Pat[mid][si] != '\0') {
                    /* Pattern continues: it may still match a longer
                     * prefix, so narrow only the local region. */
                    top = mid - 1;
                } else {        /* exact match */
                    ++nfound;
                    pbot = mid + 1;
                    break;
                }
            }
        }
        if (pbot > ptop)        /* no candidates left for longer prefixes */
            break;
    }
    return nfound & 1;
}
/* Return the lower-case indefinite article for the phrase s:
 * "an" when an_phrase_p(s) is nonzero, "a" otherwise.
 * The result is a string literal; the caller must not modify or free it.
 */
char *a_or_an(char *s)
{
    return an_phrase_p(s) ? "an" : "a";
}
/* Return the capitalized indefinite article for the phrase s:
 * "An" when an_phrase_p(s) is nonzero, "A" otherwise.
 * The result is a string literal; the caller must not modify or free it.
 */
char *cap_a_or_an(char *s)
{
    return an_phrase_p(s) ? "An" : "A";
}
#ifdef DRIVER
#include <stdio.h>
#include <string.h>
#define MAXLINE 256
/*
 * Filter: copy each input line to stdout with "a"/"an" prepended.
 *
 * Input comes from the file named by argv[1] if given, otherwise from
 * stdin.  When a line starts with an upper-case ASCII letter, the
 * article is capitalized instead and that letter is lower-cased.
 *
 * Returns 1 if the named file cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    char s[MAXLINE];
    int at_line_start = 1;      /* does the next chunk begin a line? */

    if (argc > 1 && !freopen(argv[1], "r", stdin)) {
        perror(argv[1]);
        return 1;
    }
    while (fgets(s, MAXLINE, stdin)) {
        size_t len = strlen(s);

        if (!at_line_start) {
            /* Continuation of a line longer than MAXLINE-1 characters:
             * pass it through without inserting another article. */
            fputs(s, stdout);
        } else if ((unsigned char)*s < 0x80 && isupper((unsigned char)*s)) {
            printf("%s %c%s", cap_a_or_an(s),
                   tolower((unsigned char)*s), s + 1);
        } else {
            printf("%s %s", a_or_an(s), s);
        }
        /* fgets keeps the newline, so its absence means the line was
         * split (or the input ended without one). */
        at_line_start = (len > 0 && s[len - 1] == '\n');
    }
    return 0;
}
#endif /* DRIVER */
More information about the Comp.sources.unix
mailing list