lq-text Full Text Retrieval Database Part 04/13

Mon Mar 4 12:03:16 AEST 1991

: cut here --- cut here --
: To unbundle, sh this file
#! /bin/sh
: part 04
echo x - lq-text/src/liblqtext/Phrase.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/Phrase.c <<'@@@End of lq-text/src/liblqtext/Phrase.c'
X/* Phrase.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/*
X * Deal with (WID, FID, Offfset) triples
X * Liam Quin, September 1989
X *
X * $Id: Phrase.c,v 1.11 91/02/22 18:22:59 lee Rel1-10 $
X *
X * $Log:	Phrase.c,v $
X * Revision 1.11  91/02/22  18:22:59  lee
X * Improved a trace message.
X * 
X * Revision 1.10  90/10/06  00:11:57  lee
X * Prepared for first beta release.
X * 
X * Revision 1.9  90/10/04  17:10:43  lee
X * Case matching now works on one-word phrases.
X * 
X * Revision 1.8  90/10/03  20:47:06  lee
X * changed an int to an unsigned long
X * 
X * Revision 1.7  90/08/29  21:46:39  lee
X * Alpha release.
X * 
X * Revision 1.6  90/03/29  19:59:05  lee
X * Now passes gcc -Wall
X * 
X * Revision 1.5  90/03/19  00:02:23  lee
X * Simplified phrase matching greatly by adding new routine, and also
X * improved checking of word flags and generation of ModifiedString,
X * the canonical phrase, in String2Phrase.
X * 
X * Revision 1.4  90/03/16  22:43:15  lee
X * After Richard's consummate help...
X * fixed two severe bugs in matching, and started to simplify
X * MakeMatches().
X * 
X * Revision 1.3  90/03/14  21:02:14  lee
X * Before Richard...
X * 
X * Revision 1.2  90/03/12  22:39:30  lee
X * prepared for general release.
X * 
X * Revision 1.1  89/09/17  23:01:34  lee
X * Initial revision
X * 
X *
X */
X
X/** Unix system calls that need to be declared: **/
Xextern void exit();
X/** Unix/C Library Functions: **/
Xextern unsigned int sleep();
X#ifndef tolower
X extern int tolower();
X#endif
Xextern int strlen();
Xextern char *strcpy();
X/** lqtext functions: **/
Xextern int TooCommon();
Xextern char *UnFlag();
X/** **/
X
X#include "globals.h" /* defines and declarations for database filenames */
X
X#include <stdio.h> /* stderr, also for fileinfo.h */
X#include <fcntl.h>
X#include <malloc.h>
X#include <sys/types.h>
X#include <ctype.h>
X
X#include "fileinfo.h" /* for wordinfo.h */
X#include "wordinfo.h"
X#include "pblock.h"
X#include "phrase.h"
X#include "wordrules.h"
X
X#include "emalloc.h"
X
X#ifndef STREQ
X# define STREQ(boy,girl) ((*(boy) == *(girl)) && (!strcmp((boy),(girl))))
X#endif
X
X#ifndef new
X# define new(type) ((type *) emalloc(sizeof (type)))
X#endif
X
X#ifndef MAXPHRASELEN
X# define MAXPHRASELEN 2000
X#endif
X
Xextern int AsciiTrace;
X
Xt_Phrase *
XString2Phrase(String)
X    char *String;
X{
X    extern t_WordInfo *WID2WordInfo();
X    extern t_WID Word2WID();
X    extern char *WordRoot();
X
X    t_Phrase *Result;
X    t_PhraseItem **ThisWord;
X    /* (* 3 because in the worst case, "a a a" expands to "[a] [a] [a]") */
X    register char *p;
X    register char *q;
X    char *LastStart = 0;
X    char *PrevLastEnd = (char *) 0;
X    int InWord = 0;
X    int Flags = 0;
X    int FoundLetters = 0;
X
X    if (AsciiTrace > 50) {
X	fprintf(stderr, "String2Phrase(%s)\n", String);
X    }
X    Result = (t_Phrase *) emalloc(sizeof(t_Phrase));
X    Result->Next = (t_Phrase *) 0;
X    p = Result->ModifiedString = emalloc(strlen(String) * 3 + 1);
X
X    Result->HasUnknownWords = 0;
X
X    *(ThisWord = &Result->Words) = (t_PhraseItem *) 0;
X
X    /* March along the supplied phrase, looking for keywords.
X     * surround unindexed or short words with [brackets].
X     * Also converts to lower case and strips plurals.
X     */
X    for (q = String; /*LOTSOFTIMES*/; q++) {
X
X	if (AsciiTrace > 50) {
X	    fputc(*q, stderr);
X	}
X
X	if (!InWord && !StartsWord(*q)) {
X	    if (!*q) {
X		break;
X	    } else {
X		if (!LastStart) continue;
X	    }
X	}
X
X	if (!InWord) {
X	    LastStart = q;
X	    if (StartsWord(*q)) {
X		InWord = 1;
X	    }
X	    continue;
X	} else if (isalpha(*q)) {
X	    /* in a word and found letters, so remember in case we skip
X	     * this word...
X	     */
X	    FoundLetters = 1;
X	}
X
X	/* ASSERT: inword == 1 */
X
X	if (*q == '\'') {
X	    if (!WithinWord(q[1])) {
X		InWord = 0;
X	    }
X	}
X
X	if (!*q || !WithinWord(*q)) {
X	    InWord = 0;
X	}
X
X
X	if (LastStart && !InWord) {
X	    int Length = q - LastStart;
X	    int UsedABracket = 0;
X
X	    if (p > Result->ModifiedString) *p++ = ' ';
X
X	    /* we have reached the end of a word, is it long enough? */
X	    if (!FoundLetters) {
X		*p++ = '[';
X		UsedABracket = 1;
X	    } else if (Length < MinWordLength) {
X		*p++ = '[';
X		UsedABracket = 1;
X		if (FoundLetters) {
X		    Flags |= WPF_LASTHADLETTERS;
X		    FoundLetters = 0;
X		}
X	    } else {
X		t_WID WID;
X		t_WordInfo *W;
X		char SaveEnd = (*q);
X		t_WordInfo TryRoot;
X		register char *p2;
X		char RootBuffer[MaxWordLength + 1];
X		char *R = RootBuffer;
X
X		/* Add the word to the chain, too: */
X		*q = '\0';
X
X		FoundLetters = 0; /* unnecessary now (?) */
X		TryRoot.Length = Length;
X		TryRoot.WordPlace.Flags = Flags;
X		if (isupper(*LastStart)) {
X		    TryRoot.WordPlace.Flags |= WPF_UPPERCASE;
X		}
X		Flags = 0;
X
X		for (p2 = LastStart; *p2; p2++) {
X		    *R++ = isupper(*p2) ? tolower(*p2) : *p2;
X		}
X		*R = '\0';
X		TryRoot.Word = RootBuffer;
X
X		R = WordRoot(&TryRoot);
X
X		if (TooCommon(&TryRoot)) {
X		    *p++ = '[';
X		    *p++ = '*';
X		    UsedABracket = 1;
X		    Flags |= WPF_LASTWASCOMMON;
X		    if (AsciiTrace > 10) {
X			fprintf(stderr, " Common(%s) ", TryRoot.Word);
X		    }
X		} else if (!(WID = Word2WID(TryRoot.Word, TryRoot.Length)) ||
X			    (W = WID2WordInfo(WID)) == (t_WordInfo *) 0) {
X		    *p++ = '[';
X		    *p++ = WID ? '@' : '?';
X		    UsedABracket = 1;
X		    if (AsciiTrace > 10) {
X			fprintf(stderr, " Unknown(%s) ", TryRoot.Word);
X		    }
X		    Result->HasUnknownWords++;
X		} else {
X		    if ((*ThisWord = new(t_PhraseItem)) == (t_PhraseItem *) 0) {
X			fprintf(stderr,
X			"Not enough memory for PHRASE \"%s\"", String);
X			return (t_Phrase *) 0;
X		    }
X		    W->WordPlace.Flags |= TryRoot.WordPlace.Flags;
X		    if (PrevLastEnd == (char *) 0) {
X			W->WordPlace.StuffBefore = 0;
X		    } else {
X			W->WordPlace.StuffBefore = LastStart - PrevLastEnd;
X		    }
X		    PrevLastEnd = &q[1];
X
X		    if (AsciiTrace) {
X			fprintf(stderr, "Word %s --> %s, %lu matches\n",
X			    LastStart,
X			    UnFlag(W, W->WordPlace.Flags),
X			    W->NumberOfWordPlaces);
X		    }
X		    /* point to the new space */
X		    (*ThisWord)->Word = W;
X		    (*ThisWord)->WordStart = LastStart;
X		    (*ThisWord)->Next = (t_PhraseItem *) 0;
X		    (*ThisWord)->SearchIndex = 0L;
X		    ThisWord = &(*ThisWord)->Next;
X
X		    /** (void) strcpy(p, R); **/
X		    /** p += TryRoot.Length; **/
X
X		    (void) strcpy(p, LastStart);
X		    p += q - LastStart; /* q points one beyond the end */
X
X		    LastStart = q;
X
X		}
X		*q = SaveEnd;
X	    }
X	    while (LastStart < q) {
X		*p++ = *LastStart++;
X	    }
X	    if (UsedABracket) {
X		*p++ = ']';
X	    }
X	    LastStart = 0;
X	} /* if */
X	if (!*q) break;
X    } /* for */
X    *p= '\0';
X
X    if (ThisWord == &Result->Words) {
X	/* There were no words in the phrase! */
X	return (t_Phrase *) 0;
X    }
X
X    Result->OriginalString = emalloc(q - String + 2);
X    (void) strcpy(Result->OriginalString, String);
X
X    Result->NumberOfMatches = 0;
X    Result->Matches = (t_MatchList *) 0;
X    if (AsciiTrace > 1) {
X	fprintf(stderr, "phrase \"%s\",\n", Result->OriginalString, String);
X	fprintf(stderr, "Canonical form \"%s\"\n", Result->ModifiedString);
X    }
X    return Result;
X}
X
X#define MaxDistance 20
X
Xt_Answer *
XGetFiles(Phrase)
X    t_Phrase *Phrase;
X{
X    char *MakeOneDescription();
X
X    t_Answer *Result = 0;
X    t_Answer **RP = &Result;
X    t_MatchList *MP;
X    t_FID LastFID;
X    unsigned long ThisFIDNumberOfMatches = 0L;
X
X    if (!Phrase || !Phrase->Matches) {
X	return Result;
X    }
X
X    LastFID = Phrase->Matches->Match->Where->FID;
X
X    for (MP = Phrase->Matches; MP; MP = MP->Next) {
X	if (MP->Match->Where->FID != LastFID) {
X	    char *p;
X	    
X	    p = MakeOneDescription(LastFID, ThisFIDNumberOfMatches);
X
X	    if ((*RP = new(t_Answer)) == (t_Answer *) 0) {
X		return Result;
X	    }
X	    (*RP)->Answer = p;
X	    RP = &(*RP)->Next;
X	    *RP = (t_Answer *) 0;
X	    ThisFIDNumberOfMatches = 0L;
X	} else {
X	    ++ThisFIDNumberOfMatches;
X	}
X
X	LastFID = MP->Match->Where->FID;
X    }
X
X    if (ThisFIDNumberOfMatches) {
X	char *p = MakeOneDescription(LastFID, ThisFIDNumberOfMatches);
X
X	if ((*RP = new(t_Answer)) == (t_Answer *) 0) {
X	    return Result;
X	}
X	(*RP)->Answer = p;
X	RP = &(*RP)->Next;
X	*RP = (t_Answer *) 0;
X	ThisFIDNumberOfMatches = 0L;
X    }
X
X    return Result;
X}
X
Xchar *
XMakeOneDescription(FID, NumberOfMatches)
X    t_FID FID;
X    unsigned long NumberOfMatches;
X{
X    extern char *ctime();
X    extern t_FileInfo *GetFileInfo();
X
X    char *Date;
X    char *p;
X    t_FileInfo *FileInfo;
X    char NumBuf[20];
X
X    if (!FID) return (char *) 0;
X
X    if (!(FileInfo = GetFileInfo(FID))) return (char *) 0;
X
X    Date = ctime(&(FileInfo->Date));
X    /** Tue Oct  3 00:57:11 BST 1989 **/
X    Date[10] = '\0';
X
X    (void) sprintf(NumBuf, "%lu", NumberOfMatches);
X
X    p = emalloc((unsigned) (strlen(FileInfo->Name) + strlen(NumBuf) + 11));
X    (void) sprintf(p, "%-.5s %s %s", NumBuf, Date, FileInfo->Name);
X    efree(FileInfo->Name);
X    efree(FileInfo);
X    return p;
X}
X
Xvoid
XResetPhraseMatch(Phrase)
X    t_Phrase *Phrase;
X{
X    t_PhraseItem *Word;
X
X    if (!Phrase || !Phrase->Words) return;
X
X    for (Word = Phrase->Words; Word; Word = Word->Next) {
X	Word->SearchIndex = 0;
X    }
X    Phrase->NumberOfMatches = 0;
X}
X
X/* Default is to check case, etc. only if given in the input phrase.
X * This is an enum from phrase.h, and only used in MakeMatches().
X */
X
Xextern t_PhraseCaseMatch PhraseMatchLevel;
X
Xlong
XMakeMatches(Phrase)
X    t_Phrase *Phrase;
X{
X    /* Each word has a pointer (SearchIndex) to the last Word Place
X     * that was examined.  This enables an O(NumberOfWords) search instead
X     * of O(NumberOfWords * NumberOfWords) search.
X     */
X    static int ContinuesMatch();
X
X    unsigned long PIFB; /* PlaceInFirstBlock */
X    t_MatchList **MLPP = &(Phrase->Matches);
X    t_Match **MPP;
X    t_Match **OldMatch;
X    t_WordPlace *pp;
X    t_PhraseItem *Word;
X    long Result = 0L;
X    long LastResult = (-1L); /* to detect new matches */
X    t_PhraseItem *LeastWord;
X    int HowGood;
X
X    if (!Phrase) {
X	return 0L;
X    }
X
X    ResetPhraseMatch(Phrase);
X    /* Each iteration over this list either produces a match or rejects a
X     * possible phrase starting place.
X     */
X
X    if (AsciiTrace > 1) {
X	fprintf(stderr, "Match(%s)\n", Phrase->ModifiedString);
X    }
X
X    /* A phrase with garbage words can't match anything */
X    if (Phrase->HasUnknownWords && PhraseMatchLevel != PCM_AnyCase) {
X	return 0L;
X    }
X
X    /* Ensure that the matches for the first word have been read */
X    if (Phrase->Words->Word->WordPlacesInHere <
X		    Phrase->Words->Word->NumberOfWordPlaces) {
X	extern t_WordPlace *GetWordPlaces();
X	t_WordInfo *W = Phrase->Words->Word; /* less indirection! */
X
X	if (W->WordPlaces) {
X	    (void) efree(W->WordPlaces);
X	}
X
X	W->WordPlaces = GetWordPlaces(
X	    W->WID,
X	    W->WordPlaceStart,
X	    (unsigned) WIDBLOCKSIZE - (W->WordPlaceStart - W->DataBlock),
X	    W->Offset,
X	    W->NumberOfWordPlaces
X	);
X	W->WordPlacesInHere = W->NumberOfWordPlaces;
X    }
X
X    /* Find the word in the phrase with least matches: */
X    LeastWord = Phrase->Words;
X    for (Word = Phrase->Words; Word; Word = Word->Next) {
X	if (Word->Word->NumberOfWordPlaces <
X					LeastWord->Word->NumberOfWordPlaces) {
X	    LeastWord = Word;
X	}
X    }
X
X    /* For each match in the first word in the phrase: */
X    for (PIFB = 0; PIFB < Phrase->Words->Word->NumberOfWordPlaces; PIFB++) {
X	t_WordPlace *LastFOP = (t_WordPlace *) 0;
X
X	/* The idea is that the next two loops are are likely to reduce
X	 * considerably the number of places we have to consider in the
X	 * case that the first word in the phrase has a lot of matches
X	 * and there is a subsequent word with relatively few matches.
X	 * Experiments suggest that this is fairly common.
X	 *
X	 * This is still a nearly (i.e. slightly-better-than) linear
X	 * algorithm w.r.t the total number of matches in all of the
X	 * words added up.  Note that I alter LeastWord->SearchIndex in
X	 * one of the two loops that follow, so when WordPlaces from that
X	 * word are considered, we don't have to look at any twice.
X	 *
X	 * In order to do better, one would have to be able to avoid
X	 * looking at some or (better!) most of the WordPlaces.
X	 *
X	 * For example, not fetching so many from disk:
X	 * if we didn't do the fetches until we needed to, and we gave
X	 * GetWordPlaces a minimum FID to look for, we might be able
X	 * to reduce things by (say) 15%.
X	 * If all of the FIDS were stored separately, we would not
X	 * have to look at the (Block, Word, Flags, StuffBefore) stuff at
X	 * all, and that would be much faster.  One way to do that might be
X	 * to store the list of FIDs with the word (as now), and perhaps
X	 * some flags and the count of words/fid, and to store the rest
X	 * in a per-file data structure.
X	 *
X	 * That would be a major, major hack...
X	 * 					... sigh.o
X	 *
X	 */
X
X	while (LeastWord->Word->WordPlaces[LeastWord->SearchIndex].FID <
X		    Phrase->Words->Word->WordPlaces[PIFB].FID) {
X	    if (++(LeastWord->SearchIndex) >=
X					LeastWord->Word->NumberOfWordPlaces) {
X		goto GiveUp;
X	    }
X	}
X
X	while (Phrase->Words->Word->WordPlaces[PIFB].FID <
X		LeastWord->Word->WordPlaces[LeastWord->SearchIndex].FID) {
X	    if (++PIFB >= Phrase->Words->Word->NumberOfWordPlaces) {
X		goto GiveUp;
X	    }
X	}
X
X	/* The following comment tells Sabre_C not to moan about "if (0)" */
X	/*SUPPRESS558*/
X	if (0) {
XGiveUp:
X	    break;
X	}
X	/* end of attempted speed improvement */
X
X	/* Optimistically allocate a new match: */
X	if (1 || Result != LastResult) {
X	    *MLPP = (t_MatchList *) emalloc(sizeof(t_MatchList));
X	    (*MLPP)->Match = (t_Match *) 0;
X	    OldMatch = MPP = &((*MLPP)->Match);
X	    MLPP = &(*MLPP)->Next;
X	    *MLPP = (t_MatchList *) 0;
X	}
X	LastResult = Result;
X
X	pp = &Phrase->Words->Word->WordPlaces[Phrase->Words->SearchIndex = PIFB];
X	/* When we have a partially completed match,
X	 * FOP (declared below) will point to the WordPlace currently
X	 * being considered to see if it extends the partial match;
X	 * LastFOP points to the previous WordPlace in the match.
X	 */
X
X	/* For each word in the phrase: */
X	for (Word = Phrase->Words; Word; Word = Word->Next) {
X	    int GotOne = 0;
X
X	    /* Ensure that the matches word have been read */
X	    if (Word->Word->WordPlacesInHere <
X			    Word->Word->NumberOfWordPlaces) {
X		extern t_WordPlace *GetWordPlaces();
X		t_WordInfo *W = Word->Word; /* less indirection! */
X
X		if (W->WordPlaces) {
X		    (void) efree(W->WordPlaces);
X		}
X		W->WordPlaces = GetWordPlaces(
X		    W->WID,
X		    W->WordPlaceStart,
X		    (unsigned) WIDBLOCKSIZE - (W->WordPlaceStart - W->DataBlock),
X		    W->Offset,
X		    W->NumberOfWordPlaces
X		);
X		W->WordPlacesInHere = W->NumberOfWordPlaces;
X	    }
X
X	    /* For each occurrence of that word: */
X	    for (; Word->SearchIndex < Word->Word->NumberOfWordPlaces;
X							++Word->SearchIndex) {
X		register t_WordPlace *FOP =
X			&Word->Word->WordPlaces[Word->SearchIndex];
X		
X#if 0
X		/* Speedup -- binary search to find next candidate...
X		 * this is commented out because it actually seems to
X		 * make things run slower!
X		 */
X		{
X		    int low = Word->SearchIndex;
X		    int high = Word->Word->NumberOfWordPlaces - 1;
X		    t_WordPlace *Places = Word->Word->WordPlaces;
X		    int guess = (high + low) / 2;
X
X		    while (low < high) {
X			if (Places[guess].FID < pp->FID) {
X			    /* not gone far enough */
X			    low = guess + 1;
X			} else {
X			    high = guess;
X			}
X			guess = (high + low) / 2;
X		    }
X		    if (guess != Word->SearchIndex) {
X			Word->SearchIndex = guess;
X			FOP = &Word->Word->WordPlaces[Word->SearchIndex];
X		    }
X		}
X#endif
X
X		if (!LastFOP) {
X		    LastFOP = FOP;
X		}
X
X		/** So:
X		 ** | int PIFB = each match in the first word in the phrase
X		 ** | t_WordPlace *pp = each match in the phrase
X		 **    | t_PhraseItem *Word = each word in the phrase
X		 **	   | unsigned SearchIndex = each match of that word
X		 **	   | t_WordPlace *FOP = each occurrence of that word
X		 **
X		 ** Hence, we are comparing pp and FOP, hoping that each time
X		 ** round the (Word) loop we will advance FOP.
X		 ** Once we have decided that FOP and pp relate to the
X		 ** same file and that FOP is no earlier than pp in the
X		 ** file, we must then check that FOP is advancing the
X		 ** chain by comparing it to the previous element in the
X		 ** list (LastFOP).
X		 **
X		 ** When we break from this inner list, we must either have
X		 ** eliminated this particular (PIFB) as starting a match-
X		 ** chain, or have decided that we have extended the
X		 ** current match chain (by setting GotOne).
X		 **/
X
X
X		if (LastFOP == FOP) {
X		    HowGood = CheckFlags(Word->Word, FOP);
X		} else {
X		    HowGood = ContinuesMatch(Word->Word, pp, LastFOP, FOP);
X		}
X
X		switch (HowGood) {
X		case 0:
X		    /* G O T C H A !!!! */
X		    /* extend the HitList, since it's OK so far. */
X
X		    *MPP = (t_Match *) emalloc(sizeof(t_Match));
X		    (*MPP)->WID = Word->Word->WID;
X		    (*MPP)->Where = FOP;
X		    (*MPP)->Next = (t_Match *) 0;
X		    MPP = &(*MPP)->Next;
X		    GotOne++;
X		    break;
X		case 1: /* gone too far */
X		    if (AsciiTrace > 10) {
X			t_WordInfo WW;
X			
X			WW = *(Word->Word);
X
X			if (LastFOP == FOP) {
X			    /* UnFlag() returns a pointer to a static buffer,
X			     * so I have to use two printf() calls here.
X			     */
X			    fprintf(stderr, "Reject(%s (%d) != ",
X				    UnFlag(&WW, WW.WordPlace.Flags),
X				    WW.WordPlace.Flags);
X			    fprintf(stderr, "%s (%d)) [flags]\n",
X				    UnFlag(&WW, FOP->Flags), FOP->Flags);
X			} else {
X			    fprintf(stderr, "Reject(%s) -- too far\n",
X				UnFlag(&WW, WW.WordPlace.Flags));
X			}
X		    }
X		    break;
X		case -1:
X		    continue; /* not there yet */
X		default:
X		    fprintf(stderr, "\n\rInternal Error %s: %d\n", __FILE__,
X							__LINE__ - 1);
X		    (void) sleep(4); /* for curses stuff... */
X		    exit(1);
X		}
X
X		/* Remember where we got up to... so that we can extend
X		 * the list when we start looking at the next word.
X		 */
X		LastFOP = FOP;
X
X		if (AsciiTrace >= 4) {
X		    t_WordInfo WW;
X		    
X		    WW = *(Word->Word);
X		    /* UnFlag() returns a pointer to a static buffer */
X		    fprintf(stderr, "Partial match %s",
X				UnFlag(&WW, Word->Word->WordPlace.Flags));
X		    fprintf(stderr, "(Word (%s,%lu,%u) in file %lu)\n",
X				    UnFlag(&WW, FOP->Flags),
X				    FOP->BlockInFile, FOP->WordInBlock,
X				    FOP->FID
X		    );
X		}
X		/* If we got to here, we extended the list, which is fine;
X		 * otherwise, if we hit a continue, we try to carry on
X		 * looking at matches of this word, and if we hit a break
X		 * before we set "GotOne", we give up on this match
X		 * altogether.
X		 */
X		break;
X	    } /* For each occurrence of that word: */
X
X	    if (!GotOne) {
X		t_Match *MP;
X		/* This word isn't here, so neither is the phrase found
X		 * in this file starting here.
X		 */
X
X		for (MP = (*OldMatch); MP != (t_Match *) 0; /*void*/) {
X		    t_Match *Next = MP->Next;
X
X		    efree((char *) MP);
X		    MP = Next;
X		}
X
X		*OldMatch = (t_Match *) 0;
X		break;
X	    } else {
X		/* If we've reached the end of the phrase, i.e. if
X		 * Word->Next is zero, we have successfully added a new
X		 * phrase!
X		 */
X		if (Word->Next == (t_PhraseItem *) 0) {
X		    if (AsciiTrace > 10) {
X			fprintf(stderr, "Result now %d\n", Result + 1);
X		    }
X		    Result++;
X		}
X	    }
X
X	} /* end for (each word in the phrase) */
X    } /* end (for each FID/Offset pair in the first word */
X    return Phrase->NumberOfMatches = Result;
X}
X
X
Xstatic int
XContinuesMatch(QueryWord, First, Prev, New)
X    t_WordInfo *QueryWord;
X    t_WordPlace *First;
X    t_WordPlace *Prev;
X    t_WordPlace *New;
X{
X    /* Return Value is
X     *  -1 --- if New occurs before Prev (and thus isn't part of the match)
X     *  0  --- if it's the next word in the match
X     *  +1 --- if we've gone past it
X     * Note: you can use these values in a switch() if you want.
X     */
X
X    /* First check we are looking at the right file:
X     * Have we gone far enough?
X     */
X    if (New->FID < First->FID) {
X	return -1; /* not far enough */
X    } else if (New->FID > First->FID) {
X	return 1; /* too far */
X    } else if (Prev == New) {
X	return 0;
X    }
X
X    /* Hey everybody, they're the same!
X     * That means that this might be a candidate for a MATCH!!!!
X     */
X    
X    /* if (SimplyAnywhereWillDo) { OK; break; } */
X
X    /* Clearly later words in the phrase can't be in earlier
X     * blocks...
X     */
X    if (New->BlockInFile < First->BlockInFile) {
X	/* Although we are in the right file, we have not
X	 * yet reached the correct offset.
X	 */
X	return -1;
X    } 
X
X    /* If we get to here,
X     * . we are in the right file
X     * . we are at least as far into the file as the start
X     *   of the phrase
X     */
X
X    /* Now check that we are a reasonable distance past
X     * the preceding word (checking that we are not on the first
X     * match in the list, of course):
X     */
X    if (New->BlockInFile < Prev->BlockInFile) {
X	/* not gone far enough */
X	return -1;
X    }
X    if (New->BlockInFile > Prev->BlockInFile + 1) {
X	/* If they are more than one block apart, I
X	 * don't believe them to be part of a phrase!
X	 */
X	return 1;
X    }
X    if (New->BlockInFile == Prev->BlockInFile) {
X	/* If they are in the same block, one must be
X	 * exactly one word beyond the other.  I don't
X	 * think they can ever be the same, unless there
X	 * is a serious bug somewhere!
X	 */
X	if (New->WordInBlock <= Prev->WordInBlock) {
X	    /* too early in the block */
X	    return -1;
X	}
X	switch (PhraseMatchLevel) {
X	case PCM_AnyCase:
X	    if (New->WordInBlock > Prev->WordInBlock + 4) {
X		return 1; /* gone too far */
X		/* We allow a few words slop in this case, though... */
X	    }
X	    break; /* clearly OK */
X	case PCM_SameCase:
X	case PCM_HalfCase:
X	default:
X	    if (New->WordInBlock > Prev->WordInBlock + 1) {
X		return 1; /* gone too far */
X	    }
X	}
X    } else {
X	/* they are in adjacent blocks */
X	if (New->WordInBlock > 0 ||
X			!(Prev->Flags & WPF_LASTINBLOCK)) {
X	    /* there is another word between them, so
X	     * we have gone too far.
X	     * I went to a lot of effort in addfile.c to
X	     * mantain that flag, just for this!
X	     */
X	    return 1;
X	}
X    }
X    /* So they are adjacent words.
X     * Now, I wonder if they are plausible distances
X     * apart, and whether the common words skipped are
X     * the same?
X     * Also, what about other flag details?
X     */
X    
X    /* NOTDONE */
X    
X    /* Now we check that the word matches the given word
X     * -- in other words, that possessive/plural/case
X     *    is correct if required.  Do this later as it is
X     *    relatively expensive I expect, and we will not
X     *    usually care about case.
X     *
X     * Since the word is in the right place, if it fails here there
X     * is no point in looking at the next word in this block!
X     */
X
X    return CheckFlags(QueryWord, New);
X}
X
XCheckFlags(QueryWord, New)
X    t_WordInfo *QueryWord;
X    t_WordPlace *New;
X{
X    /* First check case */
X    switch (PhraseMatchLevel) {
X
X    default: /* defensive! */
X	fprintf(stderr, "\n\rinternal error %d %s\n", __LINE__, __FILE__);
X	(void) sleep(4);
X	break;
X    case PCM_AnyCase:
X	break; /* clearly OK */
X    case PCM_SameCase:
X	if ((QueryWord->WordPlace.Flags & (WPF_UPPERCASE|WPF_POSSESSIVE)) != 
X			(New->Flags & (WPF_UPPERCASE|WPF_POSSESSIVE))) {
X	    /* The cases are different, no good */
X	    return 1; /* give up on this match */
X	}
X	if (QueryWord->WordPlace.StuffBefore > 0) {
X	    int Difference;
X
X	    Difference = QueryWord->WordPlace.StuffBefore - New->StuffBefore;
X	    if (Difference < -2 || Difference > 4) {
X		return 1; /* give up on this match */
X	    }
X	}
X	/* Now, what about skipped common words? */
X	if ((New->Flags & WPF_LASTWASCOMMON) !=
X		    (QueryWord->WordPlace.Flags & WPF_LASTWASCOMMON)) {
X	    return 1; /* give up on this match */
X	}
X
X	/* plurals: this should be separate */
X	if ((QueryWord->WordPlace.Flags & WPF_WASPLURAL) &&
X				!(New->Flags & WPF_WASPLURAL)) {
X	    return 1; /* give up on this match */
X	}
X
X	/* Only do this test if we are being awfully strict.
X	 * Remember also that the first word in the phrase will
X	 * not usually have this set.
X	 */
X	if ((QueryWord->WordPlace.Flags & WPF_LASTHADLETTERS) &&
X			    !(New->Flags & WPF_LASTHADLETTERS)) {
X	    return 1; /* give up on this match */
X	}
X	break;
X    case PCM_HalfCase:
X	/* In this case, we are lax about things, but if the
X	 * user typed  plural/possessive/capital, we only
X	 * match one with the same attribute.
X	 */
X	if ((QueryWord->WordPlace.Flags & WPF_UPPERCASE) &&
X				    !(New->Flags & WPF_UPPERCASE)) {
X	    if (AsciiTrace > 4) {
X		fprintf(stderr, "Reject [uppercase]\n");
X	    }
X	    return 1; /* give up on this match */
X	}
X
X	/* plurals: this should be separate */
X	if ((New->Flags & WPF_WASPLURAL) &&
X		  !(QueryWord->WordPlace.Flags & WPF_WASPLURAL)) {
X	    if (AsciiTrace > 4) {
X		fprintf(stderr, "Reject [plural]\n");
X	    }
X	    return 1; /* give up on this match */
X	}
X
X	/* Now, what about skipped common words? */
X	if ((QueryWord->WordPlace.Flags & WPF_LASTWASCOMMON) &&
X				!(New->Flags & WPF_LASTWASCOMMON)) {
X	    if (AsciiTrace > 4) {
X		fprintf(stderr, "Reject [last was common]\n");
X	    }
X	    return 1; /* give up on this match */
X	}
X	/* Stuff before, if given, must be present: */
X	if (QueryWord->WordPlace.StuffBefore > 1) {
X	    if (New->StuffBefore < QueryWord->WordPlace.StuffBefore - 1) {
X		if (AsciiTrace > 4) {
X		    fprintf(stderr, "Reject [Stuff Before %d != Q%d]\n",
X			    QueryWord->WordPlace.StuffBefore,
X			    New->StuffBefore);
X		}
X		return 1;
X	    } /* don't care if there is too much there, though */
X	}
X	if ((QueryWord->WordPlace.Flags & WPF_POSSESSIVE) &&
X				   !(New->Flags & WPF_POSSESSIVE)) {
X	    if (AsciiTrace > 4) {
X		fprintf(stderr, "Reject [user flag]\n");
X	    }
X	    return 1; /* give up on this match */
X	}
X	break;
X    }
X
X    /* If we got here...
X     *
X     */
X    
X    return 0; /* It's all OK! */
X}
@@@End of lq-text/src/liblqtext/Phrase.c
echo x - lq-text/src/liblqtext/Root.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/Root.c <<'@@@End of lq-text/src/liblqtext/Root.c'
X/* Root.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/*
X * $Id: Root.c,v 2.7 91/03/03 00:13:36 lee Rel1-10 $
X *
X * $Log:	Root.c,v $
X * Revision 2.7  91/03/03  00:13:36  lee
X * cosmetic changes.
X * 
X * Revision 2.6  90/10/06  00:11:59  lee
X * Prepared for first beta release.
X * 
X * Revision 2.5  90/08/29  21:46:42  lee
X * Alpha release.
X * 
X * Revision 2.4  90/08/09  19:16:29  lee
X * BSD lint and fixes...
X * 
X * Revision 2.3  90/03/29  23:00:04  lee
X * Now passes gcc -Wall
X * 
X * Revision 2.2  89/10/08  20:44:56  lee
X * Working version of nx-text engine.  Addfile and wordinfo work OK.
X * 
X * Revision 2.1  89/10/02  01:13:07  lee
X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
X * 
X *
X */
X
X#include "globals.h" /* defines and declarations for database filenames */
X
X#include <sys/types.h>
X#include <fcntl.h> /* for my header files, sorry */
X#include <stdio.h>
X#include <malloc.h>
X#include <ctype.h>
X
X#include "fileinfo.h"
X#include "wordinfo.h"
X#include "wordrules.h"
X#include "emalloc.h"
X
X/** Unix system calls that need to be declared: **/
X    /* (none) */
X/** C Library functions that nees to be declared: **/
Xextern void perror();
Xextern int strcmp();
Xextern int strlen();
Xextern char *strcpy();
Xextern char *strcat();
X#ifndef tolower
X extern int toupper();
X#endif
X
X/** lqtext functions that need to be declared: **/
X/** Functions from this file that need to be declared: **/
Xvoid InsertCommonWord();
X/** **/
X
X/** Useful macros **/
X#define new(type) ((type *) emalloc(sizeof(type)))
X	/* so you can say
X	 * struct foo *x = enew(struct foo)
X	 */
X
X#define STRCMP(s1,s2) ((*(s1) == *(s2)) ? strcmp(s1, s2) : *(s1) - *(s2))
X	/* faster then strcmp in the (common) case where the
X	 * strings differ at the first character.
X	 * From an idea by Henry Spencer (utzoo!henry)
X	 */
X
X/** **/
X
Xextern int AsciiTrace;
X
X/* This routine is only sensible for English (although it could be
X * modified...), but that does not matter.
X */
Xchar *
XWordRoot(WordInfo)
X    t_WordInfo *WordInfo;
X{
X    char *Word;
X
X    if (!WordInfo) return "@#!!";
X
X    Word = WordInfo->Word;
X
X    if (!Word) {
X	return "oh dear";
X    }
X
X    if (!*Word) {
X	return Word;
X    }
X
X    /** delete trailing <'s> and mark posessive */
X    while (WordInfo->Length >= 3 && Word[WordInfo->Length - 1] == 's' &&
X				    Word[WordInfo->Length - 2] == '\'') {
X	WordInfo->Length -= 2;
X	Word[WordInfo->Length] = '\0';
X	WordInfo->WordPlace.Flags |= WPF_POSSESSIVE;
X    }
X
X    /** delete trailing plural suffix and mark plural */
X
X    /* It's important to realise that the purpose of this routine is not
X     * in any way to reduce a word to an etymological root.  In other words,
X     * no attempt is made to differentiate between plurals and present
X     * participles, or words that simply happen to end in `s'.
X     * Hence, elephants, blunderbus, hostess, runs and tomatoes are all
X     * candidates.  Of course, one would like to do as well as one can!
X     * Again, the object isn't to derive the correct singular, but instead
X     * to be fairly fast, and, above all, to ensure that any transformations
X     * are reversible!
X     *
X     * The result is that I can store dog and dogs in the same Wordinfo
X     * chain.  In the case that either word is unusual, there is a space
X     * saving of (on average) 30 or so bytes.  More usefully, if you ask
X     * for `Window', I will automatically find `Windows' as well.
X     *
X     * so...
X     * XXXo, XXXss, XXXsh, XXXch, XXXx --> +es
X     * 	except: pianos, dynamos, photos
X     * XXCy --> XXCies [ C consonant]
X     * XXVy --> XXVys [ V vowel ]
X     * f or fe --> ves (12 cases only)
X     * vowel change:
X     * foot/feet (why bother with these? -- use a thesaurus!)
X     * need to keep penny/pence separate
X     * See Thomson & Martinet, section 8ff (I think)
X     */
X    if (WordInfo->Length > 2 && Word[WordInfo->Length - 1] == 's') {
X	WordInfo->WordPlace.Flags |= WPF_WASPLURAL; /* WRONG */
X	switch (Word[WordInfo->Length - 2]) {
X	case 'e':
X	    if (WordInfo->Length >= 3) switch (Word[WordInfo->Length - 3]) {
X	    case 'i': /* xxcies --> xxxy */
X		if (WordInfo->Length > 3) {
X		    Word[WordInfo->Length - 3] = 'y';
X		    WordInfo->Length -= 2;
X		} else { /* ies not a plural, but lies is :-) */
X		    WordInfo->Length--; /* just the s */
X		}
X		break;
X	    case 's':
X	    case 'h':
X	    case 'x':
X	    case 'o': /* xxxoes --> xxx */
X		WordInfo->Length -= 2;
X		break;
X	    default: /* xxxes -> xxxe */
X		WordInfo->Length -= 1;
X		break;
X	    } else { /* too short */
X		WordInfo->WordPlace.Flags &=
X			(unsigned short)~(unsigned short)WPF_WASPLURAL;
X	    }
X	    break;
X	case 'y': /* xxxvys --> xxxvy */
X	    switch (Word[WordInfo->Length - 2]) { /* e.g. holidays */
X	    case 'a': /* flays */
X	    case 'e': /* beys */
X	    case 'i': /* ??iys?? */
X	    case 'o': /* boys */
X	    case 'u': /* guys */
X		WordInfo->Length--; /* just remove the s */
X		break;
X	    default: /*not a plural, e.g. Unixsys, happy */
X		WordInfo->WordPlace.Flags &=
X			(unsigned short)~(unsigned short)WPF_WASPLURAL;
X		break;
X	    }
X	    break;
X	case 's': /* trailing ss doesn't mark a plural! */
X	    WordInfo->WordPlace.Flags &=
X			    (unsigned short)~(unsigned short)WPF_WASPLURAL;
X	    break;
X	case 'u':
X	    /* ONE bus, thus, omnibus; TWO gnus, TWO emus */
X	    /* So it doesn't work for gnus and emus right now! */
X	    WordInfo->WordPlace.Flags &=
X		    (unsigned short)~(unsigned short)WPF_WASPLURAL;
X	    break;
X	case 'i': /* not a plural.. this, his, fleur-de-lis */
X	    WordInfo->WordPlace.Flags &=
X		    (unsigned short)~(unsigned short)WPF_WASPLURAL;
X	    break;
X	case 'a': /* has */
X	case 'o': /* cos */
X	    if (WordInfo->Length < 4) {
X		WordInfo->WordPlace.Flags &=
X			(unsigned short)~(unsigned short)WPF_WASPLURAL;
X		break;
X	    }
X	    /* else fall through */
X	default: /* just plain s */
X	    WordInfo->Length -= 1;
X	    break;
X	}
X	Word[WordInfo->Length] = '\0';
X    } 
X    /* Should check for ii --> ius here, but that would increase the length
X     * of the word and therefore will break lots of things.
X     */
X    return WordInfo->Word;
X}
X
Xchar *
XUnFlag(WordInfo, Flags)
X    t_WordInfo *WordInfo;
X    unsigned int Flags;
X{
X    static char Buffer[MaxWordLength + 5]; /* 's + es + \0 */
X    register char *p, *q;
X    int Length;
X
X    if (!WordInfo) return "(null word info)";
X    if (!WordInfo->Word) return "(null word)";
X    if (!WordInfo->Word[0]) return "(empty word)";
X
X    p = Buffer;
X    q = WordInfo->Word;
X    while (*p++ = *q++)
X	;
X    *p = '\0';
X    
X    if ((Length = p - Buffer) != WordInfo->Length) {
X	/* Well, maybe I can't count */
X	WordInfo->Length = Length = strlen(Buffer);
X    }
X
X    if (Flags & WPF_WASPLURAL) {
X	if (Length >= 2) switch (Buffer[Length - 1]) {
X	case 'y':
X	    if (Length > 2) switch (Buffer[Length - 2]) {
X	    case 'a':
X	    case 'e':
X	    case 'i':
X	    case 'o':
X	    case 'u':
X		Buffer[Length++] = 's'; /* e.g. days */
X		break;
X	    default:
X		strcpy(&Buffer[Length - 1], "ies"); /* ladies */
X		Length += 2;
X	    }
X	    break;
X	case 's':
X	    if (Length > 2) if (Buffer[Length - 2] == 'u') {
X		strcpy(&Buffer[Length - 1], "ii"); /* Genii */
X		break;
X	    } /* else fall through... */
X	case 'o':
X	case 'h':
X	case 'x':
X	    strcat(Buffer, "es");
X	    Length += 2;
X	    break;
X	default:
X	    Buffer[Length++] = 's';
X	}
X	Buffer[Length] = '\0';
X    }
X
X    if (Flags & WPF_POSSESSIVE) {
X	Buffer[Length++] = '\'';
X	Buffer[Length++] = 's';
X	Buffer[Length] = '\0';
X    }
X
X    if (Flags & WPF_UPPERCASE) {
X	Buffer[0] = toupper(Buffer[0]);
X    }
X
X    return Buffer;
X}
X
Xtypedef struct s_WordList {
X    char *Word;
X    unsigned short Flags;
X    struct s_WordList *Next;
X} t_WordList;
X
Xstatic t_WordList *CommonWords = 0;
X
Xint
XTooCommon(WordInfo)
X    t_WordInfo *WordInfo;
X{
X    register char *Word = WordInfo->Word;
X    register t_WordList **WP;
X
X    for (WP = &CommonWords; *WP; WP = &(*WP)->Next) {
X	int i = STRCMP((*WP)->Word, Word);
X
X	if (i == 0) return 1; /* yes, it's common */
X	else if (i > 0) return 0;
X    }
X    return 0;
X}
X
Xstatic char *FileName = "Internal Error";
X/* should be set before being printed! */
X
Xint
XReadCommonWords(CommonFile)
X    char *CommonFile;
X{
X    extern char *fgets();
X    extern int AsciiTrace;
X
X    FILE *fd;
X    extern int errno;
X    char Buffer[200];
X    t_WordInfo W;
X    char *Root;
X    t_WordList *WP;
X
X    errno = 0;
X
X    if ((fd = fopen(CommonFile, "r")) == (FILE *) 0) {
X	int e = errno;
X
X	fprintf(stderr, "Can't open common word list ");
X	errno = e;
X	perror(CommonFile);
X	return -1;
X    }
X
X    FileName = CommonFile;
X
X    while (fgets(Buffer, sizeof(Buffer), fd) != (char *) 0) {
X	register char *p;
X	char *Start;
X
X	for (p = Buffer; *p; p++) {
X	    if (*p == '#') break;
X	    if (StartsWord(*p)) break;
X	}
X
X	if (*p == '#' || !*p) {
X	    continue;
X	}
X
X	Start = p;
X
X	for (; *p; p++) {
X	    if (!WithinWord(*p)) break;
X	    if (*p == '\'' && !WithinWord(p[1])) break;
X	}
X
X	if (p - Start + 1 < MinWordLength) continue;
X
X	*p = '\0'; /* delete trailing \n or whatever */
X	W.WordPlace.Flags = 0;
X	W.Word = Start;
X	W.Length = p - Start; /* length excludes the \0 */
X
X	Root = WordRoot(&W);
X	InsertCommonWord(Root, W.WordPlace.Flags);
X    }
X    (void) fclose(fd);
X
X#if 0
X    if (!CommonWords) {
X	fprintf(stderr, "No common words found in file \"%s\"\n", FileName);
X	exit(1);
X    }
X#endif
X
X    if (AsciiTrace > 1) {
X	for (WP = CommonWords; WP; WP = WP->Next) {
X	    fprintf(stderr, "Ignore: \"%s\"\n", WP->Word);
X	}
X    }
X    FileName = "Internal Error";
X    return 0;
X}
X
Xvoid
XInsertCommonWord(Root, Flags)
X    char *Root;
X    unsigned int Flags;
X{
X    register t_WordList **WP;
X    t_WordList *W;
X
X    for (WP = &CommonWords; *WP; WP = &(*WP)->Next) {
X	int i = STRCMP((*WP)->Word, Root);
X
X	if (i == 0) return;
X	else if (i > 0) break;
X    }
X    /* insert it before this one! */
X    W = (*WP);
X    (*WP) = (t_WordList *) emalloc(sizeof(t_WordList));
X    (*WP)->Word = emalloc(strlen(Root) + 1);
X    (void) strcpy((*WP)->Word, Root);
X    (*WP)->Flags = Flags;
X    (*WP)->Next = W;
X    return;
X}
@@@End of lq-text/src/liblqtext/Root.c
echo x - lq-text/src/liblqtext/WordInfo.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/WordInfo.c <<'@@@End of lq-text/src/liblqtext/WordInfo.c'
X/* WordInfo.c -- Copyright 1989 Liam R. Quin.  All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/* WordInfo.c -- handle the database of words for NX-Text.
X * 
X * NX-Text keeps a master list of all of the words that have ever been
X * seen.  Currently, this is in dbm format (sdbm or ndbm).
X * For each word, there's an associated WID (a unique number), an offset
X * into the master database (see pblock.c), and possibly thesaurus info.
X *
X * $Id: WordInfo.c,v 2.11 90/10/13 03:10:07 lee Rel1-10 $
X *
X * $Log:	WordInfo.c,v $
X * Revision 2.11  90/10/13  03:10:07  lee
X * Type error -- efree() needs a char *.
X * 
X * Revision 2.10  90/10/06  00:12:01  lee
X * Prepared for first beta release.
X * 
X * Revision 2.9  90/09/29  23:47:30  lee
X * Reduced the size of a buffer, and plugged yet another memory leak!
X * 
X * Revision 2.8  90/09/10  13:38:50  lee
X * deleted declaration of sleep()
X * 
X * Revision 2.7  90/08/29  21:46:48  lee
X * Alpha release.
X * 
X * Revision 2.6  90/08/12  17:33:38  lee
X * malloc changes; added SlayWordInfo() and MakeWordInfo().
X * 
X * Revision 2.5  90/08/09  19:16:35  lee
X * BSD lint and fixes...
X * 
X * Revision 2.4  90/03/22  14:23:19  lee
X * new calls to efree();
X * Offset now stored as a block number, not a byte offset
X * 
X * Revision 2.3  90/03/21  14:59:13  lee
X * Numerous changes.  WID2WordInfo() no longer calles GetWordPlaces().
X * 
X * Revision 2.2  89/10/08  20:45:05  lee
X * Working version of nx-text engine.  Addfile and wordinfo work OK.
X * 
X * Revision 2.1  89/10/02  01:13:56  lee
X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
X * 
X * Revision 1.4  89/09/17  23:01:53  lee
X * Various fixes; NumberInBlock now a short...
X * 
X * Revision 1.3  89/09/16  21:16:07  lee
X * First demonstratable version.
X * 
X * Revision 1.2  89/09/11  00:35:03  lee
X * Some speedups, but WID not working properly...
X * 
X * Revision 1.1  89/09/07  21:05:51  lee
X * Initial revision
X * 
X *
X */
X
X#include "globals.h" /* defines and declarations for database filenames */
X
X#include <errno.h>
X#include <fcntl.h>
X#include <malloc.h>
X#include <signal.h>
X#include <stdio.h>
X#include <string.h>
X#include <ctype.h>
X#include <sys/types.h>
X#include <sys/stat.h>
X#include <unistd.h>
X
X#include "fileinfo.h"
X#include "smalldb.h"
X#include "wordindex.h"
X#include "wordinfo.h"
X#include "numbers.h"
X
X#include "emalloc.h"
X
X#include "wordrules.h" /* max word length */
X
X#include "pblock.h"
X
X/** declarations: **/
X/** Unix system calls that need to be declared: **/
Xextern int open(), close(); /* (these are not the stdio fopen and fclose) */
Xextern int creat();
Xextern void exit();
Xextern long lseek(); /* watch out for this on 16 bit (286, PDP11) systems! */
Xextern int read(), write();
Xextern int stat();
Xextern unsigned alarm(/* unsigned */);
X
X/** Unix Library Calls that need to be declared: **/
X#ifndef tolower /* e.g. on SunOS */
Xextern int tolower();
X#endif
Xextern void perror();
X/* extern int sleep(); -- this is unsigned on some systems */
Xextern int lockf();
X/** lqtext Library calls that need to be declared: **/
Xextern void Deletepblock();
X
X/** Functions within this file that need to be declared: **/
Xt_WordInfo *MakeWordInfo();
Xvoid SlayWordInfo();
X
X/** **/
X
Xextern int AsciiTrace;
Xextern char *progname;
X
X#define new(type) ( ((type) *) emalloc(sizeof(type)) )
X
X/* Format when using ndbm: */
X
Xtypedef struct {
X    t_WID WID;
X    unsigned long Offset; /* position in the database */
X    unsigned long NumberOfWordPlaces;
X    char Word[1]; /* Cheat here */
X} t_WordIndexEntry;
X
X/* Replacement fomat, intended to be faster and to use much less disk space.
X * Still use dbm for Word2WID, but not for the reverse mapping.
X * Also, cache the most recently-used WordInfo entry...
X * I have not measured how much of a win this is, but a lot of the code
X * calls Word2Wid() and then WID2WordInfo().
X */
X
Xstatic int Widfd = (-1);
X
Xt_WordInfo *
XWID2WordInfo(WID)
X    t_WID WID;
X{
X    extern t_WordPlace *GetWordPlaces(); /* pblock.c */
X
X    char Buffer[WIDBLOCKSIZE + 5]; /* the +5 allows for overrun... */
X    char *q = Buffer;
X    t_WordInfo *WP;
X
X    /* The above calculation is derived like this:
X     *
X     * The entry contains the total number of pairs (>= 1 byte),
X     * the length of the string (>= 1 byte), and the string (>= 3 bytes)
X     * (actually >= MinWordLength bytes, but setting this less than 3
X     * would be a major disaster!)
X     * Hence, there are WIDBLOCKSIZE - (2 + length) bytes left for pairs.
X     * Now, each pair is at least 2 bytes, so halve the remainder.  I add
X     * one coz WIDBLOCKSIZE - 3 is odd, so I would otherwise lose one!
X     */
X
X    if (Widfd < 0) {
X	if ((Widfd = open(WidIndexFile, O_RDWR|O_CREAT, 0766)) < 0) {
X	    fprintf(stderr, "Can't open WID file \"%s\"\n", WidIndexFile);
X	    exit(1);
X	}
X    }
X
X    if (lseek(Widfd, (long) (WID * WIDBLOCKSIZE), 0) < 0) {
X	return (t_WordInfo *) 0;
X    }
X
X    if (read(Widfd, Buffer, WIDBLOCKSIZE) != WIDBLOCKSIZE) {
X	return (t_WordInfo *) 0;
X    }
X
X    {
X	unsigned short L;
X
X	if ((L = sReadNumber(&q)) == 0) {
X	    (void) fprintf(stderr,
X		    "%s: Database corrupt, WID %lu has length zero\n",
X		    progname, WID);
X	    return (t_WordInfo *) 0;
X	}
X	WP = MakeWordInfo(WID, (int) L, q);
X	q += L;
X    }
X
X    WP->Offset = (sReadNumber(&q) >> 1) * BLOCKSIZE;
X    WP->NumberOfWordPlaces = sReadNumber(&q);
X
X    /* Now, maybe read some WordPlace tuplets: */
X#if 1
X    if (q - Buffer < WIDBLOCKSIZE) {
X	WP->WordPlaces = GetWordPlaces(
X	    WP->WID,
X	    q,
X	    WIDBLOCKSIZE - (q - Buffer),
X	    WP->Offset,
X	    WP->NumberOfWordPlaces
X	);
X	WP->WordPlacesInHere = WP->NumberOfWordPlaces;
X    } else {
X	fprintf(stderr, "%s: Internal error, block too small for %ld (%s)\n",
X		progname, WP->WID, WP->Word);
X	exit(1);
X    }
X
X#else
X    WP->WordPlaces = (t_WordPlace *) 0;
X    if (q - Buffer < WIDBLOCKSIZE) {
X	WP->DataBlock = emalloc(WIDBLOCKSIZE + 5);
X	(void) memcpy(WP->DataBlock, Buffer, WIDBLOCKSIZE);
X	WP->WordPlaceStart = &(WP->DataBlock[q - Buffer]);
X    }
X#endif
X
X    /* done! */
X    return WP;
X}
X
Xstatic char PairBuffer[WIDBLOCKSIZE + 5]; /* the +5 allows for overrun... */
X
X/* Make WordInfo Block Header... */
Xvoid
XMkWIBH(WordInfo, pblock)
X    t_WordInfo *WordInfo;
X    t_pblock *pblock;
X{
X    char *q = PairBuffer;
X
X#ifdef ASCIITRACE
X    if (AsciiTrace > 15) {
X	fprintf(stderr, "\tMake info block header for %s, Offset %lu==%lu\n",
X	WordInfo->Word, pblock->ChainStart, WordInfo->Offset);
X    }
X#endif
X
X    sWriteNumber(&q, WordInfo->Length);
X    (void) strncpy(q, WordInfo->Word, WordInfo->Length);
X    q += WordInfo->Length;
X    if (pblock) sWriteNumber(&q, (pblock->ChainStart / BLOCKSIZE) << 1);
X    else sWriteNumber(&q, 0L);
X    sWriteNumber(&q, WordInfo->NumberOfWordPlaces);
X
X    WordInfo->WordPlaceStart = q;
X    WordInfo->DataBlock = PairBuffer;
X}
X
X/* Make WordInfo Block ... */
Xint
XMkWIB(WordInfo, pblock)
X    t_WordInfo *WordInfo;
X    t_pblock *pblock;
X{
X    extern unsigned short PutWordPlaces();
X
X    /* See how many pairs from the given pblock fit into WordInfo,
X     * and leave them in PairBuffer...
X     */
X
X    if (AsciiTrace > 3) {
X	fprintf(stderr, "Make info block for %s\n", WordInfo->Word);
X    }
X
X    MkWIBH(WordInfo, pblock);
X
X    if (pblock == (t_pblock *) 0) {
X	/* No WordPlaces to put in! */
X	WordInfo->WordPlacesInHere = 0;
X	return 0;
X    }
X
X    return WordInfo->WordPlacesInHere = PutWordPlaces(
X		pblock->WordPlaces,
X		WordInfo->WID,
X		WordInfo->WordPlaceStart,
X		WIDBLOCKSIZE - (WordInfo->WordPlaceStart - PairBuffer),
X		pblock->ChainStart,
X		pblock->NumberOfWordPlaces);
X}
X
Xchar *
XString2SixBitString(String)
X    unsigned char *String;
X{
X    static unsigned char Buffer[MaxWordLength + 1];
X    register unsigned char *p;
X    register unsigned char *Bufp = Buffer;
X    unsigned short Val;
X    int BitsLeft = 0;
X
X    /* BUG: we lose word-processing accents, etc. and 8-bitness if
X     * we do this.  Also, it slows things down very, very slightly.
X     */
X
X    /* Some ascii character equivalents:
X     * '0' 48 060 0x30
X     * 'A' 65 0101 0x41
X     * '_' 95 0137 0x5f
X     * 'a' 97 0141 0x61
X     */
X    for (p = String; *p; p++) {
X	if (!isalnum(*p) && *p != '\'' && *p != '_') {
X	    return (char *) 0;
X	}
X	if (isupper(*p)) *p = tolower(*p);
X	/* Store as
X	 * 0-9 --> 0-9 (easy!)
X	 * a-z --> 10...35
X	 * _/' --> 36/37
X	 * hence, I need 6 bits per character.  This also leaves rather
X	 * a lot of bits spare (38..64, 27 or so spaces).  As I fold case,
X	 * and don't have controls, I don't know what to do there.  I
X	 * could store digrams.  There are 38*38 = 1444 of these, but
X	 * some of them don't happen.  Not worth the effort.
X	 */
X	if (isdigit(*p)) {
X	    Val = (*p) - '0';
X	} else if (isalpha(*p)) {
X	    Val = (*p) - 'a' + ('9' - '0');
X	} else if (*p == '\'') {
X	    Val = ('9' - '0') + ('z' - 'a') + 1;
X	} else if (*p == '_') {
X	    Val = ('9' - '0') + ('z' - 'a') + 2;
X	} else {
X#define NEXTISEIGHT ('9' - '0') + ('z' - 'a') + 3
X	    Val = NEXTISEIGHT;
X	}
X	/* Write the first half */
X	if (!(BitsLeft & 07)) { /* i.e. it's 0 or 8 */
X	    *Bufp = (Val << 2);
X	    BitsLeft = 2;
X	} else {
X	    /* top BITSLEFT bits */
X	    *Bufp++ |= (Val >> (6 - BitsLeft));
X	    *Bufp = (unsigned) (Val << (2 + BitsLeft)); /* lose some bits */
X	    if ((BitsLeft -= 6) < 0) BitsLeft += 8;
X	}
X    }
X    if (BitsLeft) {
X	Bufp++;
X    }
X    *Bufp = 0;
X    return (char *) Buffer;
X}
X
Xunsigned char *
XSixBitString2String(SixBitString)
X    char *SixBitString;
X{
X    static unsigned char Buffer[MaxWordLength + 2];
X    register unsigned char *p = (unsigned char *) SixBitString;
X    int BitsLeft = 0;
X    unsigned char *Bufp = Buffer;
X
X    while (*p) {
X	if (!(BitsLeft & 07)) { /* i.e. it's 0 or 8 */
X	    *Bufp++ = (*p) >> 2;
X	    BitsLeft = 2;
X	} else {
X	    /* W R O N G */
X	    /* bottom BITSLEFT bits */
X	    *Bufp = ((*p) << (6 - BitsLeft));
X	    /* Rest */
X	    *Bufp = (unsigned) ((*p) << (2 + BitsLeft)); /* lose some bits */
X	    if ((BitsLeft -= 6) < 0) BitsLeft += 8;
X	}
X    }
X    return (unsigned char *) "notdone'fixme"; /* NOTDONE FIXME */
X}
X
X#ifdef TESTSIX
Xchar *progname= "testsix";
Xmain(argc, argv)
X    int argc;
X    char *argv[];
X{
X    extern char *gets();
X
X    char Line[4096];
X    int Encode = 1;
X
X    if (argc != 3) {
X	fprintf(stderr, "bad arg count; usage: %s -[de]\n", progname);
X    }
X    if (STREQ(argv[1], "-e")) Encode = 1;
X    else if (STREQ(argv[1], "-d")) Decode = 1;
X    else {
X	fprintf(stderr, "usage: %s -[d|e]\n", progname);
X	exit(1);
X    }
X    while (gets(Line) != (char *) 0) {
X	char *Result;
X
X	if (Encode) {
X	    Result = String2SixBitString(Line);
X	} else {
X	    if (STREQ(Line, "(cannot be encoded)")) {
X		Result = "(this line was not saved)";
X	    } else {
X		Result = SixBitString2String(line);
X	    }
X	}
X	if (Result) {
X	    printf("%s\n", Result);
X	} else {
X	    printf("(cannot be encoded)\n");
X	}
X    }
X}
X
X#endif /*TESTSIX*/
X
X
Xt_WID
XWord2WID(Word, Length)
X    char *Word;
X    unsigned int Length;
X{
X    DBM *db;
X    datum key, data;
X    char *q;
X    t_WID WID;
X    char Buffer[8];
X	/* enough for the binary representation of a number -- see numbers.c;
X	 * this is _not_ sizeof(long).  It's probably 5, in fact, although
X	 * for small numbers it's less.
X	 */
X
X    if (Length > MaxWordLength) {
X	Length = MaxWordLength; /* NOTE: no trailing \0 required. */
X    }
X
X    /* contact database server */
X    if ((db = startdb(WordIndex)) == (DBM *) 0) {
X	fprintf(stderr, "dbmopen(%s) failed\n", WordIndex);
X	exit(2);
X    }
X
X    key.dptr = Word;
X    key.dsize = Length;
X
X    data = dbm_fetch(db, key);
X
X    enddb(db);
X
X    if (data.dsize == 0) {
X	return (t_WID) 0;
X    }
X
X    /* do this because ReadNumber will leave q pointing beyond Buffer: */
X    (void) memcpy(Buffer, data.dptr, data.dsize);
X    q = Buffer;
X    WID = sReadNumber(&q);
X    if (q - Buffer != data.dsize) {
X	fprintf(stderr, "Word2Wid failed... got %lu\n");
X	return (t_WID) 0;
X    }
X    return WID;
X}
X    
Xchar *
XWID2Word(WID)
X    t_WID WID;
X{
X    t_WordInfo *W;
X    char *Word;
X
X    if (WID == (t_WID) 0) {
X	return (char *) 0;
X    }
X
X    if ((W = WID2WordInfo(WID)) == (t_WordInfo *) 0) {
X	return (char *) 0;
X    }
X    Word = W->Word;
X    W->Word = (char *) 0;
X    SlayWordInfo(W);
X    return Word;
X}
X
Xint
XPutWordInfoIntoIndex(WordInfo, Offset)
X    t_WordInfo *WordInfo;
X    unsigned long Offset;
X{
X    DBM *db;
X    char NumBuf[sizeof(t_WID) + 1];
X    char *q = NumBuf;
X    datum key, data;
X    int RetVal;
X
X    /** First, write the WID itself, so we can go from Word to WID */
X
X    key.dptr = WordInfo->Word;
X    key.dsize = WordInfo->Length;
X
X    sWriteNumber(&q, WordInfo->WID);
X
X    data.dptr = NumBuf;
X    data.dsize = q - NumBuf;
X
X    /* contact database server */
X    if ((db = startdb(WordIndex)) == (DBM *) 0) {
X	fprintf(stderr, "dbmopen(%s) failed\n", WordIndex);
X	exit(2);
X    }
X
X    RetVal = dbm_store(db, key, data, DBM_REPLACE);
X
X    enddb(db);
X
X    /** Now, ensure that we have a physical block for WordInfo.  If
X     ** we don't, there is something very wrong in pblock.c, our only
X     ** possible caller.
X     **/
X
X    if (WordInfo->DataBlock == (char *) 0) {
X	if (Offset) {
X	    fprintf(stderr, "WARNING: WordInfo corrupt for \"%s\"\n",
X			    WordInfo->Word);
X	}
X	(void) MkWIB(WordInfo, (t_pblock *) 0);
X    }
X
X    /** Now write the physical entry... */
X
X    if (Widfd < 0) {
X	if ((Widfd = open(WidIndexFile, O_RDWR|O_CREAT, 0766)) < 0) {
X	    fprintf(stderr, "Can't open WID file \"%s\"\n", WidIndexFile);
X	    exit(1);
X	}
X    }
X
X    if (lseek(Widfd, (long) (WordInfo->WID * WIDBLOCKSIZE), 0) < 0) {
X	perror("lseek");
X	exit(1);
X    }
X
X    if (write(Widfd, WordInfo->DataBlock, WIDBLOCKSIZE) != WIDBLOCKSIZE) {
X	perror("write");
X	exit(1);
X    }
X
X    return RetVal;
X}
X
Xint
XWID_sig()
X{
X    return 0;
X}
X
Xt_WID
XGetMaxWID()
X{
X    extern int errno;
X    extern long atol();
X
X    int fd;
X    char Buffer[20]; /* large enough to sprintf() a WID */
X    struct stat StatBuf;
X#if 0
X    int FileKey; /* what one gets from a lock... */
X    int NumberOfTriesLeft = 5;
X#endif
X
X    /* ensure that the file is there */
X    if (stat(WidFile, &StatBuf) == -1) {
X	return 0;
X    }
X
X    if ((fd = open(WidFile, O_RDWR, 0)) < 0) {
X	fprintf(stderr, "Warning: Can't open WID file");
X	return 0;
X    }
X
X#if 0
X    errno = 0;
X
X    /** Lock the file **/
X
X    do {
X	/* Set a timeout of 2 seconds */
X	signal(SIGALRM, WID_sig);
X	(void) alarm(3);
X	if ((FileKey = lockf(fd, F_LOCK, 0L)) < 0) {
X	    switch (errno) {
X	    case EACCES: /*[sic]*/ /* another process has the lock */
X		fprintf(stderr, "Please wait...\n");
X		/* shouldn't happen */
X		break;
X	    case EDEADLK:
X		fprintf(stderr, "Warning: can't lock \"%s\" -- EDEADLK\n", WidFile);
X		FileKey = 1;
X		break;
X	    case EINTR:
X		fprintf(stderr, "Please Wait... someone has the key...\n");
X		sleep(1);
X		break;
X	    }
X	}
X	if (--NumberOfTriesLeft <= 0) {
X	    fprintf(stderr, "Warning: can't lock ");
X	    perror(WidFile);
X	    (void) close(fd);
X	    return 0;
X	}
X    } while (FileKey < 0);
X    (void) alarm(0);
X
X    if (stat(WidFile, &StatBuf) == -1) {
X	fprintf(stderr, "It went away!\n");
X	return 0;
X    }
X#endif
X
X    /* Read the file */
X    if (read(fd, Buffer, (unsigned int) StatBuf.st_size) < 0) {
X	fprintf(stderr, "Can't read from \"%s\"\n", WidFile);
X	exit(1);
X    }
X
X#if 0
X    /** Unlock the file **/
X    if (lockf(fd, F_ULOCK, 0L) < 0 && FileKey == 0) {
X	fprintf(stderr, "Warning: might not have unlocked \"%s\"\n",
X				WidFile);
X    }
X#endif
X    (void) close(fd);
X
X    Buffer[StatBuf.st_size] = '\0';
X
X    return atol(Buffer);
X}
X
X
Xt_WID LastNextWIDVal = (t_WID) 0;
X
X#undef GetNextWID
X
Xt_WID GetNextWID(), GetMaxWID();
X
XINLINE
Xt_WID
XSpoofGetNextWID()
X{
X    static int SinceLastUpdate = 0;
X
X    /* Call the real function sometimes, so that the database does
X     * get updated in case of a crash or for other users.
X     */
X    if (++SinceLastUpdate > 500) {
X	SinceLastUpdate = 0;
X	return GetNextWID(1);
X    }
X
X    if (LastNextWIDVal == (t_WID) 0) {
X	SinceLastUpdate = 0;
X	LastNextWIDVal = GetMaxWID();
X    }
X    return ++LastNextWIDVal;
X}
X
Xvoid
XWriteCurrentMaxWID()
X{
X    (void) GetNextWID(1);
X}
X
Xt_WID
XGetNextWID(WriteCurrent)
X    int WriteCurrent; /* simply write the current MaxWID if true */
X{
X    extern int errno;
X    extern long atol();
X
X    int fd;
X    char Buffer[20];
X    struct stat StatBuf;
X#if 0
X    int FileKey; /* what one gets from a lock... */
X    int NumberOfTriesLeft = 5;
X#endif
X    t_WID Result;
X
X    /** Alter the file, so other programs can see the new words...
X     **/
X
X    /* ensure that the file is there */
X    if (stat(WidFile, &StatBuf) == -1) {
X	fprintf(stderr, "Creating WID file \"%s\"\n", WidFile);
X	if ((fd = creat(WidFile, 02666)) < 0) {
X	    fprintf(stderr, "Can't create WID file \"%s\"\n", WidFile);
X	    exit(1);
X	}
X	(void) close(fd);
X	return GetNextWID(WriteCurrent);
X
X	/*NOTREACHED*/
X    }
X
X    if ((fd = open(WidFile, O_RDWR, 0)) < 0) {
X	fprintf(stderr, "Can't open WID file");
X	perror(WidFile);
X	exit(1);
X    }
X
X#if 0
X    errno = 0;
X
X    /** Lock the file **/
X
X    do {
X	/* Set a timeout of 2 seconds */
X	signal(SIGALRM, WID_sig);
X	(void) alarm(3);
X	if ((FileKey = lockf(fd, F_LOCK, 0L)) < 0) {
X	    switch (errno) {
X	    case EACCES: /*[sic]*/ /* another process has the lock */
X		fprintf(stderr, "Please wait...\n");
X		/* shouldn't happen */
X		break;
X	    case EDEADLK:
X		fprintf(stderr, "Warning: can't lock \"%s\" -- EDEADLK\n", WidFile);
X		FileKey = 1;
X		break;
X	    case EINTR:
X		fprintf(stderr, "Please Wait... someone has the key...\n");
X		sleep(1);
X		break;
X	    }
X	}
X	if (--NumberOfTriesLeft <= 0) {
X	    fprintf(stderr, "Warning: can't lock the file \"%s\"\n", WidFile);
X	}
X    } while (FileKey < 0);
X    (void) alarm(0);
X
X    if (stat(WidFile, &StatBuf) == -1) {
X	fprintf(stderr, "It went away!\n");
X	exit(1);
X    }
X#endif
X
X    /* Read the file */
X
X    if (read(fd, Buffer, (unsigned int) StatBuf.st_size) < 0) {
X	fprintf(stderr, "Can't read from \"%s\"\n", WidFile);
X	exit(1);
X    }
X
X    Buffer[StatBuf.st_size] = '\0';
X
X    Result = atol(Buffer);
X
X    /* if WriteCurrent is set, we should not increment anything */
X    if (!WriteCurrent) {
X	++LastNextWIDVal;
X	++Result;
X    }
X
X    if (Result < LastNextWIDVal) {
X	Result = LastNextWIDVal;
X    }
X
X    (void) sprintf(Buffer, "%lu\n", Result);
X
X    /* Move to the start of the file and write the now value.
X     * No need to truncate the file, because it didn't shrink!
X     */
X    (void) lseek(fd, 0, 0L);
X    (void) write(fd, Buffer, (unsigned int) strlen(Buffer));
X
X#if 0
X    /** Unlock the file **/
X
X    if (lockf(fd, F_ULOCK, 0L) < 0 && FileKey == 0) {
X	fprintf(stderr, "Warning: might not have unlocked \"%s\"\n",
X				WidFile);
X    }
X#endif
X    (void) close(fd);
X
X    return Result;
X}
X
Xint
XDeleteWord(Word)
X    char *Word;
X{
X    extern t_pblock *Getpblock();
X
X    t_WID WID;
X    t_WordInfo *WordInfo;
X    t_pblock *tmp;
X
X    if ((WID = Word2WID(Word, strlen(Word))) == (t_WID) 0) {
X	return -1; /* not there */
X    }
X
X    /* get info from the list */
X    if ((WordInfo = WID2WordInfo(WID)) == (t_WordInfo *) 0) {
X	return -1;
X    }
X
X    if ((tmp = Getpblock(WordInfo)) != (t_pblock *) NULL) {
X	Deletepblock(tmp);
X	(void) efree(tmp);
X    }
X
X    /* delete the offset from the database, but retain the WID: */
X    WordInfo->Offset = 0L;
X    WordInfo->NumberOfWordPlaces = 0L;
X    WordInfo->WordPlacesInHere = 0;
X    PutWordInfoIntoIndex(WordInfo, 0L);
X    SlayWordInfo(WordInfo);
X
X    return 0;
X}
X
X/* Routines to create and destroy WordInfo structures */
XINLINE t_WordInfo *
XMakeWordInfo(WID, Length, Word)
X    t_WID WID;
X    int Length;
X    char *Word; /* the word, which might not be nul-terminated */
X{
X    register t_WordInfo *WP;
X    WP = (t_WordInfo *) emalloc(sizeof(t_WordInfo));
X
X    WP->WID = WID;
X    WP->FID = (t_FID) 0;
X    WP->Next = (t_WordInfo *) 0;
X    WP->NumberOfWordPlaces = 0;
X    WP->DataBlock = (char *) 0;
X    WP->WordPlaceStart = (char *) 0;
X    WP->WordPlaces = (t_WordPlace *) 0;
X    WP->WordPlacesInHere = 0;
X    WP->WordPlace.FID = 0; /* mark as invalid */
X    WP->WordPlace.Flags = 0; /* this gets used anyway, so set it to zero! */
X
X    WP->Word = emalloc(Length + 1);
X
X    (void) strncpy(WP->Word, Word, Length);
X    WP->Length = Length;
X    WP->Word[Length] = '\0'; /* strncpy does not add a null */
X    WP->Offset = 0;
X
X    return WP;
X}
X
Xvoid
XSlayWordInfo(WP)
X    t_WordInfo *WP;
X{
X    if (!WP) return;
X    if (WP->Word) efree(WP->Word);
X    if (WP->WordPlaces) efree((char *)WP-> WordPlaces);
X
X    WP->Next = (t_WordInfo *) 0;
X	/* The above line is to force a run-time error in the common
X	 * (but wrong) case
X	 * for (w = WordList; w; w = w->Next) SlayWordInfo(w);
X	 */
X    efree((char *) WP);
X}
@@@End of lq-text/src/liblqtext/WordInfo.c
echo end of part 04
-- 
Liam R. E. Quin,  lee at sq.com, SoftQuad Inc., Toronto, +1 (416) 963-8337