lq-text Full Text Retrieval Database Part 06/13
Liam R. E. Quin
lee at sq.sq.com
Mon Mar 4 12:05:00 AEST 1991
: cut here --- cut here --
: To unbundle, sh this file
#! /bin/sh
: part 06
echo x - lq-text/src/liblqtext/progname.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/progname.c <<'@@@End of lq-text/src/liblqtext/progname.c'
X/* progname.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X * This file simply declares progname.
X * This variable MUST be set by main().
X */
X
X/* Name of the running program, printed as a prefix on error messages.
X * Starts out null; main() is required to set it before anything is reported.
X */
Xchar *progname = (char *) 0;
X
X/* $Id: progname.c,v 1.2 90/10/06 00:12:19 lee Rel1-10 $
X *
X * $Log: progname.c,v $
X * Revision 1.2 90/10/06 00:12:19 lee
X * Prepared for first beta release.
X *
X * Revision 1.1 90/03/24 17:07:22 lee
X * Initial revision
X *
X *
X */
@@@End of lq-text/src/liblqtext/progname.c
echo x - lq-text/src/liblqtext/smalldb.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/smalldb.c <<'@@@End of lq-text/src/liblqtext/smalldb.c'
X/* smalldb.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/* Simple interface to start and end dbm.
X * You may also need to supply dbm_store() and dbm_fetch(), but these
X * should certainly be macros.
X *
X * $Id: smalldb.c,v 1.5 91/03/03 00:15:22 lee Rel1-10 $
X *
X * $Log: smalldb.c,v $
X * Revision 1.5 91/03/03 00:15:22 lee
X * Improved an error message and fixed a permissions bug.
X *
X * Revision 1.4 91/03/02 18:52:48 lee
X * Default access is now read only -- lqWriteAccess must be called otherwise.
X *
X * Revision 1.3 90/10/06 00:12:20 lee
X * Prepared for first beta release.
X *
X * Revision 1.2 90/09/20 17:53:26 lee
X * slight error reporting improvement.
X *
X * Revision 1.1 90/08/09 19:16:56 lee
X * Initial revision
X *
X * Revision 2.2 89/10/08 20:47:14 lee
X * Working version of nx-text engine. Addfile and wordinfo work OK.
X *
X * Revision 2.1 89/10/02 01:15:55 lee
X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
X *
X * Revision 1.2 89/09/16 21:18:39 lee
X * First demonstratable version.
X *
X * Revision 1.1 89/09/07 21:06:11 lee
X * Initial revision
X *
X *
X */
X
X#include "globals.h"
X
X#include <stdio.h>
X
X#include <fcntl.h>
X#ifdef BSD
X# include <sys/param.h>
X# define PATH_MAX MAXPATHLEN /* untested, sorry */
X#else /*!BSD*/
X# include <limits.h> /* for PATH_MAX */
X#endif
X#include "smalldb.h"
X#include "emalloc.h"
X
Xextern int strcmp();
Xextern char *strcpy();
X
X/* The physical database for the list of words, and for the list
X * of files, uses ndbm.
X * The advantage of this is that it takes only two file system accesses
X * to retrieve any data item (honest!).
X * It's also reasonably fast at insertion.
X * One disadvantage is that it doesn't cope if too many words have the
X * same (32-bit) hash function, although publicly available replacements
X * such as the GNU project's gdbm fix this.
X *
X * Since starting the database is expensive (two opens and a malloc),
X * I have a cache of DBM pointers and keep them open. Versions of the
X * dbm routines that don't support more than one database will have to
X * have a cache-size of one!
X * I am not sure what the impact of this would be on performance; for
X * adding a new file it shouldn't be too bad, as the file list is examined
X * only once for each file, during reading, and the word database is looked
X * at (at least once for each distinct word) only on writing.
X * For retrieval, however, the word database will be looked at for each
X * word in the query, and the file database for (potentially) each match
X * of each word, so the requests will be more interspersed.
X * Under no circumstances is it acceptable to dispense with the cache, as
X * otherwise you will be doing (literally) thousands of calls to
X * open() and close() per second!
X *
X */
X
X#undef startdb
X
X#ifndef CACHE
X/* It's unusual to deal with lots of databases at once, so let's not
X * waste RAM...
X */
X# define CACHE 3
X#endif
X
X/* Name under which each cached database was opened; row i goes with Cache[i] */
Xstatic char NameCache[CACHE][PATH_MAX + 1]; /* + 1 for \0, I think */
X/* Open database handles; a null entry marks an unused slot */
Xstatic DBM *Cache[CACHE]; /* (set to zero by definition) */
X
X/* Highest slot index handed out so far; -1 until the first database is opened */
Xstatic int MaxInCache = (-1);
X
X/* FileFlags and Mode are passed to dbm_open */
X/* They stay read-only until lqWriteAccess() is called. */
Xstatic int FileFlags = O_RDONLY;
Xstatic int FileModes = 0;
X
Xvoid
XlqWriteAccess()
X{
X FileFlags = O_RDWR|O_CREAT;
X FileModes = 0664; /* owner and group write, others read only */
X}
X
XDBM *
Xstartdb(FilePrefix)
X char *FilePrefix;
X{
X extern int errno;
X register int i;
X
X for (i = 0; i <= MaxInCache; i++) {
X if (Cache[i] && STREQ(NameCache[i], FilePrefix)) {
X return Cache[i];
X }
X }
X
X /* Find an empty slot */
X for (i = 0; i <= MaxInCache; i++) {
X if (Cache[i] == (DBM *) 0) break;
X }
X
X if (i > MaxInCache) {
X if (i >= CACHE) i = 0;
X }
X
X if (Cache[i]) dbm_close(Cache[i]);
X NameCache[i][0] = '\0';
X
X errno = 0;
X
X if ((Cache[i] = dbm_open(FilePrefix, FileFlags, FileModes)) == (DBM *)0) {
X int e = errno;
X (void) fprintf(stderr, "%s: dbm_open error %d: ", progname, errno);
X errno = e;
X perror(FilePrefix);
X exit(1);
X }
X (void) strcpy(NameCache[i], FilePrefix);
X if (i > MaxInCache) MaxInCache = i;
X
X return Cache[i];
X}
X
X#undef enddb
X
X/* Counterpart of startdb().  It deliberately does nothing: the handle
X * stays open in the cache so that the next startdb() for the same
X * database is cheap.  The files are really closed by cleanupdb().
X */
X/*ARGSUSED*/
Xvoid
Xenddb(db)
X DBM *db;
X{
X /* no-op */
X}
X
Xvoid
Xcleanupdb()
X{
X register int i;
X
X for (i = 0; i <= MaxInCache; i++) {
X if (Cache[i]) dbm_close(Cache[i]);
X Cache[i] = (DBM *) 0;
X NameCache[i][0] = '\0';
X }
X}
@@@End of lq-text/src/liblqtext/smalldb.c
echo x - lq-text/src/liblqtext/system.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/system.c <<'@@@End of lq-text/src/liblqtext/system.c'
X/* system.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X *
X * This is not a very portable way of doing things... and certainly not
X * a very fast one. MUST be re-written.
X * Only for use from within curses.
X *
X * Lee
X *
X * $Id: system.c,v 1.3 90/10/06 00:21:37 lee Rel1-10 $
X */
X
X#ifdef ultrix
X# include <cursesX.h>
X#else
X# include <curses.h>
X#endif
X
X#ifndef echo
Xextern int echo();
X#endif
X#ifndef wmove
Xextern int wmove();
X#endif
X#ifndef nl
Xextern int nl();
X#endif
X#ifndef noecho
Xextern int noecho();
X#endif
X#ifndef nonl
Xextern int nonl();
X#endif
X#ifndef wrefresh
Xextern int wrefresh();
X#endif
X#ifndef waddstr
Xextern int waddstr();
X#endif
X#ifndef wclear
Xextern int wclear();
X#endif
X
Xint
XMySystem(string)
X char *string;
X{
X int val;
X
X clearok(stdscr, TRUE);
X clear();
X refresh();
X noraw();
X echo();
X nl();
X val = system("stty opost icanon onlcr icrnl echo");
X (void) system(string);
X fprintf(stderr, "\n[press return to continue] ");
X raw();
X noecho();
X nonl();
X (void) getch();
X clearok(stdscr, TRUE);
X mvwaddstr(stdscr, 10, 10, " "); /* ???!?? */
X
X return val;
X}
X
@@@End of lq-text/src/liblqtext/system.c
echo x - lq-text/src/lqtext/FindCommon.sh 1>&2
sed 's/^X//' >lq-text/src/lqtext/FindCommon.sh <<'@@@End of lq-text/src/lqtext/FindCommon.sh'
X:
X# FindCommon -- Copyright 1990 Liam R. Quin. All Rights Reserved.
X# This code is NOT in the public domain.
X# See the file COPYRIGHT for full details.
X#
X# $Id: FindCommon.sh,v 1.2 90/10/06 00:50:31 lee Rel1-10 $
X
X# Find the most common words in the database.
X# usage is % n, where n is the n most comon words to find
X
Xlqword -a | sed -e 's/^......................\(.........\)..\(..*\)$/\1 \2/' |
Xsort -nr | sed ${1-500}q
X
Xexit $?
X
X# 1 | 0 | 2 | pcpaintbrush
X# 2 | 0 | 2 | escape
X# 3 | 0 | 1 | durham
X# 4 | 60928 | 12 | making
X# 5 | 0 | 1 | ethical
X# 6 | 0 | 1 | committing
X
X# $Log: FindCommon.sh,v $
X# Revision 1.2 90/10/06 00:50:31 lee
X# Prepared for first beta release.
X#
X#
@@@End of lq-text/src/lqtext/FindCommon.sh
echo x - lq-text/src/lqtext/Makefile 1>&2
sed 's/^X//' >lq-text/src/lqtext/Makefile <<'@@@End of lq-text/src/lqtext/Makefile'
X# Makefile for LQ-Text, a full text retrieval package by Liam R. Quin
X# This Makefile belongs in the "src/lqtext" directory.
X#
X# Note that most of the actual configuration is done in ../Makefile and
X# in ../h/globals.h, and not here.
X
X# Makefile -- Copyright 1990 Liam R. Quin. All Rights Reserved.
X# This code is NOT in the public domain.
X# See the file ../COPYRIGHT for full details.
X#
X# $Id: Makefile,v 1.5 91/03/03 00:19:26 lee Rel1-10 $
X
X
XPWD=lqtext
X
X# TARGETS is everything "make all" builds.  BINFILES is the compiled subset
X# that "make install" copies and strips; lq is a shell script and is
X# installed separately by the same rule.
XTARGETS = lqaddfile lqfile lqword lqphrase lqshow lqkwik lq
XBINFILES =lqaddfile lqfile lqword lqshow lqphrase lqkwik
X
X# Where "make install" puts things.  MODE is applied only to the lq script.
XDESTDIR=../bin
XMODE=755
X# RANLIB and EXTRA are not referenced by any rule in this file; presumably
X# they are meant for, or overridden from, ../Makefile -- confirm there.
XRANLIB=echo
X
XEXTRA=-I../h
X
Xall: $(TARGETS)
X
X# for ndbm (simplest), leave empty or use -lndbm if you need it
X# for sdbm (best so far), use ../lib/libsdbm.a
X# for gdbm... well, I dunno.
XDBMLIBS=../lib/libsdbm.a
X# DBMLIBS=-lndbm
X# DBMLIBS=ndbm.o bcopy.o
X
XTEXTLIB=../lib/liblqtext.a ../lib/liblq.a
X
X# The following are for "make depend" and for saber to load...
X# NOTE(review): lqkwik.c is not in this list although lqkwik is a target,
X# and the depend rule below uses *.c rather than $(DEPENDFILES).
XDEPENDFILES = ReadAhead.c SixBit.c fileindex.c lqaddfile.c lqphrase.c \
X lqshow.c lqword.c sizes.c wordtable.c
X
X# MALLFILES=/usr/lib/debug/malloc.o /usr/lib/debug/mallocmap.o
XMALLFILES =
X
X# Copy and strip each compiled program into $(DESTDIR), then move the lq
X# script there and give it mode $(MODE).  Because lq is moved rather than
X# copied, the next "make all" will recreate it from lq.sh.
Xinstall: all
X for i in $(BINFILES); do cp "$$i" $(DESTDIR); \
X strip "$(DESTDIR)/$$i" ; \
X done ; \
X mv lq $(DESTDIR)/lq; chmod $(MODE) $(DESTDIR)/lq;
X
X# .src and .obj are pseudo-suffixes: the "#load" recipe lines are only
X# comments as far as the shell is concerned (apparently commands meant
X# for the saber environment).
X.SUFFIXES: .c .o .src .obj
X
X.c.src:
X #load $(CFLAGS) $<
X
X.o.obj:
X #load $(CFLAGS) $<
X
X# If you are going to use saber on these, you should name the programs.
Xsaber_src:
X
Xsaber_obj:
X
X# lq is just lq.sh made executable
Xlq: lq.sh
X cp lq.sh lq
X chmod +x lq
X
X# $(TERMCAP) and $(MALLOC) are not set in this file; they are expected to
X# come from the invoking make or the environment (empty if unset).
Xlqshow: lqshow.o $(TEXTLIB)
X $(CC) $(CFLAGS) -o lqshow lqshow.o $(TEXTLIB) $(TERMCAP) $(DBMLIBS)
X
Xlqaddfile: lqaddfile.o wordtable.o $(TEXTLIB)
X $(CC) $(CFLAGS) -o lqaddfile lqaddfile.o wordtable.o \
X $(TEXTLIB) $(MALLOC) $(DBMLIBS) $(MALLFILES)
X
X# note: the lqfile program is built from fileindex.c
Xlqfile: fileindex.o $(TEXTLIB)
X $(CC) $(CFLAGS) -o lqfile fileindex.o $(TEXTLIB) $(MALLOC) $(DBMLIBS)
X
Xlqword: lqword.o $(TEXTLIB)
X $(CC) $(CFLAGS) -o lqword lqword.o $(TEXTLIB) $(MALLOC) $(DBMLIBS)
X
Xlqkwik: lqkwik.o $(TEXTLIB)
X $(CC) $(CFLAGS) -o lqkwik lqkwik.o $(TEXTLIB) $(MALLOC) $(DBMLIBS)
X
Xlqphrase: lqphrase.o $(TEXTLIB)
X $(CC) $(CFLAGS) -o lqphrase lqphrase.o $(TEXTLIB) $(DBMLIBS)
X
X# NOTE(review): no rule in this Makefile says how to make any of the
X# *.Lint prerequisites, so "make lint" cannot succeed as it stands.
Xlint: AddFile.Lint News.Lint FileInfo.Lint Phrase.Lint
X
X# tidy removes intermediate files; clean also removes the built programs.
Xtidy:
X /bin/rm -f *.o core
X
X# $(TEST) is not defined in this file, so it expands to nothing here.
Xclean: tidy
X /bin/rm -f $(TARGETS) $(TEST)
X
Xdepend:
X mkdep $(CFLAGS) *.c
X
X#
X# $Log: Makefile,v $
X# Revision 1.5 91/03/03 00:19:26 lee
X# added lqkwik
X#
X# Revision 1.4 90/10/06 00:50:42 lee
X# Prepared for first beta release.
X#
X# Revision 1.3 90/10/05 23:54:57 lee
X# deleted mkdep output.
X#
X# Revision 1.2 90/09/28 21:54:01 lee
X# No longer uses OWNER.
X#
X# Revision 1.1 90/08/09 19:17:39 lee
X# Initial revision
X#
X
X# DO NOT DELETE THIS LINE -- mkdep uses it.
X# DO NOT PUT ANYTHING AFTER THIS LINE, IT WILL GO AWAY.
X
XSixBit.o: SixBit.c ../h/globals.h
XSixBit.o: ../h/wordrules.h
Xfileindex.o: fileindex.c ../h/globals.h
Xfileindex.o: ../h/emalloc.h ../h/fileinfo.h
Xlqaddfile.o: lqaddfile.c
Xlqaddfile.o: ../h/globals.h ../h/fileinfo.h ../h/emalloc.h
Xlqaddfile.o: ../h/wordinfo.h ../h/pblock.h ../h/wordrules.h ../h/filter.h
Xlqkwik.o: lqkwik.c ../h/globals.h ../h/fileinfo.h
Xlqkwik.o: ../h/wordinfo.h ../h/pblock.h ../h/wordrules.h ../h/pblock.h
Xlqkwik.o: ../h/emalloc.h
Xlqphrase.o: lqphrase.c ../h/globals.h ../h/emalloc.h
Xlqphrase.o: ../h/fileinfo.h ../h/wordinfo.h ../h/pblock.h ../h/pblock.h
Xlqphrase.o: ../h/phrase.h
Xlqshow.o: lqshow.c ../h/globals.h
Xlqword.o: lqword.c ../h/globals.h
Xwordtable.o: wordtable.c ../h/globals.h
X
X# IF YOU PUT ANYTHING HERE IT WILL GO AWAY
@@@End of lq-text/src/lqtext/Makefile
echo x - lq-text/src/lqtext/ReadAhead.c 1>&2
sed 's/^X//' >lq-text/src/lqtext/ReadAhead.c <<'@@@End of lq-text/src/lqtext/ReadAhead.c'
@@@End of lq-text/src/lqtext/ReadAhead.c
echo x - lq-text/src/lqtext/fileindex.c 1>&2
sed 's/^X//' >lq-text/src/lqtext/fileindex.c <<'@@@End of lq-text/src/lqtext/fileindex.c'
X/* fileindex.c -- Copyright 1989, 1990 Liam R. Quin. All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/* A simple program to give information about one or more files about
X * which information is stored in the NX-Text database.
X *
X * $Id: fileindex.c,v 1.4 91/03/02 18:56:53 lee Rel1-10 $
X */
X
X#include "globals.h" /* defines and declarations for database filenames */
X
X#include <stdio.h>
X#include <sys/types.h>
X#include <malloc.h>
X#include "emalloc.h"
X#include "fileinfo.h"
X
Xstatic char *Revision = "@(#) lqtext 2.3 89/11/34";
X
X/* The position of the \n in the 26-char string returned by ctime(3): */
X#define DATENEWLINE 24
X
Xchar *progname;
Xint AsciiTrace = 0;
X
X/** System calls and library functions used in this file: **/
X
X/** Unix System calls: **/
Xextern void exit();
X/** System Library Functions: **/
X
X/** external lqtext functions: **/
Xextern void cleanupdb(), SetDefaults();
Xint SaveFileInfo(), GetFilterType();
X#ifndef efree
X extern void efree();
X#endif
X/** Functions defined within this file: **/
Xvoid AddInfo(), AllInfo(), Display(), PrintInfo();
X
X/* Command-line mode flags, set by main() while it parses the options */
Xint AllFiles = 0; /* -a: report on every file in the database */
Xint ListMode = 0; /* -l: plain output; Display() leaves out the column rules */
Xint AddFiles = 0; /* -A: add the named files instead of listing them */
X
Xint
Xmain(argc, argv)
X int argc;
X char *argv[];
X{
X extern int optind, getopt();
X /** extern char *optarg; (unused at the moment) **/
X int ch;
X int ErrorFlag = 0;
X
X progname = argv[0];
X
X SetDefaults(argc, argv);
X
X /* All programs take Zz:Vv */
X while ((ch = getopt(argc, argv, "Zz:VvAax")) != EOF) {
X switch (ch) {
X case 'z':
X case 'Z':
X break; /* done by SetDefaults(); */
X case 'V':
X fprintf(stderr, "%s version %s\n", progname, Revision);
X break;
X case 'v':
X AsciiTrace = 1;
X break;
X case 'A':
X AddFiles = 1;
X break;
X case 'a':
X AllFiles = 1;
X break;
X case 'l':
X ListMode = 1;
X break;
X case 'x':
X ErrorFlag = (-1);
X break;
X case '?':
X ErrorFlag = 1;
X break;
X }
X }
X
X /* Normally put call to lrqError here to give a helpful message,
X * but not yet ready to ship the error handling package, sorry
X */
X if (ErrorFlag) {
X fprintf(stderr, "%s: usage: %s [options] [files]\n",progname,progname);
X fprintf(stderr, "%s: options are:\n", progname);
X fputs("\
X -c file -- treat the named file as a list of common words\n\
X -d dir -- use the lq-text database in the directory \"dir\"\n\
X -l -- list mode: no header output or lines drawn\n\
X -s -- show the list of saved files\n\
X -t N -- set trace level to N [default: 0]\n\
X -V -- print version information\n\
X -v -- be verbose (same as -t 1)\n\
X -x -- print this explanation\n\
X\n\
XIn addition, if no files are given, the following are understood:\n\
X -A -- add the named files to the list of known files\n\
X -a -- list information about all files\n", stderr);
X exit((ErrorFlag > 0) ? 1 : 0);
X }
X
X if (AllFiles && AddFiles) {
X fprintf(stderr, "%s: do not use both -a and -A options\n", progname);
X fprintf(stderr, "\tuse %s -x for further explanation.\n", progname);
X exit(1);
X }
X
X if (optind >= argc && !AllFiles && !AddFiles) {
X fprintf(stderr,
X "%s: You must either give the -a option or specify files to list.\n",
X progname);
X fprintf(stderr, "\tuse %s -x for further explanation.\n", progname);
X exit(1);
X }
X
X if (!AddFiles || !ListMode) {
X printf("%-7.7s | T | %-20.20s | %s\n",
X "FID", "Date Last indexed", "Current Location");
X puts(
X"========|===|======================|=========================================="
X );
X }
X if (AllFiles) {
X AllInfo();
X } else {
X if (AddFiles) {
X extern lqWriteAccess();
X
X lqWriteAccess();
X }
X
X while (optind < argc) {
X if (AddFiles) {
X AddInfo(argv[optind++]);
X } else {
X PrintInfo(argv[optind++]); /* ugh */
X }
X }
X }
X cleanupdb(); /* close dbm files */
X exit(0);
X /*NOTREACHED*/
X return 1; /* for lint and gcc... */
X}
X
Xvoid
XPrintInfo(Name)
X char *Name;
X{
X extern t_FileInfo *GetFileInfo();
X long FID;
X extern long atol();
X extern t_FID Name2FID();
X
X t_FileInfo *FileInfo;
X
X if ((FID = Name2FID(Name)) == (t_FID) 0) {
X fprintf(stderr, "No FID information for filename: %s\n", Name);
X if ((FID = atol(Name)) == (t_FID) 0) {
X return;
X }
X }
X
X /* get info from the list */
X if ((FileInfo = GetFileInfo(FID)) == (t_FileInfo *) 0) {
X fprintf(stderr, "No index information for: %s\n", Name);
X return;
X }
X Display(FileInfo);
X}
X
Xvoid
XDisplay(FileInfo)
X t_FileInfo *FileInfo;
X{
X extern char *ctime();
X char *DateString;
X
X DateString = ctime(&(FileInfo->Date));
X DateString[DATENEWLINE] = '\0'; /* delete the trailing newline */
X
X if (ListMode) {
X printf("%lu %d %s %s\n",
X FileInfo->FID, FileInfo->FilterType, &DateString[4], FileInfo->Name);
X } else {
X printf("%7lu | %d | %-20.20s | %s\n",
X FileInfo->FID, FileInfo->FilterType, &DateString[4], FileInfo->Name);
X }
X}
X
X/**
XMon Sep 25 23:58:53 BST 1989
XFID | T | Date Last indexed | Current Location
X========|===|======================|===========================================
X 1 | 0 | Sep 25 20:31:26 1989 | /usr2/liam/Bible/NT/John/john01.kjv
X 2 | 0 | Sep 25 20:31:28 1989 | /usr2/liam/Bible/NT/John/john02.kjv
X 3 | 0 | Sep 25 20:31:30 1989 | /usr2/liam/Bible/NT/John/john03.kjv
X**/
X
Xvoid
XAllInfo()
X{
X extern long GetMaxFID();
X extern t_FileInfo *GetFileInfo();
X
X t_FileInfo *FileInfo;
X long FID;
X long MaxFid = GetMaxFID();
X
X for (FID = 0L; FID <= MaxFid; FID++) {
X if ((FileInfo = GetFileInfo(FID)) != (t_FileInfo *) 0) {
X Display(FileInfo);
X efree(FileInfo); /* NOTDONE use destroyfileinfo() */
X }
X }
X printf("Max File Identifier is %lu\n", MaxFid);
X}
X
Xvoid
XAddInfo(FileName)
X char *FileName;
X{
X extern time_t time();
X extern unsigned long GetNextFID();
X t_FileInfo FileInfo;
X
X FileInfo.Name = FileName;
X (void) time(&(FileInfo.Date));
X FileInfo.FID = GetNextFID();
X FileInfo.Stream = 0; /* force GetFilterType to use open()? */
X
X /* determine filter type */
X FileInfo.FilterType = GetFilterType(&FileInfo);
X
X printf("%d %s (type %d) %s\n",
X FileInfo.FID,
X FileInfo.Name,
X FileInfo.FilterType,
X SaveFileInfo(&FileInfo) == 0 ?
X "saved successfully." :
X "not saved."
X );
X}
X
X/*
X * $Log: fileindex.c,v $
X * Revision 1.4 91/03/02 18:56:53 lee
X * Now asks for write access iff [sic] necessary
X *
X * Revision 1.3 90/10/06 00:50:50 lee
X * Prepared for first beta release.
X *
X * Revision 1.2 90/08/29 21:44:51 lee
X * Alpha release
X *
X * Revision 1.1 90/08/09 19:17:11 lee
X * Initial revision
X *
X * Revision 2.2 89/10/08 20:45:46 lee
X * Working version of nx-text engine. Addfile and wordinfo work OK.
X *
X * Revision 2.1 89/10/02 01:14:18 lee
X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
X *
X * Revision 1.2 89/09/16 21:16:17 lee
X * First demonstratable version.
X *
X * Revision 1.1 89/09/07 21:05:55 lee
X * Initial revision
X *
X */
@@@End of lq-text/src/lqtext/fileindex.c
echo x - lq-text/src/lqtext/intersect.sh 1>&2
sed 's/^X//' >lq-text/src/lqtext/intersect.sh <<'@@@End of lq-text/src/lqtext/intersect.sh'
X:
X# intersect word-one word-two
X#
X# intersect -- Copyright 1990 Liam R. Quin. All Rights Reserved.
X# This code is NOT in the public domain.
X# See the file ../COPYRIGHT for full details.
X#
X# $Id: intersect.sh,v 1.3 91/03/03 00:18:59 lee Rel1-10 $
X#
X
X
XFileNumber=0
XFileList=
XProgram=lqphrase
XProgOpts=
XAll=/tmp/iAll$$
Xexport All
X
Xtrap '/bin/rm -f $All $tmp $First $FileList; exit' 0 1 2 3 15
X
Xif [ x"$1" = x"" ]
Xthen
X echo "$0: Usage: `basename $0` {-w word} | {-p phrase} ..." 1>&2
X exit 1
Xfi
X
X
Xfor i
Xdo
X if [ x"$i" = x"-p" ]
X then
X Program=lqphrase
X ProgOpts=
X elif [ x"$i" = x"-w" ]
X then
X Program=lqword
X ProgOpts=-l
X else
X tmp=/tmp/inter.$FileNumber
X $Program $ProgOpts "$i" | tee -a $ALL | awk '{ print $3 }' | sort -u > $tmp
X if [ x"$First" = x"" ]
X then
X First="$tmp"
X else
X FileList="$FileList $tmp"
X fi
X FileNumber=`expr $FileNumber + 1`
X fi
Xdone
X
X# Find matches...
Xtmp=/tmp/inter.tmp$$
X
Xfor i in $FileList
Xdo
X fgrep -x -f $First $i | sort -u > $tmp
X mv $tmp $First
Xdone
X
Xmv $First $tmp
Xsed 's/^/ /' $tmp > $First
X
Xfgrep -f $First $All
Xexit 0
X
X#
X#
X# $Log: intersect.sh,v $
X# Revision 1.3 91/03/03 00:18:59 lee
X# brought up to date a little...
X#
X# Revision 1.2 90/10/06 00:50:52 lee
X# Prepared for first beta release.
X#
X# Revision 1.1 90/08/29 21:45:01 lee
X# Initial revision
X#
X#
X#
@@@End of lq-text/src/lqtext/intersect.sh
echo x - lq-text/src/lqtext/lq.sh 1>&2
sed 's/^X//' >lq-text/src/lqtext/lq.sh <<'@@@End of lq-text/src/lqtext/lq.sh'
X#! /bin/sh
X: use /bin/sh
X# put the : line first on System V
X
X# lq -- Copyright 1990 Liam R. Quin. All Rights Reserved.
X# This code is NOT in the public domain.
X# See the file ../COPYRIGHT for full details.
X#
X# $Id: lq.sh,v 1.3 90/10/06 00:50:53 lee Rel1-10 $
X#
X
Xif [ x"`echo -n hello`" = x'hello' ]
Xthen
X N=-n
X C=
Xelse
X N=
X C='\c'
Xfi
X
Xquit=no
Xt=/tmp/lq$$
XListFile=/tmp/lqshow$$
Xexport ListFile
X
Xtrap '/bin/rm -f $t; exit' 0 1 2 3 15
X
X
Xwhile [ x"$quit" != x"yes" ]
Xdo
X cat << boy
X| Type a words or phrases to find, one per line,
X| and then press return.
Xboy
X x='fhdjfd'
X Phrases=
X while [ x"$x" != x"" ]
X do
X echo $N "| $C"
X read x
X if [ x"$x" != x"" ]
X then
X New=`echo "$x" | sed 's/"/:/g'`
X Phrases="${Phrases} \"$x\""
X fi
X done
X echo $Phrases
X eval lqphrase -p $Phrases \> $t
X if [ ! -s $t ]
X then
X echo "No match"
X else
X # determine the order in which matches will be presented to the user:
X sort +2 -o "$t" "$t" # (this is our ranking function)
X # (it only makes a difference if there was more than one phrase)
X
X # Now some arcanery, I'm afraid... The trick is that lqshow can be
X # given the name of a file descriptor in which to save the names of
X # any files the user selects (with "s").
X old_t="$t"
X t="$t ${ListFile}"
X lqshow -o 3 -f $t 3>> ${ListFile}
X t="$old_t"
X if [ -s ${ListFile} ]
X then ## the user typed s/k/whatever to save some files...
X # make the list by interpreting the list file:
X LIST=`awk '
X /^#.*$/ { next }
X ($1 == "s") { SAVE[$2]++ }
X ($1 == "d") { SAVE[$2] = 0 }
X END {
X for (i in SAVE) {
X if (SAVE[i] > 0) print i
X }
X }' $ListFile | sort -u`
X # make a new list file...
X echo "$LIST" | sed '/^[ ]*$/d' > $ListFile
X LIST="" # save memory
X fi
X # now see if it's still non-empty...
X if [ -s ${ListFile} ]
X then
X List="Type S filename to save the list of files (s also quits) "
X else
X /bin/rm -f ${ListFile}
X fi
X fi
X echo $List
X echo $N "Type q to quit, or return to continue: $C"
X read quit rest
X case "$quit" in
X [qQ]*) quit="yes" ;;
X [sS]) # save the list of matches
X cat $ListFile
X
X if [ ! -s "$ListFile" ]
X then
X echo "No files in the list to save."
X quit=no
X else
X if [ -z "$rest" ]
X then rest="lq.list"
X fi
X
X if [ -f "$rest" ]
X then echo "Appending to existing file $rest"
X fi
X
X cat $ListFile >> $rest
X rm $ListFile
X if [ x"$quit" = x"s" ]
X then quit=yes
X else quit=no
X fi
X fi
X ;;
X *) quit=no ;;
X esac
Xdone
X
X#
X# $Log: lq.sh,v $
X# Revision 1.3 90/10/06 00:50:53 lee
X# Prepared for first beta release.
X#
X#
X#
@@@End of lq-text/src/lqtext/lq.sh
echo x - lq-text/src/lqtext/lqaddfile.c 1>&2
sed 's/^X//' >lq-text/src/lqtext/lqaddfile.c <<'@@@End of lq-text/src/lqtext/lqaddfile.c'
X/* lqaddfile.c -- Copyright 1989, 1990 Liam R. Quin. All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/* addfile -- add a file to the LQ-Text text retrieval index
X * Liam Quin, August 1989 and later...
X *
X * $Id: lqaddfile.c,v 1.14 91/03/02 21:22:39 lee Rel1-10 $
X */
X
Xstatic char *Version = "@(#) $Id: lqaddfile.c,v 1.14 91/03/02 21:22:39 lee Rel1-10 $";
X
X#ifdef SYSV
Xextern int _filbuf(); /* used but not defined in stdio.h */
X#endif
X#include <stdio.h>
X#include <malloc.h>
X#include <ctype.h>
X#include <sys/types.h>
X#include <sys/stat.h>
X#ifdef BSD
X# include <strings.h>
X#else
X# include <string.h>
X#endif
X
X#include "globals.h" /* defines and declarations for database filenames */
X#include "fileinfo.h"
X#include "wordinfo.h"
X#include "wordrules.h"
X#include "filter.h"
X
X#include "emalloc.h"
X
X#define enew(var, type) (var = (type *) emalloc(sizeof(type)))
X
X#ifdef SYSV
X#define TOLOWER(ch) ch = tolower(ch)
X#else
X#define TOLOWER(ch) if (isupper(ch)) ch = tolower(ch)
X#endif
X
Xvoid DestroyFileInfo(), SaveFileInfo(), AddStream(), AddFrom();
Xextern lqWriteAccess(); /* Allow write access to the database */
X/* Symbol Table Interface */
Xextern void AddWord(), WriteCurrentMaxWID();
Xextern void DumpCache(), cleanupdb();
Xextern char *WordRoot();
Xextern int TooCommon(), GetFilterType();
Xint RealGetChar(), AddFile();
X
X/** System calls and library routines used in this file: **/
X/** System calls: **/
Xextern void exit();
Xextern int stat();
X/** Library Functions: **/
Xextern int atoi();
X#ifndef tolower
X extern int tolower();
X#endif
Xextern void perror();
X/**/
X
Xchar *progname = "@(#) : addfile.c,v 1.1 89/08/28 20:16:05 lee Locked $";
Xstatic int UseLineNumbers = 0;
X
X/* FROM pblock.c */
Xextern int AsciiTrace; /* provide increasingly verbose info if not zero */
X
X/* A single pending character for GetChar(); zero means there is none.
X * Nothing in the visible part of this file sets it -- presumably
X * RealGetChar() does.
X */
Xstatic int LastChar = 0;
X/* Scratch variable used inside the GetChar() macro */
Xstatic int _chForLee = 0;
X
X/* GetChar(F) -- fetch the next input character.
X *
X * If LastChar holds a pending character, that is returned and LastChar is
X * cleared.  Otherwise a character is read with getc(); unless it is an
X * apostrophe met while InWord is true, it is returned directly.  That one
X * remaining case is handed to RealGetChar(F).
X * BytesRead is incremented on the two paths handled here.
X *
X * The macro expects BytesRead, InWord and FileInfo to be in scope where
X * it is used.  Note that it reads from FileInfo->Stream, not from F; F is
X * only passed on to RealGetChar().
X */
X#define GetChar(F) \
X ( LastChar ? \
X (++BytesRead, (_chForLee = LastChar), (LastChar = 0), _chForLee) : \
X ( (_chForLee = getc(FileInfo->Stream)) != '\'' || !InWord) ? \
X (++BytesRead, _chForLee) : RealGetChar(F) )
X
Xint
Xmain(argc, argv)
X int argc;
X char *argv[];
X{
X extern char *strrchr();
X extern int getopt(), cknatstr();
X extern void SetDefaults();
X extern char *optarg;
X extern int optind;
X extern int MaxWordsInCache; /* see wordtable.c */
X
X int c;
X int ErrorFlag = 0;
X int DoNothing = 0;
X char *InputFile = (char *) 0;
X
X#ifdef MALLOCTRACE
X malloc_debug(2);
X#endif
X
X progname = argv[0]; /* retain the full path at first */
X
X#ifdef M_MXFAST
X (void) mallopt(M_MXFAST, sizeof(t_WordPlace));
X /* may need to comment mallopt() out entirely for BSD -- use ifndef.
X * seems to work under SunOS, though.
X * When it works, it says "Allocate 100 or so chunks of this size at a
X * time, and whenver I ask for this much or less, give me one of the
X * chunks". Clearly it had better not be too large, but it is a big
X * win with a structure allocated for every occurrence of every word!
X */
X#endif
X
X SetDefaults(argc, argv);
X
X while ((c = getopt(argc, argv, "w:f:xVZz:")) != -1) {
X switch (c) {
X case 'w':
X if (!cknatstr(optarg)) {
X fprintf(stderr,
X "%s: -w must be given a number >= 0, not \"%s\"\n",
X progname, optarg);
X fprintf(stderr, "\tuse %s -xv for further information\n");
X exit(1);
X }
X MaxWordsInCache = atoi(optarg);
X break;
X case 'Z':
X case 'z':
X break; /* work done in SetDefault() */
X case 'V':
X fprintf(stderr, "%s: version: %s\n", progname, Version);
X DoNothing = 1;
X break;
X case 'f':
X if (InputFile) {
X fprintf(stderr,
X"%s: only one -f option allowed; use -xv for explanation\n", progname);
X
X exit(1);
X }
X InputFile = optarg;
X break;
X case 'x':
X ErrorFlag = (-1);
X break;
X default:
X case '?':
X ErrorFlag = 1;
X }
X }
X
X if ((progname = strrchr(progname, '/')) != (char *) NULL) {
X ++progname; /* step over the last / */
X } else {
X progname = argv[0];
X }
X
X if (ErrorFlag > 0) {
X fprintf(stderr, "use %s -x or %s -xv for an explanation.\n",
X progname, progname);
X exit(1);
X } else if (ErrorFlag < 0) { /* -x was used */
X fprintf(stderr, "%s -- add files to an lq-text retrieval database\n",
X progname);
X
X fputs("Options are:\n\
X -f file -- read the list of files to index from \"file\"\n\
X -c file -- cfile contains a list of common words to be ignored\n\
X -d dir -- use the lq-text database in the named directory\n\
X -t N -- set the trace level to N [default: N = 0]\n\
X -V -- print Version number and exit\n\
X -v -- be verbose (equivalent to -t 1)\n\
X -w n -- dump the word-cache every n words\n\
X -x -- print this eXplanation and exit\n\
X -- -- all following arguments are file names\n\
X\n\
X", stderr);
X if (AsciiTrace == 1) {
X /* used -v or -t1 */
X fprintf(stderr, "\n\
X Any remaining arguments are taken to be file names. The current\n\
XDOCPATH (%s) is searched for the files, and they are read and added\n\
Xto the index. (If you use the -f option, you should not give filename\n\
Xarguments on the command line, although you can use \"-f -\" to read the\n\
Xlist of files from standard input, one per line.\n\
XSetting (with -w) the size of the cache may dramatically\n\
Ximprove performance. Systems with memory larger than the data can try -w0.\n\
XSee lqtext(1) for more information.\n", DocPath);
X }
X exit(0);
X
X }
X
X if (DoNothing) {
X if (optind < argc) {
X fprintf(stderr, "%s: warning: %d extra argument%s ignored...\n",
X progname, argc - optind,
X argc - optind == 1 ? "" : "%s" );
X fprintf(stderr, "Use %s -x for an explanation\n", progname);
X }
X exit(0);
X }
X
X lqWriteAccess();
X
X if (InputFile) {
X if (optind < argc) {
X fprintf(stderr, "%s: -f: too many arguments; use -xv\n", progname);
X exit(1);
X }
X AddFrom(InputFile);
X } else for (; optind < argc; ++optind) {
X if (AddFile(argv[optind]) < 0 && AsciiTrace >= 1) {
X fprintf(stderr, "%s: warning: Problem adding file %s\n",
X progname, argv[optind]);
X }
X }
X
X#ifndef MALLOCTRACE
X DumpCache(0); /* the 0 means don't bother calling free() */
X#else
X DumpCache(1); /* Free everthing so whatever is left is a memory leak */
X#endif
X
X cleanupdb(); /* empty the dbm cache */
X WriteCurrentMaxWID();
X
X#ifdef MALLOCTRACE
X (void) fprintf(stderr, "%s: Malloctrace: checking...\n", progname);
X malloc_verify();
X (void) fprintf(stderr, "%s: Malloc Map\n", progname);
X mallocmap();
X#endif
X
X exit(0);
X /*NOTREACHED*/
X return 1; /* disaster if we get here -- it's just for lint! */
X}
X
Xvoid
XAddFrom(Name)
X char *Name;
X{
X char *GetLine();
X
X FILE *fp;
X char *Line;
X
X if (Name[0] == '-' && Name[1] == '\0') {
X fp = stdin;
X } else {
X fp = fopen(Name, "r");
X }
X
X if (fp == (FILE *) 0) {
X extern int errno;
X int e = errno;
X
X fprintf(stderr, "%s: -f: can't open ", progname);
X errno = e;
X perror(Name);
X exit(1);
X }
X
X while ((Line = GetLine(fp, Name)) != (char *) 0) {
X if (AddFile(Line) < 0 && AsciiTrace >= 1) {
X /* we already got one error message from AddFile() */
X fprintf(stderr, "%s: warning: Problem adding file %s\n",
X progname, Line);
X }
X }
X
X if (fp != stdin) {
X (void) fclose(fp);
X }
X}
X
Xstatic int LineInFile = 0;
Xstatic FILE *LastFile = 0;
X
Xchar *
XGetLine(fp, Name)
X FILE *fp;
X char *Name;
X{
X static char *Line = (char *) 0;
X static int Length = 0;
X int ch;
X register char *p;
X
X if (!Line) {
X if (Length <= 10) Length = 30;
X Line = emalloc(Length);
X }
X
X p = Line;
X
X if (fp == LastFile) {
X ++LineInFile;
X } else {
X LineInFile = 0; /* number lines from zero! */
X LastFile = fp;
X }
X
X while ((ch = getc(fp)) != EOF) {
X static int HaveWarned = 0;
X
X if (isspace(ch)) {
X if (p == Line) { /* ignore blank lines and leading blanks */
X continue;
X }
X if (ch == '\n') {
X if (p == (char *) 0) {
X /* how could this ever happen? do I need it? */
X p = Line;
X continue;
X }
X *p = '\0';
X return Line;
X }
X if (AsciiTrace && !HaveWarned) {
X fprintf(stderr,
X"%s: -f: Warning: spaces found in filenames read from \"%s\"\n",
X progname, Name);
X HaveWarned = 1;
X }
X }
X
X /* add the character to the string */
X if (p - Line + 1 >= Length) {
X int SaveWhere = p - Line;
X Length += 30;
X Line = erealloc(Line, Length);
X p = &Line[SaveWhere];
X }
X *p++ = ch;
X }
X
X if (p && Line && p != Line) {
X fprintf(stderr, "%s: -f: warning: no newline at the end of \"%s\"\n",
X progname, Name);
X *p = '\0';
X return Line;
X }
X
X return (char *) 0;
X}
X
Xextern int fclose(), pclose();
X
Xt_FileInfo *
XMakeFileInfo(Name)
X char *Name;
X{
X#ifdef BSD
X extern time_t time();
X#else
X extern long time();
X#endif
X extern t_FID Name2FID();
X extern t_FileInfo *GetFileInfo();
X extern t_FID GetNextFID();
X FILE *MakeInput();
X struct stat StatBuf;
X
X t_FileInfo *FileInfo = 0;
X t_FID FID;
X
X if (!Name || !*Name) return (t_FileInfo *) 0; /* sanity */
X
X if (stat(Name, &StatBuf) < 0) {
X#ifndef FindFile /* it is a macro these days... */
X extern char *FindFile();
X#endif
X extern int errno;
X
X int e = errno;
X char *doc;
X
X if ((doc = FindFile(Name)) == (char *) 0) {
X fprintf(stderr, "Can't index ");
X errno = e; /* fprintf might well clobber errno! */
X perror(Name);
X return (t_FileInfo *) 0;
X }
X
X if (stat(doc, &StatBuf) < 0) {
X e = errno;
X fprintf(stderr, "Can't index ");
X errno = e; /* fprintf might well clobber errno! */
X perror(Name);
X return (t_FileInfo *) 0;
X }
X Name = doc;
X }
X
X if (StatBuf.st_size == 0L) {
X if (AsciiTrace) {
X fprintf(stderr, "%s empty -- not indexed\n", Name);
X }
X return (t_FileInfo *) 0;
X }
X /* See if it's in the index already: */
X if ((FID = Name2FID(Name)) != (t_FID) 0) {
X
X if ((FileInfo = GetFileInfo(FID)) != (t_FileInfo *) 0) {
X /* Check to see if the file hass changed since it was last
X * indexed. If it has, we should delete the old one from
X * the database and give this one a new FID, but I have
X * not done that yet -- that's /usr/local/lib/lqtextd or
X * something, I suppose!
X */
X if (FileInfo->Date >= StatBuf.st_mtime) {
X if (AsciiTrace) {
X fprintf(stderr, "%s unchanged -- not indexed\n", Name);
X }
X DestroyFileInfo(FileInfo);
X return (t_FileInfo *) 0;
X }
X }
X } else {
X FID = GetNextFID((long) StatBuf.st_size);
X }
X
X if (FileInfo == (t_FileInfo *) 0) {
X /* Allocate Structure */
X enew(FileInfo, t_FileInfo);
X
X /* Although not always necessary, call emalloc here so that a
X * FileInfo can always be deleted with DestroyFileInfo()
X */
X FileInfo->Name = emalloc((unsigned)(strlen(Name) + 1));
X (void) strcpy(FileInfo->Name, Name);
X
X /* Other bits to set: */
X
X /* date */
X FileInfo->Date = StatBuf.st_mtime;
X
X /* file type */
X if ((FileInfo->FilterType = GetFilterType(FileInfo, &StatBuf)) < 0) {
X if (AsciiTrace) {
X fprintf(stderr, "%s unknown file type -- not indexed\n", Name);
X }
X (void) efree(FileInfo->Name);
X (void) efree((char *) FileInfo);
X return (t_FileInfo *) 0;
X }
X }
X
X FileInfo->FID = FID;
X FileInfo->Date = (long) time((long *) 0); /* it's a time_t on BSD */
X
X if ((FileInfo->Stream = MakeInput(FileInfo)) == (FILE *) 0) {
X fprintf(stderr, "%s: couldn't open filter for %s -- not indexed\n",
X progname, FileInfo->Name);
X (void) efree(FileInfo->Name);
X (void) efree((char *) FileInfo);
X return (t_FileInfo *) 0;
X }
X
X return FileInfo;
X}
X
Xvoid
XDestroyFileInfo(FileInfo)
X t_FileInfo *FileInfo;
X{
X if (FileInfo->Stream) {
X if (FileInfo->FilterType >= 0 && FileInfo->FilterType < MaxFilterType){
X (* FilterTable[FileInfo->FilterType].close)(FileInfo->Stream);
X }
X FileInfo->Stream = (FILE *) 0;
X }
X if (FileInfo->Name) (void) efree(FileInfo->Name);
X (void) efree((char *) FileInfo);
X}
X
Xint
XAddFile(Name)
X char *Name;
X{
X t_FileInfo *FileInfo;
X
X if (!Name || !*Name) return -1;
X if ((FileInfo = MakeFileInfo(Name)) == (t_FileInfo *) 0) return -1;
X
X AddStream(FileInfo);
X SaveFileInfo(FileInfo);
X DestroyFileInfo(FileInfo);
X
X return 0;
X}
X
XFILE *
XMakeInput(FileInfo)
X t_FileInfo *FileInfo;
X{
X FILE *fp;
X char *Buffer;
X unsigned BufLen;
X extern FILE *fopen(), *popen();
X
X#define FSTRING FilterTable[FileInfo->FilterType].String
X
X if (FileInfo->FilterType > MaxFilterType) {
X fprintf(stderr, "%s: Warning: filter type %d for %s too high (max %d)\n",
X progname, FileInfo->FilterType, FileInfo->Name, MaxFilterType);
X return (FILE *) 0;
X }
X
X if (FilterTable[FileInfo->FilterType].Type != FileInfo->FilterType) {
X fprintf(stderr, "Fatal Filter table error, %d\n", FileInfo->FilterType);
X exit(3);
X }
X
X if (FSTRING == (char *) 0) {
X return fopen(FileInfo->Name, "r");
X }
X
X BufLen = strlen(FileInfo->Name) * 2 + 4 + strlen(FSTRING);
X /* The +4 is to allow for an embedded " < " plus a \0;
X * we append "< Name", but also expand %s to be the Name, hence
X * the strlen * 2
X */
X Buffer = emalloc(BufLen);
X
X (void) sprintf(Buffer, FSTRING, FileInfo->Name);
X (void) strcat(Buffer, " < ");
X (void) strcat(Buffer, FileInfo->Name);
X
X fp = popen(Buffer, "r");
X (void) efree(Buffer);
X return fp;
X}
X
X/* BytesRead counts the characters taken from the current input stream;
X * InWord is set by ReadWord() while it is in the middle of a word.
X */
Xstatic long BytesRead = 0L;
Xstatic int InWord = 0;
X
X/* Character input */
X
X/* RealGetChar -- decide what a single quote inside a word stands for.
X *
X * The caller is expected to have just read a single quote while InWord is
X * set.  The character after the quote is read into LastChar and one byte
X * is counted.  If that character can continue the word and is not another
X * quote, the quote was inside the word and '\'' is returned; otherwise the
X * quote was at the end of the word and a space is returned in its place.
X */
X#ifdef __GNU__
Xinline
X#endif
Xint
XRealGetChar(FileInfo)
X    t_FileInfo *FileInfo;
X{
X    int Result;
X
X    LastChar = getc(FileInfo->Stream);
X
X    if (WithinWord(LastChar) && LastChar != '\'') {
X        Result = '\'';  /* the quote is part of the word */
X    } else {
X        Result = ' ';   /* the quote ended the word, so drop it */
X    }
X
X    BytesRead++;
X    return Result;
X}
X
Xt_WordInfo *
XReadWord(FileInfo)
X t_FileInfo *FileInfo;
X{
X /* use two static storage areas so we can be called twice in a row.
X * This is necessary to implement the WPF_LASTINBLOCK flag.
X */
X static t_WordInfo This, That;
X static int ThisOrThat = 0;
X t_WordInfo *WordInfo;
X static char Buffer[MaxWordLength + 1];
X int ch;
X register char *q = Buffer;
X static int WordInBlock;
X static t_FID LastFid = 0L;
X static long LastPos = 0L;
X static int SawCommon = 0;
X static int SawLetters = 0;
X static int BlockInFile = 0L;
X static unsigned long LastBlock;
X unsigned long Start;
X
X WordInfo = (ThisOrThat ? &This : &That);
X
X if (FileInfo->FID != LastFid) {
X LastFid = FileInfo->FID;
X WordInBlock = (-1); /* none, yet! */
X LastPos = BlockInFile = LastBlock = 0L;
X BytesRead = 0L;
X SawCommon = SawLetters = 0;
X if (AsciiTrace) {
X fprintf(stderr, "Reading file \"%s\"", FileInfo->Name);
X }
X }
X
X /* Skip non-word characters */
X while ((ch = GetChar(FileInfo)) != EOF) {
X if (StartsWord(ch)) break;
X }
X
X /* ASSERT: we have read at least one character */
X
X if (ch == EOF) {
X if (AsciiTrace) {
X fprintf(stderr, "\n");
X }
X return (t_WordInfo *) 0;
X }
X
X Start = BytesRead - 1;
X
X if (UseLineNumbers) {
X BlockInFile = LineInFile;
X } else {
X BlockInFile = Start / FileBlockSize;
X }
X
X if (BlockInFile != LastBlock) {
X LastBlock = BlockInFile;
X if (AsciiTrace > 1) {
X fprintf(stderr, ".");
X#ifdef sun
X /* SunOS seems to line-buffer stderr! */
X fflush(stderr);
X#endif
X }
X WordInBlock = (-1);
X }
X
X if (isupper(ch)) {
X WordInfo->WordPlace.Flags = WPF_UPPERCASE;
X ch = tolower(ch);
X } else {
X WordInfo->WordPlace.Flags = 0;
X }
X
X InWord = 1; /* For GetChar() */
X
X do {
X if (q - Buffer < MaxWordLength) {
X *q++ = ch;
X }
X ch = GetChar(FileInfo);
X TOLOWER(ch);
X } while (WithinWord(ch) || EndsWord(ch));
X
X *q = '\0';
X InWord = 0;
X
X#ifdef __GNUC__
X /* this is to get round a gcc bug... */
X {
X int i = q - Buffer;
X WordInfo->Length = i;
X
X if (i < MinWordLength) {
X register char *p;
X
X for (p = Buffer; p < q; p++) {
X if (isalpha(*p)) {
X SawLetters = 1;
X break;
X }
X }
X return ReadWord(FileInfo);
X }
X }
X#else
X if ((WordInfo->Length = q - Buffer) < MinWordLength) {
X register char *p;
X
X for (p = Buffer; p < q; p++) {
X if (isalpha(*p)) {
X SawLetters = 1;
X break;
X }
X }
X return ReadWord(FileInfo);
X }
X#endif
X
X WordInfo->Word = Buffer;
X
X (void) WordRoot(WordInfo);
X
X WordInfo->Length = strlen(WordInfo->Word);
X
X if (TooCommon(WordInfo)) {
X SawCommon++;
X WordInBlock++;
X#ifdef ASCIITRACE
X if (AsciiTrace > 10) {
X fprintf(stderr, "%s too common to index\n", WordInfo->Word);
X }
X#endif
X return ReadWord(FileInfo);
X } else if (SawCommon) {
X SawCommon = 0;
X WordInfo->WordPlace.Flags |= (WPF_LASTWASCOMMON|WPF_LASTHADLETTERS);
X }
X if (SawLetters) {
X SawLetters = 0;
X WordInfo->WordPlace.Flags |= WPF_LASTHADLETTERS;
X }
X
X /* StuffBefore is the # of chars between the end of the last word and
X * the start of this one.
X */
X if (Start > 1L) {
X if (Start - (LastPos + 1) <= 0) {
X WordInfo->WordPlace.StuffBefore = 1; /* save a byte in the index */
X } else if (Start - (LastPos + 1) >= 255 ) {
X WordInfo->WordPlace.StuffBefore = 255;
X } else {
X WordInfo->WordPlace.StuffBefore = Start - (LastPos + 1);
X }
X } else {
X WordInfo->WordPlace.StuffBefore = 1; /* i.e., the default */
X }
X
X WordInfo->WordPlace.FID = WordInfo->FID = FileInfo->FID;
X WordInfo->WID = (t_WID) 0;
X WordInfo->Next = (t_WordInfo *) 0;
X WordInfo->WordPlaces = (t_WordPlace *) 0;
X WordInfo->WordPlacesInHere = 0;
X WordInfo->WordPlace.WordInBlock = (++WordInBlock);
X WordInfo->WordPlace.BlockInFile = BlockInFile;
X WordInfo->DataBlock = (char *) 0;
X
X WordInfo->Word[WordInfo->Length] = '\0';
X
X {
X /* I want to avoid using malloc() here...
X * Another kludge would be to malloc sizeof(t_WordInfo) +
X * strlen(WordInfo->Word + 1) and to put the string at the end
X * of (i.e. just after) the struct.
X */
X static char Word2[MaxWordLength + 1];
X static char Word1[MaxWordLength + 1];
X char *p = (ThisOrThat) ? Word1 : Word2;
X
X (void) strncpy(p, WordInfo->Word, (int) WordInfo->Length);
X WordInfo->Word = p;
X WordInfo->Word[WordInfo->Length] = '\0';
X }
X
X LastPos = BytesRead - 1;
X
X ThisOrThat = !ThisOrThat;
X /* toggle between 0 and 1. Boring life, really */
X
X if (!WordInfo->Word[0]) {
X fprintf(stderr, "Null word in ReadWord()\n");
X }
X return WordInfo;
X}
X
Xvoid
XAddStream(FileInfo)
X t_FileInfo *FileInfo;
X{
X /* I have to mark the last word in the block.
X * I do that by marking the previous word if it was in a differant block
X * than the current one.
X */
X t_WordInfo *WordInfo;
X t_WordInfo *LastWord = 0;
X
X BytesRead = 0;
X
X while ((WordInfo = ReadWord(FileInfo)) != (t_WordInfo *) 0) {
X if (LastWord) {
X if (LastWord->WordPlace.BlockInFile !=
X WordInfo->WordPlace.BlockInFile) {
X LastWord->WordPlace.Flags |= WPF_LASTINBLOCK;
X }
X AddWord(LastWord);
X }
X LastWord = WordInfo;
X }
X if (LastWord) {
X /* it's the last in the file, so it is also the last in the block */
X LastWord->WordPlace.Flags |= WPF_LASTINBLOCK;
X AddWord(LastWord);
X }
X
X if (AsciiTrace) {
X fprintf(stderr, "Read %lu bytes from \"%s\"\n", BytesRead, FileInfo->Name);
X }
X}
X
X/* lqaddfile has been carried through several incarnations of lq-text,
X * and hence has more than one Initial Revision in the following history.
X *
X * $Log: lqaddfile.c,v $
X * Revision 1.14 91/03/02 21:22:39 lee
X * Added write access call.
X *
X * Revision 1.13 91/03/02 18:53:25 lee
X * Common words are now counted, so you can now edit the common word list
X * without invalidating the index.
X *
X * Revision 1.12 90/10/06 00:50:54 lee
X * Prepared for first beta release.
X *
X * Revision 1.11 90/10/05 23:46:11 lee
X * Allow compilation with -UASCIITRACE
X *
X * Revision 1.10 90/10/04 17:54:46 lee
X * fixed a typo in the usage message.
X *
X * Revision 1.9 90/09/28 23:20:22 lee
X * Put more of GetChar into a macro and parameterised TOLOWER.
X *
X * Revision 1.8 90/09/28 22:19:04 lee
X * Did the previous fix _properly_!
X *
X * Revision 1.7 90/09/28 22:12:35 lee
X * Made getchar a macro, and deleted the call to CallFree...
X *
X * Revision 1.6 90/09/20 18:46:03 lee
X * Closed up a (very small) memory leak.
X *
X * Revision 1.5 90/09/19 20:16:41 lee
X * Fixed problems associated with indexing an empty file.
X *
X * Revision 1.4 90/08/29 21:45:18 lee
X * Alpha release
X *
X * Revision 1.3 90/08/09 19:17:12 lee
X * *** empty log message ***
X *
X * Revision 1.1 90/02/27 11:05:02 lee
X * Initial revision
X *
X * Revision 2.2 89/10/08 20:45:13 lee
X * Working version of nx-text engine. Addfile and wordinfo work OK.
X *
X * Revision 2.1 89/10/02 01:14:12 lee
X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
X *
X * Revision 1.3 89/09/17 23:02:42 lee
X * Various fixes; NumberInBlock now a short...
X *
X * Revision 1.2 89/09/16 21:16:11 lee
X * First demonstratable version.
X *
X * Revision 1.1 89/09/07 21:05:52 lee
X * Initial revision
X *
X */
@@@End of lq-text/src/lqtext/lqaddfile.c
echo end of part 06
--
Liam R. E. Quin, lee at sq.com, SoftQuad Inc., Toronto, +1 (416) 963-8337
More information about the Alt.sources
mailing list