lq-text Full Text Retrieval Database Part 05/13
Liam R. E. Quin
lee at sq.sq.com
Mon Mar 4 12:04:05 AEST 1991
: cut here --- cut here --
: To unbundle, sh this file
#! /bin/sh
: part 05
echo x - lq-text/src/liblqtext/asciitrace.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/asciitrace.c <<'@@@End of lq-text/src/liblqtext/asciitrace.c'
X/* asciitrace.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X * This file simply declares AsciiTrace, which is used for verbose (-v)
X * output (when set to 1), and for debugging/tracing at higher levels.
X */
X
Xint AsciiTrace = 0;
X
X/* $Id: asciitrace.c,v 1.3 90/10/06 00:21:12 lee Rel1-10 $
X *
X */
@@@End of lq-text/src/liblqtext/asciitrace.c
echo x - lq-text/src/liblqtext/bcopy.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/bcopy.c <<'@@@End of lq-text/src/liblqtext/bcopy.c'
X/* bcopy.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X#ifdef BCOPYTEST
X# include <stdio.h>
X#endif
X
X/* this is a simple replacement for bcopy() where the native bcopy()
X * does not handle overlapping blocks.
X * do
X * cc -DBCOPYTEST -o bcopy bcopy.c
X * and run "./bcopy" for a simple test. You should get three
X * identical lines of output.
X *
X * $Id: bcopy.c,v 1.3 90/10/06 00:21:24 lee Rel1-10 $
X */
X
Xbcopy(src, dest, nbytes)
X char *dest;
X char *src;
X int nbytes;
X{
X /* We have to be clever about this...
X * If src < dest then we copy from the top down
X * otherwise, copy from the bottom up...
X */
X
X register char *p, *q;
X
X if (src < dest) {
X for (p = &src[nbytes - 1], q =&dest[nbytes - 1]; nbytes--; q--, p--) {
X *q = *p;
X }
X } else {
X for (p = src, q = dest; nbytes--; p++, q++) {
X *q = *p;
X }
X }
X}
X
X#ifdef BCOPYTEST
Xmain()
X{
X char buffer[4096];
X char *s = "The naked children hugged each other";
X
X puts(s); /* first line */
X (void) sprintf(&buffer[12], "%s", s);
X bcopy(&buffer[12], buffer, strlen(s) + 1);
X printf("[%s]\n", buffer); /* 2nd line */
X bcopy(buffer, &buffer[12], strlen(s) + 1);
X printf("[%s]\n", &buffer[12]); /* 3rd line */
X}
X#endif
X
X/* $Log: bcopy.c,v $
X * Revision 1.3 90/10/06 00:21:24 lee
X * Prepared for first beta release.
X *
X *
X */
@@@End of lq-text/src/liblqtext/bcopy.c
echo x - lq-text/src/liblqtext/cmdname.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/cmdname.c <<'@@@End of lq-text/src/liblqtext/cmdname.c'
X/* cmdname.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
Xchar *cmdname = (char *) 0;
X
X/* $Id: cmdname.c,v 1.2 90/10/06 00:12:08 lee Rel1-10 $
X *
X * $Log: cmdname.c,v $
X * Revision 1.2 90/10/06 00:12:08 lee
X * Prepared for first beta release.
X *
X * Revision 1.1 90/03/24 17:08:19 lee
X * Initial revision
X *
X *
X */
@@@End of lq-text/src/liblqtext/cmdname.c
echo x - lq-text/src/liblqtext/malloc.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/malloc.c <<'@@@End of lq-text/src/liblqtext/malloc.c'
X/* malloc.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/*
X * malloc.c -- wrapper routines for free/malloc, that do checking and
X * print errors.
X *
X * $Id: malloc.c,v 1.6 91/03/03 00:14:27 lee Rel1-10 $
X *
X * $Log: malloc.c,v $
X * Revision 1.6 91/03/03 00:14:27 lee
X * Simpler interface if MALLOCTRACE undefined.
X *
X * Revision 1.5 90/10/06 00:12:11 lee
X * Prepared for first beta release.
X *
X * Revision 1.4 90/09/29 23:51:31 lee
X * Added MALLTRACE to detect memory leaks.
X *
X * Revision 1.3 90/08/29 21:46:55 lee
X * Alpha release.
X *
X * Revision 1.2 90/08/09 19:16:44 lee
X * BSD lint and fixes...
X *
X * Revision 2.2 89/10/08 20:46:10 lee
X * Working version of nx-text engine. Addfile and wordinfo work OK.
X *
X *
X */
X
X#include "globals.h"
X
X#include <malloc.h>
X#include <stdio.h>
X#include <ctype.h>
X
Xextern char *progname;
X
Xextern void exit();
X
Xint _LiamIsInCurses = 0;
X /* This should be in error.c */
X
XINLINE char *
X_emalloc(nbytes
X#ifdef MALLOCTRACE
X, FileName, Line
X#endif
X)
X unsigned nbytes;
X#ifdef MALLOCTRACE
X char *FileName;
X int Line;
X#endif
X{
X char *Result;
X
X if ((Result = malloc(nbytes)) == (char *) 0) {
X#ifdef MALLOCTRACE
X fprintf(stderr, "%s: %s: %d: emalloc(%u) failed.\n",
X progname, FileName, Line, nbytes);
X#else
X fprintf(stderr, "%s: emalloc(%u) failed.\n", progname, nbytes);
X#endif
X exit(1);
X }
X
X#ifdef MALLOCTRACE
X (void) fprintf(stderr, "malloc %u > 0x%x %s %d\n", nbytes, Result, FileName, Line);
X#endif
X return Result;
X}
X
XINLINE char *
X_ecalloc(Number, Size
X#ifdef MALLOCTRACE
X , FileName, Line
X#endif
X)
X unsigned Number;
X unsigned Size;
X#ifdef MALLOCTRACE
X char *FileName;
X int Line;
X#endif
X{
X char *Result;
X extern char *calloc();
X
X if ((Result = calloc(Number, Size)) == (char *) 0) {
X#ifdef MALLOCTRACE
X fprintf(stderr, "%s: %s: %d: ecalloc(%u x %u) failed.\n",
X progname, FileName, Line, Number, Size);
X#else
X fprintf(stderr, "%s: ecalloc(%u x %u) failed.\n",
X progname, Number, Size);
X#endif
X exit(1);
X }
X
X return Result;
X}
X
XINLINE char *
X_erealloc(OldString, NewSize
X#ifdef MALLOCTRACE
X , FileName, Line
X#endif
X)
X char *OldString;
X unsigned NewSize;
X#ifdef MALLOCTRACE
X char *FileName;
X int Line;
X#endif
X{
X char *Result;
X
X if ((Result = realloc(OldString, NewSize)) == (char *) 0) {
X#ifdef MALLOCTRACE
X fprintf(stderr, "%s: %s: %d: erealloc(0x%x, %u) failed.\n",
X progname, FileName, Line, OldString, NewSize);
X#else
X fprintf(stderr, "%s: erealloc(0x%x, %u) failed.\n",
X progname, OldString, NewSize);
X#endif
X exit(1);
X }
X
X#ifdef MALLOCTRACE
X (void) fprintf(stderr, "realloc 0x%x %u > 0x%x %s %d\n", OldString, NewSize, Result, FileName, Line);
X#endif
X return Result;
X}
X
XINLINE void
X_efree(String
X#ifdef MALLOCTRACE
X , FileName, Line
X#endif
X)
X char *String;
X#ifdef MALLOCTRACE
X char *FileName;
X int Line;
X#endif
X{
X if (!String) {
X#ifdef MALLOCTRACE
X (void) fprintf(stderr, "%s: %s: %d: Warning: free(0) ignored\n",
X progname, FileName, Line);
X#else
X (void) fprintf(stderr, "%s: Warning: free(0) ignored\n", progname);
X#endif
X return;
X }
X#ifdef MALLOCTRACE
X (void) fprintf(stderr, "free 0x%x\n", String);
X#endif
X
X (void) free(String);
X}
@@@End of lq-text/src/liblqtext/malloc.c
echo x - lq-text/src/liblqtext/numbers.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/numbers.c <<'@@@End of lq-text/src/liblqtext/numbers.c'
X/* numbers.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/* Routines to read and write numbers in a compressed format, preserving
X * block boundaries.
X * The actual compression seems to be about 50%, as most numbers turn
X * out to fit in 16 bits. Interestingly, there is room for another one
X * or two bits, I think, that could be used for something else, in the
X * main pblock index. For example, it could mark whether words were
X * plural/-"ing"/"un"-/ with 2 bits.
X *
X * $Id: numbers.c,v 1.4 90/10/06 00:12:16 lee Rel1-10 $
X *
X * $Log: numbers.c,v $
X * Revision 1.4 90/10/06 00:12:16 lee
X * Prepared for first beta release.
X *
X * Revision 1.3 90/08/09 19:16:49 lee
X * BSD lint and fixes...
X *
X * Revision 1.2 90/04/18 19:47:13 lee
X * More flexible (and slightly more compact) number format.
X *
X * Revision 2.2 89/10/08 20:46:36 lee
X * Working version of nx-text engine. Addfile and wordinfo work OK.
X *
X * Revision 2.1 89/10/02 01:15:15 lee
X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
X *
X * Revision 1.2 89/09/16 21:16:26 lee
X * First demonstratable version.
X *
X * Revision 1.1 89/09/07 21:06:01 lee
X * Initial revision
X *
X *
X */
X
X#include "globals.h"
X
X#ifdef SYSV
X extern int _filbuf(), _flsbuf();
X#endif
X#include <stdio.h>
X#include "numbers.h"
X
X/* ReadNumber and WriteNumber take/return a long, using a compression
X * algorithm to reduce the amount of data taken.
X * The current algorithm is simply like internet addresses:
X * a 0 in the top bit followed by a 0 means it's one byte
X * a 0 followed by a 1 means it's 2 bytes
X * a 1 followed by a 0 means it's 3 bytes, and
X * a 1 followed by a 1 means it's 4 bytes.
X * A better alternative might simply use a 1 in the top bit, hence fitting
X * 7 bits into each bytes. The advantages of considering more than
X * one number at a time and using compress-style LS packing are not clear.
X * In particular, speed of recovery is an issue too.
X *
X * The routines use (char *) pointers instead of files prefixes with an s.
X * see numbers.h for some related macros.
X *
X */
X
X
X#ifdef TESTNUMBERS
Xchar *progname;
X
Xint
Xmain(ac, av)
X int ac;
X char *av[];
X{
X extern long atol();
X FILE *f;
X extern FILE *fopen();
X
X progname = av[0];
X
X while (--ac) {
X unsigned long L = atol(*++av);
X unsigned long L2;
X
X f = fopen("/tmp/boy", "w");
X printf("Write %u\n", L);
X fWriteNumber(f, L);
X fclose(f);
X f = fopen("/tmp/boy", "r");
X L2 = fReadNumber(f);
X printf("Read %u\n", L2);
X if (L != L2) {
X printf("**** ERROR **** %ld != %ld\n", L, L2);
X }
X fclose(f);
X }
X return 0;
X}
X#endif /*TESTNUMBERS*/
X
XINLINE void
XfWriteNumber(f, Number)
X FILE *f;
X unsigned long Number;
X{
X /* Compressed numbers:
X * 7 bit numbers --> single byte;
X * 8...14 bits --> 2 bytes
X * 15...21 bits --> 3 bytes
X * 22..28 bits --> 4 bytes
X * 29..32 bits --> 5 bytes
X */
X while (Number > 0177) {
X putc((Number & 0177) | 0200, f);
X Number >>= 7;
X }
X putc(Number & 0177, f);
X}
X
X#define PutC(ch, S) (*((*S)++) = (char) (ch))
X
XINLINE void
XsWriteNumber(s, Number)
X char **s;
X unsigned long Number;
X{
X /* Compressed numbers:
X * 7 bit numbers --> single byte;
X * 8...14 bits --> 2 bytes
X * 15...21 bits --> 3 bytes
X * 22..28 bits --> 4 bytes
X * 29..32 bits --> 5 bytes
X */
X while (Number > 0177) {
X PutC((Number & 0177) | 0200, s);
X Number >>= 7;
X }
X PutC(Number & 0177, s);
X}
X
XINLINE unsigned long
XfReadNumber(f)
X FILE *f;
X{
X unsigned long Result = 0L;
X int ThereIsMore;
X int Shift = 0;
X
X /* Read a number, 7 bits at a time, lsb first, until there is
X * a byte without the top bit set -- that's the most significant
X * byte, and there is no more of this number.
X */
X do {
X Result |= ((ThereIsMore = getc(f)) & 0177) << Shift;
X ThereIsMore &= 0200;
X Shift += 7;
X } while (ThereIsMore);
X return Result;
X}
X
X#define GetC(S) \
X ( (unsigned int) * (unsigned char *) ((* (unsigned char **)S)++) )
X
XINLINE unsigned long
XsReadNumber(s)
X char **s;
X{
X unsigned long Result = 0L;
X int ThereIsMore;
X int Shift = 0;
X
X /* Read a number, 7 bits at a time, lsb first, until there is
X * a byte without the top bit set -- that's the most significant
X * byte, and there is no more of this number.
X */
X do {
X Result |= ((ThereIsMore = GetC(s)) & 0177) << Shift;
X ThereIsMore &= 0200;
X Shift += 7;
X } while (ThereIsMore);
X return Result;
X}
@@@End of lq-text/src/liblqtext/numbers.c
echo x - lq-text/src/liblqtext/pblock.c 1>&2
sed 's/^X//' >lq-text/src/liblqtext/pblock.c <<'@@@End of lq-text/src/liblqtext/pblock.c'
X/* pblock.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
X * This code is NOT in the public domain.
X * See the file COPYRIGHT for full details.
X */
X
X/* The physical Word Database for NX-Text...
X *
X * Interface by Liam Quin, 1989
X *
X * The main database is a mesh of linked lists, rather like tagliatelli.
X *
X * $Id: pblock.c,v 1.10 90/10/13 03:08:36 lee Rel1-10 $
X *
X * $Log: pblock.c,v $
X * Revision 1.10 90/10/13 03:08:36 lee
X * deleted an illegal dereference!
X *
X * Revision 1.9 90/10/06 00:12:17 lee
X * Prepared for first beta release.
X *
X * Revision 1.8 90/09/29 23:49:05 lee
X * commented out (with #if 0) some code that called WID2WordInfo needlessly.
X *
X * Revision 1.7 90/08/29 21:47:04 lee
X * Alpha release.
X *
X * Revision 1.6 90/08/09 19:16:52 lee
X * BSD lint and fixes...
X *
X * Revision 1.5 90/08/08 22:26:17 lee
X * Many major lint and gcc -Wall fixes...
X *
X * Revision 1.4 90/03/22 14:26:54 lee
X * Fixed the test for Sorting monotonicity (?)..., which had not been
X * checking that the FIDS were the same before checking the blocks...
X *
X * Revision 1.3 90/03/21 19:42:22 lee
X * new calls to efree();
X * removed WID from the data blocks.
X * doesn't work yet, though, sorry.
X *
X * Revision 2.2 89/10/08 20:46:53 lee
X * Working version of nx-text engine. Addfile and wordinfo work OK.
X *
X * Revision 2.1 89/10/02 01:15:24 lee
X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
X *
X * Revision 1.3 89/09/17 23:03:12 lee
X * Various fixes; NumberInBlock now a short...
X *
X * Revision 1.2 89/09/16 21:16:30 lee
X * First demonstratable version.
X *
X * Revision 1.1 89/09/07 21:06:05 lee
X * Initial revision
X *
X *
X */
X
X#include "globals.h" /* defines and declarations for database filenames */
X
X#include <stdio.h> /* stderr, also for fileinfo.h */
X#include <fcntl.h>
X#include <malloc.h>
X#include <sys/types.h>
X#include "fileinfo.h" /* for wordinfo.h */
X#include "wordinfo.h"
X#include "pblock.h"
X#include "numbers.h"
X#include "wordrules.h"
X#include "emalloc.h"
X
X#ifndef STREQ
X# define STREQ(boy,girl) ((*(boy) == *(girl)) && (!strcmp((boy),(girl))))
X#endif
X
X#define new(type) ( ((type) *) emalloc(sizeof(type)) )
X
Xextern t_WordInfo *WID2WordInfo();
Xextern t_WID Word2WID();
X
X/** Unix system calls that need to be declared: **/
Xextern void exit();
Xextern int open();
Xextern int read(), write();
X
X/** C library functions that need to be declared: **/
Xextern char *memcpy();
Xextern void perror();
Xextern void qsort();
Xextern int strcmp();
Xextern char *strcpy();
Xextern int strlen();
X
X/** lqtext library functions that need to be declared: **/
Xextern int MkWIB();
Xextern void MkWIBH();
Xextern int PutWordInfoIntoIndex();
Xextern void SortWordPlaces();
Xextern t_WordInfo *MakeWordInfo();
Xextern void SlayWordInfo();
X
X/** Functions within this file that need to be declared: **/
Xvoid DeleteWordPlaces();
Xstatic void FlushBlock();
X
Xt_WordPlace *GetWordPlaces();
Xt_pblock *Getpblock();
Xunsigned short PutWordPlaces();
Xint _PutByte(), PutLong();
Xunsigned char _GetByte();
Xunsigned long GetLong();
Xunsigned long FindFreeBlock();
Xvoid FillWordPlaces();
X/** **/
X
Xextern char *progname;
X
X/* If you find this macro confusing, see "The C Programming Language",
X * Kernighan & Ritchie, 1st. Edition, for a good introduction to
X * programming in C.
X * :-( :-)
X */
X#define PutByte(Byte, WID, sp, Blockp, BlockLength, LastStart, NextBlock) \
X ( (*(sp) - (unsigned char *) *(Blockp) >= *(BlockLength)) ? \
X _PutByte(Byte, WID, sp, Blockp, BlockLength, LastStart, NextBlock) : \
X ((*((*(sp))++) = (Byte)), 0))
X
X#define GetByte(WID, sp, Blockp, BlockLength, NextBlock) \
X ( (*(sp) - (unsigned char *) *(Blockp) >= *(BlockLength)) ? \
X _GetByte(WID, sp, Blockp, BlockLength, NextBlock) : *((*(sp))++) )
X
Xextern long lseek();
X
Xstatic int DataFile = -1;
X
X#ifdef ASCIITRACE
Xextern int AsciiTrace;
X#endif
X
Xchar *ReadBlock();
Xvoid WriteBlock();
Xunsigned long Putpblock();
X
X/* Layout of the physical index database (OUT OF DATE)
X * =====================================
X *
X * This file is the only interface to the database of FID/Offset pairs.
X *
X * The db is organised in blocks arranged in Tagliatelli format: a linked
X * list of blocks for each WID; there is a list for each WID in the Word
X * Index. The Word Index contains the block number of the start of the
X * chain.
X * Block 0 is the start of the free list (but is never itself free!)
X *
X * block 0... Free list header; otherwise currently unused.
X * block 1... first data block:
X * +---------------------------
X * | bytes 0...3: Offset of next block in this chain
X * | 4...7: Number of valid pairs in this block (0-->unused block)
X * | 8..11: WID to which this block refers (for checking)
X * | 12..15: Total number of WIDS in the chain
X * | (only the first block in each chain has this)
X * | The (FID, Offset) pairs follow, in compressed (Internet-style) format.
X * |
X * block 2... next data block (either the start of a new chain, or a
X * continuation of some other chain. Or maybe unused, especially if files
X * have been deleted).
X *
X * The block header is described by t_BlockHeader.
X *
X */
X
X#include "blkheader.h"
X
X/* Look up a word in the database...
X * and return a list of all the WordPlaces where it's found
X * BUG: should be called WordInfo2pblock().
X */
Xt_pblock *
XGetpblock(WordInfo)
X t_WordInfo *WordInfo;
X{
X t_pblock *pblock = 0;
X unsigned long HowManyToGet = 0L;
X t_WordPlace *WordPlaces;
X unsigned long CurrentPair = 0L;
X
X#if 0
X /* This code allows one to call GetPblock with a very minimal
X * wordinfo entry:
X */
X if (WordInfo->Offset == 0L) {
X /* No database entry found yet... */
X if (!WordInfo->WID && WordInfo->Length) {
X WordInfo->WID = Word2WID(WordInfo->Word, WordInfo->Length);
X }
X if (WordInfo->WID) {
X WordInfo = WID2WordInfo(WordInfo->WID);
X }
X }
X#endif
X
X if (!WordInfo->NumberOfWordPlaces) {
X return (t_pblock *) 0; /* nothing to write */
X }
X
X if (!WordInfo->Offset && !WordInfo->NumberOfWordPlaces) {
X /* no pblock offset, so no pblock! */
X return (t_pblock *) 0;
X }
X
X HowManyToGet = WordInfo->NumberOfWordPlaces;
X
X pblock = (t_pblock *) emalloc( sizeof(t_pblock) +
X (unsigned) (HowManyToGet + 1) * sizeof(t_WordPlace));
X
X WordPlaces = pblock->WordPlaces;
X pblock->WID = WordInfo->WID;
X pblock->ChainStart = WordInfo->Offset;
X pblock->NumberOfWordPlaces = WordInfo->NumberOfWordPlaces;
X
X /* First, the pairs in the WordInfo might suffice: */
X if (WordInfo->WordPlacesInHere >= HowManyToGet) {
X for (; CurrentPair < WordInfo->WordPlacesInHere; CurrentPair++) {
X WordPlaces[CurrentPair] = WordInfo->WordPlaces[CurrentPair];
X }
X }
X
X /* If they all fitted in the WordInfo block, well, that was a big win! */
X if (CurrentPair >= HowManyToGet) {
X pblock->ChainStart = 0L;
X return pblock;
X }
X
X /* So we need to read the entire list of WordPlaces from the database.
X * Although we may have already done the first few, I'm going to do them
X * all again because that ensures that the last few bytes in the
X * WordInfo data block can get used!
X */
X
X WordPlaces = GetWordPlaces(
X WordInfo->WID,
X WordInfo->WordPlaceStart,
X (unsigned) WIDBLOCKSIZE -
X (WordInfo->WordPlaceStart - WordInfo->DataBlock),
X WordInfo->Offset,
X HowManyToGet);
X
X
X
X if (WordPlaces == (t_WordPlace *) 0) {
X fprintf(stderr, "%s: SNAFU: no wordplaces for WID %ld, wanted %ld\n",
X progname, WordInfo->WID, HowManyToGet);
X exit(1);
X }
X
X /* copy the result... */
X (void) memcpy((char *) pblock->WordPlaces, (char *) WordPlaces,
X (int) (sizeof(t_WordPlace) * HowManyToGet));
X (void) efree((char *) WordPlaces);
X return pblock;
X}
X
X/* This is how many WordPlaces one could write into a block in WIDFILE:
X * The Header takes up 1 for the word length, MinWordLength or more for
X * the actual word, and another 1 for the number of places.
X * (32 - 5) / 3 is 27/3 is 9, but this is rather rare in practice!
X * See the comment just before PutPairs().
X * If this is not kept up to date, space may be wasted in the database,
X * or you will slow down lqaddfile.
X */
X#define MaxWordPlacesInAWordBlock ((WIDBLOCKSIZE - (MinWordLength + 2)/3))
X
X/* This should in fact take a (t_WordPlaceList *) or something.
X * It returns the number of words written, for checking in
X * DumpCache (wordtable.c)
X */
Xint
XWriteWordChain(WordPlaceList)
X t_WordPlaceList *WordPlaceList;
X{
X extern t_WID GetNextWID();
X extern t_WordInfo *FindWordInfoFromIndex();
X
X t_WID WID;
X int Length;
X t_pblock *pblock = (t_pblock *) 0;
X int NumberOfWordPlacesToAdd = 0;
X t_WordPlaceList *WP;
X t_WordInfo *IndexEntry = 0;
X char *FirstWord;
X register int i;
X int TotalWordsWritten = 0;
X
X if (!WordPlaceList) return 0; /* nothing to do */
X
X /* Sanity check: */
X if (!(WordPlaceList->Word) || !*(WordPlaceList->Word)) {
X fprintf(stderr, "Warning: WriteWordChain() asked to write null word\n");
X return 0;
X }
X
X Length = strlen(WordPlaceList->Word);
X
X /* This is undoubtedly a bad way to do things:
X * if IndexEntry is big, we don't want to fetch it when we could
X * simply stuff things on the end!
X */
X if ((WID = Word2WID(WordPlaceList->Word, Length)) != (t_WID) 0) {
X if ((IndexEntry = WID2WordInfo(WID)) != (t_WordInfo *) 0) {
X pblock = Getpblock(IndexEntry);
X }
X }
X
X if (!WID) {
X WID = GetNextWID();
X }
X
X#ifdef ASCIITRACE
X if (AsciiTrace >= 3) {
X fprintf(stderr, "Entry %lu for \"%s\"", WID, WordPlaceList->Word);
X if (IndexEntry) {
X fprintf(stderr, ", had %d pairs\n", IndexEntry->NumberOfWordPlaces);
X } else {
X fprintf(stderr, " (new entry)\n");
X }
X }
X#endif
X
X /* Count the number of entries we are going to add.
X * There may be several different words in the chain, but they are
X * sorted in word order.
X */
X FirstWord = WordPlaceList->Word;
X
X for (WP = WordPlaceList; WP; WP = WP->Next) {
X if (!STREQ(WP->Word, FirstWord)) break;
X ++NumberOfWordPlacesToAdd;
X if (WP != WordPlaceList) {
X WP->Word = (char *) 0; /* so we don't free it; see AddWord() */
X }
X }
X
X if (pblock == (t_pblock *) 0) {
X int oldnp = (IndexEntry) ? IndexEntry->NumberOfWordPlaces : 0;
X
X pblock = (t_pblock *) emalloc(sizeof(t_pblock) +
X (NumberOfWordPlacesToAdd + oldnp) * sizeof(t_WordPlace));
X pblock->NumberOfWordPlaces = 0;
X if (IndexEntry && IndexEntry->NumberOfWordPlaces) {
X register t_WordPlace *W, *Q;
X register unsigned long boy = 0L;
X
X if (IndexEntry->WordPlacesInHere < IndexEntry->NumberOfWordPlaces) {
X FillWordPlaces(IndexEntry);
X }
X
X for (W = IndexEntry->WordPlaces, Q = pblock->WordPlaces;
X boy < IndexEntry->NumberOfWordPlaces; boy++) {
X *Q++ = *W++;
X }
X pblock->NumberOfWordPlaces = IndexEntry->NumberOfWordPlaces;
X }
X } else {
X /* Remove the old information from disk.
X * This isn't as bad as it sounds, as it will be at the start
X * of the freelist, so when we write it out again it will be
X * in the buffer cache...
X * Although, it would be faster simply to append.
X */
X if (IndexEntry->Offset) {
X DeleteWordPlaces(IndexEntry->Offset, IndexEntry->WID);
X }
X /*NOSTRICT*/
X pblock = (t_pblock *) erealloc((char *) pblock, sizeof(t_pblock) +
X (unsigned) (NumberOfWordPlacesToAdd +
X pblock->NumberOfWordPlaces) * sizeof(t_WordPlace));
X }
X
X pblock->WID = WID;
X pblock->ChainStart = 0L; /* certainly it is now invalid! */
X i = pblock->NumberOfWordPlaces;
X pblock->NumberOfWordPlaces += NumberOfWordPlacesToAdd;
X
X for (WP = WordPlaceList; WP && NumberOfWordPlacesToAdd--; WP = WP->Next) {
X /* As we ignore the rest of these WordInfo entries, something
X * has to change here for more efficiency!
X * Probably we should get passed a linked list of WordPlaces
X * instead of WordInfo structures.
X */
X pblock->WordPlaces[i] = WP->WordPlace;
X i++;
X }
X
X if (WP && STREQ(WP->Word, FirstWord)) {
X fprintf(stderr, "Internal error counting pairs to add\n");
X exit(1);
X }
X
X if (NumberOfWordPlacesToAdd > 0) {
X fprintf(stderr, "I've got some pairs left over, \"%s\" line %d\n",
X __FILE__, __LINE__ - 1);
X exit(1);
X }
X
X /* We are going to need a WordInfo here... */
X if (IndexEntry == (t_WordInfo *) 0) {
X IndexEntry = MakeWordInfo(WID, Length, WordPlaceList->Word);
X }
X IndexEntry->NumberOfWordPlaces = pblock->NumberOfWordPlaces;
X
X /* Now, see if they all fit into the WordInfo itself! */
X
X pblock->ChainStart = IndexEntry->Offset = 0L;
X
X if (pblock->NumberOfWordPlaces <= MaxWordPlacesInAWordBlock) {
X (void) MkWIB(IndexEntry, pblock);
X }
X
X if (IndexEntry->WordPlacesInHere == pblock->NumberOfWordPlaces) {
X /* no pblock needed! */
X if (PutWordInfoIntoIndex(IndexEntry, (unsigned long) 0L) < 0) {
X extern int errno;
X int e = errno;
X fprintf(stderr, "%s: Couldn't put \"%s\" into the ",
X progname, IndexEntry->Word);
X errno = e;
X perror("index");
X exit(1);
X }
X } else {
X (void) Putpblock(IndexEntry, pblock); /* (this alters *WordInfo) */
X
X if (PutWordInfoIntoIndex(IndexEntry, pblock->ChainStart) < 0) {
X extern int errno;
X int e = errno;
X fprintf(stderr, "%s: Couldn't add word \"%s\" to the ",
X progname, IndexEntry->Word);
X errno = e;
X perror("word index");
X exit(1);
X }
X }
X
X if (pblock) {
X efree((char *) pblock);
X pblock = (t_pblock *) 0;
X }
X
X if (IndexEntry) {
X SlayWordInfo(IndexEntry);
X }
X
X TotalWordsWritten += NumberOfWordPlacesToAdd;
X
X /* Now see if there are any more words in the chain: */
X if (WP) {
X TotalWordsWritten += WriteWordChain(WP);
X }
X
X return TotalWordsWritten;
X}
X
Xstatic unsigned char pblockBuffer[BLOCKSIZE + 5];
X
X/* Write out an entire (presumably new) data entry, and
X * return a disk pointer to the start of the chain
X */
Xunsigned long
XPutpblock(WordInfo, pblock)
X t_WordInfo *WordInfo;
X t_pblock *pblock;
X{
X /* Assume that we can discard the PairBlock in WordInfo --
X * it was a pointer to a static buffer anyway.
X */
X
X if (WordInfo->DataBlock) {
X WordInfo->DataBlock = (char *) 0;
X }
X
X WordInfo->Offset = pblock->ChainStart = FindFreeBlock(WordInfo->WID);
X (void) MkWIBH(WordInfo, pblock);
X PutWordPlaces(
X pblock->WordPlaces,
X WordInfo->WID,
X (unsigned char *) WordInfo->WordPlaceStart,
X (unsigned)
X WIDBLOCKSIZE - (WordInfo->WordPlaceStart - WordInfo->DataBlock),
X WordInfo->Offset,
X pblock->NumberOfWordPlaces);
X
X return WordInfo->Offset;
X}
X
X/** WordPlaces are now stored as sequences, as follows:
X ** FID*2 -- 1, 2, 3 (usually) or 4 bytes 1-4
X ** (very, very occasionaly a variable-size number may be 5 bytes.)
X ** . the bottom bit in the stored number determines whether there
X ** is more than one FID to follow
X ** number of following places (only if prev. bit was 1) -- 1 byte 0-1
X ** For each following entry:-
X ** . for each of the following places:
X ** Block In File (long, 1, 2, 3 or 4 bytes, usually 1) 1-4
X ** Word In Block -- always 1 byte 1-1
X ** the bottom bit of this says if there are flags
X ** Flags -- always 1 byte, if present 0-1
X ** Stuff Before -- 1 byte 0-1
X ** (if there are no flags, there's no Stuff Before byte, and
X ** we use the default value of 1)
X **
X ** Hence: each sub-place takes from 2 to 7 bytes [was: 0 to 4]
X ** each Place sequence takes from 3 [min was 2 with old format]
X ** to (4 + 1) + 255 * (2..7) bytes.
X ** In most (I guess > 7/10) cases, flags will be 0, and
X ** StuffBefore be the default of 1.
X **
X ** I am hoping, of course, that the extra information stored is
X ** worth while!
X ** It might be possible to coalesce WordInBlock and BlockInFile using
X ** delta modulation -- i.e., storing the increment from the previous. In
X ** this case, a flag bit could mean that those two values each occupy a
X ** nibble in a single byte. Or, I could use a single byte, like this:
X ** [a b c d e f g h]
X ** a == 1 --> (efgh) is word in block inc., (bcd is block in file inc)
X ** but I need to do some real measurements to figure out how best to save
X ** space. It really is worth while keeping the format as simple as I can,
X ** as this speeds retrieval.
X **
X **/
X
Xunsigned short
XPutWordPlaces(WordPlaces, WID, Block, BlockLength, NextOffset, NumberToWrite)
X t_WordPlace *WordPlaces;
X t_WID WID;
X unsigned char *Block;
X unsigned BlockLength;
X unsigned long NextOffset;
X unsigned long NumberToWrite;
X{
X unsigned char *q = Block;
X unsigned long L;
X int CurrentPlace = 0;
X unsigned long LastStart = 0L;
X t_FID LastFID = 0;
X unsigned long LastBlock = 0L;
X
X /** IMPORTANT NOTICE: keep the definition of MaxWordPlacesInAWordBlock
X ** up to date (it's #defined above).
X **/
X
X /* sort the pblock to simplify subsequent accesses */
X if (NumberToWrite > 1) {
X SortWordPlaces(NumberToWrite, WordPlaces);
X }
X
X while (CurrentPlace < 0 || CurrentPlace < NumberToWrite) {
X unsigned short NumberOfRepeats;
X unsigned char U;
X t_FID FID = WordPlaces[CurrentPlace].FID;
X int LastPlace;
X
X /* Determine the number of Places in the same file;
X * note that we can write at most 255 in the same place, so
X * longer stretches are broken up into clumps of 255.
X * This is a reasonable tradeoff, I think. The alternative would
X * be to write NumberOfRepeats as a long, and lose if there were
X * (say) between 64 (old, 127 new) and 255 of them. This case only
X * occurs once in the New Testament anyway, and presumably is
X * generally quite rare.
X */
X NumberOfRepeats = 0;
X LastPlace = CurrentPlace;
X while (NumberOfRepeats < 255) {
X if (LastPlace >= NumberToWrite) {
X break;
X } else if (WordPlaces[LastPlace].FID != FID) {
X break;
X }
X ++NumberOfRepeats;
X ++LastPlace;
X }
X
X L = (FID - LastFID) << 1;
X LastFID = FID;
X if (NumberOfRepeats > 1) L |= 01L;
X if (PutLong(L, WID, &q, &Block, &BlockLength,
X &NextOffset, &LastStart) < 0) {
X return CurrentPlace;
X }
X if (L & 01L) {
X if (PutByte(NumberOfRepeats, WID, &q, &Block, &BlockLength,
X &LastStart, &NextOffset) < 0) {
X return CurrentPlace;
X }
X }
X
X LastBlock = 0;
X
X for (; NumberOfRepeats != 0; --NumberOfRepeats) {
X if (CurrentPlace > NumberToWrite) {
X fprintf(stderr,
X "Entry for file %lu (WID %ld) has more matches than expected\n",
X FID, WID);
X exit(1);
X /* This would represent a rather serious bug, I think! */
X }
X /* block number */
X if (WordPlaces[CurrentPlace].BlockInFile < LastBlock) {
X fprintf(stderr, "Sort WID %ld failed, non-monatonic blocks\n",
X WID);
X exit(1); /* can't cope with this one */
X } else if (CurrentPlace &&
X (WordPlaces[CurrentPlace].FID ==
X WordPlaces[CurrentPlace - 1].FID) &&
X (WordPlaces[CurrentPlace].BlockInFile == LastBlock) &&
X (WordPlaces[CurrentPlace].WordInBlock <=
X WordPlaces[CurrentPlace - 1].WordInBlock)) {
X fprintf(stderr,
X "Sort WID %ld failed, FID %ld: Blk %d: WIB %d <= %d\n",
X WID, FID, LastBlock, WordPlaces[CurrentPlace].WordInBlock,
X WordPlaces[CurrentPlace - 1].WordInBlock
X );
X }
X L = WordPlaces[CurrentPlace].BlockInFile - LastBlock;
X LastBlock += L;
X
X if (PutLong(L, WID, &q, &Block, &BlockLength, &NextOffset,
X &LastStart) < 0) {
X return CurrentPlace;
X }
X U = (WordPlaces[CurrentPlace].WordInBlock << 1);
X if (WordPlaces[CurrentPlace].StuffBefore != 1) {
X WordPlaces[CurrentPlace].Flags |= WPF_HASSTUFFBEFORE;
X }
X if (WordPlaces[CurrentPlace].Flags != WPF_HASSTUFFBEFORE) {
X U |= 01;
X }
X if (U > 255) {
X fprintf(stderr,
X "WID %lu: WordInBlock (0%o) from FID %lu too big\n",
X WID, U, FID);
X exit(1);
X }
X
X if (PutByte(U, WID, &q, &Block, &BlockLength, &LastStart,
X &NextOffset) < 0) {
X return CurrentPlace;
X }
X if (U & 01) {
X if (PutByte(WordPlaces[CurrentPlace].Flags, WID, &q, &Block,
X &BlockLength, &LastStart, &NextOffset) < 0) {
X return CurrentPlace;
X }
X }
X
X /* Even if there are flags, there still might not be a separate
X * entry for the number of preceding skipped bytes.
X */
X if ((U & 01) &&
X (WordPlaces[CurrentPlace].Flags & WPF_HASSTUFFBEFORE)) {
X if (PutByte(WordPlaces[CurrentPlace].StuffBefore, WID, &q,
X &Block, &BlockLength, &LastStart, &NextOffset) < 0) {
X return CurrentPlace;
X }
X }
X ++CurrentPlace;
X }
X if (CurrentPlace > LastPlace) {
X fprintf(stderr, "oops- I went wrong and wrote %ld > %ld\n",
X CurrentPlace, LastPlace);
X }
X }
X if (LastStart) {
X /* NextStart had better not be non-zero, but FlushBlock will
X * take care of it (we have wasted a block in that case!).
X * LastStart is zero if we fitted it all inside the WordInfo
X * block, although this is currently unlikely as we don't get
X * called in that case!
X * Oh, maybe we do.
X */
X FlushBlock((unsigned char *) Block, &NextOffset, &LastStart, WID);
X }
X return NumberToWrite;
X}
X
Xt_WordPlace *
XGetWordPlaces(WID, Block, BlockLength, NextOffset, NumberExpected)
X t_WID WID;
X char *Block;
X unsigned BlockLength;
X unsigned long NextOffset;
X unsigned long NumberExpected;
X{
X unsigned char *q = (unsigned char *) Block;
X unsigned long L;
X t_WordPlace *Places = (t_WordPlace *) 0;
X long CurrentPlace = 0;
X t_FID LastFID = (t_FID) 0;
X unsigned LastBlock = 0L;
X
X#ifdef ASCIITRACE
X if (AsciiTrace > 10) {
X fprintf(stderr,
X "GetWordPlaces WID %ld Blk %ld len %d next %ld No. %ld\n",
X WID, Block, BlockLength, NextOffset, NumberExpected);
X
X }
X#endif
X
X if (Block == (char *) 0) {
X fprintf(stderr, "GetWordPlaces Error %lu\n", WID);
X }
X
X /*NOSTRICT*/
X Places = (t_WordPlace *) emalloc(sizeof(t_WordPlace) * NumberExpected);
X
X while (CurrentPlace < NumberExpected) {
X unsigned short NumberOfRepeats;
X unsigned char Uchar;
X t_FID FID;
X
X /** First get the FID. The bottom bit of the number stored
X ** actually determines whether there are multiple Places
X ** stored here for the same FID.
X **/
X L = GetLong(WID, &q, &Block, &BlockLength, &NextOffset);
X FID = (L >> 1) + LastFID; /* Get rid of flag bit */
X LastFID = FID;
X NumberOfRepeats = (L & 01L) ?
X GetByte(WID, &q, &Block, &BlockLength, &NextOffset) : 1;
X
X /* Quick Sanity check */
X switch (NumberOfRepeats) {
X case 0L:
X fprintf(stderr, "Warning: no entries! for FID %lu\n", FID);
X case 1L:
X if (L & 01L) {
X fprintf(stderr, "Warning: FID %lu repeated 1 times!\n", FID);
X }
X }
X
X LastBlock = 0L;
X if (CurrentPlace + NumberOfRepeats > NumberExpected) {
X fprintf(stderr,
X "Entry for file %lu WID %ld has %lu matches, not %lu\n",
X FID, WID, CurrentPlace + NumberOfRepeats + 1, NumberExpected);
X NumberOfRepeats = NumberExpected - CurrentPlace;
X }
X for (; NumberOfRepeats != 0; --NumberOfRepeats) {
X Places[CurrentPlace].FID = FID;
X /* block number */
X L = GetLong(WID, &q, &Block, &BlockLength, &NextOffset);
X LastBlock += L;
X Places[CurrentPlace].BlockInFile = LastBlock;
X Uchar = GetByte(WID, &q, &Block, &BlockLength, &NextOffset);
X Places[CurrentPlace].WordInBlock = (Uchar >> 1);
X
X /* Sanity check: */
X if (CurrentPlace > 0 && Places[CurrentPlace].FID ==
X Places[CurrentPlace - 1].FID) {
X if (Places[CurrentPlace - 1].BlockInFile ==
X Places[CurrentPlace].BlockInFile) {
X if ( Places[CurrentPlace - 1].WordInBlock >=
X Places[CurrentPlace].WordInBlock) {
X fprintf(stderr,
X "%s: warning: %dth match for word %ld (FID %ld) WIB goes backwards!\n",
X progname, CurrentPlace, WID, FID);
X }
X } else if (Places[CurrentPlace - 1].BlockInFile >
X Places[CurrentPlace].BlockInFile) {
X fprintf(stderr,
X "%s: warning: %dth match for word %ld (FID %ld) BIF goes backwards!\n",
X progname, CurrentPlace, WID, FID);
X }
X }
X /* end of sanity test */
X
X if (Uchar & 01) { /* use if, not ?:, for profiler */
X Places[CurrentPlace].Flags =
X GetByte(WID, &q, &Block, &BlockLength, &NextOffset);
X } else {
X Places[CurrentPlace].Flags = 0;
X }
X
X /* If there are flags, there still might not be a separate
X * entry for the number of preceding skipped bytes.
X */
X if (Places[CurrentPlace].Flags & WPF_HASSTUFFBEFORE) {
X Places[CurrentPlace].StuffBefore =
X GetByte(WID, &q, &Block, &BlockLength, &NextOffset);
X } else {
X Places[CurrentPlace].StuffBefore = 1;
X }
X ++CurrentPlace;
X }
X }
X return Places;
X}
X
Xvoid
XFillWordPlaces(WordInfo)
X t_WordInfo *WordInfo;
X{
X WordInfo->WordPlaces = GetWordPlaces(
X WordInfo->WID,
X WordInfo->WordPlaceStart,
X WIDBLOCKSIZE - (WordInfo->WordPlaceStart - WordInfo->DataBlock),
X WordInfo->Offset,
X WordInfo->NumberOfWordPlaces
X );
X}
X
Xstatic void
XFlushBlock(Block, NextOffset, LastStart, WID)
X char *Block;
X unsigned long *NextOffset, *LastStart;
X t_WID WID;
X{
X if (*LastStart && Block) {
X /*NOSTRICT*/
X t_BlockHeader *BH = (t_BlockHeader *) Block;
X
X BH->NextOffset = ((*NextOffset) / BLOCKSIZE) << 1;
X WriteBlock(*LastStart, Block);
X }
X *LastStart = *NextOffset = 0L;
X}
X
X/* This is simply to help keep the source lines getting too long! */
Xtypedef unsigned char *UCP;
X
Xint
X_PutByte(Byte, WID, sp, Blockp, BlockLength, LastStart, NextBlock)
X unsigned char Byte;
X t_WID WID;
X unsigned char **sp;
X unsigned char **Blockp;
X unsigned *BlockLength;
X unsigned long *NextBlock;
X unsigned long *LastStart; /* for writing the linked list */
X{
X t_BlockHeader *BH;
X
X if (*sp - (*Blockp) >= (*BlockLength)) {
X if (!*NextBlock && !*LastStart) return -1; /* only do the 1st block */
X if (*NextBlock == (unsigned long) 0) {
X *NextBlock = FindFreeBlock(WID);
X }
X /* Complete the information in the previous block, if required */
X if (*LastStart) {
X BH = (t_BlockHeader *) (*Blockp);
X BH->NextOffset = ((*NextBlock) / BLOCKSIZE) << 1;
X /* Write the old block */
X WriteBlock(*LastStart, *Blockp);
X *LastStart = 0L;
X }
X *LastStart = (*NextBlock);
X *BlockLength = BLOCKSIZE;
X (*NextBlock) = 0L;
X *Blockp = pblockBuffer; /* Use static (to this file) data buffer */
X /*NOSTRICT*/
X BH = (t_BlockHeader *) (*Blockp);
X (*sp) = (UCP) BH->Data;
X }
X **sp = Byte;
X (*sp)++;
X return 0;
X}
X
Xunsigned char
X_GetByte(WID, sp, Blockp, BlockLength, NextBlock)
X t_WID WID;
X unsigned char **sp;
X unsigned char **Blockp;
X unsigned long *BlockLength;
X unsigned long *NextBlock;
X{
X t_BlockHeader *BH;
X
X if (*sp - (*Blockp) >= (*BlockLength)) {
X if (*NextBlock == (unsigned long) 0) {
X (*Blockp) = (*sp) = (UCP) 0;
X fprintf(stderr, "Database Corrupt, Next is zero\n");
X return 0;
X } else {
X (*sp) = (*Blockp) = (UCP) ReadBlock(*NextBlock);
X }
X /* Check the new block */
X if ((*Blockp) == (UCP) 0) {
X fprintf(stderr, "Database corrupt, %lu, sigh.\n", *NextBlock);
X exit(1);
X }
X /*NOSTRICT*/
X BH = (t_BlockHeader *) (*Blockp);
X *NextBlock = (BH->NextOffset >> 1) * BLOCKSIZE;
X *BlockLength = BLOCKSIZE;
X (*sp) = (UCP) BH->Data;
X }
X return *((*sp)++);
X}
X
X/* PutLong -- write a long number in compressed/abbreviated form into a
X * string. If this moves the string pointer beyond the block, write out
X * the block and start a new one. In that case, the number written may well
X * span the gap between the blocks. We use an overflow buffer to copy
X * the bytes (if any) that overflowed into it.
X * Then we write them at the start of the next block.
X *
X * This routine returns -1 and writes a partial number (no allocated block)
X * if *LastBlock and *NextBlock are zero. This allows PutwOrdPlaces to be
X * called to put the WordPlaces into the WIDFILE block without writing out
X * an entire chain.
X */
Xint
XPutLong(Long, WID, sp, Blockp, BlockLength, NextBlock, LastStart)
X unsigned long Long;
X t_WID WID;
X unsigned char **sp;
X unsigned char **Blockp;
X unsigned *BlockLength;
X unsigned long *NextBlock;
X unsigned long *LastStart; /* for writing the linked list */
X{
X t_BlockHeader *BH;
X unsigned char Buffer[sizeof(unsigned long) + 1];
X unsigned char *Bufp = Buffer;
X unsigned char *p;
X
X sWriteNumber((char **) sp, Long);
X
X if ((*sp) - (*Blockp) > (*BlockLength)) { /* gone too far! */
X if (!*NextBlock && !*LastStart) return -1;
X /* Save the overflow in Buffer:
X * the 1st 1 or more characters will fitted into the old block,
X * but we need them all in a lump for readnumber().
X * When we write the next block, we need to put the overflow
X * characters into the start of the next block.
X */
X for (p = &(*Blockp)[*BlockLength]; p < (*sp); p++) {
X *Bufp++ = *p;
X }
X if (*NextBlock == (unsigned long) 0) {
X *NextBlock = FindFreeBlock(WID);
X }
X /* Complete the information in the previous block, if required */
X if (*LastStart) {
X BH = (t_BlockHeader *) (*Blockp);
X BH->NextOffset = ((*NextBlock) / BLOCKSIZE) << 1;
X
X /* Write the old block */
X WriteBlock(*LastStart, *Blockp);
X }
X *LastStart = (*NextBlock);
X (*NextBlock) = 0L;
X *Blockp = pblockBuffer;
X BH = (t_BlockHeader *) (*Blockp);
X *BlockLength = BLOCKSIZE;
X (*sp) = (UCP) BH->Data;
X /* Now write the stuff from Buffer into the new block */
X if (Bufp > Buffer) {
X for (p = Buffer; p < Bufp; p++) {
X *((*sp)++) = (*p);
X }
X }
X }
X return 0;
X}
X
X/* This is the reverse of PutLong.
X * Things are slightly complicated by the need to provide sReadNumber
X * with a contiguous copy of all of the bytes in a number that spanned
X * a gap between data blocks.
X */
Xunsigned long
XGetLong(WID, sp, Blockp, BlockLength, NextBlock)
X t_WID WID;
X unsigned char **sp;
X unsigned char **Blockp;
X unsigned *BlockLength;
X unsigned long *NextBlock;
X{
X unsigned char Buffer[sizeof(unsigned long) + 1];
X long Result;
X t_BlockHeader *BH;
X unsigned char *NumberStart = (*sp);
X unsigned char *p;
X
X Result = sReadNumber(sp);
X
X /* Now, have we fallen off the end of the block? */
X if ((*sp) - (*Blockp) > (*BlockLength)) {
X unsigned char *bp = Buffer;
X
X if (*NextBlock == (unsigned long) 0) {
X return 0L;
X }
X
X /* Copy the first half of the number into the overflow buffer */
X for (p = NumberStart; p < &(*Blockp)[*BlockLength]; p++) {
X *bp++ = *p;
X }
X
X /** Now:
X ** . sp is garbage, as is NumberStart, as they point at the old
X ** data block
X ** . Buffer contains the first few bytes of the number
X ** . we need some more bytes, but don't yet know how many, as
X ** this depends on the number representation
X ** NOTE that we must have, however, that we know that there
X ** are more bytes, so that we know if we need the next block.
X ** . bp points 1 beyond the end of the 1st half of the number.
X **/
X
X (*sp) = *Blockp = (UCP) ReadBlock(*NextBlock);
X /* Check the new block */
X if ((*Blockp) == (UCP) 0) {
X fprintf(stderr, "Database corrupt, %lu, oh dear!\n", *NextBlock);
X exit(1);
X }
X BH = (t_BlockHeader *) *Blockp;
X *NextBlock = (BH->NextOffset >> 1) * BLOCKSIZE;
X *BlockLength = BLOCKSIZE;
X (*sp) = (UCP) BH->Data;
X /* Fill up the buffer from the new block */
X for (p = bp; p - Buffer < sizeof(Buffer) - 1; p++) {
X *p = *(*sp)++;
X }
X /* read the number from the buffer */
X (*sp) = Buffer;
X /* Try that number again... */
X Result = sReadNumber(sp);
X /* Now put sp where it should be. Part of the buffer was
X * from the old block...
X */
X (*sp) = (UCP) BH->Data + ((*sp) - bp);
X }
X return Result;
X}
X
Xvoid
XWriteBlock(Block, Data)
X unsigned long Block;
X char *Data;
X{
X if (DataFile < 0) {
X if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0666)) < 0) {
X fprintf(stderr, "Can't open database");
X perror(DataBase);
X exit(1);
X }
X }
X
X if (lseek(DataFile, (long) Block, 0) < 0) {
X perror("lseek");
X exit(1);
X }
X
X if (write(DataFile, Data, BLOCKSIZE) != BLOCKSIZE) {
X extern int errno;
X int e = errno;
X fprintf(stderr,
X "Warning: %s: E%d -- couldn't write %ld bytes at block %lu: ",
X DataBase, e, BLOCKSIZE, Block);
X errno = e;
X perror("write");
X }
X return;
X}
X
Xunsigned long
XFindFreeBlock(WID)
X t_WID WID;
X{
X char Data[BLOCKSIZE];
X t_BlockHeader *HP;
X unsigned long Here = 0L;
X unsigned long There = 0L;
X
X if (DataFile < 0) {
X if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0666)) < 0) {
X fprintf(stderr, "Can't open database");
X perror(DataBase);
X exit(1);
X }
X }
X
X do {
X (void) lseek(DataFile, (long) Here, 0);
X
X if (read(DataFile, Data, BLOCKSIZE) < BLOCKSIZE) {
X HP = (t_BlockHeader *) 0;
X There = 0L;
X Here = BLOCKSIZE;
X } else {
X HP = (t_BlockHeader *) Data;
X }
X
X /* It is free if it has no pairs (or if the block has
X * never been written to, of course)
X * Freed blocks are given a NextOffset or'd with 1.
X */
X if ((!HP || (HP->NextOffset & 01L)) && Here != 0L) {
X unsigned long NextFree = HP ? HP->NextOffset : 0L;
X
X if (NextFree) {
X /* make it suitable for use with lseek() */
X NextFree = (NextFree >> 1) * BLOCKSIZE;
X }
X
X /* Make the previous block in the chain point to the one
X * after this one in the free list, instead of this one.
X */
X
X if (HP) {
X (void) lseek(DataFile, (long) There, 0);
X (void) read(DataFile, Data, BLOCKSIZE);
X /* Well, we read it a second ago!*/
X }
X HP = (t_BlockHeader *) Data;
X HP->NextOffset = (NextFree / BLOCKSIZE) << 1;
X (void) lseek(DataFile, (long) There, 0);
X (void) write(DataFile, Data, BLOCKSIZE);
X
X goto GotOne;
X }
X
X There = Here;
X Here = (HP->NextOffset >> 1) * BLOCKSIZE;
X
X } while (Here != 0L);
X
X if ((Here = lseek(DataFile, 0L, 2)) == 0L) {
X Here = BLOCKSIZE;
X }
X
XGotOne:
X /* Mark it in use */
X HP->NextOffset = 0L;
X (void) lseek(DataFile, (long) Here, 0);
X (void) write(DataFile, Data, BLOCKSIZE);
X
X#ifdef ASCIITRACE
X if (AsciiTrace > 10) {
X fprintf(stderr, "\tFindFree --> %lu\n", (Here == 0L) ? BLOCKSIZE : Here);
X }
X#endif
X return (Here == 0L) ? BLOCKSIZE : Here;
X}
X
X/* Get a single disk block from the database */
Xchar *
XReadBlock(Offset)
X unsigned long Offset;
X{
X static char Buffer0[BLOCKSIZE];
X
X if (DataFile < 0) {
X if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0664)) < 0) {
X return (char *) 0;
X }
X }
X
X#ifdef ASCIITRACE
X if (AsciiTrace > 15) {
X fprintf(stderr, "===ReadBlock(%ld)\n", Offset);
X }
X#endif
X
X if (lseek(DataFile, (long) Offset, 0) < 0) {
X return (char *) 0;
X }
X
X /* Now we are in the right place */
X
X if (read(DataFile, Buffer0, BLOCKSIZE) != BLOCKSIZE) {
X return (char *) 0;
X }
X
X /* Now that we have the 1st block of the data.. */
X return Buffer0;
X}
X
Xint
XWordPlaceCompare(F1, F2)
X t_WordPlace *F1, *F2;
X{
X if (F1->FID != F2->FID) return F1->FID - F2->FID;
X if (F1->BlockInFile != F2->BlockInFile) {
X return F1->BlockInFile - F2->BlockInFile;
X }
X return F1->WordInBlock - F2->WordInBlock;
X}
X
Xint
XRemoveFIDFrompblock(FID, pblock)
X t_FID FID;
X t_pblock *pblock;
X{
X unsigned int ThisPair;
X int TotalWordPlacesToMove = 0;
X
X /* Mark unwanted pairs as deleted */
X for (ThisPair = 0; ThisPair < pblock->NumberOfWordPlaces; ThisPair++) {
X if (pblock->WordPlaces[ThisPair].FID == FID) {
X pblock->WordPlaces[ThisPair].FID = 0L;
X ++TotalWordPlacesToMove;
X }
X }
X if (!TotalWordPlacesToMove) return 0;
X
X if (pblock->NumberOfWordPlaces > 1) {
X SortWordPlaces(pblock->NumberOfWordPlaces, pblock->WordPlaces);
X }
X
X /* shuffle up any FID=0 pairs */
X {
X register t_WordPlace *Src, *Dest;
X register int n = TotalWordPlacesToMove;
X
X Src = pblock->WordPlaces;
X Dest = &pblock->WordPlaces[TotalWordPlacesToMove];
X
X while (n-- > 0) {
X *Src++ = *Dest++;
X }
X }
X
X return TotalWordPlacesToMove;
X}
X
Xvoid
XSortWordPlaces(NumberOfWordPlaces, WordPlaces)
X unsigned long NumberOfWordPlaces;
X t_WordPlace *WordPlaces;
X{
X /* sort the list */
X if (NumberOfWordPlaces > 2) {
X qsort(WordPlaces, (int) NumberOfWordPlaces,
X sizeof(t_WordPlace), WordPlaceCompare);
X } else {
X /* don't call qsort in the trivial cases... */
X if (NumberOfWordPlaces == 2) {
X if (WordPlaceCompare(&WordPlaces[0], &WordPlaces[1]) > 0) {
X t_WordPlace tmp;
X tmp = WordPlaces[0]; /* structure copy! */
X WordPlaces[0] = WordPlaces[1];
X WordPlaces[1] = tmp;
X }
X }
X }
X}
X
Xvoid
XDeleteWordPlaces(FirstBlock, WID)
X unsigned long FirstBlock;
X t_WID WID;
X{
X char Data[BLOCKSIZE];
X char BlockZero[BLOCKSIZE];
X t_BlockHeader *H;
X t_BlockHeader *HZERO; /* for the free list*/
X unsigned long ThisBlock;
X unsigned long Here;
X
X if (!FirstBlock) {
X return;
X }
X
X if (DataFile < 0) {
X if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0666)) < 0) {
X fprintf(stderr, "Can't open database");
X perror(DataBase);
X exit(1);
X }
X }
X
X (void) lseek(DataFile, 0L, 0);
X
X if (read(DataFile, BlockZero, BLOCKSIZE) <= 0) {
X return; /* nothing to delete */
X }
X
X /* Block zero is always the head of the free list */
X HZERO = (t_BlockHeader *) BlockZero;
X
X Here = FirstBlock;
X
X do {
X if (lseek(DataFile, (long) Here, 0) < 0) {
X fprintf(stderr, "Aaaargh:");
X perror("lseek");
X exit(1);
X }
X
X if (read(DataFile, Data, BLOCKSIZE) <= 0) {
X return; /* it's not there to delete */
X }
X
X H = (t_BlockHeader *) Data;
X
X ThisBlock = Here; /* so we can write the changed block */
X Here = (H->NextOffset >> 1) * BLOCKSIZE;
X if (Here == 0L) { /* next block... */
X H->NextOffset = HZERO->NextOffset;
X }
X H->NextOffset |= 01L; /* mark it as free */
X
X /* Write out the newly freed block, having saved its Next pointer
X */
X WriteBlock(ThisBlock, Data);
X } while (Here);
X
X /* Now make the free list point to the start of the new chain;
X * the end of the newly added chain points to the start of the
X * original chain. This means that the new blocks are still in the
X * Unix buffer cache, so this helps performance. I hope.
X */
X HZERO->NextOffset = (FirstBlock / BLOCKSIZE) << 1;
X (void) lseek(DataFile, 0L, 0);
X (void) write(DataFile, BlockZero, BLOCKSIZE);
X
X return;
X}
X
Xvoid
XDeletepblock(pblock)
X t_pblock *pblock;
X{
X char Data[BLOCKSIZE];
X char BlockZero[BLOCKSIZE];
X t_BlockHeader *H;
X t_BlockHeader *HZERO; /* for the free list*/
X unsigned long ThisBlock;
X unsigned long Here;
X
X if (!pblock || !pblock->ChainStart) {
X return;
X }
X
X if (DataFile < 0) {
X if ((DataFile = open(DataBase, O_RDWR|O_CREAT, 0666)) < 0) {
X fprintf(stderr, "Can't open database");
X perror(DataBase);
X exit(1);
X }
X }
X
X if (read(DataFile, BlockZero, BLOCKSIZE) <= 0) {
X return; /* nothing to delete */
X }
X
X /* Block zero is always the head of the free list */
X HZERO = (t_BlockHeader *) BlockZero;
X
X Here = pblock->ChainStart;
X
X do {
X if (lseek(DataFile, (long) Here, 0) < 0) {
X fprintf(stderr, "Aaaargh:");
X perror("lseek");
X exit(1);
X }
X
X if (read(DataFile, Data, BLOCKSIZE) <= 0) {
X return; /* it's not there to delete */
X }
X
X H = (t_BlockHeader *) Data;
X
X ThisBlock = Here; /* so we can write the changed block later */
X if ((Here = ((H->NextOffset >> 1) * BLOCKSIZE)) == 0L) {
X H->NextOffset = HZERO->NextOffset;
X }
X H->NextOffset |= 01L; /* mark it as free */
X
X /* Write out the newly freed block, having saved its Next pointer
X */
X WriteBlock(ThisBlock, Data);
X } while (Here);
X
X /* Now make the free list point to the start of the new chain;
X * the end of the newly added chain points to the start of the
X * original chain. This means that the new blocks are still in the
X * Unix buffer cache, so this helps performance. I hope.
X */
X HZERO->NextOffset = (pblock->ChainStart / BLOCKSIZE) << 1;
X (void) lseek(DataFile, 0L, 0);
X write(DataFile, BlockZero, BLOCKSIZE);
X
X return;
X}
@@@End of lq-text/src/liblqtext/pblock.c
echo end of part 05
--
Liam R. E. Quin, lee at sq.com, SoftQuad Inc., Toronto, +1 (416) 963-8337
More information about the Alt.sources
mailing list