ARC 5.20 w/Squashing, 5/9

Howard Chu hyc at umix.cc.umich.edu
Thu Apr 14 16:01:23 AEST 1988


XX				fflush(stdout);
XX				while (1) {
XX					printf("  Overwrite it (y/n)? ");
XX					fflush(stdout);
XX					fgets(buf, 100, stdin);
XX					*buf = toupper(*buf);
XX					if (*buf == 'Y' || *buf == 'N')
XX						break;
XX				}
XX				if (*buf == 'N') {
XX					printf("%s not extracted.\n", fix);
XX					fseek(arc, hdr->size, 1);
XX					return;
XX				}
XX#ifdef MTS
XX			}
XX#endif
XX		}
XX	}
XX#ifndef MTS
XX	if (!(f = fopen(fix, "w"))) {
XX#else
XX	{
XX		fortran         create();
XX		char            c_name[256];
XX		struct crsize {
XX			short           maxsize, cursize;
XX		}               c_size;
XX		char            c_vol[6];
XX		int             c_type;
XX		strcpy(c_name, fix);
XX		strcat(c_name, " ");
XX		c_size.maxsize = 0;
XX		c_size.cursize = hdr->length / 4096 + 1;
XX		memset(c_vol, 0, sizeof(c_vol));
XX		c_type = 0x100;
XX		create(c_name, &c_size, c_vol, &c_type);
XX	}
XX	if (image) {
XX		f = fopen(fix, "wb");
XX		fseek(f, 0, 0);
XX	} else
XX		f = fopen(fix, "w");
XX	if (!f) {
XX#endif
XX		if (warn) {
XX			printf("Cannot create %s\n", fix);
XX			nerrs++;
XX		}
XX		fseek(arc, hdr->size, 1);
XX		return;
XX	}
XX	/* now unpack the file */
XX
XX	unpack(arc, f, hdr);	/* unpack file from archive */
XX#ifdef MSDOS
XX	setstamp(f, hdr->date, hdr->time);	/* set the proper date/time
XX						 * stamp */
XX#endif
XX	fclose(f);		/* all done writing to file */
XX#ifdef BSD
XX	setstamp(fix, hdr->date, hdr->time);	/* use filename for BSD stamp */
XX#endif
XX}
SHAR_EOF
if test 5231 -ne "`wc -c arcext.c`"
then
echo shar: error transmitting arcext.c '(should have been 5231 characters)'
fi
echo shar: extracting arcio.c '(7950 characters)'
sed 's/^XX//' << \SHAR_EOF > arcio.c
XX/*
XX * $Log:	arcio.c,v $
XX * Revision 1.2  88/04/11  18:01:16  hyc
XX * added support for squashing. (changed max hdrver from 8 to 9)
XX * 
XX * Revision 1.1  88/04/11  17:59:43  hyc
XX * Initial revision
XX * 
XX * Revision 1.3  87/12/19  04:25:24  hyc
XX * As before, different location tho.
XX * 
XX * Revision 1.2  87/12/19  04:23:21  hyc
XX * Fix problem caused by indent(?) screwing up line boundaries...
XX * 
XX * Revision 1.1  87/12/19  04:21:23  hyc
XX * Initial revision
XX * 
XX * Revision 1.4  87/08/13  17:03:24  hyc
XX * Run thru the indent program...
XX *  Revision 1.3  87/07/21  11:41:29  hyc *** empty
XX * log message ***
XX * 
XX * Revision 1.2  87/07/21  08:19:45  hyc *** empty log message ***
XX * 
XX */
XX
XX/*
XX * ARC - Archive utility - ARCIO
XX * 
XX * Version 2.49, created on 07/25/86 at 16:44:23
XX * 
XX * (C) COPYRIGHT 1985,86 by System Enhancement Associates; ALL RIGHTS RESERVED
XX * 
XX * By:  Thom Henderson
XX * 
XX * Description: This file contains the file I/O routines used to manipulate an
XX * archive.
XX * 
XX * Language: Computer Innovations Optimizing C86
XX */
XX#include <stdio.h>
XX#include "arc.h"
XX
XXINT
XXreadhdr(hdr, f)			/* read a header from an archive */
XX	struct heads   *hdr;	/* storage for header */
XX	FILE           *f;	/* archive to read header from */
XX{
XX#ifndef MSDOS
XX	unsigned char   dummy[28];
XX	INT             i, j, k;
XX#endif
XX	char            name[13];	/* filename buffer */
XX	INT             try = 0;/* retry counter */
XX	static INT      first = 1;	/* true only on first read */
XX
XX	if (!f)			/* if archive didn't open */
XX		return 0;	/* then pretend it's the end */
XX	if (feof(f))		/* if no more data */
XX		return 0;	/* then signal end of archive */
XX
XX	if (fgetc(f) != 26) {	/* check archive validity */
XX		if (warn) {
XX			printf("An entry in %s has a bad header.", arcname);
XX			nerrs++;
XX		}
XX		while (!feof(f)) {
XX			try++;
XX			if (fgetc(f) == 26) {
XX				ungetc(hdrver = fgetc(f), f);
XX				if (hdrver >= 0 && hdrver <= 9)
XX					break;
XX			}
XX		}
XX
XX		if (feof(f) && first)
XX			abort("%s is not an archive", arcname);
XX
XX		if (warn)
XX			printf("  %d bytes skipped.\n", try);
XX
XX		if (feof(f))
XX			return 0;
XX	}
XX	hdrver = fgetc(f);	/* get header version */
XX	if (hdrver < 0)
XX		abort("Invalid header in archive %s", arcname);
XX	if (hdrver == 0)
XX		return 0;	/* note our end of archive marker */
XX	if (hdrver > 9) {
XX		fread(name, sizeof(char), 13, f);
XX#ifdef MTS
XX		atoe(name, strlen(name));
XX#endif
XX		printf("I don't know how to handle file %s in archive %s\n",
XX		       name, arcname);
XX		printf("I think you need a newer version of ARC.\n");
XX		exit(1);
XX	}
XX	/* amount to read depends on header type */
XX
XX	if (hdrver == 1) {	/* old style is shorter */
XX		fread(hdr, sizeof(struct heads) - sizeof(long int), 1, f);
XX		hdrver = 2;	/* convert header to new format */
XX		hdr->length = hdr->size;	/* size is same when not
XX						 * packed */
XX	} else
XX#ifdef MSDOS
XX		fread(hdr, sizeof(struct heads), 1, f);
XX#else
XX		fread(dummy, 27, 1, f);
XX
XX	for (i = 0; i < 13; hdr->name[i] = dummy[i], i++);
XX#ifdef MTS
XX	(void) atoe(hdr->name, strlen(hdr->name));
XX#endif
XX	hdr->size = (LONG) ((dummy[16] << 24) + (dummy[15] << 16) + (dummy[14] << 8)
XX			    + dummy[13]);
XX	hdr->date = (short) ((dummy[18] << 8) + dummy[17]);
XX	hdr->time = (short) ((dummy[20] << 8) + dummy[19]);
XX	hdr->crc = (short) ((dummy[22] << 8) + dummy[21]);
XX	hdr->length = (LONG) ((dummy[26] << 24) + (dummy[25] << 16)
XX			      + (dummy[24] << 8) + dummy[23]);
XX#endif
XX
XX	if (hdr->date > olddate
XX	    || (hdr->date == olddate && hdr->time > oldtime)) {
XX		olddate = hdr->date;
XX		oldtime = hdr->time;
XX	}
XX	first = 0;
XX	return 1;		/* we read something */
XX}
XX
XXput_int(number, f)		/* write a 2 byte integer */
XX	INT             number;
XX	FILE           *f;
XX{
XX	fputc(number & 255, f);
XX	fputc(number >> 8, f);
XX}
XX
XXput_long(number, f)		/* write a 4 byte integer */
XX	LONG            number;
XX	FILE           *f;
XX{
XX	put_int(number & 0xFFFF, f);
XX	put_int(number >> 16, f);
XX}
XX
XXINT
XXwritehdr(hdr, f)		/* write a header to an archive */
XX	struct heads   *hdr;	/* header to write */
XX	FILE           *f;	/* archive to write to */
XX{
XX	fputc(26, f);		/* write out the mark of ARC */
XX	fputc(hdrver, f);	/* write out the header version */
XX	if (!hdrver)		/* if that's the end */
XX		return;		/* then write no more */
XX#ifdef MSDOS
XX	fwrite(hdr, sizeof(struct heads), 1, f);
XX#else
XX	/* byte/word ordering hassles... */
XX#ifdef MTS
XX	etoa(hdr->name, strlen(hdr->name));
XX#endif
XX	fwrite(hdr->name, 1, 13, f);
XX	put_long(hdr->size, f);
XX	put_int(hdr->date, f);
XX	put_int(hdr->time, f);
XX	put_int(hdr->crc, f);
XX	put_long(hdr->length, f);
XX#endif
XX
XX	/* note the newest file for updating the archive timestamp */
XX
XX	if (hdr->date > arcdate
XX	    || (hdr->date == arcdate && hdr->time > arctime)) {
XX		arcdate = hdr->date;
XX		arctime = hdr->time;
XX	}
XX}
XX
XXINT
XXputc_tst(c, t)			/* put a character, with tests */
XX	char            c;	/* character to output */
XX	FILE           *t;	/* file to write to */
XX{
XX	if (t)
XX#ifdef BSD
XX		fputc(c, t);
XX#else
XX		if (fputc(c, t) == EOF)
XX			abort("Write fail (disk full?)");
XX#endif
XX}
XX
XX/*
XX * NOTE:  The filecopy() function is used to move large numbers of bytes from
XX * one file to another.  This particular version has been modified to improve
XX * performance in Computer Innovations C86 version 2.3 in the small memory
XX * model.  It may not work as expected with other compilers or libraries, or
XX * indeed with different versions of the CI-C86 compiler and library, or with
XX * the same version in a different memory model.
XX * 
XX * The following is a functional equivalent to the filecopy() routine that
XX * should work properly on any system using any compiler, albeit at the cost
XX * of reduced performance:
XX * 
XX * filecopy(f,t,size) 
XX *      FILE *f, *t; long size; 
XX * { 
XX *      while(size--)
XX *              putc_tst(fgetc(f),t); 
XX * }
XX */
XX#ifdef MSDOS
XX#include <fileio2.h>
XX
XXfilecopy(f, t, size)		/* bulk file copier */
XX	FILE           *f, *t;	/* files from and to */
XX	long            size;	/* bytes to copy */
XX{
XX	char           *buf;	/* buffer pointer */
XX	char           *alloc();/* buffer allocator */
XX	unsigned int    bufl;	/* buffer length */
XX	unsigned int    coreleft();	/* space available reporter */
XX	unsigned int    cpy;	/* bytes being copied */
XX	long            floc, tloc, fseek();	/* file pointers, setter */
XX	struct regval   reg;	/* registers for DOS calls */
XX
XX	if ((bufl = coreleft()) < 1000)	/* see how much space we have */
XX		abort("Out of memory");
XX	bufl -= 1000;		/* fudge factor for overhead */
XX	if (bufl > 60000)
XX		bufl = 60000;	/* avoid choking alloc() */
XX	if (bufl > size)
XX		bufl = size;	/* avoid wasting space */
XX	buf = alloc(bufl);	/* allocate our buffer */
XX
XX	floc = fseek(f, 0L, 1);	/* reset I/O system */
XX	tloc = fseek(t, 0L, 1);
XX
XX	segread(&reg.si);	/* set segment registers for DOS */
XX
XX	while (size > 0) {	/* while more to copy */
XX		reg.ax = 0x3F00;/* read from handle */
XX		reg.bx = filehand(f);
XX		reg.cx = bufl < size ? bufl : size;	/* amount to read */
XX		reg.dx = buf;
XX		if (sysint21(&reg, &reg) & 1)
XX			abort("Read fail");
XX
XX		cpy = reg.ax;	/* amount actually read */
XX		reg.ax = 0x4000;/* write to handle */
XX		reg.bx = filehand(t);
XX		reg.cx = cpy;
XX		reg.dx = buf;
XX		sysint21(&reg, &reg);
XX
XX		if (reg.ax != cpy)
XX			abort("Write fail (disk full?)");
XX
XX		size -= (long) cpy;
XX	}
XX
XX	free(buf);		/* all done with buffer */
XX}
XX#else
XX
XXfilecopy(f, t, size)		/* bulk file copier */
XX	FILE           *f, *t;	/* files from and to */
XX	LONG            size;	/* bytes to copy */
XX{
XX	char           *buf;	/* buffer pointer */
XX	char           *malloc();	/* buffer allocator */
XX	unsigned int    bufl;	/* buffer length */
XX	unsigned int    cpy;	/* bytes being copied */
XX#ifdef MTS
XX#define MTEXT    0x0010
XX#endif
XX
XX	bufl = 60000;
XX	if (bufl > size)
XX		bufl = size;	/* don't waste space */
XX
XX	buf = malloc(bufl);
XX
XX	while (size > 0) {
XX		cpy = fread(buf, sizeof(char), bufl < size ? bufl : size, f);
XX#ifdef MTS
XX		if ((f->_fmode & MTEXT) && !image)
XX			etoa(buf, cpy);
XX#endif
XX		if (fwrite(buf, sizeof(char), cpy, t) != cpy)
XX			abort("Write fail (no space?)");
XX		size -= cpy;
XX	}
XX
XX	free(buf);
XX}
XX#endif
SHAR_EOF
if test 7950 -ne "`wc -c arcio.c`"
then
echo shar: error transmitting arcio.c '(should have been 7950 characters)'
fi
echo shar: extracting arclst.c '(4534 characters)'
sed 's/^XX//' << \SHAR_EOF > arclst.c
XX/*
XX * $Log:	arclst.c,v $
XX * Revision 1.2  88/04/11  18:03:10  hyc
XX * added support for squashing, re-synch with MTS
XX * 
XX * Revision 1.1  88/04/11  18:01:46  hyc
XX * Initial revision
XX * 
XX * Revision 1.4  87/08/13  17:03:30  hyc
XX * Run thru the indent program...
XX *  Revision 1.3  87/07/21  11:41:35  hyc *** empty
XX * log message ***
XX * 
XX * Revision 1.2  87/07/21  08:23:17  hyc *** empty log message ***
XX * 
XX */
XX
XX/*
XX * ARC - Archive utility - ARCLST
XX * 
XX * Version 2.38, created on 07/25/86 at 17:52:20
XX * 
XX * (C) COPYRIGHT 1985,86 by System Enhancement Associates; ALL RIGHTS RESERVED
XX * 
XX * By:  Thom Henderson
XX * 
XX * Description: This file contains the routines used to list the contents of an
XX * archive.
XX * 
XX * Language: Computer Innovations Optimizing C86
XX */
XX#include <stdio.h>
XX#include "arc.h"
XX
XXINT
XXlstarc(num, arg)		/* list files in archive */
XX	INT             num;	/* number of arguments */
XX	char           *arg[];	/* pointers to arguments */
XX{
XX	struct heads    hdr;	/* header data */
XX	INT             list;	/* true to list a file */
XX	INT             did[25];/* true when argument was used */
XX	LONG            tnum, tlen, tsize;	/* totals */
XX	INT             n;	/* index */
XX	INT             lstfile();
XX
XX	tnum = tlen = tsize = 0;/* reset totals */
XX
XX	for (n = 0; n < num; n++)	/* for each argument */
XX		did[n] = 0;	/* reset usage flag */
XX	rempath(num, arg);	/* strip off paths */
XX
XX	if (!kludge) {
XX		printf("Name          Length  ");
XX		if (bose)
XX			printf("  Stowage    SF   Size now");
XX		printf("  Date     ");
XX		if (bose)
XX			printf("  Time    CRC");
XX		printf("\n");
XX
XX		printf("============  ========");
XX		if (bose)
XX			printf("  ========  ====  ========");
XX		printf("  =========");
XX		if (bose)
XX			printf("  ======  ====");
XX		printf("\n");
XX	}
XX	openarc(0);		/* open archive for reading */
XX
XX	if (num) {		/* if files were named */
XX		while (readhdr(&hdr, arc)) {	/* process all archive files */
XX			list = 0;	/* reset list flag */
XX			for (n = 0; n < num; n++) {	/* for each template
XX							 * given */
XX				if (match(hdr.name, arg[n])) {
XX					list = 1;	/* turn on list flag */
XX					did[n] = 1;	/* turn on usage flag */
XX					break;	/* stop looking */
XX				}
XX			}
XX
XX			if (list) {	/* if this file is wanted */
XX				if (!kludge)
XX					lstfile(&hdr);	/* then tell about it */
XX				tnum++;	/* update totals */
XX				tlen += hdr.length;
XX				tsize += hdr.size;
XX			}
XX			fseek(arc, hdr.size, 1);	/* move to next header */
XX		}
XX	} else
XX		while (readhdr(&hdr, arc)) {	/* else report on all files */
XX			if (!kludge)
XX				lstfile(&hdr);
XX			tnum++;	/* update totals */
XX			tlen += hdr.length;
XX			tsize += hdr.size;
XX			fseek(arc, hdr.size, 1);	/* skip to next header */
XX		}
XX
XX	closearc(0);		/* close archive after reading */
XX
XX	if (!kludge) {
XX		printf("        ====  ========");
XX		if (bose)
XX			printf("            ====  ========");
XX		printf("\n");
XX	}
XX	printf("Total %6ld  %8ld", tnum, tlen);
XX	if (bose) {
XX		if (tlen)
XX			printf("            %3ld%%", 100L - (100L * tsize) / tlen);
XX		else
XX			printf("            ---");
XX		printf("  %8ld", tsize);
XX	}
XX	printf("\n");
XX
XX	if (note) {
XX		for (n = 0; n < num; n++) {	/* report unused args */
XX			if (!did[n]) {
XX				printf("File not found: %s\n", arg[n]);
XX				nerrs++;
XX			}
XX		}
XX	}
XX}
XX
XXINT
XXlstfile(hdr)			/* tell about a file */
XX	struct heads   *hdr;	/* pointer to header data */
XX{
XX	INT             yr, mo, dy;	/* parts of a date */
XX	INT             hh, mm, ss;	/* parts of a time */
XX
XX	static char    *mon[] =	/* month abbreviations */
XX	{
XX	 "Jan", "Feb", "Mar", "Apr",
XX	 "May", "Jun", "Jul", "Aug",
XX	 "Sep", "Oct", "Nov", "Dec"
XX	};
XX
XX	yr = (hdr->date >> 9) & 0x7f;	/* dissect the date */
XX	mo = (hdr->date >> 5) & 0x0f;
XX	dy = hdr->date & 0x1f;
XX
XX	hh = (hdr->time >> 11) & 0x1f;	/* dissect the time */
XX	mm = (hdr->time >> 5) & 0x3f;
XX	ss = (hdr->time & 0x1f) * 2;
XX
XX	printf("%-12s  %8ld  ", hdr->name, hdr->length);
XX
XX	if (bose) {
XX		switch (hdrver) {
XX		case 1:
XX		case 2:
XX			printf("   --   ");
XX			break;
XX		case 3:
XX			printf(" Packed ");
XX			break;
XX		case 4:
XX			printf("Squeezed");
XX			break;
XX		case 5:
XX		case 6:
XX		case 7:
XX			printf("crunched");
XX			break;
XX		case 8:
XX			printf("Crunched");
XX			break;
XX		case 9:
XX			printf("Squashed");
XX			break;
XX		default:
XX			printf("Unknown!");
XX		}
XX
XX		if (hdr->length)
XX			printf("  %3d%%", 100L - (100L * hdr->size) / hdr->length);
XX		else
XX			printf("  ---");
XX		printf("  %8ld  ", hdr->size);
XX	}
XX	printf("%2d %3s %02d", dy, mon[mo - 1], (yr + 80) % 100);
XX
XX	if (bose)
XX		printf("  %2d:%02d%c  %04x",
XX		       (hh > 12 ? hh - 12 : hh), mm, (hh > 11 ? 'p' : 'a'),
XX		       hdr->crc & 0xffff);
XX
XX	printf("\n");
XX}
SHAR_EOF
if test 4534 -ne "`wc -c arclst.c`"
then
echo shar: error transmitting arclst.c '(should have been 4534 characters)'
fi
echo shar: extracting arclzw.c '(23276 characters)'
sed 's/^XX//' << \SHAR_EOF > arclzw.c
XX/*
XX * $Log:	arclzw.c,v $
XX * Revision 1.1  88/04/11  18:12:18  hyc
XX * Initial revision
XX * 
XX * Revision 1.2  87/12/20  03:17:39  hyc
XX * Fix some problems introduced by indent...
XX * 
XX * Revision 1.1  87/12/19  04:52:17  hyc
XX * Initial revision
XX * 
XX * Revision 1.3.1.1  87/08/13  17:03:36  hyc
XX * Run thru indent, fixed some signed vs. unsigned problems
XX * with bp <-> buf, and inbuf and localbuf...
XX *  Revision 1.3  87/07/21  11:41:41  hyc *** empty
XX * log message ***
XX * 
XX * Revision 1.2  87/07/21  08:45:24  hyc *** empty log message ***
XX * 
XX */
XX
XX/*
XX * ARC - Archive utility - ARCLZW
XX * 
XX * Version 2.03, created on 10/24/86 at 11:46:22
XX * 
XX * (C) COPYRIGHT 1985,86 by System Enhancement Associates; ALL RIGHTS RESERVED
XX * 
XX * By:  Thom Henderson
XX * 
XX * Description: This file contains the routines used to implement Lempel-Zev
XX * data compression, which calls for building a coding table on the fly.
XX * This form of compression is especially good for encoding files which
XX * contain repeated strings, and can often give dramatic improvements over
XX * traditional Huffman SQueezing.
XX * 
XX * Language: Computer Innovations Optimizing C86
XX * 
XX * Programming notes: In this section I am drawing heavily on the COMPRESS
XX * program from UNIX.  The basic method is taken from "A Technique for High
XX * Performance Data Compression", Terry A. Welch, IEEE Computer Vol 17, No 6
XX * (June 1984), pp 8-19.  Also see "Knuth's Fundamental Algorithms", Donald
XX * Knuth, Vol 3, Section 6.4.
XX * 
XX * As best as I can tell, this method works by tracing down a hash table of code
XX * strings where each entry has the property:
XX * 
XX * if <string> <char> is in the table then <string> is in the table.
XX */
XX#include <stdio.h>
XX#include "arc.h"
XX
XX/* definitions for older style crunching */
XX
XX#define FALSE    0
XX#define TRUE     !FALSE
XX#define TABSIZE  4096
XX#define NO_PRED  0xFFFF
XX#define EMPTY    0xFFFF
XX#define NOT_FND  0xFFFF
XX
XXstatic unsigned INT inbuf;	/* partial input code storage */
XXstatic INT      sp;		/* current stack pointer */
XX
XXstatic struct entry {		/* string table entry format */
XX	char            used;	/* true when this entry is in use */
XX	unsigned INT    next;	/* ptr to next in collision list */
XX	unsigned INT    predecessor;	/* code for preceeding string */
XX	unsigned char   follower;	/* char following string */
XX}               string_tab[TABSIZE];	/* the code string table */
XX
XX
XX/* definitions for the new dynamic Lempel-Zev crunching */
XX
XX#define BITS   12		/* maximum bits per code */
XX#define HSIZE  5003		/* 80% occupancy */
XX#define INIT_BITS 9		/* initial number of bits/code */
XX
XXstatic INT      n_bits;		/* number of bits/code */
XXstatic INT      maxcode;	/* maximum code, given n_bits */
XX#define MAXCODE(n)      ((1<<(n)) - 1)	/* maximum code calculation */
XXstatic INT      maxcodemax = 1 << BITS;	/* largest possible code (+1) */
XX
XXstatic char     buf[BITS];	/* input/output buffer */
XX
XXstatic unsigned char lmask[9] =	/* left side masks */
XX{
XX 0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00
XX};
XXstatic unsigned char rmask[9] =	/* right side masks */
XX{
XX 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff
XX};
XX
XXstatic INT      offset;		/* byte offset for code output */
XXstatic LONG     in_count;	/* length of input */
XXstatic LONG     bytes_out;	/* length of compressed output */
XXstatic LONG     bytes_ref;	/* output quality reference */
XXstatic LONG     bytes_last;	/* output size at last checkpoint */
XXstatic unsigned INT ent;
XX
XX/*
XX * To save much memory (which we badly need at this point), we overlay the
XX * table used by the previous version of Lempel-Zev with those used by the
XX * new version.  Since no two of these routines will be used together, we can
XX * safely do this.
XX */
XX#ifdef MSDOS
XXstatic LONG    *htab = string_tab;	/* hash code table   (crunch) */
XX#else
XXstatic LONG     htab[HSIZE];
XX#endif
XXstatic unsigned INT codetab[HSIZE];	/* string code table (crunch) */
XX
XXstatic unsigned INT *prefix = codetab;	/* prefix code table (uncrunch) */
XX#ifdef MSDOS
XXstatic unsigned char *suffix = string_tab;	/* suffix table (uncrunch) */
XX#else
XXstatic unsigned char suffix[HSIZE];
XX#endif
XX
XXstatic INT      free_ent;	/* first unused entry */
XXstatic INT      firstcmp;	/* true at start of compression */
XXstatic unsigned char stack[HSIZE];	/* local push/pop stack */
XX
XX/*
XX * block compression parameters -- after all codes are used up, and
XX * compression rate changes, start over.
XX */
XX
XXstatic INT      clear_flg;
XX#define CHECK_GAP 2048		/* ratio check interval */
XXstatic LONG     checkpoint;
XXINT             upd_tab();
XXstatic INT	putcode();
XX
XX/*
XX * the next two codes should not be changed lightly, as they must not lie
XX * within the contiguous general code space.
XX */
XX#define FIRST   257		/* first free entry */
XX#define CLEAR   256		/* table clear output code */
XX
XX/*
XX * The cl_block() routine is called at each checkpoint to determine if
XX * compression would likely improve by resetting the code table.  The method
XX * chosen to determine this is based on empirical observation that, in
XX * general, every 2k of input data should compress at least as well as the
XX * first 2k of input.
XX */
XX
XXstatic          INT
XXcl_block(t)			/* table clear for block compress */
XX	FILE           *t;	/* our output file */
XX{
XX	checkpoint = in_count + CHECK_GAP;
XX
XX	if (bytes_ref) {
XX		if (bytes_out - bytes_last > bytes_ref) {
XX			setmem(htab, HSIZE * sizeof(long), 0xff);
XX			free_ent = FIRST;
XX			clear_flg = 1;
XX			putcode(CLEAR, t);
XX			bytes_ref = 0;
XX		}
XX	} else
XX		bytes_ref = bytes_out - bytes_last;
XX
XX	bytes_last = bytes_out;	/* remember where we were */
XX}
XX
XX/*****************************************************************
XX *
XX * Output a given code.
XX * Inputs:
XX *      code:   A n_bits-bit integer.  If == -1, then EOF.  This assumes
XX *              that n_bits =< (LONG)wordsize - 1.
XX * Outputs:
XX *      Outputs code to the file.
XX * Assumptions:
XX *      Chars are 8 bits long.
XX * Algorithm:
XX *      Maintain a BITS character long buffer (so that 8 codes will
XX * fit in it exactly).  When the buffer fills up empty it and start over.
XX */
XX
XXstatic          INT
XXputcode(code, t)		/* output a code */
XX	INT             code;	/* code to output */
XX	FILE           *t;	/* where to put it */
XX{
XX	INT             r_off = offset;	/* right offset */
XX	INT             bits = n_bits;	/* bits to go */
XX	char           *bp = buf;	/* buffer pointer */
XX	INT             n;	/* index */
XX
XX	if (code >= 0) {	/* if a real code *//* Get to the first byte. */
XX		bp += (r_off >> 3);
XX		r_off &= 7;
XX
XX		/*
XX		 * Since code is always >= 8 bits, only need to mask the
XX		 * first hunk on the left. 
XX		 */
XX		*bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
XX		bp++;
XX		bits -= (8 - r_off);
XX		code >>= (8 - r_off);
XX
XX		/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
XX		if (bits >= 8) {
XX			*bp++ = code;
XX			code >>= 8;
XX			bits -= 8;
XX		}
XX		/* Last bits. */
XX		if (bits)
XX			*bp = code;
XX
XX		offset += n_bits;
XX
XX		if (offset == (n_bits << 3)) {
XX			bp = buf;
XX			bits = n_bits;
XX			bytes_out += bits;
XX			do
XX				putc_pak(*bp++, t);
XX			while (--bits);
XX			offset = 0;
XX		}
XX		/*
XX		 * If the next entry is going to be too big for the code
XX		 * size, then increase it, if possible. 
XX		 */
XX		if (free_ent > maxcode || clear_flg > 0) {
XX			/*
XX			 * Write the whole buffer, because the input side
XX			 * won't discover the size increase until after
XX			 * it has read it. 
XX			 */
XX			if (offset > 0) {
XX				bp = buf;	/* reset pointer for writing */
XX				bytes_out += n = n_bits;
XX				while (n--)
XX					putc_pak(*bp++, t);
XX			}
XX			offset = 0;
XX
XX			if (clear_flg) {	/* reset if clearing */
XX				maxcode = MAXCODE(n_bits = INIT_BITS);
XX				clear_flg = 0;
XX			} else {/* else use more bits */
XX				n_bits++;
XX				if (n_bits == BITS)
XX					maxcode = maxcodemax;
XX				else
XX					maxcode = MAXCODE(n_bits);
XX			}
XX		}
XX	} else {		/* dump the buffer on EOF */
XX		bytes_out += n = (offset + 7) / 8;
XX
XX		if (offset > 0)
XX			while (n--)
XX				putc_pak(*bp++, t);
XX		offset = 0;
XX	}
XX}
XX
XX/*****************************************************************
XX *
XX * Read one code from the standard input.  If EOF, return -1.
XX * Inputs:
XX *      cmpin
XX * Outputs:
XX *      code or -1 is returned.
XX */
XX
XXstatic          INT
XXgetcode(f)			/* get a code */
XX	FILE           *f;	/* file to get from */
XX{
XX	INT             code;
XX	static INT      offset = 0, size = 0;
XX	INT             r_off, bits;
XX	unsigned char  *bp = (unsigned char *) buf;
XX
XX	if (clear_flg > 0 || offset >= size || free_ent > maxcode) {
XX		/*
XX		 * If the next entry will be too big for the current code
XX		 * size, then we must increase the size. This implies
XX		 * reading a new buffer full, too. 
XX		 */
XX		if (free_ent > maxcode) {
XX			n_bits++;
XX			if (n_bits == BITS)
XX				maxcode = maxcodemax;	/* won't get any bigger
XX							 * now */
XX			else
XX				maxcode = MAXCODE(n_bits);
XX		}
XX		if (clear_flg > 0) {
XX			maxcode = MAXCODE(n_bits = INIT_BITS);
XX			clear_flg = 0;
XX		}
XX		for (size = 0; size < n_bits; size++) {
XX			if ((code = getc_unp(f)) == EOF)
XX				break;
XX			else
XX				buf[size] = code;
XX		}
XX		if (size <= 0)
XX			return -1;	/* end of file */
XX
XX		offset = 0;
XX		/* Round size down to integral number of codes */
XX		size = (size << 3) - (n_bits - 1);
XX	}
XX	r_off = offset;
XX	bits = n_bits;
XX
XX	/*
XX	 * Get to the first byte. 
XX	 */
XX	bp += (r_off >> 3);
XX	r_off &= 7;
XX
XX	/* Get first part (low order bits) */
XX	code = (*bp++ >> r_off);
XX	bits -= 8 - r_off;
XX	r_off = 8 - r_off;	/* now, offset into code word */
XX
XX	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
XX	if (bits >= 8) {
XX		code |= *bp++ << r_off;
XX		r_off += 8;
XX		bits -= 8;
XX	}
XX	/* high order bits. */
XX	code |= (*bp & rmask[bits]) << r_off;
XX	offset += n_bits;
XX
XX	return code & MAXCODE(BITS);
XX}
XX
XX/*
XX * compress a file
XX * 
XX * Algorithm:  use open addressing double hashing (no chaining) on the prefix
XX * code / next character combination.  We do a variant of Knuth's algorithm D
XX * (vol. 3, sec. 6.4) along with G. Knott's relatively-prime secondary probe.
XX * Here, the modular division first probe is gives way to a faster
XX * exclusive-or manipulation.  Also do block compression with an adaptive
XX * reset, where the code table is cleared when the compression ratio
XX * decreases, but after the table fills.  The variable-length output codes
XX * are re-sized at this point, and a special CLEAR code is generated for the
XX * decompressor.
XX */
XX
XXINT
XXinit_cm(f, t)			/* initialize for compression */
XX	FILE           *f;	/* file we will be compressing */
XX	FILE           *t;	/* where we will put it */
XX{
XX	offset = 0;
XX	bytes_out = bytes_last = 1;
XX	bytes_ref = 0;
XX	clear_flg = 0;
XX	in_count = 1;
XX	checkpoint = CHECK_GAP;
XX	maxcode = MAXCODE(n_bits = INIT_BITS);
XX	free_ent = FIRST;
XX	setmem(htab, HSIZE * sizeof(LONG), 0xff);
XX	n_bits = INIT_BITS;	/* set starting code size */
XX
XX	putc_pak(BITS, t);	/* note our max code length */
XX
XX	firstcmp = 1;		/* next byte will be first */
XX}
XX
XXINT
XXputc_cm(c, t)			/* compress a character */
XX	unsigned char   c;	/* character to compress */
XX	FILE           *t;	/* where to put it */
XX{
XX	static LONG     fcode;
XX	static INT      hshift;
XX	INT             i;
XX	INT             disp;
XX
XX	if (firstcmp) {		/* special case for first byte */
XX		ent = c;	/* remember first byte */
XX
XX		hshift = 0;
XX		for (fcode = (LONG) HSIZE; fcode < 65536L; fcode *= 2L)
XX			hshift++;
XX		hshift = 8 - hshift;	/* set hash code range bound */
XX
XX		firstcmp = 0;	/* no longer first */
XX		return;
XX	}
XX	in_count++;
XX
XX	fcode = (LONG) (((LONG) c << BITS) + ent);
XX	i = (c << hshift) ^ ent;/* xor hashing */
XX
XX	if (htab[i] == fcode) {
XX		ent = codetab[i];
XX		return;
XX	} else if (htab[i] < 0)	/* empty slot */
XX		goto nomatch;
XX	disp = HSIZE - i;	/* secondary hash (after G.Knott) */
XX	if (i == 0)
XX		disp = 1;
XX
XXprobe:
XX	if ((i -= disp) < 0)



More information about the Alt.sources mailing list