ARC 5.20 w/Squashing, 5/9
Howard Chu
hyc at umix.cc.umich.edu
Thu Apr 14 16:01:23 AEST 1988
XX fflush(stdout);
XX while (1) {
XX printf(" Overwrite it (y/n)? ");
XX fflush(stdout);
XX fgets(buf, 100, stdin);
XX *buf = toupper(*buf);
XX if (*buf == 'Y' || *buf == 'N')
XX break;
XX }
XX if (*buf == 'N') {
XX printf("%s not extracted.\n", fix);
XX fseek(arc, hdr->size, 1);
XX return;
XX }
XX#ifdef MTS
XX }
XX#endif
XX }
XX }
XX#ifndef MTS
XX if (!(f = fopen(fix, "w"))) {
XX#else
XX {
XX fortran create();
XX char c_name[256];
XX struct crsize {
XX short maxsize, cursize;
XX } c_size;
XX char c_vol[6];
XX int c_type;
XX strcpy(c_name, fix);
XX strcat(c_name, " ");
XX c_size.maxsize = 0;
XX c_size.cursize = hdr->length / 4096 + 1;
XX memset(c_vol, 0, sizeof(c_vol));
XX c_type = 0x100;
XX create(c_name, &c_size, c_vol, &c_type);
XX }
XX if (image) {
XX f = fopen(fix, "wb");
XX fseek(f, 0, 0);
XX } else
XX f = fopen(fix, "w");
XX if (!f) {
XX#endif
XX if (warn) {
XX printf("Cannot create %s\n", fix);
XX nerrs++;
XX }
XX fseek(arc, hdr->size, 1);
XX return;
XX }
XX /* now unpack the file */
XX
XX unpack(arc, f, hdr); /* unpack file from archive */
XX#ifdef MSDOS
XX setstamp(f, hdr->date, hdr->time); /* set the proper date/time
XX * stamp */
XX#endif
XX fclose(f); /* all done writing to file */
XX#ifdef BSD
XX setstamp(fix, hdr->date, hdr->time); /* use filename for BSD stamp */
XX#endif
XX}
SHAR_EOF
if test 5231 -ne "`wc -c arcext.c`"
then
echo shar: error transmitting arcext.c '(should have been 5231 characters)'
fi
echo shar: extracting arcio.c '(7950 characters)'
sed 's/^XX//' << \SHAR_EOF > arcio.c
XX/*
XX * $Log: arcio.c,v $
XX * Revision 1.2 88/04/11 18:01:16 hyc
XX * added support for squashing. (changed max hdrver from 8 to 9)
XX *
XX * Revision 1.1 88/04/11 17:59:43 hyc
XX * Initial revision
XX *
XX * Revision 1.3 87/12/19 04:25:24 hyc
XX * As before, different location tho.
XX *
XX * Revision 1.2 87/12/19 04:23:21 hyc
XX * Fix problem caused by indent(?) screwing up line boundaries...
XX *
XX * Revision 1.1 87/12/19 04:21:23 hyc
XX * Initial revision
XX *
XX * Revision 1.4 87/08/13 17:03:24 hyc
XX * Run thru the indent program...
XX * Revision 1.3 87/07/21 11:41:29 hyc *** empty
XX * log message ***
XX *
XX * Revision 1.2 87/07/21 08:19:45 hyc *** empty log message ***
XX *
XX */
XX
XX/*
XX * ARC - Archive utility - ARCIO
XX *
XX * Version 2.49, created on 07/25/86 at 16:44:23
XX *
XX * (C) COPYRIGHT 1985,86 by System Enhancement Associates; ALL RIGHTS RESERVED
XX *
XX * By: Thom Henderson
XX *
XX * Description: This file contains the file I/O routines used to manipulate an
XX * archive.
XX *
XX * Language: Computer Innovations Optimizing C86
XX */
XX#include <stdio.h>
XX#include "arc.h"
XX
XXINT
XXreadhdr(hdr, f) /* read a header from an archive */
XX struct heads *hdr; /* storage for header */
XX FILE *f; /* archive to read header from */
XX{
XX#ifndef MSDOS
XX unsigned char dummy[28];
XX INT i, j, k;
XX#endif
XX char name[13]; /* filename buffer */
XX INT try = 0;/* retry counter */
XX static INT first = 1; /* true only on first read */
XX
XX if (!f) /* if archive didn't open */
XX return 0; /* then pretend it's the end */
XX if (feof(f)) /* if no more data */
XX return 0; /* then signal end of archive */
XX
XX if (fgetc(f) != 26) { /* check archive validity */
XX if (warn) {
XX printf("An entry in %s has a bad header.", arcname);
XX nerrs++;
XX }
XX while (!feof(f)) {
XX try++;
XX if (fgetc(f) == 26) {
XX ungetc(hdrver = fgetc(f), f);
XX if (hdrver >= 0 && hdrver <= 9)
XX break;
XX }
XX }
XX
XX if (feof(f) && first)
XX abort("%s is not an archive", arcname);
XX
XX if (warn)
XX printf(" %d bytes skipped.\n", try);
XX
XX if (feof(f))
XX return 0;
XX }
XX hdrver = fgetc(f); /* get header version */
XX if (hdrver < 0)
XX abort("Invalid header in archive %s", arcname);
XX if (hdrver == 0)
XX return 0; /* note our end of archive marker */
XX if (hdrver > 9) {
XX fread(name, sizeof(char), 13, f);
XX#ifdef MTS
XX atoe(name, strlen(name));
XX#endif
XX printf("I don't know how to handle file %s in archive %s\n",
XX name, arcname);
XX printf("I think you need a newer version of ARC.\n");
XX exit(1);
XX }
XX /* amount to read depends on header type */
XX
XX if (hdrver == 1) { /* old style is shorter */
XX fread(hdr, sizeof(struct heads) - sizeof(long int), 1, f);
XX hdrver = 2; /* convert header to new format */
XX hdr->length = hdr->size; /* size is same when not
XX * packed */
XX } else
XX#ifdef MSDOS
XX fread(hdr, sizeof(struct heads), 1, f);
XX#else
XX fread(dummy, 27, 1, f);
XX
XX for (i = 0; i < 13; hdr->name[i] = dummy[i], i++);
XX#ifdef MTS
XX (void) atoe(hdr->name, strlen(hdr->name));
XX#endif
XX hdr->size = (LONG) ((dummy[16] << 24) + (dummy[15] << 16) + (dummy[14] << 8)
XX + dummy[13]);
XX hdr->date = (short) ((dummy[18] << 8) + dummy[17]);
XX hdr->time = (short) ((dummy[20] << 8) + dummy[19]);
XX hdr->crc = (short) ((dummy[22] << 8) + dummy[21]);
XX hdr->length = (LONG) ((dummy[26] << 24) + (dummy[25] << 16)
XX + (dummy[24] << 8) + dummy[23]);
XX#endif
XX
XX if (hdr->date > olddate
XX || (hdr->date == olddate && hdr->time > oldtime)) {
XX olddate = hdr->date;
XX oldtime = hdr->time;
XX }
XX first = 0;
XX return 1; /* we read something */
XX}
XX
XXput_int(number, f) /* write a 2 byte integer */
XX INT number;
XX FILE *f;
XX{
XX fputc(number & 255, f);
XX fputc(number >> 8, f);
XX}
XX
XXput_long(number, f) /* write a 4 byte integer */
XX LONG number;
XX FILE *f;
XX{
XX put_int(number & 0xFFFF, f);
XX put_int(number >> 16, f);
XX}
XX
XXINT
XXwritehdr(hdr, f) /* write a header to an archive */
XX struct heads *hdr; /* header to write */
XX FILE *f; /* archive to write to */
XX{
XX fputc(26, f); /* write out the mark of ARC */
XX fputc(hdrver, f); /* write out the header version */
XX if (!hdrver) /* if that's the end */
XX return; /* then write no more */
XX#ifdef MSDOS
XX fwrite(hdr, sizeof(struct heads), 1, f);
XX#else
XX /* byte/word ordering hassles... */
XX#ifdef MTS
XX etoa(hdr->name, strlen(hdr->name));
XX#endif
XX fwrite(hdr->name, 1, 13, f);
XX put_long(hdr->size, f);
XX put_int(hdr->date, f);
XX put_int(hdr->time, f);
XX put_int(hdr->crc, f);
XX put_long(hdr->length, f);
XX#endif
XX
XX /* note the newest file for updating the archive timestamp */
XX
XX if (hdr->date > arcdate
XX || (hdr->date == arcdate && hdr->time > arctime)) {
XX arcdate = hdr->date;
XX arctime = hdr->time;
XX }
XX}
XX
XXINT
XXputc_tst(c, t) /* put a character, with tests */
XX char c; /* character to output */
XX FILE *t; /* file to write to */
XX{
XX if (t)
XX#ifdef BSD
XX fputc(c, t);
XX#else
XX if (fputc(c, t) == EOF)
XX abort("Write fail (disk full?)");
XX#endif
XX}
XX
XX/*
XX * NOTE: The filecopy() function is used to move large numbers of bytes from
XX * one file to another. This particular version has been modified to improve
XX * performance in Computer Innovations C86 version 2.3 in the small memory
XX * model. It may not work as expected with other compilers or libraries, or
XX * indeed with different versions of the CI-C86 compiler and library, or with
XX * the same version in a different memory model.
XX *
XX * The following is a functional equivalent to the filecopy() routine that
XX * should work properly on any system using any compiler, albeit at the cost
XX * of reduced performance:
XX *
XX * filecopy(f,t,size)
XX * FILE *f, *t; long size;
XX * {
XX * while(size--)
XX * putc_tst(fgetc(f),t);
XX * }
XX */
XX#ifdef MSDOS
XX#include <fileio2.h>
XX
XXfilecopy(f, t, size) /* bulk file copier */
XX FILE *f, *t; /* files from and to */
XX long size; /* bytes to copy */
XX{
XX char *buf; /* buffer pointer */
XX char *alloc();/* buffer allocator */
XX unsigned int bufl; /* buffer length */
XX unsigned int coreleft(); /* space available reporter */
XX unsigned int cpy; /* bytes being copied */
XX long floc, tloc, fseek(); /* file pointers, setter */
XX struct regval reg; /* registers for DOS calls */
XX
XX if ((bufl = coreleft()) < 1000) /* see how much space we have */
XX abort("Out of memory");
XX bufl -= 1000; /* fudge factor for overhead */
XX if (bufl > 60000)
XX bufl = 60000; /* avoid choking alloc() */
XX if (bufl > size)
XX bufl = size; /* avoid wasting space */
XX buf = alloc(bufl); /* allocate our buffer */
XX
XX floc = fseek(f, 0L, 1); /* reset I/O system */
XX tloc = fseek(t, 0L, 1);
XX
XX segread(®.si); /* set segment registers for DOS */
XX
XX while (size > 0) { /* while more to copy */
XX reg.ax = 0x3F00;/* read from handle */
XX reg.bx = filehand(f);
XX reg.cx = bufl < size ? bufl : size; /* amount to read */
XX reg.dx = buf;
XX if (sysint21(®, ®) & 1)
XX abort("Read fail");
XX
XX cpy = reg.ax; /* amount actually read */
XX reg.ax = 0x4000;/* write to handle */
XX reg.bx = filehand(t);
XX reg.cx = cpy;
XX reg.dx = buf;
XX sysint21(®, ®);
XX
XX if (reg.ax != cpy)
XX abort("Write fail (disk full?)");
XX
XX size -= (long) cpy;
XX }
XX
XX free(buf); /* all done with buffer */
XX}
XX#else
XX
XXfilecopy(f, t, size) /* bulk file copier */
XX FILE *f, *t; /* files from and to */
XX LONG size; /* bytes to copy */
XX{
XX char *buf; /* buffer pointer */
XX char *malloc(); /* buffer allocator */
XX unsigned int bufl; /* buffer length */
XX unsigned int cpy; /* bytes being copied */
XX#ifdef MTS
XX#define MTEXT 0x0010
XX#endif
XX
XX bufl = 60000;
XX if (bufl > size)
XX bufl = size; /* don't waste space */
XX
XX buf = malloc(bufl);
XX
XX while (size > 0) {
XX cpy = fread(buf, sizeof(char), bufl < size ? bufl : size, f);
XX#ifdef MTS
XX if ((f->_fmode & MTEXT) && !image)
XX etoa(buf, cpy);
XX#endif
XX if (fwrite(buf, sizeof(char), cpy, t) != cpy)
XX abort("Write fail (no space?)");
XX size -= cpy;
XX }
XX
XX free(buf);
XX}
XX#endif
SHAR_EOF
if test 7950 -ne "`wc -c arcio.c`"
then
echo shar: error transmitting arcio.c '(should have been 7950 characters)'
fi
echo shar: extracting arclst.c '(4534 characters)'
sed 's/^XX//' << \SHAR_EOF > arclst.c
XX/*
XX * $Log: arclst.c,v $
XX * Revision 1.2 88/04/11 18:03:10 hyc
XX * added support for squashing, re-synch with MTS
XX *
XX * Revision 1.1 88/04/11 18:01:46 hyc
XX * Initial revision
XX *
XX * Revision 1.4 87/08/13 17:03:30 hyc
XX * Run thru the indent program...
XX * Revision 1.3 87/07/21 11:41:35 hyc *** empty
XX * log message ***
XX *
XX * Revision 1.2 87/07/21 08:23:17 hyc *** empty log message ***
XX *
XX */
XX
XX/*
XX * ARC - Archive utility - ARCLST
XX *
XX * Version 2.38, created on 07/25/86 at 17:52:20
XX *
XX * (C) COPYRIGHT 1985,86 by System Enhancement Associates; ALL RIGHTS RESERVED
XX *
XX * By: Thom Henderson
XX *
XX * Description: This file contains the routines used to list the contents of an
XX * archive.
XX *
XX * Language: Computer Innovations Optimizing C86
XX */
XX#include <stdio.h>
XX#include "arc.h"
XX
XXINT
XXlstarc(num, arg) /* list files in archive */
XX INT num; /* number of arguments */
XX char *arg[]; /* pointers to arguments */
XX{
XX struct heads hdr; /* header data */
XX INT list; /* true to list a file */
XX INT did[25];/* true when argument was used */
XX LONG tnum, tlen, tsize; /* totals */
XX INT n; /* index */
XX INT lstfile();
XX
XX tnum = tlen = tsize = 0;/* reset totals */
XX
XX for (n = 0; n < num; n++) /* for each argument */
XX did[n] = 0; /* reset usage flag */
XX rempath(num, arg); /* strip off paths */
XX
XX if (!kludge) {
XX printf("Name Length ");
XX if (bose)
XX printf(" Stowage SF Size now");
XX printf(" Date ");
XX if (bose)
XX printf(" Time CRC");
XX printf("\n");
XX
XX printf("============ ========");
XX if (bose)
XX printf(" ======== ==== ========");
XX printf(" =========");
XX if (bose)
XX printf(" ====== ====");
XX printf("\n");
XX }
XX openarc(0); /* open archive for reading */
XX
XX if (num) { /* if files were named */
XX while (readhdr(&hdr, arc)) { /* process all archive files */
XX list = 0; /* reset list flag */
XX for (n = 0; n < num; n++) { /* for each template
XX * given */
XX if (match(hdr.name, arg[n])) {
XX list = 1; /* turn on list flag */
XX did[n] = 1; /* turn on usage flag */
XX break; /* stop looking */
XX }
XX }
XX
XX if (list) { /* if this file is wanted */
XX if (!kludge)
XX lstfile(&hdr); /* then tell about it */
XX tnum++; /* update totals */
XX tlen += hdr.length;
XX tsize += hdr.size;
XX }
XX fseek(arc, hdr.size, 1); /* move to next header */
XX }
XX } else
XX while (readhdr(&hdr, arc)) { /* else report on all files */
XX if (!kludge)
XX lstfile(&hdr);
XX tnum++; /* update totals */
XX tlen += hdr.length;
XX tsize += hdr.size;
XX fseek(arc, hdr.size, 1); /* skip to next header */
XX }
XX
XX closearc(0); /* close archive after reading */
XX
XX if (!kludge) {
XX printf(" ==== ========");
XX if (bose)
XX printf(" ==== ========");
XX printf("\n");
XX }
XX printf("Total %6ld %8ld", tnum, tlen);
XX if (bose) {
XX if (tlen)
XX printf(" %3ld%%", 100L - (100L * tsize) / tlen);
XX else
XX printf(" ---");
XX printf(" %8ld", tsize);
XX }
XX printf("\n");
XX
XX if (note) {
XX for (n = 0; n < num; n++) { /* report unused args */
XX if (!did[n]) {
XX printf("File not found: %s\n", arg[n]);
XX nerrs++;
XX }
XX }
XX }
XX}
XX
XXINT
XXlstfile(hdr) /* tell about a file */
XX struct heads *hdr; /* pointer to header data */
XX{
XX INT yr, mo, dy; /* parts of a date */
XX INT hh, mm, ss; /* parts of a time */
XX
XX static char *mon[] = /* month abbreviations */
XX {
XX "Jan", "Feb", "Mar", "Apr",
XX "May", "Jun", "Jul", "Aug",
XX "Sep", "Oct", "Nov", "Dec"
XX };
XX
XX yr = (hdr->date >> 9) & 0x7f; /* dissect the date */
XX mo = (hdr->date >> 5) & 0x0f;
XX dy = hdr->date & 0x1f;
XX
XX hh = (hdr->time >> 11) & 0x1f; /* dissect the time */
XX mm = (hdr->time >> 5) & 0x3f;
XX ss = (hdr->time & 0x1f) * 2;
XX
XX printf("%-12s %8ld ", hdr->name, hdr->length);
XX
XX if (bose) {
XX switch (hdrver) {
XX case 1:
XX case 2:
XX printf(" -- ");
XX break;
XX case 3:
XX printf(" Packed ");
XX break;
XX case 4:
XX printf("Squeezed");
XX break;
XX case 5:
XX case 6:
XX case 7:
XX printf("crunched");
XX break;
XX case 8:
XX printf("Crunched");
XX break;
XX case 9:
XX printf("Squashed");
XX break;
XX default:
XX printf("Unknown!");
XX }
XX
XX if (hdr->length)
XX printf(" %3d%%", 100L - (100L * hdr->size) / hdr->length);
XX else
XX printf(" ---");
XX printf(" %8ld ", hdr->size);
XX }
XX printf("%2d %3s %02d", dy, mon[mo - 1], (yr + 80) % 100);
XX
XX if (bose)
XX printf(" %2d:%02d%c %04x",
XX (hh > 12 ? hh - 12 : hh), mm, (hh > 11 ? 'p' : 'a'),
XX hdr->crc & 0xffff);
XX
XX printf("\n");
XX}
SHAR_EOF
if test 4534 -ne "`wc -c arclst.c`"
then
echo shar: error transmitting arclst.c '(should have been 4534 characters)'
fi
echo shar: extracting arclzw.c '(23276 characters)'
sed 's/^XX//' << \SHAR_EOF > arclzw.c
XX/*
XX * $Log: arclzw.c,v $
XX * Revision 1.1 88/04/11 18:12:18 hyc
XX * Initial revision
XX *
XX * Revision 1.2 87/12/20 03:17:39 hyc
XX * Fix some problems introduced by indent...
XX *
XX * Revision 1.1 87/12/19 04:52:17 hyc
XX * Initial revision
XX *
XX * Revision 1.3.1.1 87/08/13 17:03:36 hyc
XX * Run thru indent, fixed some signed vs. unsigned problems
XX * with bp <-> buf, and inbuf and localbuf...
XX * Revision 1.3 87/07/21 11:41:41 hyc *** empty
XX * log message ***
XX *
XX * Revision 1.2 87/07/21 08:45:24 hyc *** empty log message ***
XX *
XX */
XX
XX/*
XX * ARC - Archive utility - ARCLZW
XX *
XX * Version 2.03, created on 10/24/86 at 11:46:22
XX *
XX * (C) COPYRIGHT 1985,86 by System Enhancement Associates; ALL RIGHTS RESERVED
XX *
XX * By: Thom Henderson
XX *
XX * Description: This file contains the routines used to implement Lempel-Zev
XX * data compression, which calls for building a coding table on the fly.
XX * This form of compression is especially good for encoding files which
XX * contain repeated strings, and can often give dramatic improvements over
XX * traditional Huffman SQueezing.
XX *
XX * Language: Computer Innovations Optimizing C86
XX *
XX * Programming notes: In this section I am drawing heavily on the COMPRESS
XX * program from UNIX. The basic method is taken from "A Technique for High
XX * Performance Data Compression", Terry A. Welch, IEEE Computer Vol 17, No 6
XX * (June 1984), pp 8-19. Also see "Knuth's Fundamental Algorithms", Donald
XX * Knuth, Vol 3, Section 6.4.
XX *
XX * As best as I can tell, this method works by tracing down a hash table of code
XX * strings where each entry has the property:
XX *
XX * if <string> <char> is in the table then <string> is in the table.
XX */
XX#include <stdio.h>
XX#include "arc.h"
XX
XX/* definitions for older style crunching */
XX
XX#define FALSE 0
XX#define TRUE !FALSE
XX#define TABSIZE 4096
XX#define NO_PRED 0xFFFF
XX#define EMPTY 0xFFFF
XX#define NOT_FND 0xFFFF
XX
XXstatic unsigned INT inbuf; /* partial input code storage */
XXstatic INT sp; /* current stack pointer */
XX
XXstatic struct entry { /* string table entry format */
XX char used; /* true when this entry is in use */
XX unsigned INT next; /* ptr to next in collision list */
XX unsigned INT predecessor; /* code for preceeding string */
XX unsigned char follower; /* char following string */
XX} string_tab[TABSIZE]; /* the code string table */
XX
XX
XX/* definitions for the new dynamic Lempel-Zev crunching */
XX
XX#define BITS 12 /* maximum bits per code */
XX#define HSIZE 5003 /* 80% occupancy */
XX#define INIT_BITS 9 /* initial number of bits/code */
XX
XXstatic INT n_bits; /* number of bits/code */
XXstatic INT maxcode; /* maximum code, given n_bits */
XX#define MAXCODE(n) ((1<<(n)) - 1) /* maximum code calculation */
XXstatic INT maxcodemax = 1 << BITS; /* largest possible code (+1) */
XX
XXstatic char buf[BITS]; /* input/output buffer */
XX
XXstatic unsigned char lmask[9] = /* left side masks */
XX{
XX 0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00
XX};
XXstatic unsigned char rmask[9] = /* right side masks */
XX{
XX 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff
XX};
XX
XXstatic INT offset; /* byte offset for code output */
XXstatic LONG in_count; /* length of input */
XXstatic LONG bytes_out; /* length of compressed output */
XXstatic LONG bytes_ref; /* output quality reference */
XXstatic LONG bytes_last; /* output size at last checkpoint */
XXstatic unsigned INT ent;
XX
XX/*
XX * To save much memory (which we badly need at this point), we overlay the
XX * table used by the previous version of Lempel-Zev with those used by the
XX * new version. Since no two of these routines will be used together, we can
XX * safely do this.
XX */
XX#ifdef MSDOS
XXstatic LONG *htab = string_tab; /* hash code table (crunch) */
XX#else
XXstatic LONG htab[HSIZE];
XX#endif
XXstatic unsigned INT codetab[HSIZE]; /* string code table (crunch) */
XX
XXstatic unsigned INT *prefix = codetab; /* prefix code table (uncrunch) */
XX#ifdef MSDOS
XXstatic unsigned char *suffix = string_tab; /* suffix table (uncrunch) */
XX#else
XXstatic unsigned char suffix[HSIZE];
XX#endif
XX
XXstatic INT free_ent; /* first unused entry */
XXstatic INT firstcmp; /* true at start of compression */
XXstatic unsigned char stack[HSIZE]; /* local push/pop stack */
XX
XX/*
XX * block compression parameters -- after all codes are used up, and
XX * compression rate changes, start over.
XX */
XX
XXstatic INT clear_flg;
XX#define CHECK_GAP 2048 /* ratio check interval */
XXstatic LONG checkpoint;
XXINT upd_tab();
XXstatic INT putcode();
XX
XX/*
XX * the next two codes should not be changed lightly, as they must not lie
XX * within the contiguous general code space.
XX */
XX#define FIRST 257 /* first free entry */
XX#define CLEAR 256 /* table clear output code */
XX
XX/*
XX * The cl_block() routine is called at each checkpoint to determine if
XX * compression would likely improve by resetting the code table. The method
XX * chosen to determine this is based on empirical observation that, in
XX * general, every 2k of input data should compress at least as well as the
XX * first 2k of input.
XX */
XX
XXstatic INT
XXcl_block(t) /* table clear for block compress */
XX FILE *t; /* our output file */
XX{
XX checkpoint = in_count + CHECK_GAP;
XX
XX if (bytes_ref) {
XX if (bytes_out - bytes_last > bytes_ref) {
XX setmem(htab, HSIZE * sizeof(long), 0xff);
XX free_ent = FIRST;
XX clear_flg = 1;
XX putcode(CLEAR, t);
XX bytes_ref = 0;
XX }
XX } else
XX bytes_ref = bytes_out - bytes_last;
XX
XX bytes_last = bytes_out; /* remember where we were */
XX}
XX
XX/*****************************************************************
XX *
XX * Output a given code.
XX * Inputs:
XX * code: A n_bits-bit integer. If == -1, then EOF. This assumes
XX * that n_bits =< (LONG)wordsize - 1.
XX * Outputs:
XX * Outputs code to the file.
XX * Assumptions:
XX * Chars are 8 bits long.
XX * Algorithm:
XX * Maintain a BITS character long buffer (so that 8 codes will
XX * fit in it exactly). When the buffer fills up empty it and start over.
XX */
XX
XXstatic INT
XXputcode(code, t) /* output a code */
XX INT code; /* code to output */
XX FILE *t; /* where to put it */
XX{
XX INT r_off = offset; /* right offset */
XX INT bits = n_bits; /* bits to go */
XX char *bp = buf; /* buffer pointer */
XX INT n; /* index */
XX
XX if (code >= 0) { /* if a real code *//* Get to the first byte. */
XX bp += (r_off >> 3);
XX r_off &= 7;
XX
XX /*
XX * Since code is always >= 8 bits, only need to mask the
XX * first hunk on the left.
XX */
XX *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
XX bp++;
XX bits -= (8 - r_off);
XX code >>= (8 - r_off);
XX
XX /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
XX if (bits >= 8) {
XX *bp++ = code;
XX code >>= 8;
XX bits -= 8;
XX }
XX /* Last bits. */
XX if (bits)
XX *bp = code;
XX
XX offset += n_bits;
XX
XX if (offset == (n_bits << 3)) {
XX bp = buf;
XX bits = n_bits;
XX bytes_out += bits;
XX do
XX putc_pak(*bp++, t);
XX while (--bits);
XX offset = 0;
XX }
XX /*
XX * If the next entry is going to be too big for the code
XX * size, then increase it, if possible.
XX */
XX if (free_ent > maxcode || clear_flg > 0) {
XX /*
XX * Write the whole buffer, because the input side
XX * won't discover the size increase until after
XX * it has read it.
XX */
XX if (offset > 0) {
XX bp = buf; /* reset pointer for writing */
XX bytes_out += n = n_bits;
XX while (n--)
XX putc_pak(*bp++, t);
XX }
XX offset = 0;
XX
XX if (clear_flg) { /* reset if clearing */
XX maxcode = MAXCODE(n_bits = INIT_BITS);
XX clear_flg = 0;
XX } else {/* else use more bits */
XX n_bits++;
XX if (n_bits == BITS)
XX maxcode = maxcodemax;
XX else
XX maxcode = MAXCODE(n_bits);
XX }
XX }
XX } else { /* dump the buffer on EOF */
XX bytes_out += n = (offset + 7) / 8;
XX
XX if (offset > 0)
XX while (n--)
XX putc_pak(*bp++, t);
XX offset = 0;
XX }
XX}
XX
XX/*****************************************************************
XX *
XX * Read one code from the standard input. If EOF, return -1.
XX * Inputs:
XX * cmpin
XX * Outputs:
XX * code or -1 is returned.
XX */
XX
XXstatic INT
XXgetcode(f) /* get a code */
XX FILE *f; /* file to get from */
XX{
XX INT code;
XX static INT offset = 0, size = 0;
XX INT r_off, bits;
XX unsigned char *bp = (unsigned char *) buf;
XX
XX if (clear_flg > 0 || offset >= size || free_ent > maxcode) {
XX /*
XX * If the next entry will be too big for the current code
XX * size, then we must increase the size. This implies
XX * reading a new buffer full, too.
XX */
XX if (free_ent > maxcode) {
XX n_bits++;
XX if (n_bits == BITS)
XX maxcode = maxcodemax; /* won't get any bigger
XX * now */
XX else
XX maxcode = MAXCODE(n_bits);
XX }
XX if (clear_flg > 0) {
XX maxcode = MAXCODE(n_bits = INIT_BITS);
XX clear_flg = 0;
XX }
XX for (size = 0; size < n_bits; size++) {
XX if ((code = getc_unp(f)) == EOF)
XX break;
XX else
XX buf[size] = code;
XX }
XX if (size <= 0)
XX return -1; /* end of file */
XX
XX offset = 0;
XX /* Round size down to integral number of codes */
XX size = (size << 3) - (n_bits - 1);
XX }
XX r_off = offset;
XX bits = n_bits;
XX
XX /*
XX * Get to the first byte.
XX */
XX bp += (r_off >> 3);
XX r_off &= 7;
XX
XX /* Get first part (low order bits) */
XX code = (*bp++ >> r_off);
XX bits -= 8 - r_off;
XX r_off = 8 - r_off; /* now, offset into code word */
XX
XX /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
XX if (bits >= 8) {
XX code |= *bp++ << r_off;
XX r_off += 8;
XX bits -= 8;
XX }
XX /* high order bits. */
XX code |= (*bp & rmask[bits]) << r_off;
XX offset += n_bits;
XX
XX return code & MAXCODE(BITS);
XX}
XX
XX/*
XX * compress a file
XX *
XX * Algorithm: use open addressing double hashing (no chaining) on the prefix
XX * code / next character combination. We do a variant of Knuth's algorithm D
XX * (vol. 3, sec. 6.4) along with G. Knott's relatively-prime secondary probe.
XX * Here, the modular division first probe is gives way to a faster
XX * exclusive-or manipulation. Also do block compression with an adaptive
XX * reset, where the code table is cleared when the compression ratio
XX * decreases, but after the table fills. The variable-length output codes
XX * are re-sized at this point, and a special CLEAR code is generated for the
XX * decompressor.
XX */
XX
XXINT
XXinit_cm(f, t) /* initialize for compression */
XX FILE *f; /* file we will be compressing */
XX FILE *t; /* where we will put it */
XX{
XX offset = 0;
XX bytes_out = bytes_last = 1;
XX bytes_ref = 0;
XX clear_flg = 0;
XX in_count = 1;
XX checkpoint = CHECK_GAP;
XX maxcode = MAXCODE(n_bits = INIT_BITS);
XX free_ent = FIRST;
XX setmem(htab, HSIZE * sizeof(LONG), 0xff);
XX n_bits = INIT_BITS; /* set starting code size */
XX
XX putc_pak(BITS, t); /* note our max code length */
XX
XX firstcmp = 1; /* next byte will be first */
XX}
XX
XXINT
XXputc_cm(c, t) /* compress a character */
XX unsigned char c; /* character to compress */
XX FILE *t; /* where to put it */
XX{
XX static LONG fcode;
XX static INT hshift;
XX INT i;
XX INT disp;
XX
XX if (firstcmp) { /* special case for first byte */
XX ent = c; /* remember first byte */
XX
XX hshift = 0;
XX for (fcode = (LONG) HSIZE; fcode < 65536L; fcode *= 2L)
XX hshift++;
XX hshift = 8 - hshift; /* set hash code range bound */
XX
XX firstcmp = 0; /* no longer first */
XX return;
XX }
XX in_count++;
XX
XX fcode = (LONG) (((LONG) c << BITS) + ent);
XX i = (c << hshift) ^ ent;/* xor hashing */
XX
XX if (htab[i] == fcode) {
XX ent = codetab[i];
XX return;
XX } else if (htab[i] < 0) /* empty slot */
XX goto nomatch;
XX disp = HSIZE - i; /* secondary hash (after G.Knott) */
XX if (i == 0)
XX disp = 1;
XX
XXprobe:
XX if ((i -= disp) < 0)
More information about the Alt.sources
mailing list