BSD ARC utility: Part 2 of 3.
Robert Wilhite
wilhite at usceast.UUCP
Wed Jan 14 07:14:34 AEST 1987
["Shar and enjoy"]
Here's the Really Working version of BSD ARC (Part 2 of 3),
which was forced into submission on our VAX.
-------------- cut here --------------
: This is a shar archive. Extract with sh, not csh.
echo x - arcext.c
cat > arcext.c << '!Funky!Stuff!'
/*
$define(arc,$ifdef(xarc,off,on))# macro switch for ARC only code
$define(xarc,$ifdef(xarc,on,off))# macro switch for XARC only code
*/
/* ARC - Archive utility - ARCEXT
$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =2.18), created on $tag(
TED_DATE DB =02/03/86) at $tag(
TED_TIME DB =22:55:19))#
$undefine(tag)#
$version
(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
By: Thom Henderson
Description:
This file contains the routines used to extract files from
an archive.
Language:
Computer Innovations Optimizing C86
*/
#include <stdio.h>
#include "arc.h"
#if ARC /* $emit($arc)# */
INT extarc(num,arg,prt) /* extract files from archive */
INT num; /* number of arguments */
char *arg[]; /* pointers to arguments */
INT prt; /* true if printing */
#endif /* $emit($xarc)# */
#if XARC
INT extarc() /* extract files from archive */
#endif /* $emit(on)# */
{
struct heads hdr; /* file header */
#if ARC /* $emit($arc)# */
INT save; /* true to save current file */
INT did[MAXARG]; /* true when argument was used */
char *i, *rindex(); /* string index */
char **name, *malloc(); /* name pointer list, allocator */
INT n; /* index */
INT extfile();
#if MSDOS
name = malloc(num*sizeof(char *)); /* get storage for name pointers */
#endif
#if BSD
name = (char **)malloc(num*sizeof(char *)); /* get storage for name pointers */
#endif
for(n=0; n<num; n++) /* for each argument */
{ did[n] = 0; /* reset usage flag */
if(!(i=rindex(arg[n],'\\'))) /* find start of name */
if(!(i=rindex(arg[n],'/')))
if(!(i=rindex(arg[n],':')))
i = arg[n]-1;
name[n] = i+1;
}
#endif /* $emit(on)# */
openarc(0); /* open archive for reading */
#if ARC /* $emit($arc)# */
if(num) /* if files were named */
{ while(readhdr(&hdr,arc)) /* while more files to check */
{ save = 0; /* reset save flag */
for(n=0; n<num; n++) /* for each template given */
{ if(match(hdr.name,name[n]))
{ save = 1; /* turn on save flag */
did[n] = 1; /* turn on usage flag */
break; /* stop looking */
}
}
if(save) /* extract if desired, else skip */
extfile(&hdr,arg[n],prt);
else fseek(arc,hdr.size,1);
}
}
else while(readhdr(&hdr,arc)) /* else extract all files */
extfile(&hdr,"",prt);
#endif /* $emit($xarc)# */
#if XARC
while(readhdr(&hdr,arc)) /* extract all files */
extfile(&hdr);
#endif /* $emit(on)# */
closearc(0); /* close archive after reading */
#if ARC /* $emit($arc)# */
if(note)
{ for(n=0; n<num; n++) /* report unused args */
{ if(!did[n])
{ printf("File not found: %s\n",arg[n]);
nerrs++;
}
}
}
free(name);
#endif /* $emit(on)# */
}
#if ARC /* $emit($arc)# */
static INT extfile(hdr,path,prt) /* extract a file */
struct heads *hdr; /* pointer to header data */
char *path; /* pointer to path name */
INT prt; /* true if printing */
#endif /* $emit($xarc)# */
#if XARC
static INT extfile(hdr) /* extract a file */
#endif /* $emit(on)# */
/* $define(use,$ife($arc,on,fix,hdr->name))# */
#if ARC
#define USE fix
#else
#define USE hdr->name
#endif
{
FILE *f, *fopen(); /* extracted file, opener */
char buf[STRLEN]; /* input buffer */
#if ARC /* $emit($arc)# */
char fix[STRLEN]; /* fixed name buffer */
char *i, *rindex(); /* string index */
if(prt) /* printing is much easier */
{ unpack(arc,stdout,hdr); /* unpack file from archive */
printf("\f"); /* eject the form */
return; /* see? I told you! */
}
strcpy(fix,path); /* note path name template */
if(!(i=rindex(fix,'\\'))) /* find start of name */
if(!(i=rindex(fix,'/')))
if(!(i=rindex(fix,':')))
i = fix-1;
strcpy(i+1,hdr->name); /* replace template with name */
#endif /* $emit(on)# */
if(note)
printf("Extracting file: %s\n",USE);
if(warn)
{ if(f=fopen(USE,"r")) /* see if it exists */
{ fclose(f);
printf("WARNING: File %s already exists!",USE);
while(1)
{ printf(" Overwrite it (y/n)? ");
fgets(buf,STRLEN,stdin);
*buf = toupper(*buf);
if(*buf=='Y' || *buf=='N')
break;
}
if(*buf=='N')
{ printf("%s not extracted.\n",USE);
fseek(arc,hdr->size,1);
return;
}
}
}
if(!(f=fopen(USE,"w")))
{ if(warn)
{ printf("Cannot create %s\n",USE);
nerrs++;
}
fseek(arc,hdr->size,1);
return;
}
/* now unpack the file */
unpack(arc,f,hdr); /* unpack file from archive */
setstamp(f,hdr->date,hdr->time); /* set the proper date/time stamp */
fclose(f); /* all done writing to file */
}
!Funky!Stuff!
echo x - arcio.c
cat > arcio.c << '!Funky!Stuff!'
static char *RCSid = "$Header: arcio.c,v 1.2 86/07/15 07:53:11 turner Exp $";
/*
* $Log: arcio.c,v $
* Hack-attack 1.3 86/12/20 01:23:45 wilhite at usceast.uucp
* Bludgeoned into submission for VAX 11/780 BSD4.2
* (ugly code, but fewer core dumps)
*
* Revision 1.2 86/07/15 07:53:11 turner
*
*
* Revision 1.1 86/06/26 15:00:21 turner
* initial version
*
*
*/
/* ARC - Archive utility - ARCIO
$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =2.30), created on $tag(
TED_DATE DB =02/03/86) at $tag(
TED_TIME DB =22:56:00))#
$undefine(tag)#
$version
(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
By: Thom Henderson
Description:
This file contains the file I/O routines used to manipulate
an archive.
Language:
Computer Innovations Optimizing C86
*/
#include <stdio.h>
#include "arc.h"
INT readhdr(hdr,f) /* read a header from an archive */
struct heads *hdr; /* storage for header */
FILE *f; /* archive to read header from */
{
#if BSD | ST
unsigned char dummy[28];
INT i,j,k;
#endif
char name[FNLEN]; /* filename buffer */
INT try = 0; /* retry counter */
static INT first = 1; /* true only on first read */
if(!f) /* if archive didn't open */
return 0; /* then pretend it's the end */
if(feof(f)) /* if no more data */
return 0; /* then signal end of archive */
if(fgetc(f)!=ARCMARK) /* check archive validity */
{ if(warn)
{ printf("An entry in %s has a bad header.\n",arcname);
nerrs++;
}
while(!feof(f))
{ try++;
if(fgetc(f)==ARCMARK)
{ ungetc(hdrver=fgetc(f),f);
if(hdrver>=0 && hdrver<=ARCVER)
break;
}
}
if(feof(f) && first)
abort("%s is not an archive",arcname);
if(warn)
printf(" %d bytes skipped.\n",try);
if(feof(f))
return 0;
}
hdrver = fgetc(f); /* get header version */
if(hdrver<0)
abort("Invalid header in archive %s",arcname);
if(hdrver==0)
return 0; /* note our end of archive marker */
if(hdrver>ARCVER)
{ fread(name,sizeof(char),FNLEN,f);
printf("I don't know how to handle file %s in archive %s\n",
name,arcname);
printf("I think you need a newer version of ARC.\n");
exit(1);
}
/* amount to read depends on header type */
if(hdrver==1) /* old style is shorter */
{ fread(hdr,sizeof(struct heads)-sizeof(long),1,f);
hdrver = 2; /* convert header to new format */
hdr->length = hdr->size; /* size is same when not packed */
}
else {
#if MSDOS
fread(hdr,sizeof(struct heads),1,f);
#endif
#if BSD | ST
fread(dummy,27,1,f);
for(i=0;i<13;hdr->name[i]=dummy[i],i++);
hdr->size = (long)((dummy[16]<<24) + (dummy[15]<<16) + (dummy[14]<<8)
+ dummy[13]);
hdr->date = (short)((dummy[18]<<8) + dummy[17]);
hdr->time = (short)((dummy[20]<<8) + dummy[19]);
hdr->crc = (short)((dummy[22]<<8) + dummy[21]);
hdr->length = (long)((dummy[26]<<24) + (dummy[25]<<16)
+ (dummy[24]<<8) + dummy[23]);
#endif
}
first = 0; return 1; /* we read something */
}
INT writehdr(hdr,f) /* write a header to an archive */
struct heads *hdr; /* header to write */
FILE *f; /* archive to write to */
{
unsigned char dummy[28];
fputc(ARCMARK,f); /* write out the mark of ARC */
fputc(hdrver,f); /* write out the header version */
if(!hdrver) /* if that's the end */
return; /* then write no more */
#if MSDOS
fwrite(hdr,sizeof(struct heads),1,f);
#endif
#if BSD | ST
/*
* put out the hdr in the brain damaged unaligned half back *sswards
* way HAL does it
*/
fwrite(hdr->name,1,13,f);
fwrite(&hdr->size,sizeof(long),1,f);
fwrite(&hdr->date,sizeof(INT),1,f);
fwrite(&hdr->time,sizeof(INT),1,f);
fwrite(&hdr->crc ,sizeof(INT),1,f);
fwrite(&hdr->length,sizeof(long),1,f);
#endif
/* note the newest file for updating the archive timestamp */
if(hdr->date>arcdate
||(hdr->date==arcdate && hdr->time>arctime))
{ arcdate = hdr->date;
arctime = hdr->time;
}
}
INT filecopy(f,t,size) /* bulk file copier */
FILE *f, *t; /* from, to */
long size; /* number of bytes */
{
INT len; /* length of a given copy */
INT putc_tst();
while(size--) /* while more bytes to move */
putc_tst(fgetc(f),t);
}
INT putc_tst(c,t) /* put a character, with tests */
char c; /* character to output */
FILE *t; /* file to write to */
{
if(t)
#if MSODS | ST
if(fputc(c,t)==EOF)
abort("Write fail (disk full?)");
#endif
#if BSD
/*
* for reasons beyond me BSD unix returns EOF
*/
fputc(c,t);
#endif
}
!Funky!Stuff!
echo x - arclst.c
cat > arclst.c << '!Funky!Stuff!'
static char *RCSid = "$Header: arclst.c,v 1.2 86/07/15 07:53:15 turner Exp $";
/*
* $Log: arclst.c,v $
* Hack-attack 1.3 86/12/20 01:23:45 wilhite at usceast.uucp
* Bludgeoned into submission for VAX 11/780 BSD4.2
* (ugly code, but fewer core dumps)
*
* Revision 1.2 86/07/15 07:53:15 turner
*
*
* Revision 1.1 86/06/26 15:00:23 turner
* initial version
*
*
*/
/* ARC - Archive utility - ARCLST
$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =2.34), created on $tag(
TED_DATE DB =02/03/86) at $tag(
TED_TIME DB =22:56:57))#
$undefine(tag)#
$version
(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
By: Thom Henderson
Description:
This file contains the routines used to list the contents
of an archive.
Language:
Computer Innovations Optimizing C86
*/
#include <stdio.h>
#include "arc.h"
INT lstarc(num,arg) /* list files in archive */
INT num; /* number of arguments */
char *arg[]; /* pointers to arguments */
{
struct heads hdr; /* header data */
INT list; /* true to list a file */
INT did[MAXARG]; /* true when argument was used */
long tnum, tlen, tsize; /* totals */
INT n; /* index */
INT lstfile();
tnum = tlen = tsize = 0; /* reset totals */
for(n=0; n<num; n++) /* for each argument */
did[n] = 0; /* reset usage flag */
rempath(num,arg); /* strip off paths */
if(!kludge)
{ printf("Name Length ");
if(bose)
printf(" Stowage SF Size now");
printf(" Date ");
if(bose)
printf(" Time CRC");
printf("\n");
printf("============ ========");
if(bose)
printf(" ======== ==== ========");
printf(" =========");
if(bose)
printf(" ====== ====");
printf("\n");
}
openarc(0); /* open archive for reading */
if(num) /* if files were named */
{ while(readhdr(&hdr,arc)) /* process all archive files */
{ list = 0; /* reset list flag */
for(n=0; n<num; n++) /* for each template given */
{ if(match(hdr.name,arg[n]))
{ list = 1; /* turn on list flag */
did[n] = 1; /* turn on usage flag */
break; /* stop looking */
}
}
if(list) /* if this file is wanted */
{ if(!kludge)
lstfile(&hdr); /* then tell about it */
tnum++; /* update totals */
tlen += hdr.length;
tsize += hdr.size;
}
fseek(arc,hdr.size,1); /* move to next header */
}
}
else while(readhdr(&hdr,arc)) /* else report on all files */
{ if(!kludge)
lstfile(&hdr);
tnum++; /* update totals */
tlen += hdr.length;
tsize += hdr.size;
fseek(arc,hdr.size,1); /* skip to next header */
}
closearc(0); /* close archive after reading */
if(!kludge)
{ printf(" ==== ========");
if(bose)
printf(" ==== ========");
printf("\n");
}
printf("Total %6ld %8ld ",tnum,tlen);
if(bose)
printf(" %3ld%% %8ld ",100L - (100L*tsize)/tlen,tsize);
printf("\n");
if(note)
{ for(n=0; n<num; n++) /* report unused args */
{ if(!did[n])
{ printf("File not found: %s\n",arg[n]);
nerrs++;
}
}
}
}
static INT lstfile(hdr) /* tell about a file */
struct heads *hdr; /* pointer to header data */
{
INT yr, mo, dy; /* parts of a date */
INT hh, mm, ss; /* parts of a time */
static char *mon[] = /* month abbreviations */
{ "Jan", "Feb", "Mar", "Apr",
"May", "Jun", "Jul", "Aug",
"Sep", "Oct", "Nov", "Dec"
};
yr = (hdr->date >> 9) & 0x7f; /* dissect the date */
mo = (hdr->date >> 5) & 0x0f;
dy = hdr->date & 0x1f;
hh = (hdr->time >> 11) & 0x1f; /* dissect the time */
mm = (hdr->time >> 5) & 0x3f;
ss = (hdr->time & 0x1f) * 2;
printf("%-12s %8ld ",hdr->name,hdr->length);
if(bose)
{ switch(hdrver)
{
case 1:
case 2:
printf(" -- ");
break;
case 3:
printf(" Packed ");
break;
case 4:
printf("Squeezed");
break;
case 5:
case 6:
case 7:
printf("crunched");
break;
case 8:
printf("Crunched");
break;
default:
printf("Unknown!");
}
printf(" %3d%%",100L - (100L*hdr->size)/hdr->length);
printf(" %8ld ",hdr->size);
}
printf("%2d %3s %02d", dy, mon[mo-1], (yr+80)%100);
if(bose)
printf(" %2d:%02d%c %04x",
(hh>12?hh-12:hh), mm, (hh>12?'p':'a'),
(unsigned INT)hdr->crc);
printf("\n");
}
!Funky!Stuff!
echo x - arclzw.c
cat > arclzw.c << '!Funky!Stuff!'
static char *RCSid = "$Header: arclzw.c,v 1.2 86/07/15 07:53:20 turner Exp $";
/*
* $Log: arclzw.c,v $
* Hack-attack 1.3 86/12/20 01:23:45 wilhite at usceast.uucp
* Bludgeoned into submission for VAX 11/780 BSD4.2
* (ugly code, but fewer core dumps)
*
* Revision 1.2 86/07/15 07:53:20 turner
*
*
* Revision 1.1 86/06/26 15:00:26 turner
* initial version
*
*
*/
/* ARC - Archive utility - ARCLZW
$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =1.88), created on $tag(
TED_DATE DB =01/20/86) at $tag(
TED_TIME DB =16:47:04))#
$undefine(tag)#
$version
(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
By: Thom Henderson
Description:
This file contains the routines used to implement Lempel-Zev
data compression, which calls for building a coding table on
the fly. This form of compression is especially good for encoding
files which contain repeated strings, and can often give dramatic
improvements over traditional Huffman SQueezing.
Language:
Computer Innovations Optimizing C86
Programming notes:
In this section I am drawing heavily on the COMPRESS program
from UNIX. The basic method is taken from "A Technique for High
Performance Data Compression", Terry A. Welch, IEEE Computer
Vol 17, No 6 (June 1984), pp 8-19. Also see "Knuth's Fundamental
Algorithms", Donald Knuth, Vol 3, Section 6.4.
As best as I can tell, this method works by tracing down a hash
table of code strings where each entry has the property:
if <string> <char> is in the table
then <string> is in the table.
*/
#include <stdio.h>
#include "arc.h"
/* definitions for older style crunching */
#define FALSE 0
#define TRUE !FALSE
#define TABSIZE 4096
#define NO_PRED 0xFFFF
#define EMPTY 0xFFFF
#define NOT_FND 0xFFFF
static unsigned INT inbuf; /* partial input code storage */
static INT sp; /* current stack pointer */
static struct entry /* string table entry format */
{ char used; /* true when this entry is in use */
unsigned INT next; /* ptr to next in collision list */
unsigned INT predecessor; /* code for preceeding string */
unsigned char follower; /* char following string */
} string_tab[TABSIZE]; /* the code string table */
/* definitions for the new dynamic Lempel-Zev crunching */
#define BITS 12 /* maximum bits per code */
#define HSIZE 5003 /* 80% occupancy */
#define INIT_BITS 9 /* initial number of bits/code */
static INT n_bits; /* number of bits/code */
static INT maxcode; /* maximum code, given n_bits */
#define MAXCODE(n) ((1<<(n)) - 1) /* maximum code calculation */
static INT maxcodemax = 1 << BITS; /* largest possible code (+1) */
static unsigned char buf[BITS]; /* input/output buffer */
static unsigned char lmask[9] = /* left side masks */
{ 0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00 };
static unsigned char rmask[9] = /* right side masks */
{ 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
static INT offset; /* byte offset for code output */
static long in_count; /* length of input */
static long bytes_out; /* length of compressed output */
static unsigned INT ent;
/* To save much memory (which we badly need at this point), we overlay
* the table used by the previous version of Lempel-Zev with those used
* by the new version. Since no two of these routines will be used
* together, we can safely do this. Note that the tables used for Huffman
* squeezing may NOT overlay these, since squeezing and crunching are done
* in parallel.
*/
#if MSODS
static long *htab = string_tab; /* hash code table (crunch) */
#endif
#if BSD | ST
static long htab[HSIZE]; /* hash code table (crunch) */
#endif
static unsigned INT codetab[HSIZE]; /* string code table (crunch) */
static unsigned INT *prefix = codetab; /* prefix code table (uncrunch) */
#if MSDOS
static unsigned char *suffix = string_tab; /* suffix table (uncrunch) */
#endif
#if BSD | ST
static unsigned char suffix[HSIZE]; /* suffix table (uncrunch) */
#endif
static INT free_ent; /* first unused entry */
static INT firstcmp; /* true at start of compression */
static unsigned char stack[HSIZE]; /* local push/pop stack */
/*
* block compression parameters -- after all codes are used up,
* and compression rate changes, start over.
*/
static INT clear_flg;
static long ratio;
#define CHECK_GAP 10000 /* ratio check interval */
static long checkpoint;
/*
* the next two codes should not be changed lightly, as they must not
* lie within the contiguous general code space.
*/
#define FIRST 257 /* first free entry */
#define CLEAR 256 /* table clear output code */
static INT cl_block(t) /* table clear for block compress */
FILE *t; /* our output file */
{
long rat;
INT putcode();
checkpoint = in_count + CHECK_GAP;
if(in_count > 0x007fffff) /* shift will overflow */
{ rat = bytes_out >> 8;
if(rat == 0) /* Don't divide by zero */
rat = 0x7fffffff;
else rat = in_count / rat;
}
else rat = (in_count<<8)/bytes_out;/* 8 fractional bits */
if(rat > ratio)
ratio = rat;
else
{ ratio = 0;
setmem (htab,HSIZE*sizeof(long),0xff);
free_ent = FIRST;
clear_flg = 1;
putcode(CLEAR,t);
}
}
/*****************************************************************
*
* Output a given code.
* Inputs:
* code: A n_bits-bit integer. If == -1, then EOF. This assumes
* that n_bits =< (long)wordsize - 1.
* Outputs:
* Outputs code to the file.
* Assumptions:
* Chars are 8 bits long.
* Algorithm:
* Maintain a BITS character long buffer (so that 8 codes will
* fit in it exactly). When the buffer fills up empty it and start over.
*/
static INT putcode(code,t) /* output a code */
INT code; /* code to output */
FILE *t; /* where to put it */
{
INT r_off = offset; /* right offset */
INT bits = n_bits; /* bits to go */
unsigned char *bp = buf; /* buffer pointer */
INT n; /* index */
if(code >= 0) /* if a real code */
{ /*
* Get to the first byte.
*/
bp += (r_off >> 3);
r_off &= 7;
/*
* Since code is always >= 8 bits, only need to mask the first
* hunk on the left.
*/
*bp = (*bp&rmask[r_off]) | (code<<r_off) & lmask[r_off];
bp++;
bits -= (8 - r_off);
code >>= (8 - r_off);
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
if(bits >= 8)
{ *bp++ = code;
code >>= 8;
bits -= 8;
}
/* Last bits. */
if(bits)
*bp = code;
offset += n_bits;
if(offset == (n_bits << 3))
{ bp = buf;
bits = n_bits;
bytes_out += bits;
do
putc_pak(*bp++,t);
while(--bits);
offset = 0;
}
/*
* If the next entry is going to be too big for the code size,
* then increase it, if possible.
*/
if(free_ent>maxcode || clear_flg>0)
{ /*
* Write the whole buffer, because the input side won't
* discover the size increase until after it has read it.
*/
if(offset > 0)
{ bp = buf; /* reset pointer for writing */
bytes_out += n = n_bits;
while(n--)
putc_pak(*bp++,t);
}
offset = 0;
if(clear_flg) /* reset if clearing */
{ maxcode = MAXCODE(n_bits = INIT_BITS);
clear_flg = 0;
}
else /* else use more bits */
{ n_bits++;
if(n_bits == BITS)
maxcode = maxcodemax;
else
maxcode = MAXCODE(n_bits);
}
}
}
else /* dump the buffer on EOF */
{ bytes_out += n = (offset+7) / 8;
if(offset > 0)
while(n--)
putc_pak(*bp++,t);
offset = 0;
}
}
/*****************************************************************
*
* Read one code from the standard input. If EOF, return -1.
* Inputs:
* cmpin
* Outputs:
* code or -1 is returned.
*/
static INT getcode(f) /* get a code */
FILE *f; /* file to get from */
{
INT code;
static INT offset = 0, size = 0;
INT r_off, bits;
unsigned char *bp = buf;
if(clear_flg > 0 || offset >= size || free_ent > maxcode)
{ /*
* If the next entry will be too big for the current code
* size, then we must increase the size. This implies reading
* a new buffer full, too.
*/
if(free_ent > maxcode)
{ n_bits++;
if(n_bits == BITS)
maxcode = maxcodemax; /* won't get any bigger now */
else maxcode = MAXCODE(n_bits);
}
if(clear_flg > 0)
{ maxcode = MAXCODE(n_bits = INIT_BITS);
clear_flg = 0;
}
for(size=0; size<n_bits; size++)
{ if((code=getc_unp(f))==EOF)
break;
else buf[size] = code;
}
if(size <= 0)
return -1; /* end of file */
offset = 0;
/* Round size down to integral number of codes */
size = (size << 3)-(n_bits - 1);
}
r_off = offset;
bits = n_bits;
/*
* Get to the first byte.
*/
bp +=(r_off >> 3);
r_off &= 7;
/* Get first part (low order bits) */
code = (*bp++ >> r_off);
bits -= 8 - r_off;
r_off = 8 - r_off; /* now, offset into code word */
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
if(bits >= 8)
{ code |= *bp++ << r_off;
r_off += 8;
bits -= 8;
}
/* high order bits. */
code |= (*bp & rmask[bits]) << r_off;
offset += n_bits;
return code;
}
/*
* compress a file
*
* Algorithm: use open addressing double hashing (no chaining) on the
* prefix code / next character combination. We do a variant of Knuth's
* algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
* secondary probe. Here, the modular division first probe is gives way
* to a faster exclusive-or manipulation. Also do block compression with
* an adaptive reset, where the code table is cleared when the compression
* ratio decreases, but after the table fills. The variable-length output
* codes are re-sized at this point, and a special CLEAR code is generated
* for the decompressor.
*/
INT init_cm(f,t) /* initialize for compression */
FILE *f; /* file we will be compressing */
FILE *t; /* where we will put it */
{
offset = 0;
bytes_out = 1;
clear_flg = 0;
ratio = 0;
in_count = 1;
checkpoint = CHECK_GAP;
maxcode = MAXCODE(n_bits = INIT_BITS);
free_ent = FIRST;
setmem(htab,HSIZE*sizeof(long),0xff);
n_bits = INIT_BITS; /* set starting code size */
putc_pak(BITS,t); /* note our max code length */
firstcmp = 1; /* next byte will be first */
}
INT putc_cm(c,t) /* compress a character */
unsigned char c; /* character to compress */
FILE *t; /* where to put it */
{
static long fcode;
static INT hshift;
INT i;
INT disp;
if(firstcmp) /* special case for first byte */
{ ent = c; /* remember first byte */
hshift = 0;
for(fcode=(long)HSIZE; fcode<65536L; fcode*=2L)
hshift++;
hshift = 8 - hshift; /* set hash code range bund */
firstcmp = 0; /* no longer first */
return;
}
in_count++;
fcode =(long)(((long)c << BITS)+ent);
i = (c<<hshift)^ent; /* xor hashing */
if(htab[i]==fcode)
{ ent = codetab[i];
return;
}
else if(htab[i]<0) /* empty slot */
goto nomatch;
disp = HSIZE - i; /* secondary hash (after G.Knott) */
if(i == 0)
disp = 1;
probe:
if((i -= disp) < 0)
i += HSIZE;
if(htab[i] == fcode)
{ ent = codetab[i];
return;
}
if(htab[i] > 0)
goto probe;
nomatch:
putcode(ent,t);
ent = c;
if(free_ent < maxcodemax)
{ codetab[i] = free_ent++; /* code -> hashtable */
htab[i] = fcode;
}
else if((long)in_count >= checkpoint)
cl_block(t);
}
long pred_cm(t) /* finish compressing a file */
FILE *t; /* where to put it */
{
putcode(ent,t); /* put out the final code */
putcode(-1,t); /* tell output we are done */
return bytes_out; /* say how big it got */
}
/*
* Decompress a file. This routine adapts to the codes in the file
* building the string table on-the-fly; requiring no table to be stored
* in the compressed file. The tables used herein are shared with those of
* the compress() routine. See the definitions above.
*/
INT decomp(f,t) /* decompress a file */
FILE *f; /* file to read codes from */
FILE *t; /* file to write text to */
{
unsigned char *stackp;
INT finchar;
INT code, oldcode, incode;
if((code=getc_unp(f))!=BITS)
abort("File packed with %d bits, I can only handle %d",code,BITS);
n_bits = INIT_BITS; /* set starting code size */
clear_flg = 0;
/*
* As above, initialize the first 256 entries in the table.
*/
maxcode = MAXCODE(n_bits=INIT_BITS);
for(code = 255; code >= 0; code--)
{ prefix[code] = 0;
suffix[code] = (unsigned char)code;
}
free_ent = FIRST;
finchar = oldcode = getcode(f);
if(oldcode == -1) /* EOF already? */
return; /* Get out of here */
putc_ncr((char)finchar,t); /* first code must be 8 bits=char */
stackp = stack;
while((code = getcode(f))> -1)
{ if(code==CLEAR)
{ for(code = 255; code >= 0; code--)
prefix[code] = 0;
clear_flg = 1;
free_ent = FIRST - 1;
if((code=getcode(f))==-1)/* O, untimely death! */
break;
}
incode = code;
/*
* Special case for KwKwK string.
*/
if(code >= free_ent)
{ *stackp++ = finchar;
code = oldcode;
}
/*
* Generate output characters in reverse order
*/
while(code >= 256)
{ *stackp++ = suffix[code];
code = prefix[code];
}
*stackp++ = finchar = suffix[code];
/*
* And put them out in forward order
*/
do
putc_ncr(*--stackp,t);
while(stackp > stack);
/*
* Generate the new entry.
*/
if((code=free_ent) < maxcodemax)
{ prefix[code] = (unsigned short)oldcode;
suffix[code] = finchar;
free_ent = code+1;
}
/*
* Remember previous code.
*/
oldcode = incode;
}
}
/*************************************************************************
* Please note how much trouble it can be to maintain upwards *
* compatibility. All that follows is for the sole purpose of unpacking *
* files which were packed using an older method. *
*************************************************************************/
/* The h() pointer points to the routine to use for calculating a hash
value. It is set in the init routines to point to either of oldh()
or newh().
oldh() calculates a hash value by taking the middle twelve bits
of the square of the key.
newh() works somewhat differently, and was tried because it makes
ARC about 23% faster. This approach was abandoned because dynamic
Lempel-Zev (above) works as well, and packs smaller also. However,
inadvertent release of a developmental copy forces us to leave this in.
*/
static unsigned INT (*h)(); /* pointer to hash function */
static unsigned INT oldh(pred,foll) /* old hash function */
unsigned INT pred; /* code for preceeding string */
unsigned char foll; /* value of following char */
{
long local; /* local hash value */
local = (pred + foll) | 0x0800; /* create the hash key */
local *= local; /* square it */
return (local >> 6) & 0x0FFF; /* return the middle 12 bits */
}
static unsigned INT newh(pred,foll) /* new hash function */
unsigned INT pred; /* code for preceeding string */
unsigned char foll; /* value of following char */
{
return ((pred+foll)*15073)&0xFFF; /* faster hash */
}
/* The eolist() function is used to trace down a list of entries with
duplicate keys until the last duplicate is found.
*/
static unsigned INT eolist(index) /* find last duplicate */
unsigned INT index;
{
INT temp;
while(temp=string_tab[index].next) /* while more duplicates */
index = temp;
return index;
}
/* The hash() routine is used to find a spot in the hash table for a new
entry. It performs a "hash and linear probe" lookup, using h() to
calculate the starting hash value and eolist() to perform the linear
probe. This routine DOES NOT detect a table full condition. That
MUST be checked for elsewhere.
*/
static unsigned INT hash(pred,foll) /* find spot in the string table */
unsigned INT pred; /* code for preceeding string */
unsigned char foll; /* char following string */
{
unsigned INT local, tempnext; /* scratch storage */
struct entry *ep; /* allows faster table handling */
local = (*h)(pred,foll); /* get initial hash value */
if(!string_tab[local].used) /* if that spot is free */
return local; /* then that's all we need */
else /* else a collision has occured */
{ local = eolist(local); /* move to last duplicate */
/* We must find an empty spot. We start looking 101 places
down the table from the last duplicate.
*/
tempnext = (local+101) & 0x0FFF;
ep = &string_tab[tempnext]; /* initialize pointer */
while(ep->used) /* while empty spot not found */
{ if(++tempnext==TABSIZE) /* if we are at the end */
{ tempnext = 0; /* wrap to beginning of table*/
ep = string_tab;
}
else ++ep; /* point to next element in table */
}
/* local still has the pointer to the last duplicate, while
tempnext has the pointer to the spot we found. We use
this to maintain the chain of pointers to duplicates.
*/
string_tab[local].next = tempnext;
return tempnext;
}
}
/* The unhash() function is used to search the hash table for a given key.
Like hash(), it performs a hash and linear probe search. It returns
either the number of the entry (if found) or NOT_FND (if not found).
*/
static unsigned INT unhash(pred,foll) /* search string table for a key */
unsigned INT pred; /* code of preceeding string */
unsigned char foll; /* character following string */
{
unsigned INT local, offset; /* scratch storage */
struct entry *ep; /* this speeds up access */
local = (*h)(pred,foll); /* initial hash */
while(1)
{ ep = &string_tab[local]; /* speed up table access */
if((ep->predecessor==pred) && (ep->follower==foll))
return local; /* we have a match */
if(!ep->next) /* if no more duplicates */
return NOT_FND; /* then key is not listed */
local = ep->next; /* move on to next duplicate */
}
}
/* The init_tab() routine is used to initialize our hash table.
You realize, of course, that "initialize" is a complete misnomer.
*/
static INT init_tab() /* set ground state in hash table */
{
unsigned INT i; /* table index */
INT upd_tab();
setmem((char *)string_tab,sizeof(string_tab),0);
for(i=0; i<256; i++) /* list all single byte strings */
upd_tab(NO_PRED,i);
inbuf = EMPTY; /* nothing is in our buffer */
}
/* The upd_tab routine is used to add a new entry to the string table.
As previously stated, no checks are made to ensure that the table
has any room. This must be done elsewhere.
*/
INT upd_tab(pred,foll) /* add an entry to the table */
unsigned INT pred; /* code for preceeding string */
unsigned INT foll; /* character which follows string */
{
struct entry *ep; /* pointer to current entry */
/* calculate offset just once */
ep = &string_tab[hash(pred,foll)];
ep->used = TRUE; /* this spot is now in use */
ep->next = 0; /* no duplicates after this yet */
ep->predecessor = pred; /* note code of preceeding string */
ep->follower = foll; /* note char after string */
}
/* This algorithm encoded a file into twelve bit strings (three nybbles).
The gocode() routine is used to read these strings a byte (or two)
at a time.
*/
static INT gocode(fd) /* read in a twelve bit code */
FILE *fd; /* file to get code from */
{
unsigned INT localbuf, returnval;
if(inbuf==EMPTY) /* if on a code boundary */
{ if((localbuf=getc_unp(fd))==EOF) /* get start of next code */
return EOF; /* pass back end of file status */
localbuf &= 0xFF; /* mask down to true byte value */
if((inbuf=getc_unp(fd))==EOF) /* get end of code, start of next */
return EOF; /* this should never happen */
inbuf &= 0xFF; /* mask down to true byte value */
returnval = ((localbuf<<4)&0xFF0) + ((inbuf>>4)&0x00F);
inbuf &= 0x000F; /* leave partial code pending */
}
else /* buffer contains first nybble */
{ if((localbuf=getc_unp(fd))==EOF)
return EOF;
localbuf &= 0xFF;
returnval = localbuf + ((inbuf<<8)&0xF00);
inbuf = EMPTY; /* note no hanging nybbles */
}
return returnval; /* pass back assembled code */
}
static INT push(c) /* push char onto stack */
INT c; /* character to push */
{
stack[sp] = ((char) c); /* coerce integer into a char */
if(++sp >= TABSIZE)
abort("Stack overflow\n");
}
static INT pop() /* pop character from stack */
{
if(sp>0)
return ((INT) stack[--sp]); /* leave ptr at next empty slot */
else return EMPTY;
}
/***** LEMPEL-ZEV DECOMPRESSION *****/
static INT code_count; /* needed to detect table full */
static unsigned INT code; /* where we are so far */
static INT firstc; /* true only on first character */
INT init_ucr(new) /* get set for uncrunching */
INT new; /* true to use new hash function */
{
if(new) /* set proper hash function */
h = newh;
else h = oldh;
sp = 0; /* clear out the stack */
init_tab(); /* set up atomic code definitions */
code_count = TABSIZE - 256; /* note space left in table */
firstc = 1; /* true only on first code */
}
INT getc_ucr(f) /* get next uncrunched byte */
FILE *f; /* file containing crunched data */
{
unsigned INT c; /* a character of input */
INT code, newcode;
static INT oldcode, finchar;
struct entry *ep; /* allows faster table handling */
if(firstc) /* first code is always known */
{ firstc = FALSE; /* but next will not be first */
oldcode = gocode(f);
return finchar = string_tab[oldcode].follower;
}
if(!sp) /* if stack is empty */
{ if((code=newcode=gocode(f))==EOF)
return EOF;
ep = &string_tab[code]; /* initialize pointer */
if(!ep->used) /* if code isn't known */
{ code = oldcode;
ep = &string_tab[code]; /* re-initialize pointer */
push(finchar);
}
while(ep->predecessor!=NO_PRED)
{ push(ep->follower); /* decode string backwards */
code = ep->predecessor;
ep = &string_tab[code];
}
push(finchar=ep->follower); /* save first character also */
/* The above loop will terminate, one way or another,
with string_tab[code].follower equal to the first
character in the string.
*/
if(code_count) /* if room left in string table */
{ upd_tab(oldcode,finchar);
--code_count;
}
oldcode = newcode;
}
return pop(); /* return saved character */
}
!Funky!Stuff!
echo x - arcm.h
cat > arcm.h << '!Funky!Stuff!'
/*
* $Header: arcm.h,v 1.2 86/07/15 07:53:40 turner Exp $
*/
/*
* $Log: arcm.h,v $
* Revision 1.2 86/07/15 07:53:40 turner
*
*
* Revision 1.1 86/06/26 15:01:25 turner
* initial version
*
*
*/
/*
*
* ARC archive utility - standard MACRO insert file.
*
* parameters:
*
*/
#define ARCMARK 26 /* special archive marker */
#define ARCVER 8 /* archive header version code */
#define STRLEN 100 /* system standard string length */
#define FNLEN 13 /* file name length */
#define MAXARG 25 /* maximum number of arguments */
#define ARC 1
#define XARC 0
!Funky!Stuff!
echo x - arcmatch.c
cat > arcmatch.c << '!Funky!Stuff!'
static char *RCSid = "$Header: arcmatch.c,v 1.2 86/07/15 07:53:42 turner Exp $";
/*
* $Log: arcmatch.c,v $
* Hack-attack 1.3 86/12/20 01:23:45 wilhite at usceast.uucp
* Bludgeoned into submission for VAX 11/780 BSD4.2
* (ugly code, but fewer core dumps)
*
* Revision 1.2 86/07/15 07:53:42 turner
*
*
* Revision 1.1 86/06/26 15:00:34 turner
* initial version
*
*
*/
/* ARC - Archive utility - ARCMATCH
$define(tag,$$segment(@1,$$index(@1,=)+1))#
$define(version,Version $tag(
TED_VERSION DB =2.17), created on $tag(
TED_DATE DB =12/17/85) at $tag(
TED_TIME DB =20:32:18))#
$undefine(tag)#
$version
(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
By: Thom Henderson
Description:
This file contains service routines needed to maintain an archive.
Language:
Computer Innovations Optimizing C86
*/
#include <stdio.h>
#include "arc.h"
INT match(n,t) /* test name against template */
char *n; /* name to test */
char *t; /* template to test against */
{
#if MSDOS
upper(n); upper(t); /* avoid case problems */
#endif
/* first match name part */
while((*n && *n!='.') || (*t && *t!='.'))
{ if(*n!=*t && *t!='?') /* match fail? */
{ if(*t!='*') /* wildcard fail? */
return 0; /* then no match */
else /* else jump over wildcard */
{ while(*n && *n!='.')
n++;
while(*t && *t!='.')
t++;
break; /* name part matches wildcard */
}
}
else /* match good for this char */
{ n++; /* advance to next char */
t++;
}
}
if(*n && *n=='.') n++; /* skip extension delimiters */
if(*t && *t=='.') t++;
/* now match name part */
while(*n || *t)
{ if(*n!=*t && *t!='?') /* match fail? */
{ if(*t!='*') /* wildcard fail? */
return 0; /* then no match */
else return 1; /* else good enough */
}
else /* match good for this char */
{ n++; /* advance to next char */
t++;
}
}
return 1; /* match worked */
}
INT rempath(nargs,arg) /* remove paths from filenames */
INT nargs; /* number of names */
char *arg[]; /* pointers to names */
{
char *i, *rindex(); /* string index, reverse indexer */
INT n; /* index */
for(n=0; n<nargs; n++) /* for each supplied name */
{ if(!(i=rindex(arg[n],'\\'))) /* search for end of path */
if(!(i=rindex(arg[n],'/')))
i=rindex(arg[n],':');
if(i) /* if path was found */
arg[n] = i+1; /* then skip it */
}
}
!Funky!Stuff!
echo x - arcmisc.c
cat > arcmisc.c << '!Funky!Stuff!'
#include <stdio.h>
#include "arc.h"
/* split up a file name (subroutine for makefnam)
*
* Hack-attack 1.3 86/12/20 01:23:45 wilhite at usceast.uucp
* Bludgeoned into submission for VAX 11/780 BSD4.2
* (ugly code, but fewer core dumps)
*/
static INT _makefn(source,dest)
unsigned char *source;
unsigned char *dest;
{
INT j;
setmem (dest, 17, 0); /* clear result field */
if (strlen (source) > 1 && source[1] == ':')
for (j = 0; j < 2;)
dest[j++] = *source++;
for (j = 3; *source && *source != '.'; ++source)
if (j < 11)
dest[j++] = *source;
for (j = 12; *source; ++source)
if (j < 16)
dest[j++] = *source;
}
/* make a file name using a template
*/
char *makefnam(rawfn,template,result)
unsigned char *rawfn; /* the original file name */
unsigned char *template; /* the template data */
unsigned char *result; /* where to place the result */
{
unsigned char et[17],er[17];
_makefn(template,et);
_makefn(rawfn,er);
*result=0; /* assure no data */
strcat(result,er[0]?er:et);
strcat(result,er[3]?er+3:et+3);
strcat(result,er[12]?er+12:et+12);
return ((char *)&result[0]);
}
INT freedir(dirs)
register struct direct **dirs;
{
register INT ii;
if(dirs == (struct direct **)0)
return(-1);
for(ii = 0; dirs[ii] != (struct direct *)0; ii++)
free(dirs[ii]);
free(dirs);
return(0);
}
#if MSDOS
#include <dir.h>
INT alphasort(dirptr1, dirptr2)
struct direct **dirptr1, **dirptr2;
{
return(strcmp((*dirptr1)->d_name, (*dirptr2)->d_name));
}
#endif
!Funky!Stuff!
exit 0
--
---------------------
Robert Wilhite
akgua!usceast!wilhite
More information about the Comp.sources.unix
mailing list