code for reading from .Z files (with free zcat :-) )
Graham Toal
gtoal at tharr.UUCP
Thu Nov 15 11:59:40 AEST 1990
Archive-name: zlib.shr
This posting consists of a set of routines which roughly simulate fopen,
fgetc, fgets, and fclose. The difference between these and the originals is
that these will read data from a .Z compressed file, decompressing it on the
fly. It does *not* use pipes, processes, or intermediate files. This makes
it useful to add to any programs which read large text files sequentially.
An example of this might be a version of LaTeX which read its .sty files in
compressed form -- it satisfies the following criteria: 1) the files are read
sequentially; 2) the files are read from *much* more often than they are
written to.
I passed this code around a couple of years back, and forgot about it since.
I recently had to resurrect it, and have taken the chance to fix a couple of
bugs which had surfaced in the meantime, and to port it to MSDOS. (Of course
it still works on Unix or any standard ANSI C system)
I include as a test program a simple version of zcat; someone was asking
recently for a small uncompress program; well this is it.
The source is heavily based on the original compress. I've removed as much
unnecessary code as I could get away with, and simplified many expressions to
get them through the dismal MSDOS compilers.
Any comments/bug reports to me; Graham Toal <gtoal at ed.ac.uk>
#!/bin/sh-----cut here-----cut here-----cut here-----cut here-----
# shar: Shell Archiver
# Run the following text with /bin/sh to create:
# zcat.c
# zlib.h
# zlib.c
cat - << \SHAR_EOF > zcat.c
#include <stdio.h>
#include "zlib.h"
/*#include "zlib.c"*/ /* Written so it can be either included or linked in */
/* This part is optional... you probably wouldn't do this in real life */
#define FILE ZFILE
#define fgetc(in) zfgetc(in)
#define fopen(f, m) zfopen(f, m)
#define fclose(f) zfclose(f)
/*
 * zcat: write the contents of each named file (or of standard input when
 * no file is named) to standard output, decompressing on the fly.
 *
 * FILE, fgetc, fopen and fclose are the z-library versions here because of
 * the #defines earlier in this file.
 *
 * Returns 0 when every input could be opened, 1 otherwise, so that shell
 * scripts can detect a failed open.
 */
#ifndef __STDC__
int main(argc, argv)
int argc;
char **argv;
#else
int main(int argc, char **argv)
#endif
{
    FILE *in;
    int i, c;
    int status = 0;             /* becomes 1 as soon as any input fails */

    if (argc == 1) {
        in = zfilter(stdin);
        if (in == NULL) {
            /* zfilter() yields NULL when it cannot set up the stream */
            fprintf(stderr, "%s: cannot read standard input\n", argv[0]);
            return (1);
        }
        for (c = fgetc(in); c != EOF; putchar(c), c = fgetc(in)) ;
        fclose(in);
    } else if (argc > 1) {
        for (i = 1; i < argc; i++) {
            in = fopen(argv[i], "r");
            if (in != NULL) {
                for (c = fgetc(in); c != EOF; putchar(c), c = fgetc(in)) ;
                fclose(in);
            } else {
                fprintf(stderr, "%s: cannot open %s\n", argv[0], argv[i]);
                status = 1;     /* keep going, but report the failure */
            }
        }
    }
    return (status);
}
SHAR_EOF
cat - << \SHAR_EOF > zlib.h
#ifndef _ZLIB_H
#define _ZLIB_H 1
/* PC_HUGE marks the decode tables as 'huge' arrays on MSDOS compilers and
   expands to nothing everywhere else. */
#ifndef MSDOS
#define PC_HUGE
#else
#define PC_HUGE huge /* Microsoft C and contemptibles */
#endif

/* Suffix that zfopen() appends when the plain file name cannot be opened. */
#ifdef __arm
#define ZEXT "-z"
#else
#define ZEXT ".Z"
#endif

#define Z_BITS 16       /* largest code width the decoder supports */
#define Z_MAXBUF 256    /* size of the decompressed-output buffer */

/*
 * State of one stream being read through the decompressor.  Created by
 * zfopen() or zfilter() and released by zfclose().
 */
typedef struct zfiletype {
    FILE *file;                 /* underlying stdio stream */
    int flags;                  /* bit flags; zlib.c defines NOT_COMPRESSED
                                   and ALLOCATED */
    int n_bits;                 /* number of bits/code */
    int maxbits;                /* user settable max # bits/code */
    long maxcode;               /* maximum code, given n_bits */
    long free_ent;              /* first unused entry */
    int block_compress;         /* block-mode bit taken from the header byte */
    int clear_flg;              /* set when a CLEAR code has been seen */
    long stackp;                /* index into tab_suffixof used as a stack;
                                   a long because the array is 'huge' on DOS */
    long finchar;               /* decoder working state */
    long code;
    long oldcode;
    long incode;
    int offset;                 /* bit position of the next code within buf */
    int size;                   /* usable bit count in buf */
    unsigned char buf[Z_BITS];  /* Passed to getcode */
    unsigned char PC_HUGE *tab_suffixof; /* There is a flag bit to say whether */
    long PC_HUGE *tab_prefixof;          /* these have been allocated. */
    int init;                   /* nonzero once decoding has been started */
    int bufput;                 /* next write position in buff */
    int bufget;                 /* next read position in buff */
    int bufend;                 /* fill level at which decoding pauses */
    unsigned char buff[Z_MAXBUF]; /* decompressed bytes awaiting zfgetc() */
    int c1;                     /* first two bytes read by zfilter(); replayed */
    int c2;                     /* when the stream is not compressed */
    int zeof;                   /* nonzero once decompressed data has run out */
} ZFILE;
/*
 * Public interface.  ANSI compilers get full prototypes; older compilers
 * get plain declarations with the parameter lists shown as comments.
 *
 *   zfopen   open a (possibly compressed) file; NULL on failure
 *   zfclose  release a stream obtained from zfopen() or zfilter()
 *   zfilter  wrap an already-open stdio stream
 *   zfgetc   next decompressed byte, or EOF
 *   zfeof    nonzero once the stream is exhausted
 *   zfgets   read one line, keeping the '\n'
 */
#ifdef __STDC__
ZFILE *zfopen(char *fileptr, char *how);
void zfclose(ZFILE *z);
ZFILE *zfilter(FILE *f);
int zfgetc(ZFILE *z);
int zfeof(ZFILE *z);
char *zfgets(char *line, int len, ZFILE *zfp);
#else
ZFILE *zfopen(/* char *fileptr, char *how */);
void zfclose(/* ZFILE *z */);
ZFILE *zfilter(/* FILE *f */);
int zfgetc(/* ZFILE *z */);
int zfeof(/* ZFILE *z */);
char *zfgets(/* char *line, int len, ZFILE *zfp */);
#endif /* __STDC__ */
#endif
SHAR_EOF
cat - << \SHAR_EOF > zlib.c
/*#define MAIN*/
/*int debug = 1;*/
/*#define DEBUG 1*/
/* These wondrous debugging macros helped me find the nasty bug which
only manifested itself on msdos -- stackp has to be a long on msdos
because the array it is indexing is 'huge' ... */
/*
 * Tracing helpers.  Without DEBUG they cost nothing: TRACT and TRACL vanish,
 * TRACE and TRACA expand to the bare statement.  With DEBUG each one prints
 * the source line number and the statement (or variable) text to stderr
 * whenever its level is <= the variable 'debug':
 *   TRACT  print only, the statement is not executed
 *   TRACE  print, then execute the statement
 *   TRACA  execute the statement, then print
 *   TRACL  print a long variable's name and value
 */
#ifndef DEBUG
#define TRACT(level, statement)
#define TRACE(level, statement) statement
#define TRACA(level, statement) statement
#define TRACL(level, variable)
#else
#define TRACT(level, statement) \
    if (level <= debug) fprintf(stderr, "%d: %s\n", __LINE__, #statement);
#define TRACE(level, statement) \
    if (level <= debug) fprintf(stderr, "%d: %s\n", __LINE__, #statement); statement
#define TRACA(level, statement) \
    statement; if (level <= debug) fprintf(stderr, "%d: %s\n", __LINE__, #statement);
#define TRACL(level, variable) \
    if (level <= debug) fprintf(stderr, "%d: %s <- %ld\n", __LINE__, #variable, variable);
#endif
/*
*
* Originally:
*
* compress.c - File compression ala IEEE Computer, June 1984.
*
* Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
* Jim McKie (decvax!mcvax!jim)
* Steve Davies (decvax!vax135!petsd!peora!srd)
* Ken Turkowski (decvax!decwrl!turtlevax!ken)
* James A. Woods (decvax!ihnp4!ames!jaw)
* Joe Orost (decvax!vax135!petsd!joe)
*
* $Header: zlib.c,v 4.1 90/11/12 14:52:24 gtoal Release $
*
* Graham Toal, 3rd September 1988. My changes released to public domain.
* Updated Nov 90.
*
* The original decompress has been restructured so that data can be
* fetched on demand a byte at a time. This lets it be used as a filter
* for programs which read large data files - you do not need the disk
* space to decompress the input files first.
*
* (Incidentally, programs reading data off floppies will be speeded up
* because decompression is always faster than the equivalent amount
* of disk I/O).
*
* This implementation supplies 'z' versions of fopen, fgetc, fgets, feof and fclose
* to be used as direct substitutes for the originals; it would be cleaner
* and more transparent if the decompress filter were hidden under the
* real stdio procedures. An extra call zfilter() is supplied to convert
* an already-opened stream into a z-stream: see the example at the end
* of this file.
*
* If a file opened by zfopen() was not compressed, the file's contents are
* still recovered correctly at the low expense of an extra procedure call
* per byte. This makes the routines more generally usable - they can be
* left in production programs which can be speeded up in the field by
* compressing selected input files(*); also, files can be compressed or
* not selectively depending on whether the compression makes them
* smaller or not - code accessing the files does not need to know.
*
* [(*) reading from a compressed file off floppy disk is faster than
* reading from an uncompressed file. This probably isn't true of
* hard disks though.]
*
* BUGS: Opening a file "r" will not do CR/LF processing on computers with
* this file structure.
*/
#include <stdio.h>
#include <string.h>
#ifdef __STDC__
#include <stdlib.h>
#else
#define size_t int
#endif
#include <ctype.h>
#ifdef MSDOS
#include <malloc.h>
#endif
#ifndef min
#define min(a,b) ((a>b) ? b : a)
#endif
#define HSIZE 69001L /* 95% occupancy */
/*
* the next two codes should not be changed lightly, as they must not
* lie within the contiguous general code space.
*/
#define FIRST 257L /* first free entry */
#define CLEAR 256L /* table clear output code */
#define BIT_MASK 0x1f
#define BLOCK_MASK 0x80
#define INIT_BITS 9 /* initial number of bits/code */
#define CHECK_GAP 10000L/* ratio check interval */
#include "zlib.h"
#define NOT_COMPRESSED 1
#define ALLOCATED 2
/* File-local helpers: decompress_more() refills the output buffer and
   getcode() pulls the next code from the input. */
#ifdef __STDC__
static void decompress_more(register ZFILE *z);
static long getcode(register ZFILE *z);
#else
static void decompress_more( /* register ZFILE *z */ );
static long getcode( /* register ZFILE *z */ );
#endif
/*
 * zfopen -- open a file that may or may not be compressed.
 *
 * fileptr  name of the file; when 'how' starts with 'r' and that name cannot
 *          be opened, the name with ZEXT appended is tried as well
 * how      stdio mode string; anything not starting with 'r' is passed
 *          straight to fopen() and the stream is marked NOT_COMPRESSED
 *
 * Returns a new ZFILE, or NULL when memory runs out or no file could be
 * opened.  A file that does not start with the compress magic number
 * (0x1F 0x9D) is reopened with the caller's mode and read as plain data.
 * A file compressed with more than Z_BITS bits prints a message and
 * terminates the program with a failure status.
 */
#ifndef __STDC__
ZFILE *zfopen(fileptr, how)
char *fileptr;
char *how;
#else
ZFILE *zfopen(char *fileptr, char *how)
#endif
{
    register ZFILE *z;
    char *zname = NULL;         /* heap copy of fileptr + ZEXT, if it was tried */
    char *opened = fileptr;     /* name of the file that actually opened */

    z = (ZFILE *) malloc(sizeof(ZFILE));
    if (z == NULL)
        return (NULL);
    z->flags = 0;
    z->maxbits = Z_BITS;        /* user settable max # bits/code */
    z->free_ent = 0;            /* first unused entry */
    z->block_compress = BLOCK_MASK;
    z->clear_flg = 0;
    z->init = 0;
    z->zeof = (0 != 0);
    z->c1 = EOF;
    z->c2 = EOF;
    z->bufput = 0;
    z->bufget = 0;
    z->bufend = Z_MAXBUF - 1;

    /* Open input file */
    if (*how == 'r') {
        z->file = fopen(fileptr, "rb");
        if (z->file == NULL) {
            /* Size the buffer from the name itself: a fixed-size array
               would overflow on long path names. */
            zname = (char *) malloc(strlen(fileptr) + strlen(ZEXT) + 1);
            if (zname != NULL) {
                strcpy(zname, fileptr);
                strcat(zname, ZEXT);
                z->file = fopen(zname, "rb");
                opened = zname;
            }
        }
    } else {
        /* No compressed output yet, if ever... */
        /* Compress the file explicitly once it has been written */
        z->file = fopen(fileptr, how);
        z->flags |= NOT_COMPRESSED;
    }
    if (z->file == NULL)
        goto fail;

    /* Check the magic number -- only on streams opened for reading here;
       a stream already marked NOT_COMPRESSED was opened with the caller's
       mode and must not be read from or reopened. */
    if (((z->flags & NOT_COMPRESSED) == 0)
        && ((fgetc(z->file) != 0x1F) || (fgetc(z->file) != 0x9D))) {
        z->flags |= NOT_COMPRESSED;
        fclose(z->file);
        /* Reopen whichever name succeeded above, not always fileptr. */
        z->file = fopen(opened, how);
        if (z->file == NULL)
            goto fail;
    }
    if (zname != NULL)          /* guarded: pre-ANSI free() may reject NULL */
        free(zname);
    if ((z->flags & NOT_COMPRESSED) != 0)
        return (z);

    z->maxbits = fgetc(z->file);        /* set -b from file */
    z->block_compress = z->maxbits & BLOCK_MASK;
    z->maxbits &= BIT_MASK;
    if (z->maxbits > Z_BITS) {
        fprintf(stderr,
                "%s: compressed with %d bits; decompress can only handle %d bits\n",
                fileptr, z->maxbits, Z_BITS);
        exit(1);                /* a fatal error must not report success */
    }
    return (z);

fail:
    if (zname != NULL)
        free(zname);
    free(z);
    return (NULL);
}
/*
 * zfilter -- wrap an already-open stdio stream so it can be read with
 * zfgetc()/zfgets().
 *
 * The first two bytes of 'f' are read to look for the compress magic number
 * (0x1F 0x9D).  When they do not match, the stream is marked NOT_COMPRESSED
 * and the two bytes are kept in c1/c2 so zfgetc() can hand them back first.
 * When they match, the third header byte supplies maxbits and the
 * block-compress flag.
 *
 * Returns a new ZFILE, or NULL when 'f' is NULL or memory runs out.  A
 * stream compressed with more than Z_BITS bits prints a message and
 * terminates the program with a failure status.
 */
#ifndef __STDC__
ZFILE *zfilter(f)
FILE *f;
#else
ZFILE *zfilter(FILE *f)
#endif
{
    register ZFILE *z;

    if (f == NULL)              /* nothing to wrap; avoid a pointless malloc */
        return (NULL);
    z = (ZFILE *) malloc(sizeof(ZFILE));
    if (z == NULL)
        return (NULL);
    z->flags = 0;
    z->maxbits = Z_BITS;        /* user settable max # bits/code */
    z->free_ent = 0;            /* first unused entry */
    z->block_compress = BLOCK_MASK;
    z->clear_flg = 0;
    z->init = 0;
    z->zeof = (0 != 0);
    z->bufput = 0;
    z->bufget = 0;
    z->bufend = Z_MAXBUF - 1;
    z->file = f;

    /* Check the magic number */
    z->c1 = fgetc(z->file);
    z->c2 = fgetc(z->file);
    if ((z->c1 != 0x1F) || (z->c2 != 0x9D)) {
        z->flags |= NOT_COMPRESSED;
        return (z);
    }

    z->maxbits = fgetc(z->file);        /* set -b from file */
    z->block_compress = z->maxbits & BLOCK_MASK;
    z->maxbits &= BIT_MASK;
    if (z->maxbits > Z_BITS) {
        fprintf(stderr,
                "stdin compressed with %d bits; decompress can only handle %d bits\n",
                z->maxbits, Z_BITS);
        exit(1);                /* a fatal error must not report success */
    }
    return (z);
}
/*
 * zfgetc -- return the next byte of the stream, or EOF.
 *
 * Uncompressed streams first replay the bytes saved in c1/c2, then read
 * straight from the underlying file.  Compressed streams are served from
 * z->buff, which decompress_more() refills whenever it runs dry.  When no
 * more data arrives, the stream is marked zeof and the decode tables are
 * freed if they were allocated.
 */
#ifdef __STDC__
int zfgetc(ZFILE *z)
#else
int zfgetc(z)
ZFILE *z;
#endif
{
    int ch;

    if (z->flags & NOT_COMPRESSED) {
        ch = z->c1;
        if (ch < 0)
            return (fgetc(z->file));    /* nothing saved: read the file */
        /* hand back the saved byte and shift the second one down */
        z->c1 = z->c2;
        z->c2 = EOF;
        return (ch);
    }

    /* Buffer drained and input not yet exhausted: decode some more. */
    if (!z->zeof && z->bufput == z->bufget)
        decompress_more(z);

    z->zeof = (z->bufput == z->bufget);
    if (!z->zeof)
        return (z->buff[z->bufget++]);

    /* End of data: the tables are no longer needed. */
    if (z->flags & ALLOCATED) {
#ifdef MSDOS
        hfree(z->tab_suffixof);
        hfree(z->tab_prefixof);
#else
        free(z->tab_suffixof);
        free(z->tab_prefixof);
#endif
        z->flags &= (~ALLOCATED);
    }
    return (EOF);
}
/*
 * zfeof -- report whether the stream is exhausted.
 *
 * Compressed streams return the zeof flag maintained by zfgetc().
 * Uncompressed streams are not at end while a saved byte is still pending
 * in c1; after that the answer comes from feof() on the underlying file.
 */
#ifdef __STDC__
int zfeof(ZFILE *z)
#else
int zfeof(z)
ZFILE *z;
#endif
{
    if ((z->flags & NOT_COMPRESSED) == 0)
        return (z->zeof);
    if (z->c1 == EOF)
        return (feof(z->file));
    return (0);
}
/*
 * zfclose -- release a ZFILE obtained from zfopen() or zfilter().
 *
 * A NULL argument is ignored.  The decode tables are freed whenever the
 * ALLOCATED flag says they exist.  The previous test on z->zeof could never
 * free them, because zfgetc() has already released the tables by the time
 * it sets zeof, so closing a stream before end of data leaked both tables.
 *
 * NOTE: the underlying FILE is not closed here.  getcode() closes it when a
 * compressed stream runs out of input; in every other case it stays open.
 */
#ifndef __STDC__
void zfclose(z)
ZFILE *z;
#else
void zfclose(ZFILE *z)
#endif
{
    if (z == NULL)
        return;
    if ((z->flags & ALLOCATED) != 0) {
#ifdef MSDOS
        hfree(z->tab_suffixof);
        hfree(z->tab_prefixof);
#else
        free(z->tab_suffixof);
        free(z->tab_prefixof);
#endif
        z->flags &= (~ALLOCATED);
    }
    free(z);
}
/*
 * zfgets -- read one line from a z-stream, in the manner of fgets().
 *
 * line  destination buffer
 * len   size of the buffer; at most len - 1 bytes are stored, followed by
 *       a terminating '\0'
 * zfp   stream to read from
 *
 * Reading stops after a '\n' (which is kept), when the buffer is full, or
 * at end of data.  Returns 'line', or NULL when 'line' is NULL, len <= 0,
 * or end of data is reached before any byte was read.
 *
 * Unlike the earlier version, a final line without a trailing newline is
 * returned rather than dropped, and no byte is consumed from the stream
 * unless there is room to store it.
 */
#ifndef __STDC__
char *zfgets(line, len, zfp)
char *line;
int len;
ZFILE *zfp;
#else
char *zfgets(char *line, int len, ZFILE *zfp)
#endif
{
    int c;
    int pos = 0;
    int at_eof = 0;

    if (line == NULL || len <= 0)
        return (NULL);

    /* Check for room before reading so nothing is fetched and then lost. */
    while (pos + 1 < len) {
        c = zfgetc(zfp);
        if (c == EOF) {
            at_eof = 1;
            break;
        }
        c &= 255;
        line[pos++] = (char) c;
        if (c == '\n')
            break;
    }
    if (at_eof && pos == 0)
        return (NULL);          /* nothing read: leave the buffer untouched */
    line[pos] = '\0';
    return (line);
}
#ifndef __STDC__
static void decompress_more(z)
register ZFILE *z;
#else
static void decompress_more(register ZFILE *z)
#endif
{
z->bufput = 0;
z->bufget = 0;
if (z->init != 0)
goto resume;
z->init = 1;
z->offset = 0;
z->size = 0;
#ifdef MSDOS
z->tab_suffixof =
(unsigned char PC_HUGE *) halloc(HSIZE, sizeof(unsigned char));
z->tab_prefixof =
(long PC_HUGE *) halloc(HSIZE, sizeof(long));
#else
z->tab_suffixof =
(unsigned char *) malloc((size_t) HSIZE * sizeof(unsigned char));
z->tab_prefixof = (long *) malloc((size_t) HSIZE * sizeof(long));
#endif
z->flags |= ALLOCATED;
z->n_bits = INIT_BITS;
z->maxcode = ((1L << (z->n_bits)) - 1L);
for (z->code = 255L; z->code >= 0L; z->code--) {
z->tab_prefixof[z->code] = 0L;
z->tab_suffixof[z->code] = (unsigned char) z->code;
}
z->free_ent = ((z->block_compress) ? FIRST : 256L);
z->finchar = z->oldcode = getcode(z);
if (z->oldcode == -1L)
return; /* EOF already? */
if (z->finchar < 0L || z->finchar >= 256L)
fprintf(stderr, "****\n");
z->buff[z->bufput] = (char) (z->finchar & 0xff);
z->bufput++;
z->stackp = 1L << Z_BITS; /* The 1L is for DOS huge arrays */
while ((z->code = getcode(z)) != EOF) {
if ((z->code == CLEAR) && z->block_compress) {
for (z->code = 255; z->code >= 0; z->code--)
z->tab_prefixof[z->code] = 0;
z->clear_flg = 1;
z->free_ent = FIRST - 1;
if ((z->code = getcode(z)) == EOF)
break; /* O, untimely death! */
} /* if */
z->incode = z->code;
if (z->code >= z->free_ent) {
z->tab_suffixof[z->stackp] = (unsigned char) z->finchar;
z->stackp += 1L;
z->code = z->oldcode;
}
while (z->code >= 256L) {
z->tab_suffixof[z->stackp] = z->tab_suffixof[z->code];
z->stackp += 1L;
z->code = z->tab_prefixof[z->code];
}
z->finchar = z->tab_suffixof[z->code];
z->tab_suffixof[z->stackp] = (unsigned char) z->finchar;
z->stackp += 1L;
do {
long tmp;
z->stackp -= 1L;
tmp = z->tab_suffixof[z->stackp];
z->buff[z->bufput++] = (unsigned char) (tmp & 255L);
if (z->bufput == z->bufend) {
return; /* Logically a setjmp/longjump, but this is
more portable */
resume:; /* jumped to here -- is jumping into a loop
safe? */
/* - or should I use jumps for the loop too? */
} /* if */
} while (z->stackp > (1L << Z_BITS));
/* ^ This is why I changed stackp from a pointer. */
/* Pointer comparisons can be dubious... */
if ((z->code = z->free_ent) < (1L << z->maxbits)) {
z->tab_prefixof[z->code] = z->oldcode;
z->tab_suffixof[z->code] = (unsigned char) z->finchar;
z->free_ent = z->code + 1;
}
z->oldcode = z->incode;
} /* while */
} /* decompress more */
/* rmask[k] has the k low-order bits set, for k = 0..8; getcode() uses it to
   keep only the wanted high-order bits of a code's last byte. */
static unsigned char rmask[9] = {
    (1 << 0) - 1,
    (1 << 1) - 1,
    (1 << 2) - 1,
    (1 << 3) - 1,
    (1 << 4) - 1,
    (1 << 5) - 1,
    (1 << 6) - 1,
    (1 << 7) - 1,
    (1 << 8) - 1
};
/*
 * getcode -- fetch the next n_bits-wide code from the compressed input.
 *
 * Codes are unpacked from z->buf, which is refilled with n_bits bytes
 * whenever it is used up, the code width is about to change (free_ent has
 * passed maxcode), or a CLEAR was seen (clear_flg).
 *
 * Returns the code, or EOF at end of input.  At end of input the file is
 * closed and z->file is set to NULL; every later call returns EOF straight
 * away.  Without that guard a later call would fread() from the closed
 * stream, which happens in practice because decompress_more() resumes its
 * loop after the buffered output has been drained.
 */
#ifndef __STDC__
static long getcode(z)
register ZFILE *z;
#else
static long getcode(register ZFILE *z)
#endif
{                               /* Should be int!!! */
    register long code;
    register long r_off, bits;
    register int bp;

    if (z->file == NULL)
        return (EOF);           /* input already exhausted and closed */
    bp = 0;
    if (z->clear_flg != 0 ||
        z->offset >= z->size ||
        z->free_ent > z->maxcode) {
        /* Table has outgrown the current width: use one more bit. */
        if (z->free_ent > z->maxcode) {
            z->n_bits++;
            if (z->n_bits == z->maxbits) {
                z->maxcode = (1L << z->maxbits);        /* won't get any bigger now */
            } else {
                z->maxcode = ((1L << (z->n_bits)) - 1L);
            }
        }
        /* After a CLEAR the width starts over at INIT_BITS. */
        if (z->clear_flg != 0) {
            z->n_bits = INIT_BITS;
            z->maxcode = ((1L << (z->n_bits)) - 1L);
            z->clear_flg = 0;
        }
        z->size = fread(z->buf, 1, (size_t) z->n_bits, z->file);
        if (z->size <= 0) {
            fclose(z->file);
            z->file = NULL;     /* remember that the stream is gone */
            return (EOF);       /* end of file */
        }
        z->offset = 0;
        /* Convert to bits, rounded down to a whole number of codes. */
        z->size = (z->size << 3) - (z->n_bits - 1);
    }
    r_off = z->offset;
    bits = z->n_bits;
    /* Skip to the byte holding the first bit of this code. */
    bp = bp + ((int) r_off >> 3);
    r_off = r_off & 7;
    /* Low-order part: what is left of the first byte. */
    code = ((long) z->buf[bp++] >> r_off);
    bits = bits - 8 + r_off;
    r_off = 8 - r_off;          /* now, offset into code word */
    /* A whole middle byte, when at least 8 bits are still needed. */
    if (bits >= 8) {
        code = code | ((long) z->buf[bp++] << r_off);
        r_off = r_off + 8;
        bits = bits - 8;
    }
    /* High-order part: the remaining bits of the last byte. */
    code = code
        | ((long) ((long) (z->buf[bp]) & (long) rmask[bits]) << (long) r_off);
    z->offset = z->offset + z->n_bits;
    return (code);
}
#ifdef MAIN
/* This part is optional: a small driver, compiled only when MAIN is
   defined, that copies the decompressed contents of stdin or of each named
   file to stderr. */
#define FILE ZFILE
#define fgetc(in) zfgetc(in)
#define fopen(f, m) zfopen(f, m)
#define fclose(f) zfclose(f)
#ifdef __STDC__
int main(int argc, char **argv)
#else
int main(argc, argv)
int argc;
char **argv;
#endif
{
    FILE *in;
    int i, c;

    /* No file arguments: filter standard input. */
    if (argc == 1) {
        in = zfilter(stdin);
        while ((c = fgetc(in)) != EOF)
            fputc(c, stderr);
        zfclose(in);
        return (0);
    }

    /* Otherwise process each named file in turn. */
    for (i = 1; i < argc; i++) {
        in = fopen(argv[i], "r");
        if (in == NULL) {
            fprintf(stderr, "%s: cannot open %s\n", argv[0], argv[i]);
            continue;
        }
        while ((c = fgetc(in)) != EOF)
            fputc(c, stderr);
        fclose(in);
    }
    return (0);
}
#endif
SHAR_EOF
--
(* Posted from tharr.uucp - Public Access Unix - +44 (234) 261804 *)
More information about the Alt.sources
mailing list