v20i070: Freeze/Melt compression program, Patch03
Leonid A. Broukhis
leo at s514.ipmce.su
Fri Jun 28 03:27:27 AEST 1991
Submitted-by: Leonid A. Broukhis <leo at s514.ipmce.su>
Posting-number: Volume 20, Issue 70
Archive-name: freeze/patch03
Patch-To: freeze: Volume 17, Issue 67-68
Here is the 3rd patch for Freeze/melt program (bug fixes, some
compression speedup, etc.) but I think that Brad Templeton's
compression program will bury my Freeze out. This patch
is probably last (if no more bugs will be found).
------------------ cut here ---------------------
*** ../distribution/README Wed Mar 27 19:44:41 1991
--- README Tue Jun 25 13:05:32 1991
***************
*** 151,157 ****
Compression rate is *INDEPENDENT* of the hash table size, but may vary
with different static Huffman tables. It is about 2% worse than the same
! of ARJ and LHA.
My aim is not the maximum compression, the same range is enough. :-)
--- 151,158 ----
Compression rate is *INDEPENDENT* of the hash table size, but may vary
with different static Huffman tables. It is about 2% worse than the same
! of ARJ 1.00 and LHA 2.05, but, unfortunately, ARJ 2.00 beats Freeze
! on 8%.
My aim is not the maximum compression, the same range is enough. :-)
***************
*** 166,168 ****
--- 167,172 ----
Encode/Decode_Char/Position), so if you want the speed and/or compression
rate `a la vogue' you may replace the low-level routines with the homebrew
(f. ex.) ones and enjoy the results.
+
+ (I tried to implement splay trees instead of Huffman ones and instead of
+ static table for position information, but this gives nothing.)
*** ../distribution/bitio.c Mon May 13 17:05:09 1991
--- bitio.c Thu Jun 13 21:15:14 1991
***************
*** 6,15 ****
short GetByte ()
{
! register u_short dx = getbuf;
register u_short c;
! if (getlen <= 8) {
c = getchar ();
if ((short)c < 0) {
--- 6,15 ----
short GetByte ()
{
! register u_short dx = bitbuf;
register u_short c;
! if (bitlen <= 8) {
c = getchar ();
if ((short)c < 0) {
***************
*** 22,32 ****
corrupt_flag = 1;
c = 0;
}
! dx |= c << (8 - getlen);
! getlen += 8;
}
! getbuf = dx << 8;
! getlen -= 8;
return (dx >> 8) & 0xff;
}
--- 22,32 ----
corrupt_flag = 1;
c = 0;
}
! dx |= c << (8 - bitlen);
! bitlen += 8;
}
! bitbuf = dx << 8;
! bitlen -= 8;
return (dx >> 8) & 0xff;
}
***************
*** 36,42 ****
short GetNBits (n)
register u_short n;
{
! register u_short dx = getbuf;
register u_short c;
static u_short mask[17] = {
--- 36,42 ----
short GetNBits (n)
register u_short n;
{
! register u_short dx = bitbuf;
register u_short c;
static u_short mask[17] = {
***************
*** 50,56 ****
16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
};
! if (getlen <= 8)
{
c = getchar ();
if ((short)c < 0) {
--- 50,56 ----
16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
};
! if (bitlen <= 8)
{
c = getchar ();
if ((short)c < 0) {
***************
*** 59,69 ****
corrupt_flag = 1;
c = 0;
}
! dx |= c << (8 - getlen);
! getlen += 8;
}
! getbuf = dx << n;
! getlen -= n;
return (dx >> shift[n]) & mask[n];
}
--- 59,69 ----
corrupt_flag = 1;
c = 0;
}
! dx |= c << (8 - bitlen);
! bitlen += 8;
}
! bitbuf = dx << n;
! bitlen -= n;
return (dx >> shift[n]) & mask[n];
}
***************
*** 72,79 ****
register u_short l;
u_short c;
{
! register u_short len = putlen;
! register u_short b = putbuf;
b |= c >> len;
if ((len += l) >= 8) {
putchar ((int)(b >> 8));
--- 72,79 ----
register u_short l;
u_short c;
{
! register u_short len = bitlen;
! register u_short b = bitbuf;
b |= c >> len;
if ((len += l) >= 8) {
putchar ((int)(b >> 8));
***************
*** 89,94 ****
}
if (ferror(stdout))
writeerr();
! putbuf = b;
! putlen = len;
}
--- 89,94 ----
}
if (ferror(stdout))
writeerr();
! bitbuf = b;
! bitlen = len;
}
*** ../distribution/encode.c Mon May 13 17:05:10 1991
--- encode.c Thu Jun 13 22:11:35 1991
***************
*** 1,5 ****
#include "freeze.h"
-
#include "lz.h"
/*
--- 1,4 ----
***************
*** 91,99 ****
if (verbose) {
register short pos =
(r - 1 - match_position) & (N - 1),
! len = match_length;
fputc('"', stderr);
! for(;len;len--, pos++)
fprintf(stderr, "%s",
pr_char(text_buf[pos]));
fprintf(stderr, "\"\n");
--- 90,98 ----
if (verbose) {
register short pos =
(r - 1 - match_position) & (N - 1),
! leng = match_length;
fputc('"', stderr);
! for(; leng; leng--, pos++)
fprintf(stderr, "%s",
pr_char(text_buf[pos]));
fprintf(stderr, "\"\n");
*** ../distribution/freeze.c Mon May 13 17:05:11 1991
--- freeze.c Fri Jun 7 18:59:17 1991
***************
*** 25,30 ****
--- 25,31 ----
* are now separated.
* Version 2.2: Tunable static Huffman table for position information,
* this info may be given in the command string now.
+ * Version 2.2.3: Bug fixes, 10% freezing speedup.
*/
#ifdef COMPAT
***************
*** 510,517 ****
else if (debug == 0) melt();
else printcodes();
#endif /* DEBUG */
if(topipe == 0) {
! copystat(*fileptr, ofname); /* Copy stats */
if((exit_stat == 1) || (!quiet))
putc('\n', stderr);
}
--- 511,523 ----
else if (debug == 0) melt();
else printcodes();
#endif /* DEBUG */
+
+ /* check output status, and close to make sure data is written */
+ if ( ferror(stdout) || fclose(stdout) < 0 )
+ writeerr();
+
if(topipe == 0) {
! copystat(*fileptr); /* Copy stats */
if((exit_stat == 1) || (!quiet))
putc('\n', stderr);
}
***************
*** 563,568 ****
--- 569,575 ----
}
}
exit(exit_stat);
+ /*NOTREACHED*/
}
long in_count = 1; /* length of input */
***************
*** 612,619 ****
exit ( 1 );
}
! copystat(ifname, ofname)
! char *ifname, *ofname;
{
#ifdef __TURBOC__
struct ftime utimbuf;
--- 619,626 ----
exit ( 1 );
}
! copystat(ifname)
! char *ifname;
{
#ifdef __TURBOC__
struct ftime utimbuf;
***************
*** 660,666 ****
perror(ofname);
#ifndef MSDOS
! chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */
#endif
#ifdef __TURBOC__
--- 667,674 ----
perror(ofname);
#ifndef MSDOS
! /* Copy ownership */
! chown(ofname, (int) statbuf.st_uid, (int) statbuf.st_gid);
#endif
#ifdef __TURBOC__
*** ../distribution/huf.c Mon May 13 18:54:10 1991
--- huf.c Thu Jun 13 21:15:14 1991
***************
*** 7,20 ****
/* */
/*----------------------------------------------------------------------*/
- #ifdef COMPAT
- #undef N
- #undef F
- #define F (new_flg ? _F : _FO)
- #define N (new_flg ? _NN : _NO)
- #endif
-
-
/* TABLE OF ENCODE/DECODE for upper 6 bits position information */
/* The contents of this table are used for freezing only, so we use
--- 7,12 ----
***************
*** 40,53 ****
short son[_T]; /* points to son node (son[i],son[i+]) */
! u_short getbuf = 0;
! uchar getlen = 0;
uchar corrupt_flag = 0; /* If a file is corrupt, use fcat */
- u_short putbuf = 0;
- uchar putlen = 0;
-
/* Initialize tree */
StartHuff ()
--- 32,42 ----
short son[_T]; /* points to son node (son[i],son[i+]) */
! u_short bitbuf = 0;
! uchar bitlen = 0;
uchar corrupt_flag = 0; /* If a file is corrupt, use fcat */
/* Initialize tree */
StartHuff ()
***************
*** 80,87 ****
#ifdef DEBUG
symbols_out = refers_out = 0;
#endif
! putlen = getlen = 0;
! putbuf = getbuf = 0;
corrupt_flag = 0;
}
--- 69,76 ----
#ifdef DEBUG
symbols_out = refers_out = 0;
#endif
! bitlen = 0;
! bitbuf = 0;
corrupt_flag = 0;
}
***************
*** 218,225 ****
EncodeEnd ()
{
! if (putlen) {
! putchar((int)(putbuf >> 8));
bytes_out++;
if (ferror(stdout))
writeerr();
--- 207,214 ----
EncodeEnd ()
{
! if (bitlen) {
! putchar((int)(bitbuf >> 8));
bytes_out++;
if (ferror(stdout))
writeerr();
***************
*** 228,243 ****
short DecodeChar ()
{
! register u_short c;
! register u_short dx;
! register u_short cc;
c = son[_R];
/* trace from root to leaf,
got bit is 0 to small(son[]), 1 to large (son[]+1) son node */
while (c < _T) {
! dx = getbuf;
! if (getlen <= 8) {
if ((short)(cc = getchar()) < 0) {
if (corrupt_flag) {
corrupt_message();
--- 217,230 ----
short DecodeChar ()
{
! register u_short c, cc, dx;
c = son[_R];
/* trace from root to leaf,
got bit is 0 to small(son[]), 1 to large (son[]+1) son node */
while (c < _T) {
! dx = bitbuf;
! if (bitlen <= 8) {
if ((short)(cc = getchar()) < 0) {
if (corrupt_flag) {
corrupt_message();
***************
*** 246,256 ****
corrupt_flag = 1;
cc = 0;
}
! dx |= cc << (8 - getlen);
! getlen += 8;
}
! getbuf = dx << 1;
! getlen--;
c += (dx >> 15) & 1;
c = son[c];
}
--- 233,243 ----
corrupt_flag = 1;
cc = 0;
}
! dx |= cc << (8 - bitlen);
! bitlen += 8;
}
! bitbuf = dx << 1;
! bitlen--;
c += (dx >> 15) & 1;
c = son[c];
}
***************
*** 321,328 ****
/* trace from root to leaf,
got bit is 0 to small(son[]), 1 to large (son[]+1) son node */
while (c < _TO) {
! dx = getbuf;
! if (getlen <= 8) {
if ((short)(cc = getchar()) < 0) {
if (corrupt_flag) {
corrupt_message();
--- 308,315 ----
/* trace from root to leaf,
got bit is 0 to small(son[]), 1 to large (son[]+1) son node */
while (c < _TO) {
! dx = bitbuf;
! if (bitlen <= 8) {
if ((short)(cc = getchar()) < 0) {
if (corrupt_flag) {
corrupt_message();
***************
*** 331,341 ****
corrupt_flag = 1;
cc = 0;
}
! dx |= cc << (8 - getlen);
! getlen += 8;
}
! getbuf = dx << 1;
! getlen--;
c += (dx >> 15) & 1;
c = son[c];
}
--- 318,328 ----
corrupt_flag = 1;
cc = 0;
}
! dx |= cc << (8 - bitlen);
! bitlen += 8;
}
! bitbuf = dx << 1;
! bitlen--;
c += (dx >> 15) & 1;
c = son[c];
}
*** ../distribution/huf.h Wed Mar 27 19:45:04 1991
--- huf.h Thu Jun 13 21:15:15 1991
***************
*** 1,9 ****
! extern u_short getbuf;
! extern uchar getlen;
extern uchar corrupt_flag;
-
- extern u_short putbuf;
- extern uchar putlen;
#define MAX_FREQ (u_short)0x8000 /* tree update timing from frequency */
--- 1,6 ----
! extern u_short bitbuf;
! extern uchar bitlen;
extern uchar corrupt_flag;
#define MAX_FREQ (u_short)0x8000 /* tree update timing from frequency */
*** ../distribution/lz.c Mon May 13 15:12:09 1991
--- lz.c Fri Jun 7 18:48:53 1991
***************
*** 18,28 ****
hash_t prev[N + 1];
#ifndef __XENIX__
- u_short
#ifdef __TURBOC__
! huge
! #endif
! next[array_size];
#else
#if parts == 2
u_short next0[32768], next1[8193];
--- 18,28 ----
hash_t prev[N + 1];
#ifndef __XENIX__
#ifdef __TURBOC__
! u_short huge * next = (u_short huge *) NULL;
! #else
! u_short next[array_size];
! #endif /* TURBOC */
#else
#if parts == 2
u_short next0[32768], next1[8193];
***************
*** 66,71 ****
--- 66,80 ----
node_steps = node_matches = 0;
#endif
+ #ifdef __TURBOC__
+ if (!next && (next = (u_short huge *)
+ farmalloc(array_size * sizeof(u_short))) == NULL) {
+ fprintf(stderr, "Not enough memory (%dK wanted)\n",
+ array_size * sizeof(u_short) / 1024);
+ exit(3);
+ }
+ #endif
+
for (i = N + 1; i < array_size; i++ )
nextof(i) = NIL;
for (i = 0; i < sizeof(prev)/sizeof(*prev); i++ )
***************
*** 76,119 ****
Get_Next_Match (r)
u_short r;
{
! register uchar *key;
! register u_short m, i, p;
#ifdef GATHER_STAT
node_matches++;
#endif
- key = &text_buf[p = r];
- m = 0;
- while (m < _F) {
- if ((p = nextof(p)) == NIL) {
- match_length = m;
- return;
- }
! /* This statement is due to ideas of Boyer and Moore: */
! if(key[m] != text_buf[p + m])
! continue;
! /* This statement is due to my ideas: :-) */
! /* It gives up to 8% speedup on files with big redundancy (text, etc.) */
! if(key[m >> 1] != text_buf[p + (m >> 1)])
! continue;
#ifdef GATHER_STAT
node_steps++;
#endif
! /* This statement doesn't take a lot of execution time -
! about 20% (in profiling we trust)
! */
! for (i = 0; i < _F && key[i] == text_buf[p + i]; i++);
!
! if (i > m) {
! match_position = ((r - p) & (N - 1)) - 1;
! m = i;
! }
! }
#ifdef DEBUG
if (verbose)
fprintf(stderr, "Replacing node: %d -> %d\n", p, r);
--- 85,121 ----
Get_Next_Match (r)
u_short r;
{
! register u_short p = r;
! register int m;
! register uchar *key = text_buf + r, *pattern;
#ifdef GATHER_STAT
node_matches++;
#endif
! match_length = 0;
! do {
! do {
! if ((p = nextof(p)) == NIL)
! return;
! pattern = text_buf + p;
! /* This statement is due to ideas of Boyer and Moore: */
! for (m = match_length;
! m >= 0 && key[m] == pattern[m]; m--);
! } while (m >= 0);
#ifdef GATHER_STAT
node_steps++;
#endif
! for (m = match_length;
! ++m < _F && key[m] == pattern[m]; );
!
! match_length = m;
! match_position = ((r - p) & (N - 1)) - 1;
! } while (m < _F);
#ifdef DEBUG
if (verbose)
fprintf(stderr, "Replacing node: %d -> %d\n", p, r);
***************
*** 121,125 ****
nextof(prev[p]) = nextof(p);
prev[nextof(p)] = prev[p];
prev[p] = NIL; /* remove p, it is further than r */
- match_length = _F;
}
--- 123,126 ----
*** ../distribution/lz.h Mon May 13 17:05:12 1991
--- lz.h Tue Jun 25 12:40:25 1991
***************
*** 36,46 ****
#ifndef __XENIX__
#define nextof(i) next[i]
- extern u_short
#ifdef __TURBOC__
! huge
! #endif
! next[];
#else
#define parts (array_size/32768 + 1)
#define nextof(i) next[(i) >> 15][(i) & 0x7fff]
--- 36,46 ----
#ifndef __XENIX__
#define nextof(i) next[i]
#ifdef __TURBOC__
! extern u_short huge * next;
! #else
! extern u_short next[];
! #endif /* TURBOC */
#else
#define parts (array_size/32768 + 1)
#define nextof(i) next[(i) >> 15][(i) & 0x7fff]
*** ../distribution/makefile Mon May 13 17:05:13 1991
--- makefile Tue Jun 25 12:26:33 1991
***************
*** 1,3 ****
--- 1,4 ----
+ SHELL = /bin/sh
DEST = /usr/local/bin
MANDEST = /usr/local/man/cat1
EXTHDRS =
***************
*** 9,15 ****
CC = gcc
! CFLAGS = -DBITS=18 -O -DCOMPAT -fstrength-reduce #-DBSD42 -DSUN4
LINTFLAGS = -DBITS=15 -DCOMPAT -DDEBUG -DGATHER_STAT -x
--- 10,16 ----
CC = gcc
! CFLAGS = -DBITS=18 -O -fstrength-reduce #-DBSD42 -DSUN4
LINTFLAGS = -DBITS=15 -DCOMPAT -DDEBUG -DGATHER_STAT -x
***************
*** 42,48 ****
all: $(PROGRAM) statist $(CATMAN)
lint: $(SRCS)
! lint $(LINTFLAGS) $(SRCS) $(LIBS)
$(PROGRAM): $(OBJS)
$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $(PROGRAM)
--- 43,49 ----
all: $(PROGRAM) statist $(CATMAN)
lint: $(SRCS)
! lint $(LINTFLAGS) $(SRCS) > lint.out
$(PROGRAM): $(OBJS)
$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $(PROGRAM)
***************
*** 50,58 ****
statist: statist.c freeze.h lz.h huf.h
$(CC) $(CFLAGS) -UCOMPAT $(LDFLAGS) -o statist statist.c $(LIBS)
! clean:; rm -f *.o *.b .,* core a.out $(PROGRAM) statist
install: $(DEST)/$(PROGRAM) $(MANDEST)/$(MAN)
$(DEST)/$(PROGRAM): $(PROGRAM)
install -s -c $(PROGRAM) $(DEST)
--- 51,64 ----
statist: statist.c freeze.h lz.h huf.h
$(CC) $(CFLAGS) -UCOMPAT $(LDFLAGS) -o statist statist.c $(LIBS)
! clean:; rm -f *.o *.b .,* core *.out $(PROGRAM) statist
install: $(DEST)/$(PROGRAM) $(MANDEST)/$(MAN)
+
+ patch:; rm -f patch.out
+ -for i in ../distribution/* ; do \
+ (diff -c $$i `basename $$i` >> patch.out); \
+ done
$(DEST)/$(PROGRAM): $(PROGRAM)
install -s -c $(PROGRAM) $(DEST)
*** ../distribution/patchlevel.h Mon May 13 17:05:13 1991
--- patchlevel.h Tue Jun 4 20:12:31 1991
***************
*** 1 ****
! #define PATCHLEVEL 2
--- 1 ----
! #define PATCHLEVEL 3
*** ../distribution/statist.c Mon May 13 15:12:12 1991
--- statist.c Tue May 14 21:18:45 1991
***************
*** 69,78 ****
#endif
giveres() {
u_short c;
signal(SIGINT, giveres);
for(c = 0; c < 62; c++) {
register short *p;
- register int j, k;
j = 0;
p = prnt;
--- 69,78 ----
#endif
giveres() {
u_short c;
+ register int j, k;
signal(SIGINT, giveres);
for(c = 0; c < 62; c++) {
register short *p;
j = 0;
p = prnt;
***************
*** 79,92 ****
k = p[c + T];
do j++; while ((k = p[k]) != R) ;
! if (j <= 8)
bits[j]++;
}
! printf("%d %d %d %d %d %d %d %d\n",
! bits[1], bits[2], bits[3], bits[4],
! bits[5], bits[6], bits[7], bits[8]);
fflush(stdout);
! for (c = 1; c <= 8; c++) bits[c] = 0;
}
freeze ()
--- 79,114 ----
k = p[c + T];
do j++; while ((k = p[k]) != R) ;
! if (j <= 6)
bits[j]++;
}
!
! k = bits[1] + bits[2] + bits[3] + bits[4] +
! bits[5] + bits[6];
!
! k = 62 - k; /* free variable length codes for 7 & 8 bits */
!
! j = 128 * bits[1] + 64 * bits[2] + 32 * bits[3] +
! 16 * bits[4] + 8 * bits[5] + 4 * bits[6];
!
! j = 256 - j; /* free byte images for these codes */
!
! /* Equation:
! bits[7] + bits[8] = k
! 2 * bits[7] + bits[8] = j
! */
! j -= k;
! if (j < 0 || k < j) {
! printf("Not enough information\n");
! } else {
! bits[7] = j;
! bits[8] = k - j;
! printf("%d %d %d %d %d %d %d %d\n",
! bits[1], bits[2], bits[3], bits[4],
! bits[5], bits[6], bits[7], bits[8]);
! }
fflush(stdout);
! for (c = 1; c <= 6; c++) bits[c] = 0;
}
freeze ()
------------------ cut here ---------------------
--
Leonid A. Broukhis | 89-1-95 Liberty St. | "BROUKHIS" is Hebrew for
7+095 494.6241 (h) | Moscow 123481 USSR | "BENEDICTAE"
7+095 132.9475 (o) | (leo at s514.ipmce.su) | {Licet omnia qualibet dicas}
exit 0 # Just in case...
--
Kent Landfield INTERNET: kent at sparky.IMD.Sterling.COM
Sterling Software, IMD UUCP: uunet!sparky!kent
Phone: (402) 291-8300 FAX: (402) 291-4362
Please send comp.sources.misc-related mail to kent at uunet.uu.net.
More information about the Comp.sources.misc
mailing list