run length encoding (alt.sources.d discussions)
Graham Toal
gtoal at tharr.UUCP
Thu Sep 6 23:16:16 AEST 1990
Archive-name: runlen.c
/*
( by the way, this and the previous huffman prog aren't meant to
be utilities for using - they're just for experimenting with;
I *don't* want yet another non-standard compression prog floating
around. I'd never live it down ;-) )
More experimentation with compression: a simple run-length filter.
This is the first idea that came into my head - these codings
are chosen so that all but a <dle> on its own stay the same
size or get smaller. However maybe a better idea would be a
coding which moved 3-byte sequences into 2-bytes; on data which
is arrays of machine words, 2, 3, and 4 byte sequences are
disproportionately common. (24 or 32-bit-wide picture files?)
Anyway, this took a 1028K scan (b/w, 300dpi) of a line drawing
(Doonesbury cartoon) and shrunk it to 395K; then the huffman
code I posted last week compressed that to 206K.
This compares middling well to 188K from 'compress'.
A textual postscript file went from 310K, rle -> 249K, huff -> 137K.
<dle> <dle><0>
<dle><dle> <dle><1>
<dle><dle> ... n ... <dle><dle> <dle><2><n-1>
a a
aa aa
aaa aaa
aaaa a<dle><3>
:
aaaaaaa ... n ... aa a<dle><n-1>
:
aaa ... 256 ... aaaaa a<dle><255>
Maybe folks could hack up other simple filters, and experiment
with putting them together in various orders. By the way, I tried
delta compression (processing the difference between the last byte
and this byte, rather than the byte itself). Didn't help. Got worse
in fact :-(
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define RLE_MAGIC 290690L
FILE *input_file;
FILE *output_file;
#ifndef TRUE
#define TRUE (0==0)
#define FALSE (0!=0)
#endif
int DLE = 234;
void output_run(FILE *f, int ch, int count) {
if (ch == DLE) {
fputc(DLE, f);
if (count > 2) fputc(2, f);
fputc(count-1, f);
} else {
fputc(ch, f);
if (count == 1) return;
if (count > 3) fputc(DLE, f); else fputc(ch, f);
if (count == 3) fputc(ch, f);
if (count > 3) fputc(count-1, f);
}
}
void hfputw(long w, FILE *f)
{
fputc((int)((w>>24L)&255L), f);
fputc((int)((w>>16L)&255L), f);
fputc((int)((w>> 8L)&255L), f);
fputc((int)((w )&255L), f);
}
long hfgetw(FILE *f)
{
long w;
w = fgetc(f); w = w<<8L;
w |= fgetc(f); w = w<<8L;
w |= fgetc(f); w = w<<8L;
w |= fgetc(f);
return(w);
}
int main(int argc, char **argv)
{
long fsize;
int curr, last;
int count;
int a;
if (argc != 4 || *argv[1] != '-') {
fprintf(stderr, "syntax: %s {-e,-d} input output\n", argv[0]);
exit(0);
}
input_file = fopen(argv[2], "rb");
if (input_file == NULL) {
fprintf(stderr, "%s: cannot open input file %s\n", argv[0], argv[2]);
exit(0);
}
if (strcmp(argv[1], "-e") == 0) {
output_file = fopen(argv[3], "wb");
if (output_file == NULL) {
fprintf(stderr, "%s: cannot open output file %s\n", argv[0], argv[3]);
fclose(input_file);
exit(0);
}
fseek(input_file, 0L, SEEK_END);
fsize = ftell(input_file);
fclose(input_file);
input_file = fopen(argv[2], "rb");
hfputw(RLE_MAGIC, output_file);
hfputw(fsize, output_file);
curr = fgetc(input_file); count = 1;
for (;;) {
for (;;) {
a = fgetc(input_file);
if (a == EOF) break;
if (a != curr) break;
count++;
if (count == 257) break;
}
if (a == EOF) break;
output_run(output_file, curr, count);
curr = a; count = 1;
}
output_run(output_file, curr, count);
fclose(input_file);
fclose(output_file);
} else if (strcmp(argv[1], "-d") == 0) {
long check_magic;
check_magic = hfgetw(input_file);
if (check_magic != RLE_MAGIC) {
fprintf(stderr, "%s: input file %s not huffman encoded (%ld vs %ld)\n",
argv[0], argv[2], check_magic, RLE_MAGIC);
exit(0);
}
output_file = fopen(argv[3], "wb");
fsize = hfgetw(input_file);
last = -1;
for (;;) {
int i;
a = fgetc(input_file);
if (a == EOF) break;
if (a == DLE) {
count = fgetc(input_file);
if (count <= 2 && last != -1) fputc(last, output_file), last = -1;
if (count == 0) {
fputc(DLE, output_file);
} else if (count == 1) {
fputc(DLE, output_file);
fputc(DLE, output_file);
} else if (count == 2) {
count = fgetc(input_file);
for (i = 0; i < count+1; i++) fputc(DLE, output_file);
} else {
if (last == -1) fprintf(stderr, "Error #1\n");
for (i = 0; i < count+1; i++) fputc(last, output_file);
}
last = -1;
} else {
if (last != -1) fputc(last, output_file);
last = a;
}
}
if (last != -1) fputc(last, output_file);
fclose(input_file);
fclose(output_file);
} else {
fprintf(stderr, "syntax: %s {-e,-d} input output\n", argv[0]);
}
exit(0);
}
More information about the Alt.sources
mailing list