Message compression utility
Bob Eager
cur022%cluster at ukc.ac.uk
Mon Jan 14 05:44:30 AEST 1991
I was asked by a few people to post this, so here it is. It is a small
C program for compressing messages. It takes a list of messages and message
numbers as input, and spits out a C function and some arrays. The function
takes a message number as argument and returns the message. It is most
efficient if the message numbers form a dense set of low numbers.
I have included a sample message file and a program for testing the function
that is generated.
My thanks to Peter Stephens of Edinburgh University (now Edinburgh Portable
Compilers) for writing the code on which this C version is based.
There are only three files, and this is not UNIX specific, so the three files
are just concatenated below.
-----------------------------------------------------------------
/*
* File: compmess.c
*
* Program to compress a file of error messages, generating suitable
* arrays to hold the compressed form, and also some code to expand
* them again
*
* Bob Eager January 1991
*
* Thanks to Peter Stephens of Edinburgh University for the original idea.
*
*/
/*
* Values for exit status:
* 0 - Success
* 1 - Argument error
* 2 - Failed to open file
* 3 - Workspace overflow
*
*/
/*
* Compile time options.
* Define exactly one target system name to be 1; all the rest should be 0.
*
*/
#define IBMPC 1 /* Normal IBM PC (8086,80186,80286) */
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Fixed workspace limits. Exceeding MAXLETT or MAXWORD makes the program
 * exit with status 3 (workspace overflow).
 */
#define MAXLETT 1000 /* Max length of 'lett' array */
#define MAXLINE 80 /* Max input line length */
#define MAXWORD 2000 /* Max length of 'word' array */
/*
 * INT holds one packed group of characters: five 6-bit codes (shift counts
 * 25, 19, 13, 7 and 1) plus a continuation flag in bit 0, so it needs at
 * least 31 value bits.
 * NOTE: the typedef is only supplied when IBMPC is non-zero; no other
 * definition of INT is visible in this file.
 */
#if IBMPC
typedef long INT;
#endif
/* Forward references */
static void do_listing(FILE *);
static void do_output(FILE *);
static int getword(char **,char *);
static void lit(int *,char *,FILE *);
static void mess(char *,int);
static FILE *openio(char *,char *);
static int readn(FILE *);
static void squash(char *);
static void usage(void);
/* Local data */
/*
 * 'lett' holds the dictionary of distinct words. Each element packs up to
 * five 6-bit character codes; bit 0 is set when the word continues in the
 * following element.
 */
static INT lett[MAXLETT]; /* Encoded words */
static int next = 0; /* Index of next free slot in 'lett' array */
static int nmax = 0; /* Highest message number seen */
static int num = 0; /* Next free slot in 'word' array */
static int nummax = 0; /* Index in 'word' of the most recently read message number */
static char *progname; /* Program name */
/*
 * 'word' is a flat list: each message number is followed by one entry per
 * word of that message, holding the word's index in 'lett' with bit 0x8000
 * set. An entry without 0x8000 therefore ends the preceding message; the
 * whole list is terminated by a 0 entry.
 */
static int word[MAXWORD]; /* Message and word numbers */
/*
 * Encoding table, indexed by 7-bit character value.
 *
 * Letters and a small set of punctuation characters get their own codes in
 * the range 1..61. Every other character (including digits and space) maps
 * to 63, which 'outtt' turns back into '?'. Code 0 is never produced here;
 * the decoders treat a 0 field as padding and skip it.
 */
const char intt[128] = { /* Array to map characters to 6-bit code */
63,63,63,63,63,63,63,63, /* 0x00-0x07 */
63,63,63,63,63,63,63,63, /* 0x08-0x0f */
63,63,63,63,63,63,63,63, /* 0x10-0x17 */
63,63,63,63,63,63,63,63, /* 0x18-0x1f */
63,63,63,61,63,60,27,30, /* 0x20-0x27: '#' '%' '&' and apostrophe are mapped */
31,32,63,63,63,28,59,29, /* 0x28-0x2f: '(' ')' '-' '.' '/' are mapped */
63,63,63,63,63,63,63,63, /* 0x30-0x37: digits are not representable */
63,63,63,63,63,63,63,63, /* 0x38-0x3f */
63, 1, 2, 3, 4, 5, 6, 7, /* 0x40-0x47: '@' unmapped, then 'A'.. */
8, 9,10,11,12,13,14,15, /* 0x48-0x4f */
16,17,18,19,20,21,22,23, /* 0x50-0x57 */
24,25,26,63,63,63,63,63, /* 0x58-0x5f: ..'Z' */
63,33,34,35,36,37,38,39, /* 0x60-0x67: '`' unmapped, then 'a'.. */
40,41,42,43,44,45,46,47, /* 0x68-0x6f */
48,49,50,51,52,53,54,55, /* 0x70-0x77 */
56,57,58,63,63,63,63,63 /* 0x78-0x7f: ..'z' */
};
/*
 * Decoding table, indexed by 6-bit code; the inverse of 'intt'.
 *
 * Entry 0 is a placeholder: the decoders skip a code of 0 and never look it
 * up. Entries 62 and 63 are '?'; 63 is the code 'intt' assigns to every
 * character it cannot represent.
 */
const char outtt[64] = { /* Array to map 6-bit codes to characters */
'?','A','B','C','D','E','F','G', /* codes 0-7 */
'H','I','J','K','L','M','N','O', /* codes 8-15 */
'P','Q','R','S','T','U','V','W', /* codes 16-23 */
'X','Y','Z','&','-','/','\'','(', /* codes 24-31 */
')','a','b','c','d','e','f','g', /* codes 32-39 */
'h','i','j','k','l','m','n','o', /* codes 40-47 */
'p','q','r','s','t','u','v','w', /* codes 48-55 */
'x','y','z','.','%','#','?','?' /* codes 56-63 */
};
/*
 * Program entry point.
 *
 * Usage: compmess input output listing
 *
 * Each input line has the form
 *     number "message text"
 * Reading stops at the first line whose number is 0 (readn() also returns
 * 0 at end of file). Every message is split into words; each distinct word
 * is stored once in 'lett' and referenced from 'word'. A listing is then
 * written to 'listing' and the generated C source to 'output'.
 *
 * Exit status: 0 success, 1 argument error, 2 file open failure,
 * 3 workspace overflow.
 */
int main(int argc,char *argv[])
{
    static char fallback[] = "compmess"; /* Used if argv[0] is absent */
    int n;                      /* Current message number */
    int ch,i;
    int len;                    /* Characters stored in 'input' */
    char *ptr;                  /* Pointer to next input word */
    FILE *infp,*outfp,*listfp;  /* I/O file pointers */
    char input[MAXLINE];        /* Input line buffer */
    char wk1[MAXLINE];          /* String work area */

    /*
     * Derive the program name for messages: drop any directory prefix
     * first, then the extension, then fold to lower case. Stripping the
     * directory first keeps a '.' in a directory name (e.g. "./compmess")
     * from truncating the name.
     */
    progname = (argc > 0 && argv[0] != NULL) ? argv[0] : fallback;
    for(ptr = progname; *ptr != '\0'; ptr++) {
        if(*ptr == '\\' || *ptr == '/') progname = ptr + 1;
    }
    ptr = strrchr(progname,'.');
    if(ptr != NULL && ptr != progname) *ptr = '\0';
    for(i = 0; progname[i] != '\0'; i++) {
        /* <ctype.h> functions require an unsigned char value */
        progname[i] = (char) tolower((unsigned char) progname[i]);
    }

    if(argc != 4) usage();

    infp = openio(argv[1],"r");
    outfp = openio(argv[2],"w");
    listfp = openio(argv[3],"w");

    for(i = 0; i < MAXWORD; i++) word[i] = 0;
    lett[0] = (INT) 0;          /* Initialise terminator */

    /* Main loop - once for each message */
    while((n = readn(infp)) != 0) {
        if(n > nmax) nmax = n;  /* Update high water mark */
        if(num >= MAXWORD) {
            fputs("Overflow of 'word' array\n",stderr);
            exit(3);
        }
        word[num] = n;          /* Store message number at start */
        nummax = num;           /* Store number of last message so far */

        /* Scan for start of message; stop at end of line or end of file */
        do {
            ch = fgetc(infp);
        } while(ch != '"' && ch != '\n' && ch != EOF);

        /*
         * Read message within quotes. The buffer always starts empty, so a
         * line without a message cannot inherit the previous one, and the
         * copy is bounded by the buffer size.
         */
        len = 0;
        if(ch == '"') {
            while((ch = fgetc(infp)) != '"' && ch != '\n' && ch != EOF) {
                if(len < MAXLINE - 1) input[len++] = (char) ch;
            }
            /* Skip trailing junk on line */
            while(ch != '\n' && ch != EOF) ch = fgetc(infp);
        }
        input[len] = '\0';

        squash(&input[0]);      /* Squash multiple spaces */
        if((strlen(input) == 0) || (strcmp(input," ") == 0))
            continue;           /* Ignore empty lines and messages */

        num++;                  /* Point beyond message number */
        fprintf(listfp,"\n%3d",n);
                                /* Output message number to listing */
        ptr = &input[0];
        while(getword(&ptr,&wk1[0])) {
                                /* Get next word in message */
            if(strlen(wk1) != 0) {
                lit(&i,&wk1[0],listfp);
                                /* Get word index to 'i' */
                if(num >= MAXWORD) {
                    fputs("Overflow of 'word' array\n",stderr);
                    exit(3);
                }
                word[num++] = i | 0x8000;
                                /* Store with continuation bit */
            }
        }
    }

    if(num >= MAXWORD) {
        fputs("Overflow of 'word' array\n",stderr);
        exit(3);
    }
    word[num] = 0;              /* Store word list terminator */
    fputc('\n',listfp);
    fclose(infp);

    /* Generate listing of messages */
    do_listing(listfp);
    fclose(listfp);

    /* Generate actual output file */
    do_output(outfp);
    fclose(outfp);

    fprintf(stderr,"%s: function 'message' generated\n",progname);
    return 0;
}
/*
 * Write the decoded message list to the listing file.
 *
 * After two newlines, every message number from 1 to 'nmax' is decoded
 * with mess(); numbers that decode to an empty string are left out.
 */
static void do_listing(FILE *listfp)
{
    char text[MAXLINE];
    int msgno = 1;

    fputs("\n\n",listfp);
    while(msgno <= nmax) {
        mess(text,msgno);       /* Decode message 'msgno' into 'text' */
        if(text[0] != '\0')
            fprintf(listfp,"%3d %s\n",msgno,text);
        msgno++;
    }
}
/*
 * Routine to generate the main output file (C version).
 *
 * Writes to 'outfp' a complete C source file containing:
 *   - a banner comment listing every message in clear text;
 *   - the 'outtt' decoding table;
 *   - 'mword', a copy of word[0..num] preceded by one extra 0 entry;
 *   - 'mlett', a copy of the used part of 'lett' preceded by one extra 0;
 *   - the function 'void message(char *mes,int n)' which decodes message
 *     'n' into 'mes'.
 *
 * Because of the extra leading entries, MWORDMAX is num+1, DEFAULT (the
 * entry used for unknown message numbers) is nummax+1, and the generated
 * code reads mlett[k+1].
 *
 * This routine only reads the global tables; it does not modify them.
 */
static void do_output(FILE *outfp)
{
    /*
     * Fixed text of the generated file. It is written with fputs(), never
     * fprintf(), because it contains '%' characters.
     */
    static const char *const table_text[] = {
        "const char outtt[64] = {\n",
        "\t'?','A','B','C','D','E','F','G',\n",
        "\t 'H','I','J','K','L','M','N',\n",
        "\t 'O','P','Q','R','S','T','U',\n",
        "\t 'V','W','X','Y','Z','&','-',\n",
        "\t '/','\\'','(',')',\n",
        "\t 'a','b','c','d','e','f','g',\n",
        "\t 'h','i','j','k','l','m','n',\n",
        "\t 'o','p','q','r','s','t','u',\n",
        "\t 'v','w','x','y','z','.','%',\n",
        "\t '#','?','?'\n};\n\n"
    };
    static const char *const function_text[] = {
        "void message(char *mes,int n)\n",
        "{\tint i,j,k,q;\n",
        "\tINT m,sh;\n\n",
        "\t*mes++ = \' \';\n",
        "\t*mes = \'\\0\';\n",
        "\tj = 0;\n",
        "\tfor(i = 0; i < MWORDMAX+1; i++) {\n",
        "\t\tif(n == mword[i]) {\n",
        "\t\t\tj = 1;\n",
        "\t\t\tbreak;\n",
        "\t\t}\n",
        "\t}\n\n",
        "\tif(j == 0) {\n",
        "\t\ti = DEFAULT;\n",
        "\t\tj = 1;\n",
        "\t}\n\n",
        "\twhile(1) {\n",
        "\t\tk = mword[i+j];\n",
        "\t\tif((k & 0x8000) == 0) break;\n",
        "\t\tk &= 0x7fff;\n",
        "\t\tif(j != 1) *mes++ = ' ';\n",
        "\t\tdo {\n",
        "\t\t\tm = mlett[k+1];\n",
        "\t\t\tsh = 25;\n",
        "\t\t\tdo {\n",
        "\t\t\t\tq = (int) ((m >> sh) & 0x3f);\n",
        "\t\t\t\tif(q != 0) *mes++ = outtt[q];\n",
        "\t\t\t\tsh -= 6;\n",
        "\t\t\t} while(sh >= 0);\n",
        "\t\t\tk++;\n",
        "\t\t} while((m & 1) != 0);\n",
        "\t\tj++;\n",
        "\t}\n",
        "\t*mes = '\\0';\t\t\t/* Terminate string */\n",
        "}\n",
        "\n/*\n * End of file: message.c\n *\n*/\n"
    };
    int i,j,k;
    int last = next - 1;        /* Last used slot in 'lett' */
    unsigned t;
    char m[MAXLINE];

    /* File header comment */
    fputs("/*\n * File: message.c\n *\n",outfp);
    fprintf(outfp," * This file is generated automatically by the '%s' program\n",progname);
    fputs(" *\n * It should never be edited; rather, alter the message file then\n",outfp);
    fprintf(outfp," * rerun '%s'\n *\n */\n\n",progname);
    fputs("#include <stdio.h>\n\n/",outfp);

    /* Banner comment listing all messages in clear text */
    for(i = 1; i <= 70; i++) fputc('*',outfp);
    fputs("\n * Outputs an error message stored in a compressed format *\n",outfp);
    fputs(" *",outfp);
    for(i = 1; i <= 68; i++) fputc(' ',outfp);
    fputc('*',outfp);
    fputc('\n',outfp);
    for(i = 1; i <= nmax; i++) {
        mess(&m[0],i);          /* Get message 'i' to 'm' */
        k = (int) strlen(m);
        if(k != 0) {            /* If message exists */
            fprintf(outfp," * %3d %s",i,m);
            for(j = 1; j <= 58 - k; j++) fputc(' ',outfp);
            fputs("*\n",outfp);
        }
    }
    fputs(" *",outfp);
    for(i = 1; i <= 68; i++) fputc(' ',outfp);
    fputs("*\n *",outfp);
    for(i = 1; i <= 69; i++) fputc('*',outfp);
    fputs("/\n\n",outfp);

    /* Type and size definitions */
    fputs("typedef\tlong\tINT;\n\n",outfp);
    fprintf(outfp,"#define\tMWORDMAX\t%d\n",num+1);
    fprintf(outfp,"#define\tDEFAULT\t\t%d\n\n",nummax+1);

    /* Decoding table */
    for(t = 0; t < sizeof(table_text)/sizeof(table_text[0]); t++)
        fputs(table_text[t],outfp);

    /* Message/word index table, eight entries per line */
    fputs("const int mword[MWORDMAX+1] = {\n",outfp);
    fputs("\t0,\n\t",outfp);
    for(i = 0; i <= num; i++) {
        fprintf(outfp,"0x%04x",(unsigned) word[i]);
        if(i != num) fputc(',',outfp);
        else fputc('\n',outfp);
        if(((i + 1) % 8) == 0) {
            fputc('\n',outfp);
            if(i != num)
                fputc('\t',outfp);
        }
    }
    fputs("};\n",outfp);

    /* Packed word table, four entries per line */
    fprintf(outfp,"\nconst INT mlett[%d] = {\n\t0,\n\t",last+2);
    for(i = 0; i <= last; i++) {
        /* The cast keeps format and argument in step whatever INT is */
        fprintf(outfp,"0x%08lx",(unsigned long) lett[i]);
        if(i != last) fputc(',',outfp);
        if(((i + 1) % 4) == 0) {
            fputc('\n',outfp);
            if(i != last)
                fputc('\t',outfp);
        }
    }
    if((last+1)%4 != 0) fputc('\n',outfp);
    fputs("};\n\n",outfp);

    /* The decoding function itself */
    for(t = 0; t < sizeof(function_text)/sizeof(function_text[0]); t++)
        fputs(function_text[t],outfp);
}
/*
 * Copy the next space-delimited word of the string at '*s' into 'word'
 * and advance '*s' past it.
 *
 * Returns 0 (leaving '*s' and 'word' untouched) when '*s' already points
 * at the terminating NUL, otherwise 1. Leading spaces are skipped first;
 * if only spaces remain, the word returned is empty. On return '*s' points
 * just beyond the single space that ended the word, or at the NUL if the
 * word ran to the end of the string.
 */
static int getword(char **s,char *word)
{
    char *src = *s;
    char *dst = word;

    if(*src == '\0') return 0;          /* Nothing left */

    while(*src == ' ') src++;           /* Lose leading spaces */

    while(*src != ' ' && *src != '\0')  /* Copy the word itself */
        *dst++ = *src++;
    *dst = '\0';

    if(*src == ' ') src++;              /* Step over the delimiter */
    *s = src;
    return 1;
}
/*
* This routine searches for the word 'txt' in the current word list.
* If found, it returns the word index in 'p'.
* If not found, it adds the word to the word list and again returns
* the word index in 'p'.
*
*/
static void lit(int *p,char *txt,FILE *listfp)
{ int ch = 0;
int i,j;
int l = 0;
int txtlen = strlen(txt);
INT w = 0;
INT sh = 25;
while(ch < txtlen) {
i = txt[ch++]; /* Get next character in word */
i = intt[i]; /* Convert to 6-bit code */
w = w | (((INT) i) << sh);
sh -= 6;
if(sh >= 0) continue;
if(ch < txtlen) w |= 1;
if(next+l >= MAXLETT) {
fprintf(stderr,"Overflow of 'lett' array\n");
exit(3);
}
lett[next+l] = w;
w = 0;
sh = 25;
l++;
}
if(sh != 25) {
if(next+l >= MAXLETT) {
fprintf(stderr,"Overflow of 'lett' array\n");
exit(3);
}
lett[next+l] = w;
l++;
}
/* Store any remainder */
for(i = 0; i < next; i++) {
for(j = 0; j < l; j++) {
if(lett[i+j] != lett[next+j]) goto fail;
}
goto found;
fail:;
}
*p = next;
next += l;
fprintf(listfp," word entered");
return;
found:
*p = i;
fprintf(listfp," word found ");
}
/*
 * Decode message number 'n' into the character array 'mes'.
 *
 * The first entry of 'word' (below index 'num') equal to 'n' marks the
 * message; the entries after it that have bit 0x8000 set are its words.
 * Words are separated by single spaces. If 'n' is not present, 'mes' is
 * set to the empty string.
 */
static void mess(char *mes,int n)
{
    int start = -1;             /* Index of the message number in 'word' */
    int idx;
    int pos;

    for(idx = 0; idx < num; idx++) {
        if(word[idx] == n) {
            start = idx;
            break;
        }
    }
    if(start < 0) {
        *mes = '\0';
        return;
    }

    for(pos = start + 1; (word[pos] & 0x8000) != 0; pos++) {
        int slot = word[pos] & 0x7fff;  /* Index of the word in 'lett' */
        INT packed;

        if(pos != start + 1) *mes++ = ' ';
        do {
            int shift;

            packed = lett[slot++];
            /* Unpack five 6-bit codes; a zero code is padding */
            for(shift = 25; shift >= 0; shift -= 6) {
                int code = (int) ((packed >> shift) & 0x3f);
                if(code != 0) *mes++ = outtt[code];
            }
        } while((packed & 1) != 0);     /* Bit 0 set: word continues */
    }
    *mes = '\0';                /* Terminate string */
}
/*
 * Function to open a file in a specified mode.
 *
 * 'file' is the file name and 'mode' the fopen() mode string.
 * Returns the open stream. Does not return if there is a failure, but
 * outputs an error message and exits with status 2.
 */
static FILE *openio(char *file,char *mode)
{
    FILE *fp = fopen(file,mode);

    if(fp == NULL) {
        fprintf(stderr,"%s: cannot open '%s'\n",progname,file);
        exit(2);
    }
    return fp;                  /* Hand the stream back to the caller */
}
/*
 * Read an unsigned decimal number from 'fp' and return its value.
 *
 * Digits are consumed up to and including the first non-digit character,
 * which is discarded. Returns 0 if the first character read is not a
 * digit (this includes end of file).
 */
static int readn(FILE *fp)
{
    int value = 0;
    int c = fgetc(fp);

    while(isdigit(c)) {
        value = value*10 + (c - '0');
        c = fgetc(fp);
    }
    return value;
}
/*
 * Collapse every run of spaces in the string 's' to a single space.
 * The string is rewritten in place; a single leading or trailing space
 * is kept.
 */
static void squash(char *s)
{
    const char *rd;             /* Read position */
    char *wr = s;               /* Write position */
    int prev_blank = 0;         /* Was the last character kept a space? */

    for(rd = s; *rd != '\0'; rd++) {
        int blank = (*rd == ' ');

        if(blank && prev_blank) continue;   /* Drop repeated space */
        *wr++ = *rd;
        prev_blank = blank;
    }
    *wr = '\0';
}
/*
 * Routine to output brief usage information on stderr, then exit with
 * status 1 (argument error). Does not return.
 *
 * Defined with a (void) parameter list so the definition is a prototype
 * and agrees with the forward declaration.
 */
static void usage(void)
{
    fprintf(stderr,"Usage: %s input output listing\n",progname);
    exit(1);
}
/*
* End of file: compmess.c
*
*/
-----------------------------------------------------------------
1 "REPEAT is not required"
2 "Label & has already been set in this block"
4 "& is not a Switch name at current textual level"
5 "Switch name & in expression or assignment"
6 "Switch label &(#) set a second time"
7 "Name & has already been declared"
0
-----------------------------------------------------------------
/*
* File: testmess.c
*
* Test program for message compression system
*
* Link with message.c for testing
*
*/
#include <stdio.h>
#include <stdlib.h>
/* External references */
extern void message(char *,int);
/*
 * Test driver: reads message numbers from standard input and prints the
 * corresponding decoded message for each one. Stops at a message number
 * of 0, at end of input, or at input that is not a number.
 */
int main(int argc,char *argv[])
{
    int n;
    /*
     * Sized with room to spare: message() writes a leading space in front
     * of the message text, so an 80-byte buffer can be one byte short for
     * the longest message the compressor accepts.
     */
    char mes[128];

    (void) argc;                /* Command line is not used */
    (void) argv;

    /* Read a message number; fscanf() returns 1 only on a successful read */
    while(fscanf(stdin,"%d",&n) == 1 && n != 0) {
        message(&mes[0],n);
        fprintf(stdout,"Message %d => \\%s\\\n",n,mes);
    }
    return 0;
}
/*
* End of file: testmess.c
*
*/
-------------------------+-------------------------------------------------
Bob Eager | University of Kent at Canterbury
| +44 227 764000 ext 7589
-------------------------+-------------------------------------------------
More information about the Alt.sources
mailing list