WordStar to nroff filter
david at varian.UUCP
david at varian.UUCP
Wed Mar 14 11:10:57 AEST 1984
The following program was written several years ago here for the purpose
of moving a few large WordStar files to UNIX and nroff; it was used once
and the output fixed up by hand, and all further changes to those files
were done on UNIX. The programmer who wrote it is no longer at Varian,
but I am in touch with him, and he gave his OK for posting to the net;
however, he didn't want his name on it, as it is not a finished product.
Folks are welcome to use it as a starting base, and if anyone improves it,
please send me the changes, and I'll see that the author gets them as well.
I believe that there is very little change, if any, in the WordStar format
from Version 2.0 to the current 3.3; we actually use Version 3.0.
David Brown (415) 945-2199
Varian Instruments 2700 Mitchell Dr. Walnut Creek, Ca. 94598
{ihnp4,tektronix,hplabs,sytek,dual}!zehntel!varian!david
{amd70,fortune}!varian!david
...!decvax!sytek!zehntel!varian!david
...!ucbvax!menlo70!sytek!zehntel!varian!david
/*
Program: ws_nroff Revised: 01-Sep-81
Purpose: This program accepts a text file including formatting in-
formation for "Wordstar", and transforms it to a text file
including similar formatting information for "nroff".
Input: "Wordstar" V2.0 document file via standard input
(Wordstar is a word processor which runs under CP/M.)
Output: "nroff" source file via standard output
(nroff is a text processor which runs under UNIX V7.)
WARNING!!!
This program is incomplete - many WordStar functions are not
handled but just translated to comments - see the code below.
In addition, there are problems with un-hyphenating hyphenated
words and with indentation. There may be a problem with some
cases of underlining. Output generated from this program
should be inspected and corrected before feeding to nroff.
Author: the author wishes to remain anonymous due to the incompleteness
of the program. However, he would appreciate seeing additions and
fixes; if you improve this program, please send changes to David
Brown at Varian:
USENET: {zehntel,fortune,amd70}!varian!david
and I will forward them.
*/
#include <ctype.h> /* for "islower", "toupper" */
#include <stdio.h>
#include <string.h> /* for "strcpy", "strlen", "memmove" */
/*
Upper-case form of "c" if it is a lower-case letter, otherwise "c" itself.
"c" is evaluated more than once, so it must be free of side effects. It is
handed to the <ctype.h> routines as an unsigned char so that a character
with its high bit set is never passed as a negative value.
*/
#define capital(c) ((islower((unsigned char)(c)))? toupper((unsigned char)(c)) : (c))
#define ON 1
#define OFF 0
#define LINLIM (0x100-2) /* maximum input line length */
/*
Character constants. The "soft" variants of CR, LF and space are the plain
character with the high (flag) bit set; CHRMASK removes that bit.
*/
#define CHRMASK '\177' /* for stripping flag bit */
#define CR '\015' /* carriage return character */
#define SOFTCR '\215' /* discardable carriage return */
#define LF '\012' /* line feed (newline) character */
#define SOFTLF '\212' /* soft line feed character */
#define FF '\014' /* form feed character */
#define SOFTSP '\240' /* soft space character */
#define SOFTHPH '\036' /* ^^ is soft hyphen character */
#define SOFTHPH2 '\037' /* ^_ is soft hyphen character */
#define BOLDFCE '\002' /* ^B is boldface toggle character */
#define DSTRIKE '\004' /* ^D is doublestrike toggle character */
#define UNDERLN '\023' /* ^S is underline toggle character */
#define CPMEOF '\032' /* ^Z is WORDSTAR end-of-file char */
/*
reference numbers for WORDSTAR "dot" commands
"MP" denotes "merge-print" commands

Each number is built from the two command letters: first letter in the
high byte, second letter in the low byte. Every definition is fully
parenthesized so that it behaves as a single value wherever it is used;
without the outer parentheses "x == DOT" would group as
"(x == ('.'<<8)) | ' '" and always be true.
*/
#define AV (('A'<<8) | 'V') /* ask for variable value MP */
#define BP (('B'<<8) | 'P') /* bidirectional print */
#define CP (('C'<<8) | 'P') /* conditional page */
#define CS (('C'<<8) | 'S') /* clear screen MP */
#define CW (('C'<<8) | 'W') /* character width */
#define DF (('D'<<8) | 'F') /* data file MP */
#define DM (('D'<<8) | 'M') /* display message MP */
#define FI (('F'<<8) | 'I') /* file insert MP */
#define FM (('F'<<8) | 'M') /* footing margin */
#define FO (('F'<<8) | 'O') /* footing */
#define HE (('H'<<8) | 'E') /* heading */
#define HM (('H'<<8) | 'M') /* heading margin */
#define IG (('I'<<8) | 'G') /* ignore (unprinted comment) */
#define DOT (('.'<<8) | ' ') /* ignore (unprinted comment) */
#define IJ (('I'<<8) | 'J') /* interpret input as justified MP */
#define LH (('L'<<8) | 'H') /* line height */
#define LM (('L'<<8) | 'M') /* left margin MP */
#define LS (('L'<<8) | 'S') /* line spacing MP */
#define MB (('M'<<8) | 'B') /* margin at bottom */
#define MT (('M'<<8) | 'T') /* margin at top */
#define OJ (('O'<<8) | 'J') /* output justification MP */
#define OP (('O'<<8) | 'P') /* omit page number */
#define PA (('P'<<8) | 'A') /* new page */
#define PC (('P'<<8) | 'C') /* page number column */
#define PF (('P'<<8) | 'F') /* print-time line forming MP */
#define PL (('P'<<8) | 'L') /* paper length */
#define PN (('P'<<8) | 'N') /* page number */
#define PO (('P'<<8) | 'O') /* page offset */
#define RM (('R'<<8) | 'M') /* right margin MP */
#define RP (('R'<<8) | 'P') /* repeat MP */
#define RV (('R'<<8) | 'V') /* read variables MP */
#define SR (('S'<<8) | 'R') /* subscript/superscript roll */
#define SV (('S'<<8) | 'V') /* set variable MP */
#define UJ (('U'<<8) | 'J') /* micro-justification on/off */
#define NOT_FOUND (-1) /* no-such-command error flag */
#define LISTLEN 34 /* length of command list */
/* nroff font definitions */
#define ROMAN 'R'
#define BOLD 'B'
#define ITALIC 'I'
/**/
/*
Entry point. Writes the default nroff page set-up requests, then reads
standard input one line at a time with "getln" and hands each line to
"putln" for translation. The loop ends when "getln" returns 0, i.e. when
no more characters are available. Returns 0 so that the exit status seen
by the invoking environment is defined.
*/
int main()
{
    int len;                /* length of the line just read */
    int getln();
    int putln();
    char line[LINLIM+2];    /* LINLIM characters plus terminator room */

    printf(".pl 11i\n"); /* default page length */
    printf(".po 0.8i\n"); /* default page offset */
    printf(".ll 6.5i\n"); /* default line length */
    printf(".pc #\n"); /* interpret "#" as page no. */
    while ((len = getln(line, LINLIM)) != 0)
        putln (line, len);
    return (0);
} /* end of "main" */
/**/
/*
Fill buffer with available characters until end-of-line or end-of-page or
end-of-file, or until buffer full.

line    receives the characters read, followed by a terminating null;
        it must have room for maxlen+1 characters.
maxlen  greatest number of characters to store.

A hard or soft line feed or a form feed ends the line and is stored as its
last character. Returns the number of characters stored; 0 means that
end-of-file was reached before anything could be read. If the buffer
fills up before a line terminator is seen, a message is written to stderr
and the partial line is returned.
*/
int getln(line, maxlen)
char line[];
int maxlen;
{
    register int i = 0;
    int c;          /* getchar() result: kept as int so that EOF can */
                    /* never be confused with a data byte */
    char chr = 0;   /* last character stored, as a char so that it */
                    /* compares correctly with the character constants */

    while (i < maxlen && (c = getchar()) != EOF)
    {
        line[i++] = chr = (char) c;
        if (chr == LF || chr == SOFTLF || chr == FF)
            break;
    }
    line[i] = '\0';
    if (i >= maxlen && chr != LF && chr != SOFTLF && chr != FF)
        fprintf (stderr, "getln: line too long, %x hex\n", (unsigned) i);
    return (i);
} /* end of "getln" */
/**/
putln(line, len)
char line[];
int len; /* line length up to terminal null */
{
static int vertsp = 0,
indent = 0,
softflag = OFF,
underline = OFF,
boldface = OFF,
doublestrike = OFF;
static char font = ROMAN,
carryover[LINLIM+2] = 0;
int tmp = 0;
register i;
/* Special actions for beginning of line */
/* skip control characters */
for (i=0; line[i]==BOLDFCE || line[i] == DSTRIKE || line[i] == UNDERLN; i++)
;
/* squeeze spaces out */
for (tmp = i; line[tmp] == SOFTSP || line[tmp] == ' '; tmp++)
;
strcpy (line+i, line+tmp);
if (indent != tmp-i)
{
indent = tmp - i; /* set new indentation level */
printf ("'in %d\n", indent);
}
/* check for special lines */
switch (line[i] & CHRMASK)
{
case CR: /* blank line */
if (line[i] == CR && line[i+1] == LF) /* count only hard CR & LF */
vertsp++;
goto endputln; /* discard original line */
break;
case '.': /* control line (first char = dot) */
if (indent != 0)
{
printvsp (vertsp); /* issue vertical space collected */
vertsp = 0;
printf("\\&"); /* dot wasn't really first char, */
break; /* so hide it from nroff */
}
stripflags(line);
switch (cmd(line)) /* which control command? */
{
case CP: /* Conditional Page break */
printvsp (vertsp); /* issue vertical space collected */
vertsp = 0;
if (sscanf(line+3, "%d", &tmp) == 0) /* null argument */
{
stdterm (line, len);
printf (".\\\" %s", line);
}
else /* valid argument */
printf (".if (\\.h - \\nl < %u) .bp\n", tmp);
goto endputln; /* discard original line */
case CS: /* Clear Screen */
goto endputln; /* discard original line */
case CW: /* Character Width */
if (sscanf(&line[3], "%d", &tmp) == 0) /* null argument */
{
stdterm (line, len);
printf (".\\\" %s", line);
}
else /* valid argument */
printf (".ps %d\n", tmp*0.6);
goto endputln; /* discard original line */
case DM: /* Display Message on tty */
stdterm (line, len);
printf (".tm %s", &line[i]);
goto endputln;
case PA: /* Page Advance */
printvsp (vertsp);
vertsp = 0;
printf (".bp\n");
goto endputln; /* discard original line */
case DF: /* Data File */
case FI: /* File Insert */
case FO: /* Footing Text */
case FM: /* Footing Margin */
case HE: /* heading */
case HM: /* heading margin */
case IG: /* ignore (unprinted comment) */
case DOT: /* ignore (unprinted comment) */
case IJ: /* interpret input as justified MP */
case LH: /* line height */
case LM: /* left margin MP */
case LS: /* line spacing MP */
case MB: /* margin at bottom */
case MT: /* margin at top */
case OJ: /* output justification MP */
case OP: /* omit page number */
case PC: /* page number column */
case PF: /* print-time line forming MP */
case PL: /* paper length */
case PN: /* page number */
case PO: /* page offset */
case RM: /* right margin MP */
case RP: /* repeat MP */
case RV: /* read variables MP */
case SR: /* subscript/superscript roll */
case SV: /* set variable MP */
case UJ: /* micro-justification on/off */
case NOT_FOUND: /* ".??" is unknown control command */
default:
stdterm (line, len);
printf (".\\\" %s", line); /* print as comment */
goto endputln;
} /* end of "dot command" cases */
break;
default: /* begining of normal text line */
break;
} /* end of first character cases */
/*
First character checks completed; prepare to check the rest of the line.
*/
printvsp (vertsp); /* issue accumulated vertical space */
vertsp = 0;
printf ("%s", carryover); /* first half of hyphenated word */
carryover[0] = '\0';
softflag = OFF;
/*
Massage the non-blank line
*/
for (i=0; line[i]; i++)
{
switch (line[i] & CHRMASK)
{
case ' ':
if (softflag == ON && line[i] == SOFTSP)
{
tmp = i;
while (line[++i] == SOFTSP) /* discard soft spaces */
;
strcpy (line+tmp, line+i);
i = tmp-1;
}
break;
case SOFTHPH:
/*
This soft hyphen never ends a line. Evidently it is left
over from a time when the word was broken at this point.
The soft hyphen is simply discarded.
*/
strcpy (line+i, line+i+1);
i--;
break;
case SOFTHPH2:
/*
This soft hyphen always ends a line. The first part of the
word must be saved to be installed after any leading blanks
on the next line.
*/
line[i] = '\0';
do
i--;
while (line[i] != ' ' && line[i] != '\t' && i > 0);
strcpy (carryover, line+i+1);
stdterm (line, i);
break;
case CR:
if (line[i] == CR && line[i+1] == LF)
vertsp++;
stdterm (line, i+1);
break;
case UNDERLN: /* underline toggle */
/*
Reverse the state of the underline switch.
Change to or from underline (italic) font, depending on present
state of underline switch.
*/
underline = (underline) ? OFF : ON;
if (underline == ON) font = ITALIC;
else if (boldface == ON) font = BOLD;
else if (doublestrike == ON) font = BOLD;
else font = ROMAN;
/*
Move first part of line out, inserting font setting.
*/
line[i++] = 0;
stripflags (line);
printf ("%s\\f%c", line, font);
strcpy (line, line+i);
i = -1;
break;
case BOLDFCE: /* boldface toggle */
/*
Reverse the state of the boldface switch.
Change to or from boldface font, depending on present state of
underline switch and boldface switch.
*/
boldface = (boldface) ? OFF : ON;
if (underline == OFF && doublestrike == OFF)
{
if (boldface == ON) font = BOLD;
else if (doublestrike == ON) font = BOLD;
else font = ROMAN;
/*
Move first part of line out, inserting font setting.
*/
line[i++] = 0;
stripflags (line);
printf ("%s\\f%c", line, font);
strcpy (line, line+i);
i = -1;
}
else /* just delete control char */
{
strcpy (line+i, line+i+1);
i--;
}
break;
case DSTRIKE: /* doublestrike toggle */
/*
Reverse the state of the doublestrike switch.
Change to or from boldface font, depending on present state of
underline switch, boldface switch, and doublestrike switch.
*/
doublestrike = (doublestrike) ? OFF : ON;
if (underline == OFF && boldface == OFF)
{
if (doublestrike == ON) font = BOLD;
else font = ROMAN;
/*
Move first part of line out, inserting font setting.
*/
line[i++] = 0;
stripflags (line);
printf ("%s\\f%c", line, font);
strcpy (line, line+i);
i = -1;
}
else /* just delete control char */
{
strcpy (line+i, line+i+1);
i--;
}
break;
case FF: /* page break */
stdterm (line, i); /* terminate line, dropping FF char */
printf ("%s", line); /* print the line */
strcpy (line, ".bp\n"); /* issue page-break command */
break;
case CPMEOF: /* WORDSTAR end of file character */
case EOF:
line[i--] = 0; /* decrement index anticipating */
break; /* automatic increment */
default: /* just any old character */
break;
} /* end of character switch */
softflag = (line[i] & ~CHRMASK) ? ON : OFF; /* copy flag */
line[i] &= CHRMASK; /* strip flag bit */
} /* end of line massager */
if (pagepos == 0)
if (topmargin >= 1)
for (i = topmargin; i; i--)
{
if (i-headmargin == 1) printf (".tl '%s'\n", header);
else putchar('\n');
pagepos++;
}
printf ("%s", line);
pagepos = (++pagepos)%pagelen;
endputln:;
} /* end of "putln" */
/**/
/*
See if first two characters following "." in line form a WORDSTAR
"dot" command, and return the command identifier or "not found" flag.

line    null-terminated control line; letter case is ignored.

Returns NOT_FOUND when the line does not start with a dot, when nothing
follows the dot, or when the two letters are not in the command list.
Any line that starts with two dots is a comment and yields DOT, whatever
character follows ("..x").
*/
int cmd(line)
char line[];
{
    /*
    Integers are composed of characters in command. The list is in
    ascending numeric order, as the binary search below requires; DOT
    comes first because '.' sorts below every letter.
    */
    static unsigned dotcmd[LISTLEN]=
    {
        DOT,
        AV,
        BP,
        CP,
        CS,
        CW,
        DF,
        DM,
        FI,
        FM,
        FO,
        HE,
        HM,
        IG,
        IJ,
        LH,
        LM,
        LS,
        MB,
        MT,
        OJ,
        OP,
        PA,
        PC,
        PF,
        PL,
        PN,
        PO,
        RM,
        RP,
        RV,
        SR,
        SV,
        UJ
    };
    /* signed bounds: "high" must be able to reach -1 to end the search */
    int low = 0;
    int high = LISTLEN-1;
    int mid;
    unsigned char first, second;
    unsigned target;

    if (line[0] != '.' || line[1] == '\0') /* make sure dot is present */
        return (NOT_FOUND);
    if (line[1] == '.') /* anything close qualifies */
        return (DOT); /* as a comment ("..x") */
    first = (unsigned char) line[1];
    second = (unsigned char) line[2];
    target = ((unsigned) capital(first) << 8) | (unsigned) capital(second);
    while (low <= high)
    {
        mid = low + (high - low)/2;
        if (dotcmd[mid] == target)
            return ((int) dotcmd[mid]);
        if (target < dotcmd[mid])
            high = mid - 1;
        else
            low = mid + 1;
    }
    return (NOT_FOUND);
} /* end of "cmd" */
/*
Write the nroff request that stands for the line ends collected so far:
nothing for 0, a plain break for 1, one blank line for 2, and an explicit
count of blank lines (one less than the number of line ends) otherwise.
*/
int printvsp(vertsp)
int vertsp; /* number of line ends collected */
{
    if (vertsp == 1)
        printf (".br\n"); /* break only, no blank line */
    else if (vertsp == 2)
        printf (".sp\n"); /* ".sp" alone gives one blank line */
    else if (vertsp != 0)
        printf (".sp %d\n", vertsp-1); /* count must be spelled out */
} /* end of "printvsp" */
/*
Apply standard terminator to line.
Len is length of line. Terminal null is not counted.
Linebuf must have room for 0 to 2 characters beyond current end of line.

A trailing hard or soft line feed, and a hard or soft carriage return in
front of it, are dropped; the line then ends in a single newline followed
by a null. A length of 0 (or less) is treated as an empty line, so no
character in front of the buffer is ever examined. Always returns 0.
*/
int stdterm(line, len)
char line[];
int len;
{
    if (len < 0)
        len = 0;
    if (len > 0 && (line[len-1] == LF || line[len-1] == SOFTLF))
        len--;
    if (len > 0 && (line[len-1] == CR || line[len-1] == SOFTCR))
        len--;
    line[len++] = '\n';
    line[len] = 0;
    return (0);
}
/*
Clear the flag (high) bit of each character of the line in turn. The walk
ends at the first character that is zero once its flag bit is cleared.
*/
int stripflags(line)
char line[];
{
    register char *cp = line;

    while ((*cp &= CHRMASK) != 0)
        cp++;
}
More information about the Comp.sources.unix
mailing list