texchk (part 1 of 2) - syntax checker for LaTeX
sources-request at panda.UUCP
sources-request at panda.UUCP
Wed Dec 18 01:10:10 AEST 1985
Mod.sources: Volume 3, Issue 63
Submitted by: talcott!think!massar
This is 'texchk', a syntax checker for the LaTeX TeX macro package.
JP Massar
massar at think.com
ihnp4!think!massar
------------------------------------------------------------------------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
# README
# Makefile
# texchk.1
# cmds.h
# ctools.h
# texchars.h
# texchk.h
# texchk.c
# This archive created: Tue Dec 17 09:59:40 1985
export PATH; PATH=/bin:$PATH
echo shar: extracting "'README'" '(1687 characters)'
if test -f 'README'
then
echo shar: will not over-write existing file "'README'"
else
cat << \SHAR_EOF > 'README'
Instructions for installing texchk:
Assuming you are running a BSD 4.2 or compatible system:
Edit the file Makefile.
Change the BIN variable to the directory where you want the
executable to go.
Change the MS variable to be the 'x' which makes /usr/man/manx
acceptable to you.
Change the CC variable if you wish to compile with other than 'cc'.
Type 'make install -n' to ensure that you understand what is
happening.
Type 'make install'.
Type 'make man'.
Type 'texchk -c -v <your-own-latex-file>' and verify that it is
producing reasonable output.
Type 'make clean'.
Assuming you are on a non BSD 4.2 compatible system:
You will probably have to change the call to 'rindex' in ctools.c.
(Compiling with "-Drindex=strrchr" is sufficient - see the Makefile).
If your compiler is brain damaged you may have to extensively
shorten and revise the names of various variables and preprocessor
constants.
Take out the -DBSD42 flag from the Makefile. If your system does
not have <string.h> (Berkeley called it strings.h), you may have
to edit ctools.c and texchk.c to declare the strxxx functions.
Modulo these changes, I got texchk to run under BRL's SYSTEM V
emulation package so hopefully it will run on most systems.
The files ctools.c and ctools.h are potentially useful in their own
right. They contain a library of routines I find or found useful. You
might look through it for functions you will find useful or you could
install the whole thing as a library as has been done here at TMC.
JP Massar
massar at think.com
ihnp4!think!massar
SHAR_EOF
if test 1687 -ne "`wc -c < 'README'`"
then
echo shar: error transmitting "'README'" '(should have been 1687 characters)'
fi
fi
echo shar: extracting "'Makefile'" '(592 characters)'
if test -f 'Makefile'
then
echo shar: will not over-write existing file "'Makefile'"
else
cat << \SHAR_EOF > 'Makefile'
CC = cc
#use the following DEFINES line for non-BSD systems
#DEFINES = -Drindex=strrchr
DEFINES = -DBSD42
#use the following CFLAGS line for debugging
#CFLAGS = -g $(DEFINES)
CFLAGS = -O $(DEFINES)
SOURCES = texchk.c cmds.c texchars.c errors.c verbatim.c ctools.c
OBJECTS = texchk.o cmds.o texchars.o errors.o verbatim.o ctools.o
LIBS =
BIN = /usr/local
MS=1
MAN = /usr/man/man$(MS)
texchk: $(OBJECTS)
$(CC) -o texchk $(CFLAGS) $(OBJECTS) $(LIBS)
install: texchk
rm -f $(BIN)/texchk
mv texchk $(BIN)/texchk
man:
cp texchk.1 $(MAN)/texchk.$(MS)
man texchk
clean:
rm -f texchk *.o *~
SHAR_EOF
if test 592 -ne "`wc -c < 'Makefile'`"
then
echo shar: error transmitting "'Makefile'" '(should have been 592 characters)'
fi
fi
echo shar: extracting "'texchk.1'" '(2494 characters)'
if test -f 'texchk.1'
then
echo shar: will not over-write existing file "'texchk.1'"
else
cat << \SHAR_EOF > 'texchk.1'
.TH TEXCHK 1 3/13/85
.SH NAME
TEXCHK - syntax checker for LaTeX.
.SH SYNOPSIS
.B texchk
[ -v -c ] [ file1 file2 ... ]
.SH DESCRIPTION
By default
.I texchk
reads from standard input and outputs error messages and whatnot to
standard error. If filenames are given as arguments, each file is read
and parsed in turn.
.PP
.I texchk
checks for the proper nesting of matching '{' '}' and '[' ']' pairs,
matching '\\begin{<environment>}' '\\end{<environment>}' pairs, and Math Mode
and Display Math Mode constructs. If improper nesting is detected, an error
message consisting of a reason, a line number, and the actual line of text
on which the error occurred is displayed.
.PP
In general, the program will halt after finding and printing the first
error, because attempts at further diagnosis would likely result in a
stream of spurious error messages.
.PP
Unlike LaTeX
.I texchk
is fast. It can grovel over a large file in a matter of a few seconds,
and can be run repeatedly without causing the CPU to beg for mercy.
.PP
The command line options are:
.TP
.B \-v
Verbose option. Produces on the standard output a trace of each environment
.I texchk
enters and leaves, along with the number of the line it is currently
processing. This is used basically to assure yourself that
.I texchk
is actually doing something, if you give it a huge file to process.
(And is mildly interesting).
.TP
.B \-c
Check mode. Tells
.I texchk
to check each keyword (e.g., \\hspace, \\mbox) in the file against a list of
known keywords, and issue a warning if the keyword is not in the list.
Also, each keyword is checked as to whether it is only legal if given
inside of math mode, and if so and if math mode is not enabled, an error
message is given (but processing continues).
.SH FILES
/usr/src/local/cmd/texchk source directory.
.br
/usr/local/texchk executable image.
.SH AUTHOR
JP Massar, Thinking Machines Corporation
.SH BUGS
.I texchk
makes no claims to being perfect. It is quite possible that an 'error'
flagged by
.I texchk
is not really an error at all if run through LaTeX.
.PP
It does not check for number of arguments, presence of optional arguments,
etc.
.PP
It does not know that certain keywords can only be used in certain modes.
.PP
It does not understand that '[' and ']' can be used in regular text
without necessarily being paired. Also, \\left], for example,
will confuse it totally.
.PP
Although not officially part of LaTeX,
.I texchk
understands the '$$' construct of TeX.
SHAR_EOF
if test 2494 -ne "`wc -c < 'texchk.1'`"
then
echo shar: error transmitting "'texchk.1'" '(should have been 2494 characters)'
fi
fi
echo shar: extracting "'cmds.h'" '(803 characters)'
if test -f 'cmds.h'
then
echo shar: will not over-write existing file "'cmds.h'"
else
cat << \SHAR_EOF > 'cmds.h'
/* One entry of Command_Table, the table searched by command_lookup(). */
typedef struct {
int cmdlength; /* presumably the length of 'cmd' -- TODO confirm against cmds.c */
char *cmd; /* command text; lookups pass the name including the initial backslash */
char is_math_mode_only; /* non-zero: command is reported as an error outside math mode */
                        /* (read through the IS_MATH_MODE macro) */
unsigned flags; /* NOTE(review): not referenced anywhere in the code visible here */
} Latex_Command, *Ptr_Latex_Command;
extern Latex_Command Command_Table[];
extern char *Math_Environments[];
#define COMMAND_TABLE_SIZE (sizeof(Command_Table)/sizeof(Latex_Command))
#define IS_MATH_MODE(i) (Command_Table[(i)].is_math_mode_only)
#define NOT_FOUND -1
extern int command_lookup ();
/* ARGUMENTS:
char *command;
*/
/* 'command' is the name of a presumed latex command, including the */
/* initial backslash. Returns NOT_FOUND if command is not in the command */
/* table, otherwise the index into the command table of the entry for */
/* the input command. */
extern int is_math_environment();
/* ARGUMENTS:
char *keyword;
*/
/* Returns 1 or 0 */
SHAR_EOF
if test 803 -ne "`wc -c < 'cmds.h'`"
then
echo shar: error transmitting "'cmds.h'" '(should have been 803 characters)'
fi
fi
echo shar: extracting "'ctools.h'" '(10814 characters)'
if test -f 'ctools.h'
then
echo shar: will not over-write existing file "'ctools.h'"
else
cat << \SHAR_EOF > 'ctools.h'
/* -*- Mode: C; Package: (CTOOLS C) -*- */
#ifndef Bool
#define Bool int
#endif
#ifndef T
#define T 1
#endif
#ifndef F
#define F 0
#endif
#ifndef MAXINT
#define MAXINT 2147483647
#define MAXINTSTR "2147483647"
#endif
#ifndef MAXPATHLEN
#define MAXPATHLEN 80
#endif
extern char *emalloc();
/* int space; */
/* space must be greater than 0 */
/* Causes execution to halt with a 'Fatal error' message if memory */
/* cannot be allocated, otherwise returns pointer to malloc-ed space */
extern char *anewstr();
/* char *astring; */
/* emalloc's space and copies astring into that space. Returns pointer */
/* to new string. */
extern int copy();
/* char *dest, *src; int n; */
/* copies exactly n bytes. */
/* return value undefined. Use only as procedure. */
extern int fill();
/* char *addr, ch; int n; */
/* copies ch into n consecutive bytes. */
/* return value undefined. Use only as procedure. */
extern int to_upper_if_lower();
/* char ch; Returns possibly upper-cased value. */
extern int to_lower_if_upper();
/* char ch; Returns possibly lower-cased value. */
extern int buffconcat();
/* char *buffer, *s1, *s2; */
/* s1 and s2 must be null terminated. Buffer must be at least */
/* strlen(s1) + strlen(s2) + 1 characters long. Buffer is null */
/* terminated upon completion. */
/* return value undefined. Use only as procedure. */
extern int nbuffconcat();
/* char *buffer; int n; char *s1,*s2,*s3,*s4,*s5,*s6; */
/* all the strings must be null terminated. Buffer must be big enough */
/* to hold the null terminated result. 0 < n < 7. */
/* returns -1 if n is out of range, otherwise 0 */
extern int slcompare();
/* char *s1; int l1; char *s2; int l2 */
/* does not stop if it encounters a null character. */
/* returns 0 if equal, -1 if not equal */
extern int slge_compare();
/* char *s1; int l1; char *s2; int l2 */
/* does not stop if it encounters a null character. */
/* returns 0 if equal, -1 if s1 < s2, 1 if s1 > s2 */
extern int nocase_compare();
/* char *s1; int l1; char *s2; int l2 */
/* does not stop if it encounters a null character. */
/* returns 0 if equal, -1 if s1 < s2, 1 if s1 > s2 case independently. */
extern char * strfind();
/* char *s1; char *s2; int fast; */
/* finds s2 as a substring of s1. s1 and s2 are null terminated. */
/* returns 0 if not found, otherwise pointer into s1 to first matching */
/* character. */
/* WARNING: will access off the end of s1 in the interest of efficiency */
/* if 'fast' is non-zero. */
extern char * strncfind();
/* char *s1; char *s2; int fast; */
/* finds s2 as a substring of s1 case independently. s1 and s2 are */
/* null terminated. */
/* returns 0 if not found, otherwise pointer into s1 to first matching */
/* character. */
/* WARNING: will access off the end of s1 in the interest of efficiency */
/* if 'fast' is non-zero. */
extern char * strsearch();
/* char *s1; int l1; char *s2; int l2 */
/* finds s2 as a substring of s1. Does not stop if it encounters a null. */
/* returns pointer into s1, otherwise (char *) 0 if search fails */
/* case dependent */
extern char * strncsearch();
/* char *s1; int l1; char *s2; int l2 */
/* finds s2 as a substring of s1. */
/* returns pointer into s1, otherwise (char *) 0 if search fails */
/* case independent */
extern int remove_excess_blanks();
/* char *newstring, *oldstring; */
/* newstring must be long enough to hold the result, which may be as */
/* long as oldstring. oldstring must be null terminated. */
/* an excess blank is any blank before the first non-blank character, */
/* any blank after the last non-blank character, and any blank immediately */
/* following a blank. */
/* returns length of newstring */
extern int yes_or_no_check();
/* char *astring; */
/* returns 1 for yes, 0 for no, -1 for neither. */
/* astring must be one of "YES", "Y", "NO", "N" in any capitalization. */
/* These routines return T if every char satisfies a certain condition. */
/* These routines all return T if given a null string. */
extern Bool all_digits();
extern Bool all_whitespace();
extern Bool all_uppercase();
extern Bool all_lowercase();
extern Bool all_alphabetic();
extern Bool all_alphanumeric();
extern Bool all_ascii();
extern int str_to_pos_int();
/* char *astring; int low,high; */
/* low must be >= 0. */
/* returns -1 if *astring is not composed of digits. */
/* returns -2 if the integer is out of range. */
/* despite its name, 0 can be returned as a legitimate value. */
/* treats all digit strings as decimal. */
extern int sreverse();
/* char *buffer; char *astring; */
/* puts the characters of astring in reverse order into buffer. */
/* buffer must be at least as long as astring + 1. */
/* buffer is null terminated when done. */
/* No return value. Use only as procedure. */
extern char *ip_sreverse();
/* char *astring; */
/* Returns astring with its characters reversed. */
/* reversal is done in place. */
#define PATH_MAXPATHLEN 256
char *temp_path();
/*
char *dir; char *filename;
Returns a pointer to a character string containing the string
<dir>/<filename>. The pointer points to a buffer which may get
overwritten if any functions in this package are subsequently called.
0 is returned if the pathname would exceed PATH_MAXPATHLEN-1 chars.
*/
char *perm_path();
/*
char *dir; char *filename;
Same as temp_path, except the pathname string is malloc'ed and is thus
permanent unless specifically freed by the user. Further, no limit
on the size of the path is made.
*/
char *make_path();
/*
char *dir; char *filename; char *extension; Bool perm;
Creates <dir>/<filename><extension> . The string returned is permanent
or not depending on 'perm'. If perm is not true, 0 will be returned if
the resulting path exceeds PATH_MAXPATHLEN-1 chars.
*/
char *make_path_numeric_extension();
/*
char *dir; char *filename; int extension; Bool perm;
Same as make_path except that extension is first converted into a
string using sprintf.
*/
char *just_filename();
/*
char *path; Bool new; Bool perm;
Given a path of the form /<x>/<y>/<z> returns <z>. If new is not set
then a pointer into the original input string is returned. If new is
set a copy is returned, either permanent or not depending on perm.
*/
#define ANSWER_NO 0
#define ANSWER_YES 1
#define ANSWER_HELP 2
#define ANSWER_QUIT 3
#define ANSWER_EOF 4
#define AT_EOF -1
#define TOO_MANY_CHARS -2
#define IOERROR -3
#define TOO_MANY_LINES -4
#define LINE_TOO_LONG -5
extern read_yes_or_no ();
/* FILE *iport, *oport; char *prompt; char *helpstring; char *quitstring; */
/* prints prompt, then reads from iport until it gets 'Y', 'N', 'YES' or */
/* 'NO' (case independently). If helpstring and/or quitstring are not */
/* "" or (char *) 0 then if the user types in one of those ANSWER_HELP */
/* or ANSWER_QUIT are returned, otherwise ANSWER_NO or ANSWER_YES are */
/* eventually returned. */
extern int getline ();
/* FILE *iport; char *buffer; int buflen; */
/* reads a line into buffer. Does not put the '\n' into buffer. */
/* Returns AT_EOF if at end of file when called. If it encounters */
/* end of file after reading at least one character, the eof is treated */
/* as if it were a newline. Returns TOO_MANY_CHARS if more than */
/* buflen - 1 characters are read before encountering a newline. */
/* In this case exactly buflen - 1 characters are read. */
/* The last character read is always followed by a '\0'. */
/* if successful getline returns the number of characters read exclusive */
/* of a terminating newline or eof. */
extern int getlines();
/* FILE *fp; int n; char ***ptr_lines; char *linebuf; int maxlinelen; */
/* See documentation for getfile below */
/* If called, 'n' must have a value 0. */
extern int getfile();
/* char *filename; char ***ptr_lines; char *linebuf; int maxlinelen; */
/* read in a file as an array of character strings */
/* 'maxlinelen+1' is the maximum length a line of the file is allowed */
/* to be. 'linebuf' must be at least 'maxlinelen+1' characters long. */
/* Returns the number of lines in the file (and therefore the number */
/* of entries in *ptr_lines) if successful. Returns IOERROR if it */
/* could not open the file to read from. Returns TOO_MANY_CHARS if */
/* it encounters a line longer than 'maxlinelen' characters. */
/* Space for each line is malloc'ed as it is read in and the text for */
/* the jth line is stored in (*ptr_lines)[j] */
/* Only works for fairly small files as it recurses its way through the */
/* file and does a lot of malloc-ing. Use read_file_into_buffer or */
/* ngetfile for large files. */
extern int ngetlines();
/* FILE *fp; int n; char ***ptr_lines; char *linebuf; int maxlinelen; */
/* Same as getlines, except at most 'n' lines will be read. Returns */
/* TOO_MANY_LINES if more than 'n' lines are present. */
extern int ngetfile();
/* int n; char *filename; char ***ptr_lines; char *linebuf; int maxlinelen; */
/* See ngetlines above. */
extern int read_file_into_buffer();
/* char *filename;
char ***ptr_lines;
int maxlines;
char *buffer;
int buflen;
char *linebuffer;
int linebuflen;
*/
/* *ptr_lines should be an array of character string pointers maxlines */
/* big. buffer should be an array of characters buflen long. The routine */
/* reads lines using getline and stores them into buffer, terminating each */
/* with a null. A pointer to the nth line read is stored in *ptr_lines[n] */
/* Returns IOERROR if it cannot open the file for reading, TOO_MANY_LINES */
/* if more than maxlines were read in, TOO_MANY_CHARS if buffer is */
/* filled before end of file is reached, and LINE_TOO_LONG if any line is */
/* longer than linebuflen. Returns number of lines read in if successful. */
extern char *efopen();
/* char *filename; char *mode */
/* Actually returns a (FILE *), so one must cast the return value to this */
/* type. It doesn't return a (FILE *) explicitly because then to include */
/* this file one would have to include <stdio.h> explicitly before it. */
/* The routine simply calls fopen with the same arguments, but prints a */
/* reasonable error message and calls exit if the call to fopen fails. */
Bool check_string();
/* char *str; long minlen; long maxlen; */
/* Returns T if str is not 0 and has a length between minlen and maxlen */
/* inclusive, otherwise returns F. */
#ifndef check_int
#define check_int(i,minval,maxval) ((i) >= (minval) && (i) <= (maxval))
#endif
SHAR_EOF
if test 10814 -ne "`wc -c < 'ctools.h'`"
then
echo shar: error transmitting "'ctools.h'" '(should have been 10814 characters)'
fi
fi
echo shar: extracting "'texchars.h'" '(198 characters)'
if test -f 'texchars.h'
then
echo shar: will not over-write existing file "'texchars.h'"
else
cat << \SHAR_EOF > 'texchars.h'
extern init_legal_chars();
extern char Lgl_Chars[];
extern char Lgl_Single_Char_Commands[];
#define LGL_CHAR(x) (Lgl_Chars[(x)])
#define LGL_SINGLE_COMMAND_CHAR(x) (Lgl_Single_Char_Commands[(x)])
SHAR_EOF
if test 198 -ne "`wc -c < 'texchars.h'`"
then
echo shar: error transmitting "'texchars.h'" '(should have been 198 characters)'
fi
fi
echo shar: extracting "'texchk.h'" '(1489 characters)'
if test -f 'texchk.h'
then
echo shar: will not over-write existing file "'texchk.h'"
else
cat << \SHAR_EOF > 'texchk.h'
/* these are the types of tokens we look for in the input text */
/* ESCAPE_BEGIN / ESCAPE_END   : \begin{...} and \end{...} */
/* ESCAPE_ANY                  : any other \keyword made of alphabetic characters */
/* LEFT_/RIGHT_SQUARE_BRACKET  : '[' and ']' */
/* LEFT_/RIGHT_CURLY_BRACKET   : '{' and '}' */
/* MATH / DOUBLE_MATH          : '$' and '$$' */
/* ESCAPE_SINGLE_CHAR          : backslash followed by one non-alphabetic character */
typedef enum {
ESCAPE_BEGIN, ESCAPE_END, ESCAPE_ANY,
LEFT_SQUARE_BRACKET, RIGHT_SQUARE_BRACKET,
LEFT_CURLY_BRACKET, RIGHT_CURLY_BRACKET,
MATH, DOUBLE_MATH, ESCAPE_SINGLE_CHAR
} envtype;
/* some of the input tokens cause the LaTeX environment to be 'pushed' */
/* following is the definition of the stack on which these tokens are stored */
typedef struct {
envtype etype; /* kind of opening token that created this entry */
char *keyword; /* text of the opening token or environment name; stored by pointer, not copied */
int linenum; /* input line on which the entry was pushed */
             /* NOTE(review): lex_push/lex_pop pass this value as a long */
} Stack_Entry, *Ptr_Stack_Entry;
#define MAX_ENTRIES 10000
#define MAX_KEYWORD_LENGTH 100
extern Stack_Entry Lex_Stack[];
extern int Lex_TOS;
#define Stack_Empty (Lex_TOS < 0)
#define LCB '{'
#define RCB '}'
#define LSB '['
#define RSB ']'
#define MATH_CHAR '$'
#define ESCAPE '\\'
#define COMMENT '%'
#define ENDSTRING "end"
#define BEGINSTRING "begin"
/* these are the possible actions we can take when we see a token. */
/* the 'CHECK' action has not been implemented. */
typedef enum {POP,PUSH,CHECK_SINGLE,DOLLAR,DOLLAR_DOLLAR,CHECK} Actions;
/* we keep track of where we are in the text so we can print out nice */
/* error messages */
extern long Current_Line, Current_Char;
extern char Line_Buffer[];
#define MAXLL 1024
extern Indent_Level; /* for verbose pretty print */
extern FILE *fp;
/* this is LaTeX's definition of whitespace */
#define ISWHITE(x) ((x) == ' ' || (x) == '\t')
#define SPACES_PER_INDENT_LEVEL 2
SHAR_EOF
if test 1489 -ne "`wc -c < 'texchk.h'`"
then
echo shar: error transmitting "'texchk.h'" '(should have been 1489 characters)'
fi
fi
echo shar: extracting "'texchk.c'" '(16712 characters)'
if test -f 'texchk.c'
then
echo shar: will not over-write existing file "'texchk.c'"
else
cat << \SHAR_EOF > 'texchk.c'
/* Texchk -- a LaTeX syntax and spelling checker.
Written by JP Massar, Thinking Machines Corporation, Cambridge, MA
This code is hereby released into the public domain, for better or worse.
*/
#include <stdio.h>
#include <ctype.h>
/* if your system doesn't have either string.h or strings.h you */
/* may have to declare the string functions yourself */
#ifdef BSD42
#include <strings.h>
#else
#include <string.h>
#endif
#ifdef TMC
#include <ctools.h>
#else
#include "ctools.h"
#endif
#include "texchk.h"
#include "cmds.h"
#include "texchars.h"
/* ---- program-wide state shared by the reader and the checker ---- */
Bool Verbose_Mode = F; /* -v option */
Bool Check_Mode = F; /* -c option */
int Indent_Level = 0; /* for verbose output mode */
Stack_Entry Lex_Stack[MAX_ENTRIES]; /* environment stack */
int Lex_TOS = -1; /* index of the top stack entry; negative means the stack is empty */
FILE *fp; /* file being processed */
Bool Already_At_Eof = F; /* set when a token reader runs into EOF; get_token then returns 0 */
long Current_Line = 0; /* where we are in input text */
long Current_Char = 0; /* where we are in input text */
long Line_Length = 0; /* current line length */
char Line_Buffer[MAXLL]; /* buffer for input text */
Bool In_Math_Mode = F; /* true while at least one math mode construct is open */
int Math_Mode_Depth = 0; /* number of math mode constructs currently open */
char Keyword_Buffer[MAX_KEYWORD_LENGTH]; /* static buffer handed back by copy_keyword */
new_file ()
/* reset the input position and the math mode / indentation bookkeeping */
/* before the next input file is processed. */
{
    /* math mode and verbose-trace state */
    Indent_Level = 0;
    Math_Mode_Depth = 0;
    In_Math_Mode = F;

    /* position within the input text */
    Line_Length = 0;
    Current_Char = 0;
    Current_Line = 0;
}
do_indent (level) int level;
/* write 'level' indentation steps of SPACES_PER_INDENT_LEVEL blanks each */
/* to stderr.  Nothing is written when 'level' is zero or negative. */
{
    int steps,blanks;

    for (steps = level; steps > 0; steps--) {
        for (blanks = SPACES_PER_INDENT_LEVEL; blanks > 0; blanks--) {
            putc(' ',stderr);
        }
    }
}
lex_push (etype,keyword,linenum) envtype etype; char *keyword; long linenum;
/* push an environment onto the stack */
{
if (++Lex_TOS >= MAX_ENTRIES) {
fprintf(stderr,"Stack overflow...Process terminating.\n");
texit();
}
Lex_Stack[Lex_TOS].etype = etype;
Lex_Stack[Lex_TOS].keyword = keyword;
Lex_Stack[Lex_TOS].linenum = linenum;
}
lex_pop (ptr_etype,ptr_keyword,ptr_linenum)
/* pop an environment and return its components */
envtype *ptr_etype;
char **ptr_keyword;
long *ptr_linenum;
{
if (Lex_TOS < 0) {
fprintf(stderr,"Stack underflow...Process terminating\n");
texit();
}
*ptr_etype = Lex_Stack[Lex_TOS].etype;
*ptr_keyword = Lex_Stack[Lex_TOS].keyword;
*ptr_linenum = Lex_Stack[Lex_TOS].linenum;
Lex_TOS--;
}
curstack (ptr_etype,ptr_keyword,ptr_linenum)
/* peek at the top stack entry: its components are returned through the */
/* pointer arguments and the entry stays on the stack.  Calling this with */
/* an empty stack is a fatal error. */
envtype *ptr_etype;
char **ptr_keyword;
long *ptr_linenum;
{
    if (Stack_Empty) {
        fprintf(stderr,"Fatal error, bad call to curstack\n");
        texit();
    }
    /* implemented as a pop immediately followed by a push of the same entry */
    lex_pop(ptr_etype,ptr_keyword,ptr_linenum);
    lex_push(*ptr_etype,*ptr_keyword,*ptr_linenum);
}
char *copy_keyword (starttoken,endtoken) int starttoken,endtoken;
/* copy Line_Buffer[starttoken..endtoken] (both inclusive) into the static */
/* Keyword_Buffer, null terminate it and return it.  A keyword that would */
/* not fit together with its terminator is a fatal error. */
{
    int nchars;

    nchars = (endtoken - starttoken) + 1;
    if (nchars >= MAX_KEYWORD_LENGTH) {
        keyword_length_error();
        texit();
    }
    strncpy(Keyword_Buffer,&Line_Buffer[starttoken],nchars);
    Keyword_Buffer[nchars] = '\0';
    return(Keyword_Buffer);
}
do_pop (etype,keyword) envtype etype; char *keyword;
/* pop the current environment and verify that it is the opening construct */
/* that matches the closing construct (etype,keyword) just read.  Returns 0 */
/* when they match; otherwise reports a nesting error and terminates. */
{
    envtype open_type;
    char *open_keyword;
    long open_line;
    char *opener, *closer;      /* names used in the error message */
    int mismatch = 0;

    lex_pop(&open_type,&open_keyword,&open_line);
    switch (etype) {
        case ESCAPE_END :
            opener = "\\begin";
            closer = "\\end";
            /* both the token type and the environment name must agree */
            mismatch = (open_type != ESCAPE_BEGIN)
                       || (0 != strcmp(open_keyword,keyword));
            break;
        case RIGHT_SQUARE_BRACKET :
            opener = "[";
            closer = "]";
            mismatch = (open_type != LEFT_SQUARE_BRACKET);
            break;
        case RIGHT_CURLY_BRACKET :
            opener = "{";
            closer = "}";
            mismatch = (open_type != LEFT_CURLY_BRACKET);
            break;
        case MATH :
            opener = "Begin Math Mode";
            closer = "End Math Mode";
            mismatch = (open_type != etype);
            break;
        case DOUBLE_MATH :
            opener = "Begin Display Math Mode";
            closer = "End Display Math Mode";
            mismatch = (open_type != etype);
            break;
    }
    if (!mismatch) return(0);
    nest_error(opener,closer,open_line,open_keyword);
    texit();
}
int get_a_char ()
/* buffered input routine, to keep track of line number. */
/* Returns the next input character as a value 0..255, or EOF.  Every */
/* line read is handed out with a '\n' appended.  Characters rejected by */
/* LGL_CHAR are reported through bad_char_error but are still returned. */
{
    int ch,nread;

    if (Current_Char >= Line_Length) {
        /* current line is used up: fetch the next one */
        nread = getline(fp,Line_Buffer,MAXLL-2);
        if (nread == AT_EOF) {
            return(EOF);
        }
        else if (nread == TOO_MANY_CHARS) {
            line_too_long_error();
            texit();
        }
        else {
            /* MAXLL-2 was passed above, leaving room for these two bytes */
            Line_Buffer[nread] = '\n';
            nread++;
            Line_Buffer[nread] = '\0';
            Line_Length = nread;
            Current_Char = 0;
            Current_Line++;
        }
    }
    ch = (int) (255 & Line_Buffer[Current_Char++]);
    if (!LGL_CHAR(ch)) bad_char_error(ch,T);
    return(ch);
}
unget_a_char ()
/* step back one character in the current line so that the next call to */
/* get_a_char returns it again.  Stepping back past the start of the line */
/* is a fatal error. */
{
    if (Current_Char != 0) {
        Current_Char--;
    }
    else {
        fprintf(stderr,"Invalid unget...process terminating\n");
        texit();
        Current_Char--;
    }
}
char *get_keyword ()
/* read a keyword. Keywords consist of contiguous alphabetic characters. */
/* The character most recently returned by get_a_char is taken as the */
/* first character of the keyword. */
/* keyword returned is in a static buffer. */
{
    int first,last,ch;

    first = last = Current_Char - 1;
    for (;;) {
        ch = get_a_char();
        if (!isalpha(ch)) break;
        last++;
    }
    /* give back the terminating character unless it was end of file */
    if (ch != EOF) {
        unget_a_char();
    }
    else {
        Already_At_Eof = 1;
    }
    return(copy_keyword(first,last));
}
char *get_begin_end_keyword ()
/* called after a \begin or \end construct is found. */
/* begin and end keywords are enclosed in {}. */
/* a warning is issued if there is whitespace immediately after the { or */
/* immediately before the }, or if a newline occurs before the closing }. */
/* returns a string constituting what is in between the {}s save for */
/* whitespace immediately after the { and immediately before the } */
/* A missing {, end of file before the }, or a name that is empty or */
/* consists only of whitespace is a fatal error. */
/* keyword returned is in a static buffer. */
{
    int ch;
    int starttoken,endtoken;

    ch = get_a_char();
    if (ch != LCB) {
        no_brace_after_begin_end_error();
        texit();
    }
    /* starttoken indexes the first character after the '{'; endtoken is */
    /* advanced to the last character before the '}' as we read. */
    starttoken = Current_Char;
    endtoken = starttoken - 1;
    while (RCB != (ch = get_a_char())) {
        /* NOTE(review): get_a_char refills Line_Buffer once a line is used */
        /* up, so after a newline these indices refer to the previous line. */
        if (ch == '\n')
            warning_close_brace();
        else if (ch == EOF) {
            eof_error();
            texit();
        }
        else
            endtoken++;
    }
    /* ignore whitespace after '{' and before '}' */
    if (ISWHITE(Line_Buffer[starttoken]) || ISWHITE(Line_Buffer[endtoken])) {
        warning_blanks_in_cb();
    }
    /* skip leading blanks; '<=' so that a single remaining character is */
    /* examined too instead of being treated as a blank name. */
    while (starttoken <= endtoken && ISWHITE(Line_Buffer[starttoken]))
        starttoken++;
    /* only a truly empty or all-blank name is an error; one-character */
    /* names such as \begin{a} are legitimate. */
    if (starttoken > endtoken) {
        blank_begin_end_error();
        texit();
    }
    while (endtoken > starttoken && ISWHITE(Line_Buffer[endtoken]))
        endtoken--;
    return(copy_keyword(starttoken,endtoken));
}
get_token (action,etype,keyword)
/* fetch the next significant token from the input stream and work out */
/* which action it calls for.  The token type is stored in *etype, the */
/* action in *action and the significant text in *keyword (a string */
/* constant or a static buffer).  Comments (from '%' to end of line) and */
/* all other characters are skipped. */
/* returns 0 on encountering EOF, otherwise returns 1. */
Actions *action;
envtype *etype;
char **keyword;
{
    int ch,isbegin,isend;

    *keyword = 0;
    if (Already_At_Eof) return(0);

    for (;;) {
        ch = get_a_char();
        if (ch == EOF) return(0);

        switch (ch) {

            case LSB :
                *action = PUSH;
                *etype = LEFT_SQUARE_BRACKET;
                *keyword = "[";
                return(1);

            case RSB :
                *action = POP;
                *etype = RIGHT_SQUARE_BRACKET;
                *keyword = "]";
                return(1);

            case LCB :
                *action = PUSH;
                *etype = LEFT_CURLY_BRACKET;
                *keyword = "{";
                return(1);

            case RCB :
                *action = POP;
                *etype = RIGHT_CURLY_BRACKET;
                *keyword = "}";
                return(1);

            case MATH_CHAR :
                /* a second '$' right behind the first means 'Display Math Mode' */
                ch = get_a_char();
                if (ch == MATH_CHAR) {
                    *action = DOLLAR_DOLLAR;
                    *etype = DOUBLE_MATH;
                    *keyword = "$$";
                    return(1);
                }
                /* a lone '$': remember EOF, or give the lookahead back */
                if (ch == EOF) {
                    Already_At_Eof = 1;
                }
                else {
                    unget_a_char();
                }
                *action = DOLLAR;
                *etype = MATH;
                *keyword = "$";
                return(1);

            case ESCAPE :
                ch = get_a_char();
                if (ch == EOF) {
                    eof_error();
                    texit();
                }
                /* single character non-alphabetic commands */
                if (!isalpha(ch)) {
                    Keyword_Buffer[0] = ch;
                    Keyword_Buffer[1] = '\0';
                    *keyword = Keyword_Buffer;
                    *action = CHECK_SINGLE;
                    *etype = ESCAPE_SINGLE_CHAR;
                    return(1);
                }
                /* alphabetic command; \begin and \end are treated specially */
                *keyword = get_keyword();
                isbegin = (0 == strcmp(*keyword,BEGINSTRING));
                isend = (0 == strcmp(*keyword,ENDSTRING));
                if (!isbegin && !isend) {
                    *action = CHECK;
                    *etype = ESCAPE_ANY;
                    return(1);
                }
                if (isbegin) {
                    *etype = ESCAPE_BEGIN;
                    *action = PUSH;
                }
                else {
                    *etype = ESCAPE_END;
                    *action = POP;
                }
                /* for these two the keyword is the name inside the braces */
                *keyword = get_begin_end_keyword();
                return(1);

            case COMMENT :
                /* discard everything up to and including the newline */
                do {
                    ch = get_a_char();
                    if (ch == EOF) return(0);
                } while (ch != '\n');
                break;

            default :
                /* not significant: keep reading */
                break;
        }
    }
}
push_math_mode (key) char *key;
/* enter math mode because of the token 'key': bump the math mode depth, */
/* set In_Math_Mode and push a MATH entry carrying 'key' and the current */
/* line number onto the environment stack.  In verbose mode an indented */
/* trace line is written to stderr first. */
{
    if (Verbose_Mode) {
        do_indent(Indent_Level++);
        /* Current_Line is a long, so it must be printed with %ld */
        fprintf (
            stderr,"Line %ld: Entering math mode using <%s>\n",Current_Line,key
        );
    }
    Math_Mode_Depth++;
    In_Math_Mode = T;
    lex_push(MATH,key,Current_Line);
}
pop_math_mode (key) char *key;
/* leave the math mode construct closed by the token 'key': lower the math */
/* mode depth, recompute In_Math_Mode and discard the top stack entry. */
/* The caller is responsible for having checked that the top entry is the */
/* matching opener.  In verbose mode an indented trace line is written to */
/* stderr first. */
{
    envtype etype;
    char *keyword;
    long linenum;

    if (Verbose_Mode) {
        do_indent(--Indent_Level);
        /* Current_Line is a long, so it must be printed with %ld */
        fprintf (
            stderr,"Line %ld: Leaving math mode using <%s>\n",Current_Line,key
        );
    }
    Math_Mode_Depth--;
    In_Math_Mode = (Math_Mode_Depth > 0);
    /* the popped components are not needed here */
    lex_pop(&etype,&keyword,&linenum);
}
math_mode_action (action,keyword) Actions action; char *keyword;
/* check for math mode tokens, and enter or leave math mode as appropriate. */
/* 'action' is the action computed by get_token and 'keyword' the token */
/* text.  Actions not listed in the switch are ignored. */
{
    char *stack_keyword;
    long linenum;
    envtype etype;
    char *key, *matching_keyword;

    switch (action) {

        /* If there is a matching '$' or '$$' as the latest entry on the stack */
        /* we pop it because it is a matching token. Otherwise, we push it, */
        /* even if we are already in math mode. */
        case (DOLLAR) :
        case (DOLLAR_DOLLAR) :
            key = (action == DOLLAR) ? "$" : "$$";
            if (!In_Math_Mode) {
                push_math_mode(key);
                break;
            }
            curstack(&etype,&stack_keyword,&linenum);
            if (0 != strcmp(key,stack_keyword)) {
                push_math_mode(key);
            }
            else {
                pop_math_mode(key);
            }
            break;

        /* just adjust Math Mode for PUSH and POP, because in process_file */
        /* we will do the actual pushing and popping of these environments. */
        case (PUSH) :
            if (is_math_environment(keyword)) {
                Math_Mode_Depth++;
                In_Math_Mode = T;
            }
            break;
        case (POP) :
            if (is_math_environment(keyword)) {
                Math_Mode_Depth--;
                /* still in math mode only while an enclosing math construct */
                /* remains open (same rule as pop_math_mode) */
                In_Math_Mode = (Math_Mode_Depth > 0);
            }
            break;

        /* look for \( and \[ commands which put us into math mode, and \) and */
        /* \] commands which pop us out of math mode. Make sure if we are */
        /* popping that the proper pushed math mode command is the current */
        /* stack entry. */
        case (CHECK_SINGLE) :
            if (*keyword == '(' || *keyword == '[') {
                /* keyword lives in a static buffer, so push a private copy */
                push_math_mode(anewstr(keyword));
            }
            else if (*keyword == ')' || *keyword == ']') {
                if (Stack_Empty) {
                    stack_empty_error(MATH,keyword);
                    texit();
                }
                curstack(&etype,&stack_keyword,&linenum);
                matching_keyword = (*keyword == ')') ? "(" : "[";
                if (0 != strcmp(matching_keyword,stack_keyword)) {
                    nest_error(matching_keyword,keyword,linenum,stack_keyword);
                    texit();
                }
                pop_math_mode(keyword);
            }
            break;
    }
}
process_file ()
/* Get significant LaTeX forms from the input file. For each one, depending */
/* on its nature perform a verification or manipulate the environment stack. */
/* When we are done the stack should be empty; if it is not, an end of file */
/* error is reported and the process terminates.  Returns 0 on success. */
/* The file has already been opened using the global file descriptor 'fp' */
{
    Actions action;
    envtype etype;
    char *keyword;
    int cmd_index,ch;

    while (0 != get_token(&action,&etype,&keyword)) {
        switch (action) {

            case (POP) :
                /* \end{keyword},, '}', ']' */
                if (Stack_Empty) {
                    stack_empty_error(etype,keyword);
                    texit();
                }
                math_mode_action(POP,keyword);
                if (Verbose_Mode && *keyword != '}' && *keyword != ']') {
                    do_indent(--Indent_Level);
                    /* Current_Line is a long, so it must be printed with %ld */
                    printf("line %ld: \\end{%s}\n",Current_Line,keyword);
                }
                do_pop(etype,keyword);
                break;

            case (PUSH) :
                /* \begin{keyword}, '{', '[' */
                math_mode_action(PUSH,keyword);
                if (Verbose_Mode && *keyword != '{' && *keyword != '[') {
                    do_indent(Indent_Level++);
                    /* Current_Line is a long, so it must be printed with %ld */
                    printf("line %ld: \\begin{%s}\n",Current_Line,keyword);
                }
                if (0==strcmp("verbatim",keyword) || 0==strcmp("verbatim*",keyword)) {
                    /* verbatim environments are handed to do_verbatim and */
                    /* are not pushed on the stack here */
                    do_verbatim(keyword);
                    break;
                }
                else {
                    /* keyword may point at a static buffer: push a copy */
                    lex_push(etype,anewstr(keyword),Current_Line);
                    break;
                }

            case (DOLLAR) :
                math_mode_action(DOLLAR,keyword);
                break;

            case (DOLLAR_DOLLAR) :
                math_mode_action(DOLLAR_DOLLAR,keyword);
                break;

            case (CHECK_SINGLE) :
                /* check for \(, \[, \), \] for math mode */
                math_mode_action(CHECK_SINGLE,keyword);
                if (Check_Mode) {
                    if (!LGL_SINGLE_COMMAND_CHAR(*keyword)) {
                        single_char_command_error(*keyword);
                    }
                    if (NOT_FOUND == (cmd_index = command_lookup(keyword))) {
                        fprintf(stderr,"Fatal error:\n");
                        fprintf(stderr,"Command Table and Legal Chars out of sync\n");
                        exit(1);
                    }
                    if (!In_Math_Mode && IS_MATH_MODE(cmd_index)) {
                        math_keyword_error(keyword);
                    }
                }
                break;

            case (CHECK) :
                /* \command token */
                if (0 == strcmp("verb",keyword)) {
                    /* swallow the optional '*' of \verb* before do_verb runs */
                    if ('*' != (ch = get_a_char())) unget_a_char();
                    do_verb();
                    break;
                }
                if (Check_Mode) {
                    if (NOT_FOUND == (cmd_index = command_lookup(keyword))) {
                        keyword_error(keyword);
                    }
                    else if (!In_Math_Mode && IS_MATH_MODE(cmd_index)) {
                        math_keyword_error(keyword);
                    }
                }
                break;

            default :
                fprintf(stderr,"Invalid return from get_token...\n");
                texit();
        }
    }
    /* anything left on the stack was opened but never closed */
    if (!Stack_Empty) {
        eof_error();
        texit();
    }
    return(0);
}
texit ()
/* common exit path for fatal errors: close the file being processed and */
/* terminate the process with exit status 1. */
{
    (void) fclose(fp);
    exit(1);
}
usage ()
/* complain about a bad command line argument on stderr, show the usage */
/* summary and terminate with exit status 1. */
{
    fputs("\nUnrecognized argument to texchk\n",stderr);
    fputs("Usage: texchk [ -v -c ] [ file1 file2 ... ]\n",stderr);
    exit(1);
}
main (argc,argv) int argc; char **argv;
/* texchk [ -v -c ] [ file1 file2 ... ] */
/* Option arguments are recognised anywhere on the command line; every */
/* other argument is taken as an input file.  With no input files the */
/* standard input is checked. */
{
    char **argptr;
    int j,input_files = 0;

    init_legal_chars();

    /* process command line arguments */
    for (argptr = argv + 1; *argptr != NULL; argptr++) {
        if (**argptr != '-') {
            input_files = 1;
            continue;
        }
        /* options are exactly a dash plus one letter */
        if (strlen(*argptr) != 2) {
            usage();
        }
        switch ((*argptr)[1]) {
            case 'v' :
                Verbose_Mode = T;
                break;
            case 'c' :
                Check_Mode = T;
                break;
            default :
                usage();
                break;
        }
        /* blank out the option so the file loop below skips this slot */
        *argptr = NULL;
    }

    /* read and process each file */
    if (input_files) {
        for (j = 1; j < argc; j++) {
            if (argv[j] == NULL) continue;
            printf("\nChecking file %s.\n\n",argv[j]);
            new_file();
            fp = (FILE *) efopen(argv[j],"r");
            process_file();
            fclose(fp);
        }
    }
    else {
        printf("\n");
        fp = stdin;
        process_file();
        printf("\nOK!\n\n");
    }
    exit(0);
}
SHAR_EOF
if test 16712 -ne "`wc -c < 'texchk.c'`"
then
echo shar: error transmitting "'texchk.c'" '(should have been 16712 characters)'
fi
fi
exit 0
# End of shell archive
More information about the Mod.sources
mailing list