Trivial parser code
Marty
fouts at AMES-NAS.ARPA
Sat Dec 28 04:00:08 AEST 1985
I just got the latest 'dragon book', which is considerablly different
from the original, and in chapter 2 it has this trivial parser in C, so I
typed it in and it worked.
Figuring that others who get the book (which I recommend doing, if
you're at all interested in compilers) would want the same code, I bundled
it up, and here it is:
(Interesting that the book carries a 1986 copyright?)
----- CUT HERE And unpack with sh -----
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
# README
# emitter.c
# error.c
# global.h
# init.c
# lexer.c
# main.c
# makefile
# parser.c
# symbol.c
# This archive created: Fri Dec 27 09:47:34 1985
export PATH; PATH=/bin:$PATH
echo shar: extracting "'README'" '(511 characters)'
sed 's/^ X//' << \SHAR_EOF > 'README'
Trans is a simple parser for arithmatic expressions. It is from Aho, A.
V., R Sethi, and J. D. Ullman [1986] "Compilers Principles, Techniques, and
Tools"; 73-78. For a complete description, see the book.
Trans accepts simple expressions involving identifies and integer
constants and produces 'reverse polish' results. Expressions are terminated
by a semicolon. It terminates on any error, or on end of file from standard
input.
Sample valid input:
a + b; CXY - d; (a * b) * (c - d);
SHAR_EOF
echo shar: extracting "'emitter.c'" '(540 characters)'
sed 's/^ X//' << \SHAR_EOF > 'emitter.c'
/*
* Aho, A. V., R Sethi, and J. D. Ullman [1986]
* Compilers Principles, Techniques, and Tools
* pages 73 - 78
*/
#include "global.h"
emit(t, tval) /* generates output */
int t, tval;
{
switch(t) {
case '+': case '-': case '*': case '/':
printf("%c\n", t); break;
case DIV:
printf("DIV\n"); break;
case MOD:
printf("MOD\n"); break;
case NUM:
printf("%d\n", tval); break;
case ID:
printf("%s\n", symtable[tval].lexptr); break;
default:
printf("token %d, tokenval %d\n", t, tval);
}
}
SHAR_EOF
echo shar: extracting "'error.c'" '(297 characters)'
sed 's/^ X//' << \SHAR_EOF > 'error.c'
/*
* Aho, A. V., R Sethi, and J. D. Ullman [1986]
* Compilers Principles, Techniques, and Tools
* pages 73 - 78
*/
#include "global.h"
error(m) /* generates all error messages */
char *m;
{
fprintf(stderr, "line %d: %s\n", lineno, m);
exit(1); /* unsuccessful termination */
}
SHAR_EOF
echo shar: extracting "'global.h'" '(501 characters)'
sed 's/^ X//' << \SHAR_EOF > 'global.h'
/*
* Aho, A. V., R Sethi, and J. D. Ullman [1986]
* Compilers Principles, Techniques, and Tools
* pages 73 - 78
*/
#include <stdio.h>
#include <ctype.h>
#define BSIZE 128 /* buffer size */
#define NONE -1
#define EOS '\0'
#define NUM 256
#define DIV 257
#define MOD 258
#define ID 259
#define DONE 260
int tokenval; /* value of token attribute */
int lineno;
struct entry { /* form of symbol table entry */
char *lexptr;
int token;
};
struct entry symtable[]; /* symbol table */
SHAR_EOF
echo shar: extracting "'init.c'" '(351 characters)'
sed 's/^ X//' << \SHAR_EOF > 'init.c'
/*
* Aho, A. V., R Sethi, and J. D. Ullman [1986]
* Compilers Principles, Techniques, and Tools
* pages 73 - 78
*/
#include "global.h"
struct entry keywords[] = {
"div", DIV,
"mod", MOD,
0, 0
};
init() /* loads keywords into symtable */
{
struct entry *p;
for (p = keywords; p->token; p++)
insert(p->lexptr, p->token);
}
SHAR_EOF
echo shar: extracting "'lexer.c'" '(1022 characters)'
sed 's/^ X//' << \SHAR_EOF > 'lexer.c'
/*
* Aho, A. V., R Sethi, and J. D. Ullman [1986]
* Compilers Principles, Techniques, and Tools
* pages 73 - 78
*/
#include "global.h"
char lexbuf[BSIZE];
int lineno = 1;
int tokenval = NONE;
int lexan() /* lexical analyzer */
{
int t;
while (1) {
t = getchar();
if (t == ' ' || t == '\t')
; /* strip out white space */
else if (t == '\n')
lineno = lineno + 1;
else if (isdigit(t)) { /* t is a digit */
ungetc(t, stdin);
scanf("%d", &tokenval);
return NUM;
}
else if (isalpha(t)) { /* t is a letter */
int p, b = 0;
while (isalnum(t)) { /* t is alphanumeric */
lexbuf[b] = t;
t = getchar();
b = b + 1;
if (b >= BSIZE)
error("compiler error");
}
lexbuf[b] = EOS;
if (t != EOF)
ungetc(t, stdin);
p = lookup(lexbuf);
if (p == 0)
p = insert(lexbuf, ID);
tokenval = p;
return symtable[p].token;
}
else if (t == EOF)
return DONE;
else {
tokenval = NONE;
return t;
}
}
}
SHAR_EOF
echo shar: extracting "'main.c'" '(220 characters)'
sed 's/^ X//' << \SHAR_EOF > 'main.c'
/*
* Aho, A. V., R Sethi, and J. D. Ullman [1986]
* Compilers Principles, Techniques, and Tools
* pages 73 - 78
*/
#include "global.h"
main()
{
init();
parse();
exit(0); /* successful termination */
}
SHAR_EOF
echo shar: extracting "'makefile'" '(338 characters)'
sed 's/^ X//' << \SHAR_EOF > 'makefile'
SRCS = lexer.c parser.c emitter.c symbol.c init.c error.c main.c
OBJS = lexer.o parser.o emitter.o symbol.o init.o error.o main.o
HDRS = global.h /usr/include/stdio.h /usr/include/ctype.h
PROG = trans
all: $(PROG)
trans: $(HDRS) $(OBJS)
$(CC) $(CFLAGS) -o $(PROG) $(OBJS)
clean:
-rm $(OBJS) $(PROG)
lint:
lint -lc $(SRCS)
SHAR_EOF
echo shar: extracting "'parser.c'" '(1108 characters)'
sed 's/^ X//' << \SHAR_EOF > 'parser.c'
/*
* Aho, A. V., R Sethi, and J. D. Ullman [1986]
* Compilers Principles, Techniques, and Tools
* pages 73 - 78
*/
#include "global.h"
int lookahead;
parse() /* parses and translates expression list */
{
lookahead = lexan();
while (lookahead != DONE) {
expr(); match(';');
}
}
expr()
{
int t;
term();
while (1)
switch (lookahead) {
case '+': case '-':
t = lookahead;
match(lookahead); term(); emit(t,NONE);
continue;
default:
return;
}
}
term()
{
int t;
factor();
while (1)
switch(lookahead) {
case '*': case '/': case DIV: case MOD:
t = lookahead;
match(lookahead); factor(); emit(t,NONE);
continue;
default:
return;
}
}
factor()
{
switch(lookahead) {
case '(':
match('('); expr(); match(')'); break;
case NUM:
emit(NUM, tokenval); match(NUM); break;
case ID:
emit(ID, tokenval); match(ID); break;
default:
error("syntax error");
}
}
match(t)
int t;
{
if (lookahead == t)
lookahead = lexan();
else error("syntax error");
}
SHAR_EOF
echo shar: extracting "'symbol.c'" '(1123 characters)'
sed 's/^ X//' << \SHAR_EOF > 'symbol.c'
/*
* Aho, A. V., R Sethi, and J. D. Ullman [1986]
* Compilers Principles, Techniques, and Tools
* pages 73 - 78
*/
#include "global.h"
#define STRMAX 999 /* size of lexemes array */
#define SYMMAX 100 /* size of symtable */
char lexemes[STRMAX];
int lastchar = -1; /* last used position in lexemes */
struct entry symtable[SYMMAX];
int lastentry = 0; /* last used position in symtable */
int lookup(s) /* returns position of entry for s */
char s[];
{
int p;
for (p = lastentry; p > 0; p = p - 1)
if (strcmp(symtable[p].lexptr, s) == 0)
return p;
return 0;
}
int insert(s, tok) /* returns position of entry for s */
char s[];
int tok;
{
int len;
len = strlen(s); /* strlen computes length of s */
if (lastentry + 1 >= SYMMAX)
error("symbol table full");
if (lastchar + len + 1 >= STRMAX)
error("lexemes array full");
lastentry = lastentry + 1;
symtable[lastentry].token = tok;
symtable[lastentry].lexptr = &lexemes[lastchar + 1];
lastchar = lastchar + len + 1;
strcpy(symtable[lastentry].lexptr, s);
return lastentry;
}
SHAR_EOF
# End of shell archive
exit 0
More information about the Comp.sources.unix
mailing list