Scanner for Ada parser
David Norris
david at ssc-vax.UUCP
Wed Dec 7 07:12:25 AEST 1983
An earlier article, containing my revised grammar for MIL-STD Ada,
generated some interest. Rather than send the C source of the lexical
analyzer via mail, I am posting it so all may benefit. There is much room for
improvement, both in the grammar (error recovery) and the scanner. Comments
or improvements greatly appreciated.
-- Dave Norris
-- ..!uw-beaver!ssc-vax!david
-----------------------------------------------------------------------------
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include "y.tab.c"
/* Scanner state shared by yylex and its helper routines. */
char id[20]; /* upper-cased, NUL-terminated spelling of the last identifier or reserved word scanned by yylex */
int inum; /* integer value of the last numeric literal (set by yylex via getinteger) */
double rnum; /* value of the last numeric literal that had a fractional part (set by yylex) */
int i; /* digit value of ch, computed by inbase as a side effect */
int base; /* number base used by inbase, getinteger and getfraction; yylex sets it to 10 or to the base of a based literal */
int sleng; /* string length; not referenced by the code shown in this listing */
#define MAXLINE 132
int ch; /* lookahead character: last character read from source program (EOF at end of input) */
char line[MAXLINE]; /* current line of source text, filled by nextch */
int cc; /* character counter: index in line of the character held in ch */
int lc; /* count of source lines read so far; printed by nextch as the listing line number */
int ll ; /* length of current line (number of characters stored in line) */
int endoffile; /* end of file indicator: set to 1 once yylex has returned EOF */
/*
 * Reserved-word table: maps the upper-case spelling of each Ada reserved
 * word to the token code defined for it by the parser (y.tab.c).
 *
 * The entries must stay in ascending strcmp() order, because binary()
 * locates words in this table by bisection.
 */
struct key {
    char *keyword;      /* upper-case spelling */
    int keyvalue;       /* token code returned by yylex */
} keywordtab[] = {
    { "ABORT",     ABORT     },
    { "ABS",       ABS       },
    { "ACCEPT",    ACCEPT    },
    { "ACCESS",    ACCESS    },
    { "ALL",       ALL       },
    { "AND",       AND       },
    { "ARRAY",     ARRAY     },
    { "AT",        AT        },
    { "BEGIN",     BEGIN     },
    { "BODY",      BODY      },
    { "CASE",      CASE      },
    { "CONSTANT",  CONSTANT  },
    { "DECLARE",   DECLARE   },
    { "DELAY",     DELAY     },
    { "DELTA",     DELTA     },
    { "DIGITS",    DIGITS    },
    { "DO",        DO        },
    { "ELSE",      ELSE      },
    { "ELSIF",     ELSIF     },
    { "END",       END       },
    { "ENTRY",     ENTRY     },
    { "EXCEPTION", EXCEPTION },
    { "EXIT",      EXIT      },
    { "FOR",       FOR       },
    { "FUNCTION",  FUNCTION  },
    { "GENERIC",   GENERIC   },
    { "GOTO",      GOTO      },
    { "IF",        IF        },
    { "IN",        IN        },
    { "IS",        IS        },
    { "LIMITED",   LIMITED   },
    { "LOOP",      LOOP      },
    { "MOD",       MOD       },
    { "NEW",       NEW       },
    { "NOT",       NOT       },
    { "NULL",      NULL      },
    { "OF",        OF        },
    { "OR",        OR        },
    { "OTHERS",    OTHERS    },
    { "OUT",       OUT       },
    { "PACKAGE",   PACKAGE   },
    { "PRAGMA",    PRAGMA    },
    { "PRIVATE",   PRIVATE   },
    { "PROCEDURE", PROCEDURE },
    { "RAISE",     RAISE     },
    { "RANGE",     RANGE     },
    { "RECORD",    RECORD    },
    { "REM",       REM       },
    { "RENAMES",   RENAMES   },
    { "RETURN",    RETURN    },
    { "REVERSE",   REVERSE   },
    { "SELECT",    SELECT    },
    { "SEPARATE",  SEPARATE  },
    { "SUBTYPE",   SUBTYPE   },
    { "TASK",      TASK      },
    { "TERMINATE", TERMINATE },
    { "THEN",      THEN      },
    { "TYPE",      TYPE      },
    { "USE",       USE       },
    { "WHEN",      WHEN      },
    { "WHILE",     WHILE     },
    { "WITH",      WITH      },
    { "XOR",       XOR       }
};

/* Number of entries in keywordtab. */
#define NKEYS (sizeof(keywordtab) / sizeof(keywordtab[0]))
main()
{
lc = 0; /* reset line count */
ll = 0; /* reset line length */
cc = 0; /* reset character count */
ch = ' '; /* reset ch (fool yylex into getting first token */
endoffile = 0; /* reset end of file indicator */
printf("Ada compiler\n\n");
if (yyparse() == 0)
printf("\n%d syntax error(s)",yynerrs);
else
printf("\ncompilation aborted.\n");
}
/*
 * yyerror - print a diagnostic under the current listing line.
 *
 * Writes the marker "**-=> ", then cc blanks, then a caret followed by
 * the message s and a newline.
 */
yyerror(s) char *s;
{
    int col = 0;

    fputs("**-=> ", stdout);
    while (col < cc) {
        putchar(' ');
        col++;
    }
    printf("^ %s\n", s);
}
/*
 * binary - look word up in keywordtab by bisection.
 *
 * word must already be in upper case, since the table holds upper-case
 * spellings and the comparison is an exact strcmp().
 * Returns the keyvalue of the matching entry, or -1 if word is not in
 * the table.
 */
binary(word)
char *word;
{
    int lo = 0;
    int hi = NKEYS - 1;

    while (lo <= hi) {
        int probe = (lo + hi) / 2;
        int order = strcmp(word, keywordtab[probe].keyword);

        if (order == 0)
            return keywordtab[probe].keyvalue;
        if (order < 0)
            hi = probe - 1;
        else
            lo = probe + 1;
    }
    return -1;
}
/*
 * nextch - advance the lookahead character ch by one position.
 *
 * When the current line is exhausted (cc == ll) a new line is read from
 * standard input into line[], echoed to the listing with its line number,
 * and scanning restarts at its first character.  At most MAXLINE-1
 * characters are stored per call; a longer source line is delivered in
 * several pieces.
 *
 * A final line that is not newline-terminated is still delivered, with a
 * newline supplied, instead of being dropped.
 *
 * Returns the new value of ch.  At end of input ch is set to EOF and EOF
 * is returned; cc == ll is kept true so that every later call reports
 * EOF again rather than re-reading stale buffer contents.
 */
int nextch()
{
    int lim;

    if (cc == ll) {
        cc = -1;
        ll = 0;
        lim = MAXLINE;
        while (--lim > 0 && (ch = getchar()) != EOF && ch != '\n')
            line[ll++] = ch;
        if (ch == EOF && ll == 0) {
            cc = ll;            /* stay in the "buffer exhausted" state */
            return EOF;
        }
        /*
         * EOF after some text means the last line had no newline.  There
         * is always room here: the read that hit EOF or '\n' stored
         * nothing, so ll <= MAXLINE-2.
         */
        if (ch == '\n' || ch == EOF)
            line[ll++] = '\n';
        line[ll] = '\0';
        printf("%5d : %s", ++lc, line);
    }
    /* go through unsigned char so a byte >= 0x80 can never look like EOF */
    ch = (unsigned char) line[++cc];
    return ch;
}
/*
 * inbase - compute the digit value of ch and test it against base.
 *
 * Decimal digits map to 0..9 and the letters A..F, in either case, map
 * to 10..15.  The value is left in the global i.
 * Returns 1 if ch is such a digit and its value is below base, else 0.
 * (i is still updated when the digit is recognised but too large.)
 */
int inbase()
{
    if (isdigit(ch))
        i = ch - '0';
    else if (toupper(ch) >= 'A' && toupper(ch) <= 'F')
        i = toupper(ch) - 'A' + 10;     /* fold case before computing the value */
    else
        return 0;
    return i < base;
}
/*
 * getinteger - scan a run of digits valid in the current base, starting
 * at ch, and return its integer value.
 *
 * A single underscore is allowed between digits; an underscore that is
 * not followed by a valid digit is reported through yyerror.  On return
 * ch holds the first character that is not part of the number.
 */
getinteger()
{
    int value = 0;

    for (;;) {
        if (!inbase())
            break;
        value = value * base + i;
        nextch();
        if (ch != '_')
            continue;
        nextch();                       /* step over the underscore */
        if (!inbase())
            yyerror("extended digit expected");
    }
    return value;
}
/*
 * getfraction - scan the digits following a radix point, starting at ch,
 * and return their value as a fraction in the current base.
 *
 * The first digit is weighted 1/base, the next 1/base^2, and so on.
 * A single underscore is allowed between digits; an underscore that is
 * not followed by a valid digit is reported through yyerror.
 *
 * Declared double: with an implicit int return type the result would be
 * truncated to 0, since the value is always below 1.
 */
double getfraction()
{
    double num = 0.0;           /* accumulated fraction; must start at zero */
    double divi = 1.0 / base;   /* weight of the next digit */

    while (inbase()) {
        num = num + i * divi;
        divi = divi / base;
        nextch();
        if (ch == '_') {
            nextch();
            if (inbase() == 0)
                yyerror("extended digit expected");
        }
    }
    return num;
}
yylex()
{
int k;
char based_ch; /* character denoting based literal; either # or : */
int exponent; /* integer exponent of numeric literal */
int sign; /* sign of numeric literal */
/* skip white space */
while (ch == ' ' || ch == '\n' || ch == '\t' || ch == '\0')
nextch();
/* check for alpha */
if (isalpha(ch)) {
k = 0;
while (isalpha(ch) || isdigit(ch)) {
if (isalpha(ch)) ch = toupper(ch);
id[k++] = ch;
if (ch == '_') {
nextch();
if (isalpha(ch) || isdigit(ch))
id[k++] = '_';
else
yyerror("letter or digit expected");
} else
nextch();
}
id[k] = '\0';
k = binary(id);
if (k == -1)
return(IDENTIFIER);
else
return(k);
}
/* check for number */
else if (isdigit(ch)) {
base = 10;
inum = getinteger();
if (ch == '#' || ch == ':') {
/* process based number */
based_ch = ch;
nextch();
base = inum;
inum = getinteger();
if (ch == based_ch) nextch();
else if (ch == '.') {
rnum = inum + getfraction();
if (ch == based_ch) nextch();
else
yyerror("mismatched # or : in based number");
}
}
else if (ch == '.') {
/* process real number */
nextch();
if (ch == '.')
--cc;
else
rnum = inum + getfraction();
}
if (ch == 'E' || ch == 'e') {
/* process exponent */
nextch();
if (ch == '+')
nextch();
else if (ch == '-') {
sign = -1;
nextch();
}
exponent = getinteger();
}
return(NUMERIC_LITERAL);
}
else {
switch(ch) {
case '"' :
while(1) {
nextch();
if (ch == '"') {
nextch();
if (ch != '"')
return(STRING_LITERAL);
}
}
case '\'' :
nextch();
if (isalpha(ch) || ch == '(') {
nextch();
if (ch == '\'') {
nextch();
return(CHARACTER_LITERAL);
} else {
--cc;
return(QUOTE);
}
} else {
nextch();
if (ch != '\'')
yyerror("quote expected");
else
nextch();
return(CHARACTER_LITERAL);
}
case ':' :
nextch();
if (ch == '=') {
nextch();
return(REPLACEMENT);
}
else
return(COLON);
case '<' :
nextch();
if (ch == '<') {
nextch();
return(LEFT_LABEL);
}
else if (ch == '=') {
nextch();
return(LESS_EQUAL);
}
else if (ch == '>') {
nextch();
return(BOX);
}
else
return(LESS_THAN);
case '>' :
nextch();
if (ch == '>') {
nextch();
return(RIGHT_LABEL);
}
else if (ch == '=') {
nextch();
return(GREATER_EQUAL);
}
else
return(GREATER_THAN);
case '.' :
nextch();
if (ch == '.') {
nextch();
return(ELLIPSIS);
}
else
return(PERIOD);
case '-' :
nextch();
if (ch == '-') {
cc = ll;
ch = ' ';
return(yylex());
}
else
return(MINUS);
case '*' :
nextch();
if (ch == '*') {
nextch();
return(DOUBLE_STAR);
}
else
return(SPLAT);
case '=' :
nextch();
if (ch == '>') {
nextch();
return(ARROW);
}
else
return(EQUAL_TO);
case '/' :
nextch();
if (ch == '=') {
nextch();
return(NOT_EQUAL_TO);
}
else
return(SLASH);
case '+' :
nextch();
return(PLUS);
case '|' :
nextch();
return(BAR);
case '&' :
nextch();
return(AMPERSAND);
case ';' :
nextch();
return(SEMICOLON);
case ',' :
nextch();
return(COMMA);
case '(' :
nextch();
return(LEFT_PAREN);
case ')' :
nextch();
return(RIGHT_PAREN);
case EOF :
if (endoffile == 0) {
endoffile = 1;
return(EOF);
}
else {
yyerror("unexpected end of file");
exit();
}
default :
yyerror("invalid character");
return(yylex());
}
}
}
More information about the Comp.sources.unix
mailing list