yaccable grammars for C and C++
USENET News
news at awdprime.UUCP
Fri Mar 23 12:30:45 AEST 1990
In article <1435 at io.UUCP> jar at io.UUCP (Jim Roskind x5570) writes:
Thanks for the grammars. Some people requested a lexer. Here is mine
that I quickly hacked up. I'm sure that someone can use this to build a
complete one for your grammars.
Things to fix:
o It doesn't return the same token values as Jim's grammars (easy fix).
o You'll need to fix it to return different values for identifiers,
enums, and typedefs (see comment at {ident}).
o It doesn't support floating point yet either (easy fix, I have {float}
and {exp} defined but not used or tested).
o You'll want to handle cases like "test \" foobar" in the
string handling section (look for STRING and CHAR_CONST).
I couldn't figure out how to do it in 30 seconds or less so I'll
leave it up to you.
With this I was able to parse a fairly large C program that I had removed
all the typedefs from (no enums) using the grammar right out of the back
of _The C Programming Language_, Second Edition.
ENJOY!!!!! --- don't forget to remove the .sig at the bottom :-)
-------------- clexer.l for ANSI C ------------------
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.h"
/*
 * clexer_strdup: return a newly allocated copy of the NUL-terminated
 * string s.  An allocation failure is reported on stderr and terminates
 * the program, so the result is never NULL and callers may index into
 * it immediately.
 */
static char *
clexer_strdup(const char *s)
{
    size_t size = strlen(s) + 1;    /* +1 for the terminating NUL */
    char *copy = malloc(size);

    if (copy == NULL) {
        fprintf(stderr, "clexer: out of memory\n");
        exit(1);
    }
    memcpy(copy, s, size);
    return copy;
}

/* STRDUP(X): allocated copy of string X; X is evaluated exactly once. */
#define STRDUP(X) clexer_strdup(X)

/* Defined outside this file; printed by yyerror() as the offending symbol. */
extern int yychar;

/* Position of the scanner in the input, maintained by counter() and
 * skipcomments().  column restarts at 0 after each newline; linenum
 * starts at 1. */
static int column = 0;
static int linenum = 1;

/* The rules call count(); it is simply an alias for counter(). */
#define count(x) counter(x)

/* yydebug is 1 when YYDEBUG is defined at compile time (whatever its
 * value, since only definedness is tested) and 0 otherwise. */
#ifndef YYDEBUG
int yydebug = 0;
#else
int yydebug = 1;
#endif
%}
alpha [a-zA-Z]
digit [0-9]
    /* special: the one non-alphanumeric character allowed in identifiers (underscore). */
special [\_]
    /* ident: a letter or underscore followed by any mix of letters, digits and underscores. */
ident (({alpha}|{special})({alpha}|{digit}|{special})*)
    /* int: a run of one or more decimal digits. */
int ({digit}+)
    /* exp and float are defined but no rule in this file references them. */
exp ([Ee][-+]?{digit}+)
float ([-+]?{digit}+\.?{digit}*)
    /* Table-size declaration for lex (number of positions). */
%p 3000
%%
^\#.* { count(0); /* discard the rest of any line that begins with a hash sign (preprocessor lines); count(0) still advances the position counters */ }
[\ \n\t\v\f]+ { count(0); /* discard a run of blanks, newlines, tabs, vertical tabs and form feeds */ }
"/*" { count(1); /* the opener is counted here; skipcomments() consumes the rest of the comment */ skipcomments(); }
"..." { /* Multi-character operators: each action advances the position counters with count(1) and returns the token code for the operator. */ count(1); return DOTDOTDOT; }
">=" { count(1); return GE; }
"<=" { count(1); return LE; }
"!=" { count(1); return NOTEQU; }
"==" { count(1); return EQU; }
"*=" { count(1); return MULTEQU; }
"/=" { count(1); return DIVEQU; }
"%=" { count(1); return MODEQU; }
"+=" { count(1); return INCEQU; }
"-=" { count(1); return DECEQU; }
"<<=" { count(1); return SHIFTLEFTEQU; }
">>=" { count(1); return SHIFTRIGHTEQU; }
"&=" { count(1); return ANDEQU; }
"|=" { count(1); return OREQU; }
"^=" { count(1); return XOREQU; }
"<<" { count(1); return SHIFTLEFT; }
">>" { count(1); return SHIFTRIGHT; }
"++" { count(1); return INC; }
"--" { count(1); return DEC; }
"->" { count(1); return POINTS; }
"&&" { count(1); return LOGICALAND; }
"||" { count(1); return LOGICALOR; }
"(" { /* Single-character operators and punctuation are returned as their own character code. */ count(1); return '('; }
"," { count(1); return ','; }
")" { count(1); return ')'; }
";" { count(1); return ';'; }
"{" { count(1); return '{'; }
"}" { count(1); return '}'; }
"[" { count(1); return '['; }
"]" { count(1); return ']'; }
"*" { count(1); return '*'; }
"/" { count(1); return '/'; }
"+" { count(1); return '+'; }
"-" { count(1); return '-'; }
"%" { count(1); return '%'; }
"^" { count(1); return '^'; }
"&" { count(1); return '&'; }
"?" { count(1); return '?'; }
":" { count(1); return ':'; }
"!" { count(1); return '!'; }
"." { count(1); return '.'; }
"~" { count(1); return '~'; }
"<" { count(1); return '<'; }
">" { count(1); return '>'; }
"|" { /* bitwise-or: this rule takes the place of a second, unreachable copy of the dot rule; without it a lone vertical bar fell through to ERROR */ count(1); return '|'; }
"=" { count(1); return '='; }
if { /* Reserved words. These rules precede the identifier rule, so when a word matches both with the same length lex picks the keyword; a longer identifier that merely starts with a keyword still wins by longest match. */ count(1); return IF; }
else { count(1); return ELSE; }
while { count(1); return WHILE; }
do { count(1); return DO; }
for { count(1); return FOR; }
switch { count(1); return SWITCH; }
case { count(1); return CASE; }
default { count(1); return DEFAULT; }
goto { count(1); return GOTO; }
continue { count(1); return CONTINUE; }
break { count(1); return BREAK; }
return { count(1); return RETURN; }
sizeof { count(1); return SIZEOF; }
auto { count(1); return AUTO; }
register { count(1); return REGISTER; }
static { count(1); return STATIC; }
extern { count(1); return EXTERN; }
typedef { count(1); return TYPEDEF; }
void { count(1); return VOID; }
char { count(1); return CHAR; }
short { count(1); return SHORT; }
int { count(1); return INT; }
long { count(1); return LONG; }
float { count(1); return FLOAT; }
double { count(1); return DOUBLE; }
unsigned { count(1); return UNSIGNED; }
enum { count(1); return ENUM; }
const { count(1); return CONST; }
volatile { count(1); return VOLATILE; }
struct { count(1); return STRUCT; }
union { count(1); return UNION; }
\'([^\'\\\n]|\\.)+\' {
    /*
     * Character constant.  The body is one or more items, each either
     * an ordinary character (anything but a quote, a backslash or a
     * newline) or a backslash followed by any character.  The match
     * therefore stops at the first unescaped closing quote instead of
     * running on to the last quote on the line, and an escaped quote
     * inside the constant is accepted.
     */
    count(1);
    /* copy everything after the opening quote ... */
    yylval.strval = STRDUP(yytext+1);
    /* ... and overwrite the closing quote with the terminator */
    yylval.strval[strlen(yylval.strval)-1] = 0;
    return CHAR_CONST;
}
(0[xX][0-9a-fA-F]+|{digit}+)[uUlL]* {
    /*
     * Integer constant: hexadecimal with a 0x prefix, or a run of
     * digits, optionally followed by u and l suffix letters.
     * Base 0 makes strtoul choose hexadecimal, octal (leading zero)
     * or decimal the way C does; conversion stops at the first
     * suffix letter, so the suffix does not affect the value.
     */
    count(1);
    yylval.intval = strtoul(yytext, NULL, 0);
    return INTEGER_CONST;
}
\"([^\"\\\n]|\\(.|\n))*\" {
    /*
     * String literal.  The body is any number of items, each either
     * an ordinary character (anything but a quote, a backslash or a
     * newline) or a backslash followed by any character, newline
     * included.  The match therefore ends at the first unescaped
     * closing quote, so two literals on one line stay separate and
     * an escaped quote inside a literal is accepted.
     */
    count(1);
    /* copy everything after the opening quote ... */
    yylval.strval = STRDUP(yytext+1);
    /* ... and overwrite the closing quote with the terminator */
    yylval.strval[strlen(yylval.strval)-1] = 0;
    return STRING;
}
{ident} {
count(1);
/* hand the parser an allocated copy of the name; nothing in the visible code frees it */
yylval.strval = STRDUP(yytext);
/*
* TODO: look the name up (for example in a hash table) so that enum
* constants, typedef names and ordinary identifiers can be told
* apart; for now every name is reported as IDENTIFIER.
*
* BTW: you'll have to cooperate with the grammar to do this.
*/
return IDENTIFIER;
}
. { count(1); /* any single character that no earlier rule accepted */ return ERROR; }
%%
/*
 * yywrap: end-of-input hook called by the lex-generated scanner.
 * Always returns 1, which tells the scanner there is no further input
 * to switch to.
 */
int
yywrap(void)
{
    return 1;
}
/*
 * skipcomments: consume the body of a C comment.
 *
 * Called after the scanner has matched the comment opener.  Reads
 * characters with input() up to and including the closing star-slash
 * pair, keeping column and linenum up to date for every character
 * consumed (newline resets column and bumps linenum, tab advances to
 * the next multiple of 8, anything else advances by one).
 *
 * If the input ends before the comment is closed, a diagnostic naming
 * the line on which the comment started is written to stderr and the
 * function returns instead of looping forever.  End of input is
 * recognised as either EOF or 0, since lex implementations differ in
 * which of the two input() returns.
 *
 * Returns 0 always.  The return type is int only because the rule
 * that calls this function appears before any declaration of it.
 */
int
skipcomments(void)
{
    int c;                      /* int, so that EOF is representable */
    int prev_star = 0;          /* nonzero if the previous char was '*' */
    int start_line = linenum;   /* for the unterminated-comment message */

    for (;;) {
        c = input();
        if (c == EOF || c == 0) {
            fprintf(stderr, "line %d: unterminated comment\n", start_line);
            return 0;
        }

        if (c == '\n') {
            column = 0;
            linenum++;
        } else if (c == '\t') {
            column += 8 - (column % 8);
        } else {
            column++;
        }

        if (prev_star && c == '/') {
#ifdef LEXDEBUG
            printf ("symbol found: %s\n", "*/");
#endif
            return 0;
        }
        /* a run of stars keeps the flag set, so "**" followed by '/' closes */
        prev_star = (c == '*');
    }
}
/*
 * counter: advance the position counters over the current token.
 *
 * Walks yytext and updates column and linenum: a newline resets column
 * to 0 and increments linenum, a tab moves column to the next multiple
 * of 8, and any other character moves column forward by one.
 *
 * notwhite  nonzero when the token is not white space; only consulted
 *           when LEXDEBUG is defined, in which case such tokens are
 *           echoed to stdout.
 *
 * Returns 0 always; callers in this file ignore the value.
 */
/*ARGSUSED*/
int
counter(int notwhite)
{
    const char *p;

    (void)notwhite;     /* unused unless LEXDEBUG is defined */
#ifdef LEXDEBUG
    if (notwhite)
        printf ("symbol found: %s\n", yytext);
#endif
    for (p = yytext; *p != '\0'; p++) {
        if (*p == '\n') {
            column = 0;
            linenum++;
        } else if (*p == '\t') {
            column += 8 - (column % 8);
        } else {
            column++;
        }
    }
    return 0;
}
/*
 * yyerror: report an error on stderr.
 *
 * Prints the message s together with the scanner's current line and
 * column, followed by a second line showing the current token text
 * (yytext) and the numeric value of yychar.
 *
 * Returns 0 always.
 */
int
yyerror(char *s)
{
    fprintf (stderr, "YYERROR: %s: line %d col %d\n", s, linenum, column);
    fprintf (stderr, "YYERROR: yytext=`%s' symbol was (%d)\n", yytext, yychar);
    return 0;
}
-- sanders
For every message of the day, a new improved message will arise to overcome it.
Reply-To: cs.utexas.edu!ibmaus!auschs!sanders.austin.ibm.com!sanders (ugh!)
More information about the Comp.lang.c
mailing list