Xref for yacc

Thu Jul 10 08:52:08 AEST 1986

	The following source forms the xref program for syntactically correct
Yacc files and runs on UN*X 4.2. All tokens and non-terminals in the grammar
are located, together with a list of the line numbers at which they occur. The
output is a numbered listing of the Yacc grammar up to the second '%%' or EOF,
followed by the xref table. Should a syntax error occur before normal termin-
ation, the grammar listing ends and as much of the table as has been constructed
is printed. All comments /* .. */ , %{ .. %} and actions { .. } are ignored,
although they appear in the listing.
	Table data is stored in "idents[]", which holds MAXIDENTS terminal /
non-terminal names, each of length < MAXCHARS. For each name, a total of MAXDEFS
defining (lhs of rule) occurrences can be held, and a total of MAXOCCS other
(rhs of rule) occurrences. Each identifier which may be the start symbol has
the message held in "start_maybe[]" printed - likewise all possible tokens are
accompanied by "token_maybe[]". All defining occurrences are printed following
"declared_at_mark[]", and all other occurrences following "occurs_at_mark[]".

	An example of the output is given below.

   1 :	%{
   2 :	# include <ctype.h>
   3 :	%}
   4 :	
   5 :	
   6 :		/*******************************************************\
   7 :		*							*
   8 :		*	Date : Fri Jul  4 00:50:04 BST 1986		*
   9 :		*							*
  10 :		\*******************************************************/
  11 :	
  12 :	%token	IDENTIFIER START_TOKEN NUMBER
  13 :	
  14 :	%start	small
  15 :	
  16 :	%%
  17 :	
  18 :	small
  19 :		:	start middle
  20 :				{
  21 :					printf("\n\nMiddle");
  22 :					yyclearin;
  23 :					return(0);
  24 :				}
  25 :			end postamble
  26 :		;
  27 :	
  28 :	start
  29 :		:	/* empty */
  30 :		|	START_TOKEN
  31 :		;
  32 :	
  33 :	middle
  34 :		:
  35 :		;
  36 :	
  37 :	middle
  38 :		:	MID_TOKEN
  39 :		|	/* empty */
  40 :		;
  41 :	
  42 :	%%

'small' -
~~~~~~~		*18 , never occurs on rhs of rule - start rule?
'start' -
~~~~~~~		*28 , 19
'middle' -
~~~~~~~		*33 , *37, 19
'end' -
~~~~~~~		is not declared - token??, 25
'postamble' -
~~~~~~~		is not declared - token??, 25
'START_TOKEN' -
~~~~~~~		is not declared - token??, 30
'MID_TOKEN' -
~~~~~~~		is not declared - token??, 38

	End of X-ref
	~~~~~~~~~~~~

	The text below should be stored in the file

	pack.out

and can be converted from the single file to the three files

	xref-grammar, xref-regular_expr, xref-line.h

by issuing the command

	sh pack.out

	Issue the commands

	lex xref-regular_expr
	yacc -vd xref-grammar
	cc y.tab.c -o xref

to produce an executable version and use as any UN*X command.

<---------------------------------- Cut here ---------------------------------->
#!/bin/sh
# Shell archive.  Each "sed ... << '/'" command strips the leading 'X' from
# every line of its here-document (which ends at a line holding only "/")
# and writes the result to the named file.
echo 'Start of pack.out, part 01 of 01:'
echo 'x - xref-grammar'
sed 's/^X//' > xref-grammar << '/'
X%{
X
X# include <ctype.h>
X# include <stdio.h>
X
X%}
X
X
X	/*******************************************************\
X	*							*
X	*	X_reference program for YACC files		*
X	*	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~		*
X	*							*
X	*	Cathy Taylor,					*
X	*	c/o Department of Computing,			*
X	*	University of Lancaster,			*
X	*	Bailrigg, Lancaster, England.			*
X	*							*
X	*	Date : Fri Jul  4 00:50:04 BST 1986		*
X	*							*
X	\*******************************************************/
X
X
X	/***********************************************\
X	*						*
X	*	Yacc Input Syntax			*
X	*	~~~~~~~~~~~~~~~~~			*
X	*						*
X	*	Adapted from the document		*
X	*	'YACC - Yet Another Compiler Compiler'	*
X	*		by				*
X	*	   S. C. Johnson			*
X	*						*
X	*	Date: Tue Jul  1 02:40:18 BST 1986	*
X	*						*
X	\***********************************************/
X
X
X	/*
X	*	Token codes returned by the lexical analyser.
X	*	PER is "%%", PERCURL a whole "%{ .. %}" block and
X	*	ACT a whole "{ .. }" block.
X	*/
X%token	IDENTIFIER CHARACTER NUMBER
X%token	LEFT RIGHT NONASSOC TOKEN PREC TYPE START UNION
X%token	PER PERCURL ACT
X%token	COLON SEMICOLON COMMA OR LESS GREATER
X
X%start	spec
X
X%%
X
X	/*
X	*	A whole specification: the declarations, "%%", the rules
X	*	and an optional second "%%".  The action ends the listing
X	*	with two newlines and returns 0 from the parser.
X	*/
X
Xspec
X	:	defs PER rules tail
X			{
X				printf("\n\n");
X				yyclearin;
X				return(0);
X			}
X	;
X
X	/* The optional second "%%". */
X
Xtail
X	:	/* empty */
X	|	PER 
X	;
X
X	/* The declarations section, which may be empty. */
X
Xdefs
X	:	/* empty */
X	|	def_bk
X	;
X
X	/* One or more declarations. */
X
Xdef_bk
X	:	def
X	|	def_bk def
X	;
X
X	/*
X	*	One declaration from the declarations section.
X	*	The lexical analyser returns "%union" as UNION and the
X	*	"{ .. }" body that follows it as ACT, so the body has to
X	*	be accepted here; with UNION alone every %union
X	*	declaration was reported as a syntax error.
X	*/
X
Xdef
X	:	START IDENTIFIER
X	|	UNION ACT
X	|	PERCURL
X	|	rword tag nlist
X	;
X
X	/* The keywords that introduce a list of names. */
X
Xrword
X	:	TOKEN
X	|	LEFT
X	|	RIGHT
X	|	NONASSOC
X	|	TYPE
X	;
X
X	/* An optional "<name>" tag between the keyword and the list. */
X
Xtag
X	:	/* empty */
X	|	LESS IDENTIFIER GREATER
X	;
X
X	/* A list of names; a comma between names is optional. */
X
Xnlist
X	:	nmno
X	|	nlist opt_comma nmno
X	;
X
Xopt_comma
X	:	/* empty */
X	|	COMMA
X	;
X
X	/*
X	*	One entry of a declaration list, with an optional number.
X	*	The lexical analyser returns quoted literals as CHARACTER,
X	*	so they are accepted here as well as identifiers; without
X	*	that a declaration such as  %left '+' '-'  was reported
X	*	as a syntax error.
X	*/
X
Xnmno
X	:	IDENTIFIER opt_num
X	|	CHARACTER opt_num
X	;
X
X	/* The optional number that may follow a name in a declaration. */
X
Xopt_num
X	:	/* empty */
X	|	NUMBER
X	;
X
X	/* The rules section: one or more rules. */
X
Xrules
X	:	rule
X	|	rules rule
X	;
X
X	/*
X	*	One rule.  The identifier in front of the colon is passed
X	*	to yyaction() as a defining occurrence at the current line.
X	*/
X
Xrule
X	:	IDENTIFIER
X		{
X			yyaction(ON_C_IDENT,line);
X		}
X		COLON body SEMICOLON
X	;
X
X	/* The alternatives of a rule, separated by OR. */
X
Xbody
X	:	body_block
X	|	body OR body_block
X	;
X
X	/* One alternative: a possibly empty sequence of entities. */
X
Xbody_block
X	:	/* empty */
X	|	body_block body_entity
X	;
X
X	/* A name or literal, optionally preceded by PREC, or an action. */
X
Xbody_entity
X	:	opt_prec id_ent
X	|	ACT
X	;
X
X	/*
X	*	An identifier inside a rule body is passed to yyaction() as
X	*	an ordinary occurrence at the current line.  Quoted literals
X	*	are accepted but are not entered in the table.
X	*/
X
Xid_ent
X	:	IDENTIFIER
X		{
X			yyaction(ON_IDENT,line);
X		}
X	|	CHARACTER
X	;
X
Xopt_prec
X	:	/* empty */
X	|	PREC
X	;
X
X
X%%
X
X# include	<stdio.h>
X# include	"lex.yy.c"
X# include	"xref-line.h"
X
X/* Kinds of occurrence passed to yyaction() */
X#define	ON_C_IDENT	000
X#define	ON_IDENT	001
X
X/* Sizes of the cross-reference table */
X#define	MAXIDENTS	1000
X#define MAXCHARS	100
X#define	MAXDEFS		20
X#define	MAXOCCS		1000
X
X/* One table entry per distinct name; yyaction() searches idents[] from index 0. */
Xstruct	IREC {
X		char	ident[MAXCHARS];	/* the name; "" marks the end of the used entries */
X		int	desc[MAXDEFS];		/* line numbers of defining occurrences */
X		int	nextdesc;		/* number of entries used in desc[] */
X		int	occ[MAXOCCS];		/* line numbers of other occurrences */
X		int	nextocc;		/* number of entries used in occ[] */
X		} idents[MAXIDENTS];
X
X
Xyyaction (action,ln)
Xint	action;
Xint	ln;
X{
X	int	id;
X	
X	id = 0;
X	while (	strcmp(idents[id].ident,yytext) != 0 && strcmp(idents[id].ident,"") != 0 )
X		id++;
X
X	if ( strcmp(idents[id].ident, yytext) != 0 )
X	{
X
X	/*******************************************************\
X	*							*
X	*	New non-terminal to be stored.			*
X	*	No distinction is made here between tokens	*
X	*	and (non) terminals.				*
X	*							*
X	\*******************************************************/
X
X		strcpy(idents[id].ident,yytext);
X		idents[id].nextdesc = 0;
X		idents[id].nextocc = 0;
X	} /* fi */
X
X	switch (action) {
X	case ON_C_IDENT:
X
X	/*******************************************************\
X	*							*
X	*	Add to list of definition lines.		*
X	*							*
X	\*******************************************************/
X
X		idents[id].desc[idents[id].nextdesc++] = ln;
X		break;
X
X	case ON_IDENT:
X				
X	/*******************************************************\
X	*							*
X	*	Add to list of occurance lines.			*
X	*							*
X	\*******************************************************/
X
X		idents[id].occ[idents[id].nextocc++] = ln;
X		break;
X
X	default		:
X		fprintf (stdout, "yyaction: invalid action\n");
X		return (-1);
X		} /* hctiws */
X	return (0);
X} /* corp */
X
Xnline(ln)
Xint	ln;
X{
X	printf("%4d :\t",ln);
X}
X
X
X	/*
X	*	Strings for output, used by main() when it prints the table.
X	*/
X
X	/* mark for the line number of a defining occurrence */
X	char	declared_at_mark[] = "*";
X	/* mark for the line number of any other occurrence */
X	char	occurs_at_mark[] = "";
X	/* printed for a name that has no defining occurrence */
X	char	token_maybe[] = "is not declared - token??";
X	/* printed for a name that has no other occurrence */
X	char	start_maybe[] = "never occurs on rhs of rule - start rule?";
X	
X
Xmain ()
X{
X	int	ind,id;
X
X	strcpy(idents[0].ident,"");
X
X	line = 0;
X	nline(++line);
X
X	yyparse ();
X
X	id = 0;
X	while( strcmp(idents[id].ident,"") != 0 )
X	{
X		printf("\n'%s' -\n~~~~~~~\t\t",idents[id].ident);
X		if (idents[id].nextdesc == 0 )
X		    printf("%s",token_maybe);
X		else
X		{
X		    ind = 0;
X		    printf("*%d ",idents[id].desc[ind++]);
X		    for ( ind=1; ind < idents[id].nextdesc ; ind++)
X			printf(", %s%d",declared_at_mark,idents[id].desc[ind]);
X		}
X		if (idents[id].occ[0] == 0)
X		    printf(", %s",start_maybe);
X		else
X		{
X		    for ( ind = 0; ind < idents[id].nextocc ; ind++ )
X			printf(", %s%d",occurs_at_mark,idents[id].occ[ind]);
X		}
X		id++;
X	}
X	printf("\n\n\tEnd of X-ref\n\t~~~~~~~~~~~~\n");
X} /* niam */
X
Xyyerror(mess)
Xchar	*mess;
X{
X	printf("\n\t%s\n",mess);
X} /* corp */
/
# Strip the leading 'X' from the here-document that follows and write it
# out as the lex specification.
echo 'x - xref-regular_expr'
sed 's/^X//' > xref-regular_expr << '/'
X
X	/*******************************************************\
X	*							*
X	*	X_ref for YACC - LEX file			*
X	*	~~~~~~~~~~~~~~~~~~~~~~~~~			*
X	*							*
X	*	Date: Tue Jul  1 03:36:21 BST 1986		*
X	*							*
X	*	Cathy Taylor,					*
X	*	c/o Department of Computing,			*
X	*	University of Lancaster,			*
X	*	Bailrigg, Lancaster, England.			*
X	*							*
X	\*******************************************************/
X
X%{
X
X# include	<stdio.h>
X# include	"xref-line.h"
X
X#define		TRUE 1
X#define 	FALSE 0
X
Xint	recognised;
Xchar	c;
X
X%}
X
X	/*
X	*	abbreviations
X	*	digit, id_char, letter and white are used by the rules
X	*	below; u_case and l_case are not used by any rule.
X	*/
X
Xdigit		[0-9]
Xu_case		[A-Z]
Xl_case		[a-z]
Xid_char		[A-Za-z0-9_]
Xletter		[A-Za-z]
Xwhite		[\t ]
X
X
X%%
X
X"/*"			{
X
X/*
X*	Copy a comment to the listing; no token is returned.
X*	The scan also stops when nextchar() reports end of input
X*	(0, or EOF from some lex versions), so that an unterminated
X*	comment cannot loop for ever.
X*/
X
X				ECHO;
X				recognised = FALSE;
X				c = nextchar();
X				while (recognised == FALSE && c != 0 && c != EOF)
X				{
X					while (c != '*' && c != 0 && c != EOF)
X						c = nextchar();
X					if (c == '*')
X					{
X						c = nextchar();
X						if (c == '/')
X							recognised = TRUE;
X					}
X				}
X			}
X"%{"			{
X
X/*
X*	Copy a "%{ .. %}" block to the listing and return PERCURL.
X*	The scan also stops when nextchar() reports end of input
X*	(0, or EOF from some lex versions), so that an unterminated
X*	block cannot loop for ever.
X*/
X
X				ECHO;
X				recognised = FALSE;
X				c = nextchar();
X				while (recognised == FALSE && c != 0 && c != EOF)
X				{
X					while (c != '%' && c != 0 && c != EOF)
X						c = nextchar();
X					if (c == '%')
X					{
X						c = nextchar();
X						if (c == '}')
X							recognised = TRUE;
X					}
X				}
X				return(PERCURL);
X			}
X"{"			{
X
X/*
X*	Copy a "{ .. }" block to the listing and return ACT.
X*	A pattern such as "{"[^\}]*"}" may overrun the lex buffer
X*	(200 chars) and, like a scan for the first '}', would end the
X*	block at the first nested "}".  Thus this routine: it counts
X*	the depth of nesting and takes no notice of braces inside
X*	string constants, character constants and comments.
X*	The scan also stops when nextchar() reports end of input
X*	(0, or EOF from some lex versions).
X*/
X
X				int	depth, quote, prev, in_comment;
X
X				ECHO;
X				depth = 1;		/* the "{" just matched */
X				quote = 0;		/* closing quote awaited, or 0 */
X				prev = 0;		/* previous character, 0 if none */
X				in_comment = FALSE;
X				while (depth > 0)
X				{
X					c = nextchar();
X					if (c == 0 || c == EOF)
X						break;
X					if (in_comment)
X					{
X						if (prev == '*' && c == '/')
X						{
X							in_comment = FALSE;
X							c = 0;	/* may not pair with what follows */
X						}
X					}
X					else if (quote != 0)
X					{
X						if (c == '\\')
X						{
X							/* the escaped character is skipped */
X							c = nextchar();
X							if (c == 0 || c == EOF)
X								break;
X							c = 0;
X						}
X						else if (c == quote)
X							quote = 0;
X					}
X					else if (prev == '/' && c == '*')
X					{
X						in_comment = TRUE;
X						c = 0;	/* so that "/*/" does not close it */
X					}
X					else if (c == '"' || c == '\'')
X						quote = c;
X					else if (c == '{')
X						depth++;
X					else if (c == '}')
X						depth--;
X					prev = c;
X				}
X				return(ACT);
X			}
X{letter}{id_char}*	{
X				/* a name: a letter, then letters, digits or '_' */
X				ECHO;
X				return(IDENTIFIER);
X			}
X"'"(\\.|[^\\'])+"'"	{
X				/*
X				*	A quoted literal.  A backslash always takes
X				*	the next character with it, so that '\'' is
X				*	read as one literal instead of ending at its
X				*	second quote.
X				*/
X				ECHO;
X				return(CHARACTER);
X			}
X{white}+		{	
X				/* blanks and tabs are listed; no token is returned */
X				ECHO;
X			}
X{digit}+		{
X				ECHO;
X				return(NUMBER);
X			}
X"%"{white}*"left"	{
X				/*
X				*	This and the keyword rules that follow allow
X				*	blanks or tabs between '%' and the keyword.
X				*/
X				ECHO;
X				return(LEFT);
X			}
X"%"{white}*"right"	{
X				ECHO;
X				return(RIGHT);
X			}
X"%"{white}*"nonassoc"	{
X				ECHO;
X				return(NONASSOC);
X			}
X"%"{white}*"token"	{
X				ECHO;
X				return(TOKEN);
X			}
X"%"{white}*"prec"	{
X				ECHO;
X				return(PREC);
X			}
X"%"{white}*"type"	{
X				ECHO;
X				return(TYPE);
X			}
X"%"{white}*"start"	{
X				ECHO;
X				return(START);
X			}
X"%"{white}*"union"	{
X				ECHO;
X				return(UNION);
X			}
X"%%"			{
X				/* the mark that separates the sections */
X				ECHO;
X				return(PER);
X			}
X":"			{
X				ECHO;
X				return(COLON);
X			}
X";"			{
X				ECHO;
X				return(SEMICOLON);
X			}
X","			{
X				ECHO;
X				return(COMMA);
X			}
X"|"			{
X				ECHO;
X				return(OR);
X			}
X"<"			{
X				ECHO;
X				return(LESS);
X			}
X">"			{
X				ECHO;
X				return(GREATER);
X			}
X"\n"			{
X				/* end of line: list it, then number the next line */
X				ECHO;
X				nline(++line);
X			}
X
X%%
X
Xyywrap()
X{
X	/* wrap-up procedure: always answers 1 */
X	int	wrapped_up;
X
X	wrapped_up = 1;
X	return (wrapped_up);
X}
X
Xnextchar()
X{
X	char	c;
X	
X	c = input();
X	printf("%c",c);
X	if (c == '\n')
X		nline(++line);
X	return(c);
X}
/
# Strip the leading 'X' from the here-document that follows and write it
# out as the header included by both the grammar and the lex specification.
echo 'x - xref-line.h'
sed 's/^X//' > xref-line.h << '/'
X
X	int	line;
/
echo 'Part 01 of pack.out complete.'
# Stop here so that sh does not go on to read any text after the archive.
exit
<---------------------------------- Cut here ---------------------------------->

-- 

	 - Marxist unite!
	 - Comrades unite!
	 - Bad-spellers of the world untie!

UUCP:  ...!seismo!mcvax!ukc!dcl-cs!cathy
DARPA: cathy%lancs.comp at ucl-cs	| Post: University of Lancaster,
JANET: cathy at uk.ac.lancs.comp	|	Department of Computing,
Phone: +44 524 65201 ext ????	|	Bailrigg, Lancaster, LA1 4YR, UK.