v14i083: Flex, a lex replacement, Part05/05
Rich Salz
rsalz at uunet.uu.net
Wed May 4 08:05:12 AEST 1988
Submitted-by: Vern Paxson <vern at lbl-csam.arpa>
Posting-number: Volume 14, Issue 83
Archive-name: flex/part05
#! /bin/sh
# This is a shell archive. Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file". To overwrite existing
# files, type "sh file -c". You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g.. If this archive is complete, you
# will see the following message at the end:
# "End of archive 5 (of 5)."
# Contents: tblcmp.c
# Wrapped by rsalz at fig.bbn.com on Tue May 3 17:31:35 1988
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'tblcmp.c' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'tblcmp.c'\"
else
echo shar: Extracting \"'tblcmp.c'\" \(39351 characters\)
sed "s/^X//" >'tblcmp.c' <<'END_OF_FILE'
X/* tblcmp - table compression routines */
X
X/*
X * Copyright (c) 1987, the University of California
X *
X * The United States Government has rights in this work pursuant to
X * contract no. DE-AC03-76SF00098 between the United States Department of
X * Energy and the University of California.
X *
X * This program may be redistributed. Enhancements and derivative works
X * may be created provided the new works, if made available to the general
X * public, are made available for use by anyone.
X */
X
X#include "flexdef.h"
X
X/* bldtbl - build table entries for dfa state
X *
X * synopsis
X * int state[numecs], statenum, totaltrans, comstate, comfreq;
X * bldtbl( state, statenum, totaltrans, comstate, comfreq );
X *
X * State is the statenum'th dfa state. It is indexed by equivalence class and
X * gives the number of the state to enter for a given equivalence class.
X * totaltrans is the total number of transitions out of the state. Comstate
X * is that state which is the destination of the most transitions out of State.
X * Comfreq is how many transitions there are out of State to Comstate.
X *
X * A note on terminology:
X * "protos" are transition tables which have a high probability of
X * either being redundant (a state processed later will have an identical
X * transition table) or nearly redundant (a state processed later will have
X * many of the same out-transitions). A "most recently used" queue of
X * protos is kept around with the hope that most states will find a proto
X * which is similar enough to be usable, and therefore compacting the
X * output tables.
X * "templates" are a special type of proto. If a transition table is
X * homogeneous or nearly homogeneous (all transitions go to the same
X * destination) then the odds are good that future states will also go
X * to the same destination state on basically the same character set.
X * These homogeneous states are so common when dealing with large rule
X * sets that they merit special attention. If the transition table were
X * simply made into a proto, then (typically) each subsequent, similar
X * state will differ from the proto for two out-transitions. One of these
X * out-transitions will be that character on which the proto does not go
X * to the common destination, and one will be that character on which the
X * state does not go to the common destination. Templates, on the other
X * hand, go to the common state on EVERY transition character, and therefore
X * cost only one difference.
X */
X
bldtbl( state, statenum, totaltrans, comstate, comfreq )
int state[], statenum, totaltrans, comstate, comfreq;
X
X {
X int extptr, extrct[2][CSIZE + 1];
X int mindiff, minprot, i, d;
X int checkcom;
X
X /* If extptr is 0 then the first array of extrct holds the result of the
X * "best difference" to date, which is those transitions which occur in
X * "state" but not in the proto which, to date, has the fewest differences
X * between itself and "state". If extptr is 1 then the second array of
X * extrct hold the best difference. The two arrays are toggled
X * between so that the best difference to date can be kept around and
X * also a difference just created by checking against a candidate "best"
X * proto.
X */
X
X extptr = 0;
X
X /* if the state has too few out-transitions, don't bother trying to
X * compact its tables
X */
X
X if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) )
X mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
X
X else
X {
X /* checkcom is true if we should only check "state" against
X * protos which have the same "comstate" value
X */
X
X checkcom = comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE;
X
X minprot = firstprot;
X mindiff = totaltrans;
X
X if ( checkcom )
X {
X /* find first proto which has the same "comstate" */
X for ( i = firstprot; i != NIL; i = protnext[i] )
X if ( protcomst[i] == comstate )
X {
X minprot = i;
X mindiff = tbldiff( state, minprot, extrct[extptr] );
X break;
X }
X }
X
X else
X {
X /* since we've decided that the most common destination out
X * of "state" does not occur with a high enough frequency,
X * we set the "comstate" to zero, assuring that if this state
X * is entered into the proto list, it will not be considered
X * a template.
X */
X comstate = 0;
X
X if ( firstprot != NIL )
X {
X minprot = firstprot;
X mindiff = tbldiff( state, minprot, extrct[extptr] );
X }
X }
X
X /* we now have the first interesting proto in "minprot". If
X * it matches within the tolerances set for the first proto,
X * we don't want to bother scanning the rest of the proto list
X * to see if we have any other reasonable matches.
X */
X
X if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE )
X { /* not a good enough match. Scan the rest of the protos */
X for ( i = minprot; i != NIL; i = protnext[i] )
X {
X d = tbldiff( state, i, extrct[1 - extptr] );
X if ( d < mindiff )
X {
X extptr = 1 - extptr;
X mindiff = d;
X minprot = i;
X }
X }
X }
X
X /* check if the proto we've decided on as our best bet is close
X * enough to the state we want to match to be usable
X */
X
X if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE )
X {
X /* no good. If the state is homogeneous enough, we make a
X * template out of it. Otherwise, we make a proto.
X */
X
X if ( comfreq * 100 >= totaltrans * TEMPLATE_SAME_PERCENTAGE )
X mktemplate( state, statenum, comstate );
X
X else
X {
X mkprot( state, statenum, comstate );
X mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
X }
X }
X
X else
X { /* use the proto */
X mkentry( extrct[extptr], numecs, statenum,
X prottbl[minprot], mindiff );
X
X /* if this state was sufficiently different from the proto
X * we built it from, make it, too, a proto
X */
X
X if ( mindiff * 100 >= totaltrans * NEW_PROTO_DIFF_PERCENTAGE )
X mkprot( state, statenum, comstate );
X
X /* since mkprot added a new proto to the proto queue, it's possible
X * that "minprot" is no longer on the proto queue (if it happened
X * to have been the last entry, it would have been bumped off).
X * If it's not there, then the new proto took its physical place
X * (though logically the new proto is at the beginning of the
X * queue), so in that case the following call will do nothing.
X */
X
X mv2front( minprot );
X }
X }
X }
X
X
X/* cmptmps - compress template table entries
X *
X * synopsis
X * cmptmps();
X *
X * template tables are compressed by using the 'template equivalence
X * classes', which are collections of transition character equivalence
X * classes which always appear together in templates - really meta-equivalence
X * classes. until this point, the tables for templates have been stored
X * up at the top end of the nxt array; they will now be compressed and have
X * table entries made for them.
X */
X
cmptmps()
X
X {
X int tmpstorage[CSIZE + 1];
X register int *tmp = tmpstorage, i, j;
X int totaltrans, trans;
X
X peakpairs = numtemps * numecs + tblend;
X
X if ( usemecs )
X {
X /* create equivalence classes base on data gathered on template
X * transitions
X */
X
X nummecs = cre8ecs( tecfwd, tecbck, numecs );
X }
X
X else
X nummecs = numecs;
X
X if ( lastdfa + numtemps + 1 >= current_max_dfas )
X increase_max_dfas();
X
X /* loop through each template */
X
X for ( i = 1; i <= numtemps; ++i )
X {
X totaltrans = 0; /* number of non-jam transitions out of this template */
X
X for ( j = 1; j <= numecs; ++j )
X {
X trans = tnxt[numecs * i + j];
X
X if ( usemecs )
X {
X /* the absolute value of tecbck is the meta-equivalence class
X * of a given equivalence class, as set up by cre8ecs
X */
X if ( tecbck[j] > 0 )
X {
X tmp[tecbck[j]] = trans;
X
X if ( trans > 0 )
X ++totaltrans;
X }
X }
X
X else
X {
X tmp[j] = trans;
X
X if ( trans > 0 )
X ++totaltrans;
X }
X }
X
X /* it is assumed (in a rather subtle way) in the skeleton that
X * if we're using meta-equivalence classes, the def[] entry for
X * all templates is the jam template, i.e., templates never default
X * to other non-jam table entries (e.g., another template)
X */
X
X /* leave room for the jam-state after the last real state */
X mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans );
X }
X }
X
X
X
X/* expand_nxt_chk - expand the next check arrays */
X
expand_nxt_chk()
X
X {
X register int old_max = current_max_xpairs;
X
X current_max_xpairs += MAX_XPAIRS_INCREMENT;
X
X ++num_reallocs;
X
X nxt = reallocate_integer_array( nxt, current_max_xpairs );
X chk = reallocate_integer_array( chk, current_max_xpairs );
X
X bzero( (char *) (chk + old_max),
X MAX_XPAIRS_INCREMENT * sizeof( int ) / sizeof( char ) );
X }
X
X
X/* find_table_space - finds a space in the table for a state to be placed
X *
X * synopsis
X * int *state, numtrans, block_start;
X * int find_table_space();
X *
X * block_start = find_table_space( state, numtrans );
X *
X * State is the state to be added to the full speed transition table.
X * Numtrans is the number of out-transitions for the state.
X *
X * find_table_space() returns the position of the start of the first block (in
X * chk) able to accommodate the state
X *
X * In determining if a state will or will not fit, find_table_space() must take
X * into account the fact that an end-of-buffer state will be added at [0],
X * and an action number will be added in [-1].
X */
X
int find_table_space( state, numtrans )
int *state, numtrans;
X
X {
X /* firstfree is the position of the first possible occurrence of two
X * consecutive unused records in the chk and nxt arrays
X */
X register int i;
X register int *state_ptr, *chk_ptr;
X register int *ptr_to_last_entry_in_state;
X
X /* if there are too many out-transitions, put the state at the end of
X * nxt and chk
X */
X if ( numtrans > MAX_XTIONS_FOR_FULL_INTERIOR_FIT )
X {
X /* if table is empty, return the first available spot in chk/nxt,
X * which should be 1
X */
X if ( tblend < 2 )
X return ( 1 );
X
X i = tblend - numecs; /* start searching for table space near the
X * end of chk/nxt arrays
X */
X }
X
X else
X i = firstfree; /* start searching for table space from the
X * beginning (skipping only the elements
X * which will definitely not hold the new
X * state)
X */
X
X while ( 1 ) /* loops until a space is found */
X {
X if ( i + numecs > current_max_xpairs )
X expand_nxt_chk();
X
X /* loops until space for end-of-buffer and action number are found */
X while ( 1 )
X {
X if ( chk[i - 1] == 0 ) /* check for action number space */
X {
X if ( chk[i] == 0 ) /* check for end-of-buffer space */
X break;
X
X else
X i += 2; /* since i != 0, there is no use checking to
X * see if (++i) - 1 == 0, because that's the
X * same as i == 0, so we skip a space
X */
X }
X
X else
X ++i;
X
X if ( i + numecs > current_max_xpairs )
X expand_nxt_chk();
X }
X
X /* if we started search from the beginning, store the new firstfree for
X * the next call of find_table_space()
X */
X if ( numtrans <= MAX_XTIONS_FOR_FULL_INTERIOR_FIT )
X firstfree = i + 1;
X
X /* check to see if all elements in chk (and therefore nxt) that are
X * needed for the new state have not yet been taken
X */
X
X state_ptr = &state[1];
X ptr_to_last_entry_in_state = &chk[i + numecs + 1];
X
X for ( chk_ptr = &chk[i + 1]; chk_ptr != ptr_to_last_entry_in_state;
X ++chk_ptr )
X if ( *(state_ptr++) != 0 && *chk_ptr != 0 )
X break;
X
X if ( chk_ptr == ptr_to_last_entry_in_state )
X return ( i );
X
X else
X ++i;
X }
X }
X
X
X/* genctbl - generates full speed compressed transition table
X *
X * synopsis
X * genctbl();
X */
X
genctbl()
X
X {
X register int i;
X
X /* table of verify for transition and offset to next state */
X printf( "static struct yy_trans_info yy_transition[%d] =\n",
X tblend + numecs + 1 );
X printf( " {\n" );
X
X /* We want the transition to be represented as the offset to the
X * next state, not the actual state number, which is what it currently is.
X * The offset is base[nxt[i]] - base[chk[i]]. That's just the
X * difference between the starting points of the two involved states
X * (to - from).
X *
X * first, though, we need to find some way to put in our end-of-buffer
X * flags and states. We do this by making a state with absolutely no
X * transitions. We put it at the end of the table.
X */
X /* at this point, we're guaranteed that there's enough room in nxt[]
X * and chk[] to hold tblend + numecs entries. We need just two slots.
X * One for the action and one for the end-of-buffer transition. We
X * now *assume* that we're guaranteed the only character we'll try to
X * index this nxt/chk pair with is EOB, i.e., 0, so we don't have to
X * make sure there's room for jam entries for other characters.
X */
X
X base[lastdfa + 1] = tblend + 2;
X nxt[tblend + 1] = END_OF_BUFFER_ACTION;
X chk[tblend + 1] = numecs + 1;
X chk[tblend + 2] = 1; /* anything but EOB */
X nxt[tblend + 2] = 0; /* so that "make test" won't show arb. differences */
X
X /* make sure every state has a end-of-buffer transition and an action # */
X for ( i = 0; i <= lastdfa; ++i )
X {
X chk[base[i]] = EOB_POSITION;
X chk[base[i] - 1] = ACTION_POSITION;
X nxt[base[i] - 1] = dfaacc[i].dfaacc_state; /* action number */
X }
X
X for ( i = 0; i <= lastsc * 2; ++i )
X nxt[base[i] - 1] = DEFAULT_ACTION;
X
X dataline = 0;
X datapos = 0;
X
X for ( i = 0; i <= tblend; ++i )
X {
X if ( chk[i] == EOB_POSITION )
X transition_struct_out( 0, base[lastdfa + 1] - i );
X
X else if ( chk[i] == ACTION_POSITION )
X transition_struct_out( 0, nxt[i] );
X
X else if ( chk[i] > numecs || chk[i] == 0 )
X transition_struct_out( 0, 0 ); /* unused slot */
X
X else /* verify, transition */
X transition_struct_out( chk[i], base[nxt[i]] - (i - chk[i]) );
X }
X
X
X /* here's the final, end-of-buffer state */
X transition_struct_out( chk[tblend + 1], nxt[tblend + 1] );
X transition_struct_out( chk[tblend + 2], nxt[tblend + 2] );
X
X printf( " };\n" );
X printf( "\n" );
X
X /* table of pointers to start states */
X printf( "static struct yy_trans_info *yy_state_ptr[%d] =\n",
X lastsc * 2 + 1 );
X printf( " {\n" );
X
X for ( i = 0; i <= lastsc * 2; ++i )
X printf( " &yy_transition[%d],\n", base[i] );
X
X printf( " };\n" );
X
X if ( useecs )
X genecs();
X }
X
X
X/* gentabs - generate data statements for the transition tables
X *
X * synopsis
X * gentabs();
X */
X
gentabs()
X
X {
X int i, j, k, *accset, nacc, *acc_array;
X char clower();
X
X /* *everything* is done in terms of arrays starting at 1, so provide
X * a null entry for the zero element of all FTL arrays
X */
X static char ftl_long_decl[] = "static long int %c[%d] =\n { 0,\n";
X static char ftl_short_decl[] = "static short int %c[%d] =\n { 0,\n";
X static char ftl_char_decl[] = "static char %c[%d] =\n { 0,\n";
X
X acc_array = allocate_integer_array( current_max_dfas );
X nummt = 0;
X
X if ( fulltbl )
X jambase = lastdfa + 1; /* home of "jam" pseudo-state */
X
X printf( "#define YY_JAM %d\n", jamstate );
X printf( "#define YY_JAM_BASE %d\n", jambase );
X
X if ( usemecs )
X printf( "#define YY_TEMPLATE %d\n", lastdfa + 2 );
X
X if ( reject )
X {
X /* write out accepting list and pointer list
X * first we generate the ACCEPT array. In the process, we compute
X * the indices that will go into the ALIST array, and save the
X * indices in the dfaacc array
X */
X
X printf( accnum > 127 ? ftl_short_decl : ftl_char_decl,
X ACCEPT, max( numas, 1 ) + 1 );
X
X j = 1; /* index into ACCEPT array */
X
X for ( i = 1; i <= lastdfa; ++i )
X {
X acc_array[i] = j;
X
X if ( accsiz[i] != 0 )
X {
X accset = dfaacc[i].dfaacc_set;
X nacc = accsiz[i];
X
X if ( trace )
X fprintf( stderr, "state # %d accepts: ", i );
X
X for ( k = 1; k <= nacc; ++k )
X {
X ++j;
X mkdata( accset[k] );
X
X if ( trace )
X {
X fprintf( stderr, "[%d]", accset[k] );
X
X if ( k < nacc )
X fputs( ", ", stderr );
X else
X putc( '\n', stderr );
X }
X }
X }
X }
X
X /* add accepting number for the "jam" state */
X acc_array[i] = j;
X
X dataend();
X }
X
X else
X {
X for ( i = 1; i <= lastdfa; ++i )
X acc_array[i] = dfaacc[i].dfaacc_state;
X
X acc_array[i] = 0; /* add (null) accepting number for jam state */
X }
X
X /* spit out ALIST array. If we're doing "reject", it'll be pointers
X * into the ACCEPT array. Otherwise it's actual accepting numbers.
X * In either case, we just dump the numbers.
X */
X
X /* "lastdfa + 2" is the size of ALIST; includes room for FTL arrays
X * beginning at 0 and for "jam" state
X */
X k = lastdfa + 2;
X
X if ( reject )
X /* we put a "cap" on the table associating lists of accepting
X * numbers with state numbers. This is needed because we tell
X * where the end of an accepting list is by looking at where
X * the list for the next state starts.
X */
X ++k;
X
X printf( ((reject && numas > 126) || accnum > 127) ?
X ftl_short_decl : ftl_char_decl, ALIST, k );
X
X /* set up default actions */
X for ( i = 1; i <= lastsc * 2; ++i )
X acc_array[i] = DEFAULT_ACTION;
X
X acc_array[end_of_buffer_state] = END_OF_BUFFER_ACTION;
X
X for ( i = 1; i <= lastdfa; ++i )
X {
X mkdata( acc_array[i] );
X
X if ( ! reject && trace && acc_array[i] )
X fprintf( stderr, "state # %d accepts: [%d]\n", i, acc_array[i] );
X }
X
X /* add entry for "jam" state */
X mkdata( acc_array[i] );
X
X if ( reject )
X /* add "cap" for the list */
X mkdata( acc_array[i] );
X
X dataend();
X
X if ( useecs )
X genecs();
X
X if ( usemecs )
X {
X /* write out meta-equivalence classes (used to index templates with) */
X
X if ( trace )
X fputs( "\n\nMeta-Equivalence Classes:\n", stderr );
X
X printf( ftl_char_decl, MATCHARRAY, numecs + 1 );
X
X for ( i = 1; i <= numecs; ++i )
X {
X if ( trace )
X fprintf( stderr, "%d = %d\n", i, abs( tecbck[i] ) );
X
X mkdata( abs( tecbck[i] ) );
X }
X
X dataend();
X }
X
X if ( ! fulltbl )
X {
X int total_states = lastdfa + numtemps;
X
X printf( tblend > MAX_SHORT ? ftl_long_decl : ftl_short_decl,
X BASEARRAY, total_states + 1 );
X
X for ( i = 1; i <= lastdfa; ++i )
X {
X register int d = def[i];
X
X if ( base[i] == JAMSTATE )
X base[i] = jambase;
X
X if ( d == JAMSTATE )
X def[i] = jamstate;
X
X else if ( d < 0 )
X {
X /* template reference */
X ++tmpuses;
X def[i] = lastdfa - d + 1;
X }
X
X mkdata( base[i] );
X }
X
X /* generate jam state's base index */
X mkdata( base[i] );
X
X for ( ++i /* skip jam state */; i <= total_states; ++i )
X {
X mkdata( base[i] );
X def[i] = jamstate;
X }
X
X dataend();
X
X printf( tblend > MAX_SHORT ? ftl_long_decl : ftl_short_decl,
X DEFARRAY, total_states + 1 );
X
X for ( i = 1; i <= total_states; ++i )
X mkdata( def[i] );
X
X dataend();
X
X printf( lastdfa > MAX_SHORT ? ftl_long_decl : ftl_short_decl,
X NEXTARRAY, tblend + 1 );
X
X for ( i = 1; i <= tblend; ++i )
X {
X if ( nxt[i] == 0 || chk[i] == 0 )
X nxt[i] = jamstate; /* new state is the JAM state */
X
X mkdata( nxt[i] );
X }
X
X dataend();
X
X printf( lastdfa > MAX_SHORT ? ftl_long_decl : ftl_short_decl,
X CHECKARRAY, tblend + 1 );
X
X for ( i = 1; i <= tblend; ++i )
X {
X if ( chk[i] == 0 )
X ++nummt;
X
X mkdata( chk[i] );
X }
X
X dataend();
X }
X }
X
X
X/* generate equivalence-class tables */
X
genecs()
X
X {
X register int i, j;
X static char ftl_char_decl[] = "static char %c[%d] =\n { 0,\n";
X int numrows;
X
X printf( ftl_char_decl, ECARRAY, CSIZE + 1 );
X
X for ( i = 1; i <= CSIZE; ++i )
X {
X if ( caseins && (i >= 'A') && (i <= 'Z') )
X ecgroup[i] = ecgroup[clower( i )];
X
X ecgroup[i] = abs( ecgroup[i] );
X mkdata( ecgroup[i] );
X }
X
X dataend();
X
X if ( trace )
X {
X fputs( "\n\nEquivalence Classes:\n\n", stderr );
X
X numrows = (CSIZE + 1) / 8;
X
X for ( j = 1; j <= numrows; ++j )
X {
X for ( i = j; i <= CSIZE; i = i + numrows )
X {
X if ( i >= 1 && i <= 31 )
X fprintf( stderr, "^%c = %-2d",
X 'A' + i - 1, ecgroup[i] );
X
X else if ( i >= 32 && i <= 126 )
X fprintf( stderr, " %c = %-2d", i, ecgroup[i] );
X
X else if ( i == 127 )
X fprintf( stderr, "^@ = %-2d", ecgroup[i] );
X
X else
X fprintf( stderr, "\nSomething Weird: %d = %d\n", i,
X ecgroup[i] );
X
X putc( '\t', stderr );
X }
X
X putc( '\n', stderr );
X }
X }
X }
X
X
X/* inittbl - initialize transition tables
X *
X * synopsis
X * inittbl();
X *
X * Initializes "firstfree" to be one beyond the end of the table. Initializes
X * all "chk" entries to be zero. Note that templates are built in their
X * own tbase/tdef tables. They are shifted down to be contiguous
X * with the non-template entries during table generation.
X */
inittbl()
X
X {
X register int i;
X
X bzero( (char *) chk, current_max_xpairs * sizeof( int ) / sizeof( char ) );
X
X tblend = 0;
X firstfree = tblend + 1;
X numtemps = 0;
X
X if ( usemecs )
X {
X /* set up doubly-linked meta-equivalence classes
X * these are sets of equivalence classes which all have identical
X * transitions out of TEMPLATES
X */
X
X tecbck[1] = NIL;
X
X for ( i = 2; i <= numecs; ++i )
X {
X tecbck[i] = i - 1;
X tecfwd[i - 1] = i;
X }
X
X tecfwd[numecs] = NIL;
X }
X }
X
X
X/* make_tables - generate transition tables
X *
X * synopsis
X * make_tables();
X *
X * Generates transition tables and finishes generating output file
X */
X
make_tables()
X
X {
X if ( fullspd )
X { /* need to define YY_TRANS_OFFSET_TYPE as a size large
X * enough to hold the biggest offset
X */
X int total_table_size = tblend + numecs + 1;
X
X printf( "#define YY_TRANS_OFFSET_TYPE %s\n",
X total_table_size > MAX_SHORT ? "long" : "short" );
X }
X
X if ( fullspd || fulltbl )
X skelout();
X
X /* compute the tables and copy them to output file */
X if ( fullspd )
X genctbl();
X
X else
X gentabs();
X
X skelout();
X
X (void) fclose( temp_action_file );
X temp_action_file = fopen( action_file_name, "r" );
X
X /* copy prolog from action_file to output file */
X action_out();
X
X skelout();
X
X /* copy actions from action_file to output file */
X action_out();
X
X skelout();
X
X /* copy remainder of input to output */
X
X line_directive_out( stdout );
X (void) flexscan(); /* copy remainder of input to output */
X }
X
X
X/* mkdeftbl - make the default, "jam" table entries
X *
X * synopsis
X * mkdeftbl();
X */
X
mkdeftbl()
X
X {
X int i;
X
X jamstate = lastdfa + 1;
X
X if ( tblend + numecs > current_max_xpairs )
X expand_nxt_chk();
X
X for ( i = 1; i <= numecs; ++i )
X {
X nxt[tblend + i] = 0;
X chk[tblend + i] = jamstate;
X }
X
X jambase = tblend;
X
X base[jamstate] = jambase;
X
X /* should generate a run-time array bounds check if
X * ever used as a default
X */
X def[jamstate] = BAD_SUBSCRIPT;
X
X tblend += numecs;
X ++numtemps;
X }
X
X
X/* mkentry - create base/def and nxt/chk entries for transition array
X *
X * synopsis
X * int state[numchars + 1], numchars, statenum, deflink, totaltrans;
X * mkentry( state, numchars, statenum, deflink, totaltrans );
X *
X * "state" is a transition array "numchars" characters in size, "statenum"
X * is the offset to be used into the base/def tables, and "deflink" is the
X * entry to put in the "def" table entry. If "deflink" is equal to
X * "JAMSTATE", then no attempt will be made to fit zero entries of "state"
X * (i.e., jam entries) into the table. It is assumed that by linking to
X * "JAMSTATE" they will be taken care of. In any case, entries in "state"
X * marking transitions to "SAME_TRANS" are treated as though they will be
X * taken care of by whereever "deflink" points. "totaltrans" is the total
X * number of transitions out of the state. If it is below a certain threshold,
X * the tables are searched for an interior spot that will accommodate the
X * state array.
X */
X
mkentry( state, numchars, statenum, deflink, totaltrans )
register int *state;
int numchars, statenum, deflink, totaltrans;
X
X {
X register int minec, maxec, i, baseaddr;
X int tblbase, tbllast;
X
X if ( totaltrans == 0 )
X { /* there are no out-transitions */
X if ( deflink == JAMSTATE )
X base[statenum] = JAMSTATE;
X else
X base[statenum] = 0;
X
X def[statenum] = deflink;
X return;
X }
X
X for ( minec = 1; minec <= numchars; ++minec )
X {
X if ( state[minec] != SAME_TRANS )
X if ( state[minec] != 0 || deflink != JAMSTATE )
X break;
X }
X
X if ( totaltrans == 1 )
X {
X /* there's only one out-transition. Save it for later to fill
X * in holes in the tables.
X */
X stack1( statenum, minec, state[minec], deflink );
X return;
X }
X
X for ( maxec = numchars; maxec > 0; --maxec )
X {
X if ( state[maxec] != SAME_TRANS )
X if ( state[maxec] != 0 || deflink != JAMSTATE )
X break;
X }
X
X /* Whether we try to fit the state table in the middle of the table
X * entries we have already generated, or if we just take the state
X * table at the end of the nxt/chk tables, we must make sure that we
X * have a valid base address (i.e., non-negative). Note that not only are
X * negative base addresses dangerous at run-time (because indexing the
X * next array with one and a low-valued character might generate an
X * array-out-of-bounds error message), but at compile-time negative
X * base addresses denote TEMPLATES.
X */
X
X /* find the first transition of state that we need to worry about. */
X if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE )
X { /* attempt to squeeze it into the middle of the tabls */
X baseaddr = firstfree;
X
X while ( baseaddr < minec )
X {
X /* using baseaddr would result in a negative base address below
X * find the next free slot
X */
X for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr )
X ;
X }
X
X if ( baseaddr + maxec - minec >= current_max_xpairs )
X expand_nxt_chk();
X
X for ( i = minec; i <= maxec; ++i )
X if ( state[i] != SAME_TRANS )
X if ( state[i] != 0 || deflink != JAMSTATE )
X if ( chk[baseaddr + i - minec] != 0 )
X { /* baseaddr unsuitable - find another */
X for ( ++baseaddr;
X baseaddr < current_max_xpairs &&
X chk[baseaddr] != 0;
X ++baseaddr )
X ;
X
X if ( baseaddr + maxec - minec >= current_max_xpairs )
X expand_nxt_chk();
X
X /* reset the loop counter so we'll start all
X * over again next time it's incremented
X */
X
X i = minec - 1;
X }
X }
X
X else
X {
X /* ensure that the base address we eventually generate is
X * non-negative
X */
X baseaddr = max( tblend + 1, minec );
X }
X
X tblbase = baseaddr - minec;
X tbllast = tblbase + maxec;
X
X if ( tbllast >= current_max_xpairs )
X expand_nxt_chk();
X
X base[statenum] = tblbase;
X def[statenum] = deflink;
X
X for ( i = minec; i <= maxec; ++i )
X if ( state[i] != SAME_TRANS )
X if ( state[i] != 0 || deflink != JAMSTATE )
X {
X nxt[tblbase + i] = state[i];
X chk[tblbase + i] = statenum;
X }
X
X if ( baseaddr == firstfree )
X /* find next free slot in tables */
X for ( ++firstfree; chk[firstfree] != 0; ++firstfree )
X ;
X
X tblend = max( tblend, tbllast );
X }
X
X
X/* mk1tbl - create table entries for a state (or state fragment) which
X * has only one out-transition
X *
X * synopsis
X * int state, sym, onenxt, onedef;
X * mk1tbl( state, sym, onenxt, onedef );
X */
X
mk1tbl( state, sym, onenxt, onedef )
int state, sym, onenxt, onedef;
X
X {
X if ( firstfree < sym )
X firstfree = sym;
X
X while ( chk[firstfree] != 0 )
X if ( ++firstfree >= current_max_xpairs )
X expand_nxt_chk();
X
X base[state] = firstfree - sym;
X def[state] = onedef;
X chk[firstfree] = state;
X nxt[firstfree] = onenxt;
X
X if ( firstfree > tblend )
X {
X tblend = firstfree++;
X
X if ( firstfree >= current_max_xpairs )
X expand_nxt_chk();
X }
X }
X
X
X/* mkprot - create new proto entry
X *
X * synopsis
X * int state[], statenum, comstate;
X * mkprot( state, statenum, comstate );
X */
X
mkprot( state, statenum, comstate )
int state[], statenum, comstate;
X
X {
X int i, slot, tblbase;
X
X if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE )
X {
X /* gotta make room for the new proto by dropping last entry in
X * the queue
X */
X slot = lastprot;
X lastprot = protprev[lastprot];
X protnext[lastprot] = NIL;
X }
X
X else
X slot = numprots;
X
X protnext[slot] = firstprot;
X
X if ( firstprot != NIL )
X protprev[firstprot] = slot;
X
X firstprot = slot;
X prottbl[slot] = statenum;
X protcomst[slot] = comstate;
X
X /* copy state into save area so it can be compared with rapidly */
X tblbase = numecs * (slot - 1);
X
X for ( i = 1; i <= numecs; ++i )
X protsave[tblbase + i] = state[i];
X }
X
X
X/* mktemplate - create a template entry based on a state, and connect the state
X * to it
X *
X * synopsis
X * int state[], statenum, comstate, totaltrans;
X * mktemplate( state, statenum, comstate, totaltrans );
X */
X
mktemplate( state, statenum, comstate )
int state[], statenum, comstate;
X
X {
X int i, numdiff, tmpbase, tmp[CSIZE + 1];
X char transset[CSIZE + 1];
X int tsptr;
X
X ++numtemps;
X
X tsptr = 0;
X
X /* calculate where we will temporarily store the transition table
X * of the template in the tnxt[] array. The final transition table
X * gets created by cmptmps()
X */
X
X tmpbase = numtemps * numecs;
X
X if ( tmpbase + numecs >= current_max_template_xpairs )
X {
X current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT;
X
X ++num_reallocs;
X
X tnxt = reallocate_integer_array( tnxt, current_max_template_xpairs );
X }
X
X for ( i = 1; i <= numecs; ++i )
X if ( state[i] == 0 )
X tnxt[tmpbase + i] = 0;
X else
X {
X transset[tsptr++] = i;
X tnxt[tmpbase + i] = comstate;
X }
X
X if ( usemecs )
X mkeccl( transset, tsptr, tecfwd, tecbck, numecs );
X
X mkprot( tnxt + tmpbase, -numtemps, comstate );
X
X /* we rely on the fact that mkprot adds things to the beginning
X * of the proto queue
X */
X
X numdiff = tbldiff( state, firstprot, tmp );
X mkentry( tmp, numecs, statenum, -numtemps, numdiff );
X }
X
X
X/* mv2front - move proto queue element to front of queue
X *
X * synopsis
X * int qelm;
X * mv2front( qelm );
X */
X
mv2front( qelm )
int qelm;
X
X {
X if ( firstprot != qelm )
X {
X if ( qelm == lastprot )
X lastprot = protprev[lastprot];
X
X protnext[protprev[qelm]] = protnext[qelm];
X
X if ( protnext[qelm] != NIL )
X protprev[protnext[qelm]] = protprev[qelm];
X
X protprev[qelm] = NIL;
X protnext[qelm] = firstprot;
X protprev[firstprot] = qelm;
X firstprot = qelm;
X }
X }
X
X
X/* ntod - convert an ndfa to a dfa
X *
X * synopsis
X * ntod();
X *
X * creates the dfa corresponding to the ndfa we've constructed. the
X * dfa starts out in state #1.
X */
ntod()
X
X {
X int *accset, ds, nacc, newds;
X int duplist[CSIZE + 1], sym, hashval, numstates, dsize;
X int targfreq[CSIZE + 1], targstate[CSIZE + 1], state[CSIZE + 1];
X int *nset, *dset;
X int targptr, totaltrans, i, comstate, comfreq, targ;
X int *epsclosure(), snstods(), symlist[CSIZE + 1];
X
X /* this is so find_table_space(...) will know where to start looking in
X * chk/nxt for unused records for space to put in the state
X */
X if ( fullspd )
X firstfree = 0;
X
X accset = allocate_integer_array( accnum + 1 );
X nset = allocate_integer_array( current_max_dfa_size );
X
X todo_head = todo_next = 0;
X
X#define ADD_QUEUE_ELEMENT(element) \
X if ( ++element >= current_max_dfas ) \
X { /* check for queue overflowing */ \
X if ( todo_head == 0 ) \
X increase_max_dfas(); \
X else \
X element = 0; \
X }
X
X#define NEXT_QUEUE_ELEMENT(element) ((element + 1) % (current_max_dfas + 1))
X
X for ( i = 0; i <= CSIZE; ++i )
X {
X duplist[i] = NIL;
X symlist[i] = false;
X }
X
X for ( i = 0; i <= accnum; ++i )
X accset[i] = NIL;
X
X if ( trace )
X {
X dumpnfa( scset[1] );
X fputs( "\n\nDFA Dump:\n\n", stderr );
X }
X
X inittbl();
X
X if ( fullspd )
X {
X for ( i = 0; i <= numecs; ++i )
X state[i] = 0;
X place_state( state, 0, 0 );
X }
X
X if ( fulltbl )
X {
X /* declare it "short" because it's a real long-shot that that
X * won't be large enough
X */
X printf( "static short int %c[][%d] =\n {\n", NEXTARRAY,
X numecs + 1 ); /* '}' so vi doesn't get too confused */
X
X /* generate 0 entries for state #0 */
X for ( i = 0; i <= numecs; ++i )
X mk2data( 0 );
X
X /* force ',' and dataflush() next call to mk2data */
X datapos = NUMDATAITEMS;
X
X /* force extra blank line next dataflush() */
X dataline = NUMDATALINES;
X }
X
X /* create the first states */
X
X for ( i = 1; i <= lastsc * 2; ++i )
X {
X numstates = 1;
X
X /* for each start condition, make one state for the case when
X * we're at the beginning of the line (the '%' operator) and
X * one for the case when we're not
X */
X if ( i % 2 == 1 )
X nset[numstates] = scset[(i / 2) + 1];
X else
X nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] );
X
X nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
X
X if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
X {
X numas = numas + nacc;
X totnst = totnst + numstates;
X
X todo[todo_next] = ds;
X ADD_QUEUE_ELEMENT(todo_next);
X }
X }
X
X if ( fulltbl )
X {
X if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
X flexfatal( "could not create unique end-of-buffer state" );
X
X numas += 1;
X
X todo[todo_next] = end_of_buffer_state;
X ADD_QUEUE_ELEMENT(todo_next);
X }
X
X while ( todo_head != todo_next )
X {
X targptr = 0;
X totaltrans = 0;
X
X for ( i = 1; i <= numecs; ++i )
X state[i] = 0;
X
X ds = todo[todo_head];
X todo_head = NEXT_QUEUE_ELEMENT(todo_head);
X
X dset = dss[ds];
X dsize = dfasiz[ds];
X
X if ( trace )
X fprintf( stderr, "state # %d:\n", ds );
X
X sympartition( dset, dsize, symlist, duplist );
X
X for ( sym = 1; sym <= numecs; ++sym )
X {
X if ( symlist[sym] )
X {
X symlist[sym] = 0;
X
X if ( duplist[sym] == NIL )
X { /* symbol has unique out-transitions */
X numstates = symfollowset( dset, dsize, sym, nset );
X nset = epsclosure( nset, &numstates, accset,
X &nacc, &hashval );
X
X if ( snstods( nset, numstates, accset,
X nacc, hashval, &newds ) )
X {
X totnst = totnst + numstates;
X todo[todo_next] = newds;
X ADD_QUEUE_ELEMENT(todo_next);
X numas = numas + nacc;
X }
X
X state[sym] = newds;
X
X if ( trace )
X fprintf( stderr, "\t%d\t%d\n", sym, newds );
X
X targfreq[++targptr] = 1;
X targstate[targptr] = newds;
X ++numuniq;
X }
X
X else
X {
X /* sym's equivalence class has the same transitions
X * as duplist(sym)'s equivalence class
X */
X targ = state[duplist[sym]];
X state[sym] = targ;
X
X if ( trace )
X fprintf( stderr, "\t%d\t%d\n", sym, targ );
X
X /* update frequency count for destination state */
X
X i = 0;
X while ( targstate[++i] != targ )
X ;
X
X ++targfreq[i];
X ++numdup;
X }
X
X ++totaltrans;
X duplist[sym] = NIL;
X }
X }
X
X numsnpairs = numsnpairs + totaltrans;
X
X if ( caseins && ! useecs )
X {
X register int j;
X
X for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
X state[i] = state[j];
X }
X
X if ( fulltbl )
X {
X /* supply array's 0-element */
X if ( ds == end_of_buffer_state )
X mk2data( 0 );
X else
X mk2data( end_of_buffer_state );
X
X for ( i = 1; i <= numecs; ++i )
X mk2data( state[i] );
X
X /* force ',' and dataflush() next call to mk2data */
X datapos = NUMDATAITEMS;
X
X /* force extra blank line next dataflush() */
X dataline = NUMDATALINES;
X }
X
X else if ( fullspd )
X place_state( state, ds, totaltrans );
X
X else
X {
X /* determine which destination state is the most common, and
X * how many transitions to it there are
X */
X
X comfreq = 0;
X comstate = 0;
X
X for ( i = 1; i <= targptr; ++i )
X if ( targfreq[i] > comfreq )
X {
X comfreq = targfreq[i];
X comstate = targstate[i];
X }
X
X bldtbl( state, ds, totaltrans, comstate, comfreq );
X }
X }
X
X if ( fulltbl )
X dataend();
X
X else
X {
X cmptmps(); /* create compressed template entries */
X
X /* create tables for all the states with only one out-transition */
X while ( onesp > 0 )
X {
X mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
X onedef[onesp] );
X --onesp;
X }
X
X mkdeftbl();
X }
X
X }
X
X
X/* place_state - place a state into full speed transition table
X *
X * synopsis
X * int *state, statenum, transnum;
X * place_state( state, statenum, transnum );
X *
X * State is the statenum'th state. It is indexed by equivalence class and
X * gives the number of the state to enter for a given equivalence class.
X * Transnum is the number of out-transitions for the state.
X */
X
place_state( state, statenum, transnum )
int *state, statenum, transnum;
X
X {
X register int i;
X register int *state_ptr;
X int position = find_table_space( state, transnum );
X
X /* base is the table of start positions */
X base[statenum] = position;
X
X /* put in action number marker; this non-zero number makes sure that
X * find_table_space() knows that this position in chk/nxt is taken
X * and should not be used for another accepting number in another state
X */
X chk[position - 1] = 1;
X
X /* put in end-of-buffer marker; this is for the same purposes as above */
X chk[position] = 1;
X
X /* place the state into chk and nxt */
X state_ptr = &state[1];
X
X for ( i = 1; i <= numecs; ++i, ++state_ptr )
X if ( *state_ptr != 0 )
X {
X chk[position + i] = i;
X nxt[position + i] = *state_ptr;
X }
X
X if ( position + numecs > tblend )
X tblend = position + numecs;
X }
X
X
X/* stack1 - save states with only one out-transition to be processed later
X *
X * synopsis
X * int statenum, sym, nextstate, deflink;
X * stack1( statenum, sym, nextstate, deflink );
X *
X * if there's room for another state one the "one-transition" stack, the
X * state is pushed onto it, to be processed later by mk1tbl. If there's
X * no room, we process the sucker right now.
X */
X
stack1( statenum, sym, nextstate, deflink )
int statenum, sym, nextstate, deflink;
X
X {
X if ( onesp >= ONE_STACK_SIZE )
X mk1tbl( statenum, sym, nextstate, deflink );
X
X else
X {
X ++onesp;
X onestate[onesp] = statenum;
X onesym[onesp] = sym;
X onenext[onesp] = nextstate;
X onedef[onesp] = deflink;
X }
X }
X
X
X/* tbldiff - compute differences between two state tables
X *
X * synopsis
X * int state[], pr, ext[];
X * int tbldiff, numdifferences;
X * numdifferences = tbldiff( state, pr, ext )
X *
X * "state" is the state array which is to be extracted from the pr'th
X * proto. "pr" is both the number of the proto we are extracting from
X * and an index into the save area where we can find the proto's complete
X * state table. Each entry in "state" which differs from the corresponding
X * entry of "pr" will appear in "ext".
X * Entries which are the same in both "state" and "pr" will be marked
X * as transitions to "SAME_TRANS" in "ext". The total number of differences
X * between "state" and "pr" is returned as function value. Note that this
X * number is "numecs" minus the number of "SAME_TRANS" entries in "ext".
X */
X
int tbldiff( state, pr, ext )
int state[], pr, ext[];
X
X {
X register int i, *sp = state, *ep = ext, *protp;
X register int numdiff = 0;
X
X protp = &protsave[numecs * (pr - 1)];
X
X for ( i = numecs; i > 0; --i )
X {
X if ( *++protp == *++sp )
X *++ep = SAME_TRANS;
X else
X {
X *++ep = *sp;
X ++numdiff;
X }
X }
X
X return ( numdiff );
X }
END_OF_FILE
if test 39351 -ne `wc -c <'tblcmp.c'`; then
echo shar: \"'tblcmp.c'\" unpacked with wrong size!
fi
# end of 'tblcmp.c'
fi
echo shar: End of archive 5 \(of 5\).
cp /dev/null ark5isdone
MISSING=""
for I in 1 2 3 4 5 ; do
if test ! -f ark${I}isdone ; then
MISSING="${MISSING} ${I}"
fi
done
if test "${MISSING}" = "" ; then
echo You have unpacked all 5 archives.
rm -f ark[1-9]isdone
else
echo You still need to unpack the following archives:
echo " " ${MISSING}
fi
## End of shell archive.
exit 0
--
Please send comp.sources.unix-related mail to rsalz at uunet.uu.net.
More information about the Comp.sources.unix
mailing list