mawk0.97.shar 5 of 6

Mike Brennan brennan at ssc-vax.UUCP
Sun May 12 00:57:57 AEST 1991

------------------cut here----------------
      case  SC_SPACE  :   goto reswitch ;

      case  SC_COMMENT :
          eat_comment() ; goto reswitch ;

      case  SC_NL  : 
          lineno++ ; eat_nl() ;
          ct_ret(NL) ;

      case SC_ESCAPE :
          while ( scan_code[ c = next() ] == SC_SPACE ) ;
          if ( c == '\n')
          { token_lineno = ++lineno ; goto reswitch ; }
          if ( c == 0 )  ct_ret(EOF) ;
          un_next() ;
          yylval.ival = '\\' ;
          ct_ret(UNEXPECTED) ;

      case  SC_SEMI_COLON  : 
          eat_nl() ;
          ct_ret(SEMI_COLON) ;

      case  SC_LBRACE :  
          eat_nl() ; brace_cnt++ ;
          ct_ret(LBRACE) ;

      case  SC_PLUS  :
          test2_ret('+', INC, '=', ADD_ASG, PLUS ) ;

      case  SC_MINUS :
          test2_ret('-', DEC, '=', SUB_ASG, MINUS ) ;

      case  SC_COMMA :  eat_nl() ; ct_ret(COMMA) ;

      case  SC_MUL  :  test1_ret('=', MUL_ASG, MUL) ;
      case  SC_DIV :   
          { int *p = can_precede_re ;

                if ( *p == current_token )
                    ct_ret( collect_RE() ) ;
            while ( *p++ != -1 ) ;

            test1_ret( '=', DIV_ASG , DIV ) ;

      case  SC_MOD  :  test1_ret('=', MOD_ASG, MOD) ;
      case  SC_POW :   test1_ret('=' , POW_ASG, POW) ;
      case  SC_LPAREN : 
          paren_cnt++ ;
          ct_ret(LPAREN) ;

      case  SC_RPAREN : 
          if ( --paren_cnt < 0 )
          { compile_error( "extra ')'" ) ;
            paren_cnt = 0 ;
            goto reswitch ; }

          ct_ret(RPAREN) ;

      case  SC_LBOX   : ct_ret(LBOX) ;
      case  SC_RBOX   : ct_ret(RBOX) ;

      case  SC_MATCH  : ct_ret(MATCH) ;

      case  SC_EQUAL  :
          test1_ret( '=', EQ, ASSIGN ) ;

      case  SC_NOT : /* !  */
          test2_ret('=', NEQ, '~', NOT_MATCH, NOT ) ;

      case  SC_LT  :  /* '<' */
          if ( getline_flag )
          { getline_flag = 0 ; ct_ret(IO_IN) ; }
          { ct_ret( ifnext('=', LTE , LT) ) ; }

      case  SC_GT  :  /* '>' */
          if ( print_flag && paren_cnt == 0 )
          { print_flag = 0 ;
            /* there are 3 types of IO_OUT 
               -- build the error string in temp_buff */
            temp_buff.string_buff[0] = '>' ;
            if ( next() == '>' ) 
              yylval.ival = F_APPEND ;
              temp_buff.string_buff[1] = '>' ;
              temp_buff.string_buff[2] =  0 ;
            { un_next() ; 
              yylval.ival = F_TRUNC ; 
              temp_buff.string_buff[1] = 0 ;
            return current_token = IO_OUT ;

          ct_ret( ifnext('=', GTE , GT) ) ;

      case  SC_OR :
          if ( next() == '|' ) 
          { eat_nl() ; ct_ret(brace_cnt?OR:P_OR) ; }
          { un_next() ; 

            if ( print_flag && paren_cnt == 0 )
            { print_flag = 0 ; 
              yylval.ival = PIPE_OUT;
              temp_buff.string_buff[0] = '|' ;
              temp_buff.string_buff[1] = 0 ;
              ct_ret(IO_OUT) ;
            else  ct_ret(PIPE) ;

      case  SC_AND :
          if ( next() == '&' )  
          { eat_nl() ; ct_ret(brace_cnt?AND:P_AND) ; }
          { un_next() ; yylval.ival = '&' ; ct_ret(UNEXPECTED) ; }

      case  SC_QMARK  :  ct_ret(QMARK) ;
      case  SC_COLON  :  ct_ret(COLON) ;
      case  SC_RBRACE :
          if ( --brace_cnt < 0 )
          { compile_error("extra '}'" ) ;
            brace_cnt = 0 ; goto reswitch ; }

          if ( (c = current_token) == NL || c == SEMI_COLON 
               || c == SC_FAKE_SEMI_COLON  || c == RBRACE  )
          { eat_nl() ; ct_ret(RBRACE) ; }

          brace_cnt++ ; un_next() ;
          current_token = SC_FAKE_SEMI_COLON ;
          return  SEMI_COLON ;

      case  SC_DIGIT  :
      case  SC_DOT    :
          { double d ;
            int flag ;

            if ( (d = collect_decimal(c, &flag)) == 0.0 )
                if ( flag )  ct_ret(flag) ;
                else  yylval.cp = &cell_zero ;
            else if ( d == 1.0 ) yylval.cp = &cell_one ;
            { yylval.cp = new_CELL() ;
              yylval.cp->type = C_DOUBLE ;
              yylval.cp->dval = d ; 
            ct_ret( CONSTANT ) ;

      case  SC_DOLLAR :  /* '$' */
          { double d ;
            int flag ;

            while ( scan_code[c = next()] == SC_SPACE )  ;
            if ( scan_code[c] != SC_DIGIT &&
                 scan_code[c] != SC_DOT )
            { un_next() ; ct_ret(DOLLAR) ; }
            /* compute field address at compile time */
            if ( (d = collect_decimal(c, &flag)) == 0.0 )
                if ( flag )  ct_ret(flag) ; /* an error */
                else  yylval.cp = &field[0] ;
            { int k = (int) d ;

              if ( k > MAX_FIELD )
              { compile_error(
                   "maximum field index(%d) exceeded" , k ) ;
                k = MAX_FIELD ;
              else  yylval.cp = &field[k] ;

            ct_ret(FIELD) ;

      case  SC_DQUOTE :
          return current_token = collect_string() ;

      case  SC_IDCHAR : /* collect an identifier */
            { unsigned char *p =
                    (unsigned char *)temp_buff.string_buff + 1 ;
              SYMTAB *stp ;

              temp_buff.string_buff[0] = c ;

              while ( 
                (c = scan_code[ *p++ = next()]) == SC_IDCHAR ||
                       c == SC_DIGIT )  ;
              un_next() ; * --p = 0 ;

              switch( (stp = find(temp_buff.string_buff))->type )
              { case ST_NONE :  
                  /* check for function call before defined */
                      if ( next() == '(' )
                      { stp->type = ST_FUNCT ;
                        stp->stval.fbp = (FBLOCK *)
                                zmalloc(sizeof(FBLOCK)) ;
                        stp->stval.fbp->name = stp->name ;
                        stp->stval.fbp->code = (INST *) 0 ;
                        yylval.fbp = stp->stval.fbp ;
                        current_token = FUNCT_ID ;
                      { yylval.stp = stp ;
                        current_token = ID ;
                      un_next() ;
                      break ;
                case ST_VAR :
                case  ST_ARRAY :
                case  ST_LOCAL_NONE :
                case  ST_LOCAL_VAR :
                case  ST_LOCAL_ARRAY :

                      yylval.stp = stp ;
                      current_token = ID ;
                      break ;

                case ST_FUNCT :
                      yylval.fbp = stp->stval.fbp ;
                      current_token = FUNCT_ID ;
                      break ;

                case ST_KEYWORD :  
                      current_token = stp-> ;
                      break ;

                case  ST_BUILTIN :
                      yylval.bip = stp->stval.bip ;
                      current_token = BUILTIN ;
                      break ;

                case  ST_FIELD  :
                      yylval.cp = stp->stval.cp ;
                      current_token = FIELD ;
                      break ;

                case  ST_LENGTH  :
                    { CELL *bi_length() ;
                      static BI_REC length_bi_rec =
                      { "length", bi_length, 1, 1 } ;

                      while ( scan_code[ c = next() ] == SC_SPACE ) ;
                      un_next() ;

                      if ( c == '(' )
                      { yylval.bip = &length_bi_rec ;
                        current_token = BUILTIN ;
                      else current_token = LENGTH ;
                    break ;

                default : 
                      bozo("find returned bad st type") ;
              return  current_token  ;

      case  SC_UNEXPECTED :
            yylval.ival = c & 0xff ;
            ct_ret(UNEXPECTED) ;
    return  0 ; /* never get here make lint happy */

/* collect_decimal -- collect a decimal constant in temp_buff.string_buff.

   c      the first character of the constant, already read by the
          caller (the callers pass a digit or '.')
   flag   set to 0 on success, to UNEXPECTED when a '.' is not
          followed by a digit (yylval.ival is then '.'), or to
          BAD_DECIMAL when the text is not a well formed constant

   Returns the value of the constant, or 0.0 whenever *flag is set
   non-zero.  When strtod() sets errno, compile_error() is called and
   the strtod() result is still returned. */

static double collect_decimal(c, flag)
  int c ; int *flag ;
{ register unsigned char *p = (unsigned char*) temp_buff.string_buff + 1;
  char *endp ;  /* strtod() wants a char ** */
  double d ;

  *flag = 0 ;
  temp_buff.string_buff[0] = c ;

  if ( c == '.' )
  { /* a leading '.' must be followed by a digit */
    if ( scan_code[*p++ = next()] != SC_DIGIT )
    { *flag = UNEXPECTED ; yylval.ival = '.' ;
      return 0.0 ; }
  }
  else
  { /* digits of the integer part, then an optional '.' */
    while ( scan_code[*p++ = next()] == SC_DIGIT ) ;
    if ( p[-1] != '.' )
    { un_next() ; p-- ; }
  }
  /* get rest of digits after decimal point */
  while ( scan_code[*p++ = next()] == SC_DIGIT )  ;

  /* check for exponent */
  if ( p[-1] != 'e' && p[-1] != 'E' )
  { un_next() ; * --p = 0 ; }
  else  /* get the exponent */
    if ( scan_code[*p = next()] != SC_DIGIT &&
         *p != '-' && *p != '+' )
    { *++p = 0 ; *flag = BAD_DECIMAL ;
      return 0.0 ; }
    else  /* get the rest of the exponent */
    { p++ ;
      while ( scan_code[*p++ = next()] == SC_DIGIT )  ;
      un_next() ; * --p = 0 ;
    }

  errno = 0 ; /* check for overflow/underflow */
  d = strtod( temp_buff.string_buff, &endp ) ;
  if ( errno )
      compile_error( "%s : decimal %sflow" , temp_buff.string_buff,
        d == 0.0 ? "under" : "over") ;
  /* strtod() must have used exactly the characters collected */
  if ( endp != (char *) p )
  { *flag = BAD_DECIMAL ; return 0.0 ; }
  return d ;
}

/*----------  process escape characters ---------------*/

/* hex_val[] gives the value of a hex digit letter.  It is indexed by
   (character - 'A') and covers 'A' .. 'f'; the first six entries serve
   'A'..'F', the last six serve 'a'..'f', and every entry in between
   is 0.
   NOTE(review): the layout assumes 'a' - 'A' == 32 (ASCII) */
static char hex_val['f' - 'A' + 1] = {
10,11,12,13,14,15, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0,
10,11,12,13,14,15 } ;

/* true when x is one of '0' .. '7' */
#define isoctal(x)  ((x)>='0'&&(x)<='7')

/* value of a hex letter; x must lie in 'A' .. 'f', no range check */
#define  hex_value(x)   hex_val[(x)-'A']

/* true when x is a decimal digit (per scan_code[]) or a letter with a
   non-zero hex_value().  x is evaluated more than once and is used as
   an index into scan_code[] */
#define ishex(x) (scan_code[x] == SC_DIGIT ||\
                  'A' <= (x) && (x) <= 'f' && hex_value(x))

/* converters used by rm_escape(); both advance the pointer they
   are handed */
static int PROTO(octal, (char **)) ;
static int PROTO(hex, (char **)) ;

/* octal -- convert one, two or three octal digits to a value.

   start_p   on entry *start_p points at the first octal digit;
             on exit it points just past the last digit used

   The first character is not checked.  Returns the value reduced to
   eight bits (x & 0xff). */
static int octal( start_p )
  char **start_p ;
{ register char *p = *start_p ;
  register unsigned x ;

  x = *p++ - '0' ;
  if ( isoctal(*p) )
  { x = (x<<3) + *p++ - '0' ;
    /* a third digit is only looked for after a second one */
    if ( isoctal(*p) )   x = (x<<3) + *p++ - '0' ;
  }
  *start_p = p ;
  return  x & 0xff ;
}

/* hex -- convert one or two hex digits to a value.

   start_p   on entry *start_p points at the first hex digit;
             on exit it points just past the last digit used

   The first character is not validated here; it is converted as a
   decimal digit when scan_code[] says SC_DIGIT and through
   hex_value() otherwise.  The parameter is declared char ** so the
   definition agrees with the prototype and with callers that pass
   the address of a char *.  Returns the value of the digits. */

static int  hex( start_p )
  char **start_p ;
{ register unsigned char *p = (unsigned char *) *start_p ;
  register unsigned x ;
  unsigned t ;

  if ( scan_code[*p] == SC_DIGIT )
        x = *p++ - '0' ;
  else  x = hex_value(*p++) ;

  /* at most one more digit is taken: the two tests are alternatives,
     otherwise a decimal digit followed by a hex letter would be
     consumed as a second and a third digit */
  if ( scan_code[*p] == SC_DIGIT )
        x = (x<<4) + *p++ - '0' ;
  else
  if ( 'A' <= *p && *p <= 'f' && (t = hex_value(*p)) )
  { x = (x<<4) + t ; p++ ; }

  *start_p = (char *) p ;
  return x ;
}

/* table of simple escapes stored as pairs: the character that follows
   the backslash, then the character it stands for
   (n -> newline, t -> tab, b, r, f, a -> 07, v -> 013, and
   backslash, double quote and single quote standing for themselves) */
static char escape_test[] = 
  "n\nt\tb\br\rf\fa\07v\013\\\\\"\"\'\'" ;

/* rm_escape -- process the escape characters in a string, in place.

   s   the string to convert; it is overwritten (the result is never
       longer than the input) and returned

   After a backslash the following are recognised, in this order:
   a letter from escape_test[], one to three octal digits, and
   'x' followed by one or two hex digits.  Anything else is not an
   escape sequence and is copied through together with its
   backslash. */

static char *rm_escape(s)
  char *s ;
{ register char *p, *q ;
  char *t ;

  q = p = s ;

  while ( *p )
      if ( *p == '\\' )
      { p++ ;  /* look at the character after the backslash */

        if ( *p == 0 )
          /* trailing backslash: keep it.  strchr() would match the
             terminating 0 of escape_test[] and t[1] would then be
             read from beyond the table */
          *q++ = '\\' ;
        else
        if ( (t = strchr(escape_test, *p)) &&
             ((t - escape_test) & 1) == 0 )
        { /* only even offsets hold escape letters; odd offsets hold
             the replacement values and must not match */
          p++ ; *q++ = t[1] ; 
        }
        else
        if ( isoctal(*p) ) 
        { t = p ;  *q++ = octal(&t) ; p = t ; }
        else
        if ( *p == 'x' && ishex(*(unsigned char*)(p+1)) )
        { t = p+1 ; *q++ = hex(&t) ; p = t ; }
        else  /* not an escape sequence */
        { *q++ = '\\' ; *q++ = *p++ ; }
      }
      else  *q++ = *p++ ;

  *q = 0 ;
  return s ;
}

/* collect_string -- collect a string constant into
   temp_buff.string_buff; it is entered from the scanner's SC_DQUOTE
   case, so the opening double quote has already been read.

   A backslash followed by a newline is dropped and lineno is bumped;
   any other backslash pair is stored as is and converted afterwards
   by rm_escape().  A newline or a 0 from next() before the closing
   quote is reported with compile_error() and followed by
   mawk_exit(1).

   On success yylval.cp is a new C_STRING cell holding the string and
   CONSTANT is returned. */
static  int  collect_string()
{ register unsigned char *p = (unsigned char *)temp_buff.string_buff ;
  int c ;
  int e_flag = 0 ; /* on if have an escape char */

  while ( 1 )
      switch( scan_code[ *p++ = next() ] )
      { case  SC_DQUOTE : /* done */
              * --p = 0 ;  goto out ;

        case  SC_NL :
              p[-1] = 0 ;
              /* fall thru */

        case  0 :   /* unterminated string */
              compile_error(
              "runaway string constant \"%.10s ..." ,
              temp_buff.string_buff, token_lineno ) ;
              mawk_exit(1) ;

        case SC_ESCAPE :
              if ( (c = next()) == '\n' )
              { p-- ; lineno++ ; }  /* line continuation */
              else
                if ( c == 0 )  un_next() ;   
                else
                { *p++ = c ; e_flag = 1 ; }

              break ;

        default : break ;
      }

out:
    yylval.cp = new_CELL() ;
    yylval.cp->type = C_STRING ;
    yylval.cp->ptr = (PTR) new_STRING(
         e_flag ? rm_escape( temp_buff.string_buff ) 
                : temp_buff.string_buff ) ;
    return  CONSTANT ;
}

static  int  collect_RE()
{ register unsigned char *p = (unsigned char*) temp_buff.string_buff ;
  int c ;
  STRING *sval ;

  while ( 1 )
      switch( scan_code[ *p++ = next() ] )
      { case  SC_DIV : /* done */
              * --p = 0 ;  goto out ;

        case  SC_NL :
              p[-1] = 0 ;
              /* fall thru */

        case  0 :   /* unterminated re */
              "runaway regular expression /%.10s ..." ,
              temp_buff.string_buff, token_lineno ) ;
              mawk_exit(1) ;

        case SC_ESCAPE :
              switch( c = next() )
              { case '/' :  
                      p[-1] = '/' ; break ;

                case '\n' :
                      p-- ;  break ;

                case  0   :
                      un_next() ;  break ;

                default :
                      *p++ = c ; break ;
              break ;

  /* now we've got the RE, so compile it */
  sval = new_STRING( temp_buff.string_buff ) ;
  yylval.cp = new_CELL() ;
  yylval.cp->type = C_RE ;
  yylval.cp->ptr = re_compile(sval) ;
  free_STRING(sval) ;
  return RE ;

@//E*O*F mawk0.97/scan.c//
chmod u=rw,g=r,o=r mawk0.97/scan.c
echo x - mawk0.97/scan.h
sed 's/^@//' > "mawk0.97/scan.h" <<'@//E*O*F mawk0.97/scan.h//'

/********************************************
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/* $Log:	scan.h,v $
 * Revision 2.2  91/04/09  12:39:31  brennan
 * added static to funct decls to satisfy STARDENT compiler
 *
 * Revision 2.1  91/04/08  08:23:54  brennan
 * VERSION 0.97
 */

/* scan.h  */

#ifndef  SCAN_H_INCLUDED
#define  SCAN_H_INCLUDED   1

#include <stdio.h>

/* NOTE(review): MAKESCAN is presumably defined only when building the
   program that generates scancode.c; that build needs the SC_ codes
   but none of the parser types -- confirm */
#ifndef   MAKESCAN
#include  "symtype.h"
#include  "parse.h"
#endif

/* scan code of every character value, indexed by the character */
extern  char scan_code[256] ;

/*  the scan codes to compactify the main switch */

#define  SC_SPACE               1
#define  SC_NL                  2
#define  SC_SEMI_COLON          3
#define  SC_FAKE_SEMI_COLON     4
#define  SC_LBRACE              5
#define  SC_RBRACE              6
#define  SC_QMARK               7
#define  SC_COLON               8
#define  SC_OR                  9
#define  SC_AND                10
#define  SC_PLUS               11
#define  SC_MINUS              12
#define  SC_MUL                13
#define  SC_DIV                14
#define  SC_MOD                15
#define  SC_POW                16
#define  SC_LPAREN             17
#define  SC_RPAREN             18
#define  SC_LBOX               19
#define  SC_RBOX               20
#define  SC_IDCHAR             21
#define  SC_DIGIT              22
#define  SC_DQUOTE             23
#define  SC_ESCAPE             24
#define  SC_COMMENT            25
#define  SC_EQUAL              26
#define  SC_NOT                27
#define  SC_LT                 28
#define  SC_GT                 29
#define  SC_COMMA              30
#define  SC_DOT                31
#define  SC_MATCH              32
#define  SC_DOLLAR             33
#define  SC_UNEXPECTED         34

#ifndef  MAKESCAN

/* global functions in scan.c */

void  PROTO(scan_init, (int, char *) ) ;
void  PROTO(scan_cleanup, (void) ) ;
void  PROTO(eat_nl, (void) ) ;
int   PROTO(yylex, (void) ) ;

extern  YYSTYPE  yylval ;

/* return token x from the scanner, remembering it in current_token */
#define  ct_ret(x)  return current_token = (x)

/* next() yields the next input character; slow_next() is called when
   the character at buffp is 0.  un_next() steps back one character. */
#define  next() (*buffp ? *buffp++ : slow_next())
#define  un_next()  buffp--

/* x when the next character is c, otherwise push it back and give y */
#define  ifnext(c,x,y) (next()==(c)?(x):(un_next(),(y)))

/* return x when the next character is c, otherwise return d */
#define  test1_ret(c,x,d)  if ( next() == (c) ) ct_ret(x) ;\
                           else { un_next() ; ct_ret(d) ; }

/* return x1 or x2 when the next character is c1 or c2, otherwise
   push the character back and return d */
#define  test2_ret(c1,x1,c2,x2,d)   switch( next() )\
                                   { case c1: ct_ret(x1) ;\
                                     case c2: ct_ret(x2) ;\
                                     default: un_next() ;\
                                              ct_ret(d) ; }
#endif  /* ! MAKESCAN  */
#endif  /* SCAN_H_INCLUDED */
@//E*O*F mawk0.97/scan.h//
chmod u=rw,g=r,o=r mawk0.97/scan.h
echo x - mawk0.97/scancode.c
sed 's/^@//' > "mawk0.97/scancode.c" <<'@//E*O*F mawk0.97/scancode.c//'

/* scancode.c */

/* scan_code[] maps each character value to the SC_ code (scan.h)
   that drives the scanner's main switch.  One row per 16 characters.
   Only three rows of the table were present, which left '/', the
   letters, '{' and many others mapped to 0; the remaining rows are
   rebuilt from the SC_ codes and the ASCII character set.
   NOTE(review): characters without a token of their own, including
   everything above 127, are taken to be SC_UNEXPECTED (34) --
   confirm against the output of the table generator */
char scan_code[256] = {
 0,34,34,34,34,34,34,34,34, 1, 2, 1, 1, 1,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
 1,27,23,25,33,15,10,34,17,18,13,11,30,12,31,14,
22,22,22,22,22,22,22,22,22,22, 8, 3,28,26,29, 7,
34,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,
21,21,21,21,21,21,21,21,21,21,21,19,24,20,16,21,
34,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,
21,21,21,21,21,21,21,21,21,21,21, 5, 9, 6,32,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34
} ;
@//E*O*F mawk0.97/scancode.c//
chmod u=rw,g=r,o=r mawk0.97/scancode.c
echo x - mawk0.97/sizes.h
sed 's/^@//' > "mawk0.97/sizes.h" <<'@//E*O*F mawk0.97/sizes.h//'

/********************************************
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/* $Log:	sizes.h,v $
 * Revision 2.1  91/04/08  08:24:09  brennan
 * VERSION 0.97
 */

/*  sizes.h  */

#ifndef  SIZES_H
#define  SIZES_H

/* HASH_PRIME is presumably the size of the symbol hash table --
   confirm; A_HASH_PRIME is the number of slots that new_ARRAY()
   allocates for an array */
#define  HASH_PRIME  53
#define  A_HASH_PRIME 37

/* allow some but not a lot of recursion */
/* NOTE(review): EVAL_STACK_SIZE is defined twice below with different
   values.  A conditional (#if ... #else ... #endif) selecting one of
   them appears to have been lost; restore it from a pristine copy
   before building */
#define  EVAL_STACK_SIZE  64
#define  EVAL_STACK_SIZE  256

#define  MAX_COMPILE_ERRORS  5 /* quit if more than 4 errors */

#define  BUFFSZ    4096   /* input buffer size */

#define  MAX_LOOP_DEPTH   20
/* should never be exceeded, doesn't matter if its too
   big (unless gross) because resources sized by it are freed */

#define  MAX_FIELD   100  /* biggest field number */
#define  SPRINTF_SZ   300  /* biggest sprintf string length */

/* the size of the temp buffer in front of main_buff */
/* NOTE(review): the scratch buffer in types.h is sized with
   TEMP_BUFF_SZ, which is not defined here; its definition presumably
   followed PTR_SZ and has been lost -- confirm */
#define  PTR_SZ   sizeof(PTR)

#define  PAGE_SZ    1024  /* max instructions for a block */

#endif   /* SIZES_H */
@//E*O*F mawk0.97/sizes.h//
chmod u=rw,g=r,o=r mawk0.97/sizes.h
echo x - mawk0.97/split.c
sed 's/^@//' > "mawk0.97/split.c" <<'@//E*O*F mawk0.97/split.c//'

/********************************************
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/* $Log:	split.c,v $
 * Revision 2.1  91/04/08  08:24:11  brennan
 * VERSION 0.97
 */

/* split.c */

#include "mawk.h"
#include "symtype.h"
#include "bi_vars.h"
#include "bi_funct.h"
#include "memory.h"
#include "scan.h"
#include "regexp.h"
#include "field.h"
#include <string.h>

/* space_split -- split string s on SPACE.

   s is the same on return as on entry, although its terminating 0 is
   overwritten for a moment with a blank that serves as a sentinel for
   the scan to the end of a piece.  Each piece is copied into a new
   STRING and the pointers are stored in temp_buff.ptr_buff[0], [1],
   ...  Leading, trailing and repeated SC_SPACE characters never
   produce an empty piece.

   Returns the number of pieces; rt_overflow() is called when that
   number exceeds MAX_FIELD.
   NOTE(review): the limit is tested only after all the pointers have
   been stored, so the check relies on the scratch buffer being
   larger than MAX_FIELD pointers -- confirm */

int space_split( s )  
  register char *s ;
{ char *back = strchr(s,0) ;
  int i = 0 ;
  int len ;
  char *q ;
  STRING  *sval ;

  while ( 1 )
  { while ( scan_code[*(unsigned char*)s] == SC_SPACE )  s++ ;
    if ( *s == 0 )  break ;
    /* mark the front with q */
    q = s++ ;
    *back = ' ' ; /* sentinel */
    while ( scan_code[*(unsigned char*)s] != SC_SPACE )  s++ ;
    *back = 0 ;
    sval = (STRING *) (temp_buff.ptr_buff[i++] = 
         (PTR) new_STRING((char *) 0, len = s - q )) ;
    (void) memcpy(sval->str, q, len) ;
  }
  if ( i > MAX_FIELD ) 
     rt_overflow("maximum number of fields", MAX_FIELD) ;
  return i ;
}

/* re_pos_match -- find the first match of re in s that has a
   positive length.

   s      the string to search
   re     the regular expression handed to REmatch()
   lenp   receives the length of the match

   A match of length zero is skipped by searching again one character
   further on, until the end of the string is reached.  Returns a
   pointer to the start of the match, or (char *) 0 when there is
   none. */
char *re_pos_match(s, re, lenp)
  register char *s ; 
  PTR re ; unsigned *lenp ;
{
  while ( s = REmatch(s, re, lenp) )
        if ( *lenp )   return s ;
        else
        if ( *s == 0 )  break ;
        else s++ ;

  return (char *) 0 ;
}

/* re_split -- split string s on matches of the regular expression re.

   The text in front of every positive length match, and the text
   after the last match, are copied into new STRINGs whose pointers
   are stored in temp_buff.ptr_buff[0], [1], ...  There is always at
   least one piece.

   Returns the number of pieces; rt_overflow() is called when that
   number exceeds MAX_FIELD. */
int re_split(s, re)
  char *s ;
  PTR  re ;
{ register char *t ;
  int i = 0 ;
  unsigned mlen, len ;
  STRING *sval ;

  while ( t = re_pos_match(s, re, &mlen) )
  { sval = (STRING*)(temp_buff.ptr_buff[i++] = (PTR)
            new_STRING( (char *)0, len = t-s) ) ;
    (void) memcpy(sval->str, s, len) ;
    s = t + mlen ;  /* carry on behind the separator */
  }
  /* whatever follows the last separator is the final piece */
  temp_buff.ptr_buff[i++] = (PTR) new_STRING(s) ;
  if ( i > MAX_FIELD ) 
     rt_overflow("maximum number of fields", MAX_FIELD) ;
  return i ;
}
/*  split(s, X, r)
    split s into array X on r

    entry: sp[0] holds r
           sp[-1] pts at X
           sp[-2] holds s
CELL *bi_split(sp)
  register CELL *sp ;
  int cnt ;   /* the number of pieces */
  double dcnt ; /* double version of cnt */
  CELL  *cp ;
  char *ofmt ;

  if ( sp->type < C_RE )  cast_for_split(sp) ;
        /* can be C_RE, C_SPACE or C_SNULL */
  sp -= 2 ;
  if ( sp->type < C_STRING )  cast1_to_s(sp) ;

  if ( string(sp)->len == 0 ) /* nothing to split */
  { free_STRING( string(sp) ) ;
    sp->type = C_DOUBLE ; sp->dval = 0.0 ;
    return sp ;

  switch ( (sp+2)->type )
    case C_RE :
        cnt = re_split(string(sp)->str, (sp+2)->ptr) ;
        break ;

    case C_SPACE :
        cnt = space_split(string(sp)->str) ;
        break ;

    /* this case could be done by C_RE, but very slowly.
       Since it is the common way to eliminate fields,
       we'll treat the special case for speed */
    case C_SNULL : /* split on empty string */
        cnt = 1 ;
        temp_buff.ptr_buff[0] = sp->ptr ;
        string(sp)->ref_cnt++ ;
        break ;

    default : bozo("bad splitting cell in bi_split") ;

  /* now load the array */

  free_STRING( string(sp) ) ;

  sp->type = C_DOUBLE ;
  sp->dval = dcnt = (double) cnt ;

  ofmt = string(field + OFMT)->str ;
  A = (ARRAY) (sp+1)->ptr  ;

  while ( cnt )
  { char xbuff[256] ;
    /* this big in case the user did something goofy with
       OFMT  */
    (void) sprintf(xbuff, ofmt, dcnt ) ;
    dcnt -= 1.0 ;
    cp = array_find( A, xbuff, 1) ;
    cell_destroy(cp) ;
    cp->ptr = temp_buff.ptr_buff[--cnt] ;
    cp->type = C_MBSTRN ;

  return sp ;

@//E*O*F mawk0.97/split.c//
chmod u=rw,g=r,o=r mawk0.97/split.c
echo x - mawk0.97/symtype.h
sed 's/^@//' > "mawk0.97/symtype.h" <<'@//E*O*F mawk0.97/symtype.h//'

/********************************************
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*$Log:	symtype.h,v $
 * Revision 2.1  91/04/08  08:24:14  brennan
 * VERSION 0.97
 */

/* types related to symbols are defined here */

#ifndef  SYMTYPE_H
#define  SYMTYPE_H

/* struct to hold info about builtins */
typedef struct {
char *name ;
PF_CP  fp ;  /* ptr to function that does the builtin */
unsigned char min_args, max_args ; 
/* info for parser to check correct number of arguments */
} BI_REC ;

   structures and types for arrays

/* array hash nodes */

typedef  struct anode {
struct anode *link ;
STRING *sval ;
CELL   *cp ;

/* note ARRAY is a ptr to a hash table */

CELL *PROTO(array_find, (ARRAY,void *, int) ) ;
int PROTO(array_test, (ARRAY, STRING *) ) ;
INST *PROTO(array_loop, (INST *, CELL *, CELL *) ) ;
void PROTO(array_delete, (ARRAY, STRING *) ) ;
CELL *PROTO(array_cat, (CELL *, int) ) ;
void PROTO(array_free, (ARRAY) ) ;

#define new_ARRAY() (ARRAY)memset(zmalloc(A_HASH_PRIME *\
                        sizeof(ANODE*)), 0, A_HASH_PRIME*sizeof(ANODE*))

extern  ARRAY  Argv ;

/* for parsing  (i,j) in A  */
typedef  struct {
INST *start ;
int cnt ;
} ARG2_REC ;

  user defined functions

typedef  struct fblock {
char *name ;
INST *code  ;
unsigned short nargs ;
char *typev ;  /* array of size nargs holding types */
} FBLOCK ;   /* function block */

void  PROTO(add_to_fdump_list, (FBLOCK *) ) ;
void  PROTO( fdump, (void) ) ;

  elements of the symbol table

#define  ST_NONE 0
#define  ST_VAR   1
#define  ST_KEYWORD   2
#define  ST_BUILTIN 3 /* a pointer to a builtin record */
#define  ST_ARRAY   4 /* a void * ptr to a hash table */
#define  ST_FIELD   5  /* a cell ptr to a field */
#define  ST_FUNCT   6
#define  ST_LENGTH  7  /* length is special */
#define  ST_LOCAL_NONE  8
#define  ST_LOCAL_VAR   9
#define  ST_LOCAL_ARRAY 10

#define  is_local(stp)   ((stp)->type>=ST_LOCAL_NONE)

typedef  struct {
char *name ;
char type ;
unsigned char offset ;  /* offset in stack frame for local vars */
union {
CELL *cp ;
int  kw ;
PF_CP fp ;
BI_REC *bip ;
ARRAY  array ; 
FBLOCK  *fbp ;
} stval ;

 structures for type checking function calls

typedef  struct ca_rec {
struct ca_rec  *link ;
short type ;
short arg_num ;  /* position in callee's stack */
/*---------  this data only set if we'll  need to patch -------*/
/* happens if argument is an ID or type ST_NONE or ST_LOCAL_NONE */

int call_offset ;
/* where the type is stored */
SYMTAB  *sym_p ;  /* if type is ST_NONE  */
char *type_p ;  /* if type  is ST_LOCAL_NONE */
}  CA_REC  ; /* call argument record */

/* type field of CA_REC matches with ST_ types */
#define   CA_EXPR       ST_LOCAL_VAR
#define   CA_ARRAY      ST_LOCAL_ARRAY

typedef  struct fcall {
struct fcall *link ;
FBLOCK  *callee ;
short   call_scope ;
FBLOCK  *call ;  /* only used if call_scope == SCOPE_FUNCT  */
INST    *call_start ; /* computed later as code may be moved */
CA_REC  *arg_list ;
short   arg_cnt_checked ;
unsigned line_no ; /* for error messages */

extern  FCALL_REC  *resolve_list ;

void PROTO(resolve_fcalls, (void) ) ;
void PROTO(check_fcall, (FBLOCK*,int,FBLOCK*,CA_REC*,unsigned) ) ;

/* hash.c */
unsigned  PROTO( hash, (char *) ) ;
SYMTAB *PROTO( insert, (char *) ) ;
SYMTAB *PROTO( find, (char *) ) ;
SYMTAB *PROTO( save_id, (char *) ) ;
void    PROTO( restore_ids, (void) ) ;

/* error.c */
void  PROTO(type_error, (SYMTAB *) ) ;

#endif  /* SYMTYPE_H */
@//E*O*F mawk0.97/symtype.h//
chmod u=rw,g=r,o=r mawk0.97/symtype.h
echo x - mawk0.97/types.h
sed 's/^@//' > "mawk0.97/types.h" <<'@//E*O*F mawk0.97/types.h//'

/********************************************
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/* $Log:	types.h,v $
 * Revision 2.1  91/04/08  08:24:15  brennan
 * VERSION 0.97
 */

/*  types.h  */

#ifndef  TYPES_H
#define  TYPES_H

/* the generic pointer: void * under ANSI C, char * otherwise */
#ifdef  __STDC__
typedef  void *PTR ;
#else
typedef  char *PTR ;
#endif

#include  "sizes.h"

/*  CELL  types  */

#define  C_NOINIT                0
#define  C_DOUBLE                1
#define  C_STRING                2
#define  C_STRNUM                3
#define  C_MBSTRN                4 
        /*could be STRNUM, has not been checked */
#define  C_RE                    5
#define  C_SPACE                 6
        /* split on space */
#define  C_SNULL                 7
        /* split on the empty string  */
#define  C_REPL                  8
        /* a replacement string   '\&' changed to &  */
#define  C_REPLV                 9
        /* a vector replacement -- broken on &  */
#define  NUM_CELL_TYPES         10

/* these defines are used to check types for two
   CELLs which are adjacent in memory */

#define  TWO_NOINITS  (2*(1<<C_NOINIT))
#define  TWO_DOUBLES  (2*(1<<C_DOUBLE))
#define  TWO_STRINGS  (2*(1<<C_STRING))
#define  TWO_STRNUMS  (2*(1<<C_STRNUM))
#define  TWO_MBSTRNS  (2*(1<<C_MBSTRN))

/* a reference counted string; str[] is declared with four bytes
   NOTE(review): longer strings presumably get a larger allocation
   from new_STRING() -- confirm */
typedef  struct {
unsigned short ref_cnt ;
unsigned short len ;
char str[4] ;
}  STRING ;

typedef  struct cell {
short type ;
short vcnt ; /* only used if type == C_REPLV   */
PTR   ptr ;
double  dval ;
}  CELL ;

/* all builtins are passed the evaluation stack pointer and
   return its new value, here is the type */

#ifdef __STDC__
typedef CELL *(*PF_CP)(CELL *) ;
#else
typedef CELL *(*PF_CP)() ;
#endif

/* an element of code (instruction) */
typedef  union {
int  op ;
PTR  ptr ;
}  INST ;

/* a scratch buffer type */
union tbuff {
PTR   ptr_buff[MAX_FIELD] ;
char   string_buff[TEMP_BUFF_SZ + BUFFSZ + 1] ;
} ;

#endif  /* TYPES_H */

@//E*O*F mawk0.97/types.h//
chmod u=rw,g=r,o=r mawk0.97/types.h
echo x - mawk0.97/zmalloc.c
sed 's/^@//' > "mawk0.97/zmalloc.c" <<'@//E*O*F mawk0.97/zmalloc.c//'

/********************************************
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*$Log:	zmalloc.c,v $
 * Revision 2.2  91/04/09  12:39:45  brennan
 * added static to funct decls to satisfy STARDENT compiler
 *
 * Revision 2.1  91/04/08  08:24:17  brennan
 * VERSION 0.97
 */

/*  zmalloc.c  */
#include  "mawk.h"
#include  "zmalloc.h"

void PROTO( mawk_exit, (int) ) ;

/*
  zmalloc() gets mem from malloc() in CHUNKS of 2048 bytes
  and cuts these blocks into smaller pieces that are multiples
  of eight bytes.  When a piece is returned via zfree(), it goes
  on a linked linear list indexed by its size.  The lists are
  an array, pool[].

  E.g., if you ask for 22 bytes with p = zmalloc(22), you actually get
  a piece of size 24.  When you free it with zfree(p,22) , it is added
  to the list at pool[2].
*/

/* ZBLOCKSZ is the size in bytes of one block, ZSHIFT the matching
   shift count (1 << ZSHIFT == ZBLOCKSZ), POOLSZ the number of free
   lists in pool[] */
#define ZBLOCKSZ    8    
#define ZSHIFT      3
#define POOLSZ      16

#define  CHUNK          256    
        /* number of blocks to get from malloc */

static PTR  PROTO( emalloc, (unsigned) ) ;
void PROTO( errmsg, (int , char *, ...) ) ;

/* emalloc -- malloc() that does not return on failure.

   size   the number of bytes wanted

   Returns the memory from malloc(); when malloc() fails, "out of
   memory" is reported with errmsg() and mawk_exit(1) is called. */
static PTR emalloc(size)
  unsigned size ;
{ PTR p ;

  if( !(p = malloc(size)) )
  { errmsg(0, "out of memory") ; mawk_exit(1) ; }
  return p ;
}

typedef  union  zblock {
char dummy[ZBLOCKSZ] ;
union zblock *link ;
}  ZBLOCK  ;

/* ZBLOCKS of sizes 1, 2, ... 16
   which is bytes of sizes 8, 16, ... , 128
   are stored on the linked linear lists in
   pool[0], pool[1], ... , pool[15]

static  ZBLOCK  *pool[POOLSZ] ;

/* zmalloc -- allocate size bytes.

   size   the number of bytes wanted

   A request of more than POOLSZ * ZBLOCKSZ bytes is handed to
   emalloc().  Any other request is rounded up to whole blocks and is
   served from the free list of that size, or else cut from the
   current chunk; a fresh chunk of CHUNK blocks is obtained from
   malloc() when the current one is too small, and what was left of
   the old chunk is put on the free list of its size.

   A request of 0 bytes is served with one block; it used to index
   pool[-1].  The caller must hand the same size to zfree(). */
PTR   zmalloc( size )
  unsigned size ;
{ register unsigned blocks ;
  register ZBLOCK *p ;
  static  unsigned amt_avail ;  /* blocks left in the chunk */
  static  ZBLOCK  *avail ;      /* first unused block of the chunk */

  if ( size > POOLSZ * ZBLOCKSZ )  return emalloc(size) ;

  blocks = (size >> ZSHIFT) + ((size & (ZBLOCKSZ-1)) != 0) ;
  if ( blocks == 0 )  blocks = 1 ;

  if ( p = pool[blocks-1] )
  { pool[blocks-1] = p->link ; return (PTR) p ; }

  if ( blocks > amt_avail )
  { if ( amt_avail ) /* free avail */
    { avail->link = pool[--amt_avail] ; pool[amt_avail] = avail ; }

    if ( !(avail = (ZBLOCK *) malloc(CHUNK*ZBLOCKSZ)) )
    { /* if we get here, almost out of memory */
        amt_avail = 0 ;   return  emalloc(size) ; }
    amt_avail = CHUNK ;
  }
  /* get p from the avail pile */
  p = avail ; avail += blocks ; amt_avail -= blocks ; 
  return (PTR) p ;
}

/* zfree -- give back memory obtained from zmalloc().

   p      the memory
   size   the size that was asked for when p was allocated

   A piece of more than POOLSZ * ZBLOCKSZ bytes goes back to free();
   any other piece is pushed on the free list for its number of
   blocks.  The two cases are alternatives: a piece that has been
   passed to free() must not be written to or linked into pool[]. */
void  zfree( p, size)
  register PTR p ;  unsigned size ;
{ register int index ;

  if ( size > POOLSZ * ZBLOCKSZ )  free(p) ;
  else
  {
    index  = (size >> ZSHIFT) + ((size & (ZBLOCKSZ-1)) != 0) - 1;
    if ( index < 0 )  index = 0 ;  /* size 0 occupies one block */
    ((ZBLOCK *) p)->link = pool[index] ;
    pool[index] = (ZBLOCK *) p ;
  }
}

/* zrealloc -- change the size of a piece obtained from zmalloc().

   p          the memory
   old_size   the size p was allocated with
   new_size   the size wanted

   A new piece is always allocated; the smaller of old_size and
   new_size bytes are copied to it and the old piece is released
   with zfree().  Returns the new piece. */
PTR  zrealloc( p, old_size, new_size )
  register PTR  p ;
  unsigned old_size, new_size ;
{ register PTR q ;

  (void) memcpy(q = zmalloc(new_size), p, 
                old_size < new_size ? old_size : new_size) ;
  zfree(p, old_size) ;
  return q ;
}

@//E*O*F mawk0.97/zmalloc.c//
chmod u=rw,g=r,o=r mawk0.97/zmalloc.c
echo x - mawk0.97/zmalloc.h
sed 's/^@//' > "mawk0.97/zmalloc.h" <<'@//E*O*F mawk0.97/zmalloc.h//'

/********************************************
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*$Log:	zmalloc.h,v $
 * Revision 2.1  91/04/08  08:24:19  brennan
 * VERSION 0.97
 */

/* zmalloc.h */

#ifndef  ZMALLOC_H
#define  ZMALLOC_H

#ifdef   __STDC__
#include  <stdlib.h>
#include  <string.h>   /* memcpy() */

#else
/* no ANSI headers: declare the library routines by hand */
PTR  memcpy(), malloc(), realloc() ;
void free() ;
#endif

/* the size handed to zfree() and the old size handed to zrealloc()
   must be the size the memory was allocated with */
PTR  PROTO( zmalloc, (unsigned) ) ;
void PROTO( zfree, (PTR, unsigned) ) ;
PTR  PROTO( zrealloc , (PTR,unsigned,unsigned) ) ;

#endif  /* ZMALLOC_H */
@//E*O*F mawk0.97/zmalloc.h//
chmod u=rw,g=r,o=r mawk0.97/zmalloc.h
echo mkdir - mawk0.97/rexp
mkdir mawk0.97/rexp
chmod u=rwx,g=rx,o=rx mawk0.97/rexp
echo x - mawk0.97/rexp/Makefile
sed 's/^@//' > "mawk0.97/rexp/Makefile" <<'@//E*O*F mawk0.97/rexp/Makefile//'

# This is a makefile for mawk,
# an implementation of AWK (1988).
# This builds a regular expression library
# Remove the -DMAWK and the library has general use.
# (Even if left in, the diff is very small)


# sources that make up the regular expression library
C=rexp.c rexp0.c rexp1.c rexp2.c rexp3.c rexpdb.c

# Only the sources that are newer than the archive ($?) are compiled,
# and "ar r" replaces just those members.  Object files are removed
# before and after so that *.o names exactly what was compiled now.
regexp.a : $(C)
	rm -f *.o
	cc -c $(CFLAGS) $?
	ar r regexp.a *.o
	rm -f *.o


@//E*O*F mawk0.97/rexp/Makefile//
chmod u=rw,g=r,o=r mawk0.97/rexp/Makefile
echo x - mawk0.97/rexp/rexp.c
sed 's/^@//' > "mawk0.97/rexp/rexp.c" <<'@//E*O*F mawk0.97/rexp/rexp.c//'

/********************************************
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*   rexp.c   */

/*  op precedence  parser for regular expressions  */

#include  "rexp.h"

/* static  prototypes */
static void  PROTO( op_pop, (void) ) ;  /* static, like its definition */

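/*  DATA
    REerrno is set by RE_error_trap() before it jumps back into
    REcompile(); REerrlist[] holds the message text for the error
    codes 1 to 6 (slot 0 is unused).  */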
int   REerrno ;
char *REerrlist[] = { (char *) 0 ,
/* 1  */    "missing '('",
/* 2  */    "missing ')'",
/* 3  */    "bad class -- [], [^] or [" ,
/* 4  */    "missing operand" ,
/* 5  */    "resource exhaustion -- regular expression too large",
/* 6  */    "null regular expression" } ;

/* E5 is very unlikely to occur */

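/* This table drives the operator precedence parser.
   REcompile() reads table[token on top of op_stack][incoming token],
   so a row is the stacked token and a column the incoming one.
   G means apply the stacked operators first; L and EQ mean stack the
   incoming token; negative entries (E1, E2, E7) are errors. */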
static  int  table[8][8]  =  {

/*        0   |   CAT   *   +   ?   (   )   */
/* 0 */   0,  L,  L,    L,  L,  L,  L,  E1,
/* | */   G,  G,  L,    L,  L,  L,  L,  G,
/* CAT*/  G,  G,  G,    L,  L,  L,  L,  G,
/* * */   G,  G,  G,    G,  G,  G, E7,  G,
/* + */   G,  G,  G,    G,  G,  G, E7,  G,
/* ? */   G,  G,  G,    G,  G,  G, E7,  G,
/* ( */   E2, L,  L,    L,  L,  L,  L,  EQ,
/* ) */   G , G,  G,    G,  G,  G,  E7,  G     }   ;

typedef struct
{ int  token ;
  int  prec ;   }  OP ;

#define  STACKSZ   96

/*
  m_ptr -> top filled slot on the m_stack
  op_ptr -> top filled slot on op_stack,
     initially this is only half filled with the token;
     the precedence is added later
*/

static  OP  *op_stack, *op_limit, *op_ptr ;
static  MACHINE *m_stack, *m_limit, *m_ptr ;

/* inline for speed on the m_stack */

/* pop the top machine; popping an empty stack is error E4 */
#define m_pop() (m_ptr<m_stack?RE_error_trap(-E4): *m_ptr--)

/* push a machine; a full stack is error E5.
   The limit is tested before m_ptr moves, because RE_error_trap()
   frees every slot from m_ptr downwards and must not be handed a
   pointer one past the end of the stack. */
#define m_push(x)  do { if ( m_ptr+1 == m_limit ) RE_error_trap(-E5) ;\
                        *++m_ptr = (x) ; } while (0)


static jmp_buf  err_buf  ;  /*  used to trap on error */

MACHINE  RE_error_trap(x)  /* return is dummy to make macro OK */
  int x ;
  while ( m_ptr >= m_stack ) RE_free( m_ptr-- -> start ) ;
  RE_free(m_stack) ; RE_free(op_stack) ;
  REerrno = x ;
  longjmp(err_buf, 1 ) ;
  /* dummy return to make compiler happy */
  return *m_stack ;

/* Compile the regular expression re into a state machine.

   Returns a pointer to the first STATE of the machine, or a null
   pointer after an error, in which case REerrno holds the error
   number.  An empty expression compiles to a single M_ACCEPT state.

   Operands are pushed on m_stack and operators on op_stack.  For each
   incoming operator, table[][] decides whether it is stacked or
   whether the operators already stacked have to be applied first. */
VOID *REcompile(re)
  char *re ;
{ MACHINE  m  ;
  register int  t ;

  RE_lex_init(re) ;

  if ( *re == 0 )
  { STATE *p = (STATE *) RE_malloc( sizeof(STATE) ) ;
    p->type = M_ACCEPT ;
    return  (VOID *) p ;
  }

  if ( setjmp(err_buf) )   return (VOID *) 0 ;
     /* global error trap */

  /* initialize the stacks  */
  m_stack =(MACHINE *) RE_malloc(STACKSZ*sizeof(MACHINE)) ;
  m_ptr = m_stack - 1 ;
  m_limit = m_stack + STACKSZ ;
  op_ptr = op_stack = (OP *) RE_malloc(STACKSZ*sizeof(OP)) ;
  op_ptr->token = 0 ;
  op_limit = op_stack + STACKSZ ;

  t = RE_lex(&m) ;

  while( 1 )
   { switch( t )
       {
         case T_STR  :
         case T_ANY  :
         case T_U    :
         case T_START :
         case T_END :
         case T_CLASS :  m_push(m) ;  break ;

         case  0 :   /*  end of reg expr   */
           if ( op_ptr -> token == 0 )  /*  done   */
           { m = m_pop() ;
             if ( m_ptr < m_stack )  /* DONE !!! */
             { /* release with the same allocator family that
                  RE_malloc() belongs to */
               RE_free(m_stack) ; RE_free(op_stack) ;
               return  (VOID *) m.start ;
             }
             /*  machines still on the stack  */
             RE_panic("values still on machine stack") ;
           }
         /*  otherwise case 0  falls  thru to default
             which is operator case  */

         default :

           if ( (op_ptr -> prec = table[op_ptr -> token][t]) == G )
           { while ( op_ptr -> prec != L )  op_pop() ;
             continue ;  /* look at the same token again */
           }

           if ( op_ptr -> prec < 0 )
           { if ( op_ptr->prec == E7 )
                  RE_panic("parser returns E7") ;
             else  RE_error_trap(-op_ptr->prec) ;
           }

           if ( ++op_ptr == op_limit ) /* stack overflow */
                 RE_error_trap(-E5) ;
           op_ptr -> token = t ;
       }

    t = RE_lex(&m) ;
   }
}

/* Pop the top operator off op_stack and apply it to the machine(s) on
   top of m_stack, pushing the result back.  Parentheses are simply
   discarded. */
static void  op_pop()
{ register int  t  ;
  MACHINE m, n ;

  if ( (t = op_ptr-- -> token) >= T_LP ) return ;
        /* nothing to do with '(' or ')' */
  if ( t <= T_CAT )  /* binary operation */
        n = m_pop() ;
  m = m_pop() ;

  switch( t )
  {  case  T_CAT :  RE_cat(&m, &n) ;  break ;
     case  T_OR  :  RE_or( &m, &n) ;  break ;
     case T_STAR  :  RE_close( &m) ;  break ;
     case T_PLUS  :  RE_poscl( &m ) ; break ;
     case T_Q     :  RE_01( &m ) ;    break ;
     default       :
        RE_panic("strange token popped from op_stack") ;
  }
  m_push(m) ;
}

/* Called only for a logic flaw or an unforeseen case in the compiler:
   writes the message s to stderr and ends the program with exit
   status 100. */
void RE_panic( s )
  char *s ;
{
  (void) fprintf( stderr, "REcompile() - panic:  %s\n", s) ;
  exit(100) ;
}

@//E*O*F mawk0.97/rexp/rexp.c//
chmod u=rw,g=r,o=r mawk0.97/rexp/rexp.c
echo x - mawk0.97/rexp/rexp.h
sed 's/^@//' > "mawk0.97/rexp/rexp.h" <<'@//E*O*F mawk0.97/rexp/rexp.h//'

/********************************************
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*  rexp.h    */

#ifndef  REXP_H
#define  REXP_H

#include  <string.h>
#include  <stdio.h>
#include  <setjmp.h>

/* PROTO(name, args) declares a function with a full prototype under
   ANSI C and with an empty parameter list for older compilers. */
#ifndef   PROTO
#ifdef    __STDC__
#define  PROTO(name, args)   name  args
#else
#define  PROTO(name, args)   name()
#endif
#endif

/* VOID * is the generic pointer type: void * under ANSI C, char *
   otherwise (where the allocator has to be declared by hand). */
#ifdef  __STDC__
#define  VOID   void
#include <stdlib.h>
#else
#define  VOID   char
char *malloc(), *realloc() ;
void free() ;
#endif

/* user can change this  */

#define  RE_malloc(x)    RE_xmalloc(x)
#define  RE_realloc(x,l)   RE_xrealloc(x,l)
#define  RE_free(x)      free(x)

VOID  *PROTO( RE_xmalloc, (unsigned) ) ;
VOID  *PROTO( RE_xrealloc, (void *,unsigned) ) ;

/*  finite machine  state types  */

#define  M_STR     	0
#define  M_CLASS   	1
#define  M_ANY     	2
#define  M_START   	3
#define  M_END     	4
#define  M_U       	5
#define  M_1J      	6
#define  M_2JA     	7
#define  M_2JB     	8
#define  M_ACCEPT  	9
#define  U_ON      	10

/* Flag values that are added to the M_ type of a state.  RE_lex()
   adds END_ON to the start state of an operand that is directly
   followed by '$'.
   NOTE(review): U_ON is presumably the corresponding flag for a
   leading M_U -- no use of it is visible here; confirm in the matcher. */
#define  U_OFF     0
#define  END_OFF   0
#define  END_ON    (2*U_ON)

typedef  unsigned char BV[32] ;  /* bit vector */

/* One state of a compiled machine.  Which member of data is valid
   depends on type: str for M_STR, bvp for M_CLASS, jump for the
   jump states. */
typedef  struct
{ char type ;
  unsigned char  len ;  /* used for M_STR  */
  union
   {
     char *str  ;  /* string */
     BV   *bvp ;   /*  class  */
     int   jump ;
   }  data ;
}     STATE  ;

#define  STATESZ  (sizeof(STATE))

typedef  struct
{ STATE  *start, *stop ; }   MACHINE ;

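/*  tokens
    The numeric order matters: 0 .. T_RP index the parser table in
    rexp.c, and op_pop() treats values <= T_CAT as binary operators
    and values >= T_LP as parentheses.  */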
#define  T_OR   1       /* | */
#define  T_CAT  2       
#define  T_STAR 3       /* * */
#define  T_PLUS 4       /* + */
#define  T_Q    5       /* ? */
#define  T_LP   6       /* ( */
#define  T_RP   7       /* ) */
#define  T_START 8      /* ^ */
#define  T_END  9       /* $ */
#define  T_ANY  10      /* . */
#define  T_CLASS 11     /* starts with [ */
#define  T_SLASH 12     /*  \  */
#define  T_CHAR  13     /* all the rest */
#define  T_STR   14
#define  T_U     15

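/*  precedences and error codes
    L, EQ and G are the relations stored in the parser table.
    E1 .. E7 are negated error numbers: RE_error_trap() is called
    with -En, so the value stored in REerrno is positive.  */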
#define  L   0
#define  EQ  1
#define  G   2
#define  E1  (-1)
#define  E2  (-2)
#define  E3  (-3)
#define  E4  (-4)
#define  E5  (-5)
#define  E6  (-6)
#define  E7  (-7)

#define  MEMORY_FAILURE      5

/* struct for the run time stack */
typedef struct {
STATE *m ;   /*   save the machine ptr */
int    u ;   /*   save the u_flag */
char  *s ;   /*   save the active string ptr */
char  *ss ;  /*   save the match start -- only used by REmatch */
} RT_STATE ;   /* run time state */

/*  error  trap   */
extern int REerrno ;
MACHINE   PROTO(RE_error_trap, (int) ) ;

MACHINE   PROTO( RE_u, (void) ) ;
MACHINE   PROTO( RE_start, (void) ) ;
MACHINE   PROTO( RE_end, (void) ) ;
MACHINE   PROTO( RE_any, (void) ) ;
MACHINE   PROTO( RE_str, (char *, unsigned) ) ;
MACHINE   PROTO( RE_class, (BV *) ) ;
void      PROTO( RE_cat, (MACHINE *, MACHINE *) ) ;
void      PROTO( RE_or, (MACHINE *, MACHINE *) ) ;
void      PROTO( RE_close, (MACHINE *) ) ;
void      PROTO( RE_poscl, (MACHINE *) ) ;
void      PROTO( RE_01, (MACHINE *) ) ;
void      PROTO( RE_panic, (char *) ) ;
char     *PROTO( str_str, (char *, char *, unsigned) ) ;

void      PROTO( RE_lex_init , (char *) ) ;
int       PROTO( RE_lex , (MACHINE *) ) ;
void      PROTO( RE_run_stack_init, (void) ) ;
RT_STATE *PROTO( RE_new_run_stack, (void) ) ;

#endif   /* REXP_H  */
@//E*O*F mawk0.97/rexp/rexp.h//
chmod u=rw,g=r,o=r mawk0.97/rexp/rexp.h
echo x - mawk0.97/rexp/rexp0.c
sed 's/^@//' > "mawk0.97/rexp/rexp0.c" <<'@//E*O*F mawk0.97/rexp/rexp0.c//'

/********************************************
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*  rexp0.c   */

/*  lexical scanner  */

#include  "rexp.h"

/* static functions */
static int  PROTO( do_str, (int, char **, MACHINE *) ) ;
static int  PROTO( do_class, (char **, MACHINE *) ) ;
static int  PROTO( escape, (char **) ) ;
static BV   *PROTO( store_bvp, (BV *) ) ;
static int  PROTO( ctohex, (int) ) ;

/* Token for every character code from 0 to '|' (ASCII is assumed).
   The entries are the T_ values of rexp.h: 0 for NUL, and
     '$' 9   '(' 6   ')' 7   '*' 3   '+' 4   '.' 10
     '?' 5   '[' 11  '\' 12  '^' 8   '|' 1
   everything else is 13 (T_CHAR).  Twenty entries per row. */
#ifndef  EG  /* if EG make next array visible */
static
#endif
char  RE_char2token[ '|' + 1 ] = {
0,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,9,13,13,13,
6,7,3,4,13,13,10,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,13,5,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,13,13,13,13,13,13,13,13,13,11,12,13,8,13,13,13,13,13,
13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,13,13,1} ;

#define  char2token(x) ( (unsigned char)(x) > '|' ? T_CHAR : RE_char2token[x] )

#define NOT_STARTED    (-1)

static  int  prev  ;
static  char   *lp  ;     /*  ptr to reg exp string  */
static  unsigned re_len ;

void  RE_lex_init( re )
  char *re ;
  lp = re ;
  re_len = strlen(re) + 1  ;
  prev = NOT_STARTED ;
  RE_run_stack_init() ;

/* Return the next token of the expression given to RE_lex_init(),
   or 0 at its end.  For an operand token the machine that recognizes
   the operand is stored in *mp.

   Concatenation is implicit in the expression, so when an operand or
   '(' follows something that can end an operand, T_CAT is returned
   first and the input pointer stays where it is. */
int   RE_lex( mp )
  MACHINE  *mp ;
{ register int c ;

  switch( c = char2token(*lp) )
  {
     case T_OR :
     case T_PLUS :
     case T_STAR :
     case T_Q :
     case T_RP :
           lp++ ;  return  prev = c ;

     case T_SLASH :
           if ( lp[1] != 0 )  break ;
           /* else fall thru */

     case 0   :   return 0 ;

     case T_LP :
           switch( prev )
           {
             case T_CHAR :
             case T_STR  :
             case T_ANY :
             case T_CLASS :
             case T_START :
             case T_RP :
             case T_PLUS :
             case T_STAR :
             case T_Q :
             case T_U :
                  return prev = T_CAT ;

             default  :
                  lp++ ;
                  return prev = T_LP ;
           }
  }

  /*  *lp  is  an operand, but implicit cat op is possible   */
  switch( prev )
   { case  NOT_STARTED :
     case  T_OR :
     case  T_LP :
     case T_CAT :

          switch( c )
           { case  T_ANY :
             { /* ".+" is returned as "." followed by ".*": the first
                  visit returns the "." and backs up, the flag tells
                  the second visit to return the ".*" part */
               static int plus_is_star_flag = 0 ;

                  if ( * ++lp == '*' )
                  { lp++ ;  *mp = RE_u() ;
                    return  prev = T_U ; }
                  else
                  if ( *lp == '+' )
                  {
                      if ( plus_is_star_flag )
                      { lp++ ;  *mp = RE_u() ;
                        plus_is_star_flag = 0 ;
                        return prev = T_U ;
                      }
                      else
                      { plus_is_star_flag = 1 ;
                        lp-- ; *mp = RE_any() ;
                        return prev = T_ANY ;
                      }
                  }
                  else
                  { *mp = RE_any() ;
                    prev = T_ANY ;
                  }
             }
              break ;

             case  T_SLASH :
                  lp++ ; c = escape(&lp) ;
                  prev = do_str(c, &lp, mp) ;
                  break ;

             case  T_CHAR  :
                  c = *lp++ ;
                  prev = do_str(c, &lp, mp) ;
                  break ;

             case T_CLASS : prev = do_class(&lp, mp) ;
                            break ;

             case T_START : *mp = RE_start() ; lp++ ;
                            prev = T_START ;
                            break ;

             case T_END :
                     lp++ ; *mp = RE_end() ;
                     return  prev = T_END ;

             default :
                     RE_panic("bad switch in RE_lex") ;
           }
           break ;

     default : /* don't advance the pointer, return T_CAT */
          return prev = T_CAT ;
   }

    /* check for end character */
    if ( *lp == '$' )
    { mp->start->type += END_ON ; lp++ ; }

    return prev ;
}

/* Collect a run of ordinary and escaped characters, starting with c,
   into one string machine.  On return *pp points at the first
   character that is not part of the string, *mp is the machine, and
   the result is T_STR.

   When the run has more than one character and is followed by
   '*', '+' or '?', the last character is given back, because the
   operator applies to that character alone. */
static  int  do_str( c, pp, mp)
  int c ; /* the first character */
  char **pp ;  /* where to put the re_char pointer on exit */
  MACHINE  *mp ;  /* where to put the string machine */
{ register char *p , *s ;
  char *str ;
  char *last ;  /* where the most recently collected character began */
  unsigned len ;

  p = *pp ;
  s = str = RE_malloc( re_len ) ;
  *s++ = c ;  len = 1 ;
  last = p ;

  while ( 1 )
  { char *save ;

    switch( char2token(*p) )
    {
      case  T_CHAR :  last = p ;
                      *s++ = *p++ ;
                      break ;

      case  T_SLASH :
                      last = p ;
                      save = ++p ;
                      *s++ = escape(&save) ;
                      p = save ;
                      break ;

      default  :  goto  out ;
    }
    len++ ;
  }

out:
  /* if len > 1 and we failed on a ? + or * , need to back up.
     An escape sequence is several input characters long, so go back
     to where the last character began rather than one position. */
  if ( len > 1 && (*p == '*' || *p == '+' || *p == '?' ) )
  { len-- ; p = last ; s-- ; }

  *s = 0 ;
  *pp = p ;
  *mp = RE_str((char *) RE_realloc(str, len+1) , len) ;
  return  T_STR ;
}


#define  on( b, x)  ( (b)[(x)>>3] |= ( 1 << ((x)&7) ))

static  void  PROTO(block_on, (BV,int,int) ) ;

/* Turn on the bits x through y (inclusive) of the bit vector b. */
static  void  block_on( b, x, y)
  BV b ; int x, y ;  /* must call with x<=y */
{ int lo = x >> 3 ;
  int hi = y >> 3 ;
  int  i, j, bit  ;

  if ( lo == hi )
    { /* the whole range lies in one byte: set it bit by bit */
      j = x&7 ; bit =  1 << j ; i = (y&7) - j + 1 ;
      for ( ; i ; i-- , bit <<= 1 )  b[lo] |= bit ; }
  else
    { /* full bytes in the middle, partial bytes at both ends */
      for ( i = lo + 1 ; i <= hi - 1 ; i++ )  b[i] = 0xff ;
      b[lo] |= ( 0xff << (x&7) ) ;
      b[hi] |= ~( 0xff << ((y&7)+1)) ;
    }
}

/* build a BV for a character class.
   *start points at the '['
   on exit:   *start points at the character after ']'
              mp points at a machine that recognizes the class

static int  do_class( start, mp)
  char **start ; MACHINE  *mp ;
{ register char *p ;
  register BV   *bvp ;
  int  prev ;
  char *q , *t;
  int  cnt ;
  int comp_flag ;

  p = (*start) + 1 ;
  if ( *p == ']' || *p == '^' && *(p+1) == ']' )
         RE_error_trap(-E3) ;
  while ( 1 )  /* find the back of the class */
    { if ( ! (q = strchr(p,']')) )  /* no closing bracket */
         RE_error_trap(-E3) ;
      p = q-1 ;
      cnt = 0 ;
      while ( *p == '\\') { cnt++ ; p-- ; }
      if ( (cnt & 1) == 0 )  /* even number of \ */  break ;
      p = q+1 ;
  /*  q  now  pts at the back of the class   */
  p = (*start) + 1 ;
  *start = q + 1 ;

  bvp = (BV *) RE_malloc( sizeof(BV) ) ;
  (void) memset( bvp, 0, sizeof(BV) ) ;

  comp_flag = *p == '^' ? p++ , 1 : 0 ;
  prev = -1 ;  /* indicates  -  cannot be part of a range  */

  while ( p < q )
     switch( *p )
      { case '\\' :
          t = ++p ;
          prev = escape(&t) ;
          on(*bvp, prev) ;
          p = t ;
          continue ;

        case '-' :
          if ( prev == -1 || p+1 == q || prev > *(p+1) )
             { prev = '-' ; on(*bvp, '-') ; }
             { p++ ;
               block_on(*bvp, prev, *p) ;
               prev = -1 ;
          break ;

        default :
          prev = *p ;
          on(*bvp, *p) ;
          break ;
      p++ ;

  if ( comp_flag )
    for ( p = (char *) bvp ; p < (char *) bvp + sizeof(BV) ; p++)  *p = ~*p ;

  /* make sure zero is off */
  (*bvp)[0] &= 0xfe ;

  *mp = RE_class( store_bvp( bvp ) ) ;
  return  T_CLASS ;

/* storage for bit vectors so they can be reused ,
   stored in an unsorted linear array 
   the array grows as needed

#define         BV_GROWTH       6

static BV *store_bvp( bvp )
  BV *bvp ;
  static BV **bv_base, **bv_limit ;
  static BV **bv_next ; /* next empty slot in the array */

  register BV **p ;
  unsigned t ;

  if ( bv_next == bv_limit ) /* need to grow */
    if ( ! bv_base )  /* first growth */
    {  t = 0 ; bv_base = (BV**)RE_malloc(BV_GROWTH*sizeof(BV*)) ; }
    { t = bv_next - bv_base ;
      bv_base = (BV**) RE_realloc(bv_base, (t+BV_GROWTH)*sizeof(BV*)) ;

    bv_next = bv_base + t ;
    bv_limit = bv_next + BV_GROWTH ;

  /* put bvp in bv_next as a sentinal */
  *bv_next = bvp ;
  p = bv_base ;
  while ( memcmp(*p, bvp, sizeof(BV)) )  p++ ;

  if ( p == bv_next )  /* it is new */
        bv_next++ ;
  else  /* we already have it */  RE_free(bvp) ;

  return *p ;

/* ----------   convert escape sequences  -------------*/

#define isoctal(x)  ((x)>='0'&&(x)<='7')

#define  NOT_HEX        16
/* values of the characters 'A' .. 'f' as hex digits (ASCII order);
   0 marks a character that is not a hex digit */
static char hex_val['f' - 'A' + 1] = {
10,11,12,13,14,15, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0,
10,11,12,13,14,15 } ;

/* interpret 1 character as hex: returns 0..15, or NOT_HEX */
static int ctohex( c )
  register int c ;
{ int t ;

  if ( c >= '0' && c <= '9' )  return c - '0' ;

  if ( c >= 'A' && c <= 'f' && ( t = hex_val[c-'A'] ))  return t ;

  return NOT_HEX ;
}

/* pairs of (escape letter, character it stands for) */
static char escape_test[] = "n\nt\tb\br\rf\fa\07v\013" ;

/*-----------------
  return the char
  and move the pointer forward
  on entry *s -> at the character after the slash

  Recognized are the letters of escape_test, one to three octal
  digits, and x followed by one or two hex digits; any other
  character stands for itself.  At the end of the string 0 is
  returned and the pointer is not moved.
 *-------------------*/

static int escape(start_p)
  char **start_p ;
{ register char *p = *start_p ;
  register unsigned x ;
  unsigned xx ;
  char *t ;

  /* Checked first: there is nothing to consume after a slash that
     ends the expression. */
  if ( *p == 0 )  return 0 ;

  /* Step over the table pair by pair so that only the letters are
     compared, never the values they stand for. */
  for ( t = escape_test ; *t ; t += 2 )
    if ( *t == *p )
    { *start_p = p + 1 ;
      return  t[1] ;
    }

  if ( isoctal(*p) )
  { x = *p++ - '0' ;
    if ( isoctal(*p) )
    { x = (x<<3) + *p++ - '0' ;
      if ( isoctal(*p) )
         x = (x<<3) + *p++ - '0' ;
    }
    *start_p = p ;
    return  x & 0xff ;
  }

  if ( *p++ == 'x' ) /* might be a hex digit */
  {  if ( (x = ctohex(*p)) == NOT_HEX )
     { *start_p  = p ;  return 'x' ; }

     /* look for another hex digit */
     if ( (xx = ctohex(* ++p)) != NOT_HEX )
     { x = (x<<4) + xx ; p++ ; }

     *start_p = p ; return x ;
  }

  /* anything else \c -> c */
  *start_p = p ;
  return p[-1]  ;
}
@//E*O*F mawk0.97/rexp/rexp0.c//
chmod u=rw,g=r,o=r mawk0.97/rexp/rexp0.c
echo x - mawk0.97/rexp/rexp1.c
sed 's/^@//' > "mawk0.97/rexp/rexp1.c" <<'@//E*O*F mawk0.97/rexp/rexp1.c//'

/********************************************
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*  rexp1.c   */

/*  re machine  operations  */

#include  "rexp.h"

static MACHINE *PROTO( new_TWO , (int) ) ;

static  MACHINE  *new_TWO(type)
  int type ;
  static  MACHINE  x ;

  x.start = (STATE *) RE_malloc(2*STATESZ) ;
  x.stop = x.start + 1 ;
  x.start->type = type ;
  x.stop->type = M_ACCEPT ;
  return &x ;
} ;

/*  build a machine that recognizes any  */
{ return  * new_TWO(M_ANY) ; }

/*  machine that recognizes the start of the string  */
MACHINE  RE_start()
{ MACHINE *two = new_TWO(M_START) ;

  return  *two ;
}

{ return  * new_TWO(M_END) ; }

/*  build a machine that recognizes a class;
    the machine refers to the bit vector bvp and does not copy it  */
MACHINE  RE_class( bvp )
  BV *bvp  ;
{ register MACHINE *p = new_TWO(M_CLASS) ;

  p->start->data.bvp = bvp ;
  return *p ;
}

{ return  *new_TWO(M_U) ; }

/*  build a machine that recognizes the string str of length len;
    the machine keeps the pointer str and does not copy the text.
    NOTE(review): len is stored in an unsigned char, so a length
    above 255 would not survive -- confirm callers stay below that.  */
MACHINE  RE_str( str, len)
  char *str ;
  unsigned len ;
{ register MACHINE *p = new_TWO(M_STR) ;

  p->start->len = len ;
  p->start->data.str = str ;
  return *p ;
}

/*  replace m and n by a machine that recognizes  mn.
    The result is left in *mp; the states of n are freed.  */
void  RE_cat( mp, np)
  MACHINE  *mp, *np ;
{ unsigned sz1, sz2, sz ;

  sz1 = mp->stop - mp->start  ;      /* m without its accept state */
  sz2 = np->stop - np->start + 1 ;   /* all of n */
  sz  = sz1 + sz2 ;

  mp->start = (STATE *) RE_realloc( mp->start, sz * STATESZ ) ;
  mp->stop = mp->start + (sz - 1) ;
  /* n goes on top of the accept state of m */
  (void)  memcpy( mp->start + sz1, np->start, sz2 * STATESZ ) ;
  RE_free( np->start ) ;
}

 /*  replace m by a machine that recognizes m|n.
     Layout of the result:  M_2JA, the states of m with the accept
     state turned into an M_1J to the end, the states of n.
     The old states of m and n are freed.  */

void  RE_or( mp, np)
  MACHINE  *mp, *np ;
{ register STATE *p ;
  unsigned szm, szn ;

  szm = mp->stop - mp->start + 1 ;
  szn = np->stop - np->start + 1 ;

  p = (STATE *) RE_malloc( (szm+szn+1) * STATESZ ) ;
  (void) memcpy( p+1, mp->start, szm * STATESZ ) ;
  RE_free( mp->start) ;
  mp->start = p ;
  (mp->stop  = p + szm + szn) -> type = M_ACCEPT ;
  p->type = M_2JA ;
  p->data.jump = szm+1 ;
  (void) memcpy( p + szm + 1 , np->start, szn * STATESZ) ;
  RE_free( np->start ) ;
  (p += szm)->type = M_1J ;
  p->data.jump = szn ;
}


/*  replace m by m*
    Layout of the result:  M_2JA, the states of m with the accept
    state turned into an M_2JB back to the start of m, M_ACCEPT.  */

void  RE_close( mp )
  MACHINE  *mp ;
{ register STATE *p ;
  unsigned sz ;

  sz = mp->stop - mp->start + 1 ;
  p = (STATE *) RE_malloc( (sz+2) * STATESZ ) ;
  (void) memcpy( p+1, mp->start, sz * STATESZ) ;
  RE_free( mp->start ) ;
  mp->start = p ;
  mp->stop  = p + (sz+1) ;
  p->type = M_2JA ;
  p->data.jump = sz + 1 ;
  (p += sz) -> type = M_2JB ;
  /* convert before negating: sz is unsigned */
  p->data.jump = -(int) (sz-1) ;
  (p+1)->type = M_ACCEPT ;
}

/*  replace m  by  m+  (positive closure)
    The accept state of m becomes an M_2JB back to the start of m,
    and a new M_ACCEPT is appended.  */

void  RE_poscl( mp )
  MACHINE  *mp ;
{ register STATE *p ;
  unsigned  sz ;

  sz = mp->stop - mp->start + 1 ;
  mp->start = p = (STATE *) RE_realloc(mp->start ,  (sz+1) * STATESZ ) ;
  mp->stop  = p + sz ;
  p +=  --sz ;
  p->type = M_2JB ;
  /* convert before negating: sz is unsigned */
  p->data.jump = -(int) sz ;
  (p+1)->type = M_ACCEPT ;
}

/* replace  m  by  m? (zero or one)
   An M_2JB that jumps to the accept state is put in front of m.  */

void  RE_01( mp )
  MACHINE  *mp ;
{ unsigned  sz ;
  register  STATE *p ;

  sz = mp->stop - mp->start + 1 ;
  p = (STATE *) RE_malloc( (sz+1) * STATESZ ) ;
  (void) memcpy( p+1, mp->start, sz * STATESZ) ;
  RE_free( mp->start ) ;
  mp->start = p ;
  mp->stop = p + sz ;
  p->type = M_2JB ;
  p->data.jump = sz ;
}


/* malloc() that does not return on failure: RE_error_trap() is
   entered with MEMORY_FAILURE instead. */
VOID *RE_xmalloc( sz )
  unsigned sz ;
{ register VOID *p ;

  if ( ! ( p = malloc(sz) ) )  RE_error_trap(MEMORY_FAILURE) ;
  return p ;
}

/* realloc() that does not return on failure: RE_error_trap() is
   entered with MEMORY_FAILURE instead. */
VOID *RE_xrealloc( p, sz)
  register VOID *p ; unsigned sz ;
{ if ( ! ( p = realloc( p, sz) ) )  RE_error_trap(MEMORY_FAILURE) ;
  return p ;
}

@//E*O*F mawk0.97/rexp/rexp1.c//
chmod u=rw,g=r,o=r mawk0.97/rexp/rexp1.c
echo x - mawk0.97/rexp/rexp2.c
sed 's/^@//' > "mawk0.97/rexp/rexp2.c" <<'@//E*O*F mawk0.97/rexp/rexp2.c//'

/********************************************
copyright 1991, Michael D. Brennan

This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.

See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/

/*  rexp2.c   */

/*  test a string against a machine   */

#include "rexp.h"
#include <string.h>

/* statics */
static RT_STATE *PROTO(slow_push,(RT_STATE *,STATE*,char*,int)); 

/*  check that a bit is on  */
#define  ison(b,x) ( (b)[(x)>>3] & ( 1 << ((x)&7)  ))

RT_STATE *RE_run_stack_base; 
RT_STATE *RE_run_stack_limit ;
/* for statistics and debug */
static RT_STATE *stack_max ; 

/* Allocate the run time stack (16 entries) on the first call; later
   calls do nothing, so a stack that has grown in the meantime keeps
   its limit. */
void RE_run_stack_init()
{ if ( !RE_run_stack_base )
  {
    RE_run_stack_base = (RT_STATE *)
                 RE_malloc(sizeof(RT_STATE) * 16 ) ;
    RE_run_stack_limit = RE_run_stack_base + 16 ;
    stack_max = RE_run_stack_base-1 ;
  }
}

/* Grow the run time stack by 8 entries.  The stack may move; the
   return value points at the first new entry, i.e. at the position
   of the old limit within the new stack. */
RT_STATE  *RE_new_run_stack()
{ int oldsize = RE_run_stack_limit - RE_run_stack_base ;

  RE_run_stack_base = (RT_STATE *) RE_realloc( RE_run_stack_base ,
          (oldsize+8) * sizeof(RT_STATE) ) ;
  RE_run_stack_limit = RE_run_stack_base + oldsize + 8 ;
  return  stack_max = RE_run_stack_base + oldsize ;
}

static RT_STATE *slow_push(sp, m, s, u)
  RT_STATE *sp ;
  STATE *m ;
  char *s ;
  int   u ;
  if ( sp > stack_max )
     if ( (stack_max = sp) == RE_run_stack_limit )
	     sp = RE_new_run_stack() ;

  sp->m = m ; sp->s = s ; sp->u = u ;
  return sp ;

#ifdef   DEBUG
/* debug aid: print the largest number of run time stack entries
   that has been in use */
void  print_max_stack(f)
  FILE *f ;
{ /* the pointer difference is not an int, so convert it for %d */
  fprintf(f, "stack_max = %d\n", (int) (stack_max-RE_run_stack_base+1)) ; }
#endif

#ifdef   DEBUG
#define  push(mx,sx,ux)   stackp = slow_push(++stackp, mx, sx, ux)
#define  push(mx,sx,ux)   if (++stackp == RE_run_stack_limit)\
                                stackp = slow_push(stackp,mx,sx,ux) ;\

More information about the Alt.sources mailing list