mawk0.97.shar 5 of 6
Mike Brennan
brennan at ssc-vax.UUCP
Sun May 12 00:57:57 AEST 1991
------------------cut here----------------
case SC_SPACE : goto reswitch ;
case SC_COMMENT :
eat_comment() ; goto reswitch ;
case SC_NL :
lineno++ ; eat_nl() ;
ct_ret(NL) ;
case SC_ESCAPE :
while ( scan_code[ c = next() ] == SC_SPACE ) ;
if ( c == '\n')
{ token_lineno = ++lineno ; goto reswitch ; }
if ( c == 0 ) ct_ret(EOF) ;
un_next() ;
yylval.ival = '\\' ;
ct_ret(UNEXPECTED) ;
case SC_SEMI_COLON :
eat_nl() ;
ct_ret(SEMI_COLON) ;
case SC_LBRACE :
eat_nl() ; brace_cnt++ ;
ct_ret(LBRACE) ;
case SC_PLUS :
test2_ret('+', INC, '=', ADD_ASG, PLUS ) ;
case SC_MINUS :
test2_ret('-', DEC, '=', SUB_ASG, MINUS ) ;
case SC_COMMA : eat_nl() ; ct_ret(COMMA) ;
case SC_MUL : test1_ret('=', MUL_ASG, MUL) ;
case SC_DIV :
{ int *p = can_precede_re ;
do
if ( *p == current_token )
ct_ret( collect_RE() ) ;
while ( *p++ != -1 ) ;
test1_ret( '=', DIV_ASG , DIV ) ;
}
case SC_MOD : test1_ret('=', MOD_ASG, MOD) ;
case SC_POW : test1_ret('=' , POW_ASG, POW) ;
case SC_LPAREN :
paren_cnt++ ;
ct_ret(LPAREN) ;
case SC_RPAREN :
if ( --paren_cnt < 0 )
{ compile_error( "extra ')'" ) ;
paren_cnt = 0 ;
goto reswitch ; }
ct_ret(RPAREN) ;
case SC_LBOX : ct_ret(LBOX) ;
case SC_RBOX : ct_ret(RBOX) ;
case SC_MATCH : ct_ret(MATCH) ;
case SC_EQUAL :
test1_ret( '=', EQ, ASSIGN ) ;
case SC_NOT : /* ! */
test2_ret('=', NEQ, '~', NOT_MATCH, NOT ) ;
case SC_LT : /* '<' */
if ( getline_flag )
{ getline_flag = 0 ; ct_ret(IO_IN) ; }
else
{ ct_ret( ifnext('=', LTE , LT) ) ; }
case SC_GT : /* '>' */
if ( print_flag && paren_cnt == 0 )
{ print_flag = 0 ;
/* there are 3 types of IO_OUT
-- build the error string in temp_buff */
temp_buff.string_buff[0] = '>' ;
if ( next() == '>' )
{
yylval.ival = F_APPEND ;
temp_buff.string_buff[1] = '>' ;
temp_buff.string_buff[2] = 0 ;
}
else
{ un_next() ;
yylval.ival = F_TRUNC ;
temp_buff.string_buff[1] = 0 ;
}
return current_token = IO_OUT ;
}
ct_ret( ifnext('=', GTE , GT) ) ;
case SC_OR :
if ( next() == '|' )
{ eat_nl() ; ct_ret(brace_cnt?OR:P_OR) ; }
else
{ un_next() ;
if ( print_flag && paren_cnt == 0 )
{ print_flag = 0 ;
yylval.ival = PIPE_OUT;
temp_buff.string_buff[0] = '|' ;
temp_buff.string_buff[1] = 0 ;
ct_ret(IO_OUT) ;
}
else ct_ret(PIPE) ;
}
case SC_AND :
if ( next() == '&' )
{ eat_nl() ; ct_ret(brace_cnt?AND:P_AND) ; }
else
{ un_next() ; yylval.ival = '&' ; ct_ret(UNEXPECTED) ; }
case SC_QMARK : ct_ret(QMARK) ;
case SC_COLON : ct_ret(COLON) ;
case SC_RBRACE :
if ( --brace_cnt < 0 )
{ compile_error("extra '}'" ) ;
brace_cnt = 0 ; goto reswitch ; }
if ( (c = current_token) == NL || c == SEMI_COLON
|| c == SC_FAKE_SEMI_COLON || c == RBRACE )
{ eat_nl() ; ct_ret(RBRACE) ; }
brace_cnt++ ; un_next() ;
current_token = SC_FAKE_SEMI_COLON ;
return SEMI_COLON ;
case SC_DIGIT :
case SC_DOT :
{ double d ;
int flag ;
if ( (d = collect_decimal(c, &flag)) == 0.0 )
if ( flag ) ct_ret(flag) ;
else yylval.cp = &cell_zero ;
else if ( d == 1.0 ) yylval.cp = &cell_one ;
else
{ yylval.cp = new_CELL() ;
yylval.cp->type = C_DOUBLE ;
yylval.cp->dval = d ;
}
ct_ret( CONSTANT ) ;
}
case SC_DOLLAR : /* '$' */
{ double d ;
int flag ;
while ( scan_code[c = next()] == SC_SPACE ) ;
if ( scan_code[c] != SC_DIGIT &&
scan_code[c] != SC_DOT )
{ un_next() ; ct_ret(DOLLAR) ; }
/* compute field address at compile time */
if ( (d = collect_decimal(c, &flag)) == 0.0 )
if ( flag ) ct_ret(flag) ; /* an error */
else yylval.cp = &field[0] ;
else
{ int k = (int) d ;
if ( k > MAX_FIELD )
{ compile_error(
"maximum field index(%d) exceeded" , k ) ;
k = MAX_FIELD ;
}
else yylval.cp = &field[k] ;
}
ct_ret(FIELD) ;
}
case SC_DQUOTE :
return current_token = collect_string() ;
case SC_IDCHAR : /* collect an identifier */
{ unsigned char *p =
(unsigned char *)temp_buff.string_buff + 1 ;
SYMTAB *stp ;
temp_buff.string_buff[0] = c ;
while (
(c = scan_code[ *p++ = next()]) == SC_IDCHAR ||
c == SC_DIGIT ) ;
un_next() ; * --p = 0 ;
switch( (stp = find(temp_buff.string_buff))->type )
{ case ST_NONE :
/* check for function call before defined */
if ( next() == '(' )
{ stp->type = ST_FUNCT ;
stp->stval.fbp = (FBLOCK *)
zmalloc(sizeof(FBLOCK)) ;
stp->stval.fbp->name = stp->name ;
stp->stval.fbp->code = (INST *) 0 ;
yylval.fbp = stp->stval.fbp ;
current_token = FUNCT_ID ;
}
else
{ yylval.stp = stp ;
current_token = ID ;
}
un_next() ;
break ;
case ST_VAR :
case ST_ARRAY :
case ST_LOCAL_NONE :
case ST_LOCAL_VAR :
case ST_LOCAL_ARRAY :
yylval.stp = stp ;
current_token = ID ;
break ;
case ST_FUNCT :
yylval.fbp = stp->stval.fbp ;
current_token = FUNCT_ID ;
break ;
case ST_KEYWORD :
current_token = stp->stval.kw ;
break ;
case ST_BUILTIN :
yylval.bip = stp->stval.bip ;
current_token = BUILTIN ;
break ;
case ST_FIELD :
yylval.cp = stp->stval.cp ;
current_token = FIELD ;
break ;
case ST_LENGTH :
{ CELL *bi_length() ;
static BI_REC length_bi_rec =
{ "length", bi_length, 1, 1 } ;
while ( scan_code[ c = next() ] == SC_SPACE ) ;
un_next() ;
if ( c == '(' )
{ yylval.bip = &length_bi_rec ;
current_token = BUILTIN ;
}
else current_token = LENGTH ;
}
break ;
default :
bozo("find returned bad st type") ;
}
return current_token ;
}
case SC_UNEXPECTED :
yylval.ival = c & 0xff ;
ct_ret(UNEXPECTED) ;
}
return 0 ; /* never get here make lint happy */
}
/* collect a decimal constant in temp_buff.
   Return the value and error conditions by reference

   c    : first character of the constant (a digit or '.'), already
          consumed by the caller
   flag : set to 0 on success, otherwise to the token code the caller
          should return:
            UNEXPECTED  -- a '.' not followed by a digit
                           (yylval.ival is set to '.')
            BAD_DECIMAL -- a malformed constant

   Returns the value computed by strtod(), or 0.0 whenever *flag is
   set.  On overflow/underflow compile_error() is called, but *flag
   stays 0 and strtod()'s result is still returned.
*/
static double collect_decimal(c, flag)
  int c ; int *flag ;
{ register unsigned char *p = (unsigned char*) temp_buff.string_buff + 1;
  char *endp ;  /* strtod() takes a char **, not an unsigned char ** */
  double d ;

  *flag = 0 ;
  temp_buff.string_buff[0] = c ;

  if ( c == '.' )
  { if ( scan_code[*p++ = next()] != SC_DIGIT )
    { *flag = UNEXPECTED ; yylval.ival = '.' ;
      return 0.0 ; }
  }
  else
  { /* integer part; keep a following '.', push back anything else */
    while ( scan_code[*p++ = next()] == SC_DIGIT ) ;
    if ( p[-1] != '.' )
    { un_next() ; p-- ; }
  }

  /* get rest of digits after decimal point */
  while ( scan_code[*p++ = next()] == SC_DIGIT ) ;

  /* check for exponent */
  if ( p[-1] != 'e' && p[-1] != 'E' )
  { un_next() ; * --p = 0 ; }
  else /* get the exponent */
  if ( scan_code[*p = next()] != SC_DIGIT &&
       *p != '-' && *p != '+' )
  { *++p = 0 ; *flag = BAD_DECIMAL ;
    return 0.0 ; }
  else /* get the rest of the exponent */
  { p++ ;
    while ( scan_code[*p++ = next()] == SC_DIGIT ) ;
    un_next() ; * --p = 0 ;
  }

  errno = 0 ; /* check for overflow/underflow */
  d = strtod( temp_buff.string_buff, &endp ) ;
  if ( errno )
      compile_error( "%s : decimal %sflow" , temp_buff.string_buff,
                     d == 0.0 ? "under" : "over") ;

  /* strtod() must have consumed exactly the text collected above */
  if ( (unsigned char *) endp != p )
  { *flag = BAD_DECIMAL ; return 0.0 ; }
  return d ;
}
/*---------- process escape characters ---------------*/
/* hex_val[] maps a character c in the range 'A' .. 'f' (indexed by
   c - 'A') to its value as a hex digit: 'A'-'F' and 'a'-'f' give
   10 .. 15, every other character in that range gives 0.
   No hex letter maps to 0, so a non-zero entry also serves as the
   "is a hex letter" test used by ishex() below. */
static char hex_val['f' - 'A' + 1] = {
10,11,12,13,14,15, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
10,11,12,13,14,15 } ;
/* isoctal(x)   : x is one of '0' .. '7'
   hex_value(x) : table lookup; only valid for 'A' <= x <= 'f'
   ishex(x)     : x is a decimal digit or a hex letter
   NOTE: isoctal() and ishex() evaluate their argument more than
   once, so do not pass an expression with side effects. */
#define isoctal(x) ((x)>='0'&&(x)<='7')
#define hex_value(x) hex_val[(x)-'A']
#define ishex(x) (scan_code[x] == SC_DIGIT ||\
'A' <= (x) && (x) <= 'f' && hex_value(x))
/* both helpers advance the caller's pointer past the digits they use */
static int PROTO(octal, (char **)) ;
static int PROTO(hex, (char **)) ;
/* Convert an octal escape of one, two or three digits.
   On entry *start_p points at the first digit, which the caller has
   already checked; on return *start_p points just past the last
   digit used.  The result is reduced to eight bits. */
static int octal( start_p )
  char **start_p ;
{ register char *cursor = *start_p ;
  register unsigned value = *cursor++ - '0' ;
  int extra ;

  /* at most two more digits may follow the first one */
  for ( extra = 0 ; extra < 2 && isoctal(*cursor) ; extra++ )
      value = (value<<3) + *cursor++ - '0' ;

  *start_p = cursor ;
  return value & 0xff ;
}
/* process one or two hex digits
   moving a pointer forward by reference

   On entry *start_p points at the first hex digit; the caller has
   already verified it with ishex().  On return *start_p points just
   past the last digit used and the value (0 .. 255) is returned.

   The parameter is a char ** so that the definition agrees with the
   prototype above and with the caller in rm_escape(), which passes
   the address of a char *.  The bytes are examined as unsigned char
   so they can index scan_code[] safely.
*/
static int hex( start_p )
  char **start_p ;
{ register unsigned char *p = (unsigned char *) *start_p ;
  register unsigned x ;
  unsigned t ;

  /* first digit: known to be hex */
  if ( scan_code[*p] == SC_DIGIT )
      x = *p++ - '0' ;
  else x = hex_value(*p++) ;

  /* optional second digit */
  if ( scan_code[*p] == SC_DIGIT )
      x = (x<<4) + *p++ - '0' ;
  else
  if ( 'A' <= *p && *p <= 'f' && (t = hex_value(*p)) )
  { x = (x<<4) + t ; p++ ; }

  *start_p = (char *) p ;
  return x ;
}
/* escape_test[] is a list of (selector, replacement) character
   pairs: the character that follows the backslash, then the
   character the escape stands for.  Selectors are at the even
   indices, replacements at the odd ones. */
static char escape_test[] =
"n\nt\tb\br\rf\fa\07v\013\\\\\"\"\'\'" ;

/* process the escape characters in a string, in place .

   s : NUL terminated string, rewritten in place (the result is
       never longer than the input)
   returns s

   Recognized: the pairs in escape_test[], \ddd (one to three octal
   digits) and \xhh (one or two hex digits).  A backslash followed
   by anything else is copied through unchanged, backslash included.

   Only the selector positions of escape_test[] are searched.  A
   plain strchr() over the table would also match a replacement
   character (e.g. backslash + literal TAB would turn into 'b'), and
   would match the terminating NUL for a backslash that ends the
   string, reading past both the table and the string.
*/
static char *rm_escape(s)
  char *s ;
{ register char *p, *q ;
  char *t ;

  q = p = s ;
  while ( *p )
  {
    if ( *p != '\\' ) { *q++ = *p++ ; continue ; }

    if ( * ++p == 0 )  /* lone trailing backslash: keep it and stop */
    { *q++ = '\\' ; break ; }

    /* step two at a time so only selectors are compared */
    for ( t = escape_test ; *t && *t != *p ; t += 2 ) ;

    if ( *t )
    { p++ ; *q++ = t[1] ; }
    else
    if ( isoctal(*p) )
    { t = p ; *q++ = octal(&t) ; p = t ; }
    else
    if ( *p == 'x' && ishex(*(unsigned char*)(p+1)) )
    { t = p+1 ; *q++ = hex(&t) ; p = t ; }
    else /* not an escape sequence */
    { *q++ = '\\' ; *q++ = *p++ ; }
  }

  *q = 0 ;
  return s ;
}
/* Collect a string constant into temp_buff.string_buff; the caller
   has already consumed the opening double quote.
   On success yylval.cp is set to a new C_STRING cell holding the
   text (with escapes processed by rm_escape() if any were seen) and
   CONSTANT is returned.  A newline or a NUL character inside the
   string is reported as a runaway string and mawk_exit(1) is called. */
static int collect_string()
{ register unsigned char *p = (unsigned char *)temp_buff.string_buff ;
int c ;
int e_flag = 0 ; /* on if have an escape char */
while ( 1 )
switch( scan_code[ *p++ = next() ] )
{ case SC_DQUOTE : /* done */
* --p = 0 ; goto out ; /* overwrite the closing quote */
case SC_NL :
p[-1] = 0 ; /* cut the newline off the text shown in the message */
/* fall thru */
case 0 : /* unterminated string */
/* scan_code[] is 0 only for the NUL character, which has just
been stored and so already terminates the buffer.
NOTE(review): token_lineno is passed but the format string has no
conversion for it -- confirm how compile_error() treats it */
compile_error(
"runaway string constant \"%.10s ..." ,
temp_buff.string_buff, token_lineno ) ;
mawk_exit(1) ;
case SC_ESCAPE :
/* backslash-newline is a line continuation: drop the backslash
and count the line */
if ( (c = next()) == '\n' )
{ p-- ; lineno++ ; }
else
/* NUL after the backslash: push it back so the next pass of the
loop reports the runaway string */
if ( c == 0 ) un_next() ;
else
/* keep backslash and character; rm_escape() decodes them later */
{ *p++ = c ; e_flag = 1 ; }
break ;
default : break ;
}
out:
yylval.cp = new_CELL() ;
yylval.cp->type = C_STRING ;
yylval.cp->ptr = (PTR) new_STRING(
e_flag ? rm_escape( temp_buff.string_buff )
: temp_buff.string_buff ) ;
return CONSTANT ;
}
static int collect_RE()
{ register unsigned char *p = (unsigned char*) temp_buff.string_buff ;
int c ;
STRING *sval ;
while ( 1 )
switch( scan_code[ *p++ = next() ] )
{ case SC_DIV : /* done */
* --p = 0 ; goto out ;
case SC_NL :
p[-1] = 0 ;
/* fall thru */
case 0 : /* unterminated re */
compile_error(
"runaway regular expression /%.10s ..." ,
temp_buff.string_buff, token_lineno ) ;
mawk_exit(1) ;
case SC_ESCAPE :
switch( c = next() )
{ case '/' :
p[-1] = '/' ; break ;
case '\n' :
p-- ; break ;
case 0 :
un_next() ; break ;
default :
*p++ = c ; break ;
}
break ;
}
out:
/* now we've got the RE, so compile it */
sval = new_STRING( temp_buff.string_buff ) ;
yylval.cp = new_CELL() ;
yylval.cp->type = C_RE ;
yylval.cp->ptr = re_compile(sval) ;
free_STRING(sval) ;
return RE ;
}
@//E*O*F mawk0.97/scan.c//
chmod u=rw,g=r,o=r mawk0.97/scan.c
echo x - mawk0.97/scan.h
sed 's/^@//' > "mawk0.97/scan.h" <<'@//E*O*F mawk0.97/scan.h//'
/********************************************
scan.h
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* $Log: scan.h,v $
* Revision 2.2 91/04/09 12:39:31 brennan
* added static to funct decls to satisfy STARDENT compiler
*
* Revision 2.1 91/04/08 08:23:54 brennan
* VERSION 0.97
*
*/
/* scan.h */
#ifndef SCAN_H_INCLUDED
#define SCAN_H_INCLUDED 1
#include <stdio.h>
#ifndef MAKESCAN
#include "symtype.h"
#include "parse.h"
#endif
extern char scan_code[256] ;
/* the scan codes to compactify the main switch */
#define SC_SPACE 1
#define SC_NL 2
#define SC_SEMI_COLON 3
#define SC_FAKE_SEMI_COLON 4
#define SC_LBRACE 5
#define SC_RBRACE 6
#define SC_QMARK 7
#define SC_COLON 8
#define SC_OR 9
#define SC_AND 10
#define SC_PLUS 11
#define SC_MINUS 12
#define SC_MUL 13
#define SC_DIV 14
#define SC_MOD 15
#define SC_POW 16
#define SC_LPAREN 17
#define SC_RPAREN 18
#define SC_LBOX 19
#define SC_RBOX 20
#define SC_IDCHAR 21
#define SC_DIGIT 22
#define SC_DQUOTE 23
#define SC_ESCAPE 24
#define SC_COMMENT 25
#define SC_EQUAL 26
#define SC_NOT 27
#define SC_LT 28
#define SC_GT 29
#define SC_COMMA 30
#define SC_DOT 31
#define SC_MATCH 32
#define SC_DOLLAR 33
#define SC_UNEXPECTED 34
#ifndef MAKESCAN
/* global functions in scan.c */
void PROTO(scan_init, (int, char *) ) ;
void PROTO(scan_cleanup, (void) ) ;
void PROTO(eat_nl, (void) ) ;
int PROTO(yylex, (void) ) ;
extern YYSTYPE yylval ;

/* Helper macros for the scanner.  They rely on buffp, slow_next()
   and current_token being visible where they are expanded.

   ct_ret(x)     record x as the current token and return it
   next()        fetch the next input character; slow_next() is
                 called when the byte at buffp is 0
   un_next()     push the last character back
   ifnext(c,x,y) expression: x if the next character is c, otherwise
                 push the character back and yield y
   test1_ret     return token x if the next character is c, else
                 push it back and return token d
   test2_ret     same, with two candidate characters

   Arguments and expansions are parenthesized, and test1_ret is
   wrapped in do { } while ( 0 ) so it behaves as one statement, e.g.
   inside an unbraced if/else.
*/
#define ct_ret(x) return current_token = (x)
#define next() (*buffp ? *buffp++ : slow_next())
#define un_next() (buffp--)
#define ifnext(c,x,y) (next()==(c)?(x):(un_next(),(y)))
#define test1_ret(c,x,d) do {\
    if ( next() == (c) ) ct_ret(x) ;\
    else { un_next() ; ct_ret(d) ; }\
    } while ( 0 )
#define test2_ret(c1,x1,c2,x2,d) switch( next() )\
    { case c1: ct_ret(x1) ;\
      case c2: ct_ret(x2) ;\
      default: un_next() ;\
               ct_ret(d) ; }
#endif /* ! MAKESCAN */
#endif
@//E*O*F mawk0.97/scan.h//
chmod u=rw,g=r,o=r mawk0.97/scan.h
echo x - mawk0.97/scancode.c
sed 's/^@//' > "mawk0.97/scancode.c" <<'@//E*O*F mawk0.97/scancode.c//'
/* scancode.c */
/* scan_code[c] gives the SC_ class (see scan.h) of input character
   c; the scanner switches on it.  0 is used only for the NUL
   character, and 34 (SC_UNEXPECTED) for every character that cannot
   start a token.  The row notes below assume the ASCII character
   set. */
char scan_code[256] = {
0,34,34,34,34,34,34,34,34, 1, 2, 1, 1, 1,34,34, /* NUL; TAB, VT, FF, CR are SC_SPACE, LF is SC_NL */
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, /* 0x10 - 0x1f */
1,27,23,25,33,15,10,34,17,18,13,11,30,12,31,14, /* space ! " # $ % & ' ( ) * + , - . / */
22,22,22,22,22,22,22,22,22,22, 8, 3,28,26,29, 7, /* 0 - 9 : ; < = > ? */
34,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21, /* @ A - O */
21,21,21,21,21,21,21,21,21,21,21,19,24,20,16,21, /* P - Z [ \ ] ^ _ */
34,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21, /* ` a - o */
21,21,21,21,21,21,21,21,21,21,21, 5, 9, 6,32,34, /* p - z { | } ~ DEL */
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34, /* 0x80 - 0xff : all SC_UNEXPECTED */
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,
34,34,34,34,34,34,34,34,34,34,34,34,34,34,34,34
} ;
@//E*O*F mawk0.97/scancode.c//
chmod u=rw,g=r,o=r mawk0.97/scancode.c
echo x - mawk0.97/sizes.h
sed 's/^@//' > "mawk0.97/sizes.h" <<'@//E*O*F mawk0.97/sizes.h//'
/********************************************
sizes.h
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* $Log: sizes.h,v $
* Revision 2.1 91/04/08 08:24:09 brennan
* VERSION 0.97
*
*/
/* sizes.h */
#ifndef SIZES_H
#define SIZES_H
#define HASH_PRIME 53
#define A_HASH_PRIME 37
#if SMALL_EVAL_STACK
/* allow some but not a lot of recursion */
#define EVAL_STACK_SIZE 64
#else
#define EVAL_STACK_SIZE 256
#endif
#define MAX_COMPILE_ERRORS 5 /* quit if more than 4 errors */
#define BUFFSZ 4096 /* input buffer size */
#define MAX_LOOP_DEPTH 20
/* should never be exceeded; it doesn't matter if it's too
big (unless grossly so) because resources sized by it are freed */
#define MAX_FIELD 100 /* biggest field number */
#define SPRINTF_SZ 300 /* biggest sprintf string length */
/* the size of the temp buffer in front of main_buff */
#define PTR_SZ sizeof(PTR)
#define TEMP_BUFF_SZ (MAX_FIELD*PTR_SZ > SPRINTF_SZ ?\
MAX_FIELD*PTR_SZ : SPRINTF_SZ )
#define PAGE_SZ 1024 /* max instructions for a block */
#endif /* SIZES_H */
@//E*O*F mawk0.97/sizes.h//
chmod u=rw,g=r,o=r mawk0.97/sizes.h
echo x - mawk0.97/split.c
sed 's/^@//' > "mawk0.97/split.c" <<'@//E*O*F mawk0.97/split.c//'
/********************************************
split.c
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* $Log: split.c,v $
* Revision 2.1 91/04/08 08:24:11 brennan
* VERSION 0.97
*
*/
/* split.c */
#include "mawk.h"
#include "symtype.h"
#include "bi_vars.h"
#include "bi_funct.h"
#include "memory.h"
#include "scan.h"
#include "regexp.h"
#include "field.h"
#include <string.h>
/* split string s on SPACE without changing s.
   load the pieces into STRINGS and ptrs into
   temp_buff.ptr_buff[]
   return the number of pieces

   s is written to only temporarily: while a piece is being scanned
   its terminating NUL is replaced by a blank sentinel so the inner
   loop needs no end-of-string test; the NUL is restored right after.

   temp_buff.ptr_buff[] has MAX_FIELD slots, so the limit is checked
   before each store rather than after the loop; no pointer is ever
   written past the end of the array.  As before, more than MAX_FIELD
   pieces is reported through rt_overflow().
*/
int space_split( s )
  register char *s ;
{ char *back = strchr(s,0) ;
  int i = 0 ;
  int len ;
  char *q ;
  STRING *sval ;

  while ( 1 )
  { while ( scan_code[*(unsigned char*)s] == SC_SPACE ) s++ ;
    if ( *s == 0 ) break ;

    if ( i == MAX_FIELD )  /* no room for another piece */
    { rt_overflow("maximum number of fields", MAX_FIELD) ;
      break ;  /* only reached if rt_overflow() returns */
    }

    /* mark the front with q */
    q = s++ ;
    *back = ' ' ; /* sentinel */
    while ( scan_code[*(unsigned char*)s] != SC_SPACE ) s++ ;
    *back = 0 ;

    sval = (STRING *) (temp_buff.ptr_buff[i++] =
           (PTR) new_STRING((char *) 0, len = s - q )) ;
    (void) memcpy(sval->str, q, len) ;
  }
  return i ;
}
/* Find the first match of re in s that has positive length.
   Returns a pointer to the start of that match with its length in
   *lenp, or a null pointer when REmatch() finds nothing more or only
   an empty match at the end of the string. */
char *re_pos_match(s, re, lenp)
  register char *s ;
  PTR re ; unsigned *lenp ;
{
  for ( ; ; )
  { s = REmatch(s, re, lenp) ;
    if ( ! s ) break ;              /* no match at all */
    if ( *lenp != 0 ) return s ;    /* non-empty match: done */
    if ( *s == 0 ) break ;          /* empty match at end of string */
    s++ ;                           /* skip the empty match, retry */
  }
  return (char *) 0 ;
}
/* split string s on the non-empty matches of regular expression re.
   load the pieces into STRINGS and ptrs into
   temp_buff.ptr_buff[]
   return the number of pieces (always at least one: the text after
   the last match, possibly empty)

   temp_buff.ptr_buff[] has MAX_FIELD slots, so the limit is checked
   before each store rather than after the last one; no pointer is
   ever written past the end of the array.  As before, more than
   MAX_FIELD pieces is reported through rt_overflow().
*/
int re_split(s, re)
  char *s ;
  PTR re ;
{ register char *t ;
  int i = 0 ;
  unsigned mlen, len ;
  STRING *sval ;

  while ( (t = re_pos_match(s, re, &mlen)) != (char *) 0 )
  { if ( i == MAX_FIELD )  /* no room for another piece */
    { rt_overflow("maximum number of fields", MAX_FIELD) ;
      return i ;  /* only reached if rt_overflow() returns */
    }
    sval = (STRING*)(temp_buff.ptr_buff[i++] = (PTR)
           new_STRING( (char *)0, len = t-s) ) ;
    (void) memcpy(sval->str, s, len) ;
    s = t + mlen ;
  }

  if ( i == MAX_FIELD )  /* no room for the trailing piece */
  { rt_overflow("maximum number of fields", MAX_FIELD) ;
    return i ;  /* only reached if rt_overflow() returns */
  }
  temp_buff.ptr_buff[i++] = (PTR) new_STRING(s) ;
  return i ;
}
/* split(s, X, r)
split s into array X on r
entry: sp[0] holds r
sp[-1] pts at X
sp[-2] holds s
exit: the cell that held s is overwritten with the number of
pieces as a C_DOUBLE, and a pointer to that cell is returned
*/
CELL *bi_split(sp)
register CELL *sp ;
{
int cnt ; /* the number of pieces */
double dcnt ; /* double version of cnt */
ARRAY A ;
CELL *cp ;
char *ofmt ;
if ( sp->type < C_RE ) cast_for_split(sp) ;
/* can be C_RE, C_SPACE or C_SNULL */
sp -= 2 ; /* sp now addresses s; (sp+1) is X and (sp+2) is r */
if ( sp->type < C_STRING ) cast1_to_s(sp) ;
if ( string(sp)->len == 0 ) /* nothing to split */
{ free_STRING( string(sp) ) ;
sp->type = C_DOUBLE ; sp->dval = 0.0 ;
return sp ;
}
/* the pieces are left in temp_buff.ptr_buff[0 .. cnt-1] */
switch ( (sp+2)->type )
{
case C_RE :
cnt = re_split(string(sp)->str, (sp+2)->ptr) ;
break ;
case C_SPACE :
cnt = space_split(string(sp)->str) ;
break ;
/* this case could be done by C_RE, but very slowly.
Since it is the common way to eliminate fields,
we'll treat the special case for speed */
case C_SNULL : /* split on empty string */
cnt = 1 ;
temp_buff.ptr_buff[0] = sp->ptr ;
/* the single piece is s itself; take another reference so the
free_STRING() below does not release it */
string(sp)->ref_cnt++ ;
break ;
default : bozo("bad splitting cell in bi_split") ;
}
/* now load the array */
free_STRING( string(sp) ) ;
sp->type = C_DOUBLE ;
sp->dval = dcnt = (double) cnt ;
ofmt = string(field + OFMT)->str ;
A = (ARRAY) (sp+1)->ptr ;
/* store the pieces from last to first; the array index of each
piece is its 1-based position formatted with OFMT */
while ( cnt )
{ char xbuff[256] ;
/* this big in case the user did something goofy with
OFMT */
(void) sprintf(xbuff, ofmt, dcnt ) ;
dcnt -= 1.0 ;
cp = array_find( A, xbuff, 1) ;
cell_destroy(cp) ; /* discard whatever the element held before */
cp->ptr = temp_buff.ptr_buff[--cnt] ;
cp->type = C_MBSTRN ;
}
return sp ;
}
@//E*O*F mawk0.97/split.c//
chmod u=rw,g=r,o=r mawk0.97/split.c
echo x - mawk0.97/symtype.h
sed 's/^@//' > "mawk0.97/symtype.h" <<'@//E*O*F mawk0.97/symtype.h//'
/********************************************
symtype.h
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/*$Log: symtype.h,v $
* Revision 2.1 91/04/08 08:24:14 brennan
* VERSION 0.97
*
*/
/* types related to symbols are defined here */
#ifndef SYMTYPE_H
#define SYMTYPE_H
/* struct to hold info about builtins */
typedef struct {
char *name ; /* name of the builtin, e.g. "length" */
PF_CP fp ; /* ptr to function that does the builtin */
unsigned char min_args, max_args ;
/* info for parser to check correct number of arguments */
} BI_REC ;
/*---------------------------
structures and types for arrays
*--------------------------*/
/* array hash nodes */
typedef struct anode {
struct anode *link ; /* next node in the list */
STRING *sval ; /* presumably the element's index string -- confirm in the array code */
CELL *cp ; /* presumably the cell holding the element's value -- confirm */
} ANODE, **ARRAY ;
/* note ARRAY is a ptr to a hash table */
/* new_ARRAY() below allocates that table as A_HASH_PRIME zeroed
ANODE pointers */
CELL *PROTO(array_find, (ARRAY,void *, int) ) ;
int PROTO(array_test, (ARRAY, STRING *) ) ;
INST *PROTO(array_loop, (INST *, CELL *, CELL *) ) ;
void PROTO(array_delete, (ARRAY, STRING *) ) ;
CELL *PROTO(array_cat, (CELL *, int) ) ;
void PROTO(array_free, (ARRAY) ) ;
#define new_ARRAY() (ARRAY)memset(zmalloc(A_HASH_PRIME *\
sizeof(ANODE*)), 0, A_HASH_PRIME*sizeof(ANODE*))
extern ARRAY Argv ;
/* for parsing (i,j) in A */
/* NOTE(review): the meaning of the two members is not visible in
this file; presumably start is where the code for the index list
begins and cnt is the number of index expressions -- confirm
against the parser */
typedef struct {
INST *start ;
int cnt ;
} ARG2_REC ;
/*------------------------
user defined functions
------------------------*/
typedef struct fblock {
char *name ; /* the scanner points this at the symbol table entry's name */
INST *code ; /* set to a null pointer by the scanner when the function is called before it is defined */
unsigned short nargs ;
char *typev ; /* array of size nargs holding types */
} FBLOCK ; /* function block */
void PROTO(add_to_fdump_list, (FBLOCK *) ) ;
void PROTO( fdump, (void) ) ;
/*-------------------------
elements of the symbol table
-----------------------*/
#define ST_NONE 0
#define ST_VAR 1
#define ST_KEYWORD 2
#define ST_BUILTIN 3 /* a pointer to a builtin record */
#define ST_ARRAY 4 /* a void * ptr to a hash table */
#define ST_FIELD 5 /* a cell ptr to a field */
#define ST_FUNCT 6
#define ST_LENGTH 7 /* length is special */
#define ST_LOCAL_NONE 8
#define ST_LOCAL_VAR 9
#define ST_LOCAL_ARRAY 10
#define is_local(stp) ((stp)->type>=ST_LOCAL_NONE)
/* one symbol table entry; type selects which member of stval is
meaningful */
typedef struct {
char *name ;
char type ; /* one of the ST_ codes above */
unsigned char offset ; /* offset in stack frame for local vars */
union {
CELL *cp ; /* used by the scanner for ST_FIELD */
int kw ; /* ST_KEYWORD : the token value */
PF_CP fp ;
BI_REC *bip ; /* ST_BUILTIN */
ARRAY array ; /* presumably ST_ARRAY -- confirm */
FBLOCK *fbp ; /* ST_FUNCT */
} stval ;
} SYMTAB ;
/*****************************
structures for type checking function calls
******************************/
/* one record per argument of a call; the records of one call are
chained through link */
typedef struct ca_rec {
struct ca_rec *link ;
short type ; /* matches the ST_ types, see CA_EXPR and CA_ARRAY below */
short arg_num ; /* position in callee's stack */
/*--------- this data only set if we'll need to patch -------*/
/* happens if argument is an ID or type ST_NONE or ST_LOCAL_NONE */
int call_offset ;
/* where the type is stored */
SYMTAB *sym_p ; /* if type is ST_NONE */
char *type_p ; /* if type is ST_LOCAL_NONE */
} CA_REC ; /* call argument record */
/* type field of CA_REC matches with ST_ types */
#define CA_EXPR ST_LOCAL_VAR
#define CA_ARRAY ST_LOCAL_ARRAY
/* one record per function call; records are chained through link
(resolve_list, declared below, has this type) */
typedef struct fcall {
struct fcall *link ;
FBLOCK *callee ; /* the function being called */
short call_scope ;
FBLOCK *call ; /* only used if call_scope == SCOPE_FUNCT */
INST *call_start ; /* computed later as code may be moved */
CA_REC *arg_list ; /* records describing the call's arguments */
short arg_cnt_checked ;
unsigned line_no ; /* for error messages */
} FCALL_REC ;
extern FCALL_REC *resolve_list ;
void PROTO(resolve_fcalls, (void) ) ;
void PROTO(check_fcall, (FBLOCK*,int,FBLOCK*,CA_REC*,unsigned) ) ;
/* hash.c */
unsigned PROTO( hash, (char *) ) ;
SYMTAB *PROTO( insert, (char *) ) ;
SYMTAB *PROTO( find, (char *) ) ;
SYMTAB *PROTO( save_id, (char *) ) ;
void PROTO( restore_ids, (void) ) ;
/* error.c */
void PROTO(type_error, (SYMTAB *) ) ;
#endif /* SYMTYPE_H */
@//E*O*F mawk0.97/symtype.h//
chmod u=rw,g=r,o=r mawk0.97/symtype.h
echo x - mawk0.97/types.h
sed 's/^@//' > "mawk0.97/types.h" <<'@//E*O*F mawk0.97/types.h//'
/********************************************
types.h
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* $Log: types.h,v $
* Revision 2.1 91/04/08 08:24:15 brennan
* VERSION 0.97
*
*/
/* types.h */
#ifndef TYPES_H
#define TYPES_H
#if HAVE_VOID_PTR
typedef void *PTR ;
#else
typedef char *PTR ;
#endif
#include "sizes.h"
/* CELL types */
#define C_NOINIT 0
#define C_DOUBLE 1
#define C_STRING 2
#define C_STRNUM 3
#define C_MBSTRN 4
/*could be STRNUM, has not been checked */
#define C_RE 5
#define C_SPACE 6
/* split on space */
#define C_SNULL 7
/* split on the empty string */
#define C_REPL 8
/* a replacement string '\&' changed to & */
#define C_REPLV 9
/* a vector replacement -- broken on & */
#define NUM_CELL_TYPES 10
/* these defines are used to check types for two
CELLs which are adjacent in memory */
#define TWO_NOINITS (2*(1<<C_NOINIT))
#define TWO_DOUBLES (2*(1<<C_DOUBLE))
#define TWO_STRINGS (2*(1<<C_STRING))
#define TWO_STRNUMS (2*(1<<C_STRNUM))
#define TWO_MBSTRNS (2*(1<<C_MBSTRN))
#define NOINIT_AND_DOUBLE ((1<<C_NOINIT)+(1<<C_DOUBLE))
#define NOINIT_AND_STRING ((1<<C_NOINIT)+(1<<C_STRING))
#define NOINIT_AND_STRNUM ((1<<C_NOINIT)+(1<<C_STRNUM))
#define DOUBLE_AND_STRING ((1<<C_DOUBLE)+(1<<C_STRING))
#define DOUBLE_AND_STRNUM ((1<<C_STRNUM)+(1<<C_DOUBLE))
#define STRING_AND_STRNUM ((1<<C_STRING)+(1<<C_STRNUM))
#define NOINIT_AND_MBSTRN ((1<<C_NOINIT)+(1<<C_MBSTRN))
#define DOUBLE_AND_MBSTRN ((1<<C_DOUBLE)+(1<<C_MBSTRN))
#define STRING_AND_MBSTRN ((1<<C_STRING)+(1<<C_MBSTRN))
#define STRNUM_AND_MBSTRN ((1<<C_STRNUM)+(1<<C_MBSTRN))
/* a reference counted string.
NOTE(review): str[] is declared with 4 bytes, but split.c copies len
bytes into it after new_STRING((char *) 0, len), so new_STRING()
presumably allocates the struct with enough trailing room -- confirm */
typedef struct {
unsigned short ref_cnt ;
unsigned short len ;
char str[4] ;
} STRING ;
/* the basic value holder; type is one of the C_ codes above */
typedef struct cell {
short type ;
short vcnt ; /* only used if type == C_REPLV */
PTR ptr ; /* e.g. a STRING * for C_STRING, a compiled RE for C_RE */
double dval ; /* the value when type == C_DOUBLE */
} CELL ;
/* all builtins are passed the evaluation stack pointer and
return its new value, here is the type */
#ifdef __STDC__
typedef CELL *(*PF_CP)(CELL *) ;
#else
typedef CELL *(*PF_CP)() ;
#endif
/* an element of code (instruction) */
typedef union {
int op ;
PTR ptr ;
} INST ;
/* a scratch buffer type */
/* the same storage is used either as an array of MAX_FIELD pointers
(the split routines) or as a character buffer (the scanner) */
union tbuff {
PTR ptr_buff[MAX_FIELD] ;
char string_buff[TEMP_BUFF_SZ + BUFFSZ + 1] ;
} ;
#endif
@//E*O*F mawk0.97/types.h//
chmod u=rw,g=r,o=r mawk0.97/types.h
echo x - mawk0.97/zmalloc.c
sed 's/^@//' > "mawk0.97/zmalloc.c" <<'@//E*O*F mawk0.97/zmalloc.c//'
/********************************************
zmalloc.c
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/*$Log: zmalloc.c,v $
* Revision 2.2 91/04/09 12:39:45 brennan
* added static to funct decls to satisfy STARDENT compiler
*
* Revision 2.1 91/04/08 08:24:17 brennan
* VERSION 0.97
*
*/
/* zmalloc.c */
#include "mawk.h"
#include "zmalloc.h"
void PROTO( mawk_exit, (int) ) ;
/*
zmalloc() gets mem from malloc() in CHUNKS of 2048 bytes
and cuts these blocks into smaller pieces that are multiples
of eight bytes. When a piece is returned via zfree(), it goes
on a linked linear list indexed by its size. The lists are
an array, pool[].
E.g., if you ask for 22 bytes with p = zmalloc(22), you actually get
a piece of size 24. When you free it with zfree(p,22) , it is added
to the list at pool[2].
*/
#define ZBLOCKSZ 8
#define ZSHIFT 3
#define POOLSZ 16
#define CHUNK 256
/* number of blocks to get from malloc */
static PTR PROTO( emalloc, (unsigned) ) ;
void PROTO( errmsg, (int , char *, ...) ) ;
/* malloc() that does not come back empty handed: when malloc()
   fails, report "out of memory" and call mawk_exit(1). */
static PTR emalloc(size)
  unsigned size ;
{ PTR mem = malloc(size) ;

  if ( mem ) return mem ;

  errmsg(0, "out of memory") ;
  mawk_exit(1) ;
  return mem ;
}
/* the allocation unit: ZBLOCKSZ bytes that double as the link field
while the piece sits on a free list */
typedef union zblock {
char dummy[ZBLOCKSZ] ;
union zblock *link ;
} ZBLOCK ;
/* ZBLOCKS of sizes 1, 2, ... 16
which is bytes of sizes 8, 16, ... , 128
are stored on the linked linear lists in
pool[0], pool[1], ... , pool[15]
*/
static ZBLOCK *pool[POOLSZ] ;
/* Allocate size bytes.

   Requests larger than POOLSZ*ZBLOCKSZ bytes go straight to
   emalloc().  Smaller ones are rounded up to a whole number of
   ZBLOCKs and served from the free list for that size, or else
   carved from the current chunk ("avail"), which is refilled from
   malloc() CHUNK blocks at a time.  Whatever remains of an exhausted
   chunk is put on the free list matching its size.

   A request for 0 bytes is treated as a request for one block;
   otherwise the block count would be 0 and pool[] would be indexed
   at -1.

   If malloc() cannot supply a new chunk, the piece comes from
   emalloc() at its rounded-up size, so a piece later recycled
   through zfree() is as large as its free list promises.

   Never returns a null pointer: emalloc() exits on failure.
*/
PTR zmalloc( size )
  unsigned size ;
{ register unsigned blocks ;
  register ZBLOCK *p ;
  static unsigned amt_avail ;
  static ZBLOCK *avail ;

  if ( size > POOLSZ * ZBLOCKSZ ) return emalloc(size) ;

  if ( size == 0 ) size = 1 ;
  blocks = (size >> ZSHIFT) + ((size & (ZBLOCKSZ-1)) != 0) ;

  if ( (p = pool[blocks-1]) != (ZBLOCK *) 0 )
  { pool[blocks-1] = p->link ; return (PTR) p ; }

  if ( blocks > amt_avail )
  { if ( amt_avail ) /* free avail */
    { avail->link = pool[--amt_avail] ; pool[amt_avail] = avail ; }

    if ( !(avail = (ZBLOCK *) malloc(CHUNK*ZBLOCKSZ)) )
    { /* if we get here, almost out of memory */
      amt_avail = 0 ; return emalloc(blocks << ZSHIFT) ; }
    amt_avail = CHUNK ;
  }

  /* get p from the avail pile */
  p = avail ; avail += blocks ; amt_avail -= blocks ;
  return (PTR) p ;
}
/* Release a piece obtained from zmalloc().

   p    : the piece
   size : the size that was requested when it was allocated

   Pieces larger than POOLSZ*ZBLOCKSZ bytes are handed back to
   free(); smaller ones are pushed onto the free list for their
   rounded-up size.  A size of 0 is treated as one block; otherwise
   the list index would be -1.
*/
void zfree( p, size)
  register PTR p ; unsigned size ;
{ register int index ;

  if ( size > POOLSZ * ZBLOCKSZ ) { free(p) ; return ; }

  if ( size == 0 ) size = 1 ;
  index = (size >> ZSHIFT) + ((size & (ZBLOCKSZ-1)) != 0) - 1 ;
  ((ZBLOCK *) p)->link = pool[index] ;
  pool[index] = (ZBLOCK *) p ;
}
/* Resize a piece obtained from zmalloc(): allocate new_size bytes,
   copy over the smaller of old_size and new_size bytes, release the
   old piece with zfree() and return the new one. */
PTR zrealloc( p, old_size, new_size )
  register PTR p ;
  unsigned old_size, new_size ;
{ register PTR fresh = zmalloc(new_size) ;
  unsigned keep = new_size ;

  if ( old_size < new_size ) keep = old_size ;

  (void) memcpy(fresh, p, keep) ;
  zfree(p, old_size) ;
  return fresh ;
}
@//E*O*F mawk0.97/zmalloc.c//
chmod u=rw,g=r,o=r mawk0.97/zmalloc.c
echo x - mawk0.97/zmalloc.h
sed 's/^@//' > "mawk0.97/zmalloc.h" <<'@//E*O*F mawk0.97/zmalloc.h//'
/********************************************
zmalloc.h
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/*$Log: zmalloc.h,v $
* Revision 2.1 91/04/08 08:24:19 brennan
* VERSION 0.97
*
*/
/* zmalloc.h */
#ifndef ZMALLOC_H
#define ZMALLOC_H
#ifdef __STDC__
#include <stdlib.h>
#include <string.h> /* memcpy() */
#else
PTR memcpy(), malloc(), realloc() ;
void free() ;
#endif
PTR PROTO( zmalloc, (unsigned) ) ;
void PROTO( zfree, (PTR, unsigned) ) ;
PTR PROTO( zrealloc , (PTR,unsigned,unsigned) ) ;
#endif /* ZMALLOC_H */
@//E*O*F mawk0.97/zmalloc.h//
chmod u=rw,g=r,o=r mawk0.97/zmalloc.h
echo mkdir - mawk0.97/rexp
mkdir mawk0.97/rexp
chmod u=rwx,g=rx,o=rx mawk0.97/rexp
echo x - mawk0.97/rexp/Makefile
sed 's/^@//' > "mawk0.97/rexp/Makefile" <<'@//E*O*F mawk0.97/rexp/Makefile//'
####################################
# This is a makefile for mawk,
# an implementation of AWK (1988).
####################################
#
#
# This builds a regular expression library.
# Remove -DMAWK from CFLAGS and the library is suitable for general use.
# (Even if it is left in, the difference is very small.)
#
CFLAGS = -O -DMAWK
C=rexp.c rexp0.c rexp1.c rexp2.c rexp3.c rexpdb.c
regexp.a : $(C)
rm -f *.o
cc -c $(CFLAGS) $?
ar r regexp.a *.o
rm -f *.o
@//E*O*F mawk0.97/rexp/Makefile//
chmod u=rw,g=r,o=r mawk0.97/rexp/Makefile
echo x - mawk0.97/rexp/rexp.c
sed 's/^@//' > "mawk0.97/rexp/rexp.c" <<'@//E*O*F mawk0.97/rexp/rexp.c//'
/********************************************
rexp.c
copyright 1991, Michael D. Brennan
This is a source file for mawk an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* rexp.c */
/* op precedence parser for regular expressions */
#include "rexp.h"
/* static prototypes */
/* op_pop() is defined static further down; the forward declaration
   must be static too, otherwise the two declarations disagree about
   linkage */
static void PROTO( op_pop, (void) ) ;
/* DATA */
/* REerrno is set by RE_error_trap() when compilation fails.
REerrlist[] holds the message texts; presumably REerrlist[REerrno]
is the message for the last error -- confirm against the callers */
int REerrno ;
char *REerrlist[] = { (char *) 0 ,
/* 1 */ "missing '('",
/* 2 */ "missing ')'",
/* 3 */ "bad class -- [], [^] or [" ,
/* 4 */ "missing operand" ,
/* 5 */ "resource exhaustion -- regular expression too large",
/* 6 */ "null regular expression" } ;
/* E5 is very unlikely to occur */
/* This table drives the operator precedence parser */
/* Indexed as table[token on top of the operator stack][incoming token].
In REcompile(): G makes the parser pop operators with op_pop() until
the top entry's prec is L; a negative entry is an error (E7 is treated
as an internal error); any other entry pushes the incoming token.
NOTE(review): the values of L, G, EQ and the E codes are defined
outside this file -- presumably in rexp.h */
static int table[8][8] = {
/*         0   |  CAT  *   +   ?   (   )  */
/* 0 */ 0, L, L, L, L, L, L, E1,
/* | */ G, G, L, L, L, L, L, G,
/* CAT*/ G, G, G, L, L, L, L, G,
/* * */ G, G, G, G, G, G, E7, G,
/* + */ G, G, G, G, G, G, E7, G,
/* ? */ G, G, G, G, G, G, E7, G,
/* ( */ E2, L, L, L, L, L, L, EQ,
/* ) */ G , G, G, G, G, G, E7, G } ;
/*====================================
  THE STACKS
  ==========================*/
/* an operator stack entry: the token, and the precedence relation
   looked up for it in table[] */
typedef struct
{ int token ;
  int prec ; } OP ;
#define STACKSZ 96
/*---------------------------
  m_ptr -> top filled slot on the m_stack
  op_ptr -> top filled slot on op_stack,
  initially this is only half filled with the token
  the precedence is added later
 *----------------------*/
static OP *op_stack, *op_limit, *op_ptr ;
static MACHINE *m_stack, *m_limit, *m_ptr ;
/* inline for speed on the m_stack */
/* m_pop()   : expression yielding the top machine and popping it;
               traps with E4 when the stack is empty
   m_push(x) : push machine x; traps with E5 when the stack is full.
               The test is made before m_ptr is advanced, so m_ptr
               still addresses the last filled slot when
               RE_error_trap() walks the stack.  The macro is wrapped
               in do { } while ( 0 ) so it expands to one statement. */
#define m_pop() (m_ptr<m_stack?RE_error_trap(-E4): *m_ptr--)
#define m_push(x) do {\
    if ( m_ptr + 1 == m_limit ) RE_error_trap(-E5) ;\
    *++m_ptr = (x) ;\
    } while ( 0 )
/*=======================*/
static jmp_buf err_buf ; /* used to trap on error */
MACHINE RE_error_trap(x) /* return is dummy to make macro OK */
int x ;
{
while ( m_ptr >= m_stack ) RE_free( m_ptr-- -> start ) ;
RE_free(m_stack) ; RE_free(op_stack) ;
REerrno = x ;
longjmp(err_buf, 1 ) ;
/* dummy return to make compiler happy */
return *m_stack ;
}
VOID *REcompile(re)
char *re ;
{ MACHINE m ;
register int t ;
RE_lex_init(re) ;
if ( *re == 0 )
{ STATE *p = (STATE *) RE_malloc( sizeof(STATE) ) ;
p->type = M_ACCEPT ;
return (VOID *) p ;
}
if ( setjmp(err_buf) ) return (VOID *) 0 ;
/* global error trap */
/* initialize the stacks */
m_stack =(MACHINE *) RE_malloc(STACKSZ*sizeof(MACHINE)) ;
m_ptr = m_stack - 1 ;
m_limit = m_stack + STACKSZ ;
op_ptr = op_stack = (OP *) RE_malloc(STACKSZ*sizeof(OP)) ;
op_ptr->token = 0 ;
op_limit = op_stack + STACKSZ ;
t = RE_lex(&m) ;
while( 1 )
{ switch( t )
{
case T_STR :
case T_ANY :
case T_U :
case T_START :
case T_END :
case T_CLASS : m_push(m) ; break ;
case 0 : /* end of reg expr */
if ( op_ptr -> token == 0 ) /* done */
{ m = m_pop() ;
if ( m_ptr < m_stack ) /* DONE !!! */
{ free(m_stack) ; free(op_stack) ;
return (VOID *) m.start ;
}
/* machines still on the stack */
RE_panic("values still on machine stack") ;
}
/* case 0 falls thru to default
which is operator case */
default:
if ( (op_ptr -> prec = table[op_ptr -> token][t]) == G )
{ while ( op_ptr -> prec != L ) op_pop() ;
continue ; }
if ( op_ptr -> prec < 0 )
if ( op_ptr->prec == E7 )
RE_panic("parser returns E7") ;
else RE_error_trap(-op_ptr->prec) ;
if ( ++op_ptr == op_stack + STACKSZ ) /* stack overflow */
RE_error_trap(-E5) ;
op_ptr -> token = t ;
}
t = RE_lex(&m) ;
}
}
/* Pop one operator off the op stack and apply it to the machine(s) on
   top of the machine stack; the resulting machine is pushed back.
   Parentheses are simply discarded. */
static void op_pop()
{ MACHINE left, right ;
  register int tok = op_ptr->token ;

  op_ptr-- ;
  if ( tok >= T_LP ) return ;   /* nothing to do with '(' or ')' */

  /* a binary operation needs a second operand, which is on top */
  if ( tok <= T_CAT ) right = m_pop() ;
  left = m_pop() ;

  if ( tok == T_CAT )       RE_cat(&left, &right) ;
  else if ( tok == T_OR )   RE_or(&left, &right) ;
  else if ( tok == T_STAR ) RE_close(&left) ;
  else if ( tok == T_PLUS ) RE_poscl(&left) ;
  else if ( tok == T_Q )    RE_01(&left) ;
  else RE_panic("strange token popped from op_stack") ;

  m_push(left) ;
}
/* Reached only through a logic flaw or an unforeseen case:
   print the message s on stderr and exit with status 100. */
void RE_panic( s )
char *s ;
{
  (void) fprintf( stderr, "REcompile() - panic: %s\n", s) ;
  exit(100) ;
}
@//E*O*F mawk0.97/rexp/rexp.c//
chmod u=rw,g=r,o=r mawk0.97/rexp/rexp.c
echo x - mawk0.97/rexp/rexp.h
sed 's/^@//' > "mawk0.97/rexp/rexp.h" <<'@//E*O*F mawk0.97/rexp/rexp.h//'
/********************************************
rexp.h
copyright 1991, Michael D. Brennan
This is a source file for mawk an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* rexp.h */
#ifndef REXP_H
#define REXP_H
#include <string.h>
#include <stdio.h>
#include <setjmp.h>
/* PROTO(name, args) writes a function declarator: with an ANSI
   compiler (__STDC__) the argument list is kept as a prototype,
   otherwise it is dropped and only name() remains.
   A definition supplied before this header is left alone. */
#ifndef PROTO
#ifdef __STDC__
#define PROTO(name, args) name args
#else
#define PROTO(name, args) name()
#endif
#endif
/* VOID is the pointee type of the generic pointers used in this
   library: void with an ANSI compiler, char otherwise.
   Without __STDC__ the allocator functions are declared by hand
   instead of through <stdlib.h>. */
#ifdef __STDC__
#define VOID void
#include <stdlib.h>
#else
#define VOID char
char *malloc(), *realloc() ;
void free() ;
#endif
/* user can change this */
/* Allocation hooks used throughout the library.  By default they map
   to RE_xmalloc()/RE_xrealloc(), which call RE_error_trap() instead of
   returning a null pointer, and to plain free(). */
#define RE_malloc(x) RE_xmalloc(x)
#define RE_realloc(x,l) RE_xrealloc(x,l)
#define RE_free(x) free(x)
VOID *PROTO( RE_xmalloc, (unsigned) ) ;
VOID *PROTO( RE_xrealloc, (void *,unsigned) ) ;
/* finite machine state types */
/* Values stored in STATE.type.  M_STR carries a string and its length,
   M_CLASS a bit vector, and the jump types M_1J, M_2JA and M_2JB a
   relative offset in data.jump.  M_ACCEPT ends every machine. */
#define M_STR 0
#define M_CLASS 1
#define M_ANY 2
#define M_START 3
#define M_END 4
#define M_U 5
#define M_1J 6
#define M_2JA 7
#define M_2JB 8
#define M_ACCEPT 9
/* Flag offsets added on top of a state type.  The lexer adds END_ON
   to a state's type when the operand is followed by '$'.
   NOTE(review): U_ON is presumably applied the same way by the
   matcher for the M_U flag -- confirm against the matcher source. */
#define U_ON 10
#define U_OFF 0
#define END_OFF 0
#define END_ON (2*U_ON)
/* 32 bytes = 256 bits, one bit per character code */
typedef unsigned char BV[32] ; /* bit vector */
/* one state of a compiled machine */
typedef struct
{ char type ; /* one of the M_ values, possibly with END_ON added */
unsigned char len ; /* used for M_STR */
union
{
char *str ; /* string */
BV *bvp ; /* class */
int jump ; /* relative offset for the jump types */
} data ;
} STATE ;
#define STATESZ (sizeof(STATE))
/* a machine is a contiguous array of states: start is its first
   state and stop its last one, which has type M_ACCEPT */
typedef struct
{ STATE *start, *stop ; } MACHINE ;
/* tokens */
/* Token codes produced by the lexer.  0 (end of expression) and the
   operator tokens T_OR .. T_RP double as row and column indexes of the
   precedence table in rexp.c, so their values must stay 1..7. */
#define T_OR 1 /* | */
#define T_CAT 2
#define T_STAR 3 /* * */
#define T_PLUS 4 /* + */
#define T_Q 5 /* ? */
#define T_LP 6 /* ( */
#define T_RP 7 /* ) */
/* operand tokens and character categories */
#define T_START 8 /* ^ */
#define T_END 9 /* $ */
#define T_ANY 10 /* . */
#define T_CLASS 11 /* starts with [ */
#define T_SLASH 12 /* \ */
#define T_CHAR 13 /* all the rest */
#define T_STR 14
#define T_U 15
/* precedences and error codes */
/* L, EQ and G are the relations stored in the precedence table.
   The E values are negative so they can share the table with the
   relations; the parser passes the negated value to RE_error_trap(),
   which stores it in REerrno.  E7 is never reported that way: the
   parser panics on it. */
#define L 0
#define EQ 1
#define G 2
#define E1 (-1)
#define E2 (-2)
#define E3 (-3)
#define E4 (-4)
#define E5 (-5)
#define E6 (-6)
#define E7 (-7)
/* code the allocation wrappers pass to RE_error_trap(); same value as -E5 */
#define MEMORY_FAILURE 5
/* struct for the run time stack */
/* one saved position of the matcher; the stack of these is allocated
   by RE_run_stack_init() and grown by RE_new_run_stack() */
typedef struct {
STATE *m ; /* save the machine ptr */
int u ; /* save the u_flag */
char *s ; /* save the active string ptr */
char *ss ; /* save the match start -- only used by REmatch */
} RT_STATE ; /* run time state */
/* error trap */
extern int REerrno ;
/* stores its argument in REerrno and longjmps out of the compile;
   the MACHINE return value is a dummy */
MACHINE PROTO(RE_error_trap, (int) ) ;
/* constructors for the primitive machines (rexp1.c) */
MACHINE PROTO( RE_u, (void) ) ;
MACHINE PROTO( RE_start, (void) ) ;
MACHINE PROTO( RE_end, (void) ) ;
MACHINE PROTO( RE_any, (void) ) ;
MACHINE PROTO( RE_str, (char *, unsigned) ) ;
MACHINE PROTO( RE_class, (BV *) ) ;
/* operators: the result replaces the first argument (rexp1.c) */
void PROTO( RE_cat, (MACHINE *, MACHINE *) ) ;
void PROTO( RE_or, (MACHINE *, MACHINE *) ) ;
void PROTO( RE_close, (MACHINE *) ) ;
void PROTO( RE_poscl, (MACHINE *) ) ;
void PROTO( RE_01, (MACHINE *) ) ;
/* prints a message and exits with status 100 (rexp.c) */
void PROTO( RE_panic, (char *) ) ;
char *PROTO( str_str, (char *, char *, unsigned) ) ;
/* lexical scanner (rexp0.c) */
void PROTO( RE_lex_init , (char *) ) ;
int PROTO( RE_lex , (MACHINE *) ) ;
/* run time stack (rexp2.c) */
void PROTO( RE_run_stack_init, (void) ) ;
RT_STATE *PROTO( RE_new_run_stack, (void) ) ;
#endif /* REXP_H */
@//E*O*F mawk0.97/rexp/rexp.h//
chmod u=rw,g=r,o=r mawk0.97/rexp/rexp.h
echo x - mawk0.97/rexp/rexp0.c
sed 's/^@//' > "mawk0.97/rexp/rexp0.c" <<'@//E*O*F mawk0.97/rexp/rexp0.c//'
/********************************************
rexp0.c
copyright 1991, Michael D. Brennan
This is a source file for mawk an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* rexp0.c */
/* lexical scanner */
#include "rexp.h"
/* static functions */
static int PROTO( do_str, (int, char **, MACHINE *) ) ;
static int PROTO( do_class, (char **, MACHINE *) ) ;
static int PROTO( escape, (char **) ) ;
static BV *PROTO( store_bvp, (BV *) ) ;
static int PROTO( ctohex, (int) ) ;
/* Maps a character code 0 .. '|' to its token: 0 for the terminating
   NUL, the operator or operand token for the special characters
   $ ( ) * + . ? [ \ ^ | and T_CHAR (13) for everything else.
   The numeric entries assume the ASCII character set. */
#ifndef EG /* if EG make next array visible */
static
#endif
char RE_char2token[ '|' + 1 ] = {
0,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,9,13,13,13,
6,7,3,4,13,13,10,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,5,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,13,13,13,13,13,13,13,13,11,12,13,8,13,13,13,13,13,13,
13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,13,13,1} ;
/* Token for character x; codes above '|' are all T_CHAR.
   x is evaluated twice, so it must have no side effects. */
#define char2token(x) ( (unsigned char)(x) > '|' ? T_CHAR : RE_char2token[x] )
#define NOT_STARTED (-1)   /* value of prev before the first token */

static int prev ;          /* token most recently returned by RE_lex() */
static char *lp ;          /* ptr to reg exp string */
static unsigned re_len ;   /* length of the reg exp including its NUL */

/* Prepare the scanner for the regular expression re and make sure the
   run time stack exists. */
void RE_lex_init( re )
char *re ;
{
  prev = NOT_STARTED ;
  lp = re ;
  re_len = strlen(re) + 1 ;
  RE_run_stack_init() ;
}
/* Return the next token of the regular expression set up by
   RE_lex_init(), or 0 at its end.
   For an operand token (T_STR, T_ANY, T_U, T_START, T_END, T_CLASS)
   the machine recognizing it is stored in *mp.
   T_CAT is synthesized, without consuming input, wherever an operand
   or '(' directly follows something it has to be concatenated with. */
int RE_lex( mp )
MACHINE *mp ;
{ register int c ;
  /* ".+" is delivered as "." followed by ".*": the flag is set once
     the first half has been returned.  Declared with an explicit int
     (no implicit int). */
  static int plus_is_star_flag = 0 ;

  /* a compile that was abandoned between the two halves of ".+" must
     not leak the flag into the next expression */
  if ( prev == NOT_STARTED ) plus_is_star_flag = 0 ;

  switch( c = char2token(*lp) )
  {
    case T_OR :
    case T_PLUS :
    case T_STAR :
    case T_Q :
    case T_RP :
      lp++ ; return prev = c ;

    case T_SLASH :
      if ( lp[1] != 0 ) break ;
      /* a trailing backslash ends the expression: fall thru */
    case 0 : return 0 ;

    case T_LP :
      switch( prev )
      {
        case T_CHAR :
        case T_STR :
        case T_ANY :
        case T_CLASS :
        case T_START :
        case T_RP :
        case T_PLUS :
        case T_STAR :
        case T_Q :
        case T_U :
          return prev = T_CAT ;

        default :
          lp++ ;
          return prev = T_LP ;
      }
  }

  /* *lp is an operand, but implicit cat op is possible */
  switch( prev )
  { case NOT_STARTED :
    case T_OR :
    case T_LP :
    case T_CAT :
      switch( c )
      { case T_ANY :
          if ( * ++lp == '*' )
          { lp++ ; *mp = RE_u() ;
            return prev = T_U ;
          }
          if ( *lp == '+' )
          { if ( plus_is_star_flag )
            { /* second half of ".+" */
              lp++ ; *mp = RE_u() ;
              plus_is_star_flag = 0 ;
              return prev = T_U ;
            }
            /* first half: hand out "." and stay on it */
            plus_is_star_flag = 1 ;
            lp-- ; *mp = RE_any() ;
            return prev = T_ANY ;
          }
          *mp = RE_any() ;
          prev = T_ANY ;
          break ;

        case T_SLASH :
          lp++ ; c = escape(&lp) ;
          prev = do_str(c, &lp, mp) ;
          break ;

        case T_CHAR :
          c = *lp++ ;
          prev = do_str(c, &lp, mp) ;
          break ;

        case T_CLASS :
          prev = do_class(&lp, mp) ;
          break ;

        case T_START :
          *mp = RE_start() ; lp++ ;
          prev = T_START ;
          break ;

        case T_END :
          lp++ ; *mp = RE_end() ;
          return prev = T_END ;

        default :
          RE_panic("bad switch in RE_lex") ;
      }
      break ;

    default : /* don't advance the pointer, return T_CAT */
      return prev = T_CAT ;
  }

  /* check for end character */
  if ( *lp == '$' )
  { mp->start->type += END_ON ; lp++ ; }

  return prev ;
}
/* STATE.len is an unsigned char, so one string machine cannot hold
   more than this many characters */
#define MAX_STR_LEN 255

/* Collect a run of ordinary and escaped characters into one string
   machine.
     c    the first character, already decoded by the caller
     pp   in: points behind the first character,
          out: points at the first character not consumed
     mp   receives the string machine
   Always returns T_STR.
   A run longer than MAX_STR_LEN is cut there; the rest is scanned by
   the next call and joined by the implicit T_CAT. */
static int do_str( c, pp, mp)
int c ; /* the first character */
char **pp ; /* where to put the re_char pointer on exit */
MACHINE *mp ; /* where to put the string machine */
{ register char *p , *s ;
  char *str ;
  char *save ;   /* escape() needs an addressable pointer */
  char *last ;   /* source position of the element copied last */
  unsigned len ;

  p = *pp ;
  last = p ;     /* only consulted once len > 1 */
  s = str = (char *) RE_malloc( re_len ) ;
  *s++ = c ; len = 1 ;

  while ( len < MAX_STR_LEN )
  {
    switch( char2token(*p) )
    {
      case T_CHAR :
        last = p ;
        *s++ = *p++ ;
        break ;

      case T_SLASH :
        /* a trailing backslash is left for RE_lex(), which treats it
           as the end of the expression */
        if ( p[1] == 0 ) goto out ;
        last = p ;
        save = p + 1 ;
        *s++ = escape(&save) ;
        p = save ;
        break ;

      default : goto out ;
    }
    len++ ;
  }

out:
  /* if len > 1 and we failed on a ? + or * , need to back up:
     the operator applies to the last element only.  Go back to where
     that element started in the source, because an escape sequence
     is longer than one character. */
  if ( len > 1 && (*p == '*' || *p == '+' || *p == '?' ) )
  { len-- ; s-- ; p = last ; }

  *s = 0 ;
  *pp = p ;
  *mp = RE_str((char *) RE_realloc(str, len+1) , len) ;
  return T_STR ;
}
/*--------------------------------------------
BUILD A CHARACTER CLASS
*---------------------------*/
/* turn on bit x of the bit vector b */
#define on( b, x) ( (b)[(x)>>3] |= ( 1 << ((x)&7) ))

static void PROTO(block_on, (BV,int,int) ) ;

/* turn on the bits x .. y of the bit vector b */
static void block_on( b, x, y)
BV b ; int x, y ; /* must call with x<=y */
{ int first = x >> 3 ;                            /* byte holding bit x */
  int last = y >> 3 ;                             /* byte holding bit y */
  int from_x = 0xff << (x & 7) ;                  /* bit x and above */
  int up_to_y = ~( 0xff << ((y & 7) + 1) ) ;      /* bit y and below */
  int k ;

  if ( first == last )
  { b[first] |= from_x & up_to_y ;
    return ;
  }

  b[first] |= from_x ;
  for ( k = first + 1 ; k < last ; k++ ) b[k] = 0xff ;
  b[last] |= up_to_y ;
}
/* build a BV for a character class.
   *start points at the '['
   on exit: *start points at the character after ']'
   mp points at a machine that recognizes the class

   Traps with E3 on "[]", "[^]" and on a class without closing ']'.
   Always returns T_CLASS.
   Characters are handled as unsigned values 0..255, so characters
   with the high bit set never produce a negative bit index.
*/
static int do_class( start, mp)
char **start ; MACHINE *mp ;
{ register char *p ;
  register BV *bvp ;
  int prev ;   /* last single character, or -1 if none can start a range */
  int next ;   /* character following a '-' */
  char *q , *t;
  int cnt ;
  int comp_flag ;

  p = (*start) + 1 ;
  if ( *p == ']' || (*p == '^' && *(p+1) == ']') )
    RE_error_trap(-E3) ;

  while ( 1 ) /* find the back of the class */
  { if ( ! (q = strchr(p,']')) ) /* no closing bracket */
      RE_error_trap(-E3) ;
    /* a ']' preceded by an odd number of backslashes is escaped */
    p = q-1 ;
    cnt = 0 ;
    while ( *p == '\\') { cnt++ ; p-- ; }
    if ( (cnt & 1) == 0 ) /* even number of \ */ break ;
    p = q+1 ;
  }

  /* q now pts at the back of the class */
  p = (*start) + 1 ;
  *start = q + 1 ;

  bvp = (BV *) RE_malloc( sizeof(BV) ) ;
  (void) memset( bvp, 0, sizeof(BV) ) ;

  comp_flag = 0 ;
  if ( *p == '^' ) { comp_flag = 1 ; p++ ; }

  prev = -1 ; /* indicates - cannot be part of a range */

  while ( p < q )
  {
    switch( *p )
    { case '\\' :
        t = ++p ;
        prev = escape(&t) & 0xff ;
        on(*bvp, prev) ;
        p = t ;
        continue ;

      case '-' :
        next = (unsigned char) p[1] ;
        if ( prev == -1 || p+1 == q || prev > next )
        { /* not a range: a literal '-' */
          prev = '-' ; on(*bvp, '-') ;
        }
        else
        { p++ ;
          block_on(*bvp, prev, next) ;
          prev = -1 ;
        }
        break ;

      default :
        prev = (unsigned char) *p ;
        on(*bvp, prev) ;
        break ;
    }
    p++ ;
  }

  if ( comp_flag )
    for ( p = (char *) bvp ; p < (char *) bvp + sizeof(BV) ; p++) *p = ~*p ;

  /* make sure zero is off */
  (*bvp)[0] &= 0xfe ;

  *mp = RE_class( store_bvp( bvp ) ) ;
  return T_CLASS ;
}
/* storage for bit vectors so they can be reused ,
   stored in an unsorted linear array
   the array grows as needed
*/
#define BV_GROWTH 6

/* Return the stored bit vector with the same contents as *bvp.
   When no such vector exists yet, bvp itself is stored and returned;
   otherwise bvp is freed and the older copy is returned. */
static BV *store_bvp( bvp )
BV *bvp ;
{
  static BV **bv_base, **bv_limit ;
  static BV **bv_next ; /* next empty slot in the array */
  register BV **slot ;
  unsigned used ;

  if ( bv_next == bv_limit ) /* need to grow */
  {
    if ( bv_base )
    { used = bv_next - bv_base ;
      bv_base = (BV**) RE_realloc(bv_base, (used+BV_GROWTH)*sizeof(BV*)) ;
    }
    else /* first growth */
    { used = 0 ;
      bv_base = (BV**) RE_malloc(BV_GROWTH*sizeof(BV*)) ;
    }
    bv_next = bv_base + used ;
    bv_limit = bv_next + BV_GROWTH ;
  }

  /* put bvp in bv_next as a sentinel, so the search always stops */
  *bv_next = bvp ;
  for ( slot = bv_base ; memcmp(*slot, bvp, sizeof(BV)) != 0 ; slot++ )
    ;

  if ( slot != bv_next ) /* we already have it */
    RE_free(bvp) ;
  else /* it is new */
    bv_next++ ;

  return *slot ;
}
/* ---------- convert escape sequences -------------*/
#define isoctal(x) ((x)>='0'&&(x)<='7')
#define NOT_HEX 16

/* values of the hex digits 'A' .. 'f', indexed by c - 'A';
   0 marks a character that is not a hex digit (ASCII layout) */
static char hex_val['f' - 'A' + 1] = {
10,11,12,13,14,15, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
10,11,12,13,14,15 } ;

/* interpret 1 character as hex;
   returns its value 0..15, or NOT_HEX if c is not a hex digit */
static int ctohex( c )
register int c ;
{ int t ;

  if ( c >= '0' && c <= '9' ) return c - '0' ;
  if ( c >= 'A' && c <= 'f' && ( t = hex_val[c-'A'] )) return t ;
  return NOT_HEX ;
}

/* pairs: escape letter, character it stands for */
static char escape_test[] = "n\nt\tb\br\rf\fa\07v\013" ;

/*-----------------
  return the char
  and move the pointer forward
  on entry *s -> at the character after the slash

  Recognized: the letters of escape_test, one to three octal digits
  (the value is truncated to 8 bits), \x followed by one or two hex
  digits; any other character stands for itself.
  The result is always in the range 0..255.
  At the end of the string 0 is returned and the pointer is not moved.
*-------------------*/
static int escape(start_p)
char **start_p ;
{ register char *p = *start_p ;
  register unsigned x ;
  unsigned xx ;
  register char *t ;

  /* must be tested first: there is nothing to look up or to skip */
  if ( *p == 0 ) return 0 ;

  /* compare with the letters only (every second entry), never with
     the characters they translate to */
  for ( t = escape_test ; *t ; t += 2 )
    if ( *t == *p )
    { *start_p = p + 1 ;
      return t[1] ;
    }

  if ( isoctal(*p) )
  { x = *p++ - '0' ;
    if ( isoctal(*p) )
    { x = (x<<3) + *p++ - '0' ;
      if ( isoctal(*p) )
        x = (x<<3) + *p++ - '0' ;
    }
    *start_p = p ;
    return x & 0xff ;
  }

  if ( *p++ == 'x' ) /* might be a hex digit */
  { if ( (x = ctohex(*p)) == NOT_HEX )
    { *start_p = p ; return 'x' ; }

    /* look for another hex digit */
    if ( (xx = ctohex(* ++p)) != NOT_HEX )
    { x = (x<<4) + xx ; p++ ; }

    *start_p = p ; return x ;
  }

  /* anything else \c -> c */
  *start_p = p ;
  return (unsigned char) p[-1] ;
}
@//E*O*F mawk0.97/rexp/rexp0.c//
chmod u=rw,g=r,o=r mawk0.97/rexp/rexp0.c
echo x - mawk0.97/rexp/rexp1.c
sed 's/^@//' > "mawk0.97/rexp/rexp1.c" <<'@//E*O*F mawk0.97/rexp/rexp1.c//'
/********************************************
rexp1.c
copyright 1991, Michael D. Brennan
This is a source file for mawk an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* rexp1.c */
/* re machine operations */
#include "rexp.h"
static MACHINE *PROTO( new_TWO , (int) ) ;

/* Build a two state machine: a state of the given type followed by
   M_ACCEPT.
   The MACHINE returned lives in static storage and is overwritten by
   the next call, so callers copy it at once; the state array itself
   is freshly allocated. */
static MACHINE *new_TWO(type)
int type ;
{
  static MACHINE x ;

  x.start = (STATE *) RE_malloc(2*STATESZ) ;
  x.stop = x.start + 1 ;
  x.start->type = type ;
  x.stop->type = M_ACCEPT ;
  return &x ;
}
/* build a machine that recognizes any */
MACHINE RE_any()
{ MACHINE *mp = new_TWO(M_ANY) ;
  return *mp ;
}

/* build a machine that recognizes the start of string */
MACHINE RE_start()
{ MACHINE *mp = new_TWO(M_START) ;
  return *mp ;
}

/* build a machine of type M_END */
MACHINE RE_end()
{ MACHINE *mp = new_TWO(M_END) ;
  return *mp ;
}
/* build a machine that recognizes a class;
   the bit vector is referenced, not copied */
MACHINE RE_class( bvp )
BV *bvp ;
{ register MACHINE *mp = new_TWO(M_CLASS) ;

  mp->start->data.bvp = bvp ;
  return *mp ;
}

/* build a machine of type M_U */
MACHINE RE_u()
{ MACHINE *mp = new_TWO(M_U) ;
  return *mp ;
}
MACHINE RE_str( str, len)
char *str ;
unsigned len ;
{ register MACHINE *p = new_TWO(M_STR) ;
p->start->len = len ;
p->start->data.str = str ;
return *p ;
}
/* replace m and n by a machine that recognizes mn */
void RE_cat( mp, np)
MACHINE *mp, *np ;
{ unsigned sz1, sz2, sz ;
sz1 = mp->stop - mp->start ;
sz2 = np->stop - np->start + 1 ;
sz = sz1 + sz2 ;
mp->start = (STATE *) RE_realloc( mp->start, sz * STATESZ ) ;
mp->stop = mp->start + (sz - 1) ;
(void) memcpy( mp->start + sz1, np->start, sz2 * STATESZ ) ;
RE_free( np->start ) ;
}
/* replace m by a machine that recognizes m|n
   layout:  M_2JA | states of m, its accept turned into M_1J |
            states of n | M_ACCEPT
   the state arrays of m and n are freed */
void RE_or( mp, np)
MACHINE *mp, *np ;
{ unsigned szm = mp->stop - mp->start + 1 ;
  unsigned szn = np->stop - np->start + 1 ;
  register STATE *base ;
  STATE *skip ;   /* the slot that held m's accept state */

  base = (STATE *) RE_malloc( (szm+szn+1) * STATESZ ) ;
  skip = base + szm ;

  (void) memcpy( base + 1, mp->start, szm * STATESZ ) ;
  RE_free( mp->start ) ;
  mp->start = base ;
  mp->stop = base + szm + szn ;
  mp->stop->type = M_ACCEPT ;

  base->type = M_2JA ;
  base->data.jump = szm+1 ;

  (void) memcpy( skip + 1, np->start, szn * STATESZ ) ;
  RE_free( np->start ) ;

  /* leaving m jumps over n */
  skip->type = M_1J ;
  skip->data.jump = szn ;
}
/* UNARY OPERATIONS */

/* replace m by m*
   layout:  M_2JA | states of m, its accept turned into M_2JB | M_ACCEPT
   the old state array is freed */
void RE_close( mp )
MACHINE *mp ;
{ unsigned sz = mp->stop - mp->start + 1 ;
  register STATE *p ;

  p = (STATE *) RE_malloc( (sz+2) * STATESZ ) ;
  (void) memcpy( p+1, mp->start, sz * STATESZ) ;
  RE_free( mp->start ) ;

  mp->start = p ;
  mp->stop = p + (sz+1) ;

  p[0].type = M_2JA ;
  p[0].data.jump = sz + 1 ;

  p[sz].type = M_2JB ;
  p[sz].data.jump = -(sz-1) ;   /* back to the first state of m */

  p[sz+1].type = M_ACCEPT ;
}
/* replace m by m+ (positive closure):
   the accept state of m becomes an M_2JB back to its first state and
   a new M_ACCEPT is appended */
void RE_poscl( mp )
MACHINE *mp ;
{ unsigned sz = mp->stop - mp->start + 1 ;
  register STATE *p ;

  p = (STATE *) RE_realloc(mp->start , (sz+1) * STATESZ ) ;
  mp->start = p ;
  mp->stop = p + sz ;

  p[sz-1].type = M_2JB ;
  p[sz-1].data.jump = -(sz-1) ;
  p[sz].type = M_ACCEPT ;
}
/* replace m by m? (zero or one):
   an M_2JB whose jump leads to the accept state is put in front of m;
   the old state array is freed */
void RE_01( mp )
MACHINE *mp ;
{ unsigned sz = mp->stop - mp->start + 1 ;
  register STATE *p ;

  p = (STATE *) RE_malloc( (sz+1) * STATESZ ) ;
  (void) memcpy( p+1, mp->start, sz * STATESZ) ;
  RE_free( mp->start ) ;

  p[0].type = M_2JB ;
  p[0].data.jump = sz ;

  mp->start = p ;
  mp->stop = p + sz ;
}
/*===================================
MEMORY ALLOCATION
*==============================*/
VOID *RE_xmalloc( sz )
unsigned sz ;
{ register VOID *p ;
if ( ! ( p = malloc(sz) ) ) RE_error_trap(MEMORY_FAILURE) ;
return p ;
}
VOID *RE_xrealloc( p, sz)
register VOID *p ; unsigned sz ;
{ if ( ! ( p = realloc( p, sz) ) ) RE_error_trap(MEMORY_FAILURE) ;
return p ;
}
@//E*O*F mawk0.97/rexp/rexp1.c//
chmod u=rw,g=r,o=r mawk0.97/rexp/rexp1.c
echo x - mawk0.97/rexp/rexp2.c
sed 's/^@//' > "mawk0.97/rexp/rexp2.c" <<'@//E*O*F mawk0.97/rexp/rexp2.c//'
/********************************************
rexp2.c
copyright 1991, Michael D. Brennan
This is a source file for mawk an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompaning file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* rexp2.c */
/* test a string against a machine */
#include "rexp.h"
#include <string.h>
/* statics */
static RT_STATE *PROTO(slow_push,(RT_STATE *,STATE*,char*,int));
/* check that a bit is on */
/* b is a bit vector (BV), x the bit number; the result is nonzero
   when bit x is set */
#define ison(b,x) ( (b)[(x)>>3] & ( 1 << ((x)&7) ))
/* the run time stack: base is its first slot, limit is one past its
   last slot; allocated by RE_run_stack_init(), grown by
   RE_new_run_stack() */
RT_STATE *RE_run_stack_base;
RT_STATE *RE_run_stack_limit ;
/* for statistics and debug */
/* highest slot recorded so far; starts one below the base */
static RT_STATE *stack_max ;
/* allocate the run time stack with 16 slots; does nothing when the
   stack already exists */
void RE_run_stack_init()
{
  if ( RE_run_stack_base ) return ;

  RE_run_stack_base = (RT_STATE *) RE_malloc(sizeof(RT_STATE) * 16 ) ;
  RE_run_stack_limit = RE_run_stack_base + 16 ;
  stack_max = RE_run_stack_base-1 ;
}
/* grow the run time stack by 8 slots; the stack may move.
   Returns the first of the new slots, which is also recorded in
   stack_max. */
RT_STATE *RE_new_run_stack()
{ int oldsize = RE_run_stack_limit - RE_run_stack_base ;
  int newsize = oldsize + 8 ;

  RE_run_stack_base = (RT_STATE *)
      RE_realloc( RE_run_stack_base , newsize * sizeof(RT_STATE) ) ;
  RE_run_stack_limit = RE_run_stack_base + newsize ;
  stack_max = RE_run_stack_base + oldsize ;
  return stack_max ;
}
/* Store (m, s, u) in the slot sp and return the slot used.
   stack_max is raised when sp lies above it; when sp is the stack
   limit the stack is grown first and the first new slot is used. */
static RT_STATE *slow_push(sp, m, s, u)
RT_STATE *sp ;
STATE *m ;
char *s ;
int u ;
{
  if ( sp > stack_max )
  { stack_max = sp ;
    if ( sp == RE_run_stack_limit ) sp = RE_new_run_stack() ;
  }

  sp->m = m ;
  sp->s = s ;
  sp->u = u ;
  return sp ;
}
#ifdef DEBUG
/* print to f the number of run time stack slots up to stack_max */
void print_max_stack(f)
FILE *f ;
{
  /* a pointer difference is not an int: convert it for %d */
  fprintf(f, "stack_max = %d\n",
          (int) (stack_max - RE_run_stack_base + 1)) ;
}
#endif
#ifdef DEBUG
#define push(mx,sx,ux) stackp = slow_push(++stackp, mx, sx, ux)
#else
#define push(mx,sx,ux) if (++stackp == RE_run_stack_limit)\
stackp = slow_push(stackp,mx,sx,ux) ;\
More information about the Alt.sources
mailing list