mawk0.97.shar 1 of 6 (6 pieces not 4)
Mike Brennan
brennan at ssc-vax.UUCP
Sun May 12 00:49:56 AEST 1991
------------------cut here----------------
# This is a shell archive. Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
#
# Wrapped by ssc-bee!brennan on Fri May 10 18:11:41 PDT 1991
# Contents: mawk0.97/ mawk0.97/rexp/ mawk0.97/test/ mawk0.97/examples/
# mawk0.97/msdos/ mawk0.97/packing.list mawk0.97/README
# mawk0.97/LIMITATIONS mawk0.97/Makefile mawk0.97/mawk.manual
# mawk0.97/array.c mawk0.97/bi_funct.c mawk0.97/bi_funct.h
# mawk0.97/bi_vars.c mawk0.97/bi_vars.h mawk0.97/cast.c mawk0.97/code.c
# mawk0.97/code.h mawk0.97/da.c mawk0.97/error.c mawk0.97/execute.c
# mawk0.97/fcall.c mawk0.97/field.c mawk0.97/field.h mawk0.97/files.c
# mawk0.97/files.h mawk0.97/fin.c mawk0.97/fin.h mawk0.97/hash.c
# mawk0.97/init.c mawk0.97/init.h mawk0.97/jmp.c mawk0.97/jmp.h
# mawk0.97/kw.c mawk0.97/machine.h mawk0.97/main.c mawk0.97/makescan.c
# mawk0.97/matherr.c mawk0.97/mawk.h mawk0.97/memory.c mawk0.97/memory.h
# mawk0.97/parse.y mawk0.97/print.c mawk0.97/re_cmpl.c mawk0.97/regexp.h
# mawk0.97/repl.h mawk0.97/scan.c mawk0.97/scan.h mawk0.97/scancode.c
# mawk0.97/sizes.h mawk0.97/split.c mawk0.97/symtype.h mawk0.97/types.h
# mawk0.97/zmalloc.c mawk0.97/zmalloc.h mawk0.97/rexp/Makefile
# mawk0.97/rexp/rexp.c mawk0.97/rexp/rexp.h mawk0.97/rexp/rexp0.c
# mawk0.97/rexp/rexp1.c mawk0.97/rexp/rexp2.c mawk0.97/rexp/rexp3.c
# mawk0.97/rexp/rexpdb.c mawk0.97/test/README mawk0.97/test/benchmarks
# mawk0.97/test/cat.awk mawk0.97/test/concat.awk mawk0.97/test/fields.awk
# mawk0.97/test/loops.awk mawk0.97/test/newton.awk
# mawk0.97/test/primes.awk mawk0.97/test/qsort.awk mawk0.97/test/reg0.awk
# mawk0.97/test/reg1.awk mawk0.97/test/reg2.awk mawk0.97/test/sample
# mawk0.97/test/squeeze.awk mawk0.97/test/test.sh mawk0.97/test/wc.awk
# mawk0.97/test/wfrq.awk mawk0.97/test/wfrq0.awk mawk0.97/test/words.awk
# mawk0.97/test/words0.awk mawk0.97/examples/decl.awk
# mawk0.97/examples/deps.awk mawk0.97/examples/gdecl.awk
# mawk0.97/examples/nocomment.awk mawk0.97/msdos/INSTALL
# mawk0.97/msdos/makefile mawk0.97/msdos/mklib.bat
# mawk0.97/msdos/rand48.asm mawk0.97/msdos/rand48.h
# mawk0.97/msdos/rand48_0.c mawk0.97/msdos/reargv.c
# Create the top-level directory that the archived files are unpacked into.
echo mkdir - mawk0.97
mkdir mawk0.97
chmod u=rwx,g=rx,o=rx mawk0.97
# Unpack packing.list: sed copies the here-document up to the quoted
# terminator line, removing one leading '@' from any line that has it.
echo x - mawk0.97/packing.list
sed 's/^@//' > "mawk0.97/packing.list" <<'@//E*O*F mawk0.97/packing.list//'
################################################
# These files form the mawk distribution
#
# Mawk is an implementation of the AWK Programming Language as
# defined and described in Aho, Kernighan and Weinberger, The
# Awk Programming Language, Addison-Wesley, 1988.
#
################################################
# Source code written by Michael D. Brennan
# Copyright (C) 1991 , Michael D. Brennan
################################################
packing.list this file
README how to get started
LIMITATIONS restrictions on use
Makefile mawk makefile
mawk.manual mock manual
######################
array.c source files
bi_funct.c
bi_funct.h
bi_vars.c
bi_vars.h
cast.c
code.c
code.h
da.c
error.c
execute.c
fcall.c
field.c
field.h
files.c
files.h
fin.c
fin.h
hash.c
init.c
init.h
jmp.c
jmp.h
kw.c
machine.h
main.c
makescan.c
matherr.c
mawk.h
memory.c
memory.h
parse.y
print.c
re_cmpl.c
regexp.h
repl.h
scan.c
scan.h
scancode.c
sizes.h
split.c
symtype.h
types.h
zmalloc.c
zmalloc.h
########################
# directory: rexp
rexp/Makefile makefile for regexp.a
rexp/rexp.c source for regular matching library
rexp/rexp.h
rexp/rexp0.c
rexp/rexp1.c
rexp/rexp2.c
rexp/rexp3.c
rexp/rexpdb.c
#######################
# directory: test benchmarking directory
test/README
test/benchmarks
test/cat.awk
test/concat.awk
test/fields.awk
test/loops.awk
test/newton.awk
test/primes.awk
test/qsort.awk
test/reg0.awk
test/reg1.awk
test/reg2.awk
test/sample sample input file for test.sh
test/squeeze.awk
test/test.sh
test/wc.awk
test/wfrq.awk
test/wfrq0.awk
test/words.awk
test/words0.awk
######################
# directory: examples useful awk programs
examples/decl.awk
examples/deps.awk
examples/gdecl.awk
examples/nocomment.awk
######################
# directory msdos
msdos/INSTALL
msdos/makefile
msdos/mklib.bat
msdos/rand48.asm
msdos/rand48.h
msdos/rand48_0.c
msdos/reargv.c
@//E*O*F mawk0.97/packing.list//
# packing.list: read/write for the owner, read-only for group and others.
chmod u=rw,g=r,o=r mawk0.97/packing.list
# Unpack README; sed removes one leading '@' from any here-document line.
echo x - mawk0.97/README
sed 's/^@//' > "mawk0.97/README" <<'@//E*O*F mawk0.97/README//'
to build mawk:
make sure there is an appropriate description of
your system in machine.h
set CFLAGS in the Makefile to pick the appropriate blob
in machine.h
run make
PS:
I expected to have bcopy() <-> memcpy()
hassles on 4.3BSD, but didn't
Is this right? or did someone add memcpy(), strchr() etc
to that machine?
If 4.3BSD in machine.h is wrong, let me know at
brennan at bcsaic.boeing.com
@//E*O*F mawk0.97/README//
# README is left read-only for everyone.
chmod u=r,g=r,o=r mawk0.97/README
# Unpack LIMITATIONS; sed removes one leading '@' from any here-document line.
echo x - mawk0.97/LIMITATIONS
sed 's/^@//' > "mawk0.97/LIMITATIONS" <<'@//E*O*F mawk0.97/LIMITATIONS//'
Mawk is an implementation of the AWK Programming Language
as defined in Aho, Kernighan and Weinberger, The AWK
Programming Language, Addison-Wesley, 1988.
The source code is original work, in the sense that its
development relied only on the specification of the AWK
language in the book above. Most of the algorithms and
data structures used in this code are not original --
but based on knowledge acquired from numerous sources.
Originality is claimed only for the aggregate work. Any
ideas or techniques in this code can be freely copied and
used in other work.
The source code may be modified provided the copyright
notices remain intact, and modifications are unambiguously
distinct from the original. I want to retain credit for my
work and do not want credit for yours.
Redistribution in any form is permitted provided the built-in
variable VERSION is retained, and its initial value only
modified by appending extra lines.
For example, if you modify a mawk with VERSION
mawk x.xx Mon Year, Copyright (C) Michael D. Brennan
then add an extra line to VERSION without modifying the
first line.
mawk x.xx Mon Year, Copyright (C) Michael D. Brennan
mod y.yy Mon Year, your name
Michael D. Brennan
16 Apr 1991
@//E*O*F mawk0.97/LIMITATIONS//
# LIMITATIONS is left read-only for everyone.
chmod u=r,g=r,o=r mawk0.97/LIMITATIONS
# Unpack Makefile; sed removes one leading '@' from any here-document line.
echo x - mawk0.97/Makefile
sed 's/^@//' > "mawk0.97/Makefile" <<'@//E*O*F mawk0.97/Makefile//'
# ###################################################
# This is a makefile for mawk,
# an implementation of The AWK Programming Language, 1988.
#
#
SHELL=/bin/sh
####################################
# CFLAGS needs to match a define in machine.h
# unless machine.h uses a built-in compiler flag
#
CFLAGS = -O -DULTRIX
#CFLAGS = -O -DBSD43
# Parser generator; the parse.c rule expects it to leave y.tab.c and
# y.tab.h behind.  Uncomment the bison line to use bison instead.
YACC=yacc -dv
#YACC=bison -dvy
#######################################
# O: the object files that are linked into mawk
O=parse.o scan.o memory.o main.o hash.o execute.o code.o\
da.o error.o init.o bi_vars.o cast.o print.o bi_funct.o\
kw.o jmp.o array.o field.o split.o re_cmpl.o zmalloc.o\
fin.o files.o scancode.o matherr.o fcall.o
# REXP_C: the sources that rexp/regexp.a is rebuilt from
REXP_C=rexp/rexp.c rexp/rexp0.c rexp/rexp1.c rexp/rexp2.c\
rexp/rexp3.c rexp/rexpdb.c
# Link the interpreter from its objects and the regular expression library.
# $(CC) is used so the link step follows the same compiler as the objects.
mawk : $(O) rexp/regexp.a
	$(CC) $(CFLAGS) -o mawk $(O) -lm rexp/regexp.a

# Build the regular expression library with the makefile in rexp/.
# '&&' stops make from being run in this directory if the cd fails.
rexp/regexp.a : $(REXP_C)
	cd rexp && $(MAKE)

# Generate the parser.  The new y.tab.h replaces parse.h only when the two
# differ, which leaves parse.h's timestamp alone when nothing changed.
parse.c : parse.y
	@echo expect 3 shift/reduce conflicts
	$(YACC) parse.y
	mv y.tab.c parse.c
	-if cmp -s y.tab.h parse.h ;\
	then rm y.tab.h ;\
	else mv y.tab.h parse.h ; fi

# scancode.c is the output of a helper program built from makescan.c.
# The helper is run as ./makescan.exe so '.' does not have to be in PATH.
scancode.c : makescan.c scan.h
	$(CC) -o makescan.exe makescan.c
	./makescan.exe > scancode.c
	rm makescan.exe
# Header dependencies: each object is recompiled when any header listed
# on its line changes.
array.o : bi_vars.h sizes.h zmalloc.h memory.h types.h machine.h mawk.h symtype.h
bi_funct.o : fin.h bi_vars.h sizes.h memory.h zmalloc.h regexp.h types.h machine.h field.h repl.h files.h bi_funct.h mawk.h symtype.h init.h
bi_vars.o : bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h mawk.h symtype.h init.h
cast.o : parse.h sizes.h memory.h zmalloc.h types.h machine.h field.h scan.h repl.h mawk.h symtype.h
code.o : sizes.h memory.h zmalloc.h types.h machine.h code.h mawk.h init.h
da.o : sizes.h memory.h zmalloc.h types.h machine.h field.h repl.h code.h bi_funct.h mawk.h symtype.h
error.o : parse.h bi_vars.h sizes.h types.h machine.h scan.h mawk.h symtype.h
execute.o : sizes.h memory.h zmalloc.h regexp.h types.h machine.h field.h code.h repl.h bi_funct.h mawk.h symtype.h
fcall.o : sizes.h memory.h zmalloc.h types.h machine.h code.h mawk.h symtype.h
field.o : parse.h bi_vars.h sizes.h memory.h zmalloc.h regexp.h types.h machine.h field.h scan.h repl.h mawk.h symtype.h init.h
files.o : fin.h sizes.h memory.h zmalloc.h types.h machine.h files.h mawk.h
fin.o : parse.h fin.h bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h scan.h mawk.h symtype.h
hash.o : sizes.h memory.h zmalloc.h types.h machine.h mawk.h symtype.h
init.o : bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h code.h mawk.h symtype.h init.h
jmp.o : sizes.h memory.h zmalloc.h types.h machine.h code.h jmp.h mawk.h init.h
kw.o : parse.h sizes.h types.h machine.h mawk.h symtype.h init.h
main.o : fin.h bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h code.h files.h mawk.h init.h
makescan.o : parse.h scan.h symtype.h
matherr.o : sizes.h types.h machine.h mawk.h
memory.o : sizes.h memory.h zmalloc.h types.h machine.h mawk.h
parse.o : bi_vars.h sizes.h memory.h zmalloc.h types.h machine.h field.h code.h files.h bi_funct.h mawk.h jmp.h symtype.h
print.o : bi_vars.h parse.h sizes.h memory.h zmalloc.h types.h machine.h field.h scan.h files.h bi_funct.h mawk.h symtype.h
re_cmpl.o : parse.h sizes.h memory.h zmalloc.h regexp.h types.h machine.h scan.h repl.h mawk.h symtype.h
scan.o : parse.h fin.h sizes.h memory.h zmalloc.h types.h machine.h field.h scan.h repl.h files.h mawk.h symtype.h init.h
split.o : bi_vars.h parse.h sizes.h memory.h zmalloc.h regexp.h types.h machine.h field.h scan.h bi_funct.h mawk.h symtype.h
zmalloc.o : sizes.h zmalloc.h types.h machine.h mawk.h
@//E*O*F mawk0.97/Makefile//
# Makefile is left read-only for everyone.
chmod u=r,g=r,o=r mawk0.97/Makefile
# Unpack mawk.manual; sed removes one leading '@' from any here-document line.
echo x - mawk0.97/mawk.manual
sed 's/^@//' > "mawk0.97/mawk.manual" <<'@//E*O*F mawk0.97/mawk.manual//'
Mawk Manual
Mawk implements the awk language as defined in Aho, Kernighan and
Weinberger, The AWK Programming Language, Addison-Wesley, 1988, ISBN
0-201-07981-X, hereafter called the AWK book. Chapter 2 serves as a
reference to the language and the rest (8 total chapters) provides a
wide range of examples and applications. This book is must reading to
understand the versatility of the language.
The 1988 version of the language is sometimes called new awk as opposed
to the 1977 version (awk or old awk.) Virtually every Unix system has
old awk, somewhere in the documentation will be an (old) awk tutorial
(probably in support tools). If you use (old) awk, the transition to
new awk is easy. The language has been extended and ambiguous points
clarified, but old awk programs still run under new awk.
This manual assumes you know (old) awk, and hence concentrates on the
new features of awk. Feature xxx is new means xxx was added to the 1988
version.
Experienced new awk users should read sections 9 and 12, and skim
sections 7 and 8.
1. Command line
mawk [-Fs] 'program' optional_list_of_files
mawk [-Fs] -f program_file optional_list_of_files
2. Program blocks
Program blocks are of the form:
pattern { action }
pattern can be:
regular_expression
expression
( pattern )
! pattern
pattern || pattern
pattern && pattern
pattern , pattern (range pattern)
BEGIN
END
Range, BEGIN and END patterns cannot be combined to form new patterns.
BEGIN and END patterns require an action; otherwise, if action is
omitted it is implicitly { print }.
NR==2 { print } # prints line number 2
NR==2 # also prints line number 2
If pattern is omitted then action is always applied.
{ print $NF }
prints the last field of every record.
3. Statement format and loops
Statements are terminated by newlines, semi-colons or both. Groups of
statements are blocked via { ... } as in C. The last statement in a
block doesn't need a terminator. Blank lines have no meaning; an empty
statement is terminated with a semi-colon. Long statements can be
continued with a backslash, \. A statement can be broken without a
backslash after a comma, left brace, &&, ||, do, else, the right
parenthesis of an if, while or for statement, and the right parenthesis
of a function definition.
Loops are for(){}, while(){} and do{}while() as in C.
4. Expression syntax
The expression syntax and grouping of the language is similar to C.
Primary expressions are numeric constants, string constants, variables,
arrays and functions. Complex expressions are composed with the
following operators in order of increasing precedence.
assignment: = += -= *= /= ^=
conditional: ? :
logical or: ||
logical and: &&
array membership : in
matching : ~ !~
relational : < > <= >= == !=
concatenation: (no explicit operator)
add ops: + -
mul ops: * / %
unary : + -
logical not : !
exponentiation: ^
inc and dec: ++ -- (both post and pre)
field: $
5. Builtin variables.
The following variables are built-in and initialized before program
execution.
ARGC number of command line arguments
ARGV array of command line arguments, 0..ARGC-1
FILENAME name of the current input file
FNR current record number in the current input file
FS splits records into fields as a regular expression
NF number of fields in the current record, i.e., $0
NR current record number in the total input stream
OFMT format for printing numbers; initially = "%.6g"
OFS inserted between fields on output, initially = " "
ORS terminates each record on output, initially = "\n"
RLENGTH length of the string matched by the last call to the built-in function, match()
RS input record separator, initially = "\n"
RSTART index of the match found by the last call to match()
SUBSEP used to build multiple array subscripts, initially = "\034"
VERSION Mawk version, unique to mawk.
ARGC, ARGV, FNR, RLENGTH, RSTART and SUBSEP are new.
The current input record is stored in the field, $0. The fields of $0
determined by splitting with FS are stored in $1, $2, ..., $NF.
6. Built-in Functions
String functions
index(s,t)
length(s), length
split(s, A, r), split(s, A)
substr(s,i,n) , substr(s,i)
sprintf(format, expr_list)
match(s,r) returns the index where string s matches
regular expression r or 0 if no match. As
a side effect, sets RSTART and RLENGTH.
gsub(r, s, t) Global substitution, every match of regular
expression r in variable t is replaced by s.
The number of matches/replacements is returned.
sub(r, s, t) Like gsub(), except at most one replacement.
Match(), gsub() and sub() are new. If r is an expr it is coerced to
string and then treated as a regular expression. In sub and gsub, t can
be a variable, field or array element, i.e., it must have storage to
hold the modification. Sub(r,s) and gsub(r,s) are the same as
sub(r,s,$0) and gsub(r,s,$0). In the replacement string s, an & is
replaced by the matched piece and a literal & is obtained with \&.
E.g.,
y = x = "abbc"
sub(/b+/, "B&B" , x)
sub(/b+/, "B\&B" , y)
print x, y
outputs: aBbbBc aB&Bc
Arithmetic functions
atan2(y,x) arctan of y/x between -pi and pi.
cos(x)
exp(x)
int(x) x.dddd -> x.0
log(x)
rand() returns random number , 0 <= r < 1.
sin(x)
sqrt(x)
srand(x) , srand() seeds random number generator, uses clock
if x is omitted.
Output functions
print writes $0 ORS to stdout.
print expr1 , expr2 , ... , exprn
writes expr1 OFS expr2 OFS ... OFS exprn ORS to
stdout.
printf format, expr_list
Acts like the C library function, writing to
stdout. Supported conversions are
%c, %d, %e, %f, %g, %o, %s and %x.
- , width and .prec are supported.
Dynamic widths can be built using string operations
Output can be redirected
print[f] > file
>> file
| command
File and command are awk expressions that are interpreted as a filename
or a shell command.
Input functions
getline read $0, update NF, NR and FNR.
getline < file read $0 from file, update NF.
getline var read var from input stream, update NR, FNR.
getline var < file read var from next record of file
command | getline read $0 from piped command, update NF.
command | getline var read var from next record of piped command.
(Old) awk had getline, the redirection facilities are new.
Files or commands are closed with
close(expr)
where expr is command or file as a string. Close returns 0 if expr was
in fact an open file or command else -1. Close is needed if you want to
reread a file, rerun a command, have a large number of output files
without mawk running out of resources or wait for an output command to
finish. Here is an example of the last case:
{ .... do some processing on each input line
# send the processed line to sort
print | "sort > temp_file"
}
END { # reread the sorted input
close( "sort > temp_file") # makes sure sort is finished
cnt=1
while ( getline line[cnt++] < "temp_file" > 0 ) ;
system( "rm temp_file" ) # cleanup
... process line[1], line[2] ... line[cnt-1]
}
The system() function executes a command and returns the command's exit
status. Mawk uses the shell in the environment variable SHELL to
execute system or command pipelines; defaulting to "/bin/sh" if SHELL is
not set.
7. String constants
String constants are written as in C.
"This is a string with a newline at the end.\n"
Strings can be continued across a line by escaping (\) the newline. The
following escape sequences are recognized.
\\ \
\" "
\' '
\a alert, ascii 7
\b backspace, ascii 8
\t tab, ascii 9
\n newline, ascii 10
\v vertical tab, ascii 11
\f formfeed, ascii 12
\r carriage return, ascii 13
\ddd 1, 2 or 3 octal digits for ascii ddd
\xhh 1 or 2 hex digits for ascii hh
If you escape any other character \c, you get \c, i.e. the escape is
ignored. Mawk is different than most awks here; the AWK book says \c is
c. The reason mawk chooses to be different is for easier conversion of
strings to regular expressions.
8. Regular expressions
Awk notation for regular expressions is in the style of egrep(1). In
awk, regular expressions are enclosed in / ... /. A regular expression
/r/, is a set of strings.
s ~ /r/
is an awk expression that evaluates to 1 if an element of /r/ is a
substring of s and evaluates to 0 otherwise. ~ is called the match
operator and the expression is read "s matches r".
s ~ /^r/ is 1 if some element of r is a prefix of s.
s ~ /r$/ is 1 if some element of r is a suffix of s.
s ~ /^r$/ is 1 if s is an element of r.
Replacing ~ by !~ , the not match operator, reverses the meanings. In
patterns, /r/ and !/r/ are shorthand for $0 ~ /r/ and $0 !~ /r/.
Regular expressions are combined by the following rules.
// stands for the one element set "" (not the empty set).
/c/ for a character c is the one element set "c".
/rs/ is all elements of /r/ concatenated with all
elements of /s/.
/r|s/ is the set union of /r/ and /s/.
/r*/ called the closure of r is // union /rr/ union /rrr/ ...
In words, r repeated zero or more times.
The above operations are sufficient to describe all regular expressions,
but for ease of notation awk defines additional operations and notation.
/r?/ // union /r/. In words r 0 or 1 time.
/r+/ Positive closure of r. R 1 or more times.
(r) Same as r -- allows grouping.
. Stands for any character (for mawk this means
ascii 1 through ascii 255)
[c1c2..cn] A character class same as (c1|c2|...|cn) where
ci's are single characters.
[^c1c2..cn] Complement of the class [c1c2..cn]. For mawk
complement in the ascii character set 1 to 255.
Ranges c1-cn are allowed in character classes. For example,
/[_a-zA-Z][_a-zA-Z0-9]*/
expresses the set of possible identifiers in awk.
The operators have increasing precedence:
|
implicit concatenation
+ * ?
So /a|b+/ means a or (1 or more b's), and /(a|b)+/ means (a or b) one or
more times. The so called regular expression metacharacters are \ ^ $ .
[ ] | ( ) * + ? . To stand for themselves as characters they have to be
escaped. (They don't have to be escaped in classes, inside classes the
meta-meaning is off). The same escape sequences that are recognized in
strings (see above) are recognized in regular expressions. For mawk,
the escape rule for \c changes to c.
For example,
/[ \t]*/ is optional space
/^[-+]?([0-9]+\.?|\.[0-9])[0-9]*([eE][-+]?[0-9]+)?$/
is numbers in the Awk language.
Note, . must be escaped to have
its meaning as decimal point.
For building regular expressions, you can think of ^ and $ as phantom
characters at the front and back of every string. So /(^a|b$|^A.*B$)/
is the set of strings that start with a or end with b or (start with A
and end with B).
Dynamic regular expressions are new. You can write
x ~ expr
and expr is interpreted as a regular expression. The result of x ~ y
can vary with the variable y; so
x ~ /a\+b/ and x ~ "a\+b"
are the same, or are they? In mawk, they are; in some other awk's they
are not. In the second expression, "a\+b" is scanned twice: once as a
string constant and then again as a regular expression. In mawk the
first scan gives the four character string 'a' '\' '+' 'b' because mawk
treats \+ as \+; the second scan gives a regular expression matched by
the three character string 'a' '+' 'b' because on the second scan \+
becomes +.
If \c becomes c in strings, you need to double escape metacharacters,
i.e., write
x ~ "a\\+b".
Exercise: what happens if you double escape in mawk?
In strings if you only escape characters with defined escape sequences
such as \t or \n or meta-characters when you expect to use a string as a
regular expression, then mawk's rules are intuitive and simple. See
examples/decl.awk and examples/gdecl.awk for the same program with single
and double escapes, the first is clearer.
9. How Mawk splits lines, records and files.
Mawk uses essentially the same algorithm to split lines into pieces
with split(), records into fields on FS, and files into records on RS.
Split( s, A, sep ) splits string s into array A with separator sep as
follows:
Sep is interpreted as a regular expression.
If s = "", there are no pieces and split returns 0.
Otherwise s is split into pieces by the matches with sep
of positive length treated as a separator between pieces,
so the number of pieces is the number of matches + 1.
Matches of the null string do not split.
So sep = "b+" and sep = "b*" split the same although the
latter executes more slowly.
Split(s, A) is the same as split(s, A, FS).
With mawk you can write sep as a regular expression, i.e.,
split(s, A, "b+") and split(s, A, /b+/) are the same.
Sep = " " (a single space) is special. Before the algorithm is
applied, white-space is trimmed from the front and back of s.
Mawk defines white-space as SPACE, TAB, FORMFEED, VERTICAL TAB
or NEWLINE, i.e [ \t\f\v\n]. Usually this means SPACE or TAB
because NEWLINE usually separates records, and the other
characters are rare. The above algorithm
is then applied with sep = "[ \t\f\v\n]+".
If length(sep) = 1, then regular expression metacharacters do
not have to be escaped, i.e. split(s, A, "+") is the same as
split(s, A, /\+/).
Splitting records into fields works exactly the same except the pieces
are loaded into $1, $2 ... $NF.
Records are also the same, RS is treated as a regular expression. But
there is a slight difference, RS is really a record terminator (ORS is
really a terminator also).
E.g., if FS = ":" and $0 = "a:b:" , then
NF = 3 and $1 = "a", $2 = "b" and $3 = "", but
if "a:b:" is the contents of an input file and RS = ":", then
there are two records "a" and "b".
RS = " " does not have special meaning as with FS.
Not all versions of (new) awk support RS as a regular expression. This
feature of mawk is useful and improves performance.
BEGIN { RS = "[^a-zA-Z]+"
getline
if ( $0 == "" ) NR = 0
else word[1] = $0
}
{ word[NR] = $0 }
END { ... do something with word[1]...word[NR] }
isolates words in a document over twice as fast as reading one line at a
time and then examining each field with FS = "[^a-zA-Z]+".
To remove comments from C code:
BEGIN { RS = "/\*([^*]|\*[^/])*\*/" # comment is RS
ORS = " "
}
{ print }
END { printf "\n" }
10. Multi-line records
Since mawk interprets RS as a regular expression, multi-line records are
easy. Setting RS = "\n\n+", makes one or more blank lines separate
records. If FS = " " (the default), then single newlines, by the rules
for space above, become space.
For example, if a file is "a b\nc\n\n", RS = "\n\n+" and
FS = " ", then there is one record "a b\nc" with three
fields "a", "b" and "c". Changing FS = "\n", gives two
fields "a b" and "c"; changing FS = "", gives one field
identical to the record.
For compatibility with (old) awk, setting RS = "" has the same
effect on determining records as RS = "\n([ \t]*\n)+".
Most of the time when you change RS for multi-line records, you
will also want to change ORS to "\n\n".
11. User functions.
User defined functions are new. They can be passed expressions by value
or arrays by reference. Function calls can be nested and support
recursion. The syntax is
function funcname( args ) {
.... body
}
Newlines are ignored after the ')' so the '{' can start on a different
line. Inside the body, you can use a return statement
return expr
return
As in C, there is no distinction between functions and procedures. A
function does not need an explicit return. Extra arguments act as local
variables. For example, csplit(s, A) puts each character of s in array
A.
function csplit(s, A, i)
{
for(i=1; i <= length(s) ; i++)
A[i] = substr(s, i, 1)
}
Putting lots of space between the passed arguments and the local
variables is a convention that can be ignored if you don't like it.
Dynamic regular expressions allow regular expressions to be passed to
user defined functions. The following function gobble() is the lexical
scanner for a recursive descent parser, the whole program is in
examples/decl.awk.
function gobble( r, x) # eat regular expression
# r off the front of global variable line
{
if ( match( line, "^(" r ")") )
{
x = substr(line, 1, RLENGTH)
line = substr(line, RLENGTH+1)
}
else x = ""
return x
}
You can call a function before it is defined, but the function name and
the '(' must not be separated by white space to avoid confusion with
concatenation.
12. Other differences in mawk
The main differences between mawk and other awks have been discussed, RS
as a regular expression and regular expression metacharacters don't have
to be double escaped. Here are some others:
VERSION -- built-in variable holding version number of mawk.
mawk 'BEGIN{print VERSION}' shows it.
-D -- command line flag causes mawk to dump to stderr
a mawk assembler listing of the current program.
The program is executed by a stack machine internal
to mawk. The op codes are in code.h, the machine in
execute.c.
srand() --
During initialization, mawk seeds the random number generator
by silently calling srand(), so calling srand() yourself is
unnecessary. The main use of srand is to use srand(x) to get
a repeatable stream of random numbers. Srand(x) returns x
and srand() returns the value of the system clock in some form
of ticks.
13. MsDOS
For a number of reasons, entering a mawk program on the command line
using command.com as your shell is an exercise in futility, so under
MsDOS the command syntax is
mawk [-Fs] optional_list_of_files
You'll get a prompt, and then type in the program. The -f option works
as before.
If you use a DOS shell that gives you a Unix style command line, to use
it you'll need to provide a C function reargv() that retrieves argc and
argv[] from your shell. The details are in msdos/INSTALL.
Some features are missing from the DOS version of mawk: No system(), and
no input or output pipes. To provide a hook to stderr, I've added
errmsg( "string" )
which prints "string\n" to stderr which will be the console and only the
console under command.com. A better solution would be to associate a
file with handle 2, so print and printf would be available. Consider
the errmsg() feature as temporary.
For compatibility with Unix, CR are silently stripped from input and LF
silently become CRLF on output.
WARNING: If you write an infinite loop that does not print to the
screen, then you will have to reboot. For example
x = 1
while( x < 10 ) A[x] = x
x++
By mistake the x++ is outside the loop. What you need to do is type
control break and the keyboard hardware will generate an interrupt and
the operating system will service that interrupt and terminate your
program, but unfortunately MsDOS does not have such a feature.
14. Bugs
Currently mawk cannot handle \0 (NUL) characters in input files
otherwise mawk is 8 bit clean. Also "a\0b", doesn't work right -- you
get "a". You can't use \0 in regular expressions either.
printf "A string%c more string\n" , 0
does work, but more by luck than design since it doesn't work with
sprintf().
15. Releases
This release is version 0.97. After a reasonable period of time, any
bugs that appear will be fixed, and this release will become version
1.0.
Evidently features have been added to awk by Aho, Kernighan and
Weinberger since the 1988 release of the AWK book. Version 1.1 will add
whatever features are necessary to remain compatible with the language
as defined by its designers.
After that ... ?
16. Correspondence
Send bug reports or other correspondence to
Mike Brennan
brennan at bcsaic.boeing.com
If you have some interesting awk programs, contributions to the examples
directory would be appreciated.
@//E*O*F mawk0.97/mawk.manual//
# mawk.manual: read/write for the owner, read-only for group and others.
chmod u=rw,g=r,o=r mawk0.97/mawk.manual
# Unpack array.c; sed removes one leading '@' from any here-document line.
echo x - mawk0.97/array.c
sed 's/^@//' > "mawk0.97/array.c" <<'@//E*O*F mawk0.97/array.c//'
/********************************************
array.c
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* $Log: array.c,v $
* Revision 2.1 91/04/08 08:22:15 brennan
* VERSION 0.97
*
*/
#include "mawk.h"
#include "symtype.h"
#include "memory.h"
#include "bi_vars.h"
extern int returning ;
/* flag -- on if returning from function call */
extern unsigned hash() ;
/* An array A is a pointer to a hash table of size
A_HASH_PRIME holding linked lists of ANODEs.
When an index is deleted via delete A[i], the
ANODE is not removed from the hash chain. A[i].cp
and A[i].sval are both freed and sval is set NULL.
This method of deletion simplifies for( i in A ) loops.
*/
/* array_test: membership test, is the index sval in array A ?
   Walks the hash chain selected by sval's text and returns 1 as
   soon as a live node holds an equal string, otherwise 0.
   Nothing is created or modified.
*/
int array_test( A, sval)
  ARRAY A ;
  STRING *sval ;
{
  char *key = sval->str ;
  register ANODE *node ;

  for ( node = A[ hash(key) % A_HASH_PRIME ] ; node ; node = node->link )
  {
    /* a node whose sval is null has been deleted */
    if ( ! node->sval )  continue ;

    if ( strcmp(key, node->sval->str) == 0 )  return 1 ;
  }

  return 0 ;
}
/* array_find: return a[x] as a CELL*, creating the element if needed.
   x is read as a char* when flag is non-zero and as a STRING*
   when flag is zero.

   If no live node on the hash chain matches, the first deleted
   node met on the chain is reused; with none available a new
   node is pushed on the front of the chain.  A created element
   gets a fresh cell of type C_NOINIT.
*/
CELL *array_find( a, x, flag)
  ARRAY a ; PTR x ; int flag ;
{
  register ANODE *node ;            /* walks the chain */
  ANODE *spare = (ANODE *) 0 ;      /* first deleted node seen */
  unsigned slot ;
  char *key ;

  if ( flag )  key = (char *) x ;
  else  key = ( (STRING *) x) -> str ;

  slot = hash(key) % A_HASH_PRIME ;

  for ( node = a[slot] ; node ; node = node->link )
  {
    if ( ! node->sval )   /* deleted, remember only the first */
    {
      if ( ! spare )  spare = node ;
    }
    else
    if ( strcmp(key, node->sval->str) == 0 )  return node->cp ;
  }

  /* no such element, so build it */
  if ( spare )  node = spare ;
  else
  {
    node = (ANODE *) zmalloc( sizeof(ANODE) ) ;
    node->link = a[slot] ;
    a[slot] = node ;
  }

  if ( flag )  node->sval = new_STRING(key) ;
  else
  {
    /* share the caller's STRING, so count one more reference */
    node->sval = (STRING *) x ;
    node->sval->ref_cnt++ ;
  }

  node->cp = new_CELL() ;
  node->cp->type = C_NOINIT ;
  return node->cp ;
}
/* array_delete -- delete a[sval].
   The matching node stays on its hash chain; its cell and its
   index string are released and sval is set to NULL.
   Nothing happens when the index is not in the array.
*/
void array_delete( a, sval)
  ARRAY a ; STRING *sval ;
{
  char *key = sval->str ;
  register ANODE *node = a[ hash(key) % A_HASH_PRIME ] ;

  for ( ; node ; node = node->link )
  {
    if ( ! node->sval || strcmp(key, node->sval->str) != 0 )
        continue ;

    /* found */
    cell_destroy(node->cp) ;
    free_CELL(node->cp) ;
    free_STRING(node->sval) ;
    node->sval = (STRING *) 0 ;
    return ;
  }
}
/* for ( i in A ) ,
loop over elements of an array
sp[0].ptr : a pointer to A ( the hash table of A)
sp[-1] : a pointer to i ( a cell ptr)
cdp[0] : a stop op to catch breaks
cdp[1] : offset from cdp of the code after the loop (n+2)
cdp[2] : start of body of the loop
cdp[3..n] : the rest of the body
cdp[n+1] : a stop op to delimit the body and catch continues
*/
INST *array_loop( cdp, sp, fp) /* passed code, stack and frame ptrs */
INST *cdp ;
CELL *sp, *fp ;
{ int i ;
register ANODE *p ;
ARRAY A = (ARRAY) sp-- -> ptr ;
register CELL *cp = (CELL *) sp-- -> ptr ;
for ( i = 0 ; i < A_HASH_PRIME ; i++ )
for ( p = A[i] ; p ; p = p->link )
{ if ( ! p->sval /* its deleted */ ) continue ;
cell_destroy(cp) ;
cp->type = C_STRING ;
cp->ptr = (PTR) p->sval ;
p->sval->ref_cnt++ ;
/* execute the body of the loop */
if ( execute(cdp+2, sp, fp) == cdp /* exec'ed a break statement */
|| returning /* function return in body of loop */
)
goto break2 /* break both for loops */ ;
}
break2 :
return cdp + cdp[1].op ;
}
/* cat together cnt elements on the eval stack to form
an array index using SUBSEP */
CELL *array_cat( sp, cnt)
register CELL *sp ;
int cnt ;
{ register CELL *p ; /* walks the stack */
CELL subsep ; /* a copy of bi_vars[SUBSEP] */
unsigned subsep_len ;
char *subsep_str ;
unsigned total_len ; /* length of cat'ed expression */
CELL *top ; /* sp at entry */
char *t ; /* target ptr when catting */
STRING *sval ; /* build new STRING here */
/* get a copy of subsep, we may need to cast */
(void) cellcpy(&subsep, bi_vars + SUBSEP) ;
if ( subsep.type < C_STRING ) cast1_to_s(&subsep) ;
subsep_len = string(&subsep)->len ;
subsep_str = string(&subsep)->str ;
total_len = --cnt * subsep_len ;
top = sp ;
sp -= cnt ;
for( p = sp ; p <= top ; p++ )
{
if ( p->type < C_STRING ) cast1_to_s(p) ;
total_len += string(p)->len ;
}
sval = new_STRING((char *)0, total_len) ;
t = sval->str ;
/* put the pieces together */
for( p = sp ; p < top ; p++ )
{ (void) memcpy(t, string(p)->str, string(p)->len) ;
(void) memcpy( t += string(p)->len, subsep_str, subsep_len) ;
t += subsep_len ;
}
/* p == top */
(void) memcpy(t, string(p)->str, string(p)->len) ;
/* done, now cleanup */
free_STRING(string(&subsep)) ;
while ( p >= sp ) { free_STRING(string(p)) ; p-- ; }
sp->type = C_STRING ;
sp->ptr = (PTR) sval ;
return sp ;
}
/* array_free -- free all memory used by an array,
   only used for arrays local to a function call.
   Every node of every chain is released, then the table itself.
*/
void array_free(A)
  ARRAY A ;
{
  register ANODE *node ;
  register int slot ;
  ANODE *next ;

  for ( slot = 0 ; slot < A_HASH_PRIME ; slot++ )
    for ( node = A[slot] ; node ; node = next )
    {
      next = node->link ;   /* remember it before the node goes away */

      /* a deleted node has already given up its string and cell */
      if ( node->sval )
      { free_STRING(node->sval) ;
        cell_destroy(node->cp) ;
        free_CELL(node->cp) ;
      }
      zfree( node, sizeof(ANODE)) ;
    }

  zfree(A, sizeof(ANODE *) * A_HASH_PRIME ) ;
}
@//E*O*F mawk0.97/array.c//
chmod u=rw,g=r,o=r mawk0.97/array.c
echo x - mawk0.97/bi_funct.c
sed 's/^@//' > "mawk0.97/bi_funct.c" <<'@//E*O*F mawk0.97/bi_funct.c//'
/********************************************
bi_funct.c
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* $Log: bi_funct.c,v $
* Revision 2.3 91/04/17 06:34:00 brennan
* index("","") should be 1 not 0 for consistency with match("",//)
*
* Revision 2.2 91/04/09 12:38:42 brennan
* added static to funct decls to satisfy STARDENT compiler
*
* Revision 2.1 91/04/08 08:22:17 brennan
* VERSION 0.97
*
*/
#include "mawk.h"
#include "bi_funct.h"
#include "bi_vars.h"
#include "memory.h"
#include "init.h"
#include "files.h"
#include "fin.h"
#include "field.h"
#include "regexp.h"
#include "repl.h"
#include <math.h>
#ifndef BSD43
void PROTO( srand48, (long) ) ;
double PROTO( drand48, (void) ) ;
#endif
/* statics */
static STRING *PROTO(gsub, (PTR, CELL *, char *, int) ) ;
static void PROTO( fplib_err, (char *, double, char *) ) ;
/* global for the disassembler */
/* Table of builtin functions, loaded into the symbol table by
   bi_funct_init().  Each entry is a name, the C function that
   implements it, and two integers -- presumably the minimum and
   maximum number of arguments (BI_REC is declared elsewhere;
   confirm there).  The table ends with an entry whose name is
   a null pointer.
*/
BI_REC bi_funct[] = { /* info to load builtins */
"index" , bi_index , 2, 2 ,
"substr" , bi_substr, 2, 3,
"sprintf" , bi_sprintf, 1, 255,
"sin", bi_sin , 1, 1 ,
"cos", bi_cos , 1, 1 ,
"atan2", bi_atan2, 2,2,
"exp", bi_exp, 1, 1,
"log", bi_log , 1, 1 ,
"int", bi_int, 1, 1,
"sqrt", bi_sqrt, 1, 1,
"rand" , bi_rand, 0, 0,
"srand", bi_srand, 0, 1,
"close", bi_close, 1, 1,
"system", bi_system, 1, 1,
#if DOS /* this might go away, when pipes and system are added
for DOS */
"errmsg", bi_errmsg, 1, 1,
#endif
/* sentinel: bi_funct_init() stops at the null name */
(char *) 0, (PF_CP) 0, 0, 0 } ;
/* bi_funct_init -- enter every builtin of bi_funct[] in the
   symbol table, seed the random number generator and enter
   "length", which gets its own symbol type.
*/
void bi_funct_init()
{
  register BI_REC *rec ;
  register SYMTAB *stp ;
  CELL seed_cell ;

  for ( rec = bi_funct ; rec->name ; rec++ )
  {
    stp = insert( rec->name ) ;
    stp->type = ST_BUILTIN ;
    stp->stval.bip = rec ;
  }

  /* seed rand() off the clock:
     a zero type tells bi_srand() there is no user seed */
  seed_cell.type = 0 ;
  (void) bi_srand(&seed_cell) ;

  stp = insert( "length") ;
  stp->type = ST_LENGTH ;
}
/**************************************************
string builtins (except split (in split.c) and [g]sub (at end))
**************************************************/
CELL *bi_length(sp)
register CELL *sp ;
{ unsigned len ;
if ( sp->type < C_STRING ) cast1_to_s(sp) ;
len = string(sp)->len ;
free_STRING( string(sp) ) ;
sp->type = C_DOUBLE ;
sp->dval = (double) len ;
return sp ;
}
/* str_str -- find the first occurrence of key in target.

   target  : NUL terminated string to search
   key     : string to look for
   key_len : number of characters of key to match

   Returns a pointer to the start of the match inside target,
   or a null pointer if there is none.  A key_len of 0 always
   returns a null pointer; callers handle the empty key.

   For keys of three or more characters the search is limited to
   positions where the whole key still fits in target, so
   memcmp() never reads past target's terminating NUL.
*/
char *str_str(target, key , key_len)
  register char *target, *key ;
  unsigned key_len ;
{
  unsigned target_len ;
  char *last ;    /* last position where key can still start */

  switch( key_len )
  { case 0 :  return (char *) 0 ;
    case 1 :  return strchr( target, *key) ;
    case 2 :
        while ( (target = strchr(target, *key)) != (char *) 0 )
            if ( target[1] == key[1] )  return target ;
            else  target++ ;
        /*failed*/
        return (char *) 0 ;
  }

  target_len = strlen(target) ;
  if ( key_len > target_len )  return (char *) 0 ;
  last = target + (target_len - key_len) ;

  key_len-- ;   /* the first character is matched by strchr() */
  while ( (target = strchr(target, *key)) != (char *) 0
          && target <= last )
  {
    if ( memcmp(target+1, key+1, key_len) == 0 )  return target ;
    target++ ;
  }
  /*failed*/
  return (char *) 0 ;
}
/* bi_index -- index(s, t)
   On entry sp[-1] holds s and sp[0] holds t.  The result, the
   1-based position of the first t in s or 0 if there is none,
   is left as a C_DOUBLE in the slot of s, which is returned.
   The index of the empty string is 1.
*/
CELL *bi_index(sp)
  register CELL *sp ;
{
  register int idx ;
  unsigned key_len ;
  char *hit ;

  sp-- ;
  if ( TEST2(sp) != TWO_STRINGS )
        cast2_to_s(sp) ;

  key_len = string(sp+1)->len ;
  if ( key_len == 0 )   /* index of the empty string */
        idx = 1 ;
  else
  {
    hit = str_str(string(sp)->str, string(sp+1)->str, key_len) ;
    idx = hit ? hit - string(sp)->str + 1 : 0 ;
  }

  free_STRING( string(sp) ) ;
  free_STRING( string(sp+1) ) ;
  sp->type = C_DOUBLE ;
  sp->dval = (double) idx ;
  return sp ;
}
/* substr(s, i, n)
if l = length(s)
then get the characters
from max(1,i) to min(l,n-i-1) inclusive */
CELL *bi_substr(sp)
CELL *sp ;
{ int n_args, len ;
register int i, n ;
char *s ; /* substr(s, i, n) */
STRING *sval ;
n_args = sp->type ;
sp -= n_args ;
if ( sp->type < C_STRING ) cast1_to_s(sp) ;
s = (sval = string(sp)) -> str ;
if ( n_args == 2 )
{ n = 0x7fff ; /* essentially infinity */
if ( sp[1].type != C_DOUBLE ) cast1_to_d(sp+1) ;
}
else
{ if ( sp[1].type + sp[2].type != TWO_STRINGS ) cast2_to_d(sp+1) ;
n = (int) sp[2].dval ;
}
i = (int) sp[1].dval - 1 ; /* i now indexes into string */
if ( (len = strlen(s)) == 0 ) return sp ;
/* get to here is s is not the null string */
if ( i < 0 ) { n += i ; i = 0 ; }
if ( n > len - i ) n = len - i ;
if ( n <= 0 ) /* the null string */
{ free_STRING( sval ) ;
sp->ptr = (PTR) &null_str ;
null_str.ref_cnt++ ;
}
else /* got something */
{
sp->ptr = (PTR) new_STRING((char *)0, n) ;
(void) memcpy(string(sp)->str, s+i, n) ;
string(sp)->str[n] = 0 ;
}
return sp ;
}
/*
   match(s,r)
   sp[0] holds r, sp[-1] holds s

   Sets RSTART to the 1-based position of the match and RLENGTH
   to its length.  When there is no match RSTART is 0 and
   RLENGTH is -1.  The value of RSTART is also left as a
   C_DOUBLE in the slot of s, which is returned.
*/
CELL *bi_match(sp)
  register CELL *sp ;
{ double rstart, rlength ;
  char *p ;
  unsigned length ;

  if ( sp->type != C_RE )  cast_to_RE(sp) ;
  if ( (--sp)->type < C_STRING )  cast1_to_s(sp) ;

  if ( (p = REmatch(string(sp)->str, (sp+1)->ptr, &length))
       != (char *) 0 )
  { rstart = (double) ( p - string(sp)->str + 1 ) ;
    rlength = (double) length ;
  }
  else
  { /* length is not set here on failure, so do not read it */
    rstart = 0.0 ;
    rlength = -1.0 ;
  }

  cell_destroy( & bi_vars[RSTART] ) ;
  cell_destroy( & bi_vars[RLENGTH] ) ;
  bi_vars[RSTART].type = C_DOUBLE ;
  bi_vars[RSTART].dval = rstart ;
  bi_vars[RLENGTH].type = C_DOUBLE ;
  bi_vars[RLENGTH].dval = rlength ;

  free_STRING(string(sp)) ;
  sp->type = C_DOUBLE ;  sp->dval = rstart ;
  return sp ;
}
/************************************************
arithmetic builtins
************************************************/
/* fplib_err -- report a floating point library error as the
   run time error  "fname(val) : error"
*/
static void fplib_err( fname, val, error)
  char *fname ;     /* name of the builtin */
  double val ;      /* the offending argument */
  char *error ;     /* description of the failure */
{
  rt_error("%s(%g) : %s" ,
           fname, val, error) ;
}
/* bi_sin -- sin(x), computed in place on the cell at sp.
   When STDC_MATHERR is on, a nonzero errno after the call is
   reported through fplib_err().
*/
CELL *bi_sin(sp)
  register CELL *sp ;
{
#if STDC_MATHERR
  double arg ;    /* the argument, kept for the error message */

  errno = 0 ;
  if ( sp->type != C_DOUBLE )  cast1_to_d(sp) ;
  arg = sp->dval ;
  sp->dval = sin( arg ) ;
  if ( errno )  fplib_err("sin", arg, "loss of precision") ;
  return sp ;
#else
  if ( sp->type != C_DOUBLE )  cast1_to_d(sp) ;
  sp->dval = sin( sp->dval ) ;
  return sp ;
#endif
}
/* bi_cos -- cos(x), computed in place on the cell at sp.
   When STDC_MATHERR is on, a nonzero errno after the call is
   reported through fplib_err().
*/
CELL *bi_cos(sp)
  register CELL *sp ;
{
#if STDC_MATHERR
  double arg ;    /* the argument, kept for the error message */

  errno = 0 ;
  if ( sp->type != C_DOUBLE )  cast1_to_d(sp) ;
  arg = sp->dval ;
  sp->dval = cos( arg ) ;
  if ( errno )  fplib_err("cos", arg, "loss of precision") ;
  return sp ;
#else
  if ( sp->type != C_DOUBLE )  cast1_to_d(sp) ;
  sp->dval = cos( sp->dval ) ;
  return sp ;
#endif
}
/* bi_atan2 -- atan2(y, x)
   On entry sp[-1] holds y and sp[0] holds x; the result is left
   in the slot of y, which is returned.  When STDC_MATHERR is on,
   a nonzero errno after the call is a run time error.
*/
CELL *bi_atan2(sp)
  register CELL *sp ;
{
#if STDC_MATHERR
  errno = 0 ;
  sp-- ;
  if ( TEST2(sp) != TWO_DOUBLES )  cast2_to_d(sp) ;
  sp->dval = atan2(sp->dval, sp[1].dval) ;
  if ( errno )  rt_error("atan2(0,0) : domain error") ;
  return sp ;
#else
  sp-- ;
  if ( TEST2(sp) != TWO_DOUBLES )  cast2_to_d(sp) ;
  sp->dval = atan2(sp->dval, sp[1].dval) ;
  return sp ;
#endif
}
/* bi_log -- log(x), computed in place on the cell at sp.
   When STDC_MATHERR is on, a nonzero errno after the call is
   reported through fplib_err().
*/
CELL *bi_log(sp)
  register CELL *sp ;
{
#if STDC_MATHERR
  double arg ;    /* the argument, kept for the error message */

  errno = 0 ;
  if ( sp->type != C_DOUBLE )  cast1_to_d(sp) ;
  arg = sp->dval ;
  sp->dval = log( arg ) ;
  if ( errno )  fplib_err("log", arg, "domain error") ;
  return sp ;
#else
  if ( sp->type != C_DOUBLE )  cast1_to_d(sp) ;
  sp->dval = log( sp->dval ) ;
  return sp ;
#endif
}
/* bi_exp -- exp(x), computed in place on the cell at sp.
   When STDC_MATHERR is on, a nonzero errno together with a
   nonzero result is reported as overflow; a zero result
   (underflow) is ignored.
*/
CELL *bi_exp(sp)
  register CELL *sp ;
{
#if STDC_MATHERR
  double arg ;    /* the argument, kept for the error message */

  errno = 0 ;
  if ( sp->type != C_DOUBLE )  cast1_to_d(sp) ;
  arg = sp->dval ;
  sp->dval = exp( arg ) ;
  /* on underflow sp->dval==0, ignore */
  if ( errno && sp->dval )  fplib_err("exp", arg, "overflow") ;
  return sp ;
#else
  if ( sp->type != C_DOUBLE )  cast1_to_d(sp) ;
  sp->dval = exp(sp->dval) ;
  return sp ;
#endif
}
/* bi_int -- int(x), truncation toward zero,
   computed in place on the cell at sp.
*/
CELL *bi_int(sp)
  register CELL *sp ;
{
  if ( sp->type != C_DOUBLE )  cast1_to_d(sp) ;

  if ( sp->dval >= 0.0 )  sp->dval = floor( sp->dval ) ;
  else  sp->dval = ceil( sp->dval ) ;

  return sp ;
}
/* bi_sqrt -- sqrt(x), computed in place on the cell at sp.
   When STDC_MATHERR is on, a nonzero errno after the call is
   reported through fplib_err().
*/
CELL *bi_sqrt(sp)
  register CELL *sp ;
{
#if STDC_MATHERR
  double arg ;    /* the argument, kept for the error message */

  errno = 0 ;
  if ( sp->type != C_DOUBLE )  cast1_to_d(sp) ;
  arg = sp->dval ;
  sp->dval = sqrt( arg ) ;
  if ( errno )  fplib_err("sqrt", arg, "domain error") ;
  return sp ;
#else
  if ( sp->type != C_DOUBLE )  cast1_to_d(sp) ;
  sp->dval = sqrt( sp->dval ) ;
  return sp ;
#endif
}
#ifdef __TURBOC__
long biostime(int, long) ;
#define time(x) (biostime(0,0L)<<4)
#else
#include <sys/types.h>
#if 0
#ifndef STARDENT
#include <sys/timeb.h>
#endif
#endif
#endif
/* bi_srand -- srand() and srand(seed)
   On entry sp->type is nonzero when the user gave a seed, which
   then sits in the cell below.  Without a user seed the clock
   is used.  The seed is handed to srand48() and the cell that
   is returned holds the seed as a C_DOUBLE.
*/
CELL *bi_srand(sp)
  register CELL *sp ;
{
  register long seed ;
  int have_seed ;
  void srand48() ;

  have_seed = sp->type ;
  sp-- ;

  if ( have_seed )   /* user seed */
  {
    if ( sp->type != C_DOUBLE )  cast1_to_d(sp) ;
    seed = (long) sp->dval ;
  }
  else
  {
    seed = (long) time( (time_t *) 0 ) ;
    sp++ ;     /* back to the entry slot, it receives the result */
    sp->type = C_DOUBLE ;
    sp->dval = (double) seed ;
  }

  srand48(seed) ;
  return sp ;
}
/* bi_rand -- rand()
   Pushes the value of drand48() on the stack as a C_DOUBLE
   and returns the new top of stack.
*/
CELL *bi_rand(sp)
  register CELL *sp ;
{
  sp++ ;
  sp->type = C_DOUBLE ;
  sp->dval = drand48() ;
  return sp ;
}
/*************************************************
miscellaneous builtins
close, system and getline
*************************************************/
/* bi_close -- close(name)
   Passes the string value of the cell at sp to file_close()
   and replaces the cell by that call's result as a C_DOUBLE.
*/
CELL *bi_close(sp)
  register CELL *sp ;
{
  int status ;

  if ( sp->type < C_STRING )  cast1_to_s(sp) ;

  status = file_close( (STRING *) sp->ptr) ;
  free_STRING( string(sp) ) ;

  sp->type = C_DOUBLE ;
  sp->dval = (double) status ;
  return sp ;
}
#if ! DOS
/* bi_system -- system(cmd)
   Runs  shell -c cmd  in a child process and waits for it.
   shell is taken from the environment variable SHELL the first
   time through, with "/bin/sh" as the fallback.

   The cell at sp is replaced by a C_DOUBLE holding
     128                  if the fork failed, or if the low byte
                          of the status from wait_for() is nonzero
     (status >> 8) & 0xff otherwise
*/
CELL *bi_system(sp)
  CELL *sp ;
{
  int pid ;
  unsigned ret_val ;

  if ( !shell )
  { shell = getenv("SHELL") ;
    if ( !shell )  shell = "/bin/sh" ;
  }
  if ( sp->type < C_STRING )  cast1_to_s(sp) ;

  pid = fork() ;
  if ( pid == -1 )    /* fork failed */
  {
    errmsg(errno, "could not create a new process") ;
    ret_val = 128 ;
  }
  else
  if ( pid == 0 )     /* the child */
  {
    (void) execl(shell, shell, "-c", string(sp)->str, (char *) 0) ;
    /* if get here, execl() failed */
    errmsg(errno, "execute of %s failed", shell) ;
    fflush(stderr) ;
    _exit(128) ;
  }
  else                /* the parent waits for the child */
  {
    ret_val = wait_for(pid) ;
    ret_val = ( ret_val & 0xff ) ? 128 : (ret_val & 0xff00) >> 8 ;
  }

  cell_destroy(sp) ;
  sp->type = C_DOUBLE ;
  sp->dval = (double) ret_val ;
  return sp ;
}
#else /* DOS */
/* bi_system -- system() under DOS: not available,
   calling it is a run time error */
CELL *bi_system( sp )
  register CELL *sp ;
{
  rt_error("no system call in MsDos --yet") ;

  return sp ;
}
/* prints errmsgs for DOS */
CELL *bi_errmsg(sp)
register CELL *sp ;
{
cast1_to_s(sp) ;
fprintf(stderr, "%s\n", string(sp)->str) ;
free_STRING(string(sp)) ;
sp->type = C_DOUBLE ;
sp->dval = 0.0 ;
return sp ;
}
#endif
/* getline() */
/* if type == 0 : stack is 0 , target address
if type == F_IN : stack is F_IN, expr(filename), target address
if type == PIPE_IN : stack is PIPE_IN, target address, expr(pipename)

The stack is listed from the top down; sp points at the type on entry.
The cell that is returned is a C_DOUBLE holding
     1.0   a line was read and stored in the target
     0.0   end of file
    -1.0   the input could not be opened
*/
CELL *bi_getline(sp)
register CELL *sp ;
{
CELL tc , *cp ;
char *p ;
unsigned len ;
FIN *fin_p ;
switch( sp->type )
{
case 0 :
/* read from the main input stream */
sp-- ;
/* main_fin == -1 : the main input has not been opened yet */
if ( main_fin == (FIN *) -1 && ! open_main() )
goto open_failure ;
if ( ! main_fin || !(p = FINgets(main_fin, &len)) )
goto eof ;
cp = (CELL *) sp->ptr ;
/* only this form of getline counts the record in NR and FNR;
NR and FNR are adjacent, so one cast2_to_d() covers both */
if ( TEST2(bi_vars+NR) != TWO_DOUBLES ) cast2_to_d(bi_vars+NR) ;
bi_vars[NR].dval += 1.0 ;
bi_vars[FNR].dval += 1.0 ;
break ;
case F_IN :
/* read from a file: sp moves to the filename expression */
sp-- ;
if ( sp->type < C_STRING ) cast1_to_s(sp) ;
fin_p = (FIN *) file_find(sp->ptr, F_IN) ;
free_STRING(string(sp) ) ;
/* now sp is at the target address, the slot that gets the result */
sp-- ;
if ( ! fin_p ) goto open_failure ;
if ( ! (p = FINgets(fin_p, &len)) ) goto eof ;
cp = (CELL *) sp->ptr ;
break ;
case PIPE_IN :
/* read from a command: the pipe name is the lowest of the three
cells and its slot gets the result */
sp -= 2 ;
if ( sp->type < C_STRING ) cast1_to_s(sp) ;
fin_p = (FIN *) file_find(sp->ptr, PIPE_IN) ;
free_STRING(string(sp)) ;
if ( ! fin_p ) goto open_failure ;
if ( ! (p = FINgets(fin_p, &len)) ) goto eof ;
/* the target address sits just above the pipe name */
cp = (CELL *) (sp+1)->ptr ;
break ;
default : bozo("type in bi_getline") ;
}
/* we've read a line , store it */
/* build the new value in tc: the shared null string for an empty
line, otherwise a C_MBSTRN cell with a copy of the len characters */
if ( len == 0 )
{ tc.type = C_STRING ;
tc.ptr = (PTR) &null_str ;
null_str.ref_cnt++ ;
}
else
{ tc.type = C_MBSTRN ;
tc.ptr = (PTR) new_STRING((char *) 0, len) ;
(void) memcpy( string(&tc)->str, p, len) ;
}
/* a target inside field[] must be assigned through field_assign() */
if ( cp >= field && cp < field+NUM_FIELDS )
field_assign(cp-field, &tc) ;
else
{ cell_destroy(cp) ;
(void) cellcpy(cp, &tc) ;
}
/* drop the local reference held by tc */
cell_destroy(&tc) ;
sp->dval = 1.0 ; goto done ;
open_failure :
sp->dval = -1.0 ; goto done ;
eof :
sp->dval = 0.0 ; /* fall thru to done */
done :
sp->type = C_DOUBLE ;
return sp ;
}
/**********************************************
sub() and gsub()
**********************************************/
/* entry: sp[0] = address of CELL to sub on
sp[-1] = substitution CELL
sp[-2] = regular expression to match

Replaces the first match of the regular expression in the target
by the substitution.  The result, 1 if a replacement was made and
0 if not, is left as a C_DOUBLE in the slot of the regular
expression, which is returned.  The target is left untouched when
nothing matches.
*/
CELL *bi_sub( sp )
register CELL *sp ;
{ CELL *cp ; /* pointer to the replacement target */
CELL tc ; /* build the new string here */
CELL sc ; /* copy of the target CELL */
char *front, *middle, *back ; /* pieces */
unsigned front_len, middle_len, back_len ;
/* from here on sp[0] is the re, sp[1] the replacement and
sp[2] the address of the target */
sp -= 2 ;
if ( sp->type != C_RE ) cast_to_RE(sp) ;
if ( sp[1].type != C_REPL && sp[1].type != C_REPLV )
cast_to_REPL(sp+1) ;
cp = (CELL *) (sp+2)->ptr ;
/* make a copy of the target, because we won't change anything
including type unless the match works */
(void) cellcpy(&sc, cp) ;
if ( sc.type < C_STRING ) cast1_to_s(&sc) ;
front = string(&sc)->str ;
if ( middle = REmatch(front, sp->ptr, &middle_len) )
{
/* the target splits into front | middle (the match) | back */
front_len = middle - front ;
back = middle + middle_len ;
back_len = string(&sc)->len - front_len - middle_len ;
/* a C_REPLV replacement is turned into a plain one by
replv_to_repl(), which is handed a copy of the matched text */
if ( (sp+1)->type == C_REPLV )
{ STRING *sval = new_STRING((char *) 0, middle_len) ;
(void) memcpy(sval->str, middle, middle_len) ;
(void) replv_to_repl(sp+1, sval) ;
free_STRING(sval) ;
}
/* new value = front + replacement + back */
tc.type = C_STRING ;
tc.ptr = (PTR) new_STRING((char *) 0,
front_len + string(sp+1)->len + back_len ) ;
{ char *p = string(&tc)->str ;
if ( front_len )
{ (void) memcpy(p, front, front_len) ;
p += front_len ;
}
if ( string(sp+1)->len )
{ (void) memcpy(p, string(sp+1)->str, string(sp+1)->len) ;
p += string(sp+1)->len ;
}
if ( back_len ) (void) memcpy(p, back, back_len) ;
}
/* a target inside field[] must be assigned through field_assign() */
if ( cp >= field && cp < field+NUM_FIELDS )
field_assign(cp-field, &tc) ;
else
{ cell_destroy(cp) ;
(void) cellcpy(cp, &tc) ;
}
/* drop the local reference held by tc */
free_STRING(string(&tc)) ;
}
/* cleanup: the copy of the target and the replacement */
free_STRING(string(&sc)) ;
repl_destroy(sp+1) ;
sp->type = C_DOUBLE ;
sp->dval = middle != (char *) 0 ? 1.0 : 0.0 ;
return sp ;
}
static unsigned repl_cnt ; /* number of global replacements */
/* recursive global substitution
   dealing with empty matches makes this mildly painful

   re     : the regular expression
   repl   : the replacement, always of type REPL or REPLV.
            It may be changed here; the caller frees it.
   target : the text still to be processed
   flag   : if on, match of empty string at front is OK

   Returns a new STRING holding target with every match replaced;
   the caller owns one reference to it.  Each replacement that
   counts increments repl_cnt.
*/
static STRING *gsub( re, repl, target, flag)
  PTR re ;
  CELL *repl ;   /* always of type REPL or REPLV */
  char *target ;
  int flag ;     /* if on, match of empty string at front is OK */
{ char *front, *middle ;
  STRING *back ;
  unsigned front_len, middle_len ;
  STRING *ret_val ;
  CELL xrepl ;   /* a copy of repl so we can change repl */

  if ( ! (middle = REmatch(target, re, &middle_len)) )
      return new_STRING(target) ;   /* no match */

  /* the recursive calls work on this copy; it is destroyed
     on every way out of the function from here on */
  (void) cellcpy(&xrepl, repl) ;

  if ( !flag && middle_len == 0 && middle == target )
  { /* match at front that's not allowed */

    if ( *target == 0 )   /* target is empty string */
    { /* release the copy made above, this exit used to leak it */
      repl_destroy(&xrepl) ;
      null_str.ref_cnt++ ;
      return & null_str ;
    }
    else
    { char xbuff[2] ;

      front_len = 0 ;
      /* make new repl with target[0]: the first character is
         passed through unchanged and the rest is processed
         with empty matches at the front allowed again */
      repl_destroy(repl) ;
      xbuff[0] = *target++ ;  xbuff[1] = 0 ;
      repl->type = C_REPL ;
      repl->ptr = (PTR) new_STRING( xbuff ) ;
      back = gsub(re, &xrepl, target, 1) ;
    }
  }
  else   /* a match that counts */
  { repl_cnt++ ;

    front = target ;
    front_len = middle - target ;

    if ( *middle == 0 )   /* matched back of target */
    { back = &null_str ;  null_str.ref_cnt++ ; }
    else  back = gsub(re, &xrepl, middle + middle_len, 0) ;

    /* patch the &'s if needed */
    if ( repl->type == C_REPLV )
    { STRING *sval = new_STRING((char *) 0, middle_len) ;

      (void) memcpy(sval->str, middle, middle_len) ;
      (void) replv_to_repl(repl, sval) ;
      free_STRING(sval) ;
    }
  }

  /* put the three pieces together */
  ret_val = new_STRING((char *)0,
              front_len + string(repl)->len + back->len);
  { char *p = ret_val->str ;

    /* front is only set when front_len can be nonzero */
    if ( front_len )
    { (void) memcpy(p, front, front_len) ;  p += front_len ; }
    if ( string(repl)->len )
    { (void) memcpy(p, string(repl)->str, string(repl)->len) ;
      p += string(repl)->len ;
    }
    if ( back->len )  (void) memcpy(p, back->str, back->len) ;
  }

  /* cleanup, repl is freed by the caller */
  repl_destroy(&xrepl) ;
  free_STRING(back) ;
  return ret_val ;
}
/* bi_gsub -- set up for the call to gsub()
   entry: sp[0]  = address of CELL to substitute on
          sp[-1] = substitution CELL
          sp[-2] = regular expression to match
   The number of replacements is left as a C_DOUBLE in the slot
   of the regular expression, which is returned.  The target is
   assigned to only when at least one replacement was made.
*/
CELL *bi_gsub( sp )
  register CELL *sp ;
{
  CELL *target ;    /* pts at the replacement target */
  CELL sc ;         /* copy of replacement target */
  CELL tc ;         /* build the result here */
  CELL *repl ;

  sp -= 2 ;
  repl = sp + 1 ;

  if ( sp->type != C_RE )  cast_to_RE(sp) ;
  if ( ! ( repl->type == C_REPL || repl->type == C_REPLV ) )
        cast_to_REPL(repl) ;

  target = (CELL *) (sp+2)->ptr ;
  (void) cellcpy(&sc, target) ;
  if ( sc.type < C_STRING )  cast1_to_s(&sc) ;

  repl_cnt = 0 ;
  tc.ptr = (PTR) gsub(sp->ptr, repl, string(&sc)->str, 1) ;

  if ( repl_cnt )
  {
    tc.type = C_STRING ;
    if ( target >= field && target < field + NUM_FIELDS )
        field_assign(target-field, &tc) ;
    else
    { cell_destroy(target) ;
      (void) cellcpy(target, &tc) ;
    }
  }

  /* cleanup */
  free_STRING(string(&sc)) ;
  free_STRING(string(&tc)) ;
  repl_destroy(repl) ;

  sp->type = C_DOUBLE ;
  sp->dval = (double) repl_cnt ;
  return sp ;
}
@//E*O*F mawk0.97/bi_funct.c//
chmod u=rw,g=r,o=r mawk0.97/bi_funct.c
echo x - mawk0.97/bi_funct.h
sed 's/^@//' > "mawk0.97/bi_funct.h" <<'@//E*O*F mawk0.97/bi_funct.h//'
/********************************************
bi_funct.h
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* $Log: bi_funct.h,v $
* Revision 2.2 91/04/22 08:00:13 brennan
* prototype for bi_errmsg() under DOS
*
* Revision 2.1 91/04/08 08:22:20 brennan
* VERSION 0.97
*
*/
#ifndef BI_FUNCT_H
#define BI_FUNCT_H 1
#include "symtype.h"
/* table of the builtin functions, defined in bi_funct.c */
extern BI_REC bi_funct[] ;
/* NOTE(review): bi_funct.c defines bi_funct_init(), not bi_init();
   check whether this prototype is still wanted */
void PROTO(bi_init, (void) ) ;
/* Every builtin takes a pointer into the evaluation stack and
   returns a pointer to the cell that holds its result. */
/* builtin string functions */
CELL *PROTO( bi_print, (CELL *) ) ;
CELL *PROTO( bi_printf, (CELL *) ) ;
CELL *PROTO( bi_length, (CELL *) ) ;
CELL *PROTO( bi_index, (CELL *) ) ;
CELL *PROTO( bi_substr, (CELL *) ) ;
CELL *PROTO( bi_sprintf, (CELL *) ) ;
CELL *PROTO( bi_split, (CELL *) ) ;
CELL *PROTO( bi_match, (CELL *) ) ;
CELL *PROTO( bi_getline, (CELL *) ) ;
CELL *PROTO( bi_sub, (CELL *) ) ;
CELL *PROTO( bi_gsub, (CELL *) ) ;
/* builtin arith functions */
CELL *PROTO( bi_sin, (CELL *) ) ;
CELL *PROTO( bi_cos, (CELL *) ) ;
CELL *PROTO( bi_atan2, (CELL *) ) ;
CELL *PROTO( bi_log, (CELL *) ) ;
CELL *PROTO( bi_exp, (CELL *) ) ;
CELL *PROTO( bi_int, (CELL *) ) ;
CELL *PROTO( bi_sqrt, (CELL *) ) ;
CELL *PROTO( bi_srand, (CELL *) ) ;
CELL *PROTO( bi_rand, (CELL *) ) ;
/* other builtins */
CELL *PROTO( bi_close, (CELL *) ) ;
CELL *PROTO( bi_system, (CELL *) ) ;
/* errmsg() exists only in the DOS version */
#if DOS
CELL *PROTO(bi_errmsg, (CELL *) ) ;
#endif
#endif /* BI_FUNCT_H */
@//E*O*F mawk0.97/bi_funct.h//
chmod u=rw,g=r,o=r mawk0.97/bi_funct.h
echo x - mawk0.97/bi_vars.c
sed 's/^@//' > "mawk0.97/bi_vars.c" <<'@//E*O*F mawk0.97/bi_vars.c//'
/********************************************
bi_vars.c
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* $Log: bi_vars.c,v $
* Revision 2.1 91/04/08 08:22:22 brennan
* VERSION 0.97
*
*/
/* bi_vars.c */
#include "mawk.h"
#include "symtype.h"
#include "bi_vars.h"
#include "field.h"
#include "init.h"
#include "memory.h"
/* the builtin variables */
/* one cell per variable, indexed by the macros ARGC .. VERSION */
CELL bi_vars[NUM_BI_VAR] ;
/* the order here must match the order in bi_vars.h */
/* bi_vars_init() enters bi_var_names[i] in the symbol table with
   bi_vars + i as its cell, so a name in the wrong place would be
   bound to the wrong variable */
static char *bi_var_names[NUM_BI_VAR] = {
"ARGC" ,
"FILENAME" ,
"NR" ,
"FNR" ,
"OFS" ,
"ORS" ,
"RLENGTH" ,
"RSTART" ,
"SUBSEP",
"VERSION"
} ;
/* bi_vars_init -- insert the builtin vars in the hash table
   and give them their initial values:
     FILENAME ""      OFS " "      ORS "\n"      SUBSEP "\034"
     VERSION  VERSION_STRING       NR and FNR  numeric
   The remaining variables are left as they are.
   The constant cells cell_zero and cell_one are set up here too.
*/
void bi_vars_init()
{
  register int idx ;
  register SYMTAB *stp ;

  for ( idx = 0 ; idx < NUM_BI_VAR ; idx++ )
  {
    stp = insert( bi_var_names[idx] ) ;
    stp->type = ST_VAR ;
    stp->stval.cp = bi_vars + idx ;
    /* bi_vars[idx].type = 0 which is C_NOINIT */
  }

  /* set defaults */
  bi_vars[FILENAME].type = C_STRING ;
  bi_vars[FILENAME].ptr = (PTR) new_STRING( "" ) ;

  bi_vars[OFS].type = C_STRING ;
  bi_vars[OFS].ptr = (PTR) new_STRING( " " ) ;

  bi_vars[ORS].type = C_STRING ;
  bi_vars[ORS].ptr = (PTR) new_STRING( "\n" ) ;

  bi_vars[SUBSEP].type = C_STRING ;
  bi_vars[SUBSEP].ptr = (PTR) new_STRING( "\034" ) ;

  bi_vars[VERSION].type = C_STRING ;
  bi_vars[VERSION].ptr = (PTR) new_STRING( VERSION_STRING ) ;

  /* dval is already 0.0 */
  bi_vars[FNR].type = C_DOUBLE ;
  bi_vars[NR].type = C_DOUBLE ;

  /* the constants 0 and 1 as cells */
  cell_zero.type = C_DOUBLE ;
  cell_one.type = C_DOUBLE ;
  cell_one.dval = 1.0 ;
}
/* cells holding the numbers 0 and 1; their type (and the value
   of cell_one) is set in bi_vars_init() */
CELL cell_zero ;
CELL cell_one ;
@//E*O*F mawk0.97/bi_vars.c//
chmod u=rw,g=r,o=r mawk0.97/bi_vars.c
echo x - mawk0.97/bi_vars.h
sed 's/^@//' > "mawk0.97/bi_vars.h" <<'@//E*O*F mawk0.97/bi_vars.h//'
/********************************************
bi_vars.h
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* $Log: bi_vars.h,v $
* Revision 2.1 91/04/08 08:26:30 brennan
* VERSION 0.97
*
*/
/* bi_vars.h */
#ifndef BI_VARS_H
#define BI_VARS_H 1
/* the initial value of the builtin variable VERSION */
#define VERSION_STRING \
"mawk 0.97 Mar 1991, Copyright (C) Michael D. Brennan"
/* If use different command line syntax for DOS
mark that in VERSION */
#if DOS && ! HAVE_REARGV
#undef VERSION_STRING
#define VERSION_STRING \
"mawk 0.97DOS Mar 1991, Copyright (C) Michael D. Brennan"
#endif
/* builtin variables NF, RS, FS, OFMT are stored
internally in field[], so side effects of assignment can
be handled
*/
/* Indices into bi_vars[].  The table of names in bi_vars.c must
   list the variables in this same order. */
#define ARGC 0
#define FILENAME 1
#define NR 2 /* NR must be exactly one in front of FNR */
#define FNR 3
#define OFS 4
#define ORS 5
#define RLENGTH 6
#define RSTART 7
#define SUBSEP 8
#define VERSION 9
/* number of builtin variables == size of bi_vars[] */
#define NUM_BI_VAR 10
extern CELL bi_vars[NUM_BI_VAR] ;
#endif
@//E*O*F mawk0.97/bi_vars.h//
chmod u=rw,g=r,o=r mawk0.97/bi_vars.h
echo x - mawk0.97/cast.c
sed 's/^@//' > "mawk0.97/cast.c" <<'@//E*O*F mawk0.97/cast.c//'
/********************************************
cast.c
copyright 1991, Michael D. Brennan
This is a source file for mawk, an implementation of
the Awk programming language as defined in
Aho, Kernighan and Weinberger, The AWK Programming Language,
Addison-Wesley, 1988.
See the accompanying file, LIMITATIONS, for restrictions
regarding modification and redistribution of this
program in source or binary form.
********************************************/
/* $Log: cast.c,v $
* Revision 2.1 91/04/08 08:22:44 brennan
* VERSION 0.97
*
*/
/* cast.c */
#include "mawk.h"
#include "field.h"
#include "memory.h"
#include "scan.h"
#include "repl.h"
#include <string.h>
/* powers of two: the ten initialisers give pow2[t] == 1 << t.
   Presumably indexed by cell type so that two types can be
   tested with one comparison -- confirm against the users of
   this table. */
int pow2[NUM_CELL_TYPES] = {1,2,4,8,16,32,64,128,256,512} ;
/* cast1_to_d -- convert the cell at cp to type C_DOUBLE in place.
     C_NOINIT             becomes 0.0
     C_DOUBLE             is left alone
     C_STRING, C_MBSTRN   are converted with strtod() and the
                          string is released
     C_STRNUM             keeps its dval, the string is released
   Any other type is an internal error.
   When FPE_TRAPS is on, overflow in the conversion is a run
   time error.
*/
void cast1_to_d( cp )
  register CELL *cp ;
{
  switch( cp->type )
  { case C_NOINIT :  cp->dval = 0.0 ; break ;

    case C_DOUBLE :  return ;

    case C_MBSTRN :
    case C_STRING :
        { register STRING *s = (STRING *) cp->ptr ;

#if FPE_TRAPS /* look for overflow error */
          errno = 0 ;
          cp->dval = strtod(s->str,(char **)0) ;
          if ( errno && cp->dval != 0.0 ) /* ignore underflow */
              /* %s wants the characters, not the STRING itself */
              rt_error("overflow converting %s to double", s->str) ;
#else
          cp->dval = strtod(s->str,(char **)0) ;
#endif
          free_STRING(s) ;
        }
        break ;

    case C_STRNUM :
        /* don't need to convert, but do need to free the STRING part */
        free_STRING( string(cp) ) ;
        break ;

    default :
        bozo("cast on bad type") ;
  }
  cp->type = C_DOUBLE ;
}
void cast2_to_d( cp )
register CELL *cp ;
{ register STRING *s ;
switch( cp->type )
More information about the Alt.sources
mailing list