v15i091: Perl, release 2, Part02/15
Rich Salz
rsalz at uunet.uu.net
Sat Jul 9 02:04:44 AEST 1988
Submitted-by: Larry Wall <lwall at jpl-devvax.jpl.nasa.gov>
Posting-number: Volume 15, Issue 91
Archive-name: perl2/part02
#! /bin/sh
# Make a new directory for the perl sources, cd to it, and run kits 1
# thru 15 through sh. When all 15 kits have been run, read README.
echo "This is perl 2.0 kit 2 (of 15). If kit 2 is complete, the line"
echo '"'"End of kit 2 (of 15)"'" will echo at the end.'
echo ""
export PATH || (echo "You didn't use sh, you clunch." ; kill $$)
mkdir eg eg/g t 2>/dev/null
echo Extracting regexp.c
sed >regexp.c <<'!STUFFY!FUNK!' -e 's/X//'
X/* NOTE: this is derived from Henry Spencer's regexp code, and should not
X * confused with the original package (see point 3 below). Thanks, Henry!
X */
X
X/* Additional note: this code is very heavily munged from Henry's version
X * in places. In some spots I've traded clarity for efficiency, so don't
X * blame Henry for some of the lack of readability.
X */
X
X/* $Header: regexp.c,v 2.0 88/06/05 00:10:45 root Exp $
X *
X * $Log: regexp.c,v $
X * Revision 2.0 88/06/05 00:10:45 root
X * Baseline version 2.0.
X *
X */
X
X/*
X * regcomp and regexec -- regsub and regerror are not used in perl
X *
X * Copyright (c) 1986 by University of Toronto.
X * Written by Henry Spencer. Not derived from licensed software.
X *
X * Permission is granted to anyone to use this software for any
X * purpose on any computer system, and to redistribute it freely,
X * subject to the following restrictions:
X *
X * 1. The author is not responsible for the consequences of use of
X * this software, no matter how awful, even if they arise
X * from defects in it.
X *
X * 2. The origin of this software must not be misrepresented, either
X * by explicit claim or by omission.
X *
X * 3. Altered versions must be plainly marked as such, and must not
X * be misrepresented as being the original software.
X *
X * Beware that some of this code is subtly aware of the way operator
X * precedence is structured in regular expressions. Serious changes in
X * regular-expression syntax might require a total rethink.
X */
X#include "EXTERN.h"
X#include "perl.h"
X
X/*
X * The "internal use only" fields in regexp.h are present to pass info from
X * compile to execute that permits the execute phase to run lots faster on
X * simple cases. They are:
X *
X * regstart str that must begin a match; Nullch if none obvious
X * reganch is the match anchored (at beginning-of-line only)?
X * regmust string (pointer into program) that match must include, or NULL
X * [regmust changed to STR* for bminstr()--law]
X * regmlen length of regmust string
X * [regmlen not used currently]
X *
X * Regstart and reganch permit very fast decisions on suitable starting points
X * for a match, cutting down the work a lot. Regmust permits fast rejection
X * of lines that cannot possibly match. The regmust tests are costly enough
X * that regcomp() supplies a regmust only if the r.e. contains something
X * potentially expensive (at present, the only such thing detected is * or +
X * at the start of the r.e., which can involve a lot of backup). Regmlen is
X * supplied because the test in regexec() needs it and regcomp() is computing
X * it anyway.
X * [regmust is now supplied always. The tests that use regmust have a
X * heuristic that disables the test if it usually matches.]
X *
X * [In fact, we now use regmust in many cases to locate where the search
X * starts in the string, so if regback is >= 0, the regmust search is never
X * wasted effort. The regback variable says how many characters back from
X * where regmust matched is the earliest possible start of the match.
X * For instance, /[a-z].foo/ has a regmust of 'foo' and a regback of 2.]
X */
X
X/*
X * Structure for regexp "program". This is essentially a linear encoding
X * of a nondeterministic finite-state machine (aka syntax charts or
X * "railroad normal form" in parsing technology). Each node is an opcode
X * plus a "next" pointer, possibly plus an operand. "Next" pointers of
X * all nodes except BRANCH implement concatenation; a "next" pointer with
X * a BRANCH on both ends of it is connecting two alternatives. (Here we
X * have one of the subtle syntax dependencies: an individual BRANCH (as
X * opposed to a collection of them) is never concatenated with anything
X * because of operator precedence.) The operand of some types of node is
X * a literal string; for others, it is a node leading into a sub-FSM. In
X * particular, the operand of a BRANCH node is the first node of the branch.
X * (NB this is *not* a tree structure: the tail of the branch connects
X * to the thing following the set of BRANCHes.) The opcodes are:
X */
X
X/* definition number opnd? meaning */
X#define END 0 /* no End of program. */
X#define BOL 1 /* no Match "" at beginning of line. */
X#define EOL 2 /* no Match "" at end of line. */
X#define ANY 3 /* no Match any one character. */
X#define ANYOF 4 /* str Match any character in this string. */
X#define ANYBUT 5 /* str Match any character not in this string. */
X#define BRANCH 6 /* node Match this alternative, or the next... */
X#define BACK 7 /* no Match "", "next" ptr points backward. */
X#define EXACTLY 8 /* str Match this string (preceded by length). */
X#define NOTHING 9 /* no Match empty string. */
X#define STAR 10 /* node Match this (simple) thing 0 or more times. */
X#define PLUS 11 /* node Match this (simple) thing 1 or more times. */
X#define ALNUM 12 /* no Match any alphanumeric character */
X#define NALNUM 13 /* no Match any non-alphanumeric character */
X#define BOUND 14 /* no Match "" at any word boundary */
X#define NBOUND 15 /* no Match "" at any word non-boundary */
X#define SPACE 16 /* no Match any whitespace character */
X#define NSPACE 17 /* no Match any non-whitespace character */
X#define DIGIT 18 /* no Match any numeric character */
X#define NDIGIT 19 /* no Match any non-numeric character */
X#define REF 20 /* no Match some already matched string */
X#define OPEN 30 /* no Mark this point in input as start of #n. */
X /* OPEN+1 is number 1, etc. */
X#define CLOSE 40 /* no Analogous to OPEN. */
X
X/*
X * Opcode notes:
X *
X * BRANCH The set of branches constituting a single choice are hooked
X * together with their "next" pointers, since precedence prevents
X * anything being concatenated to any individual branch. The
X * "next" pointer of the last BRANCH in a choice points to the
X * thing following the whole choice. This is also where the
X * final "next" pointer of each individual branch points; each
X * branch starts with the operand node of a BRANCH node.
X *
X * BACK Normal "next" pointers all implicitly point forward; BACK
X * exists to make loop structures possible.
X *
X * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
X * BRANCH structures using BACK. Simple cases (one character
X * per match) are implemented with STAR and PLUS for speed
X * and to minimize recursive plunges.
X *
X * OPEN,CLOSE ...are numbered at compile time.
X */
X
X/* The following have no fixed length. */
Xchar varies[] = {BRANCH,BACK,STAR,PLUS,REF,0};
X
X/* The following always have a length of 1. */
Xchar simple[] = {ANY,ANYOF,ANYBUT,ALNUM,NALNUM,SPACE,NSPACE,DIGIT,NDIGIT,0};
X
X/*
X * A node is one char of opcode followed by two chars of "next" pointer.
X * "Next" pointers are stored as two 8-bit pieces, high order first. The
X * value is a positive offset from the opcode of the node containing it.
X * An operand, if any, simply follows the node. (Note that much of the
X * code generation knows about this implicit relationship.)
X *
X * Using two bytes for the "next" pointer is vast overkill for most things,
X * but allows patterns to get big without disasters.
X *
X * [If ALIGN is defined, the "next" pointer is always aligned on an even
X * boundary, and reads the offset directly as a short. Also, there is no
X * special test to reverse the sign of BACK pointers since the offset is
X * stored negative.]
X */
X
X#ifndef STATIC
X#define STATIC static
X#endif
X
X#define ALIGN
X#define FASTANY
X#ifdef DEBUG
X#undef DEBUG
X#endif
X#ifdef DEBUGGING
X#define DEBUG
X#endif
X
X#ifdef DEBUG
Xint regnarrate = 0;
Xvoid regdump();
XSTATIC char *regprop();
X#endif
X
X
X#define OP(p) (*(p))
X
X#ifdef ALIGN
X#define NEXT(p) (*(short*)(p+1))
X#else
X#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
X#endif
X
X#define OPERAND(p) ((p) + 3)
X
X#ifdef ALIGN
X#define NEXTOPER(p) ((p) + 4)
X#else
X#define NEXTOPER(p) ((p) + 3)
X#endif
X
X#define MAGIC 0234
X
X/*
X * Utility definitions.
X */
X#ifndef CHARBITS
X#define UCHARAT(p) ((int)*(unsigned char *)(p))
X#else
X#define UCHARAT(p) ((int)*(p)&CHARBITS)
X#endif
X
X#define FAIL(m) fatal("/%s/: %s",regprecomp,m)
X#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
X#define META "^$.[()|?+*\\"
X
X/*
X * Flags to be passed up and down.
X */
X#define HASWIDTH 01 /* Known never to match null string. */
X#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
X#define SPSTART 04 /* Starts with * or +. */
X#define WORST 0 /* Worst case. */
X
X/*
X * Global work variables for regcomp().
X */
Xstatic char *regprecomp; /* uncompiled string. */
Xstatic char *regparse; /* Input-scan pointer. */
Xstatic int regnpar; /* () count. */
Xstatic char regdummy;
Xstatic char *regcode; /* Code-emit pointer; ®dummy = don't. */
Xstatic long regsize; /* Code size. */
Xstatic int regfold;
X
X/*
X * Forward declarations for regcomp()'s friends.
X */
XSTATIC char *reg();
XSTATIC char *regbranch();
XSTATIC char *regpiece();
XSTATIC char *regatom();
XSTATIC char *regclass();
XSTATIC char *regchar();
XSTATIC char *regnode();
XSTATIC char *regnext();
XSTATIC void regc();
XSTATIC void reginsert();
XSTATIC void regtail();
XSTATIC void regoptail();
X#ifndef STRCSPN
XSTATIC int strcspn();
X#endif
X
X/*
X - regcomp - compile a regular expression into internal code
X *
X * We can't allocate space until we know how big the compiled form will be,
X * but we can't compile it (and thus know how big it is) until we've got a
X * place to put the code. So we cheat: we compile it twice, once with code
X * generation turned off and size counting turned on, and once "for real".
X * This also means that we don't allocate space until we are sure that the
X * thing really will compile successfully, and we never have to move the
X * code and thus invalidate pointers into it. (Note that it has to be in
X * one piece because free() must be able to free it all.) [NB: not true in perl]
X *
X * Beware that the optimization-preparation code in here knows about some
X * of the structure of the compiled regexp. [I'll say.]
X */
Xregexp *
Xregcomp(exp,fold,rare)
Xchar *exp;
Xint fold;
Xint rare;
X{
X register regexp *r;
X register char *scan;
X register STR *longest;
X register int len;
X register char *first;
X int flags;
X int back;
X int curback;
X extern char *safemalloc();
X extern char *savestr();
X
X if (exp == NULL)
X fatal("NULL regexp argument");
X
X /* First pass: determine size, legality. */
X regfold = fold;
X regparse = exp;
X regprecomp = savestr(exp);
X regnpar = 1;
X regsize = 0L;
X regcode = ®dummy;
X regc(MAGIC);
X if (reg(0, &flags) == NULL) {
X safefree(regprecomp);
X return(NULL);
X }
X
X /* Small enough for pointer-storage convention? */
X if (regsize >= 32767L) /* Probably could be 65535L. */
X FAIL("regexp too big");
X
X /* Allocate space. */
X r = (regexp *)safemalloc(sizeof(regexp) + (unsigned)regsize);
X if (r == NULL)
X FAIL("regexp out of space");
X
X /* Second pass: emit code. */
X r->precomp = regprecomp;
X r->subbase = NULL;
X regparse = exp;
X regnpar = 1;
X regcode = r->program;
X regc(MAGIC);
X if (reg(0, &flags) == NULL)
X return(NULL);
X
X /* Dig out information for optimizations. */
X r->regstart = Nullstr; /* Worst-case defaults. */
X r->reganch = 0;
X r->regmust = Nullstr;
X r->regback = -1;
X r->regstclass = Nullch;
X scan = r->program+1; /* First BRANCH. */
X if (!fold && OP(regnext(scan)) == END) {/* Only one top-level choice. */
X scan = NEXTOPER(scan);
X
X first = scan;
X while ((OP(first) > OPEN && OP(first) < CLOSE) ||
X (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) ||
X (OP(first) == PLUS) )
X first = NEXTOPER(first);
X
X /* Starting-point info. */
X if (OP(first) == EXACTLY)
X r->regstart = str_make(OPERAND(first)+1);
X else if ((exp = index(simple,OP(first))) && exp > simple)
X r->regstclass = first;
X else if (OP(first) == BOUND || OP(first) == NBOUND)
X r->regstclass = first;
X else if (OP(first) == BOL)
X r->reganch++;
X
X#ifdef DEBUGGING
X if (debug & 512)
X fprintf(stderr,"first %d next %d offset %d\n",
X OP(first), OP(NEXTOPER(first)), first - scan);
X#endif
X /*
X * If there's something expensive in the r.e., find the
X * longest literal string that must appear and make it the
X * regmust. Resolve ties in favor of later strings, since
X * the regstart check works with the beginning of the r.e.
X * and avoiding duplication strengthens checking. Not a
X * strong reason, but sufficient in the absence of others.
X * [Now we resolve ties in favor of the earlier string if
X * it happens that curback has been invalidated, since the
X * earlier string may buy us something the later one won't.]
X */
X longest = str_new(10);
X len = 0;
X curback = 0;
X while (scan != NULL) {
X if (OP(scan) == BRANCH) {
X if (OP(regnext(scan)) == BRANCH) {
X curback = -30000;
X while (OP(scan) == BRANCH)
X scan = regnext(scan);
X }
X else /* single branch is ok */
X scan = NEXTOPER(scan);
X }
X if (OP(scan) == EXACTLY) {
X if (curback - back == len) {
X str_cat(longest, OPERAND(scan)+1);
X len += *OPERAND(scan);
X curback += *OPERAND(scan);
X }
X else if (*OPERAND(scan) >= len + (curback >= 0)) {
X str_set(longest, OPERAND(scan)+1);
X len = *OPERAND(scan);
X back = curback;
X curback += len;
X }
X else
X curback += *OPERAND(scan);
X }
X else if (index(varies,OP(scan)))
X curback = -30000;
X else if (index(simple,OP(scan)))
X curback++;
X scan = regnext(scan);
X }
X if (len) {
X r->regmust = longest;
X if (back < 0)
X back = -1;
X r->regback = back;
X if (len > !(sawstudy))
X fbmcompile(r->regmust);
X *(long*)&r->regmust->str_nval = 100;
X#ifdef DEBUGGING
X if (debug & 512)
X fprintf(stderr,"must = '%s' back=%d\n",
X longest,back);
X#endif
X }
X else
X str_free(longest);
X }
X
X r->do_folding = fold;
X r->nparens = regnpar - 1;
X#ifdef DEBUG
X if (debug & 512)
X regdump(r);
X#endif
X return(r);
X}
X
X/*
X - reg - regular expression, i.e. main body or parenthesized thing
X *
X * Caller must absorb opening parenthesis.
X *
X * Combining parenthesis handling with the base level of regular expression
X * is a trifle forced, but the need to tie the tails of the branches to what
X * follows makes it hard to avoid.
X */
Xstatic char *
Xreg(paren, flagp)
Xint paren; /* Parenthesized? */
Xint *flagp;
X{
X register char *ret;
X register char *br;
X register char *ender;
X register int parno;
X int flags;
X
X *flagp = HASWIDTH; /* Tentatively. */
X
X /* Make an OPEN node, if parenthesized. */
X if (paren) {
X if (regnpar >= NSUBEXP)
X FAIL("too many () in regexp");
X parno = regnpar;
X regnpar++;
X ret = regnode(OPEN+parno);
X } else
X ret = NULL;
X
X /* Pick up the branches, linking them together. */
X br = regbranch(&flags);
X if (br == NULL)
X return(NULL);
X if (ret != NULL)
X regtail(ret, br); /* OPEN -> first. */
X else
X ret = br;
X if (!(flags&HASWIDTH))
X *flagp &= ~HASWIDTH;
X *flagp |= flags&SPSTART;
X while (*regparse == '|') {
X regparse++;
X br = regbranch(&flags);
X if (br == NULL)
X return(NULL);
X regtail(ret, br); /* BRANCH -> BRANCH. */
X if (!(flags&HASWIDTH))
X *flagp &= ~HASWIDTH;
X *flagp |= flags&SPSTART;
X }
X
X /* Make a closing node, and hook it on the end. */
X ender = regnode((paren) ? CLOSE+parno : END);
X regtail(ret, ender);
X
X /* Hook the tails of the branches to the closing node. */
X for (br = ret; br != NULL; br = regnext(br))
X regoptail(br, ender);
X
X /* Check for proper termination. */
X if (paren && *regparse++ != ')') {
X FAIL("unmatched () in regexp");
X } else if (!paren && *regparse != '\0') {
X if (*regparse == ')') {
X FAIL("unmatched () in regexp");
X } else
X FAIL("junk on end of regexp"); /* "Can't happen". */
X /* NOTREACHED */
X }
X
X return(ret);
X}
X
X/*
X - regbranch - one alternative of an | operator
X *
X * Implements the concatenation operator.
X */
Xstatic char *
Xregbranch(flagp)
Xint *flagp;
X{
X register char *ret;
X register char *chain;
X register char *latest;
X int flags;
X
X *flagp = WORST; /* Tentatively. */
X
X ret = regnode(BRANCH);
X chain = NULL;
X while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
X latest = regpiece(&flags);
X if (latest == NULL)
X return(NULL);
X *flagp |= flags&HASWIDTH;
X if (chain == NULL) /* First piece. */
X *flagp |= flags&SPSTART;
X else
X regtail(chain, latest);
X chain = latest;
X }
X if (chain == NULL) /* Loop ran zero times. */
X (void) regnode(NOTHING);
X
X return(ret);
X}
X
X/*
X - regpiece - something followed by possible [*+?]
X *
X * Note that the branching code sequences used for ? and the general cases
X * of * and + are somewhat optimized: they use the same NOTHING node as
X * both the endmarker for their branch list and the body of the last branch.
X * It might seem that this node could be dispensed with entirely, but the
X * endmarker role is not redundant.
X */
Xstatic char *
Xregpiece(flagp)
Xint *flagp;
X{
X register char *ret;
X register char op;
X register char *next;
X int flags;
X
X ret = regatom(&flags);
X if (ret == NULL)
X return(NULL);
X
X op = *regparse;
X if (!ISMULT(op)) {
X *flagp = flags;
X return(ret);
X }
X
X if (!(flags&HASWIDTH) && op != '?')
X FAIL("regexp *+ operand could be empty");
X *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
X
X if (op == '*' && (flags&SIMPLE))
X reginsert(STAR, ret);
X else if (op == '*') {
X /* Emit x* as (x&|), where & means "self". */
X reginsert(BRANCH, ret); /* Either x */
X regoptail(ret, regnode(BACK)); /* and loop */
X regoptail(ret, ret); /* back */
X regtail(ret, regnode(BRANCH)); /* or */
X regtail(ret, regnode(NOTHING)); /* null. */
X } else if (op == '+' && (flags&SIMPLE))
X reginsert(PLUS, ret);
X else if (op == '+') {
X /* Emit x+ as x(&|), where & means "self". */
X next = regnode(BRANCH); /* Either */
X regtail(ret, next);
X regtail(regnode(BACK), ret); /* loop back */
X regtail(next, regnode(BRANCH)); /* or */
X regtail(ret, regnode(NOTHING)); /* null. */
X } else if (op == '?') {
X /* Emit x? as (x|) */
X reginsert(BRANCH, ret); /* Either x */
X regtail(ret, regnode(BRANCH)); /* or */
X next = regnode(NOTHING); /* null. */
X regtail(ret, next);
X regoptail(ret, next);
X }
X regparse++;
X if (ISMULT(*regparse))
X FAIL("nested *?+ in regexp");
X
X return(ret);
X}
X
Xstatic int foo;
X
X/*
X - regatom - the lowest level
X *
X * Optimization: gobbles an entire sequence of ordinary characters so that
X * it can turn them into a single node, which is smaller to store and
X * faster to run. Backslashed characters are exceptions, each becoming a
X * separate node; the code is simpler that way and it's not worth fixing.
X *
X * [Yes, it is worth fixing, some scripts can run twice the speed.]
X */
Xstatic char *
Xregatom(flagp)
Xint *flagp;
X{
X register char *ret;
X int flags;
X
X *flagp = WORST; /* Tentatively. */
X
X switch (*regparse++) {
X case '^':
X ret = regnode(BOL);
X break;
X case '$':
X ret = regnode(EOL);
X break;
X case '.':
X ret = regnode(ANY);
X *flagp |= HASWIDTH|SIMPLE;
X break;
X case '[':
X ret = regclass();
X *flagp |= HASWIDTH|SIMPLE;
X break;
X case '(':
X ret = reg(1, &flags);
X if (ret == NULL)
X return(NULL);
X *flagp |= flags&(HASWIDTH|SPSTART);
X break;
X case '\0':
X case '|':
X case ')':
X FAIL("internal urp in regexp"); /* Supposed to be caught earlier. */
X break;
X case '?':
X case '+':
X case '*':
X FAIL("?+* follows nothing in regexp");
X break;
X case '\\':
X switch (*regparse) {
X case '\0':
X FAIL("trailing \\ in regexp");
X case 'w':
X ret = regnode(ALNUM);
X *flagp |= HASWIDTH|SIMPLE;
X regparse++;
X break;
X case 'W':
X ret = regnode(NALNUM);
X *flagp |= HASWIDTH|SIMPLE;
X regparse++;
X break;
X case 'b':
X ret = regnode(BOUND);
X *flagp |= SIMPLE;
X regparse++;
X break;
X case 'B':
X ret = regnode(NBOUND);
X *flagp |= SIMPLE;
X regparse++;
X break;
X case 's':
X ret = regnode(SPACE);
X *flagp |= HASWIDTH|SIMPLE;
X regparse++;
X break;
X case 'S':
X ret = regnode(NSPACE);
X *flagp |= HASWIDTH|SIMPLE;
X regparse++;
X break;
X case 'd':
X ret = regnode(DIGIT);
X *flagp |= HASWIDTH|SIMPLE;
X regparse++;
X break;
X case 'D':
X ret = regnode(NDIGIT);
X *flagp |= HASWIDTH|SIMPLE;
X regparse++;
X break;
X case 'n':
X case 'r':
X case 't':
X case 'f':
X goto defchar;
X case '0': case '1': case '2': case '3': case '4':
X case '5': case '6': case '7': case '8': case '9':
X if (isdigit(regparse[1]))
X goto defchar;
X else {
X ret = regnode(REF + *regparse++ - '0');
X *flagp |= SIMPLE;
X }
X break;
X default:
X goto defchar;
X }
X break;
X default: {
X register int len;
X register char ender;
X register char *p;
X char *oldp;
X int foo;
X
X defchar:
X ret = regnode(EXACTLY);
X regc(0); /* save spot for len */
X for (len=0, p=regparse-1; len < 127 && *p; len++) {
X oldp = p;
X switch (*p) {
X case '^':
X case '$':
X case '.':
X case '[':
X case '(':
X case ')':
X case '|':
X goto loopdone;
X case '\\':
X switch (*++p) {
X case '\0':
X FAIL("trailing \\ in regexp");
X case 'w':
X case 'W':
X case 'b':
X case 'B':
X case 's':
X case 'S':
X case 'd':
X case 'D':
X --p;
X goto loopdone;
X case 'n':
X ender = '\n';
X p++;
X break;
X case 'r':
X ender = '\r';
X p++;
X break;
X case 't':
X ender = '\t';
X p++;
X break;
X case 'f':
X ender = '\f';
X p++;
X break;
X case '0': case '1': case '2': case '3':case '4':
X case '5': case '6': case '7': case '8':case '9':
X if (isdigit(p[1])) {
X foo = *p++ - '0';
X foo <<= 3;
X foo += *p - '0';
X if (isdigit(p[1]))
X foo = (foo<<3) + *++p - '0';
X ender = foo;
X p++;
X }
X else {
X --p;
X goto loopdone;
X }
X break;
X default:
X ender = *p++;
X break;
X }
X break;
X default:
X ender = *p++;
X break;
X }
X if (regfold && isupper(ender))
X ender = tolower(ender);
X if (ISMULT(*p)) { /* Back off on ?+*. */
X if (len)
X p = oldp;
X else {
X len++;
X regc(ender);
X }
X break;
X }
X regc(ender);
X }
X loopdone:
X regparse = p;
X if (len <= 0)
X FAIL("internal disaster in regexp");
X *flagp |= HASWIDTH;
X if (len == 1)
X *flagp |= SIMPLE;
X *OPERAND(ret) = len;
X regc('\0');
X }
X break;
X }
X
X return(ret);
X}
X
X#ifdef FASTANY
Xstatic void
Xregset(bits,def,c)
Xchar *bits;
Xint def;
Xregister int c;
X{
X if (regcode == ®dummy)
X return;
X if (def)
X bits[c >> 3] &= ~(1 << (c & 7));
X else
X bits[c >> 3] |= (1 << (c & 7));
X}
X
Xstatic char *
Xregclass()
X{
X register char *bits;
X register int class;
X register int lastclass;
X register int range = 0;
X register char *ret;
X register int def;
X
X if (*regparse == '^') { /* Complement of range. */
X ret = regnode(ANYBUT);
X regparse++;
X def = 0;
X } else {
X ret = regnode(ANYOF);
X def = 255;
X }
X bits = regcode;
X for (class = 0; class < 32; class++)
X regc(def);
X if (*regparse == ']' || *regparse == '-')
X regset(bits,def,lastclass = *regparse++);
X while (*regparse != '\0' && *regparse != ']') {
X class = UCHARAT(regparse++);
X if (class == '\\') {
X class = UCHARAT(regparse++);
X switch (class) {
X case 'w':
X for (class = 'a'; class <= 'z'; class++)
X regset(bits,def,class);
X for (class = 'A'; class <= 'Z'; class++)
X regset(bits,def,class);
X for (class = '0'; class <= '9'; class++)
X regset(bits,def,class);
X regset(bits,def,'_');
X lastclass = 1234;
X continue;
X case 's':
X regset(bits,def,' ');
X regset(bits,def,'\t');
X regset(bits,def,'\r');
X regset(bits,def,'\f');
X regset(bits,def,'\n');
X lastclass = 1234;
X continue;
X case 'd':
X for (class = '0'; class <= '9'; class++)
X regset(bits,def,class);
X lastclass = 1234;
X continue;
X case 'n':
X class = '\n';
X break;
X case 'r':
X class = '\r';
X break;
X case 't':
X class = '\t';
X break;
X case 'f':
X class = '\f';
X break;
X case 'b':
X class = '\b';
X break;
X case '0': case '1': case '2': case '3': case '4':
X case '5': case '6': case '7': case '8': case '9':
X class -= '0';
X if (isdigit(*regparse)) {
X class <<= 3;
X class += *regparse++ - '0';
X }
X if (isdigit(*regparse)) {
X class <<= 3;
X class += *regparse++ - '0';
X }
X break;
X }
X }
X if (!range && class == '-' && *regparse && *regparse != ']') {
X range = 1;
X continue;
X }
X if (range) {
X if (lastclass > class)
X FAIL("invalid [] range in regexp");
X }
X else
X lastclass = class - 1;
X range = 0;
X for (lastclass++; lastclass <= class; lastclass++) {
X regset(bits,def,lastclass);
X if (regfold && isupper(lastclass))
X regset(bits,def,tolower(lastclass));
X }
X lastclass = class;
X }
X if (*regparse != ']')
X FAIL("unmatched [] in regexp");
X regset(bits,0,0); /* always bomb out on null */
X regparse++;
X return ret;
X}
X
X#else /* !FASTANY */
Xstatic char *
Xregclass()
X{
X register int class;
X register int lastclass;
X register int range = 0;
X register char *ret;
X
X if (*regparse == '^') { /* Complement of range. */
X ret = regnode(ANYBUT);
X regparse++;
X } else
X ret = regnode(ANYOF);
X if (*regparse == ']' || *regparse == '-')
X regc(lastclass = *regparse++);
X while (*regparse != '\0' && *regparse != ']') {
X class = UCHARAT(regparse++);
X if (class == '\\') {
X class = UCHARAT(regparse++);
X switch (class) {
X case 'w':
X for (class = 'a'; class <= 'z'; class++)
X regc(class);
X for (class = 'A'; class <= 'Z'; class++)
X regc(class);
X for (class = '0'; class <= '9'; class++)
X regc(class);
X regc('_');
X lastclass = 1234;
X continue;
X case 's':
X regc(' ');
X regc('\t');
X regc('\r');
X regc('\f');
X regc('\n');
X lastclass = 1234;
X continue;
X case 'd':
X for (class = '0'; class <= '9'; class++)
X regc(class);
X lastclass = 1234;
X continue;
X case 'n':
X class = '\n';
X break;
X case 'r':
X class = '\r';
X break;
X case 't':
X class = '\t';
X break;
X case 'f':
X class = '\f';
X break;
X case 'b':
X class = '\b';
X break;
X case '0': case '1': case '2': case '3': case '4':
X case '5': case '6': case '7': case '8': case '9':
X class -= '0';
X if (isdigit(*regparse)) {
X class <<= 3;
X class += *regparse++ - '0';
X }
X if (isdigit(*regparse)) {
X class <<= 3;
X class += *regparse++ - '0';
X }
X break;
X }
X }
X if (!range && class == '-' && *regparse && *regparse != ']') {
X range = 1;
X continue;
X }
X if (range) {
X if (lastclass > class)
X FAIL("invalid [] range in regexp");
X }
X else
X lastclass = class - 1;
X range = 0;
X for (lastclass++; lastclass <= class; lastclass++) {
X regc(lastclass);
X if (regfold && isupper(lastclass))
X regc(tolower(lastclass));
X }
X lastclass = class;
X }
X regc('\0');
X if (*regparse != ']')
X FAIL("unmatched [] in regexp");
X regparse++;
X return ret;
X}
X#endif /* NOTDEF */
X
Xstatic char *
Xregchar(ch,flagp)
Xint ch;
Xint *flagp;
X{
X char *ret;
X
X ret = regnode(EXACTLY);
X regc(1);
X regc(ch);
X regc('\0');
X regparse++;
X *flagp |= HASWIDTH|SIMPLE;
X return ret;
X}
X
X/*
X - regnode - emit a node
X */
Xstatic char * /* Location. */
Xregnode(op)
Xchar op;
X{
X register char *ret;
X register char *ptr;
X
X ret = regcode;
X if (ret == ®dummy) {
X#ifdef ALIGN
X if (!(regsize & 1))
X regsize++;
X#endif
X regsize += 3;
X return(ret);
X }
X
X#ifdef ALIGN
X if (!((long)ret & 1))
X *ret++ = 127;
X#endif
X ptr = ret;
X *ptr++ = op;
X *ptr++ = '\0'; /* Null "next" pointer. */
X *ptr++ = '\0';
X regcode = ptr;
X
X return(ret);
X}
X
X/*
X - regc - emit (if appropriate) a byte of code
X */
Xstatic void
Xregc(b)
Xchar b;
X{
X if (regcode != ®dummy)
X *regcode++ = b;
X else
X regsize++;
X}
X
X/*
X - reginsert - insert an operator in front of already-emitted operand
X *
X * Means relocating the operand.
X */
Xstatic void
Xreginsert(op, opnd)
Xchar op;
Xchar *opnd;
X{
X register char *src;
X register char *dst;
X register char *place;
X
X if (regcode == ®dummy) {
X#ifdef ALIGN
X regsize += 4;
X#else
X regsize += 3;
X#endif
X return;
X }
X
X src = regcode;
X#ifdef ALIGN
X regcode += 4;
X#else
X regcode += 3;
X#endif
X dst = regcode;
X while (src > opnd)
X *--dst = *--src;
X
X place = opnd; /* Op node, where operand used to be. */
X *place++ = op;
X *place++ = '\0';
X *place++ = '\0';
X}
X
X/*
X - regtail - set the next-pointer at the end of a node chain
X */
Xstatic void
Xregtail(p, val)
Xchar *p;
Xchar *val;
X{
X register char *scan;
X register char *temp;
X register int offset;
X
X if (p == ®dummy)
X return;
X
X /* Find last node. */
X scan = p;
X for (;;) {
X temp = regnext(scan);
X if (temp == NULL)
X break;
X scan = temp;
X }
X
X#ifdef ALIGN
X offset = val - scan;
X *(short*)(scan+1) = offset;
X#else
X if (OP(scan) == BACK)
X offset = scan - val;
X else
X offset = val - scan;
X *(scan+1) = (offset>>8)&0377;
X *(scan+2) = offset&0377;
X#endif
X}
X
X/*
X - regoptail - regtail on operand of first argument; nop if operandless
X */
Xstatic void
Xregoptail(p, val)
Xchar *p;
Xchar *val;
X{
X /* "Operandless" and "op != BRANCH" are synonymous in practice. */
X if (p == NULL || p == ®dummy || OP(p) != BRANCH)
X return;
X regtail(NEXTOPER(p), val);
X}
X
X/*
X * regexec and friends
X */
X
X/*
X * Global work variables for regexec().
X */
Xstatic char *reginput; /* String-input pointer. */
Xstatic char *regbol; /* Beginning of input, for ^ check. */
Xstatic char **regstartp; /* Pointer to startp array. */
Xstatic char **regendp; /* Ditto for endp. */
Xstatic char *reglastparen; /* Similarly for lastparen. */
Xstatic char *regtill;
X
Xstatic char *regmystartp[10]; /* For remembering backreferences. */
Xstatic char *regmyendp[10];
X
X/*
X * Forwards.
X */
XSTATIC int regtry();
XSTATIC int regmatch();
XSTATIC int regrepeat();
X
Xextern char sawampersand;
Xextern int multiline;
X
X/*
X - regexec - match a regexp against a string
X */
Xint
Xregexec(prog, stringarg, strend, beginning, minend, screamer)
Xregister regexp *prog;
Xchar *stringarg;
Xchar *strend; /* pointer to null at end of string */
Xint beginning; /* is ^ valid at the beginning of stringarg? */
Xint minend; /* end of match must be at least minend after stringarg */
XSTR *screamer;
X{
X register char *s;
X extern char *index();
X register int tmp, i;
X register char *string = stringarg;
X register char *c;
X extern char *savestr();
X
X /* Be paranoid... */
X if (prog == NULL || string == NULL) {
X fatal("NULL regexp parameter");
X return(0);
X }
X
X regprecomp = prog->precomp;
X /* Check validity of program. */
X if (UCHARAT(prog->program) != MAGIC) {
X FAIL("corrupted regexp program");
X }
X
X if (prog->do_folding) {
X i = strend - string;
X string = savestr(string);
X strend = string + i;
X for (s = string; *s; s++)
X if (isupper(*s))
X *s = tolower(*s);
X }
X
X /* If there is a "must appear" string, look for it. */
X s = string;
X if (prog->regmust != Nullstr) {
X if (beginning && screamer) {
X if (screamfirst[prog->regmust->str_rare] >= 0)
X s = screaminstr(screamer,prog->regmust);
X else
X s = Nullch;
X }
X else
X s = fbminstr(s,strend,prog->regmust);
X if (!s) {
X ++*(long*)&prog->regmust->str_nval; /* hooray */
X goto phooey; /* not present */
X }
X else if (prog->regback >= 0) {
X s -= prog->regback;
X if (s < string)
X s = string;
X }
X else if (--*(long*)&prog->regmust->str_nval < 0) { /* boo */
X str_free(prog->regmust);
X prog->regmust = Nullstr; /* disable regmust */
X s = string;
X }
X else
X s = string;
X }
X
X /* Mark beginning of line for ^ . */
X if (beginning)
X regbol = string;
X else
X regbol = NULL;
X
X /* see how far we have to get to not match where we matched before */
X regtill = string+minend;
X
X /* Simplest case: anchored match need be tried only once. */
X /* [unless multiline is set] */
X if (prog->reganch) {
X if (regtry(prog, string))
X goto got_it;
X else if (multiline) {
X /* for multiline we only have to try after newlines */
X if (s > string)
X s--;
X while ((s = index(s, '\n')) != NULL) {
X if (*++s && regtry(prog, s))
X goto got_it;
X }
X }
X goto phooey;
X }
X
X /* Messy cases: unanchored match. */
X if (prog->regstart) {
X /* We know what string it must start with. */
X if (prog->regstart->str_pok == 3) {
X while ((s = fbminstr(s, strend, prog->regstart)) != NULL) {
X if (regtry(prog, s))
X goto got_it;
X s++;
X }
X }
X else {
X c = prog->regstart->str_ptr;
X while ((s = instr(s, c)) != NULL) {
X if (regtry(prog, s))
X goto got_it;
X s++;
X }
X }
X }
X else if (c = prog->regstclass) {
X /* We know what class it must start with. */
X switch (OP(c)) {
X case ANYOF: case ANYBUT:
X c = OPERAND(c);
X while (i = *s) {
X if (!(c[i >> 3] & (1 << (i&7))))
X if (regtry(prog, s))
X goto got_it;
X s++;
X }
X break;
X case BOUND:
X tmp = 0;
X while (i = *s) {
X if (tmp != (isalpha(i) || isdigit(i) || i == '_')) {
X tmp = !tmp;
X if (regtry(prog, s))
X goto got_it;
X }
X s++;
X }
X if (tmp && regtry(prog,s))
X goto got_it;
X break;
X case NBOUND:
X tmp = 0;
X while (i = *s) {
X if (tmp != (isalpha(i) || isdigit(i) || i == '_'))
X tmp = !tmp;
X else if (regtry(prog, s))
X goto got_it;
X s++;
X }
X if (!tmp && regtry(prog,s))
X goto got_it;
X break;
X case ALNUM:
X while (i = *s) {
X if (isalpha(i) || isdigit(i) || i == '_')
X if (regtry(prog, s))
X goto got_it;
X s++;
X }
X break;
X case NALNUM:
X while (i = *s) {
X if (!isalpha(i) && !isdigit(i) && i != '_')
X if (regtry(prog, s))
X goto got_it;
X s++;
X }
X break;
X case SPACE:
X while (i = *s) {
X if (isspace(i))
X if (regtry(prog, s))
X goto got_it;
X s++;
X }
X break;
X case NSPACE:
X while (i = *s) {
X if (!isspace(i))
X if (regtry(prog, s))
X goto got_it;
X s++;
X }
X break;
X case DIGIT:
X while (i = *s) {
X if (isdigit(i))
X if (regtry(prog, s))
X goto got_it;
X s++;
X }
X break;
X case NDIGIT:
X while (i = *s) {
X if (!isdigit(i))
X if (regtry(prog, s))
X goto got_it;
X s++;
X }
X break;
X }
X }
X else
X /* We don't know much -- general case. */
X do {
X if (regtry(prog, s))
X goto got_it;
X } while (*s++ != '\0');
X
X /* Failure. */
X goto phooey;
X
X got_it:
X if (prog->nparens || sawampersand || prog->do_folding) {
X s = savestr(stringarg); /* so $digit will always work */
X if (prog->subbase)
X safefree(prog->subbase);
X prog->subbase = s;
X tmp = prog->subbase - string;
X for (i = 0; i <= prog->nparens; i++) {
X if (prog->endp[i]) {
X prog->startp[i] += tmp;
X prog->endp[i] += tmp;
X }
X }
X if (prog->do_folding) {
X safefree(string);
X }
X }
X return(1);
X
X phooey:
X if (prog->do_folding) {
X safefree(string);
X }
X return(0);
X}
X
X/*
X - regtry - try match at specific point
X */
Xstatic int /* 0 failure, 1 success */
Xregtry(prog, string)
Xregexp *prog;
Xchar *string;
X{
X register int i;
X register char **sp;
X register char **ep;
X
X reginput = string;
X regstartp = prog->startp;
X regendp = prog->endp;
X reglastparen = &prog->lastparen;
X
X sp = prog->startp;
X ep = prog->endp;
X if (prog->nparens) {
X for (i = NSUBEXP; i > 0; i--) {
X *sp++ = NULL;
X *ep++ = NULL;
X }
X }
X if (regmatch(prog->program + 1) && reginput >= regtill) {
X prog->startp[0] = string;
X prog->endp[0] = reginput;
X return(1);
X } else
X return(0);
X}
X
X/*
X - regmatch - main matching routine
X *
X * Conceptually the strategy is simple: check to see whether the current
X * node matches, call self recursively to see whether the rest matches,
X * and then act accordingly. In practice we make some effort to avoid
X * recursion, in particular by going through "ordinary" nodes (that don't
X * need to know whether the rest of the match failed) by a loop instead of
X * by recursion.
X */
X/* [lwall] I've hoisted the register declarations to the outer block in order to
X * maybe save a little bit of pushing and popping on the stack. It also takes
X * advantage of machines that use a register save mask on subroutine entry.
X */
Xstatic int /* 0 failure, 1 success */
Xregmatch(prog)
Xchar *prog;
X{
X register char *scan; /* Current node. */
X char *next; /* Next node. */
X extern char *index();
X register int nextchar;
X register int n; /* no or next */
X register int ln; /* len or last */
X register char *s; /* operand or save */
X register char *locinput = reginput;
X
X nextchar = *reginput;
X scan = prog;
X#ifdef DEBUG
X if (scan != NULL && regnarrate)
X fprintf(stderr, "%s(\n", regprop(scan));
X#endif
X while (scan != NULL) {
X#ifdef DEBUG
X if (regnarrate)
X fprintf(stderr, "%s...\n", regprop(scan));
X#endif
X
X#ifdef ALIGN
X next = scan + NEXT(scan);
X if (next == scan)
X next = NULL;
X#else
X next = regnext(scan);
X#endif
X
X switch (OP(scan)) {
X case BOL:
X if (locinput == regbol ||
X (nextchar && locinput[-1] == '\n') ) {
X regtill--;
X break;
X }
X return(0);
X case EOL:
X if (nextchar != '\0' && nextchar != '\n')
X return(0);
X regtill--;
X break;
X case ANY:
X if (nextchar == '\0' || nextchar == '\n')
X return(0);
X nextchar = *++locinput;
X break;
X case EXACTLY:
X s = OPERAND(scan);
X ln = *s++;
X /* Inline the first character, for speed. */
X if (*s != nextchar)
X return(0);
X if (ln > 1 && strncmp(s, locinput, ln) != 0)
X return(0);
X locinput += ln;
X nextchar = *locinput;
X break;
X case ANYOF:
X case ANYBUT:
X s = OPERAND(scan);
X if (nextchar < 0)
X nextchar = UCHARAT(locinput);
X if (s[nextchar >> 3] & (1 << (nextchar&7)))
X return(0);
X nextchar = *++locinput;
X break;
X#ifdef NOTDEF
X case ANYOF:
X if (nextchar == '\0' || index(OPERAND(scan), nextchar) == NULL)
X return(0);
X nextchar = *++locinput;
X break;
X case ANYBUT:
X if (nextchar == '\0' || index(OPERAND(scan), nextchar) != NULL)
X return(0);
X nextchar = *++locinput;
X break;
X#endif
X case ALNUM:
X if (!nextchar)
X return(0);
X if (!isalpha(nextchar) && !isdigit(nextchar) &&
X nextchar != '_')
X return(0);
X nextchar = *++locinput;
X break;
X case NALNUM:
X if (!nextchar)
X return(0);
X if (isalpha(nextchar) || isdigit(nextchar) ||
X nextchar == '_')
X return(0);
X nextchar = *++locinput;
X break;
X case NBOUND:
X case BOUND:
X if (locinput == regbol) /* was last char in word? */
X ln = 0;
X else
X ln = (isalpha(locinput[-1]) ||
X isdigit(locinput[-1]) ||
X locinput[-1] == '_' );
X n = (isalpha(nextchar) || isdigit(nextchar) ||
X nextchar == '_' ); /* is next char in word? */
X if ((ln == n) == (OP(scan) == BOUND))
X return(0);
X break;
X case SPACE:
X if (!nextchar)
X return(0);
X if (!isspace(nextchar))
X return(0);
X nextchar = *++locinput;
X break;
X case NSPACE:
X if (!nextchar)
X return(0);
X if (isspace(nextchar))
X return(0);
X nextchar = *++locinput;
X break;
X case DIGIT:
X if (!isdigit(nextchar))
X return(0);
X nextchar = *++locinput;
X break;
X case NDIGIT:
X if (!nextchar)
X return(0);
X if (isdigit(nextchar))
X return(0);
X nextchar = *++locinput;
X break;
X case REF:
X case REF+1:
X case REF+2:
X case REF+3:
X case REF+4:
X case REF+5:
X case REF+6:
X case REF+7:
X case REF+8:
X case REF+9:
X n = OP(scan) - REF;
X s = regmystartp[n];
X if (!s)
X return(0);
X if (!regmyendp[n])
X return(0);
X if (s == regmyendp[n])
X break;
X /* Inline the first character, for speed. */
X if (*s != nextchar)
X return(0);
X ln = regmyendp[n] - s;
X if (ln > 1 && strncmp(s, locinput, ln) != 0)
X return(0);
X locinput += ln;
X nextchar = *locinput;
X break;
X
X case NOTHING:
X break;
X case BACK:
X break;
X case OPEN+1:
X case OPEN+2:
X case OPEN+3:
X case OPEN+4:
X case OPEN+5:
X case OPEN+6:
X case OPEN+7:
X case OPEN+8:
X case OPEN+9:
X n = OP(scan) - OPEN;
X reginput = locinput;
X
X regmystartp[n] = locinput; /* for REF */
X if (regmatch(next)) {
X /*
X * Don't set startp if some later
X * invocation of the same parentheses
X * already has.
X */
X if (regstartp[n] == NULL)
X regstartp[n] = locinput;
X return(1);
X } else
X return(0);
X /* NOTREACHED */
X case CLOSE+1:
X case CLOSE+2:
X case CLOSE+3:
X case CLOSE+4:
X case CLOSE+5:
X case CLOSE+6:
X case CLOSE+7:
X case CLOSE+8:
X case CLOSE+9: {
X n = OP(scan) - CLOSE;
X reginput = locinput;
X
X regmyendp[n] = locinput; /* for REF */
X if (regmatch(next)) {
X /*
X * Don't set endp if some later
X * invocation of the same parentheses
X * already has.
X */
X if (regendp[n] == NULL) {
X regendp[n] = locinput;
X *reglastparen = n;
X }
X return(1);
X } else
X return(0);
X }
X break;
X case BRANCH: {
X if (OP(next) != BRANCH) /* No choice. */
X next = NEXTOPER(scan); /* Avoid recursion. */
X else {
X do {
X reginput = locinput;
X if (regmatch(NEXTOPER(scan)))
X return(1);
X#ifdef ALIGN
X if (n = NEXT(scan))
X scan += n;
X else
X scan = NULL;
X#else
X scan = regnext(scan);
X#endif
X } while (scan != NULL && OP(scan) == BRANCH);
X return(0);
X /* NOTREACHED */
X }
X }
X break;
X case STAR:
X case PLUS:
X /*
X * Lookahead to avoid useless match attempts
X * when we know what character comes next.
X */
X if (OP(next) == EXACTLY)
X nextchar = *(OPERAND(next)+1);
X else
X nextchar = '\0';
X ln = (OP(scan) == STAR) ? 0 : 1;
X reginput = locinput;
X n = regrepeat(NEXTOPER(scan));
X while (n >= ln) {
X /* If it could work, try it. */
X if (nextchar == '\0' || *reginput == nextchar)
X if (regmatch(next))
X return(1);
X /* Couldn't or didn't -- back up. */
X n--;
X reginput = locinput + n;
X }
X return(0);
X case END:
X reginput = locinput; /* put where regtry can find it */
X return(1); /* Success! */
X default:
X printf("%x %d\n",scan,scan[1]);
X FAIL("regexp memory corruption");
X }
X
X scan = next;
X }
X
X /*
X * We get here only if there's trouble -- normally "case END" is
X * the terminating point.
X */
X FAIL("corrupted regexp pointers");
X /*NOTREACHED*/
X}
X
X/*
X - regrepeat - repeatedly match something simple, report how many
X */
X/*
X * [This routine now assumes that it will only match on things of length 1.
X * That was true before, but now we assume scan - reginput is the count,
X * rather than incrementing count on every character.]
X */
Xstatic int
Xregrepeat(p)
Xchar *p;
X{
X register char *scan;
X register char *opnd;
X register int c;
X
X scan = reginput;
X opnd = OPERAND(p);
X switch (OP(p)) {
X case ANY:
X while (*scan && *scan != '\n')
X scan++;
X break;
X case EXACTLY: /* length of string is 1 */
X opnd++;
X while (*opnd == *scan)
X scan++;
X break;
X#ifdef FASTANY
X case ANYOF:
X case ANYBUT:
X c = UCHARAT(scan);
X while (!(opnd[c >> 3] & (1 << (c & 7)))) {
X scan++;
X c = UCHARAT(scan);
X }
X break;
X#else
X case ANYOF:
X while (*scan != '\0' && index(opnd, *scan) != NULL)
X scan++;
X break;
X case ANYBUT:
X while (*scan != '\0' && index(opnd, *scan) == NULL)
X scan++;
X break;
X#endif /* FASTANY */
X case ALNUM:
X while (*scan && (isalpha(*scan) || isdigit(*scan) ||
X *scan == '_'))
X scan++;
X break;
X case NALNUM:
X while (*scan && (!isalpha(*scan) && !isdigit(*scan) &&
X *scan != '_'))
X scan++;
X break;
X case SPACE:
X while (*scan && isspace(*scan))
X scan++;
X break;
X case NSPACE:
X while (*scan && !isspace(*scan))
X scan++;
X break;
X case DIGIT:
X while (*scan && isdigit(*scan))
X scan++;
X break;
X case NDIGIT:
X while (*scan && !isdigit(*scan))
X scan++;
X break;
X default: /* Oh dear. Called inappropriately. */
X FAIL("internal regexp foulup");
X /* NOTREACHED */
X }
X
X c = scan - reginput;
X reginput = scan;
X
X return(c);
X}
X
X/*
X - regnext - dig the "next" pointer out of a node
X *
X * [Note, when ALIGN is defined there are two places in regmatch() that bypass
X * this code for speed.]
X */
Xstatic char *
Xregnext(p)
Xregister char *p;
X{
X register int offset;
X
X if (p == ®dummy)
X return(NULL);
X
X offset = NEXT(p);
X if (offset == 0)
X return(NULL);
X
X#ifdef ALIGN
X return(p+offset);
X#else
X if (OP(p) == BACK)
X return(p-offset);
X else
X return(p+offset);
X#endif
X}
X
X#ifdef DEBUG
X
XSTATIC char *regprop();
X
X/*
X - regdump - dump a regexp onto stdout in vaguely comprehensible form
X */
Xvoid
Xregdump(r)
Xregexp *r;
X{
X register char *s;
X register char op = EXACTLY; /* Arbitrary non-END op. */
X register char *next;
X extern char *index();
X
X
X s = r->program + 1;
X while (op != END) { /* While that wasn't END last time... */
X#ifdef ALIGN
X if (!((long)s & 1))
X s++;
X#endif
X op = OP(s);
X printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */
X next = regnext(s);
X if (next == NULL) /* Next ptr. */
X printf("(0)");
X else
X printf("(%d)", (s-r->program)+(next-s));
X s += 3;
X if (op == ANYOF || op == ANYBUT) {
X s += 32;
X }
X if (op == EXACTLY) {
X /* Literal string, where present. */
X s++;
X while (*s != '\0') {
X putchar(*s);
X s++;
X }
X s++;
X }
X putchar('\n');
X }
X
X /* Header fields of interest. */
X if (r->regstart)
X printf("start `%s' ", r->regstart->str_ptr);
X if (r->regstclass)
X printf("stclass `%s' ", regprop(OP(r->regstclass)));
X if (r->reganch)
X printf("anchored ");
X if (r->regmust != NULL)
X printf("must have \"%s\" back %d ", r->regmust->str_ptr,
X r->regback);
X printf("\n");
X}
X
X/*
X - regprop - printable representation of opcode
X */
Xstatic char *
Xregprop(op)
Xchar *op;
X{
X register char *p;
X static char buf[50];
X
X (void) strcpy(buf, ":");
X
X switch (OP(op)) {
X case BOL:
X p = "BOL";
X break;
X case EOL:
X p = "EOL";
X break;
X case ANY:
X p = "ANY";
X break;
X case ANYOF:
X p = "ANYOF";
X break;
X case ANYBUT:
X p = "ANYBUT";
X break;
X case BRANCH:
X p = "BRANCH";
X break;
X case EXACTLY:
X p = "EXACTLY";
X break;
X case NOTHING:
X p = "NOTHING";
X break;
X case BACK:
X p = "BACK";
X break;
X case END:
X p = "END";
X break;
X case ALNUM:
X p = "ALNUM";
X break;
X case NALNUM:
X p = "NALNUM";
X break;
X case BOUND:
X p = "BOUND";
X break;
X case NBOUND:
X p = "NBOUND";
X break;
X case SPACE:
X p = "SPACE";
X break;
X case NSPACE:
X p = "NSPACE";
X break;
X case DIGIT:
X p = "DIGIT";
X break;
X case NDIGIT:
X p = "NDIGIT";
X break;
X case REF:
X case REF+1:
X case REF+2:
X case REF+3:
X case REF+4:
X case REF+5:
X case REF+6:
X case REF+7:
X case REF+8:
X case REF+9:
X sprintf(buf+strlen(buf), "REF%d", OP(op)-REF);
X p = NULL;
X break;
X case OPEN+1:
X case OPEN+2:
X case OPEN+3:
X case OPEN+4:
X case OPEN+5:
X case OPEN+6:
X case OPEN+7:
X case OPEN+8:
X case OPEN+9:
X sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
X p = NULL;
X break;
X case CLOSE+1:
X case CLOSE+2:
X case CLOSE+3:
X case CLOSE+4:
X case CLOSE+5:
X case CLOSE+6:
X case CLOSE+7:
X case CLOSE+8:
X case CLOSE+9:
X sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
X p = NULL;
X break;
X case STAR:
X p = "STAR";
X break;
X case PLUS:
X p = "PLUS";
X break;
X default:
X FAIL("corrupted regexp opcode");
X }
X if (p != NULL)
X (void) strcat(buf, p);
X return(buf);
X}
X#endif
X
X#ifdef NOTDEF
X/*
X * The following is provided for those people who do not have strcspn() in
X * their C libraries. They should get off their butts and do something
X * about it; at least one public-domain implementation of those (highly
X * useful) string routines has been published on Usenet.
X */
X#ifndef STRCSPN
X/*
X * strcspn - find length of initial segment of s1 consisting entirely
X * of characters not from s2
X */
X
Xstatic int
Xstrcspn(s1, s2)
Xchar *s1;
Xchar *s2;
X{
X register char *scan1;
X register char *scan2;
X register int count;
X
X count = 0;
X for (scan1 = s1; *scan1 != '\0'; scan1++) {
X for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */
X if (*scan1 == *scan2++)
X return(count);
X count++;
X }
X return(count);
X}
X#endif
X#endif /* NOTDEF */
X
Xregfree(r)
Xstruct regexp *r;
X{
X if (r->precomp)
X safefree(r->precomp);
X if (r->subbase)
X safefree(r->subbase);
X if (r->regmust)
X str_free(r->regmust);
X if (r->regstart)
X str_free(r->regstart);
X safefree((char*)r);
X}
!STUFFY!FUNK!
echo ""
echo "End of kit 2 (of 15)"
cat /dev/null >kit2isdone
run=''
config=''
for iskit in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
if test -f kit${iskit}isdone; then
run="$run $iskit"
else
todo="$todo $iskit"
fi
done
case $todo in
'')
echo "You have run all your kits. Please read README and then type Configure."
chmod 755 Configure
;;
*) echo "You have run$run."
echo "You still need to run$todo."
;;
esac
: Someone might mail this, so...
exit
--
Please send comp.sources.unix-related mail to rsalz at uunet.uu.net.
More information about the Comp.sources.unix
mailing list