v24i022: GNU Diff, version 1.15, Part07/08
Rich Salz
rsalz at uunet.uu.net
Tue Feb 26 08:14:31 AEST 1991
Submitted-by: Paul Eggert <eggert at twinsun.com>
Posting-number: Volume 24, Issue 22
Archive-name: gnudiff1.15/part07
#! /bin/sh
# This is a shell archive. Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file". To overwrite existing
# files, type "sh file -c". You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g.. If this archive is complete, you
# will see the following message at the end:
# "End of archive 7 (of 8)."
# Contents: regex.c1
# Wrapped by eggert at ata on Mon Jan 7 11:25:31 1991
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'regex.c1' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'regex.c1'\"
else
echo shar: Extracting \"'regex.c1'\" \(45472 characters\)
sed "s/^X//" >'regex.c1' <<'END_OF_FILE'
X/* Extended regular expression matching and search library.
X Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
X
X This program is free software; you can redistribute it and/or modify
X it under the terms of the GNU General Public License as published by
X the Free Software Foundation; either version 1, or (at your option)
X any later version.
X
X This program is distributed in the hope that it will be useful,
X but WITHOUT ANY WARRANTY; without even the implied warranty of
X MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
X GNU General Public License for more details.
X
X You should have received a copy of the GNU General Public License
X along with this program; if not, write to the Free Software
X Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
X
X
X/* To test, compile with -Dtest. This Dtestable feature turns this into
X a self-contained program which reads a pattern, describes how it
X compiles, then reads a string and searches for it.
X
X On the other hand, if you compile with both -Dtest and -Dcanned you
X can run some tests we've already thought of. */
X
X
X#ifdef emacs
X
X/* The `emacs' switch turns on certain special matching commands
X that make sense only in emacs. */
X
X#include "config.h"
X#include "lisp.h"
X#include "buffer.h"
X#include "syntax.h"
X
X#else /* not emacs */
X
X#if defined (USG) || defined (STDC_HEADERS)
X#ifndef BSTRING
X#include <string.h>
X#define bcopy(s,d,n) memcpy((d),(s),(n))
X#define bcmp(s1,s2,n) memcmp((s1),(s2),(n))
X#define bzero(s,n) memset((s),0,(n))
X#endif
X#endif
X
X#ifdef STDC_HEADERS
X#include <stdlib.h>
X#else
Xchar *malloc ();
Xchar *realloc ();
X#endif
X
X/* Make alloca work the best possible way. */
X#ifdef __GNUC__
X#define alloca __builtin_alloca
X#else
X#ifdef sparc
X#include <alloca.h>
X#else
Xchar *alloca ();
X#endif
X#endif
X
X
X/* Define the syntax stuff, so we can do the \<, \>, etc. */
X
X/* This must be nonzero for the wordchar and notwordchar pattern
X commands in re_match_2. */
X#ifndef Sword
X#define Sword 1
X#endif
X
X#define SYNTAX(c) re_syntax_table[c]
X
X
X#ifdef SYNTAX_TABLE
X
Xchar *re_syntax_table;
X
X#else /* not SYNTAX_TABLE */
X
Xstatic char re_syntax_table[256];
X
X
Xstatic void
Xinit_syntax_once ()
X{
X register int c;
X static int done = 0;
X
X if (done)
X return;
X
X bzero (re_syntax_table, sizeof re_syntax_table);
X
X for (c = 'a'; c <= 'z'; c++)
X re_syntax_table[c] = Sword;
X
X for (c = 'A'; c <= 'Z'; c++)
X re_syntax_table[c] = Sword;
X
X for (c = '0'; c <= '9'; c++)
X re_syntax_table[c] = Sword;
X
X done = 1;
X}
X
X#endif /* SYNTAX_TABLE */
X#endif /* emacs */
X
X/* We write fatal error messages on standard error. */
X#include <stdio.h>
X
X/* isalpha(3) etc. are used for the character classes. */
X#include <ctype.h>
X/* Sequents are missing isgraph. */
X#ifndef isgraph
X#define isgraph(c) (isprint((c)) && !isspace((c)))
X#endif
X
X/* Get the interface, including the syntax bits. */
X#include "regex.h"
X
X
X/* These are the command codes that appear in compiled regular
X expressions, one per byte. Some command codes are followed by
X argument bytes. A command code can specify any interpretation
X whatsoever for its arguments. Zero-bytes may appear in the compiled
X regular expression.
X
X The value of `exactn' is needed in search.c (search_buffer) in emacs.
X So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
X `exactn' we use here must also be 1. */
X
Xenum regexpcode
X {
X unused=0,
X exactn=1, /* Followed by one byte giving n, then by n literal bytes. */
X begline, /* Fail unless at beginning of line. */
X endline, /* Fail unless at end of line. */
X jump, /* Followed by two bytes giving relative address to jump to. */
X on_failure_jump, /* Followed by two bytes giving relative address of
X place to resume at in case of failure. */
X finalize_jump, /* Throw away latest failure point and then jump to
X address. */
X maybe_finalize_jump, /* Like jump but finalize if safe to do so.
X This is used to jump back to the beginning
X of a repeat. If the command that follows
X this jump is clearly incompatible with the
X one at the beginning of the repeat, such that
X we can be sure that there is no use backtracking
X out of repetitions already completed,
X then we finalize. */
X dummy_failure_jump, /* Jump, and push a dummy failure point. This
X failure point will be thrown away if an attempt
X is made to use it for a failure. A + construct
X makes this before the first repeat. Also
X use it as an intermediary kind of jump when
X compiling an or construct. */
X succeed_n, /* Used like on_failure_jump except has to succeed n times;
X then gets turned into an on_failure_jump. The relative
X address following it is useless until then. The
X address is followed by two bytes containing n. */
X jump_n, /* Similar to jump, but jump n times only; also the relative
X address following is in turn followed by yet two more bytes
X containing n. */
X set_number_at, /* Set the following relative location to the
X subsequent number. */
X anychar, /* Matches any (more or less) one character. */
X charset, /* Matches any one char belonging to specified set.
X First following byte is number of bitmap bytes.
X Then come bytes for a bitmap saying which chars are in.
X Bits in each byte are ordered low-bit-first.
X A character is in the set if its bit is 1.
X A character too large to have a bit in the map
X is automatically not in the set. */
X charset_not, /* Same parameters as charset, but match any character
X that is not one of those specified. */
X start_memory, /* Start remembering the text that is matched, for
X storing in a memory register. Followed by one
X byte containing the register number. Register numbers
X must be in the range 0 through RE_NREGS. */
X stop_memory, /* Stop remembering the text that is matched
X and store it in a memory register. Followed by
X one byte containing the register number. Register
X numbers must be in the range 0 through RE_NREGS. */
X duplicate, /* Match a duplicate of something remembered.
X Followed by one byte containing the index of the memory
X register. */
X before_dot, /* Succeeds if before point. */
X at_dot, /* Succeeds if at point. */
X after_dot, /* Succeeds if after point. */
X begbuf, /* Succeeds if at beginning of buffer. */
X endbuf, /* Succeeds if at end of buffer. */
X wordchar, /* Matches any word-constituent character. */
X notwordchar, /* Matches any char that is not a word-constituent. */
X wordbeg, /* Succeeds if at word beginning. */
X wordend, /* Succeeds if at word end. */
X wordbound, /* Succeeds if at a word boundary. */
X notwordbound,/* Succeeds if not at a word boundary. */
X syntaxspec, /* Matches any character whose syntax is specified.
X followed by a byte which contains a syntax code,
X e.g., Sword. */
X notsyntaxspec /* Matches any character whose syntax differs from
X that specified. */
X };
X
X
X/* Number of failure points to allocate space for initially,
X when matching. If this number is exceeded, more space is allocated,
X so it is not a hard limit. */
X
X#ifndef NFAILURES
X#define NFAILURES 80
X#endif
X
X#ifdef CHAR_UNSIGNED
X#define SIGN_EXTEND_CHAR(c) ((c)>(char)127?(c)-256:(c)) /* for IBM RT */
X#endif
X#ifndef SIGN_EXTEND_CHAR
X#define SIGN_EXTEND_CHAR(x) (x)
X#endif
X
X
X/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
X#define STORE_NUMBER(destination, number) \
X { (destination)[0] = (number) & 0377; \
X (destination)[1] = (number) >> 8; }
X
X/* Same as STORE_NUMBER, except increment the destination pointer to
X the byte after where the number is stored. Watch out that values for
X DESTINATION such as p + 1 won't work, whereas p will. */
X#define STORE_NUMBER_AND_INCR(destination, number) \
X { STORE_NUMBER(destination, number); \
X (destination) += 2; }
X
X
X/* Put into DESTINATION a number stored in two contingous bytes starting
X at SOURCE. */
X#define EXTRACT_NUMBER(destination, source) \
X { (destination) = *(source) & 0377; \
X (destination) += SIGN_EXTEND_CHAR (*(char *)((source) + 1)) << 8; }
X
X/* Same as EXTRACT_NUMBER, except increment the pointer for source to
X point to second byte of SOURCE. Note that SOURCE has to be a value
X such as p, not, e.g., p + 1. */
X#define EXTRACT_NUMBER_AND_INCR(destination, source) \
X { EXTRACT_NUMBER (destination, source); \
X (source) += 2; }
X
X
X/* Specify the precise syntax of regexps for compilation. This provides
X for compatibility for various utilities which historically have
X different, incompatible syntaxes.
X
X The argument SYNTAX is a bit-mask comprised of the various bits
X defined in regex.h. */
X
Xint
Xre_set_syntax (syntax)
X int syntax;
X{
X int ret;
X
X ret = obscure_syntax;
X obscure_syntax = syntax;
X return ret;
X}
X
X/* Set by re_set_syntax to the current regexp syntax to recognize. */
Xint obscure_syntax = 0;
X
X
X
X/* Macros for re_compile_pattern, which is found below these definitions. */
X
X#define CHAR_CLASS_MAX_LENGTH 6
X
X/* Fetch the next character in the uncompiled pattern, translating it if
X necessary. */
X#define PATFETCH(c) \
X {if (p == pend) goto end_of_pattern; \
X c = * (unsigned char *) p++; \
X if (translate) c = translate[c]; }
X
X/* Fetch the next character in the uncompiled pattern, with no
X translation. */
X#define PATFETCH_RAW(c) \
X {if (p == pend) goto end_of_pattern; \
X c = * (unsigned char *) p++; }
X
X#define PATUNFETCH p--
X
X
X/* If the buffer isn't allocated when it comes in, use this. */
X#define INIT_BUF_SIZE 28
X
X/* Make sure we have at least N more bytes of space in buffer. */
X#define GET_BUFFER_SPACE(n) \
X { \
X while (b - bufp->buffer + (n) >= bufp->allocated) \
X EXTEND_BUFFER; \
X }
X
X/* Make sure we have one more byte of buffer space and then add CH to it. */
X#define BUFPUSH(ch) \
X { \
X GET_BUFFER_SPACE (1); \
X *b++ = (char) (ch); \
X }
X
X/* Extend the buffer by twice its current size via reallociation and
X reset the pointers that pointed into the old allocation to point to
X the correct places in the new allocation. If extending the buffer
X results in it being larger than 1 << 16, then flag memory exhausted. */
X#define EXTEND_BUFFER \
X { char *old_buffer = bufp->buffer; \
X if (bufp->allocated == (1L<<16)) goto too_big; \
X bufp->allocated *= 2; \
X if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16); \
X bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated); \
X if (bufp->buffer == 0) \
X goto memory_exhausted; \
X b = (b - old_buffer) + bufp->buffer; \
X if (fixup_jump) \
X fixup_jump = (fixup_jump - old_buffer) + bufp->buffer; \
X if (laststart) \
X laststart = (laststart - old_buffer) + bufp->buffer; \
X begalt = (begalt - old_buffer) + bufp->buffer; \
X if (pending_exact) \
X pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
X }
X
X/* Set the bit for character C in a character set list. */
X#define SET_LIST_BIT(c) (b[(c) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
X
X/* Get the next unsigned number in the uncompiled pattern. */
X#define GET_UNSIGNED_NUMBER(num) \
X { if (p != pend) \
X { \
X PATFETCH (c); \
X while (isdigit (c)) \
X { \
X if (num < 0) \
X num = 0; \
X num = num * 10 + c - '0'; \
X if (p == pend) \
X break; \
X PATFETCH (c); \
X } \
X } \
X }
X
X/* Subroutines for re_compile_pattern. */
Xstatic void store_jump (), insert_jump (), store_jump_n (),
X insert_jump_n (), insert_op_2 ();
X
X
X/* re_compile_pattern takes a regular-expression string
X and converts it into a buffer full of byte commands for matching.
X
X PATTERN is the address of the pattern string
X SIZE is the length of it.
X BUFP is a struct re_pattern_buffer * which points to the info
X on where to store the byte commands.
X This structure contains a char * which points to the
X actual space, which should have been obtained with malloc.
X re_compile_pattern may use realloc to grow the buffer space.
X
X The number of bytes of commands can be found out by looking in
X the `struct re_pattern_buffer' that bufp pointed to, after
X re_compile_pattern returns. */
X
Xchar *
Xre_compile_pattern (pattern, size, bufp)
X char *pattern;
X int size;
X struct re_pattern_buffer *bufp;
X{
X register char *b = bufp->buffer;
X register char *p = pattern;
X char *pend = pattern + size;
X register unsigned c, c1;
X char *p1;
X unsigned char *translate = (unsigned char *) bufp->translate;
X
X /* Address of the count-byte of the most recently inserted `exactn'
X command. This makes it possible to tell whether a new exact-match
X character can be added to that command or requires a new `exactn'
X command. */
X
X char *pending_exact = 0;
X
X /* Address of the place where a forward-jump should go to the end of
X the containing expression. Each alternative of an `or', except the
X last, ends with a forward-jump of this sort. */
X
X char *fixup_jump = 0;
X
X /* Address of start of the most recently finished expression.
X This tells postfix * where to find the start of its operand. */
X
X char *laststart = 0;
X
X /* In processing a repeat, 1 means zero matches is allowed. */
X
X char zero_times_ok;
X
X /* In processing a repeat, 1 means many matches is allowed. */
X
X char many_times_ok;
X
X /* Address of beginning of regexp, or inside of last \(. */
X
X char *begalt = b;
X
X /* In processing an interval, at least this many matches must be made. */
X int lower_bound;
X
X /* In processing an interval, at most this many matches can be made. */
X int upper_bound;
X
X /* Place in pattern (i.e., the {) to which to go back if the interval
X is invalid. */
X char *beg_interval = 0;
X
X /* Stack of information saved by \( and restored by \).
X Four stack elements are pushed by each \(:
X First, the value of b.
X Second, the value of fixup_jump.
X Third, the value of regnum.
X Fourth, the value of begalt. */
X
X int stackb[40];
X int *stackp = stackb;
X int *stacke = stackb + 40;
X int *stackt;
X
X /* Counts \('s as they are encountered. Remembered for the matching \),
X where it becomes the register number to put in the stop_memory
X command. */
X
X int regnum = 1;
X
X bufp->fastmap_accurate = 0;
X
X#ifndef emacs
X#ifndef SYNTAX_TABLE
X /* Initialize the syntax table. */
X init_syntax_once();
X#endif
X#endif
X
X if (bufp->allocated == 0)
X {
X bufp->allocated = INIT_BUF_SIZE;
X if (bufp->buffer)
X /* EXTEND_BUFFER loses when bufp->allocated is 0. */
X bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE);
X else
X /* Caller did not allocate a buffer. Do it for them. */
X bufp->buffer = (char *) malloc (INIT_BUF_SIZE);
X if (!bufp->buffer) goto memory_exhausted;
X begalt = b = bufp->buffer;
X }
X
X while (p != pend)
X {
X PATFETCH (c);
X
X switch (c)
X {
X case '$':
X {
X char *p1 = p;
X /* When testing what follows the $,
X look past the \-constructs that don't consume anything. */
X if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
X while (p1 != pend)
X {
X if (*p1 == '\\' && p1 + 1 != pend
X && (p1[1] == '<' || p1[1] == '>'
X || p1[1] == '`' || p1[1] == '\''
X#ifdef emacs
X || p1[1] == '='
X#endif
X || p1[1] == 'b' || p1[1] == 'B'))
X p1 += 2;
X else
X break;
X }
X if (obscure_syntax & RE_TIGHT_VBAR)
X {
X if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend)
X goto normal_char;
X /* Make operand of last vbar end before this `$'. */
X if (fixup_jump)
X store_jump (fixup_jump, jump, b);
X fixup_jump = 0;
X BUFPUSH (endline);
X break;
X }
X /* $ means succeed if at end of line, but only in special contexts.
X If validly in the middle of a pattern, it is a normal character. */
X
X if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend)
X goto invalid_pattern;
X if (p1 == pend || *p1 == '\n'
X || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
X || (obscure_syntax & RE_NO_BK_PARENS
X ? *p1 == ')'
X : *p1 == '\\' && p1[1] == ')')
X || (obscure_syntax & RE_NO_BK_VBAR
X ? *p1 == '|'
X : *p1 == '\\' && p1[1] == '|'))
X {
X BUFPUSH (endline);
X break;
X }
X goto normal_char;
X }
X case '^':
X /* ^ means succeed if at beg of line, but only if no preceding
X pattern. */
X
X if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart)
X goto invalid_pattern;
X if (laststart && p - 2 >= pattern && p[-2] != '\n'
X && !(obscure_syntax & RE_CONTEXT_INDEP_OPS))
X goto normal_char;
X if (obscure_syntax & RE_TIGHT_VBAR)
X {
X if (p != pattern + 1
X && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
X goto normal_char;
X BUFPUSH (begline);
X begalt = b;
X }
X else
X BUFPUSH (begline);
X break;
X
X case '+':
X case '?':
X if ((obscure_syntax & RE_BK_PLUS_QM)
X || (obscure_syntax & RE_LIMITED_OPS))
X goto normal_char;
X handle_plus:
X case '*':
X /* If there is no previous pattern, char not special. */
X if (!laststart)
X {
X if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
X goto invalid_pattern;
X else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
X goto normal_char;
X }
X /* If there is a sequence of repetition chars,
X collapse it down to just one. */
X zero_times_ok = 0;
X many_times_ok = 0;
X while (1)
X {
X zero_times_ok |= c != '+';
X many_times_ok |= c != '?';
X if (p == pend)
X break;
X PATFETCH (c);
X if (c == '*')
X ;
X else if (!(obscure_syntax & RE_BK_PLUS_QM)
X && (c == '+' || c == '?'))
X ;
X else if ((obscure_syntax & RE_BK_PLUS_QM)
X && c == '\\')
X {
X int c1;
X PATFETCH (c1);
X if (!(c1 == '+' || c1 == '?'))
X {
X PATUNFETCH;
X PATUNFETCH;
X break;
X }
X c = c1;
X }
X else
X {
X PATUNFETCH;
X break;
X }
X }
X
X /* Star, etc. applied to an empty pattern is equivalent
X to an empty pattern. */
X if (!laststart)
X break;
X
X /* Now we know whether or not zero matches is allowed
X and also whether or not two or more matches is allowed. */
X if (many_times_ok)
X {
X /* If more than one repetition is allowed, put in at the
X end a backward relative jump from b to before the next
X jump we're going to put in below (which jumps from
X laststart to after this jump). */
X GET_BUFFER_SPACE (3);
X store_jump (b, maybe_finalize_jump, laststart - 3);
X b += 3; /* Because store_jump put stuff here. */
X }
X /* On failure, jump from laststart to b + 3, which will be the
X end of the buffer after this jump is inserted. */
X GET_BUFFER_SPACE (3);
X insert_jump (on_failure_jump, laststart, b + 3, b);
X pending_exact = 0;
X b += 3;
X if (!zero_times_ok)
X {
X /* At least one repetition is required, so insert a
X dummy-failure before the initial on-failure-jump
X instruction of the loop. This effects a skip over that
X instruction the first time we hit that loop. */
X GET_BUFFER_SPACE (6);
X insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
X b += 3;
X }
X break;
X
X case '.':
X laststart = b;
X BUFPUSH (anychar);
X break;
X
X case '[':
X if (p == pend)
X goto invalid_pattern;
X while (b - bufp->buffer
X > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
X EXTEND_BUFFER;
X
X laststart = b;
X if (*p == '^')
X {
X BUFPUSH (charset_not);
X p++;
X }
X else
X BUFPUSH (charset);
X p1 = p;
X
X BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
X /* Clear the whole map */
X bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
X
X if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not)
X SET_LIST_BIT ('\n');
X
X
X /* Read in characters and ranges, setting map bits. */
X while (1)
X {
X PATFETCH (c);
X
X /* If set, \ escapes characters when inside [...]. */
X if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\')
X {
X PATFETCH(c1);
X SET_LIST_BIT (c1);
X continue;
X }
X if (c == ']')
X {
X if (p == p1 + 1)
X {
X /* If this is an empty bracket expression. */
X if ((obscure_syntax & RE_NO_EMPTY_BRACKETS)
X && p == pend)
X goto invalid_pattern;
X }
X else
X /* Stop if this isn't merely a ] inside a bracket
X expression, but rather the end of a bracket
X expression. */
X break;
X }
X /* Get a range. */
X if (p[0] == '-' && p[1] != ']')
X {
X PATFETCH (c1);
X PATFETCH (c1);
X
X if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1)
X goto invalid_pattern;
X
X if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END)
X && c1 == '-' && *p != ']')
X goto invalid_pattern;
X
X while (c <= c1)
X {
X SET_LIST_BIT (c);
X c++;
X }
X }
X else if ((obscure_syntax & RE_CHAR_CLASSES)
X && c == '[' && p[0] == ':')
X {
X /* Longest valid character class word has six characters. */
X char str[CHAR_CLASS_MAX_LENGTH];
X PATFETCH (c);
X c1 = 0;
X /* If no ] at end. */
X if (p == pend)
X goto invalid_pattern;
X while (1)
X {
X /* Don't translate the ``character class'' characters. */
X PATFETCH_RAW (c);
X if (c == ':' || c == ']' || p == pend
X || c1 == CHAR_CLASS_MAX_LENGTH)
X break;
X str[c1++] = c;
X }
X str[c1] = '\0';
X if (p == pend
X || c == ']' /* End of the bracket expression. */
X || p[0] != ']'
X || p + 1 == pend
X || (strcmp (str, "alpha") != 0
X && strcmp (str, "upper") != 0
X && strcmp (str, "lower") != 0
X && strcmp (str, "digit") != 0
X && strcmp (str, "alnum") != 0
X && strcmp (str, "xdigit") != 0
X && strcmp (str, "space") != 0
X && strcmp (str, "print") != 0
X && strcmp (str, "punct") != 0
X && strcmp (str, "graph") != 0
X && strcmp (str, "cntrl") != 0))
X {
X /* Undo the ending character, the letters, and leave
X the leading : and [ (but set bits for them). */
X c1++;
X while (c1--)
X PATUNFETCH;
X SET_LIST_BIT ('[');
X SET_LIST_BIT (':');
X }
X else
X {
X /* The ] at the end of the character class. */
X PATFETCH (c);
X if (c != ']')
X goto invalid_pattern;
X for (c = 0; c < (1 << BYTEWIDTH); c++)
X {
X if ((strcmp (str, "alpha") == 0 && isalpha (c))
X || (strcmp (str, "upper") == 0 && isupper (c))
X || (strcmp (str, "lower") == 0 && islower (c))
X || (strcmp (str, "digit") == 0 && isdigit (c))
X || (strcmp (str, "alnum") == 0 && isalnum (c))
X || (strcmp (str, "xdigit") == 0 && isxdigit (c))
X || (strcmp (str, "space") == 0 && isspace (c))
X || (strcmp (str, "print") == 0 && isprint (c))
X || (strcmp (str, "punct") == 0 && ispunct (c))
X || (strcmp (str, "graph") == 0 && isgraph (c))
X || (strcmp (str, "cntrl") == 0 && iscntrl (c)))
X SET_LIST_BIT (c);
X }
X }
X }
X else
X SET_LIST_BIT (c);
X }
X
X /* Discard any character set/class bitmap bytes that are all
X 0 at the end of the map. Decrement the map-length byte too. */
X while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
X b[-1]--;
X b += b[-1];
X break;
X
X case '(':
X if (! (obscure_syntax & RE_NO_BK_PARENS))
X goto normal_char;
X else
X goto handle_open;
X
X case ')':
X if (! (obscure_syntax & RE_NO_BK_PARENS))
X goto normal_char;
X else
X goto handle_close;
X
X case '\n':
X if (! (obscure_syntax & RE_NEWLINE_OR))
X goto normal_char;
X else
X goto handle_bar;
X
X case '|':
X if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
X && (! laststart || p == pend))
X goto invalid_pattern;
X else if (! (obscure_syntax & RE_NO_BK_VBAR))
X goto normal_char;
X else
X goto handle_bar;
X
X case '{':
X if (! ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
X && (obscure_syntax & RE_INTERVALS)))
X goto normal_char;
X else
X goto handle_interval;
X
X case '\\':
X if (p == pend) goto invalid_pattern;
X PATFETCH_RAW (c);
X switch (c)
X {
X case '(':
X if (obscure_syntax & RE_NO_BK_PARENS)
X goto normal_backsl;
X handle_open:
X if (stackp == stacke) goto nesting_too_deep;
X
X /* Laststart should point to the start_memory that we are about
X to push (unless the pattern has RE_NREGS or more ('s). */
X *stackp++ = b - bufp->buffer;
X if (regnum < RE_NREGS)
X {
X BUFPUSH (start_memory);
X BUFPUSH (regnum);
X }
X *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
X *stackp++ = regnum++;
X *stackp++ = begalt - bufp->buffer;
X fixup_jump = 0;
X laststart = 0;
X begalt = b;
X break;
X
X case ')':
X if (obscure_syntax & RE_NO_BK_PARENS)
X goto normal_backsl;
X handle_close:
X if (stackp == stackb) goto unmatched_close;
X begalt = *--stackp + bufp->buffer;
X if (fixup_jump)
X store_jump (fixup_jump, jump, b);
X if (stackp[-1] < RE_NREGS)
X {
X BUFPUSH (stop_memory);
X BUFPUSH (stackp[-1]);
X }
X stackp -= 2;
X fixup_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
X laststart = *--stackp + bufp->buffer;
X break;
X
X case '|':
X if ((obscure_syntax & RE_LIMITED_OPS)
X || (obscure_syntax & RE_NO_BK_VBAR))
X goto normal_backsl;
X handle_bar:
X if (obscure_syntax & RE_LIMITED_OPS)
X goto normal_char;
X /* Insert before the previous alternative a jump which
X jumps to this alternative if the former fails. */
X GET_BUFFER_SPACE (6);
X insert_jump (on_failure_jump, begalt, b + 6, b);
X pending_exact = 0;
X b += 3;
X /* The alternative before the previous alternative has a
X jump after it which gets executed if it gets matched.
X Adjust that jump so it will jump to the previous
X alternative's analogous jump (put in below, which in
X turn will jump to the next (if any) alternative's such
X jump, etc.). The last such jump jumps to the correct
X final destination. */
X if (fixup_jump)
X store_jump (fixup_jump, jump, b);
X
X /* Leave space for a jump after previous alternative---to be
X filled in later. */
X fixup_jump = b;
X b += 3;
X
X laststart = 0;
X begalt = b;
X break;
X
X case '{':
X if (! (obscure_syntax & RE_INTERVALS)
X /* Let \{ be a literal. */
X || ((obscure_syntax & RE_INTERVALS)
X && (obscure_syntax & RE_NO_BK_CURLY_BRACES))
X /* If it's the string "\{". */
X || (p - 2 == pattern && p == pend))
X goto normal_backsl;
X handle_interval:
X beg_interval = p - 1; /* The {. */
X /* If there is no previous pattern, this isn't an interval. */
X if (!laststart)
X {
X if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
X goto invalid_pattern;
X else
X goto normal_backsl;
X }
X /* It also isn't an interval if not preceded by an re
X matching a single character or subexpression, or if
X the current type of intervals can't handle back
X references and the previous thing is a back reference. */
X if (! (*laststart == anychar
X || *laststart == charset
X || *laststart == charset_not
X || *laststart == start_memory
X || (*laststart == exactn && laststart[1] == 1)
X || (! (obscure_syntax & RE_NO_BK_REFS)
X && *laststart == duplicate)))
X {
X if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
X goto normal_char;
X
X /* Posix extended syntax is handled in previous
X statement; this is for Posix basic syntax. */
X if (obscure_syntax & RE_INTERVALS)
X goto invalid_pattern;
X
X goto normal_backsl;
X }
X lower_bound = -1; /* So can see if are set. */
X upper_bound = -1;
X GET_UNSIGNED_NUMBER (lower_bound);
X if (c == ',')
X {
X GET_UNSIGNED_NUMBER (upper_bound);
X if (upper_bound < 0)
X upper_bound = RE_DUP_MAX;
X }
X if (upper_bound < 0)
X upper_bound = lower_bound;
X if (! (obscure_syntax & RE_NO_BK_CURLY_BRACES))
X {
X if (c != '\\')
X goto invalid_pattern;
X PATFETCH (c);
X }
X if (c != '}' || lower_bound < 0 || upper_bound > RE_DUP_MAX
X || lower_bound > upper_bound
X || ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
X && p != pend && *p == '{'))
X {
X if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
X goto unfetch_interval;
X else
X goto invalid_pattern;
X }
X
X /* If upper_bound is zero, don't want to succeed at all;
X jump from laststart to b + 3, which will be the end of
X the buffer after this jump is inserted. */
X
X if (upper_bound == 0)
X {
X GET_BUFFER_SPACE (3);
X insert_jump (jump, laststart, b + 3, b);
X b += 3;
X }
X
X /* Otherwise, after lower_bound number of succeeds, jump
X to after the jump_n which will be inserted at the end
X of the buffer, and insert that jump_n. */
X else
X { /* Set to 5 if only one repetition is allowed and
X hence no jump_n is inserted at the current end of
X the buffer; then only space for the succeed_n is
X needed. Otherwise, need space for both the
X succeed_n and the jump_n. */
X
X unsigned slots_needed = upper_bound == 1 ? 5 : 10;
X
X GET_BUFFER_SPACE (slots_needed);
X /* Initialize the succeed_n to n, even though it will
X be set by its attendant set_number_at, because
X re_compile_fastmap will need to know it. Jump to
X what the end of buffer will be after inserting
X this succeed_n and possibly appending a jump_n. */
X insert_jump_n (succeed_n, laststart, b + slots_needed,
X b, lower_bound);
X b += 5; /* Just increment for the succeed_n here. */
X
X /* More than one repetition is allowed, so put in at
X the end of the buffer a backward jump from b to the
X succeed_n we put in above. By the time we've gotten
X to this jump when matching, we'll have matched once
X already, so jump back only upper_bound - 1 times. */
X
X if (upper_bound > 1)
X {
X store_jump_n (b, jump_n, laststart, upper_bound - 1);
X b += 5;
X /* When hit this when matching, reset the
X preceding jump_n's n to upper_bound - 1. */
X BUFPUSH (set_number_at);
X GET_BUFFER_SPACE (2);
X STORE_NUMBER_AND_INCR (b, -5);
X STORE_NUMBER_AND_INCR (b, upper_bound - 1);
X }
X /* When hit this when matching, set the succeed_n's n. */
X GET_BUFFER_SPACE (5);
X insert_op_2 (set_number_at, laststart, b, 5, lower_bound);
X b += 5;
X }
X pending_exact = 0;
X beg_interval = 0;
X break;
X
X
X unfetch_interval:
X /* If an invalid interval, match the characters as literals. */
X if (beg_interval)
X p = beg_interval;
X else
X {
X fprintf (stderr,
X "regex: no interval beginning to which to backtrack.\n");
X exit (1);
X }
X
X beg_interval = 0;
X PATFETCH (c); /* normal_char expects char in `c'. */
X goto normal_char;
X break;
X
X#ifdef emacs
X case '=':
X BUFPUSH (at_dot);
X break;
X
X case 's':
X laststart = b;
X BUFPUSH (syntaxspec);
X PATFETCH (c);
X BUFPUSH (syntax_spec_code[c]);
X break;
X
X case 'S':
X laststart = b;
X BUFPUSH (notsyntaxspec);
X PATFETCH (c);
X BUFPUSH (syntax_spec_code[c]);
X break;
X#endif /* emacs */
X
X case 'w':
X laststart = b;
X BUFPUSH (wordchar);
X break;
X
X case 'W':
X laststart = b;
X BUFPUSH (notwordchar);
X break;
X
X case '<':
X BUFPUSH (wordbeg);
X break;
X
X case '>':
X BUFPUSH (wordend);
X break;
X
X case 'b':
X BUFPUSH (wordbound);
X break;
X
X case 'B':
X BUFPUSH (notwordbound);
X break;
X
X case '`':
X BUFPUSH (begbuf);
X break;
X
X case '\'':
X BUFPUSH (endbuf);
X break;
X
X case '1':
X case '2':
X case '3':
X case '4':
X case '5':
X case '6':
X case '7':
X case '8':
X case '9':
X if (obscure_syntax & RE_NO_BK_REFS)
X goto normal_char;
X c1 = c - '0';
X if (c1 >= regnum)
X {
X if (obscure_syntax & RE_NO_EMPTY_BK_REF)
X goto invalid_pattern;
X else
X goto normal_char;
X }
X /* Can't back reference to a subexpression if inside of it. */
X for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
X if (*stackt == c1)
X goto normal_char;
X laststart = b;
X BUFPUSH (duplicate);
X BUFPUSH (c1);
X break;
X
X case '+':
X case '?':
X if (obscure_syntax & RE_BK_PLUS_QM)
X goto handle_plus;
X else
X goto normal_backsl;
X break;
X
X default:
X normal_backsl:
X /* You might think it would be useful for \ to mean
X not to translate; but if we don't translate it
X it will never match anything. */
X if (translate) c = translate[c];
X goto normal_char;
X }
X break;
X
X default:
X normal_char: /* Expects the character in `c'. */
X if (!pending_exact || pending_exact + *pending_exact + 1 != b
X || *pending_exact == 0177 || *p == '*' || *p == '^'
X || ((obscure_syntax & RE_BK_PLUS_QM)
X ? *p == '\\' && (p[1] == '+' || p[1] == '?')
X : (*p == '+' || *p == '?'))
X || ((obscure_syntax & RE_INTERVALS)
X && ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
X ? *p == '{'
X : (p[0] == '\\' && p[1] == '{'))))
X {
X laststart = b;
X BUFPUSH (exactn);
X pending_exact = b;
X BUFPUSH (0);
X }
X BUFPUSH (c);
X (*pending_exact)++;
X }
X }
X
X if (fixup_jump)
X store_jump (fixup_jump, jump, b);
X
X if (stackp != stackb) goto unmatched_open;
X
X bufp->used = b - bufp->buffer;
X return 0;
X
X invalid_pattern:
X return "Invalid regular expression";
X
X unmatched_open:
X return "Unmatched \\(";
X
X unmatched_close:
X return "Unmatched \\)";
X
X end_of_pattern:
X return "Premature end of regular expression";
X
X nesting_too_deep:
X return "Nesting too deep";
X
X too_big:
X return "Regular expression too big";
X
X memory_exhausted:
X return "Memory exhausted";
X}
X
X
X/* Store a jump of the form <OPCODE> <relative address>.
X Store in the location FROM a jump operation to jump to relative
X address FROM - TO. OPCODE is the opcode to store. */
X
Xstatic void
Xstore_jump (from, opcode, to)
X char *from, *to;
X char opcode;
X{
X from[0] = opcode;
X STORE_NUMBER(from + 1, to - (from + 3));
X}
X
X
X/* Open up space before char FROM, and insert there a jump to TO.
X CURRENT_END gives the end of the storage not in use, so we know
X how much data to copy up. OP is the opcode of the jump to insert.
X
X If you call this function, you must zero out pending_exact. */
X
Xstatic void
Xinsert_jump (op, from, to, current_end)
X char op;
X char *from, *to, *current_end;
X{
X register char *pfrom = current_end; /* Copy from here... */
X register char *pto = current_end + 3; /* ...to here. */
X
X while (pfrom != from)
X *--pto = *--pfrom;
X store_jump (from, op, to);
X}
X
X
X/* Store a jump of the form <opcode> <relative address> <n> .
X
X Store in the location FROM a jump operation to jump to relative
X address FROM - TO. OPCODE is the opcode to store, N is a number the
X jump uses, say, to decide how many times to jump.
X
X If you call this function, you must zero out pending_exact. */
X
Xstatic void
Xstore_jump_n (from, opcode, to, n)
X char *from, *to;
X char opcode;
X unsigned n;
X{
X from[0] = opcode;
X STORE_NUMBER (from + 1, to - (from + 3));
X STORE_NUMBER (from + 3, n);
X}
X
X
X/* Similar to insert_jump, but handles a jump which needs an extra
X number to handle minimum and maximum cases. Open up space at
X location FROM, and insert there a jump to TO. CURRENT_END gives the
X end of the storage in use, so we know how much data to copy up. OP is
X the opcode of the jump to insert.
X
X If you call this function, you must zero out pending_exact. */
X
Xstatic void
Xinsert_jump_n (op, from, to, current_end, n)
X char op;
X char *from, *to, *current_end;
X unsigned n;
X{
X register char *pfrom = current_end; /* Copy from here... */
X register char *pto = current_end + 5; /* ...to here. */
X
X while (pfrom != from)
X *--pto = *--pfrom;
X store_jump_n (from, op, to, n);
X}
X
X
X/* Open up space at location THERE, and insert operation OP followed by
X NUM_1 and NUM_2. CURRENT_END gives the end of the storage in use, so
X we know how much data to copy up.
X
X If you call this function, you must zero out pending_exact. */
X
Xstatic void
Xinsert_op_2 (op, there, current_end, num_1, num_2)
X char op;
X char *there, *current_end;
X int num_1, num_2;
X{
X register char *pfrom = current_end; /* Copy from here... */
X register char *pto = current_end + 5; /* ...to here. */
X
X while (pfrom != there)
X *--pto = *--pfrom;
X
X there[0] = op;
X STORE_NUMBER (there + 1, num_1);
X STORE_NUMBER (there + 3, num_2);
X}
X
X
X
X/* Given a pattern, compute a fastmap from it. The fastmap records
X which of the (1 << BYTEWIDTH) possible characters can start a string
X that matches the pattern. This fastmap is used by re_search to skip
X quickly over totally implausible text.
X
X The caller must supply the address of a (1 << BYTEWIDTH)-byte data
X area as bufp->fastmap.
X The other components of bufp describe the pattern to be used. */
X
Xvoid
Xre_compile_fastmap (bufp)
X struct re_pattern_buffer *bufp;
X{
X unsigned char *pattern = (unsigned char *) bufp->buffer;
X int size = bufp->used;
X register char *fastmap = bufp->fastmap;
X register unsigned char *p = pattern;
X register unsigned char *pend = pattern + size;
X register int j, k;
X unsigned char *translate = (unsigned char *) bufp->translate;
X
X unsigned char *stackb[NFAILURES];
X unsigned char **stackp = stackb;
X
X unsigned is_a_succeed_n;
X
X bzero (fastmap, (1 << BYTEWIDTH));
X bufp->fastmap_accurate = 1;
X bufp->can_be_null = 0;
X
X while (p)
X {
X is_a_succeed_n = 0;
X if (p == pend)
X {
X bufp->can_be_null = 1;
X break;
X }
X#ifdef SWITCH_ENUM_BUG
X switch ((int) ((enum regexpcode) *p++))
X#else
X switch ((enum regexpcode) *p++)
X#endif
X {
X case exactn:
X if (translate)
X fastmap[translate[p[1]]] = 1;
X else
X fastmap[p[1]] = 1;
X break;
X
X case begline:
X case before_dot:
X case at_dot:
X case after_dot:
X case begbuf:
X case endbuf:
X case wordbound:
X case notwordbound:
X case wordbeg:
X case wordend:
X continue;
X
X case endline:
X if (translate)
X fastmap[translate['\n']] = 1;
X else
X fastmap['\n'] = 1;
X
X if (bufp->can_be_null != 1)
X bufp->can_be_null = 2;
X break;
X
X case jump_n:
X case finalize_jump:
X case maybe_finalize_jump:
X case jump:
X case dummy_failure_jump:
X EXTRACT_NUMBER_AND_INCR (j, p);
X p += j;
X if (j > 0)
X continue;
X /* Jump backward reached implies we just went through
X the body of a loop and matched nothing.
X Opcode jumped to should be an on_failure_jump.
X Just treat it like an ordinary jump.
X For a * loop, it has pushed its failure point already;
X If so, discard that as redundant. */
X
X if ((enum regexpcode) *p != on_failure_jump
X && (enum regexpcode) *p != succeed_n)
X continue;
X p++;
X EXTRACT_NUMBER_AND_INCR (j, p);
X p += j;
X if (stackp != stackb && *stackp == p)
X stackp--;
X continue;
X
X case on_failure_jump:
X handle_on_failure_jump:
X EXTRACT_NUMBER_AND_INCR (j, p);
X *++stackp = p + j;
X if (is_a_succeed_n)
X EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
X continue;
X
X case succeed_n:
X is_a_succeed_n = 1;
X /* Get to the number of times to succeed. */
X p += 2;
X /* Increment p past the n for when k != 0. */
X EXTRACT_NUMBER_AND_INCR (k, p);
X if (k == 0)
X {
X p -= 4;
X goto handle_on_failure_jump;
X }
X continue;
X
X case set_number_at:
X p += 4;
X continue;
X
X case start_memory:
X case stop_memory:
X p++;
X continue;
X
X case duplicate:
X bufp->can_be_null = 1;
X fastmap['\n'] = 1;
X case anychar:
X for (j = 0; j < (1 << BYTEWIDTH); j++)
X if (j != '\n')
X fastmap[j] = 1;
X if (bufp->can_be_null)
X return;
X /* Don't return; check the alternative paths
X so we can set can_be_null if appropriate. */
X break;
X
X case wordchar:
X for (j = 0; j < (1 << BYTEWIDTH); j++)
X if (SYNTAX (j) == Sword)
X fastmap[j] = 1;
X break;
X
X case notwordchar:
X for (j = 0; j < (1 << BYTEWIDTH); j++)
X if (SYNTAX (j) != Sword)
X fastmap[j] = 1;
X break;
X
X#ifdef emacs
X case syntaxspec:
X k = *p++;
X for (j = 0; j < (1 << BYTEWIDTH); j++)
X if (SYNTAX (j) == (enum syntaxcode) k)
X fastmap[j] = 1;
X break;
X
X case notsyntaxspec:
X k = *p++;
X for (j = 0; j < (1 << BYTEWIDTH); j++)
X if (SYNTAX (j) != (enum syntaxcode) k)
X fastmap[j] = 1;
X break;
X#endif /* not emacs */
X
X case charset:
X for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
X if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
X {
X if (translate)
X fastmap[translate[j]] = 1;
X else
X fastmap[j] = 1;
X }
X break;
X
X case charset_not:
X /* Chars beyond end of map must be allowed */
X for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
X if (translate)
X fastmap[translate[j]] = 1;
X else
X fastmap[j] = 1;
X
X for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
X if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
X {
X if (translate)
X fastmap[translate[j]] = 1;
X else
X fastmap[j] = 1;
X }
X break;
X }
X
X /* Get here means we have successfully found the possible starting
X characters of one path of the pattern. We need not follow this
X path any farther. Instead, look at the next alternative
X remembered in the stack. */
X if (stackp != stackb)
X p = *stackp--;
X else
X break;
X }
X}
END_OF_FILE
if test 45472 -ne `wc -c <'regex.c1'`; then
echo shar: \"'regex.c1'\" unpacked with wrong size!
fi
# end of 'regex.c1'
fi
if test -r regex.c1 -a -r regex.c2
then
echo shar: concatenating \"regex.c1\" and \"regex.c2\" into \"regex.c\"
cat regex.c1 regex.c2 >regex.c || echo shar: \"regex.c\" creation failed!
fi
echo shar: End of archive 7 \(of 8\).
cp /dev/null ark7isdone
MISSING=""
for I in 1 2 3 4 5 6 7 8 ; do
if test ! -f ark${I}isdone ; then
MISSING="${MISSING} ${I}"
fi
done
if test "${MISSING}" = "" ; then
echo You have unpacked all 8 archives.
rm -f ark[1-9]isdone
else
echo You still need to unpack the following archives:
echo " " ${MISSING}
fi
## End of shell archive.
exit 0
exit 0 # Just in case...
--
Please send comp.sources.unix-related mail to rsalz at uunet.uu.net.
Use a domain-based address or give alternate paths, or you may lose out.
More information about the Comp.sources.unix
mailing list