v07i030: AWL -- layout language for widget hierarchies, Part15/17
Paul Vixie
vixie at wrl.dec.com
Mon May 7 05:03:36 AEST 1990
Submitted-by: vixie at wrl.dec.com (Paul Vixie)
Posting-number: Volume 7, Issue 30
Archive-name: awl/part15
#! /bin/sh
# This is a shell archive. Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file". To overwrite existing
# files, type "sh file -c". You can also feed this as standard input via
# unshar, or by typing "sh <file". If this archive is complete, you
# will see the following message at the end:
# "End of archive 15 (of 17)."
# Contents: strsed.c
# Wrapped by vixie at jove.pa.dec.com on Mon Apr 30 01:25:27 1990
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'strsed.c' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'strsed.c'\"
else
echo shar: Extracting \"'strsed.c'\" \(41443 characters\)
sed "s/^X//" >'strsed.c' <<'END_OF_FILE'
X#ifndef lint
static char *rcsid = "$Header: /usr/src/local/awl/RCS/strsed.c,v 2.4 90/04/19 20:08:03 jkh Exp $";
X#endif lint
X
X/*
X * Strsed.c
X *
X * ed(1)/tr(1)-like search, replace, transliterate. See the
X * manpage for details. See the README for copyright information.
X *
X * Usage:
X *
X * strsed(string, pattern, 0);
X * char *string;
X * char *pattern;
X * or
X * strsed(string, pattern, range);
X * char *string;
X * char *pattern;
X * int range[2];
X *
X *
X * Terry Jones
X * terry at distel.pcs.com
X * ...!{pyramid,unido}!pcsbst!distel!terry
X *
X * PCS Computer Systeme GmbH
X * Pfaelzer-Wald-Str 36
X * 8000 Muenchen 90
X * West Germany 49-89-68004288
X *
X * January 8th, 1990.
X *
X */
X
X/*
X * $Log: strsed.c,v $
X * Revision 2.4 90/04/19 20:08:03 jkh
X * Alpha checkin.
X *
X * Revision 2.4 90/04/19 15:40:38 terry
X * Made it possible to use any delimiter. E.g. s/ex/on/ is the same as
X * s.ex.on.
X * Fixed trailing backslash bugger. Made range always contain something
X * after search and replace to indicate if a substitute actually occurred.
X *
X * Revision 2.3 90/04/17 19:36:12 terry
X * Made realloc ok too....
X *
X * Revision 2.2 90/04/17 19:27:02 terry
X * Did things to make malloc() and free() calls more portable.
X *
X * Revision 2.1 90/04/15 18:06:09 terry
X * Added changes suggested by John B. Thiel. Added empty regs and empty
X * regexp structs.
X *
X * Revision 2.0 90/04/09 16:06:19 terry
X * Added dollops of #ifdef's to deal with Henry Spencer or
X * GNU regex packages. Also added optimisation that saves the
X * last compiled pattern. All seems to work fine except the
X * register reference inside a regex with the HS stuff.
X *
X * Revision 1.19 90/04/09 11:57:01 terry
X * little things.
X *
X * Revision 1.17 90/03/08 20:44:32 terry
X * Final cleanup.
X *
X * Revision 1.16 90/03/07 15:46:35 terry
X * Changed backslash_eliminate to only malloc on
X * REPLACEMENT type. Added ".*" optimisation so that
X * the regex functions are never called.
X *
X * Revision 1.15 90/03/06 22:27:49 terry
X * Removed varargs stuff since the 3rd argument is now
X * compulsory. Cleaned up. A few comments even.
X *
X * Revision 1.14 90/03/06 21:50:28 terry
X * Touched up memory stuff. Added mem_find(). Changed
X * buf_sz and buf_inc to be a reasonable refelection
X * of the length of the input.
X *
X * Revision 1.13 90/03/06 20:22:48 terry
X * Major rearrangements. Added mem(), mem_init(), mem_save(),
X * mem_free() to handle memory in a vastly improved fashion.
X * Calls to malloc are minimised as far as possible.
X *
X * Revision 1.12 90/03/06 13:23:33 terry
X * Made map static.
X *
X * Revision 1.11 90/01/10 15:51:12 terry
X * checked in with -k by terry at 90.01.18.20.03.08.
X *
X * Revision 1.11 90/01/10 15:51:12 terry
X * *** empty log message ***
X *
X * Revision 1.10 90/01/10 12:48:40 terry
X * Fixed handling of perverted character ranges in nextch().
X * a-f-c now means a-c.
X *
X * Revision 1.9 90/01/10 12:03:48 terry
X * Pounded on space allocation, added more_space,
X * remove free() in build_map, tested tiny buffer sizes etc.
X *
X * Revision 1.8 90/01/09 18:15:12 terry
X * added backslash elimination to str.
X * altered backslash_elimantion to take one of three types
X * REGEX, NORMAL or REPLACEMENT depending on the
X * elimination desired. Changed interpretation of \
X * followed by a single digit to be that character if the
X * type of elimination is NORMAL. i.e. \4 = ^D.
X *
X * Revision 1.7 90/01/09 17:05:05 terry
X * Frozen version for release to comp.sources.unix
X *
X * Revision 1.6 90/01/09 16:47:54 terry
X * Altered pure searching return values to be -1
X *
X * Revision 1.5 90/01/09 14:54:34 terry
X * *** empty log message ***
X *
X * Revision 1.4 90/01/09 14:51:04 terry
X * removed #include <stdio> silliness.
X *
X * Revision 1.2 90/01/09 10:48:22 terry
X * Fixed handling of } and - metacharacters inside
X * transliteration request strings in backslash_eliminate().
X *
X * Revision 1.1 90/01/08 17:41:35 terry
X * Initial revision
X *
X *
X */
X
X#include <ctype.h>
X#include <string.h>
X
X#ifdef GNU_REGEX
X#include "regex.h"
X#endif
X
X#ifdef HS_REGEX
X#include "regexp.h"
X#endif
X
X#define BYTEWIDTH 8
X#define REGEX 0
X#define REPLACEMENT 1
X#define NORMAL 2
X
X/*
X * And this is supposed to make freeing easier. It's a little hard to
X * keep track of what can and cannot be freed in what follows, so I
X * ignore it and every time a malloc is done for one of the things
X * below (and these are the only ones possible) we free if need be and
X * then alloc some more if it can't be avoided. No-one (who is going
X * to free) needs to call malloc then. And no-one need call free.
X * Wonderful in theory...
X */
X
X#define MEM_STR 0
X#define MEM_PAT 1
X#define MEM_FROM 2
X#define MEM_TO 3
X#define MEM_NEWSTR 4
X#define MEM_MAP 5
X#define MEM_MAP_SAVE 6
X
X#define MEM_SLOTS 7
X
X/*
X * This calls mem_free(), which free()s all the allocated storage EXCEPT
X * for the piece whose address is 'n'. If something goes wrong below
X * we call RETURN(0) and if we want to return some address we call RETURN
X * with the address to be returned.
X */
X
/*
 * RETURN(n): mark every memory slot unused via mem_free(), making the
 * slot table forget the buffer whose address is 'n', then return 'n'
 * from the enclosing function as a char *.
 *
 * Wrapped in do/while(0) so that it behaves as a single statement, and
 * the argument is cast before the call because mem_free() is declared
 * without a prototype: a bare 0 would be passed as an int, not a pointer.
 * 'n' is evaluated twice, so it must not have side effects.
 */
#define RETURN(n) \
    do { \
        mem_free((char *)(n)); \
        return (char *)(n); \
    } while (0)
X
/*
 * The table of memory hunks managed by mem(), mem_find(), mem_save(),
 * mem_init() and mem_free(). There is one entry per MEM_* index.
 */
static struct {
X char *s; /* address of the hunk obtained from malloc(), or 0 if the slot holds none */
X int size; /* number of bytes recorded for the hunk in 's' */
X int used; /* set to 1 by mem() when the hunk is handed out, cleared by mem_free() */
X} mem_slots[MEM_SLOTS];
X
X
/*
 * more_space(need): make sure the output buffer 'new_str' can take
 * 'need' more characters. It works on the locals of strsed():
 *
 *   new_str  the output buffer (kept in slot MEM_NEWSTR)
 *   buf_sz   number of characters the buffer can hold, not counting
 *            the terminating NUL
 *   buf_inc  the slack to leave free after every enlargement
 *   space    characters still free, or -1 when the output is being
 *            written over the input and can never overflow
 *
 * The buffer is always allocated as buf_sz + 1 bytes so that the final
 * NUL has somewhere to go even when all 'space' characters get used.
 * If realloc() fails the old buffer is left in its slot untouched and
 * the enclosing function returns 0 through RETURN().
 */
#define more_space(need) \
    do { \
        if ((need) > 0 && space != -1){ \
            if (space - (need) < 0){ \
                char *more_space_grown; \
                buf_sz += buf_inc + (need) - space; \
                more_space_grown = (char *)realloc(new_str, (unsigned)buf_sz + 1); \
                if (!more_space_grown){ \
                    RETURN(0); \
                } \
                new_str = more_space_grown; \
                mem_slots[MEM_NEWSTR].s = new_str; \
                mem_slots[MEM_NEWSTR].size = buf_sz + 1; \
                space = buf_inc; \
            } \
            else{ \
                space -= (need); \
            } \
        } \
    } while (0)
X
X#ifdef GNU_REGEX
X#define NO_MATCH -1
X#define EMPTY_REGISTER -1
X#endif
X#ifdef HS_REGEX
X#define NO_MATCH 0
X#define EMPTY_REGISTER ((char *)0)
X#endif
X
char *
strsed(string, pattern, range)
register char *string;
register char *pattern;
int *range;
X{
X extern char *realloc();
X extern char *strdup();
X extern void free();
X
X#ifdef GNU_REGEX
X extern char *re_compile_pattern();
X extern int re_search();
X static struct re_pattern_buffer re_comp_buf;
X struct re_registers regs;
X static struct re_registers empty_regs;
X#endif
X
X#ifdef HS_REGEX
X static regexp empty_exp;
X static regexp *exp;
X#endif
X
X char *backslash_eliminate();
X FWRD_STATIC char *mem();
X FWRD_STATIC void mem_init();
X FWRD_STATIC void mem_free();
X
X char *from;
X char *new_str;
X char *pat;
X char *str;
X char *tmp;
X char *to;
X static char map[1 << BYTEWIDTH];
X int buf_sz;
X int buf_inc;
X int global = 0;
X int match;
X int new_pos = 0;
X int search_only = 0;
X int seenbs = 0;
X int space;
X int match_all = 0;
X register int str_len;
X static int first_time = 1;
X static char *last_exp = (char *)0;
X int repeat;
X char delimiter;
X
X if (!string || !pattern){
X RETURN(0);
X }
X
X /*
X * If this is the first time we've been called, clear the memory slots.
X */
X if (first_time){
X register int i;
X mem_init();
X#ifdef GNU_REGEX
X /* Zero the fake regs that we use if the regex is ".*" */
X for (i = 0; i < RE_NREGS; i++){
X empty_regs.start[i] = empty_regs.end[i] = EMPTY_REGISTER;
X }
X#endif
X
X#ifdef HS_REGEX
X /* We use first_time again if we are GNU_REGEX, and reset it later. */
X first_time = 0;
X
X /* Zero the fake regexp that we use if the regex is ".*" */
X for (i = 0; i < NSUBEXP; i++){
X empty_exp.startp[i] = empty_exp.endp[i] = EMPTY_REGISTER;
X }
X#endif
X }
X
X /*
X * Take our own copies of the string and pattern since we promised
X * in the man page not to hurt the originals.
X */
X str = mem(MEM_STR, strlen(string) + 1);
X str[0] = '\0';
X strcat(str, string);
X pat = mem(MEM_PAT, strlen(pattern) + 1);
X pat[0] = '\0';
X strcat(pat, pattern);
X
X /*
X * If escape sequences are not already removed elsewhere, remove
X * them from the string. If you don't know what you're doing here
X * or are in any doubt, don't define ESCAPED_STRING.
X */
X#ifndef ESCAPED_STRING
X if (!(str = backslash_eliminate(str, NORMAL, MEM_STR))){
X RETURN(0);
X }
X#endif
X
X str_len = strlen(str);
X
X /*
X * Set up the size of our buffer (in which we build the
X * newstring, and the size by which we increment it when
X * (and if) the need arises. There shouldn't be too much
X * growth in the average case. Of course some people will
X * go and do things like
X *
X * strsed(string, "s/.*$/\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0")
X *
X * and they will be somewhat penalised. Oh well.
X *
X */
X
X buf_sz = str_len < 8 ? 16 : str_len << 1;
X buf_inc = buf_sz;
X
X /*
X * Get the action.
X * s = substitue and g = global.
X * anything else is invalid.
X *
X * If one of these is present, the next char is the delimiter.
X * Otherwise the character is taken as the delimiter itself.
X * This is more flexible, for example the following are all
X * legal:
X *
X * s/pinto/bean/
X * /pinto/bean
X * /pinto/
X * g/pinto/bean/
X *
X */
X switch (*pat){
X case 'g':{
X global = 1;
X pat++;
X break;
X }
X case 's':{
X pat++;
X break;
X }
X default:{
X break;
X }
X }
X
X if (!*pat){
X RETURN(0);
X }
X
X delimiter = *pat++;
X
X /*
X * Now split 'pat' into its two components. These are delimited (or
X * should be) by (unquoted) 'delimiter'. The first we point to with 'from'
X * and the second with 'to'.
X *
X * Someone should write a function to make this sort of thing trivial...
X *
X */
X
X from = to = pat;
X
X while (*to){
X if (seenbs){
X seenbs = 0;
X }
X else{
X if (*to == '\\'){
X seenbs = 1;
X }
X else if (*to == delimiter){
X break;
X }
X }
X to++;
X }
X
X if (!*to){
X RETURN(0);
X }
X
X *to++ = '\0';
X
X if (*to){
X tmp = to + strlen(to) - 1;
X
X /*
X * Make sure that the last character is the delimiter,
X * and wasn't preceded by \.
X *
X */
X
X if (*tmp != delimiter || *(tmp - 1) == '\\'){
X RETURN(0);
X }
X
X *tmp = '\0';
X }
X else{
X /*
X * Search only.
X * It doesn't make sense to say
X *
X * strsed(string, "g/abc/", range)
X *
X * because we are only searching and returning the
X * matched indexes. So turn off global (in case it's on)
X * so that we will return just the first instance.
X *
X * If no range has been given either, then there's no
X * point in going on.
X *
X */
X
X if (!range){
X RETURN(0);
X }
X
X global = 0;
X search_only = 1;
X }
X
X /*
X * Eliminate backslashes and character ranges etc.
X * Check that 'to' is a non-empty string before bothering
X * to try and eliminate things.
X *
X */
X
X if (!(from = backslash_eliminate(from, REGEX, MEM_FROM))){
X RETURN(0);
X }
X if (to && !(to = backslash_eliminate(to, REPLACEMENT, MEM_TO))){
X RETURN(0);
X }
X
X /*
X * If the first char of 'to' is '\0' then we are deleting or
X * searching only. We don't have to worry about space since
X * the transformed string will be less than or equal in length
X * to the original. We just overwrite.
X * We set space = -1 so that later on we can avoid worrying
X * about overflow etc.
X *
X * Otherwise, we are doing a substitution. Here we have to
X * worry about space because the replacement may be larger
X * than the original. malloc some room and if we overflow it
X * later we will realloc. Slows things down if the new string
X * turns out to be too much bigger. Oh well.
X *
X */
X
X if (*to){
X if (!(new_str = mem(MEM_NEWSTR, buf_sz + 1))){
X RETURN(0);
X }
X space = buf_sz;
X }
X else{
X new_str = str;
X space = -1;
X }
X
X
X /*
X * Check to see if the regexp is the same as last time.
X * If so, we can save ourselves a call to regexec (or whatever
X * function your regex package uses).
X *
X */
X
X if (last_exp){
X if (!strcmp(from, last_exp)){
X repeat = 1;
X }
X else{
X free(last_exp);
X last_exp = strdup(from);
X repeat = 0;
X }
X }
X else {
X last_exp = strdup(from);
X repeat = 0;
X }
X
X /*
X * Initialise the range integers to -1, since they may be checked after we
X * return, even if we are not just searching.
X */
X if (range){
X range[0] = range[1] = -1;
X }
X
X /*
X * Check for the special case where the regex is ".*" since
X * then we can save a call to compile and to match, since we
X * know what will happen. We can just fake it.
X *
X */
X
X if (from[0] == '.' && from[1] == '*' && from[2] == '\0'){
X register int i;
X
X match_all = 1;
X
X /*
X * For safety's sake, clear out the register values.
X * There might be a register reference in the replacement.
X * There will be nothing in the registers (since the search
X * pattern was ".*"). Since we aren't calling the regex
X * stuff we can't rely on it to set these to -1 (or 0 - as the
X * case may be).
X */
X#ifdef GNU_REGEX
X regs = empty_regs;
X#endif
X#ifdef HS_REGEX
X exp = &empty_exp;
X#endif
X }
X
X
X#ifdef GNU_REGEX
X /*
X * Do the first_time check for GNU. Notice the "else" here. We don't
X * want to do this if the regex is ".*", even if it is our first time.
X */
X else{
X if (first_time){
X extern char *malloc();
X if (!(re_comp_buf.buffer = (char *)malloc((unsigned)200))){
X RETURN(0);
X }
X
X re_comp_buf.allocated = 200;
X
X if (!(re_comp_buf.fastmap = (char *)malloc((unsigned)1 << BYTEWIDTH))){
X RETURN(0);
X }
X first_time = 0;
X }
X
X if (!repeat){
X re_comp_buf.translate = 0;
X re_comp_buf.used = 0;
X }
X }
X#endif
X
X /*
X * If we are not optimising a ".*" or repeating the regex we had last time,
X * compile the regular expression.
X */
X
X if (!match_all && !repeat){
X#ifdef GNU_REGEX
X if (re_compile_pattern(from, strlen(from), &re_comp_buf)){
X RETURN(0);
X }
X#endif
X
X#ifdef HS_REGEX
X if ((exp = regcomp(from)) == (regexp *)0){
X RETURN(0);
X }
X#endif
X }
X
X /*
X * Now get on with the matching/replacing etc.
X */
X
X do {
X if (match_all){
X /* Fake a match instead of calling re_search() or regexec(). */
X match = 1;
X#ifdef GNU_REGEX
X regs.start[0] = 0;
X regs.end[0] = str_len;
X#endif
X#ifdef HS_REGEX
X exp->startp[0] = str;
X exp->endp[0] = str + str_len;
X#endif
X }
X else{
X#ifdef GNU_REGEX
X match = re_search(&re_comp_buf, str, str_len, 0, str_len, ®s);
X#endif
X#ifdef HS_REGEX
X match = regexec(exp, str);
X#endif
X }
X
X if (search_only){
X /*
X * Show what happened and return.
X */
X
X#ifdef GNU_REGEX
X range[0] = match == NO_MATCH ? -1 : regs.start[0];
X range[1] = match == NO_MATCH ? -1 : regs.end[0];
X#endif
X#ifdef HS_REGEX
X range[0] = match == NO_MATCH ? -1 : (int)(exp->startp[0] - str);
X range[1] = match == NO_MATCH ? -1 : (int)(exp->endp[0] - str);
X#endif
X RETURN(str);
X }
X
X
X if (match != NO_MATCH){
X register int need;
X
X /* Set up the range so it can be used later if the caller wants it. */
X if (range){
X#ifdef GNU_REGEX
X range[0] = regs.start[0];
X range[1] = regs.end[0];
X#endif
X#ifdef HS_REGEX
X range[0] = (int)(exp->startp[0] - str);
X range[1] = (int)(exp->endp[0] - str);
X#endif
X }
X
X /*
X * Copy that portion that was not matched. It will
X * be unchanged in the output string.
X *
X */
X
X#ifdef GNU_REGEX
X need = regs.start[0];
X#endif
X#ifdef HS_REGEX
X need = (int)(exp->startp[0] - str);
X#endif
X
X if (need > 0){
X more_space(need);
X strncpy(new_str + new_pos, str, need);
X new_pos += need;
X }
X
X /*
X * Put in the replacement text (if any).
X * We substitute the contents of 'to', watching for register
X * references.
X */
X
X tmp = to;
X while (*tmp){
X if (*tmp == '\\' && isdigit(*(tmp + 1))){
X
X /* A register reference. */
X
X register int reg = *(tmp + 1) - '0';
X int translit = 0;
X#ifdef GNU_REGEX
X need = regs.end[reg] - regs.start[reg];
X#endif
X#ifdef HS_REGEX
X need = (int)(exp->endp[reg] - exp->startp[reg]);
X#endif
X
X /*
X * Check for a transliteration request.
X *
X */
X if (*(tmp + 2) == '{'){
X /* A transliteration table. Build the map. */
X FWRD_STATIC char *build_map();
X if (!(tmp = build_map(tmp + 2, map))){
X RETURN(0);
X }
X translit = 1;
X }
X else{
X tmp += 2;
X translit = 0;
X }
X
X more_space(need);
X
X /*
X * Copy in the register contents (if it matched), transliterating if need be.
X *
X */
X#ifdef GNU_REGEX
X if (regs.start[reg] != EMPTY_REGISTER){
X register int i;
X for (i = regs.start[reg]; i < regs.end[reg]; i++){
X new_str[new_pos++] = translit ? map[str[i]] : str[i];
X }
X }
X#endif
X
X#ifdef HS_REGEX
X if (exp->startp[reg] != EMPTY_REGISTER){
X register char *s;
X for (s = exp->startp[reg]; s < exp->endp[reg]; s++){
X new_str[new_pos++] = translit ? map[*s] : *s;
X }
X }
X#endif
X }
X else{
X /* A plain character, put it in. */
X more_space(1);
X new_str[new_pos++] = *tmp++;
X }
X }
X
X /*
X * Move forward over the matched text.
X *
X */
X#ifdef GNU_REGEX
X str += regs.end[0];
X str_len -= regs.end[0];
X#endif
X#ifdef HS_REGEX
X str = exp->endp[0];
X str_len -= (int)(exp->endp[0] - exp->startp[0]);
X#endif
X }
X } while (global && match != NO_MATCH && *str);
X
X /*
X * Copy the final portion of the string. This is the section that
X * was not matched (and hence which remains unchanged) by the last
X * match. Then we head off home.
X *
X */
X more_space(str_len);
X (void) strcpy(new_str + new_pos, str);
X RETURN(new_str);
X}
X
X#define DIGIT(x) (isdigit(x) ? (x) - '0' : islower(x) ? (x) + 10 - 'a' : (x) + 10 - 'A')
X
char *
backslash_eliminate(str, type, who)
char *str;
int type;
int who;
X{
X /*
X * Remove backslashes from the strings. Turn \040 etc. into a single
X * character (we allow eight bit values). Currently NUL is not
X * allowed.
X *
X * Turn "\n" and "\t" into '\n' and '\t' characters. Etc.
X *
X * The string may grow slightly here. Under normal circumstances
X * it will stay the same length or get shorter. It is only in the
X * case where we have to turn {a-z}{A-Z} into \0{a-z}{A-Z} that
X * we add two chars. This only happens when we are doing a REPLACEMENT.
X * So we can't overwrite str, and we have to
X * malloc. Sad, but the only ways I could find around it (at this
X * late stage) were really gross. I allowed an extra
X * 100 bytes which should cover most idiotic behaviour.
X * I count the extra space and exit nicely if they do do something
X * extremely silly.
X *
X * 'i' is an index into new_str.
X *
X * 'type' tells us how to interpret escaped characters.
X *
X * type = REGEX
X * if the pattern is a regular expression. If it is then
X * we leave escaped things alone (except for \n and \t and
X * friends).
X *
X * type = REPLACEMENT
X * if this is a replacement pattern. In this case we change
X * \( and \) to ( and ), but leave \1 etc alone as they are
X * register references. - becomes a metacharacter between
X * { and }.
X *
X * type = NORMAL
X * We do \n and \t elimination, as well as \040 etc, plus
X * all other characters that we find quoted we unquote.
X * type = NORMAL when we do a backslash elimination on the
X * string argument to strsed.
X *
X * who tells us where to tell mem where to stick the new string.
X *
X * \{m,n\} syntax (see ed(1)) is not supported.
X *
X */
X
X FWRD_STATIC char *mem();
X char *new_str;
X int extra = 100;
X int seenlb = 0;
X register int i = 0;
X register int seenbs = 0;
X int first_half = 0;
X
X if (type == REPLACEMENT){
X if (!(new_str = mem(who, strlen(str) + 1 + extra))){
X return 0;
X }
X }
X else{
X new_str = str;
X }
X
X while (*str){
X if (seenbs){
X seenbs = 0;
X switch (*str){
X case '\\':{
X new_str[i++] = '\\';
X str++;
X break;
X }
X
X case '-':{
X if (seenlb){
X /* Keep it quoted. */
X new_str[i++] = '\\';
X }
X new_str[i++] = '-';
X str++;
X break;
X }
X
X case '}':{
X if (seenlb){
X /* Keep it quoted. */
X new_str[i++] = '\\';
X }
X new_str[i++] = '}';
X str++;
X break;
X }
X
X case 'n':{
X new_str[i++] = '\n';
X str++;
X break;
X }
X
X case 't':{
X new_str[i++] = '\t';
X str++;
X break;
X }
X
X case 's':{
X new_str[i++] = ' ';
X str++;
X break;
X }
X
X case 'r':{
X new_str[i++] = '\r';
X str++;
X break;
X }
X
X case 'f':{
X new_str[i++] = '\f';
X str++;
X break;
X }
X
X case 'b':{
X new_str[i++] = '\b';
X str++;
X break;
X }
X
X case 'v':{
X new_str[i++] = '\13';
X str++;
X break;
X }
X
X case 'z':{
X str++;
X break;
X }
X
X case '0': case '1': case '2': case '3': case '4':
X case '5': case '6': case '7': case '8': case '9':{
X
X char val;
X
X /*
X * Three digit octal constant.
X *
X */
X if (*str >= '0' && *str <= '3' &&
X *(str + 1) >= '0' && *(str + 1) <= '7' &&
X *(str + 2) >= '0' && *(str + 2) <= '7'){
X
X val = (DIGIT(*str) << 6) +
X (DIGIT(*(str + 1)) << 3) +
X DIGIT(*(str + 2));
X
X if (!val){
X /*
X * NUL is not allowed.
X */
X return 0;
X }
X
X new_str[i++] = val;
X str += 3;
X break;
X }
X
X /*
X * One or two digit hex constant.
X * If two are there they will both be taken.
X * Use \z to split them up if this is not wanted.
X *
X */
X if (*str == '0' && (*(str + 1) == 'x' || *(str + 1) == 'X') && isxdigit(*(str + 2))){
X val = DIGIT(*(str + 2));
X if (isxdigit(*(str + 3))){
X val = (val << 4) + DIGIT(*(str + 3));
X str += 4;
X }
X else{
X str += 3;
X }
X
X if (!val){
X return 0;
X }
X
X new_str[i++] = val;
X break;
X }
X
X /*
X * Two or three decimal digits.
X * (One decimal digit is taken as either a register reference
X * or as a decimal digit if NORMAL is true below.)
X *
X */
X if (isdigit(*(str + 1))){
X val = DIGIT(*str) * 10 + DIGIT(*(str + 1));
X if (isdigit(*(str + 2))){
X val = 10 * val + DIGIT(*(str + 2));
X str += 3;
X }
X else{
X str += 2;
X }
X
X if (!val){
X return 0;
X }
X
X new_str[i++] = val;
X break;
X }
X
X /*
X * A register reference or else a single decimal digit if this
X * is a normal string..
X *
X * Emit \4 (etc) if we are not NORMAL (unless the digit is a 0
X * and we are processing an r.e. This is because \0 makes no
X * sense in an r.e., only in a replacement. If we do have \0
X * and it is an r.e. we return.)
X *
X */
X if (*str == '0' && type == REGEX){
X return 0;
X }
X
X if (type == NORMAL){
X if (!(val = DIGIT(*str))){
X return 0;
X }
X new_str[i++] = val;
X str++;
X }
X else{
X new_str[i++] = '\\';
X new_str[i++] = *str++;
X }
X break;
X }
X
X default:{
X if (type == REGEX){
X new_str[i++] = '\\';
X }
X new_str[i++] = *str++;
X break;
X }
X }
X }
X else{
X if (*str == '\\'){
X seenbs = 1;
X str++;
X }
X else if (type == REPLACEMENT && *str == '}'){
X if (*(str + 1) == '{' && first_half){
X new_str[i++] = *str++;
X new_str[i++] = *str++;
X first_half = 0;
X }
X else{
X seenlb = 0;
X new_str[i++] = *str++;
X }
X }
X else if (type == REPLACEMENT && !seenlb && *str == '{'){
X /*
X * Within { and }, \- should be left as such. So we can differentiate
X * between s/fred/\-/ and s/fred/{\-a-z}{+A-Z}
X *
X * We stick in a "\0" here in the case that \X has not just been
X * seen. (X = 0..9) Which is to say, {a-z}{A-Z} defaults to
X * \0{a-z}{A-Z}
X *
X */
X
X seenlb = 1;
X first_half = 1;
X
X if (i < 2 || new_str[i - 2] != '\\' || !(new_str[i - 1] >= '0' && new_str[i - 1] <= '9')){
X if ((extra -= 2) < 0){
X /* ran out of extra room. */
X return 0;
X }
X new_str[i++] = '\\';
X new_str[i++] = '0';
X }
X new_str[i++] = *str++;
X }
X else{
X /*
X * A normal char.
X *
X */
X new_str[i++] = *str++;
X }
X }
X }
X
X if (seenbs){
X /*
X * The final character was a '\'. Put it in as a single backslash.
X *
X */
X new_str[i++] = '\\';
X }
X
X new_str[i] = '\0';
X return new_str;
X}
X
static char *
build_map(s, map)
char *s;
char *map;
X{
X /*
X * Produce a mapping table for the given transliteration.
X * We are passed something that looks like "{a-z}{A-Z}"
X * Look out for \ chars, these are used to quote } and -.
X *
X * Return a pointer to the char after the closing }.
X * We cannot clobber s.
X *
X * The building of maps is somewhat optimised.
X * If the string is the same as the last one we were
X * called with then we don't do anything. It would be better
X * to remember all the transliterations we have seen, in
X * order (because in a global substitution we will
X * apply them in the same order repeatedly) and then we
X * could do the minimum amount of building. This is a
X * compromise because it is a fairly safe bet that there will
X * not be more than one transliteration done.
X *
X */
X
X char *in;
X char *out;
X char *str;
X char *tmp;
X char c;
X FWRD_STATIC char *mem();
X FWRD_STATIC char nextch();
X int i = 0;
X int range_count = 0;
X int seenbs = 0;
X static char *last = 0;
X static int last_len;
X
X if (!s){
X return 0;
X }
X
X if (last && !strncmp(s, last, last_len)){
X /* Re-use the map. */
X return s + last_len;
X }
X else{
X /*
X * Make a copy of s in both 'last' and 'str'
X */
X int len = strlen(s) + 1;
X if (!(str = mem(MEM_MAP, len)) || !(last = mem(MEM_MAP_SAVE, len))){
X return 0;
X }
X str[0] = last[0] = '\0';
X strcat(str, s);
X strcat(last, s);
X }
X
X tmp = str + 1;
X in = str;
X
X while (*tmp){
X if (seenbs){
X if (*tmp == '-'){
X /*
X * Keep the \ before a - since this is the range
X * separating metacharacter. We don't keep } quoted,
X * we just put it in. Then it is passed as a normal
X * char (no longer a metachar) to nextch().
X *
X */
X str[i++] = '\\';
X }
X str[i++] = *tmp++;
X seenbs = 0;
X }
X else{
X if (*tmp == '\\'){
X seenbs = 1;
X tmp++;
X }
X else if (*tmp == '}'){
X if (!range_count){
X /* seen first range. */
X range_count = 1;
X str[i++] = '\0';
X tmp++;
X while (*tmp == ' ' || *tmp == '\t'){
X tmp++;
X }
X if (*tmp != '{'){
X return 0;
X }
X out = str + i;
X tmp++;
X }
X else{
X /* seen both ranges. */
X str[i++] = '\0';
X tmp++;
X range_count = 2;
X break;
X }
X }
X else{
X /* A plain defenceless character. */
X str[i++] = *tmp++;
X }
X }
X }
X
X if (range_count != 2){
X return 0;
X }
X
X last_len = tmp - str;
X
X /*
X * Now 'out' and 'in' both point to character ranges.
X * These will look something like "A-Z" but may be
X * more complicated and have {} and - in them elsewhere.
X *
X */
X
X for (i = 0; i < 1 << BYTEWIDTH; i++){
X map[i] = i;
X }
X
X /*
X * Ready the range expanding function.
X *
X */
X (void) nextch(in, 0);
X (void) nextch(out, 1);
X
X /*
X * For each char in 'in', assign it a value in
X * 'map' corresponding to the next char in 'out'.
X *
X */
X
X while ((c = nextch((char *)0, 0))){
X map[c] = nextch((char *)0, 1);
X }
X
X return tmp;
X}
X
static char
nextch(str, who)
char *str;
int who;
{
    /*
     * Given a range like "a-z0237-9" return successive characters from
     * the range on successive calls.
     *
     * Two ranges can be worked through at once, selected by 'who' (0 or
     * 1). A call with str != 0 sets up range 'who' and returns 1. Calls
     * with str == 0 return the next character of range 'who'. When the
     * range is used up, who = 0 returns 0 and who = 1 goes on returning
     * the last character it produced (0 if it never produced one). 0 is
     * also returned for a bad 'who', for a range that was never set up,
     * for one that starts with '-', and for one that ends with a '-'
     * that has nothing after it. This makes the building of the
     * transliteration table in build_map() a trivial loop.
     *
     * A backslash quotes the character after it, so "\-" is an ordinary
     * minus sign. A backslash at the very end of the range stands for
     * itself.
     *
     * We must handle strange things like
     *     {a-b-c-z}         = {a-z}
     * and {z-l-a}           = {z-a}
     * and {f-f-f-f-h}       = {f-h}
     * and {a-z-f-h-y-d-b}   = {a-b}
     *
     * and so on.
     *
     * what[who] is the range string and pos[who] our place in it.
     * While a range is being counted through, pos[who] rests on its '-'
     * sign, last[who] is the character most recently returned and
     * increment[who] is the direction (+1 or -1) we are counting in.
     */

    static char *what[2] = {0, 0};
    static char last[2] = {0, 0};
    static int increment[2];
    static int pos[2];
    char *spec;

    if (who < 0 || who > 1){
        return 0;
    }

    if (str){
        /* Set up for this string, forgetting all about the last one. */
        what[who] = str;
        pos[who] = 0;
        last[who] = 0;
        increment[who] = 0;
        return 1;
    }

    if (!what[who]){
        return 0;
    }

    spec = what[who];

    if (!pos[who] && spec[0] == '-'){
        return 0;
    }

    switch (spec[pos[who]]){

        case '-':{
            /*
             * We're in mid-range. The character that ends the range
             * comes after the '-', or after the backslash if it is
             * quoted. Once we reach it, step over the lot.
             */
            int end = pos[who] + 1;

            if (spec[end] == '\\'){
                end++;
            }
            last[who] += increment[who];
            if (spec[end] == last[who]){
                pos[who] = end + 1;
            }
            return last[who];
        }

        case '\0':{
            /*
             * We've finished. Keep on returning the
             * last thing you saw if who = 1.
             */
            if (who){
                return last[1];
            }
            return 0;
        }

        case '\\':{
            /* Step over the quote, unless there is nothing for it to quote. */
            if (spec[pos[who] + 1] != '\0'){
                pos[who]++;
            }
        }
        /* FALLTHROUGH */

        default:{
            last[who] = spec[pos[who]++];

            /*
             * If we have reached a '-' then this is the start of a
             * range. Keep on moving forward until we see a sensible
             * end of range character. Then set up increment so that
             * we do the right thing next time round. We leave pos
             * pointing at the '-' sign.
             */
            while (spec[pos[who]] == '-'){
                /* 'inc' is the distance from the '-' to the end character. */
                int inc = 1;

                if (spec[pos[who] + inc] == '\\'){
                    inc++;
                }
                if (!spec[pos[who] + inc]){
                    return 0;
                }
                if (spec[pos[who] + inc + 1] == '-'){
                    /* Another '-' follows: that one has the real end. */
                    pos[who] += inc + 1;
                    continue;
                }
                increment[who] = spec[pos[who] + inc] - last[who];
                if (!increment[who]){
                    /* Something like f-f. Nothing to count through. */
                    pos[who] += inc + 1;
                    continue;
                }
                increment[who] = increment[who] > 0 ? 1 : -1;
                break;
            }
            return last[who];
        }
    }
}
X
static char *
mem(who, size)
int who;
int size;
X{
X /*
X * Get 'size' bytes of memeory one way or another.
X *
X * The 'mem_slots' array holds currently allocated hunks.
X * If we can use one that's already in use then do so, otherwise
X * try and find a hunk not in use somewhere else in the table.
X * As a last resort call malloc. All a bit specialised and
X * not too clear. Seems to works fine though.
X */
X
X FWRD_STATIC void mem_save();
X extern char *malloc();
X
X if (who < 0 || who >= MEM_SLOTS){
X return 0;
X }
X
X if (mem_slots[who].used){
X /*
X * There is already something here. Either move/free it or
X * return it if it is already big enough to hold this request.
X */
X if (mem_slots[who].size >= size){
X /* It is already big enough. */
X return mem_slots[who].s;
X }
X else{
X mem_save(who);
X }
X }
X else{
X /*
X * The slot was not in use. Check to see if there is space
X * allocated here already that we can use. If there is and
X * we can, use it, if there is and it's not big enough try to
X * save it. if there isn't then try to find it in another free slot,
X * otherwise don't worry, the malloc below will get us some.
X */
X if (mem_slots[who].s && mem_slots[who].size >= size){
X /* We'll take it. */
X mem_slots[who].used = 1;
X return mem_slots[who].s;
X }
X
X if (mem_slots[who].s){
X mem_save(who);
X }
X else{
X FWRD_STATIC int mem_find();
X int x = mem_find(size);
X if (x != -1){
X mem_slots[who].s = mem_slots[x].s;
X mem_slots[who].size = mem_slots[x].size;
X mem_slots[who].used = 1;
X mem_slots[x].s = (char *)0;
X return mem_slots[who].s;
X }
X }
X }
X
X /*
X * Have to use malloc 8-(
X */
X
X if (!(mem_slots[who].s = (char *)malloc((unsigned)size))){
X return 0;
X }
X mem_slots[who].size = size;
X mem_slots[who].used = 1;
X
X return mem_slots[who].s;
X}
X
static int
mem_find(size)
int size;
X{
X /*
X * See if we can find an unused but allocated slot with 'size'
X * (or more) space available. Return the index, or -1 if not.
X */
X
X register int i;
X
X for (i = 0; i < MEM_SLOTS; i++){
X if (!mem_slots[i].used && mem_slots[i].s && mem_slots[i].size >= size){
X return i;
X }
X }
X return -1;
X}
X
static void
mem_save(x)
int x;
X{
X /*
X * There is some memory in mem_slots[x] and we try to save it rather
X * than free it. In order we try to
X *
X * 1) put it in an unused slot that has no allocation.
X * 2) put it in an unused slot that has an allocation smaller than x's
X * 3) free it since there are no free slots and all the full ones are bigger.
X *
X */
X
X extern void free();
X register int i;
X register int saved = 0;
X
X /*
X * First we try to find somewhere unused and with no present allocation.
X */
X for (i = 0; i < MEM_SLOTS; i++){
X if (!mem_slots[i].used && !mem_slots[i].s){
X saved = 1;
X mem_slots[i].s = mem_slots[x].s;
X mem_slots[i].size = mem_slots[x].size;
X mem_slots[i].used = 0;
X break;
X }
X }
X
X /*
X * No luck yet. Try for a place that is not being used but which has
X * space allocated, and which is smaller than us (and all other such spots).
X * Pick on the smallest, yeah.
X */
X if (!saved){
X register int small = -1;
X register int small_val = 32767; /* Be nice to 16 bit'ers. Non-crucial if it's too low. */
X for (i = 0; i < MEM_SLOTS; i++){
X if (!mem_slots[i].used && mem_slots[i].size < mem_slots[x].size && mem_slots[i].size < small_val){
X small_val = mem_slots[i].size;
X small = i;
X }
X }
X
X if (small != -1){
X saved = 1;
X /* We got one, now clobber it... */
X free(mem_slots[small].s);
X /* and move on in. */
X mem_slots[small].s = mem_slots[x].s;
X mem_slots[small].size = mem_slots[x].size;
X mem_slots[small].used = 0;
X }
X }
X
X if (!saved){
X /* Have to toss it away. */
X free(mem_slots[x].s);
X }
X}
X
static void
mem_init()
X{
X /*
X * Clear all the memory slots.
X */
X
X register int i;
X
X for (i = 0; i < MEM_SLOTS; i++){
X mem_slots[i].s = (char *)0;
X mem_slots[i].used = 0;
X }
X}
X
static void
mem_free(except)
char *except;
X{
X /*
X * "Clear out" all the memory slots. Actually we do no freeing since
X * we may well be called again. We just mark the slots as unused. Next
X * time round they might be useful - the addresses and sizes are still there.
X *
X * For the slot (if any) whose address is 'except', we actually set the
X * address to 0. This is done because we are called ONLY from the macro
X * RETURN() in strsed() and we intend to return the value in 'except'.
X * Once this is done, strsed should (in theory) have no knowledge at all
X * of the address it passed back last time. That way we won't clobber it
X * and cause all sorts of nasty problems.
X */
X
X register int i;
X
X for (i = 0; i < MEM_SLOTS; i++){
X mem_slots[i].used = 0;
X if (mem_slots[i].s == except){
X mem_slots[i].s = (char *)0;
X mem_slots[i].size = 0;
X }
X }
X}
X
END_OF_FILE
if test 41443 -ne `wc -c <'strsed.c'`; then
echo shar: \"'strsed.c'\" unpacked with wrong size!
fi
# end of 'strsed.c'
fi
echo shar: End of archive 15 \(of 17\).
cp /dev/null ark15isdone
MISSING=""
for I in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 ; do
if test ! -f ark${I}isdone ; then
MISSING="${MISSING} ${I}"
fi
done
if test "${MISSING}" = "" ; then
echo You have unpacked all 17 archives.
rm -f ark[1-9]isdone ark[1-9][0-9]isdone
else
echo You still need to unpack the following archives:
echo " " ${MISSING}
fi
## End of shell archive.
exit 0
dan
----------------------------------------------------
O'Reilly && Associates argv at sun.com / argv at ora.com
Opinions expressed reflect those of the author only.
More information about the Comp.sources.x
mailing list