v14i074: strstr.c implementation
Doug Gwyn
gwyn at smoke.brl.mil
Fri Aug 31 10:24:53 AEST 1990
Posting-number: Volume 14, Issue 74
Submitted-by: gwyn at smoke.brl.mil (Doug Gwyn)
Archive-name: strstr/part01
#! /bin/sh
# This file was wrapped with "dummyshar". "sh" this file to extract.
# Contents: strstr.c
echo extracting 'strstr.c'
if test -f 'strstr.c' -a -z "$1"; then echo Not overwriting 'strstr.c'; else
sed 's/^X//' << \EOF > 'strstr.c'
X/*
X strstr - public-domain implementation of standard C library function
X
X last edit: 24-Aug-1990 D A Gwyn
X
X This is an original implementation based on an idea by D M Sunday,
X essentially the "quick search" algorithm described in CACM V33 N8.
X Unlike Sunday's implementation, this one does not wander past the
X ends of the strings (which can cause malfunctions under certain
X circumstances), nor does it require the length of the searched
X text to be determined in advance. There are numerous other subtle
X improvements too. The code is intended to be fully portable, but in
X environments that do not conform to the C standard, you should check
X the sections below marked "configure as required". There are also
X a few compilation options, as follows:
X
X #define ROBUST to obtain sane behavior when invoked with a null
X pointer argument, at a miniscule cost in speed
X #define ZAP to use memset() to zero the shift[] array; this may
X be faster in some implementations, but could fail on
X unusual architectures
X #define DEBUG to enable assertions (bug detection)
X #define TEST to enable the test program attached at the end
X*/
X#define ROBUST
X#define ZAP
X
X#ifdef __STDC__
X
X#include <stddef.h> /* defines size_t and NULL */
X#include <limits.h> /* defines UCHAR_MAX */
X
X#ifdef ZAP
Xtypedef void *pointer;
Xextern pointer memset( pointer, int, size_t );
X#endif
X
X#else /* normal UNIX-like C environment assumed; configure as required: */
X
Xtypedef unsigned size_t; /* type of result of sizeof */
X
X#define NULL 0 /* null pointer constant */
X
X#define UCHAR_MAX 255 /* largest value of unsigned char */
X /* 255 @ 8 bits, 65535 @ 16 bits */
X
X#ifdef ZAP
Xtypedef char *pointer;
Xextern pointer memset();
X#endif
X
X#define const /* nothing */
X
X#endif /* __STDC__ */
X
X#ifndef DEBUG
X#define NDEBUG
X#endif
X#include <assert.h>
X
Xtypedef const unsigned char cuc; /* char variety used in algorithm */
X
X#define EOS '\0' /* C string terminator */
X
Xchar * /* returns -> leftmost occurrence,
X or null pointer if not present */
Xstrstr( s1, s2 )
X const char *s1; /* -> string to be searched */
X const char *s2; /* -> search-pattern string */
X {
X register cuc *t; /* -> text character being tested */
X register cuc *p; /* -> pattern char being tested */
X register cuc *tx; /* -> possible start of match */
X register size_t m; /* length of pattern */
X#if UCHAR_MAX > 255 /* too large for auto allocation */
X static /* not malloc()ed; that can fail! */
X#endif /* else allocate shift[] on stack */
X size_t shift[UCHAR_MAX + 1]; /* pattern shift table */
X
X#ifdef ROBUST /* not required by C standard */
X if ( s1 == NULL || s2 == NULL )
X return NULL; /* certainly, no match is found! */
X#endif
X
X /* Precompute shift intervals based on the pattern;
X the length of the pattern is determined as a side effect: */
X
X#ifdef ZAP
X (void)memset( (pointer)&shift[1], 0, UCHAR_MAX * sizeof(size_t) );
X#else
X {
X register unsigned char c;
X
X c = UCHAR_MAX;
X do
X shift[c] = 0;
X while ( --c > 0 );
X }
X#endif
X /* Note: shift[0] is undefined at this point (fixed later). */
X
X for ( m = 1, p = (cuc *)s2; *p != EOS; ++m, ++p )
X shift[(cuc)*p] = m;
X
X assert(s2[m - 1] == EOS);
X
X {
X register unsigned char c;
X
X c = UCHAR_MAX;
X do
X shift[c] = m - shift[c];
X while ( --c > 0 );
X
X /* Note: shift[0] is still undefined at this point. */
X }
X
X shift[0] = --m; /* shift[EOS]; important details! */
X
X assert(s2[m] == EOS);
X
X /* Try to find the pattern in the text string: */
X
X for ( tx = (cuc *)s1; ; tx += shift[*t] )
X {
X for ( t = tx, p = (cuc *)s2; ; ++t, ++p )
X {
X if ( *p == EOS ) /* entire pattern matched */
X return (char *)tx;
X
X if ( *p != *t )
X break;
X }
X
X do {
X assert(m > 0);
X assert(t - tx < m);
X
X if ( *t == EOS )
X return NULL; /* no match */
X }
X while ( ++t - tx != m ); /* < */
X }
X }
X
X#ifdef TEST
X
X#ifdef __STDC__
X
X#include <stdlib.h> /* defines exit, EXIT_* */
X
X#else /* normal UNIX-like C environment assumed; configure as required: */
X
Xextern void exit();
X#define EXIT_SUCCESS 0
X#define EXIT_FAILURE 1
X
X#endif /* __STDC__ */
X
X#include <stdio.h>
X
Xint
Xmain( argc, argv )
X int argc;
X char *argv[];
X {
X register char *p;
X
X if ( argc < 3 )
X {
X (void)fprintf( stderr, "usage: strstr text pattern\n" );
X exit( EXIT_FAILURE );
X }
X
X if ( (p = strstr( argv[1], argv[2] )) == NULL )
X (void)printf( "pattern not found\n" );
X else
X (void)printf( "pattern start: %s\n", p );
X
X return EXIT_SUCCESS;
X }
X
X#endif /* TEST */
EOF
chars=`wc -c < 'strstr.c'`
if test $chars != 4462; then echo 'strstr.c' is $chars characters, should be 4462 characters!; fi
fi
exit 0
More information about the Comp.sources.misc
mailing list