News/mail gatewaying software, ALPHA RELEASE, Part03/03

Rich Salz rsalz at bbn.com
Tue Feb 21 09:44:29 AEST 1989


This code gateways between newsgroups and mailing lists.  It definitely
has bugs, use at your own risk.  Please send bug reports back to me.
For more details, see the file "README" in the first shar.

Please don't redistribute this code; wait for the "real" version in
comp.sources.unix in a month or two.

Hope to hear from you,
	/rich $alz

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file".  If this archive is complete, you
# will see the following message at the end:
#		"End of archive 3 (of 3)."
# Contents:  gag.y mail2news.c regex.c
# Wrapped by rsalz at fig.bbn.com on Mon Feb 20 18:36:46 1989
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'gag.y' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'gag.y'\"
else
echo shar: Extracting \"'gag.y'\" \(13865 characters\)
sed "s/^X//" >'gag.y' <<'END_OF_FILE'
X%{
X/*
X**  GAG
X**
X**  Mail/news gateway alias generator.
X*/
X#define MAINLINE
X#include "gate.h"
X#ifdef	RCSID
Xstatic char RCS[] =
X	"$Header$";
X#endif	/* RCSID */
X
X/*
X**  Set a to b.  The intended version, disabled below, would first free
X**  a malloc'd value that is non-null and not already pointing at the new
X**  value; the active definition is a plain assignment and frees nothing.
X*/
X/*#define FREEANDSET(a, b)	if (a && a != b) { free(a); a = b; } else
X*/
X#define FREEANDSET(a, b)	a = b
X
X
Xextern int	 yylineno;
Xextern char	 yytext[];
Xextern char	 yyfilename[];
Xextern FILE	*yyin;
X
Xchar		*Pname;			/* Program name			*/
X
XSTATIC int	 Errors;		/* Did user screw up?		*/
XSTATIC int	 NoGroupCheck;		/* Don't check if valid group?	*/
XSTATIC int	 PostViaMail;		/* Make post-news-group alias?	*/
XSTATIC char	*OutDir;		/* Directory for MMDF scripts	*/
XSTATIC FILE	*mmdf;			/* File for MMDF aliases	*/
XSTATIC FILE	*sendmail;		/* File for Sendmail aliases	*/
XSTATIC FILE	*news;			/* File for sys file entries	*/
X
XSTATIC char	 *CurCommand,		  *DefCommand;
XSTATIC char	 *CurDirectory,		  *DefDirectory;
XSTATIC char	**CurDistribs,		 **DefDistribs;
XSTATIC int	  CurDoMailinglist,	   DefDoMailinglist;
XSTATIC char	 *CurFlags,		  *DefFlags;
XSTATIC char	 *CurMail2news,		  *DefMail2news;
XSTATIC char	 *CurMailcontact,	  *DefMailcontact;
XSTATIC char	 *CurMailhost,		  *DefMailhost;
XSTATIC char	 *CurModerator,		  *DefModerator;
XSTATIC char	 *CurNews2mail,		  *DefNews2mail;
XSTATIC char	 *CurOrganization,	  *DefOrganization;
XSTATIC char	 *CurOwner,		  *DefOwner;
XSTATIC char	 *CurRequestAddr,	  *DefRequestAddr;
XSTATIC char	 *CurSite,		  *DefSite;
XSTATIC char	 *CurUser,		  *DefUser;
X
Xextern time_t	 time();
X%}
X
X%union {
X    char	*String;
X}
X
X%token	COMMAND DEFAULT DIRECTORY DISTRIBUTIONS DO DOTIFY FLAGS ID INEWSt
X%token	MAIL2NEWS MAILCONTACT MAILHOST MAILINGLIST MAILPOST MODERATOR
X%token	NEWS2MAIL NO ORGANIZATION OWNER REQUESTADDR SITE USER
X
X%type	<String>	ID value id
X
X%%
X
X	/* A file is a sequence of ';'-terminated entries:  gateway blocks,
X	 * default settings, and mailpost entries.  The error alternative
X	 * lets the parser resynchronize at the next ';' after a bad entry. */
Xfile	: /* NULL */
X	| file block ';'
X	| file default ';'
X	| file mpost ';'
X	| file error ';' {
X#ifdef	lint
X	    /* Compulsive...  only compiled under lint, so that YYERROR
X	     * is referenced somewhere. */
X	    if (yylineno)
X		YYERROR;
X#endif	/* lint */
X	}
X	;
X
X	/* "default <keyword> value" records a Def* setting; op_init copies
X	 * the Def* settings into the Cur* ones at the start of every later
X	 * entry.  With FREEANDSET defined as plain assignment, a value that
X	 * gets replaced is not freed. */
Xdefault	: DEFAULT COMMAND value		{ FREEANDSET(DefCommand, $3); }
X	| DEFAULT DIRECTORY value	{ FREEANDSET(DefDirectory, $3); }
X	| DEFAULT DISTRIBUTIONS value	{
X	    /* Throw away any earlier default list before splitting the
X	     * new one. */
X	    if (DefDistribs)
X		SplitFree(&DefDistribs);
X	    (void)Split($3, &DefDistribs, '\0');
X	}
X	| DEFAULT INEWSt FLAGS value	{ FREEANDSET(DefFlags, $4); }
X	| DEFAULT MAIL2NEWS value	{ FREEANDSET(DefMail2news, $3); }
X	| DEFAULT MAILCONTACT value	{ FREEANDSET(DefMailcontact, $3); }
X	| DEFAULT MAILHOST value	{ FREEANDSET(DefMailhost, $3); }
X	| DEFAULT DO MAILINGLIST	{ DefDoMailinglist = TRUE; }
X	| DEFAULT NO MAILINGLIST	{ DefDoMailinglist = FALSE; }
X	| DEFAULT MODERATOR value	{ FREEANDSET(DefModerator, $3); }
X	| DEFAULT NEWS2MAIL value	{ FREEANDSET(DefNews2mail, $3); }
X	| DEFAULT ORGANIZATION value	{ FREEANDSET(DefOrganization, $3); }
X	| DEFAULT OWNER value		{ FREEANDSET(DefOwner, $3); }
X	| DEFAULT REQUESTADDR value	{ FREEANDSET(DefRequestAddr, $3); }
X	| DEFAULT SITE value		{ FREEANDSET(DefSite, $3); }
X	| DEFAULT USER value		{ FREEANDSET(DefUser, $3); }
X	;
X
X	/* A gateway entry:  two IDs followed by optional per-entry
X	 * settings.  The first ID is passed to WriteOne as the newsgroup
X	 * and the second as the mailing list.  Nothing is written once an
X	 * error has been counted or if ValidNewsgroup rejects the group;
X	 * both ID strings are freed either way. */
Xblock	: op_init ID ID op_set {
X	    if (!Errors && ValidNewsgroup($2))
X		WriteOne($2, $3);
X	    free($2);
X	    free($3);
X	}
X	;
X
X	/* A mailpost entry:  create aliases so that mail sent to the
X	 * dashed form of the newsgroup name is posted to the group.
X	 * Skipped once an error has been counted or if ValidNewsgroup
X	 * rejects the group. */
Xmpost	: op_init MAILPOST ID op_set {
X	    char	*GroupasMail;
X
X	    if (!Errors && ValidNewsgroup($3)) {
X		/* Dot2Dash returns a malloc'd copy, freed below. */
X		GroupasMail = Dot2Dash($3);
X		if (mmdf)
X		    MMDFpostviamail($3, GroupasMail);
X		if (sendmail)
X		    Fprintf(sendmail, "%s: \"|%s -n %s\"\n",
X			    GroupasMail, CurMail2news, $3);
X		free(GroupasMail);
X	    }
X	    free($3);
X	}
X	;
X
X	/* Empty rule reduced at the start of every block and mpost entry:
X	 * reset each Cur* setting to its Def* value before the entry's own
X	 * options are parsed. */
Xop_init	: /* NULL */ {
X	    CurDoMailinglist =  DefDoMailinglist;
X	    /* Only release the current list if it isn't the default list
X	     * itself.
X	     * NOTE(review): CurDistribs can still be NULL here when a
X	     * default list exists -- confirm SplitFree accepts that. */
X	    if (CurDistribs != DefDistribs)
X		SplitFree(&CurDistribs);
X	    CurDistribs = DefDistribs;
X	    FREEANDSET(CurCommand, DefCommand);
X	    FREEANDSET(CurDirectory, DefDirectory);
X	    FREEANDSET(CurFlags, DefFlags);
X	    FREEANDSET(CurMail2news, DefMail2news);
X	    FREEANDSET(CurMailhost, DefMailhost);
X	    FREEANDSET(CurMailcontact, DefMailcontact);
X	    FREEANDSET(CurModerator, DefModerator);
X	    FREEANDSET(CurNews2mail, DefNews2mail);
X	    FREEANDSET(CurOrganization, DefOrganization);
X	    FREEANDSET(CurOwner, DefOwner);
X	    FREEANDSET(CurRequestAddr, DefRequestAddr);
X	    FREEANDSET(CurSite, DefSite);
X	    FREEANDSET(CurUser, DefUser);
X	}
X	;
X
X	/* Zero or more per-entry options (right-recursive list). */
Xop_set	: /* NULL */
X	| an_opt op_set
X	;
X
X	/* Here be memory leaks! */
Xan_opt	: COMMAND value		{ CurCommand = $2; }
X	| DIRECTORY value	{ CurDirectory = $2; }
X	| DISTRIBUTIONS value	{ (void)Split($2, &CurDistribs, '\0'); }
X	| INEWSt FLAGS value	{ CurFlags = $3; }
X	| MAIL2NEWS value	{ CurMail2news = $2; }
X	| MAILCONTACT value	{ CurMailcontact = $2; }
X	| MAILHOST value	{ CurMailhost = $2; }
X	| DO MAILINGLIST	{ CurDoMailinglist = TRUE; }
X	| NO MAILINGLIST	{ CurDoMailinglist = FALSE; }
X	| MODERATOR value	{ CurModerator = $2; }
X	| NEWS2MAIL value	{ CurNews2mail = $2; }
X	| ORGANIZATION value	{ CurOrganization = $2; }
X	| OWNER value		{ CurOwner = $2; }
X	| REQUESTADDR value	{ CurRequestAddr = $2; }
X	| SITE value		{ CurSite = $2; }
X	| USER value		{ CurUser = $2; }
X	;
X
X	/* A setting's value:  an id, optionally preceded by '=' and/or
X	 * DOTIFY.  With DOTIFY the result is the string returned by
X	 * Dotify() and the original id string is freed. */
Xvalue	: id {
X	    $$ = $1;
X	}
X	| '=' id {
X	    $$ = $2;
X	}
X	| DOTIFY id {
X	    $$ = Dotify($2);
X	    free($2);
X	}
X	| '=' DOTIFY id {
X	    $$ = Dotify($3);
X	    free($3);
X	}
X	;
X
X	/* An ID token, bare or wrapped in parentheses; either way the
X	 * token's string is the result. */
Xid	: ID {
X	    $$ = $1;
X	}
X	| '(' ID ')' {
X	    $$ = $2;
X	}
X	;
X
X%%
X
X
X/*
X**  Return a freshly malloc'd copy of s in which every '.' has been
X**  replaced by '-'.  The caller owns (and frees) the copy.
X*/
XSTATIC char *
XDot2Dash(s)
X    register char	*s;
X{
X    char		*copy;
X    register char	*out;
X
X    copy = malloc((unsigned int)(strlen(s) + 1));
X    out = copy;
X    while (*s) {
X	if (*s == '.')
X	    *out = '-';
X	else
X	    *out = *s;
X	out++;
X	s++;
X    }
X    *out = '\0';
X    return(copy);
X}
X
X
X/*
X**  Return a freshly malloc'd copy of s with a '.' inserted in front of
X**  every uppercase letter and every '.'.  The buffer is sized for the
X**  worst case, in which every character gets a dot.
X*/
XSTATIC char *
XDotify(s)
X    register char	*s;
X{
X    char		*copy;
X    register char	*out;
X
X    copy = malloc((unsigned int)(strlen(s) * 2 + 1));
X    out = copy;
X    while (*s) {
X	if (*s == '.' || isupper(*s))
X	    *out++ = '.';
X	*out++ = *s++;
X    }
X    *out = '\0';
X    return(copy);
X}
X
X
X/*
X**  Return TRUE if Group is named in the ACTIVE file, or if group checking
X**  has been turned off; otherwise complain on stderr and return FALSE.
X**  The active file is read once and kept for later calls.
X*/
XSTATIC int
XValidNewsgroup(Group)
X    register char	 *Group;
X{
X    static char		**File;
X    register char	**line;
X    register char	 *space;
X
X    /* Checking disabled:  every name is acceptable. */
X    if (NoGroupCheck)
X	return(TRUE);
X
X    /* First call:  read the active file and cut each line at its first
X     * space, leaving just the newsgroup name. */
X    if (File == NULL) {
X	File = ReadFile(ACTIVE);
X	for (line = File; *line; line++) {
X	    space = IDX(*line, ' ');
X	    if (space)
X		*space = '\0';
X	}
X    }
X
X    line = File;
X    while (*line) {
X	if (EQ(*line, Group))
X	    return(TRUE);
X	line++;
X    }
X
X    Fprintf(stderr, "%s: ignoring invalid newsgroup \"%s\".\n", Pname, Group);
X    return(FALSE);
X}
X
X
X/*
X**  Write the MMDF alias pair that lets users mail to post-<group> as if
X**  the newsgroup were a mailing list and, when an output directory was
X**  given, the post-<group> shell script the alias pipes into.
X*/
XSTATIC void
XMMDFpostviamail(Ngroup, GroupasMail)
X    char		*Ngroup;
X    char		*GroupasMail;
X{
X    register FILE	*script;
X    char		 path[SM_SIZE];
X
X    /* The alias pipes into the <directory>/post-<group> script. */
X    Fprintf(mmdf, "post-%s: @%s { \"%s|%s/post-%s\" }\n",
X	    GroupasMail, CurMailhost, CurUser, CurDirectory,
X	    GroupasMail);
X    Fprintf(mmdf, "\tpost-%s@%s\n", GroupasMail, CurMailhost);
X
X    /* Without an output directory there is no script to write. */
X    if (!OutDir)
X	return;
X
X    /* Replace any old copy of the script. */
X    (void)sprintf(path, "%s/post-%s", OutDir, GroupasMail);
X    (void)unlink(path);
X    script = fopen(path, "w");
X    if (script == NULL) {
X	Fprintf(stderr, "%s:  Can't open \"%s\" for output, %s.\n",
X		Pname, path, Estring());
X	exit(1);
X    }
X
X    /* The script just hands the message to mail2news. */
X    Fprintf(script, "#! /bin/sh\n");
X    Fprintf(script, "## This script forwards into the \"%s\" newsgroup.\n",
X	    Ngroup);
X    Fprintf(script, "exec %s -n %s\n", CurMail2news, Ngroup);
X
X    /* Done; make it executable. */
X    (void)fclose(script);
X    (void)chmod(path, 0755);
X}
X
X
X/*
X**  Write out one newsgroup/mailing list gatewaying entry.  This is where
X**  the real work is done.  We do MMDF, Sendmail, and news/sys file entries
X**  here, each one only if the corresponding output file has been opened.
X**  Ngroup is the newsgroup name, Mlist the mailing list name; everything
X**  else comes from the Cur* settings.
X*/
XSTATIC void
XWriteOne(Ngroup, Mlist)
X    register char	*Ngroup;
X    register char	*Mlist;
X{
X    register char	**p;
X    register FILE	 *F;
X    register char	 *GroupasMail;
X    char		  buff[SM_SIZE];
X
X    /* Mail-style spelling of the group name; freed before returning. */
X    GroupasMail = Dot2Dash(Ngroup);
X
X    if (mmdf) {
X	/* Create an alias that forwards to the script. */
X	Fprintf(mmdf, "\n##  Add this to the \"%s\" mailing list.\n", Mlist);
X	Fprintf(mmdf, "%s-gate: @%s { \"%s|%s/gate-%s\" }\n",
X		Mlist, CurMailhost, CurUser, CurDirectory, Mlist);
X	Fprintf(mmdf, "\t%s-gate@%s\n", Mlist, CurMailhost);
X
X	/* Write the script. */
X	if (OutDir) {
X
X	    /* Open the file. */
X	    (void)sprintf(buff, "%s/gate-%s", OutDir, Mlist);
X	    (void)unlink(buff);
X	    if ((F = fopen(buff, "w")) == NULL) {
X		Fprintf(stderr, "%s:  Can't open \"%s\" for output, %s.\n",
X			Pname, buff, Estring());
X		exit(1);
X	    }
X
X	    /* Write it. */
X	    Fprintf(F, "#! /bin/sh\n");
X	    Fprintf(F, "## This script is on the \"%s\" mailing list.\n",
X		    Mlist);
X	    Fprintf(F, "exec %s -n %s \\\n\t-o \"%s\"",
X		    CurMail2news, Ngroup, CurOrganization);
X	    /* The moderator flag belongs in the script, not in the
X	     * sendmail alias file (which may not even be open). */
X	    if (CurModerator && *CurModerator)
X		Fprintf(F, "\\\n\t-a %s", CurModerator);
X	    if (CurFlags && *CurFlags)
X		Fprintf(F, " \\\n\t%s", CurFlags);
X	    Fprintf(F, "\n");
X
X	    /* Close it. */
X	    (void)fclose(F);
X	    (void)chmod(buff, 0755);
X	}
X
X	if (PostViaMail)
X	    MMDFpostviamail(Ngroup, GroupasMail);
X    }
X
X    if (sendmail) {
X	/* The organization is wrapped in single quotes below. */
X	if (IDX(CurOrganization, '\'')) {
X	    yyerror("Can't have ' in organization name");
X	    free(GroupasMail);
X	    return;
X	}
X
X	/* Does it make sense to do this? */
X	if (CurModerator && *CurModerator && CurDoMailinglist)
X	    Fprintf(stderr, "Warning:  group %s is moderated and mailable.\n",
X		    Ngroup);
X
X	/* The list aliases are commented out unless we do the list. */
X	Fprintf(sendmail, "\n## %s <==> %s gateway\n", Ngroup, Mlist);
X	Fprintf(sendmail, "%sowner-%s: %s\n",
X		CurDoMailinglist ? "" : "#", Mlist, CurOwner);
X	Fprintf(sendmail, "%s%s: %s@%s\n",
X		CurDoMailinglist ? "" : "#", Mlist, Mlist, CurMailhost);
X	Fprintf(sendmail, "owner-post-%s: %s\n", Mlist, CurOwner);
X	Fprintf(sendmail, "post-%s: \"|%s -n %s -o '%s'",
X		Mlist, CurMail2news, Ngroup, CurOrganization);
X	if (CurFlags && *CurFlags)
X	    Fprintf(sendmail, " %s", CurFlags);
X	if (CurModerator && *CurModerator)
X	    Fprintf(sendmail, " -a %s", CurModerator);
X	Fprintf(sendmail, "\"\n");
X
X	if (PostViaMail) {
X	    Fprintf(sendmail, "owner-%s: %s\n", GroupasMail, CurOwner);
X	    Fprintf(sendmail, "%s: \"|%s -n %s\"\n",
X		    GroupasMail, CurMail2news, Ngroup);
X	}
X    }
X
X    if (news) {
X	/* Sanity check. */
X	if (CurSite == NULL) {
X	    Fprintf(stderr, "Can't write sys files without a site!\n");
X	    exit(1);
X	}
X
X	/* Pseudo-site name and distributions.  CurDistribs stays NULL
X	 * when no distributions were ever given, so don't walk it then. */
X	Fprintf(news, "%s\\\n :", CurSite);
X	if (CurDistribs)
X	    for (p = CurDistribs; *p; p++)
X		Fprintf(news, "%s,!%s.all,", *p, *p);
X	Fprintf(news, "%s,!%s.all\\\n ", Ngroup, Ngroup);
X
X	/* Command invocation; the request address defaults to
X	 * <mailcontact>-request. */
X	if (CurRequestAddr)
X	    Fprintf(news, "::%s %s %s %s %s %%s\n",
X		    CurNews2mail, Mlist, CurMailcontact, CurRequestAddr,
X		    CurMailhost);
X	else
X	    Fprintf(news, "::%s %s %s %s-request %s %%s\n",
X		    CurNews2mail, Mlist, CurMailcontact, CurMailcontact,
X		    CurMailhost);
X    }
X
X    /* Clean up. */
X    free(GroupasMail);
X}
X
X
X
X/*
X**  Parser error hook:  report the message with the input file name, the
X**  line number and the text being scanned, and count the error.
X*/
Xyyerror(p)
X    char	*p;
X{
X    char	 context[SM_SIZE];
X
X    /* Count it first; the report below doesn't depend on the count. */
X    Errors++;
X
X    /* Bounded, always-terminated copy of the current token text. */
X    (void)strncpy(context, yytext, sizeof context - 1);
X    context[sizeof context - 1] = '\0';
X
X    Fprintf(stderr, "\"%s\", line %d: %s (near \"%s\")\n",
X	    yyfilename, yylineno, p, context);
X}
X
X
X/*
X**  Open the named file for writing and return its stream; the name "-"
X**  means standard output.  Failure to open is fatal.
X*/
XSTATIC FILE *
Xopenfile(name)
X    char	*name;
X{
X    FILE	*out;
X
X    if (EQ(name, "-"))
X	out = stdout;
X    else {
X	out = fopen(name, "w");
X	if (out == NULL) {
X	    Fprintf(stderr, "%s:  Can't open \"%s\" for output, %s.\n",
X		    Pname, name, Estring());
X	    exit(1);
X	}
X    }
X    return(out);
X}
X
X
X/*
X**  Parse the command line, open the requested output files, write a
X**  prolog to each, run the parser over the input (a named file, or the
X**  parser's default input if none is named), write the epilogs and exit
X**  with status 0 if no errors were counted, 1 otherwise.
X**
X**  Options:  -b skip the newsgroup check, -d dir for MMDF scripts,
X**  -m/-n/-s name the MMDF, news and sendmail output files ("-" for
X**  standard output), -p also make post-via-mail aliases.
X*/
Xmain(ac, av)
X    int		 ac;
X    char	*av[];
X{
X    static char	 PROLOG[] = "--START-OF-GATEWAY-OUTPUT-";
X    static char	 EPILOG[] = "--END-OF-GATEWAY-OUTPUT-";
X    static char	 LINE1[] =
X	"This section of the alias file has been built automatically;";
X    static char	 LINE2[] =
X	"if you make any changes here they will be lost when it is rebuilt.";
X    int		 c;
X    time_t	 now;
X    char	*timestring;
X
X    /* Set defaults.  Pname is the last component of av[0]. */
X    Pname = (Pname = RDX(av[0], '/')) ? Pname + 1 : av[0];
X    /* NOTE(review): with the umask cleared, files made by fopen() below
X     * get whatever mode the library asks for -- confirm that is wanted. */
X    (void)umask(0);
X    now = time((time_t *)NULL);
X    /* The ctime() string ends in a newline, which the "Created at"
X     * lines below rely on. */
X    timestring = ctime(&now);
X
X    /* Parse JCL. */
X    while ((c = getopt(ac, av, "bd:m:n:ps:")) != EOF)
X	switch (c) {
X	default:
X	    Fprintf(stderr, "%s:  Incorrect usage.\n", Pname);
X	    exit(1);
X	case 'b':
X	    NoGroupCheck = TRUE;
X	    break;
X	case 'd':
X	    OutDir = optarg;
X	    break;
X	case 'm':
X	    mmdf = openfile(optarg);
X	    break;
X	case 'n':
X	    news = openfile(optarg);
X	    break;
X	case 'p':
X	    PostViaMail++;
X	    break;
X	case 's':
X	    sendmail = openfile(optarg);
X	    break;
X	}
X
X    /* Get input.  With no file argument yyin is left alone and only the
X     * name used in error messages is set.
X     * NOTE(review): the strcpy()s into yyfilename are unbounded; its
X     * size is not visible here. */
X    av += optind;
X    if (*av == NULL)
X	(void)strcpy(yyfilename, "stdin");
X    else {
X	if ((yyin = fopen(*av, "r")) == NULL) {
X	    Fprintf(stderr, "%s: Can't open \"%s\" for input, %s.\n",
X		    Pname, *av, Estring());
X	    exit(1);
X	}
X	(void)strcpy(yyfilename, *av);
X    }
X
X    /* Write prologs. */
X    if (mmdf) {
X	Fprintf(mmdf, "##  %s\n", PROLOG);
X	Fprintf(mmdf, "##  Created at %s", timestring);
X	Fprintf(mmdf, "##  %s\n", LINE1);
X	Fprintf(mmdf, "##  %s\n", LINE2);
X    }
X    if (news) {
X	Fprintf(news, "##  %s\n", PROLOG);
X	Fprintf(news, "##  Created at %s", timestring);
X    }
X    if (sendmail) {
X	Fprintf(sendmail, "##  %s\n", PROLOG);
X	Fprintf(sendmail, "## Created at %s", timestring);
X	Fprintf(sendmail, "##  %s\n", LINE1);
X	Fprintf(sendmail, "##  %s\n", LINE2);
X    }
X
X    /* Do the work.  The parser's return value is ignored; the exit
X     * status below comes from the Errors count instead. */
X    (void)yyparse();
X
X    /* Close files, leaving stdout open if an output was sent there. */
X    if (mmdf) {
X	Fprintf(mmdf, "##  %s\n", EPILOG);
X	if (mmdf != stdout)
X	    (void)fclose(mmdf);
X    }
X    if (news) {
X	Fprintf(news, "##  %s\n", EPILOG);
X	if (news != stdout)
X	    (void)fclose(news);
X    }
X    if (sendmail) {
X	Fprintf(sendmail, "##  %s\n", EPILOG);
X	if (sendmail != stdout)
X	    (void)fclose(sendmail);
X    }
X
X    /* That's all she wrote... */
X    exit(Errors == 0 ? 0 : 1);
X    /* NOTREACHED */
X}
END_OF_FILE
if test 13865 -ne `wc -c <'gag.y'`; then
    echo shar: \"'gag.y'\" unpacked with wrong size!
fi
# end of 'gag.y'
fi
if test -f 'mail2news.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'mail2news.c'\"
else
echo shar: Extracting \"'mail2news.c'\" \(14007 characters\)
sed "s/^X//" >'mail2news.c' <<'END_OF_FILE'
X/*
X**  MAIL2NEWS
X**  Gateway mail messages into netnews.  Usage:
X**	mail2news [inews flags] -o Organization
X**  In order to do this, there are a number of interesting transformations
X**  that need to be made on the headers...
X**
X**  This program is descended from:  @(#)recnews.c 2.10 4/16/85.
X*/
X#include "gate.h"
X#include <signal.h>
X#include <sys/file.h>
X#ifdef	RCSID
Xstatic char RCS[] =
X	"$Header$";
X#endif	/* RCSID */
X
X/* Play games to be portable for those without <sys/wait.h>. */
X#ifdef	HAVE_SYSWAIT
X#include <sys/wait.h>
Xtypedef union wait	WAITER;
X#define WAITVAL(s)	((s).w_status)
X#else
Xtypedef int		WAITER;
X#define WAITVAL(s)	(s)
X#endif	/* HAVE_SYSWAIT */
X#define	W_core(s)	(WAITVAL(s) & 0200)
X#define	W_exsig(s)	(WAITVAL(s) & 0177)
X#define	W_excode(s)	((WAITVAL(s) >> 8) & 0377)
X
X/* For those who don't have this in <sys/file.h>. */
X#ifndef	R_OK
X#define	R_OK		4	/* readable by caller */
X#endif	/* R_OK */
X
X/* Stuff for pipe(2). */
X#define STDIN		0
X#define	READER		0
X#define	WRITER		1
X
X/* Global variables. */
X#ifdef	DEBUG
Xint	 Buggy;
X#endif	/* DEBUG */
Xint	 ChildPid = -1;
Xchar	*Pname;
X
X/*
X**  This is a little tricky.  If we're getting an original submission, we
X**  want to reject it if it doesn't have a subject header.  If we're
X**  playing gateway, however, it has already been distributed
X**  elsewhere, and it is unreasonable to throw it out, so we supply a subject
X**  of `(none)'.  We determine whether we're gatewaying by checking for
X**  either the `o' (change organization name) or `x' (don't send this to
X**  the specified site) options on the command line, because these indicate
X**  gatewaying activity.
X*/
Xint	 SubjReqd = TRUE;
X
X/*
X**  Quickie hack to see if a mail message is a "please drop me" request.
X**  Original program written by Russ Nelson, <nelson at clutx.clarkson.edu>.
X**  Severely hacked on by Rich $alz, <rsalz at bbn.com>.
X**
X**  The rest of F is copied to a temporary file while its words are
X**  counted.  A short message that mentions a drop/add word, a from/to
X**  word and a mail/list word is taken to be a request and NULL is
X**  returned; otherwise a stream reading the temporary copy is returned.
X**  If the temporary file can't be created, F is handed back untouched.
X**
X**  Perhaps a better way to test is to make the test less conservative,
X**  and see what "real" articles get caught, and make adjustments then?
X**  Comments solicited.
X*/
XSTATIC FILE *
XIsSubRequest(F)
X    register FILE	*F;
X{
X    register FILE	*copy;
X    register char	*wp;
X    register int	 c;
X    register int	 drop_or_add;
X    register int	 from_or_to;
X    register int	 mail_word;
X    register int	 count;
X    char		 word[SM_SIZE];
X    char		 tmpname[SM_SIZE];
X
X    /* Create temp file; if we can't let the message through. */
X    (void)strcpy(tmpname, TEMPFILE);
X    copy = fopen(mktemp(tmpname), "w");
X    if (copy == NULL)
X	return(F);
X
X    /* Clear counts. */
X    drop_or_add = from_or_to = mail_word = count = 0;
X
X    /* Read input a character at a time, collecting lowercased words. */
X    wp = word;
X    while ((c = getc(F)) != EOF) {
X	(void)putc(c, copy);
X
X	if (isalpha(c)) {
X	    /* Part of a word; letters that don't fit are dropped. */
X	    if (wp < &word[sizeof word - 1])
X		*wp++ = isupper(c) ? tolower(c) : c;
X	    continue;
X	}
X
X	/* A non-letter ends the current (possibly empty) word. */
X	*wp = '\0';
X	if (wp > word)
X	    count++;
X	wp = word;
X
X	if (EQ(word, "remove") || EQ(word, "drop") || EQ(word, "off")
X	 || EQ(word, "subscribe") || EQ(word, "get") || EQ(word, "add"))
X	    drop_or_add++;
X	else if (EQ(word, "from") || EQ(word, "to"))
X	    from_or_to++;
X	else if (EQ(word, "mail") || EQ(word, "mailing")
X	      || EQ(word, "list") || EQ(word, "dl"))
X	    mail_word++;
X    }
X
X    (void)fclose(F);
X    (void)fclose(copy);
X
X    /* Use fancy-shmancy AI techniques to determine what the message is. */
X    if (count < 25 && drop_or_add && from_or_to && mail_word)
X	F = NULL;
X    else
X	F = fopen(tmpname, "r");
X
X    /* The name can go now; an open stream keeps the data reachable. */
X    (void)unlink(tmpname);
X    return(F);
X}
X
X
X
X/*
X**  Modify the Newsgroups: as directed by the command string.
X**  The command is split into tokens and the first letter of the first
X**  token picks the action:  add, delete, kill, move or quiet kill.
X**  hp->nbuf is rewritten in place; group is the newsgroup whose mapping
X**  file the command came from.
X**  NOTE(review): nothing here checks the size of hp->nbuf when appending.
X*/
XSTATIC void
XDoCommand(hp, command, group)
X    register HBUF		 *hp;
X    char			 *command;
X    char			 *group;
X{
X    register char		 *p;
X    register int		  i;
X    register int		  n;
X    register int		  nng;
X    char			**tokens;
X    char			**ng;
X    char			  buff[BUFSIZ];
X
X    /* An empty command does nothing. */
X    if ((n = Split(command, &tokens, '\0')) == 0) {
X	SplitFree(&tokens);
X	return;
X    }
X
X    /* Split the current newsgroup list; the cases below rebuild
X     * hp->nbuf from these pieces, writing through p. */
X    nng = Split(hp->nbuf, &ng, NGDELIM);
X    p = hp->nbuf;
X    switch (tokens[0][0]) {
X    case 'a':				/* Add		*/
X	/* Append every remaining token to the end of the list. */
X	if (n > 1)
X	    for (p += strlen(p), i = 1; i < n; i++) {
X		*p++ = NGDELIM;
X		p += APPEND(p, tokens[i]);
X	    }
X	break;
X    case 'd':				/* Delete	*/
X	/* Rebuild the list without `group'; if that leaves nothing,
X	 * use "junk". */
X	for (i = 0; i < nng; i++)
X	    if (!EQ(ng[i], group)) {
X		if (p > hp->nbuf)
X		    *p++ = NGDELIM;
X		p += APPEND(p, ng[i]);
X	    }
X	if (p == hp->nbuf)
X	    Strcpy(hp->nbuf, "junk");
X	break;
X    case 'k':				/* Kill		*/
X	/* Reject the posting, naming the second token (or else the
X	 * group) as the one responsible. */
X	Fprintf(stderr, "%s:  Your posting to %s was killed by %s.\n",
X		Pname, hp->nbuf, n > 1 ? tokens[1] : group);
X	exit(EX_NOPERM);
X	/* NOTREACHED */
X    case 'm':				/* Move		*/
X	/* Replace `group' by the second token.  The else below pairs
X	 * with the inner if (nng == 1). */
X	if (n > 1)
X	    if (nng == 1)
X		Strcpy(hp->nbuf, tokens[1]);
X	    else
X		for (i = 0; i < nng; i++) {
X		    if (p > hp->nbuf)
X			*p++ = NGDELIM;
X		    p += APPEND(p, EQ(ng[i], group) ? tokens[1] : ng[i]);
X		}
X	break;
X    case 'q':				/* Quiet kill	*/
X#ifdef	DEBUG
X	if (Buggy) {
X	    (void)printf("Quiet kill (ignored for debugging).\n");
X	    break;
X	}
X#endif	/* DEBUG */
X	/* Eat the message up, and pretend we delivered it. */
X	while (fgets(buff, sizeof buff, stdin))
X	    ;
X	exit(EX_OK);
X	/* NOTREACHED */
X    }
X
X    /* Any other first letter falls through to here with no change. */
X    SplitFree(&tokens);
X    SplitFree(&ng);
X}
X
X
X/*
X**  Split a line that looks like XpatternXcommandX into the pattern and
X**  the command.  Initialize the RE matcher with the pattern, and return
X**  the command.  The line is modified in place.  Returns NULL for
X**  comments and blank lines, and (after a message on stderr, tagged
X**  with lineno) for a line with no command or a pattern that re_comp
X**  rejects.
X*/
XSTATIC char *
XParsePattern(p, lineno)
X    register char	*p;
X    int			 lineno;
X{
X    register char	*cp;
X    register char	*command;
X    register char	 delim;
X    char		*RE;
X
X    /* Ignore comments and blank lines. */
X    if (*p == '#' || *p == '\0')
X	return(NULL);
X
X    /* The first character is the delimiter.  Copy the pattern down over
X     * itself (cp reads, p writes) so that a backslash-delimiter pair
X     * collapses to a plain delimiter character. */
X    for (delim = *p++, RE = cp = p, command = NULL; *cp; *p++ = *cp++)
X	if (*cp == '\\' && cp[1] == delim)
X	    cp++;
X	else if (*cp == delim) {
X	    /* Found delimiter; mark command, terminate RE. */
X	    command = ++cp;
X	    *p = '\0';
X	    break;
X	}
X
X    /* Here cp is reused to hold re_comp's error message, if any. */
X    if (command == NULL || *command == '\0')
X	Fprintf(stderr, "%s:  Incomplete regular expression, line %d.\n",
X		Pname, lineno);
X    else if (cp = re_comp(RE))
X	Fprintf(stderr, "%s:  Bad regular expression, line %d: %s.\n",
X		Pname, lineno, cp);
X    else
X	return(command);
X
X#ifdef	lint
X    /* My, my, aren't we anal. */
X    (void)re_subs("", "");
X    re_modw("");
X#endif	/* lint */
X
X    return(NULL);
X}
X
X
X/*
X**  Change newsgroups if the Subject:, Keywords:, or Summary: match a
X**  pattern found in the newsgroup remap file.  For every group in
X**  hp->nbuf the group's mapping file, if readable, is scanned; each
X**  line whose pattern matches one of the lowercased headers has its
X**  command run through DoCommand, which may rewrite hp->nbuf.
X*/
XSTATIC void
XEditnewsgroups(hp)
X    register HBUF		 *hp;
X{
X    register char		 *p;
X    register int		  n;
X    register int		  i;
X    register int		  j;
X    register int		  t;
X    char			**groups;
X    char			**mapline;
X    char			 *hdrline[4];
X    char			  buff[LG_SIZE];
X
X    /* Copy some headers, but if nothing's there, give up.  Headers
X     * that yield nothing leave no gap in hdrline[]. */
X    i = 0;
X    if (hdrline[i] = MakeLower(COPY(hp->title)))
X	i++;
X    if (hdrline[i] = MakeLower(COPY(hp->keywords)))
X	i++;
X    if (hdrline[i] = MakeLower(COPY(hp->summary)))
X	i++;
X    if (i == 0)
X	return;
X    hdrline[i] = NULL;
X
X    /* For all the newsgroups, see if there's a mapping file. */
X    for (n = Split(hp->nbuf, &groups, NGDELIM), i = 0; i < n; i++) {
X	if (groups[i] == NULL || groups[i][0] == '\0')
X	    continue;
X
X	/* Get the name of the mapping file; which of the three schemes
X	 * is used depends on the configuration. */
X#ifdef	IN_ONEPLACE
X	Strcpy(buff, IN_ONEPLACE);
X#endif	/* IN_ONEPLACE */
X#ifdef	IN_SPOOLDIR
X	{
X	    register char	*q;
X
X	    /* Group name with dots turned into slashes, below the
X	     * spool directory. */
X	    for (p = buff + APPEND(buff, IN_SPOOLDIR), q = groups[i]; *q; q++)
X		*p++ = *q == '.' ? '/' : *q;
X	    Strcpy(p, "/recnews.cmd");
X	}
X#endif	/* IN_SPOOLDIR */
X#ifdef	IN_CMDDIR
X	Sprintf(buff, "%s/%s", IN_CMDDIR, groups[i]);
X#endif	/* IN_CMDDIR */
X
X	if (access(buff, R_OK) >= 0 && (mapline = ReadFile(buff))) {
X	    /* For all lines in the file, if there's a command and the
X	     * pattern matches, execute the command.  The array index is
X	     * zero-based, so add one to get the line number that
X	     * ParsePattern reports in its diagnostics. */
X	    for (j = 0; mapline[j]; j++)
X		if (p = ParsePattern(mapline[j], j + 1))
X		    for (t = 0; hdrline[t]; t++)
X			if (re_exec(hdrline[t]) == 1) {
X			    DoCommand(hp, p, groups[i]);
X			    break;
X			}
X	    FreeFile(mapline);
X	}
X    }
X
X    /* Free dynamic space. */
X    for (i = 0; hdrline[i]; i++)
X	free(hdrline[i]);
X    SplitFree(&groups);
X}
X
X
X/*
X**  Signal-catcher and child-reapers.
X*/
X
X
X/*
X**  Exit such that sendmail will try again later.  Installed as the
X**  SIGTERM handler.
X*/
XSTATIC CATCHER
Xtempfail()
X{
X    exit(EX_TEMPFAIL);
X}
X
X
X/*
X**  Reap the inews child properly, and exit with his exit code, so that
X**  ultimate success or failure rests with inews.  Never returns.
X*/
XSTATIC CATCHER
Xchildgone()
X{
X    WAITER		status;
X    register int	reaped;
X
X    /* A failed wait, or some child other than ours, is an OS problem. */
X    reaped = wait(&status);
X    if (reaped == -1 || reaped != ChildPid)
X	exit(EX_OSERR);
X
X    /* Was it a good death? */
X    if (W_exsig(status)) {
X	Fprintf(stderr, "%s:  Child %d killed by signal %d.\n",
X		Pname, reaped, W_exsig(status));
X	if (W_core(status))
X	    Fprintf(stderr, "%s:  Child %d dumped core.\n", Pname, reaped);
X	exit(EX_SOFTWARE);
X    }
X
X#ifdef	SUBMIT
X    /* We need a way to tell temporary errors from permanent ones.  Inews
X     * will reject messages because of too little text, too much quoting,
X     * etc., and the message sits in the queue forever.  Until then we'll
X     * have to lose messages on any error. */
X    exit(0);
X#else
X    exit(W_excode(status));
X#endif	/* SUBMIT */
X}
X
X
X
X/*
X**  Remove each dot and turn the character after it into upper case if
X**  it is lower case.  Of two dots in a row one dot is kept, and a dot
X**  at the very end of the string is kept too.  The argument is modified
X**  in place and returned; a null pointer is returned unchanged.
X*/
Xchar *
XHackPeriods(string)
X    char		*string;
X{
X    register char	*dst;
X    register char	*src;
X
X    if (string == NULL)
X	return(string);
X
X    dst = src = string;
X    while (*src) {
X	if (*src == '.') {
X	    /* Skip the dot and look at what follows it. */
X	    src++;
X	    if (*src == '\0') {
X		/* Nothing follows:  keep the trailing dot. */
X		*dst++ = '.';
X		break;
X	    }
X	    if (islower(*src))
X		*src = toupper(*src);
X	}
X	*dst++ = *src++;
X    }
X    *dst = '\0';
X    return(string);
X}
X
X
X
X
Xmain(ac, av)
X    register int	  ac;
X    register char	 *av[];
X{
X    register char	**vec;
X    register char	 *p;
X    register FILE	 *F;
X    register FILE	 *Infile;
X    HBUF		  H;
X    char		**iv;
X    char		  buff[BUFSIZ];
X    int			  fd[2];
X    int			  Checkit;
X
X    Pname = ((Pname = RDX(av[0], '/')) ? Pname + 1 : av[0]);
X    Infile = stdin;
X
X    /* So that cores will actually drop... */
X    if (chdir("/tmp") < 0) {
X	Fprintf(stderr, "%s:  Can't chdir(/tmp), %s.\n", Pname, Estring());
X	exit(EX_TEMPFAIL);
X    }
X
X    /* If someone wants to shut down the system, tell sendmail to
X     * try again later. */
X    Signal(SIGTERM, tempfail);
X
X#ifdef	SENDMAIL
X    /* First read should fetch us the UNIX From_ line.  Not done in MMDF. */
X    if (fgets(buff, sizeof buff, Infile) == NULL)
X	exit(EX_NOINPUT);
X
X    if (!EQn(buff, "From ", 5)) {
X	Fprintf(stderr, "%s:  Input didn't start with UNIX From line:\n",
X		Pname);
X	Fprintf(stderr,"\t%s.\n", buff);
X	exit(EX_DATAERR);
X    }
X#endif	/* SENDMAIL */
X
X    /* Read the mail header. */
X    rfc822read(&H, Infile, buff, sizeof buff);
X
X    /* Process the argument list, copying anything that we don't recognize
X     * over to the inews argument list and changing things as we see fit. */
X    Checkit = FALSE;
X    for (vec = iv = NEW(char*, ac+2), *vec++ = INEWS, *vec++ = "-h"; p = *++av; )
X	if (p[0] != '-')
X	    *vec++ = p;
X	 else
X	    switch(p[1]) {
X	    case 'x':
X		SubjReqd = FALSE;
X		/* FALLTHROUGH */
X	    default:
X		*vec++ = p;
X		break;
X	    case '.':
X#ifdef	DEBUG
X		Buggy++;
X#endif	/* DEBUG */
X		break;
X	    case 'n':
X		/* Newsgroup this messages goes to. */
X		Strcpy(H.nbuf, p[2] ? &p[2] : *++av);
X		break;
X	    case 'o':
X		/* Default organization. */
X		if (H.organization[0] == '\0')
X		    Strcpy(H.organization, HackPeriods(p[2] ? &p[2] : *++av));
X		else if (p[2] == '\0')
X		    av++;
X		SubjReqd = FALSE;
X		break;
X	    case 'F':
X		Checkit = TRUE;
X		break;
X	    }
X    *vec++ = NULL;
X
X    /* Bash on the mail header. */
X    if (p = HackHeader(&H, SubjReqd)) {
X	Fprintf(stderr, "%s:  Rejected by netnews because:\n", Pname);
X	Fprintf(stderr, "\t%s.\n", p);
X	if (H.nbuf[0])
X	    Fprintf(stderr, "\tIt was going into the newsgroup%s %s.\n",
X		    IDX(H.nbuf, NGDELIM) ? "s" : "", H.nbuf);
X	exit(EX_DATAERR);
X    }
X    Editnewsgroups(&H);
X
X#ifdef	DEBUG
X    if (Buggy) {
X	for (vec = iv; *vec; vec++)
X	    (void)printf(" |%s| ", *vec);
X	(void)printf("\n");
X	if (!rfc822write(&H, stdout))
X	    Fprintf(stderr, "%s:  Can't write header, %s.\n",
X		    Pname, Estring());
X	while (fgets(buff, sizeof buff, Infile))
X	    Fputs(buff, stdout);
X	exit(EX_OK);
X    }
X#endif	/* DEBUG */
X
X    if (Checkit && (Infile = IsSubRequest(Infile)) == NULL) {
X	Fprintf(stderr, "%s:  Rejected by netnews becase:\n", Pname);
X	Fprintf(stderr, "\tIt seems like a subscription request.\n");
X	exit(EX_DATAERR);
X    }
X
X    /* Get ready to spawn an inews. */
X    if (pipe(fd) < 0) {
X	Fprintf(stderr, "%s:  Can't pipe, %s.\n", Pname, Estring());
X	exit(EX_TEMPFAIL);
X    }
X    Fflush(stderr);
X    Fflush(stdout);
X#ifdef	SIGCHLD
X    Signal(SIGCHLD, childgone);
X#endif	/* SIGCHLD */
X#ifdef	SIGCLD
X    Signal(SIGCLD, childgone);
X#endif	/* SIGCLD */
X
X    if ((ChildPid = fork()) < 0) {
X	Fprintf(stderr,"%s:  Can't fork, %s.\n", Pname, Estring());
X	exit(EX_TEMPFAIL);
X    }
X    if (ChildPid == 0) {
X	/* Redirect I/O; it's unlikely the test below will fail. */
X	if (fd[READER] != STDIN) {
X	    Close(STDIN);
X	    if (dup(fd[READER]) != STDIN)
X		Fprintf(stderr, "%s:  Can't redirect input, %s.\n",
X			Pname, Estring());
X	}
X	Close(fd[READER]);
X	Close(fd[WRITER]);
X	(void)execv(iv[0], iv);
X	Fprintf(stderr, "%s:  Can't exec %s, %s.\n", Pname, iv[0], Estring());
X	exit(EX_OSERR);
X    }
X
X    /* Set things up after the fork. */
X    Close(fd[READER]);
X    Signal(SIGPIPE, childgone);
X    if ((F = fdopen(fd[WRITER], "w")) == NULL)
X	exit(EX_OSERR);
X
X    /* Stuff the header. */
X    if (!rfc822write(&H, F)) {
X	Fprintf(stderr, "%s:  Can't write header, %s.\n", Pname, Estring());
X	exit(EX_IOERR);
X    }
X
X    /* Write the rest of the message. */
X    while (fgets(buff, sizeof buff, Infile)) {
X	Fputs(buff, F);
X	if (ferror(F))
X	    break;
X    }
X
X    /* Close down the pipe. */
X    Fflush(F);
X    if (ferror(F)) {
X	Fprintf(stderr, "%s:  Error flushing pipe to news, %s.\n",
X		Pname, Estring());
X	exit(EX_IOERR);
X    }
X    if (fclose(F) == EOF)
X	Fprintf(stderr, "%s:  Error closing pipe to news, %s.\n",
X		Pname, Estring());
X
X    /* Wait for inews, and exit as it does. */
X    childgone();
X}
END_OF_FILE
if test 14007 -ne `wc -c <'mail2news.c'`; then
    echo shar: \"'mail2news.c'\" unpacked with wrong size!
fi
# end of 'mail2news.c'
fi
if test -f 'regex.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'regex.c'\"
else
echo shar: Extracting \"'regex.c'\" \(19006 characters\)
sed "s/^X//" >'regex.c' <<'END_OF_FILE'
X#ifndef	lint
Xstatic char RCS[] =
X	"$Header: regex.c,v 1.2 87/09/09 17:24:10 rsalz Release1 $";
X#endif	/* lint */
X/*
X * regex - Regular expression pattern matching
X *         and replacement
X *
X *
X * By:  Ozan S. Yigit (oz)
X *      Dept. of Computer Science
X *      York University
X *
X *
X * These routines are the PUBLIC DOMAIN equivalents 
X * of regex routines as found in 4.nBSD UN*X, with minor
X * extensions.
X *
X * These routines are derived from various implementations
X * found in software tools books, and Conroy's grep. They
X * are NOT derived from licensed/restricted software.
X * For more interesting/academic/complicated implementations,
X * see Henry Spencer's regexp routines, or GNU Emacs pattern
X * matching module.
X *
X * Routines:
X *      re_comp:        compile a regular expression into
X *                      a DFA.
X *
X *			char *re_comp(s)
X *			char *s;
X *
X *      re_exec:        execute the DFA to match a pattern.
X *
X *			int re_exec(s)
X *			char *s;
X *
X *	re_modw		change re_exec's understanding of what
X *			a "word" looks like (for \< and \>)
X *			by adding into the hidden word-character 
X *			table.
X *
X *			void re_modw(s)
X *			char *s;
X *
X *      re_subs:	substitute the matched portions in
X *              	a new string.
X *
X *			int re_subs(src, dst)
X *			char *src;
X *			char *dst;
X *
X *	re_fail:	failure routine for re_exec.
X *
X *			void re_fail(msg, op)
X *			char *msg;
X *			char op;
X *  
X * Regular Expressions:
X *
X *      [1]     char    matches itself, unless it is a special
X *                      character (metachar): . \ [ ] * + ^ $
X *
X *      [2]     .       matches any character.
X *
X *      [3]     \       matches the character following it, except
X *			when followed by a left or right round bracket,
X *			a digit 1 to 9 or a left or right angle bracket. 
X *			(see [7], [8] and [9])
X *			It is used as an escape character for all 
X *			other meta-characters, and itself. When used
X *			in a set ([4]), it is treated as an ordinary
X *			character.
X *
X *      [4]     [set]   matches one of the characters in the set.
X *                      If the first character in the set is "^",
X *                      it matches a character NOT in the set. A
X *                      shorthand S-E is used to specify a set of
X *                      characters S upto E, inclusive. The special
X *                      characters "]" and "-" have no special
X *                      meaning if they appear as the first chars
X *                      in the set.
X *                      examples:        match:
X *
X *                              [a-z]    any lowercase alpha
X *
X *                              [^]-]    any char except ] and -
X *
X *                              [^A-Z]   any char except uppercase
X *                                       alpha
X *
X *                              [a-zA-Z] any alpha
X *
X *      [5]     *       any regular expression form [1] to [4], followed by
X *                      closure char (*) matches zero or more matches of
X *                      that form.
X *
X *      [6]     +       same as [5], except it matches one or more.
X *
X *      [7]             a regular expression in the form [1] to [10], enclosed
X *                      as \(form\) matches what form matches. The enclosure
X *                      creates a set of tags, used for [8] and for
X *                      pattern substitution. The tagged forms are numbered
X *			starting from 1.
X *
X *      [8]             a \ followed by a digit 1 to 9 matches whatever a
X *                      previously tagged regular expression ([7]) matched.
X *
X *	[9]	\<	a regular expression starting with a \< construct
X *		\>	and/or ending with a \> construct, restricts the
X *			pattern matching to the beginning of a word, and/or
X *			the end of a word. A word is defined to be a character
X *			string beginning and/or ending with the characters
X *			A-Z a-z 0-9 and _. It must also be preceded and/or
X *			followed by any character outside those mentioned.
X *
X *      [10]            a composite regular expression xy where x and y
X *                      are in the form [1] to [10] matches the longest
X *                      match of x followed by a match for y.
X *
X *      [11]	^	a regular expression starting with a ^ character
X *		$	and/or ending with a $ character, restricts the
X *                      pattern matching to the beginning of the line,
X *                      or the end of line. [anchors] Elsewhere in the
X *			pattern, ^ and $ are treated as ordinary characters.
X *
X *
X * Acknowledgements:
X *
X *	HCR's Hugh Redelmeier has been most helpful in various
X *	stages of development. He convinced me to include BOW
X *	and EOW constructs, originally invented by Rob Pike at
X *	the University of Toronto.
X *
X * References:
X *              Software tools			Kernighan & Plauger
X *              Software tools in Pascal        Kernighan & Plauger
X *              Grep [rsx-11 C dist]            David Conroy
X *		ed - text editor		Un*x Programmer's Manual
X *		Advanced editing on Un*x	B. W. Kernighan
X *		RegExp routines			Henry Spencer
X *
X * Notes:
X *
X *	This implementation uses a bit-set representation for character
X *	classes for speed and compactness. Each character is represented 
X *	by one bit in a 128-bit block. Thus, CCL or NCL always takes a 
X *	constant 16 bytes in the internal dfa, and re_exec does a single
X *	bit comparison to locate the character in the set.
X *
X * Examples:
X *
X *	pattern:	foo*.*
X *	compile:	CHR f CHR o CLO CHR o END CLO ANY END END
X *	matches:	fo foo fooo foobar fobar foxx ...
X *
X *	pattern:	fo[ob]a[rz]	
X *	compile:	CHR f CHR o CCL bitset CHR a CCL bitset END
X *	matches:	fobar fooar fobaz fooaz
X *
X *	pattern:	foo\\+
X *	compile:	CHR f CHR o CHR o CHR \ CLO CHR \ END END
X *	matches:	foo\ foo\\ foo\\\  ...
X *
X *	pattern:	\(foo\)[1-3]\1	(same as foo[1-3]foo)
X *	compile:	BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END
X *	matches:	foo1foo foo2foo foo3foo
X *
X *	pattern:	\(fo.*\)-\1
X *	compile:	BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END
X *	matches:	foo-foo fo-fo fob-fob foobar-foobar ...
X * 
X */
X
X/*
X * Fixed limits: MAXDFA is the size of the compiled-pattern array
X * dfa[]; MAXTAG is the size of the tag stack and of the bopat[]
X * and eopat[] arrays (slot 0 is the whole match, tags are 1 to 9).
X */
X#define MAXDFA  1024
X#define MAXTAG  10
X
X/*
X * Values of sta: re_comp sets it to NOP when it starts on a new
X * pattern and to OKP once that pattern has compiled.
X */
X#define OKP     1
X#define NOP     0
X
X/*
X * Opcodes stored in dfa[] by re_comp and interpreted by pmatch.
X * Operands follow the opcode in the array:
X *	CHR		the literal character
X *	CCL, NCL	a BITBLK-byte bitset (class, negated class)
X *	BOT, EOT, REF	the tag number
X *	CLO		the closed element, then END
X * ANY, BOL, EOL, BOW and EOW take no operand.
X */
X#define CHR     1
X#define ANY     2
X#define CCL     3
X#define NCL     4
X#define BOL     5
X#define EOL     6
X#define BOT     7
X#define EOT     8
X#define BOW	9
X#define EOW	10
X#define REF     11
X#define CLO     12
X
X/* END terminates the whole automaton and each closure operand. */
X#define END     0
X
X/*
X * The following defines are not meant
X * to be changeable. They are for readability
X * only.
X *
X *	MAXCHR	number of characters a class bitset describes
X *	CHRBIT	bits per bitset byte
X *	BITBLK	bytes per bitset; parenthesized so that it is
X *		safe inside any larger expression
X *	BLKIND	mask for the bits of a character that select
X *		the bitset byte (used as (c & BLKIND) >> 3)
X *	BITIND	mask for the bits of a character that select
X *		the bit within that byte
X *	ASCIIB	seven-bit mask
X */
X#define MAXCHR	128
X#define CHRBIT	8
X#define BITBLK	(MAXCHR/CHRBIT)
X#define BLKIND	0170
X#define BITIND	07
X
X#define ASCIIB	0177
X
Xtypedef /*unsigned*/ char CHAR;
X
Xstatic int  tagstk[MAXTAG];             /* subpat tag stack..*/
Xstatic CHAR dfa[MAXDFA];		/* automaton..       */
Xstatic int  sta = NOP;               	/* status of lastpat */
X
Xstatic CHAR bittab[BITBLK];		/* bit table for CCL */
X
Xstatic void
Xchset(c) register CHAR c; { bittab[((c)&BLKIND)>>3] |= 1<<((c)&BITIND); }
X
X#define badpat(x)	return(*dfa = END, x)
X#define store(x)	*mp++ = x
X 
Xchar *     
Xre_comp(pat)
Xchar *pat;
X{
X	register char *p;               /* pattern pointer   */
X	register CHAR *mp=dfa;          /* dfa pointer       */
X	register CHAR *lp;              /* saved pointer..   */
X	register CHAR *sp=dfa;          /* another one..     */
X
X	register int tagi = 0;          /* tag stack index   */
X	register int tagc = 1;          /* actual tag count  */
X
X	register int n;
X	int c1, c2;
X		
X	if (!pat || !*pat)
X		if (sta)
X			return(0);
X		else
X			badpat("No previous regular expression");
X	sta = NOP;
X
X	for (p = pat; *p; p++) {
X		lp = mp;
X		switch(*p) {
X
X		case '.':               /* match any char..  */
X			store(ANY);
X			break;
X
X		case '^':               /* match beginning.. */
X			if (p == pat)
X				store(BOL);
X			else {
X				store(CHR);
X				store(*p);
X			}
X			break;
X
X		case '$':               /* match endofline.. */
X			if (!*(p+1))
X				store(EOL);
X			else {
X				store(CHR);
X				store(*p);
X			}
X			break;
X
X		case '[':               /* match char class..*/
X
X			if (*++p == '^') {
X				store(NCL);
X				p++;
X			}
X			else
X				store(CCL);
X
X			if (*p == '-')		/* real dash */
X				chset(*p++);
X			if (*p == ']')		/* real brac */
X				chset(*p++);
X			while (*p && *p != ']') {
X				if (*p == '-' && *(p+1) && *(p+1) != ']') {
X					p++;
X					c1 = *(p-2) + 1;
X					c2 = *p++;
X					while (c1 <= c2)
X						chset(c1++);
X				}
X#ifdef EXTEND
X				else if (*p == '\\' && *(p+1)) {
X					p++;
X					chset(*p++);
X				}
X#endif
X				else
X					chset(*p++);
X			}
X			if (!*p)
X				badpat("Missing ]");
X
X			for (n = 0; n < BITBLK; bittab[n++] = (char) 0)
X				store(bittab[n]);
X	
X			break;
X
X		case '*':               /* match 0 or more.. */
X		case '+':               /* match 1 or more.. */
X			if (p == pat)
X				badpat("Empty closure");
X			lp = sp;                /* previous opcode */
X			if (*lp == CLO)         /* equivalence..   */
X				break;
X			switch(*lp) {
X
X			case BOL:
X			case BOT:
X			case EOT:
X			case BOW:
X			case EOW:
X			case REF:
X				badpat("Illegal closure");
X			default:
X				break;
X			}
X
X			if (*p == '+')
X				for (sp = mp; lp < sp; lp++)
X					store(*lp);
X
X			store(END);
X			store(END);
X			sp = mp;
X			while (--mp > lp)
X				*mp = mp[-1];
X			store(CLO);
X			mp = sp;
X			break;
X
X		case '\\':              /* tags, backrefs .. */
X			switch(*++p) {
X
X			case '(':
X				if (tagc < MAXTAG) {
X					tagstk[++tagi] = tagc;
X					store(BOT);
X					store(tagc++);
X				}
X				else
X					badpat("Too many \\(\\) pairs");
X				break;
X			case ')':
X				if (*sp == BOT)
X					badpat("Null pattern inside \\(\\)");
X				if (tagi > 0) {
X					store(EOT);
X					store(tagstk[tagi--]);
X				}
X				else
X					badpat("Unmatched \\)");
X				break;
X			case '<':
X				store(BOW);
X				break;
X			case '>':
X				if (*sp == BOW)
X					badpat("Null pattern inside \\<\\>");
X				store(EOW);
X				break;
X			case '1':
X			case '2':
X			case '3':
X			case '4':
X			case '5':
X			case '6':
X			case '7':
X			case '8':
X			case '9':
X				n = *p-'0';
X				if (tagi > 0 && tagstk[tagi] == n)
X					badpat("Cyclical reference");
X				if (tagc > n) {
X					store(REF);
X					store(n);
X				}
X				else
X					badpat("Undetermined reference");
X				break;
X#ifdef EXTEND
X			case 'b':
X				store(CHR);
X				store('\b');
X				break;
X			case 'n':
X				store(CHR);
X				store('\n');
X				break;
X			case 'f':
X				store(CHR);
X				store('\f');
X				break;
X			case 'r':
X				store(CHR);
X				store('\r');
X				break;
X			case 't':
X				store(CHR);
X				store('\t');
X				break;
X#endif
X			default:
X				store(CHR);
X				store(*p);
X			}
X			break;
X
X		default :               /* an ordinary char  */
X			store(CHR);
X			store(*p);
X			break;
X		}
X		sp = lp;
X	}
X	if (tagi > 0)
X		badpat("Unmatched \\(");
X	store(END);
X	sta = OKP;
X	return(0);
X}
X
X
Xstatic char *bol;
Xstatic char *bopat[MAXTAG];
Xstatic char *eopat[MAXTAG];
Xchar *pmatch();
X
X/*
X * re_exec:
X *	run the automaton left in dfa[] by re_comp over the
X *	string lp.
X *
X *	Returns 1 when some part of lp matches, having set
X *	bopat[0] and eopat[0] to the beginning and the end of
X *	the matched fragment; returns 0 otherwise.
X *
X *	The first opcode, dfa[0], selects how the search runs:
X *		END	no usable pattern (re_comp failed);
X *			fail at once.
X *		BOL	anchored pattern; try pmatch only at the
X *			start of lp.
X *		CHR	skip to the first occurrence of the
X *			literal, then search from there.
X *		other	try pmatch at each successive character.
X *
X *	Except for a BOL pattern, pmatch is never tried at the
X *	terminating NUL, so an empty lp cannot match.
X */
X
Xint
Xre_exec(lp)
Xregister char *lp;
X{
X	register CHAR *prog = dfa;
X	register char *end = 0;
X	register char first;
X	register int t;
X
X	bol = lp;
X
X	/* forget the tags of any earlier match */
X	for (t = 0; t < MAXTAG; t++)
X		bopat[t] = 0;
X
X	if (*prog == END)		/* munged automaton. fail always */
X		return(0);
X
X	if (*prog == BOL)		/* anchored: match from BOL only */
X		end = pmatch(lp, prog);
X	else {
X		if (*prog == CHR) {	/* ordinary char: locate it fast */
X			first = *(prog+1);
X			while (*lp && *lp != first)
X				lp++;
X			if (!*lp)
X				return(0);
X		}
X		/* regular matching all the way. */
X		for (; *lp; lp++)
X			if ((end = pmatch(lp, prog)))
X				break;
X	}
X
X	if (!end)
X		return(0);
X
X	bopat[0] = lp;
X	eopat[0] = end;
X	return(1);
X}
X
X/* 
X * pmatch: 
X *	internal routine for the hard part
X *
X * 	This code is mostly snarfed from an early
X * 	grep written by David Conroy. The backref and
X * 	tag stuff, and various other mods are by oZ.
X *
X *	special cases: (dfa[n], dfa[n+1])
X *		CLO ANY
X *			We KNOW ".*" will match ANYTHING
X *			upto the end of line. Thus, go to
X *			the end of line straight, without
X *			calling pmatch recursively. As in
X *			the other closure cases, the remaining
X *			pattern must be matched by moving
X *			backwards on the string recursively,
X *			to find a match for xy (x is ".*" and 
X *			y is the remaining pattern) where
X *			the match satisfies the LONGEST match
X *			for x followed by a match for y.
X *		CLO CHR
X *			We can again scan the string forward
X *			for the single char without recursion, 
X *			and at the point of failure, we execute 
X *			the remaining dfa recursively, as
X *			described above.
X *
X *	At the end of a successful match, bopat[n] and eopat[n]
X *	are set to the beginning and end of subpatterns matched
X *	by tagged expressions (n = 1 to 9).	
X *
X */
X
Xextern void re_fail();
X
X/*
X * character classification table for word boundary
X * operators BOW and EOW. the reason for not using 
X * ctype macros is that we can let the user add into 
X * our own table. see re_modw. This table is not in
X * the bitset form, since we may wish to extend it
X * in the future for other character classifications. 
X *
X *	TRUE for 0-9 A-Z a-z _
X */
Xstatic char chrtyp[MAXCHR] = {
X	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
X	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
X	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
X	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
X	0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 
X	1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 
X	0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 
X	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
X	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
X	1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 
X	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
X	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
X	1, 1, 1, 0, 0, 0, 0, 0
X	};
X
X/* inascii: keep only the low seven bits of x, an index below MAXCHR */
X#define inascii(x)	(0177&(x))
X/* iswordc: the chrtyp[] entry for x; re_modw also assigns through it */
X#define iswordc(x) 	chrtyp[inascii(x)]
X/* isinset: nonzero when the bit for character y is set in bitset x */
X#define isinset(x,y) 	((x)[((y)&BLKIND)>>3] & (1<<((y)&BITIND)))
X
X/*
X * skip values for CLO XXX to skip past the closure
X *
X * Each value is counted from the operand's opcode (pmatch has
X * already stepped over CLO itself) and includes the operand's END.
X */
X
X#define ANYSKIP	2 	/* CLO ANY END ...	   */
X#define CHRSKIP	3	/* CLO CHR chr END ...	   */
X#define CCLSKIP 18	/* CLO CCL 16bytes END ... */
X
Xstatic char *
Xpmatch(lp, ap)
Xregister char *lp;
Xregister CHAR *ap;
X{
X	register char *e;		/* extra pointer for CLO */
X	register char *bp;		/* beginning of subpat.. */
X	register char *ep;		/* ending of subpat..	 */
X	register int op, c, n;
X	char *are;			/* to save the line ptr. */
X
X	while ((op = *ap++) != END)
X		switch(op) {
X
X		case CHR:
X			if (*lp++ != *ap++)
X				return(0);
X			break;
X		case ANY:
X			if (!*lp++)
X				return(0);
X			break;
X		case CCL:
X			c = *lp++;
X			if (!isinset(ap,c))
X				return(0);
X			ap += BITBLK;
X			break;
X		case NCL:
X			c = *lp++;
X			if (isinset(ap,c))
X				return(0);
X			ap += BITBLK;
X			break;
X		case BOL:
X			if (lp != bol)
X				return(0);
X			break;
X		case EOL:
X			if (*lp)
X				return(0);
X			break;
X		case BOT:
X			bopat[*ap++] = lp;
X			break;
X		case EOT:
X			eopat[*ap++] = lp;
X			break;
X 		case BOW:
X			if (!(lp!=bol && iswordc(lp[-1])) && iswordc(*lp))
X				break;
X			return(0);
X		case EOW:
X			if ((lp!=bol && iswordc(lp[-1])) && !iswordc(*lp))
X				break;
X			return(0);
X		case REF:
X			n = *ap++;
X			bp = bopat[n];
X			ep = eopat[n];
X			while (bp < ep)
X				if (*bp++ != *lp++)
X					return(0);
X			break;
X		case CLO:
X			are = lp;
X			switch(*ap) {
X
X			case ANY:
X				while (*lp)
X					lp++;
X				n = ANYSKIP;
X				break;
X			case CHR:
X				c = *(ap+1);
X				while (*lp && c == *lp)
X					lp++;
X				n = CHRSKIP;
X				break;
X			case CCL:
X			case NCL:
X				while (*lp && (e = pmatch(lp, ap)))
X					lp = e;
X				n = CCLSKIP;
X				break;
X			default:
X				re_fail("closure: bad dfa.", *ap);
X				return(0);
X			}
X
X			ap += n;
X
X			while (lp >= are) {
X				if (e = pmatch(lp, ap))
X					return(e);
X				--lp;
X			}
X			return(0);
X		default:
X			re_fail("re_exec: bad dfa.", op);
X			return(0);
X		}
X	return(lp);
X}
X
X/*
X * re_modw:
X *	add new characters into the word table to
X *	change the re_exec's understanding of what
X *	a word should look like. Note that we only
X *	accept additions into the word definition.
X *
X *	If the string parameter is 0 or null string,
X *	the table is reset back to the default, which
X *	contains A-Z a-z 0-9 _. [We use the compact
X *	bitset representation for the default table]
X *
X */
X
Xstatic char deftab[16] = {	
X	0, 0, 0, 0, 0, 0, 377, 003, 376, 377, 377, 207,  
X	376, 377, 377, 007 
X}; 
X
Xvoid
Xre_modw(s)
Xregister char *s;
X{
X	register int i;
X
X	if (!s || !*s) {
X		for (i = 0; i < MAXCHR; i++)
X			if (!isinset(deftab,i))
X				iswordc(i) = 0;
X	}
X	else
X		while(*s)
X			iswordc(*s++) = 1;
X}
X
X/*
X * re_subs:
X *	substitute the matched portions of the src in
X *	dst.
X *
X *	&	substitute the entire matched pattern.
X *
X *	\digit	substitute a subpattern, with the given
X *		tag number. Tags are numbered from 1 to
X *		9. If the particular tagged subpattern
X *		does not exist, null is substituted.
X *		\0 is treated like &.
X *
X *	A backslash before any other character copies that
X *	character; a backslash that ends src is dropped.
X *
X *	Returns 1 with dst NUL-terminated, or 0 when src is
X *	empty, when there is no current match, or when matched
X *	text ends before its recorded end.  No length is passed
X *	for dst, so the caller must supply a buffer large enough
X *	for the expanded result.
X *
X */
Xint
Xre_subs(src, dst)
Xregister char *src;
Xregister char *dst;
X{
X	register char c;
X	register int  pin;
X	register char *bp;
X	register char *ep;
X
X	if (!*src || !bopat[0])
X		return(0);
X
X	while ((c = *src++)) {
X		switch(c) {
X
X		case '&':
X			pin = 0;
X			break;
X
X		case '\\':
X			c = *src++;
X			if (c >= '0' && c <= '9') {
X				pin = c - '0';
X				break;
X			}
X			if (!c) {
X				/*
X				 * src ended on the backslash: finish here
X				 * rather than read beyond the terminator.
X				 */
X				*dst = (char) 0;
X				return(1);
X			}
X			/* FALLTHROUGH: escaped ordinary character */
X
X		default:
X			*dst++ = c;
X			continue;
X		}
X
X		/* copy what tag pin matched, if it took part in the match */
X		if ((bp = bopat[pin]) && (ep = eopat[pin])) {
X			while (*bp && bp < ep)
X				*dst++ = *bp++;
X			if (bp < ep)
X				return(0);
X		}
X	}
X	*dst = (char) 0;
X	return(1);
X}
X			
X#ifdef DEBUG
X/*
X * symbolic - produce a symbolic dump of the
X *            dfa
X */
Xsymbolic(s) 
Xchar *s;
X{
X	(void)printf("pattern: %s\n", s);
X	(void)printf("dfacode:\n");
X	dfadump(dfa);
X}
X
Xstatic	
Xdfadump(ap)
XCHAR *ap;
X{
X	register int n;
X
X	while (*ap != END)
X		switch(*ap++) {
X		case CLO:
X			(void)printf("CLOSURE");
X			dfadump(ap);
X			switch(*ap) {
X			case CHR:
X				n = CHRSKIP;
X				break;
X			case ANY:
X				n = ANYSKIP;
X				break;
X			case CCL:
X			case NCL:
X				n = CCLSKIP;
X				break;
X			}
X			ap += n;
X			break;
X		case CHR:
X			(void)printf("\tCHR %c\n",*ap++);
X			break;
X		case ANY:
X			(void)printf("\tANY .\n");
X			break;
X		case BOL:
X			(void)printf("\tBOL -\n");
X			break;
X		case EOL:
X			(void)printf("\tEOL -\n");
X			break;
X		case BOT:
X			(void)printf("BOT: %d\n",*ap++);
X			break;
X		case EOT:
X			(void)printf("EOT: %d\n",*ap++);
X			break;
X		case BOW:
X			(void)printf("BOW\n");
X			break;
X		case EOW:
X			(void)printf("EOW\n");
X			break;
X		case REF:
X			(void)printf("REF: %d\n",*ap++);
X			break;
X		case CCL:
X			(void)printf("\tCCL [");
X			for (n = 0; n < MAXCHR; n++)
X				if (isinset(ap,(CHAR)n))
X					(void)printf("%c",n);
X			(void)printf("]\n");
X			ap += BITBLK;
X			break;
X		case NCL:
X			(void)printf("\tNCL [");
X			for (n = 0; n < MAXCHR; n++)
X				if (isinset(ap,(CHAR)n))
X					(void)printf("%c",n);
X			(void)printf("]\n");
X			ap += BITBLK;
X			break;
X		default:
X			(void)printf("bad dfa. opcode %o\n", ap[-1]);
X			exit(1);
X			break;
X		}
X}
X#endif
END_OF_FILE
if test 19006 -ne `wc -c <'regex.c'`; then
    echo shar: \"'regex.c'\" unpacked with wrong size!
fi
# end of 'regex.c'
fi
# Record that this part has been unpacked by leaving a marker file.
echo shar: End of archive 3 \(of 3\).
cp /dev/null ark3isdone
# Collect the numbers of the parts whose marker file is absent.
MISSING=""
for I in 1 2 3 ; do
    if test ! -f ark${I}isdone ; then
	MISSING="${MISSING} ${I}"
    fi
done
# All markers present: report success and remove them.
# Otherwise list the parts that still have to be unpacked.
if test "${MISSING}" = "" ; then
    echo You have unpacked all 3 archives.
    echo "Have fun; bug reports to rsalz at bbn.com"
    rm -f ark[1-9]isdone
else
    echo You still need to unpack the following archives:
    echo "        " ${MISSING}
fi
##  End of shell archive.
# Exit explicitly so the shell does not read the mail trailer below.
exit 0
-- 
Please send comp.sources.unix-related mail to rsalz at uunet.uu.net.



More information about the Alt.sources mailing list