slice utility (improved mailsplit)

Gary Puckering garyp at cognos.UUCP
Wed Dec 17 08:07:31 AEST 1986


Slice splits up a file into lots of little files.  It reads its input a
line at a time, and starts a new output file when

*	the input line matches a pattern, or
*	there have been n lines written to the current output file.

You can use it to split a mailbox or an archive of news articles into
one article per file, for example.  In fact, you can do this with about
5 lines of awk, but you run into problems with long lines (and speed,
if it bothers you!).

Slice was originally contributed by Russell Quinn as the program
"mailsplit".  Unlike mailsplit, however, slice allows multiple output
formats to be specified (rather than multiple input files).  This makes
it possible to deposit the pieces (slices!) into files named whatever
you want.  For example:

     slice <article -x '^--* [Cc]ut' README article.sh

will deposit everything up to the cut line into README and everything
after it into article.sh (the -x option causes the matched line to be
excluded).

There are even options to make slicing mailboxes and files containing
shell scripts easier (-m and -s).

There are some good examples in the man page.

Source, Makefile and manual entry enclosed.  To install, do the
following:

1:	Edit the Makefile: you'll need to alter the "R=/usr/local" if 
	you don't want slice to live in /usr/local/usr/bin.

2:	make slice

3:	have a play with it & satisfy yourself that it behaves reasonably

4:	make install

"make install" runs "$(MAKE) $(CLEAN)" afterwards.  If you don't
want to remove the binary, say

        CLEAN="" make -e install

at step 4.


--------------------- cut here ----------------------------------------
#!/bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #!/bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	Makefile
#	opts.h
#	slice.c
# This archive created: Tue Dec 16 17:04:43 1986
export PATH; PATH=/bin:$PATH
echo shar: extracting "'Makefile'" '(1221 characters)'
if test -f 'Makefile'
then
	echo shar: over-writing existing file "'Makefile'"
fi
sed 's/^X//' << \SHAR_EOF > 'Makefile'
X# Makefile for slice
X#
X# Originally contributed as mailsplit, written by:
X#   R E Quin, October 1986 University of Warwick (UK) Computer Science
X#   warwick!req     +44 203 523193
X#
X# Modified and recontributed by:
X#   Gary Puckering        3755 Riverside Dr.
X#   Cognos Incorporated   Ottawa, Ontario
X#   (613) 738-1440        CANADA  K1G 3N3
X#
X# This makefile is intended for the sys5 Augmented make.
X# 
XMAKE=make 
XCLEAN=clean 
XCC=cc 
XHACKS= 
XCFLAGS=-O $(HACKS)
X# R is the root of the filesystem -- i.e. where to install things.
X# The binaries are installed in $R/$(DESTDIR).
XR=/usr/local
XDESTDIR=$R/usr/bin 
XMANDIR=$R/usr/man/man1
XPROG=slice 
X
X# PROG is what to make; DESTDIR is where to put it.
X# HACKS are for -DBUGFIX style things.
X
X# R is intended to be the root of the filesystem if it isn't "/"
X
X# "make install " does a $(MAKE) $(CLEAN) at the end, so you can say
X# CLEAN=  make -e install
X# if you don't want to remove the garbage at the end, for example.
X# This is useful primarily for testing the install: entry!
X
X# Default target: build the program.
Xall: $(PROG)
X
X# Link step.  The header dependency belongs to the object file, not here.
Xslice: slice.o
X	$(CC) -o $(PROG) slice.o
X
X# slice.c includes opts.h, so the object must be rebuilt when either one
X# changes; the compile command comes from make's built-in .c.o rule.
Xslice.o: slice.c opts.h
X
X# Install the binary and the manual page, then tidy up via $(CLEAN).
Xinstall: slice
X	/bin/mv $(PROG) $(DESTDIR)
X	/bin/cp slice.1 $(MANDIR)
X	$(MAKE) $(CLEAN)
X
X# Remove build products.
Xclean:
X	rm -rf core *.o $(PROG) a.out
SHAR_EOF
if test 1221 -ne "`wc -c 'Makefile'`"
then
	echo shar: error transmitting "'Makefile'" '(should have been 1221 characters)'
fi
echo shar: extracting "'opts.h'" '(769 characters)'
if test -f 'opts.h'
then
	echo shar: over-writing existing file "'opts.h'"
fi
sed 's/^X//' << \SHAR_EOF > 'opts.h'
X
X#define FALSE 0
X#define TRUE 1
Xtypedef int bool;
X
X#define EXIT_SYNTAX 1	/* syntax error parsing commandline options */
X#define EXIT_SEMANT 2	/* options are correct but meaningless */
X#define EXIT_RUNERR 3	/* error opening a file, for example */
X#define EXIT_INTERN 4	/* internal error -- bug!! */
X
X#define nextstr(s,count,array,failure)	\
X	{if (((count)<2) && !((array)[0][1])) {failure;}\
X	else {if ((array)[0][1]) { s = &((array)[0][1]); } \
X	      else {s = array[1]; --count; array++;}}}
X
X#define DFLTNAME "slice"	/* input filename (for stdin) */
X#define BUFLEN BUFSIZ	/* the maximum length of an input line (incl. "\n\0") */
X#define MAXFILENAMELEN BUFSIZ	/* longer than the longest possible file name */
X#define DFLTOUTNAME	"%s:%03.d"	/* o/p file name format */
X
SHAR_EOF
if test 769 -ne "`wc -c 'opts.h'`"
then
	echo shar: error transmitting "'opts.h'" '(should have been 769 characters)'
fi
echo shar: extracting "'slice.c'" '(8964 characters)'
if test -f 'slice.c'
then
	echo shar: over-writing existing file "'slice.c'"
fi
sed 's/^X//' << \SHAR_EOF > 'slice.c'
X/* slice -- split files at lines that match a pattern */
X#include <stdio.h>
X#include <ctype.h>
X
X#include "opts.h"				/* defines nextstr() etc */
X
Xchar *progname = "slice";		/* for error messages; main() resets it from argv[0] */
Xchar *pattern = (char *) NULL;	/* reg expr used to split file; NULL until one is chosen */
Xchar **format;					/* ptr to the output-name format string currently in use */
Xint  n_format;					/* number of format strings left; mkname() decrements it */
Xchar *defaultfmt[] = {DFLTOUTNAME};	/* default format string, used when none are given */
Xint  filenumber = 0;	/* number put into output names; mkname() increments it before use for numbered formats */
Xint  every_n_lines = 0;			/* split every n lines; 0 means split on pattern instead */
Xbool exclude = FALSE;			/* exclude matched line from o/p files (-x) */
Xbool split_after = FALSE;		/* split after matched line (-a) */
X
Xusage(status)
X     int status;	/* exit if status != 0 */
X{
X     fprintf(stderr,"Usage: %s [-f filename] [-a] [-x] [-i<n>] [-m|-s|-n<n>] [-e expression | expression] [format...]\n", progname);
X     if (status)
X	  exit(status);
X}
X
Xmain(argc, argv)
X     char *argv[];
X{
X     /* split files at points that match a given pattern */
X     /* initialise things */
X     bool donefiles = FALSE;
X     char *buffer;
X	 char *infile = (char *) NULL;
X
X     int getnum();		/* does more checking than atoi */
X     char *rmpath();    /* removes leading pathname from a filename */
X
X     /* now remove possible leading pathname
X      * (e.g. /usr/bin/slice is to report it's errors as slice
X      */
X     progname = rmpath(argv[0]);
X
X
X	while (--argc) {
X	  if (**++argv == '-') {
X		switch(*++*argv) {
X			case 'a': {				/* split after pattern */
X				split_after = TRUE;
X				break;
X			}
X			case 'e': {				/* pattern (expression) */
X				++argv; argc--;
X				if (argc==0 || !**argv) {
X					error("Pattern after -e missing or null\n");
X					usage(1);
X				}
X				pattern = *argv;
X				break;
X			}
X			case 'm': {				/* mailbox pattern */
X				pattern = "^From ";
X				break; 
X			}
X			case 's': {				/* shell pattern */
X				pattern = "^#! *\/bin\/sh";
X				break; 
X			}
X			case 'n': {				/* -n n_lines -- split every n lines */
X				nextstr(buffer,argc,argv,usage(2));
X				every_n_lines = getnum(buffer);
X				if (every_n_lines <= 0) {
X					error("-n: number must be at least 1\n");
X					exit(EXIT_SYNTAX);
X				}
X				break;
X			} 
X			case 'f': {
X				++argv; argc--;
X				if (argc==0 || !**argv) {
X					error("Filename after -f missing or null\n");
X					usage(1);
X				}
X				infile = *argv;
X				break;
X			}				
X		    case 'i': {	/* -i initial_number */
X				nextstr(buffer,argc,argv,usage(2));
X				filenumber = getnum(buffer);
X				if (filenumber < 0) {
X			    	error("-i must be followed by a positive number\n");
X				    exit(EXIT_SYNTAX);
X				 }
X				filenumber--;	/* needs to be one less to start with */
X				break;
X		    }
X			case 'x': { /* exclude matched lines */
X				exclude = TRUE;
X				break;
X			}
X		    default: {
X				error("Unknown flag -%c\n", **argv);
X				usage(1);
X		    }
X		}			/* end switch */
X	  } else {	
X		if (!pattern) pattern = *argv;	/* first non-flag is pattern */
X		else break;						/* break while loop */
X	  }			/* end if */
X     }		/* end while */
X
X	 if (!argc) {
X		format = defaultfmt;
X		n_format = 1; }
X	 else {
X		format = argv;
X		n_format = argc;
X	 }
X
X#ifdef DEBUG
X	printf("argc=%d\n",argc);
X	printf("format='%s'\n",*format);
X	printf("pattern='%s'\n",pattern);
X#endif
X
X	 if (!infile) split(stdin, DFLTNAME, pattern);
X	 else        fsplit(infile, pattern);
X
X     exit(0);
X}
X
Xfsplit(name, pat)
X     char *name;
X     char *pat;
X{
X     FILE *fd;
X
X     if (!name || !*name) {
X	  error("Can't split a file with an empty name\n");
X	  usage(2);
X     }
X
X     if ( (fd = fopen(name, "r")) == NULL) {
X	  error("Can't open %s\n", name);
X	  return;
X     }
X
X     (void) split(fd, name, pat);
X
X     if (fclose(fd) == EOF) {	/* something's gone wrong */
X	  error("Can't close %s -- giving up\n", name);
X	  exit(EXIT_RUNERR);
X     }
X}
X
Xchar buffer[BUFLEN];
X
Xint
Xsplit(input, name, pattern)
X     FILE *input;		/* fd of input file */
X     char *name;		/* input filename */
X     char *pattern;		/* pattern used to split file */
X{
X     /* do the real work here. Oh dear, I don't know how... */
X     /* we are always called with an open file. */
X
X     extern char *re_comp();     /* compile string into automaton */
X     extern int   re_exec();     /* try to match string */
X#define REMATCH 1
X#define RENOMATCH 0
X#define REFAULT -1
X
X     char *errmessage;
X     FILE *output = NULL;
X     char fnambuf[MAXFILENAMELEN + 2];  /* +1 for null, +1 for overflow */
X     int reg_status = 0;				/* regular expression status */
X     int line = 0;
X
X	 if (split_after && exclude) {
X	  error("Can't specify both -a and -x\n");
X	  usage(2);
X	 }
X
X	 if (every_n_lines && exclude) {
X	  error("Can't specify both -n and -x\n");
X	  usage(2);
X	 }
X
X	 if (every_n_lines && split_after) {
X	  error("Can't specify both -n and -a\n");
X	  usage(2);
X	 }
X
X	 if (every_n_lines && pattern) {
X	  error("Can't specify both -n and pattern\n");
X	  usage(2);
X	 }
X
X     if (!every_n_lines && (!pattern || !*pattern)) {
X	  error("Can't match an empty pattern\n");
X	  usage(2);
X     }
X
X     if (!every_n_lines && (errmessage = re_comp(pattern)) != NULL) {
X	  error("Error in pattern <%s>: %s\n", pattern, errmessage);
X	  exit(EXIT_RUNERR);
X     }
X     /* errmessage is NULL here */
X
X     /* the -2 to fgets is because of the null and \n appended */
X     while (fgets(buffer, BUFLEN - 2, input) != NULL) {
X	  if (!output ||	/* first line */
X	     (every_n_lines > 0 && (++line == every_n_lines)) || /* nth line */
X	     (!every_n_lines &&
X	     ((reg_status = re_exec(buffer)) == REMATCH)) ) { /* matches pat */
X	       /* don't look at 1st line of file, to avoid an infinite */
X	       /* recursion... */
X
X			if (output && split_after) {
X				fputs(buffer, output);
X			}
X
X			if (n_format && mkname(fnambuf, name)) {;
X				/* check for output file = input file */
X				if (strcmp(fnambuf,name)==0) {
X					error("Output file same as input file\n");
X					exit(EXIT_RUNERR);
X				}
X				/* start a new file */
X				if (output && output != stdout) {
X					if (fclose(output) == EOF) {
X						error("Can't close output file\n");
X						exit(EXIT_RUNERR);
X					}
X					output = NULL;
X				}
X				line = 0;
X				if (fnambuf[0]=='+' && fnambuf[1]==NULL) {
X					output = stdout;
X				} else {
X					if ((output = fopen(fnambuf, "a")) == NULL) {
X						error("Can't open output file %s\n", fnambuf);
X						exit(EXIT_RUNERR);
X					}
X				}
X				/* if matched lines are excluded, skip the fputs */
X				if (exclude && reg_status == REMATCH) continue;
X
X				/* if file is to be split after pattern, put already done */
X				if (split_after && reg_status == REMATCH) continue;
X			} else {
X				error("Insufficient formats -- last file contains remainder\n");
X				}
X	  } else if (reg_status == REFAULT) {
X	       /* the re_exec failed */
X	       error("Internal error trying to match <%s> to <%s>\n",
X			      pattern, buffer);
X	       exit(EXIT_INTERN);
X	  }
X	  fputs(buffer, output);
X      }
X      return (filenumber == -1);	/* exit status for main */
X}
X
Xbool
Xmkname(fnambuf, name)
X	 char *fnambuf;
X	 char *name;
X{
X     int i, s = -1, d = -1;
X	 static bool new_format = TRUE;
X	 static bool perpetual = FALSE;
X	 static bool d_before_s = FALSE;
X
X	 if (new_format) {
X		 if (!n_format) {
X			error("Internal error: mkname called but formats have run out\n");
X			exit(EXIT_INTERN);
X		 }
X	     i = bfsearch(*format, "%",0);
X	     s = bfsearch(*format, "%s",0);
X	     if (i>=0 && i==s) d = bfsearch(*format, "%",++i);
X	     else 			   d = i;
X		 if (d<0) perpetual = FALSE;
X		 else     perpetual = TRUE;
X		 if (d<s || s<0) d_before_s = TRUE;
X		 else            d_before_s = FALSE;
X		 new_format = FALSE;
X	 }
X
X	 if (perpetual) ++filenumber;
X
X     if (d_before_s)
X          sprintf(fnambuf, *format, filenumber, rmpath(name));
X     else 
X          sprintf(fnambuf, *format, rmpath(name), filenumber);
X      
X	 if (!perpetual) {
X		new_format = TRUE;
X		--n_format;
X		if (n_format) {
X			++format; 
X			filenumber=0;
X		}
X	}
X}
X
Xerror(fmt, a1, a2, a3, a4)
X     char *fmt;
X{
X     fputs(progname, stderr);
X     fputs(": ", stderr);
X     fprintf(stderr, fmt, a1, a2, a3, a4);
X}
X
X/* getnum(s) returns the value of the unsigned decimal integer in s.
X * It returns -1 if s is null or empty, contains anything other than
X * digits, or holds a value too large for an int.
X */
Xint
Xgetnum(s)
X    char *s;
X{
X    register char *p;
X    unsigned limit = (~(unsigned) 0) >> 1;  /* largest value an int holds */
X    unsigned value = 0;
X    unsigned digit;
X
X    if (!s || !*s)
X        return -1;
X
X    for (p = s; *p; p++) {
X        /* isdigit() is only defined for unsigned char values and EOF */
X        if (!isdigit((unsigned char) *p))
X            return -1;
X        digit = (unsigned) (*p - '0');
X        /* refuse the digit if value * 10 + digit would exceed limit */
X        if (value > (limit - digit) / 10)
X            return -1;
X        value = value * 10 + digit;
X    }
X    return (int) value;
X}
X
X
X/* Remove the leading pathname from a filename */
X
Xchar *
Xrmpath(fullname)
X    char *fullname;
X{
X    register char *p;
X    char *q = (char *) NULL;
X
X    for (p = fullname; p && *p; p++) {
X         if (*p == '/')
X  	    q = ++p;
X    }
X    if (q && *q) {
X         return(q);
X    }
X    return(fullname);
X}
X
X
X/* Find substring within string */
X/* Brute force algorithm */
X
Xint 
Xbfsearch(string,key,start)
X
X  char  string[],
X	key[];
X  int   start;
X{
X	int i=start,j=0;
X
X	if (string[0]==NULL || key[0]==NULL) return(-1);
X
X	do {
X	  if (string[i] == key[j])
X	    {i++; j++;}
X	  else
X	    {i=i-j+1; j=0;};
X	}
X	while (string[i]!=NULL && key[j]!=NULL);
X
X	if (key[j]==NULL) return(i-j);
X	  else return(-1);
X}
X
SHAR_EOF
if test 8964 -ne "`wc -c 'slice.c'`"
then
	echo shar: error transmitting "'slice.c'" '(should have been 8964 characters)'
fi
#	End of shell archive
exit 0
-- 
Gary Puckering        3755 Riverside Dr.
Cognos Incorporated   Ottawa, Ontario
(613) 738-1440        CANADA  K1G 3N3



More information about the Comp.sources.unix mailing list