Better pattern matching for `rh'

Rob McMahon cudcv at warwick.ac.uk
Thu May 4 03:35:22 AEST 1989


The author of `rh' himself admitted that the choice of "*name" or "name*" in
rh's pattern matching was inadequate.  It seems more natural to have the
shell-style `?', `*', and `[' metacharacters here than full regular
expressions.  Maybe there should also be an alternative "/re/" form for full
regular expressions: since only the last component is checked, there can be
no slashes in the pattern, so the delimiters would be unambiguous.

Since the pattern matching code from BSD's ftp is publicly distributable, I
glued it in, after hopefully making it 8-bit clean.  Here are my diffs; they
allow patterns such as "a?[b-y]\*\"z*".  You will need `-Drindex=strrchr'
on systems that provide `strrchr' instead of `rindex'.  Note that it doesn't
ignore files that start with `.'; I'm not sure if this is a feature.

RCS file: rh.man,v
retrieving revision 1.1
diff -c -r1.1 rh.man
*** /tmp/,RCSt1a01638	Wed May  3 18:30:00 1989
--- rh.man	Wed May  3 17:13:28 1989
***************
*** 176,187 ****
  This operater evaluates to the integer uid of
  .I username.
  .PP
! .IP """*.c"""
  This operator evaluates to true if the current filename matches
! "expression". The only form of 
  .I expression
! supported is, "filename", "*filename", "filename*", "*".
! When doing comparisons, only the base name is examined, not
  pathnames.
  .PP
  .IP [yyyy/mm/dd]
--- 176,187 ----
  This operater evaluates to the integer uid of
  .I username.
  .PP
! .IP """\fIexpression\fR"""
  This operator evaluates to true if the current filename matches
! "expression".
  .I expression
! may contain the normal `*', `?', and `[' shell metacharacters, which can be
! quoted with `\e'.  When doing comparisons, only the base name is examined, not
  pathnames.
  .PP
  .IP [yyyy/mm/dd]
===================================================================
RCS file: rhcmds.c,v
retrieving revision 1.1
diff -c -r1.1 rhcmds.c
*** /tmp/,RCSt1a01638	Wed May  3 18:30:04 1989
--- rhcmds.c	Wed May  3 18:29:13 1989
***************
*** 10,15 ****
--- 10,33 ----
   *
   * ---------------------------------------------------------------------- */
  
+ /* Gmatch is taken from the Berkeley ftp code, and is protected thus: */
+ /*
+  * Copyright (c) 1980 Regents of the University of California.
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms are permitted
+  * provided that the above copyright notice and this paragraph are
+  * duplicated in all such forms and that any documentation,
+  * advertising materials, and other materials related to such
+  * distribution and use acknowledge that the software was developed
+  * by the University of California, Berkeley.  The name of the
+  * University may not be used to endorse or promote products derived
+  * from this software without specific prior written permission.
+  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+  */
+ 
  #include "rh.h"
  #include <sys/types.h>
  #include <sys/stat.h>
***************
*** 75,108 ****
   *
   */
  
  c_star(i)
! long i;
  {
  
! 	register int ri,ii;
! 	
! 	if( Starbuf[i]=='*') {
! 		ii=strlen(fname)-1;
! 		ri=strlen(Starbuf+i)-1+i;
! 		while( fname[ii]==Starbuf[ri] && ri>i ) {
! 			ri--; ii--;
  		}
- 		Stack[SP++] = (ri==i);
  	}
- 	else {
- 		int x=0;
- 		ii=0;
- 		while( fname[x] ) {
- 			if(fname[x]=='/') ii=x;
- 			x++;
- 		}
- 		ii++;
- 		ri=i;
- 		while( fname[ii]==Starbuf[ri] && Starbuf[ri]!='*' 
- 			&& fname[ii] && Starbuf[ri]) {
- 			ri++; ii++;
- 		}
- 		Stack[SP++]=!(fname[ii]+Starbuf[ri]) || Starbuf[ri]=='*';
- 	}
  }
- 
--- 93,172 ----
   *
   */
  
+ static int Gmatch();
+ 
  c_star(i)
! 	long i;
  {
+ 	char *p;
+ 	extern char *rindex();
+ 	if ((p = rindex(fname, '/')) != 0)
+ 		p++;
+ 	else
+ 		p = fname;
+ 	Stack[SP++] = Gmatch(p, &Starbuf[i]);
+ }
  
! static
! Gmatch(s, p)
! 	register char *s, *p;
! {
! 	register int scc;
! 	int ok, lc;
! 	int c, cc;
! 
! 	for (;;) {
! 		scc = *s++;
! 		switch (c = *p++) {
! 
! 		case '[':
! 			ok = 0;
! 			lc = 077777;
! 			while (cc = *p++) {
! 				if (cc == ']') {
! 					if (ok)
! 						break;
! 					return (0);
! 				}
! 				if (cc == '-') {
! 					if (lc <= scc && scc <= *p++)
! 						ok++;
! 				} else
! 					if (scc == (lc = cc))
! 						ok++;
! 			}
! 			if (cc == 0)
! 				if (ok)
! 					p--;
! 				else
! 					return 0;
! 			continue;
! 
! 		case '*':
! 			if (!*p)
! 				return (1);
! 			for (s--; *s; s++)
! 				if (Gmatch(s, p))
! 					return (1);
! 			return (0);
! 
! 		case 0:
! 			return (scc == 0);
! 
! 		default:
! 			if (c != scc)
! 				return (0);
! 			continue;
! 
! 		case '?':
! 			if (scc == 0)
! 				return (0);
! 			continue;
! 
! 		case '\\':
! 			if (*p++ != scc)
! 				return (0);
! 			continue;
  		}
  	}
  }
===================================================================
RCS file: rhparse.c,v
retrieving revision 1.1
diff -c -r1.1 rhparse.c
*** /tmp/,RCSt1a01638	Wed May  3 18:30:09 1989
--- rhparse.c	Wed May  3 17:13:54 1989
***************
*** 459,471 ****
  	}
  
  	if( c == '"' ) {
! 		int index,st=0;
  		index=starfree;
! 		while( (c=getit())!= '"' ) {
  			if( starfree > STARLEN )
  				error("no more string space");
- 			if(c=='*') st++;
- 			if(st>1) error("too many *'s present");
  			Starbuf[starfree++]=c;
  		}
  		Starbuf[starfree++]='\0';
--- 459,470 ----
  	}
  
  	if( c == '"' ) {
! 		int index,st=0,quoted=0;
  		index=starfree;
! 		while( (c=getit())!= '"' || quoted ) {
! 			quoted = (c == '\\' && !quoted);
  			if( starfree > STARLEN )
  				error("no more string space");
  			Starbuf[starfree++]=c;
  		}
  		Starbuf[starfree++]='\0';

Rob
-- 
UUCP:   ...!mcvax!ukc!warwick!cudcv	PHONE:  +44 203 523037
JANET:  cudcv at uk.ac.warwick             ARPA:   cudcv at warwick.ac.uk
Rob McMahon, Computing Services, Warwick University, Coventry CV4 7AL, England



More information about the Comp.sources.bugs mailing list