Unix World contest (find the bugs)
Col. G. L. Sicherman
gls at corona.ATT.COM
Thu Mar 14 07:44:17 AEST 1991
A few months ago _Unix World_ ran a contest to see who could write
the nicest program for stripping C++ comments. A C++ comment is
/* a C
comment, */
or
// everything from double-slash to end of line,
except that the key tokens don't count when they are part of
a string (or char) literal "/*".
Recently the three award winners were published: a C program,
a lex program, and a sh program. The columnist challenged
readers to find bugs in them. As it happens, all three programs,
simple though they are, have bugs!
Challenge yourself -- for each of the following three programs,
devise a source text (preferably a valid C program) on which it
fails to strip comments properly. (If you post your solutions,
please put "SPOILER" in the Subject: line. I'll post answers
in a couple of weeks -- if necessary!)
The C program:
#include <stdio.h>
char *sccsID="@(#) cstrip.c 1.1 Bart J. Besseling, 8/90";
int m[9][8] = { /* finite-state machine */
/* events:
/ * " ' \ \n sp ch states: */
{ 0x01,0x80,0x85,0x87,0x80,0x80,0x80,0x80 }, /* 0: hunt */
{ 0x02,0x33,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0 }, /* 1: maybe */
{ 0x02,0x02,0x02,0x02,0x02,0x80,0x02,0x02 }, /* 2: c++ */
{ 0x13,0x14,0x13,0x13,0x13,0x83,0x83,0x13 }, /* 3: c */
{ 0x10,0x13,0x13,0x13,0x13,0x83,0x83,0x13 }, /* 4: end c */
{ 0x85,0x85,0x80,0x85,0x86,0x80,0x85,0x85 }, /* 5: string */
{ 0x85,0x85,0x85,0x85,0x85,0x85,0x85,0x85 }, /* 6: \ in str */
{ 0x87,0x87,0x87,0x80,0x88,0x80,0x87,0x87 }, /* 7: char */
{ 0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87 }, /* 8: \ in char */
};
int
main() /* Input parser and output generator */
{
register int ch, event, state;
for (state = 0; (ch = getchar()) != EOF;) {
/* translate character into event */
switch (ch) {
case '/': event = 0; break;
case '*': event = 1; break;
case '"': event = 2; break;
case '\'': event = 3; break;
case '\\': event = 4; break;
case '\n': event = 5; break;
case '\t':
case ' ': event = 6; break;
default: event = 7; break;
}
/* obtain next state and operation from machine */
state = m[state & 0x0f][event];
/* perform operation */
if (state & 0x10) putchar(' ');
if (state & 0x20) putchar(' ');
if (state & 0x40) putchar('/');
if (state & 0x80) putchar(ch);
}
return 0;
}
The lex program:
%Start CODE CCOM STRING CHAR CPLUS
%%
%{
char *sccsID = "@(#) sc 1.0 Andre van Dalen, 6/90";
BEGIN CODE;
%}
<STRING>([^\\]\")|(\\\\\") |
<CHAR>([^.\\]\')|(\\\\\') |
<CPLUS>\n { ECHO; BEGIN CODE; }
<CCOM>"*/" { two_space(); BEGIN CODE; }
<CCOM,CPLUS>. { output(*yytext=='\t'?'\t':' ');}
<CODE>"/*" { two_space(); BEGIN CCOM ; }
<CODE>"//" { two_space(); BEGIN CPLUS ;}
<CODE>\" { ECHO; BEGIN STRING; }
<CODE>\' { ECHO; BEGIN CHAR; }
<STRING,CODE>. { ECHO; }
%%
two_space()
{
output(' '); output(' ');
}
main(argc, argv)
int argc; char **argv;
{
if (argc==1) yylex();
else while (*++argv) {
fclose(yyin);
if (!(yyin=fopen(*argv,"r"))) {
perror(*argv);
exit(1);
}
yylex();
}
exit(0);
}
The sh program:
# @(#) sc Strip comments from a C/C++ source file
# Author: Carl Bergerson, August 1990
# set -x # Uncomment for debugging
# Define correct usage message:
USAGE="Usage: $0 [sourcefile]"
case $# in
0) sed -e 's/^#/a#/' | /lib/cpp |
sed -e '/^#/d' -e 's/^a#/#/';;
1) sed -e 's/^#/a#/' $1 | /lib/cpp |
sed -e '/^#/d' -e 's/^a#/#/';;
*) echo $USAGE >&2
exit 1 ;;
esac
--
Col. G. L. Sicherman
gls at corona.att.COM
More information about the Comp.lang.c
mailing list