cpp (2-3) file as cpp2.arc
Martin Minow
minow at decvax.UUCP
Mon Sep 3 01:53:54 AEST 1984
-h- cpp.h Sat Sep 1 21:43:39 1984 cpp.h
/*
* I n t e r n a l D e f i n i t i o n s f o r C P P
*
* In general, definitions in this file should not be changed.
*/
/* Boolean constants, unless an earlier header already supplied them. */
#ifndef TRUE
#define TRUE 1
#define FALSE 0
#endif
#define EOS '\0' /* End of string */
#define EOF_CHAR 0 /* Returned by get() on eof */
#define NULLST ((char *) NULL) /* Pointer to nowhere (linted) */
/* COM_SPACE is only defined when COMMENT_INVISIBLE processing is enabled. */
#if COMMENT_INVISIBLE
#define COM_SPACE 0x1F /* End of comment separator */
#endif
#define DEF_NOARGS (-1) /* #define foo vs #define foo() */
/*
* Note -- in Ascii, the following will map macro formals onto the C1
* control character region (decimal 128 .. (128 + NPARM)), which will
* be ok as long as NPARM is less than 32.
*/
#define PFLAG 0x80 /* Macro formals start here */
/*
* Compile-time check of the constraint above: when NPARM is too large
* the non-C text below becomes part of the program and stops the build.
*/
#if NPARM >= 32
assertion fails -- NPARM isn't less than 32
#endif
/*
* Character type codes.
*
* INV, DIG, LET and the codes from QUO onward are compared against
* entries of the type[] classifier table by the scanner. The OP_xxx
* codes name expression operators; the FIRST_xxx/LAST_xxx macros
* bracket the binary and unary ranges, so the numeric order of the
* OP_xxx values matters.
*/
#define INV 0 /* Invalid, must be zero */
#define OP_EOE INV /* End of expression */
#define DIG 1 /* Digit */
#define LET 2 /* Identifier start */
#define FIRST_BINOP OP_ADD /* First binary operator */
#define OP_ADD 3
#define OP_SUB 4
#define OP_MUL 5
#define OP_DIV 6
#define OP_MOD 7
#define OP_ASL 8
#define OP_ASR 9
#define OP_AND 10 /* &, not && */
#define OP_OR 11 /* |, not || */
#define OP_XOR 12
#define OP_EQ 13
#define OP_NE 14
#define OP_LT 15
#define OP_LE 16
#define OP_GE 17
#define OP_GT 18
#define OP_ANA 19 /* && */
#define OP_ORO 20 /* || */
#define OP_QUE 21 /* ? */
#define OP_COL 22 /* : */
#define OP_CMA 23 /* , (relevant?) */
#define LAST_BINOP OP_CMA /* Last binary operator */
/*
* The following are unary.
*/
#define FIRST_UNOP OP_PLU /* First unary operator */
#define OP_PLU 24 /* + (draft ANSI standard) */
#define OP_NEG 25 /* - */
#define OP_COM 26 /* ~ */
#define OP_NOT 27 /* ! */
#define LAST_UNOP OP_NOT /* Last unary operator */
#define OP_LPA 28 /* ( */
#define OP_RPA 29 /* ) */
#define OP_END 30 /* End of expression marker */
#define OP_MAX (OP_END + 1) /* Number of operators */
#define OP_FAIL (OP_END + 1) /* For error returns */
/*
* The following are for lexical scanning only.
*/
#define QUO 65 /* Both flavors of quotation */
#define DOT 66 /* . can start a number */
#define SPA 67 /* Space and tab */
#define BSH 68 /* Just a backslash */
#define END 69 /* EOF */
/*
* The DEFBUF structure stores information about #defined
* macros. Note that the defbuf->repl information is always
* in malloc storage.
*
* name[] is declared with a single element and is the last member;
* presumably each DEFBUF is allocated with enough extra room to hold
* the complete macro name -- confirm against the allocating code.
*/
typedef struct defbuf {
struct defbuf *link; /* Next define in chain */
char *repl; /* -> replacement */
int hash; /* Symbol table hash */
int nargs; /* For define(args); presumably DEF_NOARGS if no () */
char name[1]; /* #define name */
} DEFBUF;
/*
* The FILEINFO structure stores information about open files
* and macros being expanded.
*
* Entries are chained through parent, from the current input back to
* its includer. fp is NULL when the entry is a macro expansion
* rather than a file.
*
* buffer[] is declared with a single element and is the last member;
* presumably each FILEINFO is allocated with room for a whole input
* line -- confirm against the allocating code.
*/
typedef struct fileinfo {
char *bptr; /* Buffer pointer */
int line; /* for include or macro */
FILE *fp; /* File if non-null */
struct fileinfo *parent; /* Link to includer */
char *filename; /* File/macro name */
char *progname; /* From #line statement */
char buffer[1]; /* current input line */
} FILEINFO;
/*
 * Character output/input and string comparison helpers.
 *
 * nomacarg is a built-in #define on Decus C; it marks a compiler that
 * cannot handle macros with arguments, so function-like macros are
 * only defined when it is absent.
 *
 * cput(c)	writes one character. With COMMENT_INVISIBLE set, the
 *		COM_SPACE marker is swallowed instead of being written.
 *		Use it as a statement: cput(c);
 * cget		is only mapped onto get() when COMMENT_INVISIBLE is off.
 * streq(a, b)	is non-zero when the two strings compare equal.
 *
 * Macro arguments are parenthesized so that expression arguments
 * expand with the intended precedence.
 */
#if COMMENT_INVISIBLE
#ifdef nomacarg
#define cput output /* Comment concatenates tokens */
#else
#define cput(c) { if ((c) != COM_SPACE) putchar(c); }
#endif
#else
#define cput putchar /* Comment == space */
#define cget get /* Normal get routine */
#endif
#ifndef nomacarg
#define streq(s1, s2) (strcmp((s1), (s2)) == 0)
#endif
/*
* Error codes. VMS uses system definitions.
* Decus C codes are defined in stdio.h.
* Others are cooked to order.
*
* IO_NORMAL and IO_ERROR are the values passed to exit() for success
* and failure respectively.
*/
#if HOST == SYS_VMS
#include <ssdef.h>
#include <stsdef.h>
#define IO_NORMAL (SS$_NORMAL | STS$M_INHIB_MSG)
#define IO_ERROR SS$_ABORT
#endif
/*
* Note: IO_NORMAL and IO_ERROR are defined in the Decus C stdio.h file
*/
/* Fallback values when neither VMS nor stdio.h supplied the codes. */
#ifndef IO_NORMAL
#define IO_NORMAL 0
#endif
#ifndef IO_ERROR
#define IO_ERROR 1
#endif
/*
* Externs
*
* Declarations of the global variables and of the functions whose
* return type is not int, shared by the cpp source files.
*/
extern int line; /* Current line number */
extern int wrongline; /* Force #line to cc pass 1 */
extern char type[]; /* Character classifier */
/*
* NOTE(review): IDMAX is documented as the longest identifier; if an
* identifier of exactly IDMAX characters is stored here there is no
* room left for the terminating EOS -- confirm against scanid().
*/
extern char token[IDMAX]; /* Current input token */
extern int instring; /* TRUE if scanning string */
extern int errors; /* Error counter */
extern int recursion; /* Macro depth counter */
extern FILEINFO *infile; /* Current input file */
extern char work[NWORK]; /* #define scratch */
extern char *workp; /* Free space in work */
#if DEBUG
extern int debug; /* Debug level */
#endif
extern char *getmem(); /* Get memory or die. */
extern DEFBUF *lookid(); /* Look for a #define'd thing */
extern DEFBUF *defendel(); /* Symbol table enter/delete */
extern char *savestring(); /* Stuff string in malloc mem. */
/* Library routines declared by hand rather than through a header. */
extern char *strcpy();
extern char *strcat();
extern char *strrchr();
extern char *strchr();
extern long time();
-h- cppdef.h Sat Sep 1 21:43:39 1984 cppdef.h
/*
* S y s t e m D e p e n d e n t
* D e f i n i t i o n s f o r C P P
*
* Definitions in this file may be edited to configure CPP for particular
* host operating systems and target configurations.
*
* NOTE: cpp assumes it is compiled by a compiler that supports macros
* with arguments. If this is not the case (as for Decus C), #define
* nomacarg -- and provide function equivalents for all macros.
*
* cpp also assumes the host and target implement the Ascii character set.
* If this is not the case, you will have to do some editing here and there.
*/
/*
* This redundant definition of TRUE and FALSE works around
* a limitation of Decus C.
*/
#ifndef TRUE
#define TRUE 1
#define FALSE 0
#endif
/*
* Define the HOST operating system. This is needed so that
* cpp can use appropriate filename conventions.
*
* HOST may be preset before this file is read. Otherwise it is taken
* from the first of the symbols unix, vms, rsx, rt11 that is defined,
* and falls back to SYS_UNKNOWN when none of them is.
*/
#define SYS_UNKNOWN 0
#define SYS_UNIX 1
#define SYS_VMS 2
#define SYS_RSX 3
#define SYS_RT11 4
#define SYS_LATTICE 5
#define SYS_ONYX 6
#define SYS_68000 7
#ifndef HOST
#ifdef unix
#define HOST SYS_UNIX
#else
#ifdef vms
#define HOST SYS_VMS
#else
#ifdef rsx
#define HOST SYS_RSX
#else
#ifdef rt11
#define HOST SYS_RT11 /* RT-11 has its own code, distinct from RSX */
#endif /* rt11 */
#endif /* rsx */
#endif /* vms */
#endif /* unix */
#endif /* HOST */
#ifndef HOST
#define HOST SYS_UNKNOWN
#endif
/*
* We assume that the target is the same as the host system
*/
#ifndef TARGET
#define TARGET HOST
#endif
/*
* In order to predefine machine-dependent constants,
* several strings are defined here:
*
* MACHINE defines the target cpu
* SYSTEM defines the target operating system
* COMPILER defines the target compiler
*
* The above may be #defined as "" if they are not wanted.
* They should not be #defined as NULL.
*
* Each of them is expanded as one or more elements of a string
* table initializer (preset[] in cpp1.c), which is why a definition
* may consist of several comma-separated strings.
*
* LINE_PREFIX defines the # output line prefix, if not "line"
* FILE_LOCAL marks functions which are referenced only in the
* file they reside. Some C compilers allow these
* to be marked "static" even though they are referenced
* by "extern" statements elsewhere.
*/
#if TARGET == SYS_LATTICE
#define MACHINE "i8086"
#define SYSTEM "pcdos" /* Dos for IBM PC */
#endif
#if TARGET == SYS_ONYX
#define MACHINE "z8000"
#define SYSTEM "unix"
#endif
#if TARGET == SYS_VMS
#define MACHINE "vax"
#define SYSTEM "vms"
#define COMPILER "vax11c"
#endif
#if TARGET == SYS_RSX
#define MACHINE "pdp11"
#define SYSTEM "rsx"
#define COMPILER "decus"
#endif
#if TARGET == SYS_RT11
#define MACHINE "pdp11"
#define SYSTEM "rt11"
#define COMPILER "decus"
#endif
#if TARGET == SYS_68000
#define MACHINE "M68000", "m68000", "m68k" /* Three names predefined */
#define SYSTEM "unix"
#endif
/* On Unix the cpu is taken from the host compiler's own predefines. */
#if TARGET == SYS_UNIX
#define SYSTEM "unix"
#ifdef pdp11
#define MACHINE "pdp11"
#else
#ifdef vax
#define MACHINE "vax"
#endif
#endif
#endif
/*
* defaults
*
* Every definition below is guarded by #ifndef so that it can be
* overridden before this point (for example from the makefile).
*/
#ifndef MSG_PREFIX
#define MSG_PREFIX "cpp: "
#endif
#ifndef LINE_PREFIX
#ifdef decus
#define LINE_PREFIX ""
#else
#define LINE_PREFIX "line"
#endif
#endif
/*
* BITS_CHAR may be defined to set the number of bits per character.
* It is needed only for multi-byte character constants.
*/
#ifndef BITS_CHAR
#define BITS_CHAR 8
#endif
/*
* BIG_ENDIAN is set TRUE on machines (such as the IBM 360 series)
* where 'ab' stores 'a' in the high-bits and 'b' in the low-bits.
* It is set FALSE on machines (such as the PDP-11 and Vax-11)
* where 'ab' stores 'a' in the low-bits and 'b' in the high-bits.
* (Or is it the other way around?)
*/
#ifndef BIG_ENDIAN
#define BIG_ENDIAN FALSE
#endif
/*
* NO_REG_UNION is set TRUE for host compilers that do not allow
* the following construction:
* register union {
* int i;
* char *p;
* }
*
* REG_UNION is the declaration prefix to use for such unions: it
* includes "register" only when the host compiler accepts it.
*/
#ifndef NO_REG_UNION
#ifdef pcc
#define NO_REG_UNION 1
#else
#define NO_REG_UNION 0
#endif
#endif
#if NO_REG_UNION
#define REG_UNION union
#else
#define REG_UNION register union
#endif
/*
* COMMENT_INVISIBLE may be defined to allow "old-style" comment
* processing, whereby the comment becomes a zero-length token
* delimiter. This permitted tokens to be concatenated in macro
* expansions. This was removed from the Draft Ansi Standard.
*/
#ifndef COMMENT_INVISIBLE
#define COMMENT_INVISIBLE 0 /* Comment == space */
#endif
/*
* STRING_FORMAL may be defined to allow recognition of macro parameters
* in replacement strings. This was removed from the Draft Ansi Standard.
*/
#ifndef STRING_FORMAL
#define STRING_FORMAL 0 /* No string formals */
#endif
/*
* Some common definitions.
*/
#ifndef DEBUG
#define DEBUG 1 /* Compile debugging code */
#endif
/*
* The following definitions are used to allocate memory for
* work buffers. In general, they should not be modified
* by implementors.
*
* NPARM The maximum number of #define parameters
* (cpp.h requires this to be less than 32)
* IDMAX The longest identifier
* NBUFF Input buffer size
* NWORK Work buffer size -- the longest macro
* must fit here after expansion.
* NEXP The nesting depth of #if expressions.
* NINCLUDE The number of directories that may be specified
* on a per-system basis, or by the -I option.
* NPARMWORK Parameter work buffer size (twice NWORK)
*/
#define NPARM 10 /* Max number of parameters */
#ifndef IDMAX
#define IDMAX 31 /* Longest identifier per std. */
#endif
#define NBUFF 256 /* Input buffer (line) size */
#define NWORK 256 /* Work buffer size */
#define NEXP 20 /* #if expression stack depth */
#define NINCLUDE 7 /* #include directories */
#define NPARMWORK (NWORK * 2) /* Parm work buffer size */
/*
* Some special constants. These may need to be changed if cpp
* is ported to a weird machine.
*
* NOTE: if cpp is run on a non-ascii machine, ALERT and VT may
* need to be changed. They are used to implement the proposed
* ANSI standard C control characters '\a' and '\v' only.
* DEL is used to tag macro tokens to prevent #define foo foo
* from looping. Note that we don't try to prevent more elaborate
* #define loops from occurring.
*/
#define ALERT '\007' /* '\a' is "Bell" */
#define VT '\013' /* Vertical Tab (CTRL/K) */
#define DEL '\177' /* Magic for #defines */
/*
* FILE_LOCAL prefixes functions that are referenced only within
* their own source file. It expands to "static" for the Decus C and
* Vax-11 C compilers and to nothing elsewhere.
*/
#ifndef FILE_LOCAL
#ifdef decus
#define FILE_LOCAL static
#else
#ifdef vax11c
#define FILE_LOCAL static
#else
#define FILE_LOCAL /* gets global scope on others */
#endif
#endif
#endif
-h- cpp1.c Sat Sep 1 21:43:39 1984 cpp1.c
/*
* CPP main program.
*
* Edit history
* 21-May-84 MM "Field test" release
* 23-May-84 MM Some minor hacks.
* 30-May-84 ARF Didn't get enough memory for __DATE__
* Added code to read stdin if no input
* files are provided.
* 29-Jun-84 MM Added ARF's suggestions, Unixifying cpp.
* 11-Jul-84 MM "Official" first release (that's what I thought!)
* 22-Jul-84 MM/ARF/SCK Fixed line number bugs, added cpp recognition
* of #line, fixed problems with #include.
* 23-Jul-84 MM More (minor) include hacking, some documentation.
* Also, redid cpp's #include files
* 25-Jul-84 MM #line filename isn't used for #include searchlist
* #line format is <number> <optional name>
* 25-Jul-84 ARF/MM Various bugs, mostly serious. Removed homemade doprint
* 01-Aug-84 MM Fixed recursion bug, remove extra newlines and
* leading whitespace from cpp output.
* 02-Aug-84 MM Hacked (i.e. optimized) out blank lines and unneeded
* whitespace in general. Cleaned up unget()'s.
* 03-Aug-84 Keie Several bug fixes from Ed Keizer, Vrije Universitet.
* -- corrected arg. count in -D and pre-defined
* macros. Also, allow \n inside macro actual parameter
* lists.
* 06-Aug-84 MM If debugging, dump the preset vector at startup.
* 12-Aug-84 MM/SCK Some small changes from Sam Kendall
* 15-Aug-84 Keie/MM cerror, cwarn, etc. take a single string arg.
* cierror, etc. take a single int. arg.
* changed LINE_PREFIX slightly so it can be
* changed in the makefile.
* 31-Aug-84 MM USENET net.sources release.
*/
/*)BUILD
$(PROGRAM) = cpp
$(FILES) = { cpp1 cpp2 cpp3 }
$(INCLUDE) = { cppdef.h cpp.h }
$(STACK) = 2000
$(TKBOPTIONS) = {
STACK = 2000
}
*/
#ifdef DOCUMENTATION
title cpp C Pre-Processor
index C pre-processor
synopsis
.s.nf
cpp [-options] [infile [outfile]]
.s.f
description
CPP reads a C source file, expands macros and include
files, and writes an input file for the C compiler.
If no file arguments are given, cpp reads from stdin
and writes to stdout. If one file argument is given,
it will define the input file, while two file arguments
define both input and output files.
The following options are supported. Options may
be given in either case.
.lm +16
.p -16
-Idirectory Add this directory to the list of
directories searched for #include "..." and #include <...>
commands. Note that there is no space between the
"-I" and the directory string. More than one -I command
is permitted. On non-Unix systems "directory" is forced
to upper-case.
.p -16
-Dname=value Define the name as if the programmer wrote
.s
#define name value
.s
at the start of the first file. If "=value" is not
given, a value of "1" will be used.
.s
On non-unix systems, all alphabetic text will be forced
to upper-case.
.s
.p -16
-Uname Undefine the name as if
.s
#undef name
.s
were given. On non-Unix systems, "name" will be forced to
upper-case.
.s.lm -16
The following variables are pre-defined:
.s
Target computer (as appropriate):
.s
pdp11, vax, M68000 m68000 m68k
.s
Target operating system (as appropriate):
.s
rsx, rt11, vms, unix
.s
Target compiler (as appropriate):
.s
decus, vax11c
.s
The implementor may add definitions to this list.
The default definitions match the definition of the
host computer, operating system, and C compiler.
.s
The following are always available unless undefined:
.lm +16
.p -12
__FILE__ The input (or #include) file being compiled
(as a quoted string).
.p -12
__LINE__ The line number being compiled.
.p -12
__DATE__ The date and time of compilation as
a Unix ctime quoted string (the trailing newline is removed).
Thus,
.s
printf("Bug at line %d,", __LINE__);
printf(" source file %s", __FILE__);
printf(" compiled on %s", __DATE__);
.p -16
-Xnumber Enable debugging code. If no value is
given, a value of 1 will be used. (For maintenance of
CPP only.)
.s.lm -16
Draft Ansi Standard Considerations
Comments are removed from the input text. The comment
is replaced by a single space character. This differs
from usage on some existing preprocessors (but it follows
the Draft Ansi C Standard).
Note that arguments may be concatenated as follows:
.s.nf
#define I(x)x
#define CAT(x,y)I(x)y
int value = CAT(1,2);
.s.f
If the above macros are defined and invoked without extraneous
spaces, they will be transportable to other implementations.
Unfortunately, this will not properly expand
.s.nf
int CAT(foo,__LINE__);
int CAT(foo,__LINE__);
.s.f
as __LINE__ is copied into the input stream, yielding
"foo__LINE__" in both cases, rather than the expected
"foo123", "foo124", which would result if __LINE__
were expanded and the result copied into the input stream.
Macro formal parameters are not recognized within
quoted strings and character constants in macro definitions.
CPP implements most of the ANSI draft standard.
You should be aware of the following differences:
.lm +4
.s.i-4;o###In the draft standard, the _\n (backslash-newline)
character is "invisible" to all processing. In this implementation,
it is invisible to strings, but acts as "whitespace" (token-delimiter)
outside of strings. This considerably simplifies error
message handling.
.s.i-4;o###The following new features of C are processed by cpp:
.s
.br;####_#elif expression####(_#else _#if)
.br;####'_\xNNN'#############(Hexadecimal constants)
.br;####'_\a'################(Ascii BELL [silly])
.br;####'_\v'################(Ascii VT)
.br;####_#if defined NAME####(1 if defined, 0 if not)
.br;####_#if defined (NAME)##(1 if defined, 0 if not)
.br;####_unary +#############(gag me with a spoon)
.s.i-4;o###The draft standard has extended C, adding a string
concatenation operator, where
.s
"foo" "bar"
.s
is regarded as the single string "foobar". (This does not
affect CPP's processing.)
.s.lm -4
error messages
Many. CPP prints warning messages if you try to use
multiple-byte character constants (non-transportable)
or if you #undef a symbol that was not defined.
bugs
Cpp prints spurious error or warning messages in #if
sequences such as the following:
.s
.br;####_#define foo 0
.br;####_#if (foo != 0) _? (100 / foo) _: 0
.br;####_#undef foo
.br;####_#if ((defined(foo)) _? foo _: 0) == 1
.s
Cpp should suppress the error message if the expression's
value is already known.
author
Martin Minow
#endif
#include <stdio.h>
#include <ctype.h>
#include "cppdef.h"
#include "cpp.h"
/*
* Commonly used global variables:
* line is the current input line number.
* wrongline is set in many places when the actual output
* line is out of sync with the numbering, e.g,
* when expanding a macro with an embedded newline.
*
* Note that line and wrongline are initialized in such
* a way that the code starts by outputting a #line.
*
* token holds the last identifier scanned (which might
* be a candidate for macro expansion).
* errors is the running cpp error counter.
* infile is the head of a linked list of input files (extended by
* #include and macros being expanded). infile always points
* to the current file/macro. infile->parent to the includer,
* etc. infile->fp is NULL if this input stream is a macro.
* debug (only when DEBUG is set) is the debugging level; it is
* stepped up and down by #debug and #nodebug.
*/
int line; /* Current line number */
int wrongline; /* Force #line to compiler */
char token[IDMAX]; /* Current input token */
int errors; /* cpp error counter */
FILEINFO *infile = NULL; /* Current input file */
#if DEBUG
int debug; /* TRUE if debugging now */
#endif
/*
* This counter is incremented when a macro expansion is initiated.
* If it exceeds a built-in value, the expansion stops -- this tests
* for a runaway condition:
* #define X Y
* #define Y X
* X
* It is decremented, in get(), when the macro expansion terminates.
*/
int recursion; /* Infinite recursion counter */
/*
* instring is set TRUE when a string is scanned. It modifies the
* behavior of the "get next character" routine -- comments aren't
* skipped over, and \<newline> is silently absorbed. It is set
* by routines that scan "string" and 'char'. It is essentially
* a parameter to the get() routine, but made global for speed.
*/
int instring = FALSE; /* TRUE if scanning string */
/*
* work[] and workp are used to store one piece of text in a temporary
* buffer. To initialize storage, set workp = work. To store one
* character, call save(c); (This will fatally exit if there isn't
* room.) To terminate the string, call save(EOS). Note that
* the work buffer is used by several subroutines -- be sure your
* data won't be overwritten.
*/
char work[NWORK]; /* Work buffer */
char *workp; /* Work buffer pointer */
/*
* flevel and tlevel are used to compute #if nesting. If flevel == 0,
* cpp is emitting tokens; if > 0, it is skipping over tokens to an
* #else or #endif. Hard to understand code in control() modifies the
* counters when #else, #endif, or another #if is processed.
*/
static int flevel = 0; /* #ifdef false level */
static int tlevel = 0; /* #ifdef true level */
/*
* incdir[] and ninclude store the -i directories (and the system-specific
* #include <...> directories).
*/
static char *incdir[NINCLUDE]; /* -i directories */
static int ninclude; /* Number of -i directories */
/*
* This is the table used to predefine target machine and operating
* system designators. It may need hacking for specific circumstances.
* Note: it is not clear that this is part of the Ansi Standard.
*
* MACHINE, SYSTEM and COMPILER come from cppdef.h; each may expand to
* several comma-separated strings. The list ends with a NULL entry.
*/
static char *preset[] = { /* names defined at cpp start */
#ifdef MACHINE
MACHINE,
#endif
#ifdef SYSTEM
SYSTEM,
#endif
#ifdef COMPILER
COMPILER,
#endif
#if DEBUG
"decus_cpp", /* Ourselves! */
#endif
NULL /* Must be last */
};
/*
* The value of these predefined symbols must be recomputed whenever
* they are evaluated. The order must not be changed.
* The list ends with a NULL entry.
*/
static char *magic[] = { /* Note: order is important */
"__LINE__",
"__FILE__",
NULL /* Must be last */
};
main(argc, argv)
int argc;
char *argv[];
{
register int i;
#if HOST == SYS_VMS
argc = getredirection(argc, argv);
#endif
initdefines();
switch (dooptions(argc, argv)) {
case 0: /* No args? */
case 1: /* No files, stdin -> stdout */
#if HOST == SYS_UNIX
work[0] = EOS; /* Unix can't find stdin name */
#else
fgetname(stdin, work); /* Vax-11C, Decus C know name */
#endif
break;
case 3:
#if HOST == SYS_VMS
/*
* Reopen stdout with "vanilla rms" attributes.
*/
if ((i = creat(argv[2], 0, "rat=cr", "rfm=var")) == -1
|| dup2(i, fileno(stdout)) == -1) {
#else
if (freopen(argv[2], "w", stdout) == NULL) {
#endif
perror(argv[2]);
cerror("Can't open output file \"%s\"", argv[2]);
exit(IO_ERROR);
} /* Continue by opening input */
case 2: /* One file -> stdout */
if (freopen(argv[1], "r", stdin) == NULL) {
perror(argv[1]);
cerror("Can't open input file \"%s\"", argv[1]);
exit(IO_ERROR);
}
strcpy(work, argv[1]); /* Remember input filename */
break;
default:
exit(IO_ERROR); /* Can't happen */
}
setincdirs(); /* Setup -I include directories */
addfile(stdin, work); /* "open" main input file */
#if DEBUG
if (debug > 0)
dumpdef("preset #define symbols");
#endif
cppmain(); /* Process main file */
if ((i = flevel + tlevel) != 0)
cierror("Inside #ifdef block at end of input, depth = %d", i);
fclose(stdout);
if (errors > 0) {
fprintf(stderr, (errors == 1)
? "%d error in preprocessor"
: "%d errors in preprocessor", errors);
exit(IO_ERROR);
}
exit(IO_NORMAL);
}
FILE_LOCAL
cppmain()
/*
* Main process for cpp -- copies tokens from the current input
* stream (main file, include file, or a macro) to the output
* file.
*
* Takes no arguments and returns no value. It returns once get()
* delivers EOF_CHAR. Output goes to stdout. The globals line and
* wrongline are modified; flevel, infile, token and type[] are read.
*/
{
register int c; /* Current character */
register int counter; /* newlines and spaces */
extern int output(); /* Output one character */
/*
* Explicitly output a #line at the start of cpp output so
* that lint (etc.) knows the name of the original source
* file. If we don't do this explicitly, we may get
* the name of the first #include file instead.
*/
line = 1;
sharp();
line = 0;
/*
* This loop is started "from the top" at the beginning of each
* line. wrongline is set TRUE in many places if it is necessary
* to write a #line record. (But we don't write them when expanding
* macros.)
* The counter variable has two different uses: at
* the start of a line, it counts the number of blank lines that
* have been skipped over. These are then either output via
* #line records or by outputting explicit blank lines.
*
* When expanding tokens within a line, the counter remembers
* whether a blank/tab has been output. These are dropped
* at the end of the line, and replaced by a single blank
* within lines.
*/
for (;;) {
/*
* Consume lines until one starts with a real token. counter is
* stepped once for every line absorbed here, whether it was
* blank, a #control line, or skipped because flevel > 0.
*/
for (counter = 0;; counter++) {
while (type[(c = get())] == SPA) /* Skip leading blanks */
; /* in this line. */
if (c == '\n') /* If line's all blank, */
; /* Do nothing now */
else if (c == '#') /* Is 1st non-space '#' */
control(); /* Yes, do a #command */
else if (c == EOF_CHAR) /* At end of file? */
break;
else if (flevel > 0) /* #ifdef false? */
skipnl(); /* Skip to newline */
else {
break; /* Actual token */
}
}
if (c == EOF_CHAR) /* Exit process at */
break; /* End of file */
/*
* If the loop didn't terminate because of end of file, we
* know there is a token to compile. First, clean up after
* absorbing newlines. counter has the number we skipped.
*/
if (wrongline && infile->fp != NULL)
sharp(); /* Output # line number */
else if (counter > 0) { /* Get rid of the */
if (counter > 4) /* pending newlines. */
sharp(); /* (lots of them here) */
else { /* If just a few, stuff */
while (--counter >= 0) /* them out ourselves */
putchar('\n');
}
}
/*
* Process each token on this line. counter
* is now used to skip over trailing blanks.
*/
for (counter = 0; c != EOF_CHAR && c != '\n';) {
if (type[c] == SPA)
counter++;
else {
if (counter > 0) { /* Any pending */
putchar(' '); /* whitespace is output */
counter = 0; /* restart the counter */
}
switch (type[c]) {
case LET:
if (!macroid(c)) /* Scan ID; do macros */
fputs(token, stdout); /* Just output if not */
break;
case DIG: /* Digits and '.' may */
case DOT: /* begin numbers */
scannumber(c, output); /* Output the number */
break;
case QUO: /* char or string const */
scanstring(c, output); /* Copy it to output */
break;
default: /* Some other character */
cput(c); /* Just output it */
break;
} /* Switch ends */
} /* if not a space */
c = get(); /* And get another */
}
if (c == '\n') { /* Compiling at EOL? */
putchar('\n'); /* Output newline, if */
if (infile->fp == NULL) /* Expanding a macro, */
wrongline = TRUE; /* Output # line later */
}
} /* Continue until EOF */
}
FILE_LOCAL
output(c)
int		c;
/*
 * Write the character c to stdout. This is the function form of
 * cput(), for callers such as scanstring() and scannumber() that
 * need a routine to pass as an argument. When COMMENT_INVISIBLE
 * is set, the COM_SPACE marker is dropped instead of being written.
 */
{
#if COMMENT_INVISIBLE
	if (c != COM_SPACE)		/* Marker is never written	*/
#endif
	    putchar(c);
}
/*
 * The file name most recently written by sharp(), in malloc storage,
 * or NULL if no name has been written yet.
 */
static char	*sharpfilename = NULL;

FILE_LOCAL
sharp()
/*
 * Output a line number line: "#<LINE_PREFIX> <line>", followed by
 * the quoted file name when the current input is a file (not a macro)
 * and its name differs from the one written last time. The name is
 * infile->progname (set by #line) if there is one, else
 * infile->filename. Clears wrongline.
 */
{
	register char	*name;

	printf("#%s %d", LINE_PREFIX, line);
	if (infile->fp != NULL) {
	    name = (infile->progname != NULL)
		? infile->progname : infile->filename;
	    /*
	     * Write the name the first time through and whenever it
	     * changes. The || keeps streq() away from a NULL pointer.
	     */
	    if (sharpfilename == NULL || !streq(name, sharpfilename)) {
		if (sharpfilename != NULL)
		    free(sharpfilename);
		sharpfilename = savestring(name);
		printf(" \"%s\"", name);
	    }
	}
	putchar('\n');
	wrongline = FALSE;
}
/*
* Process #control lines
*
* ISIFNDEF, ISIFDEF and ISIF are the argument values for doif().
* doif() compares the first two directly with a TRUE/FALSE "found"
* result, so their values are fixed; ISIF only has to differ.
*/
#define ISIFNDEF FALSE /* Must be FALSE */
#define ISIFDEF TRUE /* Must be TRUE */
#define ISIF (TRUE + 1) /* Must have onebit set */
/* Compile-time check: the build stops if ISIF collides with ISIFNDEF. */
#if (ISIF == ISIFNDEF)
error << The above won't work >>
#endif
/*
* The following is generated by a "perfect hash" routine.
*
* The hash of a keyword is its length plus the px_assoc[] values of
* its first and last letters. Each L_xxx value is the hash of the
* corresponding keyword and also its index in px_table[].
*/
#define L_else 4
#define L_line 5
#define L_define 6
#define L_elif 7
#define L_endif 8
#define L_if 9
#define L_undef 10
#define L_include 11
#define L_ifdef 12
#define L_ifndef 13
#define L_assert 14
#define L_option 15
#define FIRST 'a' /* Lowest letter covered by px_assoc */
#define LAST 'u' /* Highest letter covered by px_assoc */
/*
* Per-letter hash contribution, indexed by (letter - FIRST).
* -1 marks letters that neither start nor end any keyword.
*/
static char px_assoc[] = {
0, /* 'a' */
-1, /* 'b' */
-1, /* 'c' */
0, /* 'd' */
0, /* 'e' */
3, /* 'f' */
-1, /* 'g' */
-1, /* 'h' */
4, /* 'i' */
-1, /* 'j' */
-1, /* 'k' */
1, /* 'l' */
-1, /* 'm' */
9, /* 'n' */
0, /* 'o' */
-1, /* 'p' */
-1, /* 'q' */
-1, /* 'r' */
-1, /* 's' */
8, /* 't' */
2, /* 'u' */
};
/* Keyword spelling for each hash value; NULL where no keyword hashes. */
static char *px_table[] = {
NULL, /* 0 */
NULL, /* 1 */
NULL, /* 2 */
NULL, /* 3 */
"else", /* 4 */
"line", /* 5 */
"define", /* 6 */
"elif", /* 7 */
"endif", /* 8 */
"if", /* 9 */
"undef", /* 10 */
"include", /* 11 */
"ifdef", /* 12 */
"ifndef", /* 13 */
"assert", /* 14 */
"option", /* 15 */
};
FILE_LOCAL
control()
/*
 * Process #control lines. Simple commands are processed inline,
 * while complex commands have their own subroutines.
 *
 * Called after the leading '#' has been read. The keyword is scanned
 * into token[] and looked up in the perfect hash table. While cpp is
 * skipping (flevel > 0) only the conditional directives are acted
 * upon. The rest of the control line is discarded before returning.
 */
{
	register int	c;
	register char	*tp;
	register int	hash;
	char		*ep;

	c = skipws();
	if (c == '\n' || c == EOF_CHAR)
	    return;
	scanid(token, c);
	/*
	 * Look for keyword (string of alpha) in the perfect hash table.
	 * Set hash to the index (L_xxx value) or 0 if not found
	 */
	if (token[0] < FIRST || token[0] > LAST)
	    hash = 0;
	else {
	    for (tp = token; isalpha(*tp); tp++)
		;
	    hash = (tp - token);
	    if (*--tp < FIRST || *tp > LAST)
		hash = 0;
	    else {
		hash += px_assoc[*token - FIRST] + px_assoc[*tp - FIRST];
		/*
		 * Words that are not keywords can hash to any value,
		 * so make sure the index is inside px_table[] before
		 * using it.
		 */
		if (hash < 0
		 || hash >= (int) (sizeof px_table / sizeof px_table[0])
		 || px_table[hash] == NULL
		 || !streq(token, px_table[hash]))
		    hash = 0;
	    }
	}
	/*
	 * hash is now set to a unique value corresponding to the
	 * control keyword (or zero if it's not in the table).
	 */
	if (infile->fp == NULL)
	    cwarn("Control line \"%s\" within macro expansion", token);
	if (flevel > 0) {
	    switch (hash) {
	    case L_line:			/* These aren't		*/
	    case L_include:			/* interesting		*/
	    case L_define:			/* if we		*/
	    case L_undef:			/* aren't		*/
	    case L_assert:			/* compiling.		*/
	    case L_option:			/* New option, too.	*/
		skipnl();
		return;
	    }
	}
	switch (hash) {
	case L_line:
	    /*
	     * Parse the line to update the line number and "progname"
	     * field and line number for the next input line.
	     * Set wrongline to force it out later.
	     */
	    c = skipws();
	    workp = work;			/* Save name in work	*/
	    while (c != '\n' && c != EOF_CHAR) {
		save(c);			/* Quotes are kept, so	*/
		c = get();			/* the name is findable	*/
	    }
	    unget();
	    save(EOS);
	    /*
	     * Split #line argument into <line-number> and <name>
	     * We subtract 1 as we want the number of the next line.
	     */
	    line = atoi(work) - 1;		/* Reset line number	*/
	    for (tp = work; isdigit(*tp) || type[*tp] == SPA; tp++)
		;				/* Skip over digits	*/
	    /*
	     * The opening quote stops the scan above, so a quoted name
	     * keeps any leading digits. Drop the quotes here.
	     */
	    if (*tp == '"') {
		tp++;				/* Skip over left quote	*/
		if ((ep = strrchr(tp, '"')) != NULL)
		    *ep = EOS;			/* And ignore right one	*/
	    }
	    if (*tp != EOS) {			/* Got a filename, so:	*/
		if (infile->progname != NULL)	/* Give up the old name	*/
		    free(infile->progname);	/* if it's allocated.	*/
		infile->progname = savestring(tp);
	    }
	    wrongline = TRUE;			/* Force output later	*/
	    break;

	case L_include:
	    doinclude();
	    break;

	case L_define:
	    dodefine();
	    break;

	case L_undef:
	    doundef();
	    break;

	case L_ifdef:
	    doif(ISIFDEF);
	    break;

	case L_ifndef:
	    doif(ISIFNDEF);
	    break;

	case L_elif:
	case L_else:
	    if (flevel == 0) {			/* Compiling now?	*/
		if (tlevel == 0)		/* Yes, but in an if?	*/
		    cerror("#%s without corresponding #if",
			(hash == L_elif) ? "elif" : "else");
		else {				/* Ok:			*/
		    flevel++;			/* Make it false.	*/
		    tlevel--;			/* False isn't true	*/
		}
	    }
	    else if (--flevel == 0) {		/* Drop false count,	*/
		tlevel++;			/* Step true if need be	*/
		wrongline = TRUE;		/* Need #line now	*/
	    }
	    else {				/* Not compiling yet so	*/
		flevel++;			/* Keep it false	*/
	    }
	    if (hash == L_else)			/* Else stops here	*/
		break;
	    if (flevel > 0)			/* #elif, fake an	*/
		flevel--;			/* #endif and fall	*/
	    else if (tlevel > 0)		/* into #if		*/
		tlevel--;			/* processor.		*/
	    /* Fall through */
	case L_if:
	    doif(ISIF);
	    break;

	case L_endif:
	    if (flevel > 0) {			/* If not compiling	*/
		if (--flevel == 0)		/* Maybe start, if so,	*/
		    wrongline = TRUE;		/* Need a #line first	*/
	    }
	    else if (tlevel > 0)		/* Still compiling, but	*/
		tlevel--;			/* Drop true counter	*/
	    else {
		cerror("#endif without corresponding #if", NULLST);
	    }
	    break;

	case L_assert:
	    if (eval() == 0)
		cerror("Preprocessor assertion failure", NULLST);
	    break;

	case L_option:
	    /*
	     * #option is provided to pass "pragmas" to later
	     * passes of the compiler. cpp doesn't have any yet.
	     */
	    printf("#option ");
	    while ((c = get()) != '\n' && c != EOF_CHAR)
		cput(c);
	    unget();
	    break;

	default:
#if DEBUG
	    /*
	     * For debugging, we allow #debug and #nodebug
	     */
	    if (streq("debug", token)) {
		debug++;
		break;
	    }
	    if (streq("nodebug", token)) {
		debug--;
		break;
	    }
#endif
	    /*
	     * Undefined #control keyword.
	     * Note: the correct behavior may be to warn and
	     * pass the line to a subsequent compiler pass.
	     * This would allow #asm or similar extensions.
	     */
	    cwarn("Illegal # line", NULLST);
	    skipws();
	    unget();
	    break;
	}
#if 1
	skipnl();			/* Dump rest of control line	*/
#else
	if (skipws() != '\n') {
	    /*
	     * Some people have written:
	     *	#ifdef foobar
	     *	...
	     *	#endif foobar
	     *
	     * Vax-11 C doesn't print a warning, so we don't either.
	     */
	    cwarn("Unrecognized text after control command", NULLST);
	    while ((c = get()) != '\n' && c != EOF_CHAR)
		;
	}
#endif
}
FILE_LOCAL
doif(isifdef)
int isifdef;
/*
* Process an #if, #ifdef, or #ifndef. The latter two are straightforward,
* while #if needs a subroutine of its own to evaluate the expression.
* Eventually, tlevel and flevel are modified accordingly.
*/
{
register int c;
register int found;
if ((c = skipws()) == '\n' || c == EOF_CHAR) {
unget();
goto badif;
}
if (isifdef == ISIF) {
unget();
found = (eval() != 0); /* Evaluate expr, != 0 is TRUE */
isifdef = TRUE; /* #if is now like #ifdef */
}
else {
if (type[c] != LET) /* Next non-blank isn't letter */
goto badif; /* ... is an error */
found = (lookid(c) != NULL); /* Look for it in symbol table */
}
if (flevel == 0 && (isifdef == found))
tlevel++;
else
flevel++;
return;
badif: cerror("#if, #ifdef, or #ifndef without an argument", NULLST);
}
FILE_LOCAL
doinclude()
/*
* Process the #include control line.
*/
{
register int c;
int delim;
delim = skipws();
if (delim != '<' && delim != '"')
goto incerr;
if (delim == '<')
delim = '>';
workp = work;
while ((c = get()) != EOF_CHAR && c != '\n' && c != delim) {
#if COMMENT_INVISIBLE
if (c != COM_SPACE)
save(c);
#else
save(c);
#endif
}
save(EOS);
if (c != delim)
goto incerr;
skipnl(); /* Ignore rest of #include line */
unget('\n'); /* Force nl after includee */
openinclude(work, (delim == '"'));
return;
incerr: cerror("#include syntax error", NULLST);
return;
}
FILE_LOCAL
openinclude(filename, searchlocal)
char *filename;                     /* Input file name               */
int searchlocal;                    /* TRUE if #include "file"       */
/*
 * Locate and open an include file, linking it into the list of
 * active files (via openfile()).
 *
 * If searchlocal is TRUE the name is first tried relative to the
 * directory of the file currently being read (infile->filename).
 * After that each entry of incdir[0 .. ninclude-1] is tried in order.
 * The routine returns as soon as one open succeeds; if none does,
 * cfatal() is called with "Cannot open include file".
 *
 * Every constructed path is checked against the size of the local
 * work area before it is formatted; an over-long path is reported
 * with cfatal("Filename work buffer overflow").
 */
{
    register char *tp;              /* -> last directory delimiter   */
    register int i;
    int dirlen;                     /* Length of directory prefix    */
    char tmpname[NWORK];            /* Filename work area            */

    if (searchlocal) {
        /*
         * Look in local directory first
         */
#if HOST == SYS_UNIX
        /*
         * Try to open filename relative to the directory of the current
         * source file (as opposed to the current directory). (ARF, SCK).
         */
        if (filename[0] == '/'
         || (tp = strrchr(infile->filename, '/')) == NULL)
            strcpy(tmpname, filename);
        else {
            /*
             * Keep the prefix length in an int: "%.*s" requires an
             * int argument, and a raw pointer difference need not
             * be one.  Needed room is dirlen + '/' + name + EOS.
             */
            dirlen = (int) (tp - infile->filename);
            if (dirlen + (int) strlen(filename) >= (NWORK - 1)) {
                cfatal("Filename work buffer overflow", NULLST);
                return;             /* In case cfatal() comes back   */
            }
            sprintf(tmpname, "%.*s/%s", dirlen,
                infile->filename, filename);
        }
        if (openfile(tmpname))
            return;
#else
        /*
         * Same problem, but for DEC operating systems.
         * Filenames may have "device:[directory]"
         */
        if (strchr(filename, ']') != NULL
         || strchr(filename, ':') != NULL
         || (   (tp = strrchr(infile->filename, ']')) == NULL
             && (tp = strrchr(infile->filename, ':')) == NULL))
            strcpy(tmpname, filename);
        else {
            /*
             * The prefix includes the delimiter itself, hence + 1.
             * Needed room is dirlen + name + EOS.
             */
            dirlen = (int) (tp - infile->filename) + 1;
            if (dirlen + (int) strlen(filename) >= NWORK) {
                cfatal("Filename work buffer overflow", NULLST);
                return;             /* In case cfatal() comes back   */
            }
            sprintf(tmpname, "%.*s%s", dirlen,
                infile->filename, filename);
        }
        if (openfile(tmpname))
            return;
#endif
    }
    /*
     * Look in any directories specified by -I command line
     * arguments, then in the builtin search list.
     */
    for (i = 0; i < ninclude; i++) {
        if (strlen(incdir[i]) + strlen(filename) >= (NWORK - 1))
            cfatal("Filename work buffer overflow", NULLST);
        else {
#if HOST == SYS_UNIX
            if (filename[0] == '/')
                strcpy(tmpname, filename);
            else {
                sprintf(tmpname, "%s/%s", incdir[i], filename);
            }
#else
            if (strrchr(filename, ']') != NULL
             || strrchr(filename, ':') != NULL)
                strcpy(tmpname, filename);
            else {
                sprintf(tmpname, "%s%s", incdir[i], filename);
            }
#endif
            if (openfile(tmpname))
                return;
        }
    }
    /*
     * No sense continuing if #include file isn't there.
     */
    cfatal("Cannot open include file \"%s\"", filename);
}
FILE_LOCAL int
openfile(filename)
char *filename;
/*
 * Try to open filename for reading.  On success the stream is handed
 * to addfile(), which links it into the list of open files, and TRUE
 * is returned; if the open fails, FALSE is returned and nothing else
 * happens.  This is called only from openinclude() above.
 */
{
    register FILE *stream;

    stream = fopen(filename, "r");
    if (stream != NULL) {
#if DEBUG
        if (debug)
            fprintf(stderr, "Reading from \"%s\"\n", filename);
#endif
        addfile(stream, filename);
        return (TRUE);
    }
    return (FALSE);
}
FILE_LOCAL
addfile(fp, filename)
FILE *fp;                           /* Open file pointer             */
char *filename;                     /* Name of the file              */
/*
 * Set up the bookkeeping for a file that has just been opened.
 * getfile() supplies a FILEINFO with an NBUFF-sized buffer; this
 * routine records the stream in it, preloads the buffer with a lone
 * newline, resets the global line counter and sets wrongline.
 * It is called from openfile() above for #include files.
 */
{
    register FILEINFO *info;
    extern FILEINFO *getfile();

    info = getfile(NBUFF, filename);
    info->fp = fp;                  /* Remember the stream           */
    wrongline = TRUE;               /* Force out initial #line       */
    line = 0;                       /* Note correct line number      */
    info->buffer[0] = '\n';         /* Fake initial newline to       */
    info->buffer[1] = EOS;          /* initialize for first read     */
}
FILE_LOCAL
setincdirs()
/*
* Append system-specific directories to the include directory list.
* Called only when cpp is started.
*
* Each host section stores its built-in directories at incdir[ninclude]
* and advances ninclude. Each section also defines MAXINCLUDE as
* NINCLUDE minus the largest number of entries that section can append.
* Although the #define sits inside this function body, it is an
* ordinary preprocessor macro and is what dooptions() compares
* ninclude against when handling -I.
*
* NOTE(review): no section below defines MAXINCLUDE when HOST matches
* none of the four systems -- confirm that it is defined elsewhere
* for such hosts.
*/
{
#if HOST == SYS_UNIX
incdir[ninclude++] = "/usr/include";
/* One built-in directory is appended on Unix. */
#define MAXINCLUDE (NINCLUDE - 1)
#endif
#if HOST == SYS_VMS
extern char *getenv();
/* C$LIBRARY: is only searched when getenv() finds a value for it. */
if (getenv("C$LIBRARY") != NULL)
incdir[ninclude++] = "C$LIBRARY:";
incdir[ninclude++] = "SYS$LIBRARY:";
/* At most two built-in directories are appended on VMS. */
#define MAXINCLUDE (NINCLUDE - 2)
#endif
#if HOST == SYS_RSX
extern int $$rsts; /* TRUE on RSTS/E */
extern int $$pos; /* TRUE on PRO-350 P/OS */
extern int $$vms; /* TRUE on VMS compat. */
/* Exactly one of the four branches below runs. */
if ($$pos) { /* P/OS? */
incdir[ninclude++] = "SY:[ZZDECUSC]";
incdir[ninclude++] = "LB:[1,5]";
}
else if ($$rsts) { /* RSTS/E? */
incdir[ninclude++] = "SY:@"; /* User-defined account */
incdir[ninclude++] = "C:"; /* Decus-C library */
incdir[ninclude++] = "LB:[1,1]"; /* RSX library */
}
else if ($$vms) { /* VMS compatibility? */
incdir[ninclude++] = "C:";
}
else { /* Plain old RSX/IAS */
incdir[ninclude++] = "LB:[1,1]";
}
/* The RSTS/E branch is the largest, with three entries. */
#define MAXINCLUDE (NINCLUDE - 3)
#endif
#if HOST == SYS_RT11
extern int $$rsts; /* RSTS/E emulation? */
if ($$rsts)
incdir[ninclude++] = "SY:@"; /* User-defined account */
incdir[ninclude++] = "C:"; /* Decus-C library disk */
incdir[ninclude++] = "SY:"; /* System (boot) disk */
/* Up to three entries: the optional SY:@ plus the two fixed ones. */
#define MAXINCLUDE (NINCLUDE - 3)
#endif
}
FILE_LOCAL int
dooptions(argc, argv)
int argc;
char *argv[];
/*
 * Scan the command line once at cpp startup.  Arguments that begin
 * with '-' are treated as options (the option letter is accepted in
 * either case) and are consumed; all other arguments are packed down
 * towards the front of argv[], starting at argv[1].  The return value
 * is the new argument count, i.e. one more than the number of
 * non-option arguments kept.  More than two kept arguments is
 * reported as an error.
 */
{
    register char *text;            /* Walks the current argument    */
    register DEFBUF *def;           /* Symbol created by -D          */
    register int opt;               /* Option letter, upper-cased    */
    int in, out;                    /* Read and write argv[] indices */
    char *whole;                    /* Entire argument, for messages */

    out = 1;
    for (in = 1; in < argc; in++) {
        whole = text = argv[in];
        if (*text++ != '-') {       /* Not an option: keep it        */
            argv[out++] = argv[in];
            continue;
        }
        opt = *text++;              /* Option byte                   */
        if (islower(opt))           /* Normalize case                */
            opt = toupper(opt);
        switch (opt) {              /* Command character             */
        case 'I':                   /* Include directory             */
            if (ninclude >= MAXINCLUDE)
                cfatal("Too many include directories", NULLST);
            incdir[ninclude++] = text;
            break;

        case 'D':                   /* Define symbol                 */
#if HOST != SYS_UNIX
            zap_uc(text);           /* Force define to U.C.          */
#endif
            /*
             * Split "name=value" in place at the '='.  A bare
             * "-Dname" is treated as "-Dname=1".
             */
            for (; *text != EOS && *text != '='; text++)
                ;
            if (*text != EOS)
                *text++ = EOS;      /* Cut name off, step to value   */
            else
                text = "1";
            /*
             * The name starts just past the "-D" prefix.
             */
            def = defendel(whole + 2, FALSE);
            def->repl = savestring(text);
            def->nargs = DEF_NOARGS;
            break;

        case 'U':                   /* Undefine symbol               */
#if HOST != SYS_UNIX
            zap_uc(text);
#endif
            if (defendel(text, TRUE) == NULL)
                cwarn("\"%s\" wasn't defined", text);
            break;

#if DEBUG
        case 'X':                   /* Debug                         */
            if (isdigit(*text))
                debug = atoi(text);
            else
                debug = 1;
            fprintf(stderr, "Debug set to %d\n", debug);
            break;
#endif

        default:                    /* What is this one?             */
            /*
             * The continuation lines must stay at the left margin:
             * anything in front of them would become part of the
             * message text.
             */
            cwarn("Unknown option \"%s\"\n\
The following options are valid:\n\
-Dsymbol=value\tDefine a symbol with the given (optional) value\n\
-Idirectory\tAdd a directory to the #include search list\n\
-Usymbol\tUndefine symbol\n", whole);
#if DEBUG
            fprintf(stderr, "-Xvalue\tSet internal debug flag\n");
#endif
            break;
        }                           /* Switch on all options         */
    }                               /* For all arguments             */
    if (out > 3) {
        cerror(
            "Too many file arguments. Usage: cpp [input [output]]",
            NULLST);
    }
    return (out);                   /* Return new argc               */
}
#if HOST != SYS_UNIX
FILE_LOCAL
zap_uc(ap)
register char *ap;
/*
 * Convert the string at ap to upper case in place.  Dec operating
 * systems mangle upper-lower case in command lines, so dooptions()
 * uses this on its -D and -U arguments.
 */
{
    for (; *ap != EOS; ap++) {
        /*
         * An explicit 'a'..'z' range test is used instead of
         * islower() so it works with Multinational.
         */
        if (*ap < 'a' || *ap > 'z')
            continue;
        *ap = toupper(*ap);
    }
}
#endif
FILE_LOCAL
initdefines()
/*
 * Enter the built-in symbols into the symbol table.  Three groups
 * are set up, in this order:
 *   the names in preset[]   defined as "1" (static definitions)
 *   the names in magic[]    marked by special argument counts
 *   __DATE__                defined as the quoted ctime() string
 * Called only on cpp startup.
 */
{
    register char **namep;          /* Walks preset[] and magic[]    */
    REG_UNION {
        int i;                      /* Magic argument-count sequence */
        char *p;                    /* __DATE__ replacement text     */
    } t;
    register DEFBUF *dp;
    long tvec;
    extern char *ctime();

    /*
     * Static definitions.  An entry whose name is the empty string
     * is skipped, which lets an implementor remove a predefined
     * symbol by presetting it to "".
     */
    namep = preset;
    while (*namep != NULL) {
        if (**namep != EOS) {
            dp = defendel(*namep, FALSE);
            dp->repl = savestring("1");
            dp->nargs = DEF_NOARGS;
        }
        namep++;
    }
    /*
     * Magic definitions.  Each receives the next argument count
     * below DEF_NOARGS, so the first gets DEF_NOARGS - 1, the
     * second DEF_NOARGS - 2, and so on.  No replacement text is
     * stored for them here.
     */
    t.i = DEF_NOARGS;
    namep = magic;
    while (*namep != NULL) {
        dp = defendel(*namep, FALSE);
        t.i--;
        dp->nargs = t.i;
        namep++;
    }
    /*
     * __DATE__.  The 27-byte buffer holds an opening quote at [0]
     * followed by the copied ctime() string; the byte at [25] is
     * where ctime() leaves its newline, and it is overwritten with
     * the closing quote.
     */
    dp = defendel("__DATE__", FALSE);
    t.p = getmem(27);
    dp->repl = t.p;
    dp->nargs = DEF_NOARGS;
    time(&tvec);
    t.p[0] = '"';
    strcpy(t.p + 1, ctime(&tvec));
    t.p[25] = '"';                  /* Overwrite newline             */
}
#if HOST == SYS_VMS
/*
* getredirection() is intended to aid in porting C programs
* to VMS (Vax-11 C) which does not support '>' and '<'
* I/O redirection. With suitable modification, it may be
* useful for other portability problems as well.
*/
FILE_LOCAL int
getredirection(argc, argv)
int argc;
char **argv;
/*
* Process vms redirection arg's. Exit if any error is seen.
* If getredirection() processes an argument, it is erased
* from the vector. getredirection() returns a new argc value.
*
* Arguments starting with '<' reopen stdin on the named file.
* Arguments starting with '>' or '>>' redirect stdout to the named
* file. All other arguments are packed down towards the front of
* argv[], starting at argv[1], and argv[] is NULL-terminated after
* the last one kept.
*
* Warning: do not try to simplify the code for vms. The code
* presupposes that getredirection() is called before any data is
* read from stdin or written to stdout.
*
* Normal usage is as follows:
*
* main(argc, argv)
* int argc;
* char *argv[];
* {
* argc = getredirection(argc, argv);
* }
*/
{
register char *ap; /* Argument pointer */
int i; /* argv[] index */
int j; /* Output index */
int file; /* File_descriptor */
extern int errno; /* Last vms i/o error */
for (j = i = 1; i < argc; i++) { /* Do all arguments */
switch (*(ap = argv[i])) {
case '<': /* <file */
/* ++ap steps over the '<' so ap names the file */
if (freopen(++ap, "r", stdin) == NULL) {
perror(ap); /* Can't find file */
exit(errno); /* Is a fatal error */
}
break;
case '>': /* >file or >>file */
/* *++ap steps over the first '>' and looks at what follows */
if (*++ap == '>') { /* >>file */
/*
* If the file exists, and is writable by us,
* call freopen to append to the file (using the
* file's current attributes). Otherwise, create
* a new file with "vanilla" attributes as if the
* argument was given as ">filename".
* access(name, 2) returns zero if we can write on
* the specified file.
*/
/* ++ap steps over the second '>' so ap names the file */
if (access(++ap, 2) == 0) {
if (freopen(ap, "a", stdout) != NULL)
break; /* Exit case statement */
perror(ap); /* Error, can't append */
exit(errno); /* After access test */
} /* If file accessible */
}
/*
* Reached for ">file", and for ">>file" when the access()
* test failed; in both cases ap points at the file name.
*
* On vms, we want to create the file using "standard"
* record attributes. creat(...) creates the file
* using the caller's default protection mask and
* "variable length, implied carriage return"
* attributes. dup2() associates the file with stdout.
*/
if ((file = creat(ap, 0, "rat=cr", "rfm=var")) == -1
|| dup2(file, fileno(stdout)) == -1) {
perror(ap); /* Can't create file */
exit(errno); /* is a fatal error */
} /* If '>' creation */
break; /* Exit case test */
default:
argv[j++] = ap; /* Not a redirector */
break; /* Exit case test */
}
} /* For all arguments */
argv[j] = NULL; /* Terminate argv[] */
return (j); /* Return new argc */
}
#endif
More information about the Comp.sources.unix
mailing list