number recognition tools
Andy Glew
aglew at oberon.csg.uiuc.edu
Fri Jan 19 10:51:20 AEST 1990
This is a repost of the number recognition tools, with a working shar
file, and a bug fix courtesy of Joseph Pepin.
This post has compiled and successfully completed its test on
both a SUN3 and a DECSTATION 3100.
------ CUT HERE -----------------------------------------------------------
#! /bin/sh
# This is a shell archive. Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file". To overwrite existing
# files, type "sh file -c". You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g.. If this archive is complete, you
# will see the following message at the end:
# "End of shell archive."
# Contents: Makefile README bug.JDP debug.h getnumber.c getnumber.man
# number.c number.man test.c
# Wrapped by aglew at rigel.csg.uiuc.edu on Thu Jan 18 17:54:52 1990
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'Makefile' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'Makefile'\"
else
echo shar: Extracting \"'Makefile'\" \(237 characters\)
sed "s/^X//" >'Makefile' <<'END_OF_FILE'
X# Build rules for the number recognition library, its test driver and
X# the getnumber command line wrapper.
XCFLAGS=-g
XCC=FP='' /bin/cc
Xall: test number.o getnumber
Xtests: test
X# Recipe lines must begin with a tab character.
Xtest: test.o number.o
X	${CC} -g -DTEST -o test test.o number.o -lm
Xgetnumber: getnumber.o number.o
X	${CC} -g -o getnumber getnumber.o number.o -lm
X# number.c includes debug.h, so rebuild when either changes.
Xnumber.o: number.c debug.h
X
END_OF_FILE
if test 237 -ne `wc -c <'Makefile'`; then
echo shar: \"'Makefile'\" unpacked with wrong size!
fi
# end of 'Makefile'
fi
if test -f 'README' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'README'\"
else
echo shar: Extracting \"'README'\" \(967 characters\)
sed "s/^X//" >'README' <<'END_OF_FILE'
X/*
X * number
X *
X * This is a directory for a family of routines that convert
X * a string to an integer. The intention is to be able to freely
X * recognize just about any format integer:
X *
X * Decimal 1342334
X * Hex 0xAB43
X * Octal 01377
X * Binary 0b100100011
X * Arbitrary Radix rrr#vvvvvvvv
X *
X * Because people often want to provide a special format over and above
X * those that are already provided
X *
X * Eg. Hex 'ABC'Z
X * Decimal 10.
X * Ignore _ 100_677_888
X *
X * the intent is to define a, possibly parametrized, recognizer function
X * for each format, and then to pass a list of desired recognizer functions
X * for your specific recognizer.
X *
X * This is not intended to be fast, only general.
X *
X * All recognizers are of the form:
X *
X * success = RECOGNIZER( string, resultptr )
X * int success; /* -1 indicates failure */
X * char *string;
X * int *result;
X *
X */
X
X
X
X
X
END_OF_FILE
if test 967 -ne `wc -c <'README'`; then
echo shar: \"'README'\" unpacked with wrong size!
fi
# end of 'README'
fi
if test -f 'bug.JDP' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'bug.JDP'\"
else
echo shar: Extracting \"'bug.JDP'\" \(948 characters\)
sed "s/^X//" >'bug.JDP' <<'END_OF_FILE'
XReturn-Path: <att!cbnewsl!joepepin at uxc.cso.uiuc.edu>
XDate: Thu, 18 Jan 90 15:26:53 EST
XFrom: jdp at tarpon.att.com (Joseph Pepin)
XTo: aglew at oberon.csg.uiuc.edu
XSubject: Re: number recognition tools
XNewsgroups: alt.sources
XIn-Reply-To: <AGLEW.90Jan15115309 at oberon.csg.uiuc.edu>
XOrganization: AT&T Bell Laboratories
X
XTwo serious bugs:
X
X1) The shar file is corrupt: the END-OF-FILE after each man page
Xis preceeded by an "X", causing the next file not to be unshar'ed.
XMaybe the original files were not terminated by a NL.
X
X2) Executing "getnumber -100e-2" dumps core on a 3B2 and a 6386WGS
Xrunning SVR3.2. The problem is you don't malloc enough space
Xfor the right side of an infix expression:
X
XLine 218 of number.c:
X
X right = malloc ... InfixStrLen)));
X
Xshould be:
X
X right = malloc ... InfixStrLen))+1);
X
XI suspect that other systems treat malloc(0) as malloc(1).
X
XPlease fix these bugs and/or post this message: my newsserver
Xdiscourages postings.
X
END_OF_FILE
if test 948 -ne `wc -c <'bug.JDP'`; then
echo shar: \"'bug.JDP'\" unpacked with wrong size!
fi
# end of 'bug.JDP'
fi
if test -f 'debug.h' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'debug.h'\"
else
echo shar: Extracting \"'debug.h'\" \(4128 characters\)
sed "s/^X//" >'debug.h' <<'END_OF_FILE'
X#ifndef DEBUG_H
X
X#define DEBUG_H 1
X
X#ifdef DEBUG
Xint DEBUG_was_defined;
X#endif
X
X/*
X * manual NAME debug.h - Andy Glew's debug header
X *
X * USAGE #define DEBUG 1 #include "debug.h" ... debugf((fmt,vars...));
X *
X * DESCRIPTION
X *
X * The header file "debug.h" may be found in a directory near you. It
X * contains macros to make the production of debugging messages more
X * pleasant.
X *
X * debugf
X *
X * The most important/useful of these macros is debugf((...)). Debugf
X * is a printf (to stdout by default, optionally to stderr or
X * elsewhere). It accepts standard printf format strings and a
X * variable number of arguments. The only syntactic difference is the
X * necessity of double parentheses about the parameter list
X * (necessary because C macros can't have variable numbers of
X * arguments). Debugf usually produces one line of output per call,
X * with a distinctive mark like "Debug in file XXX line NNN".
X *
X * Example: while( ... ) { debugf(("in loop\n")); ... }
X *
X * DEBUG
X *
X * To use debugf: #define DEBUG 1 #include "debug.h" DEBUG must be
X * defined before debug.h is included, either in code or in a -DDEBUG
X * flag when compiled. If DEBUG is not defined when debug.h is
X * included, debugf and other debugging macros occupy no space in
X * your program.
X *
X * nodebugf
X *
X * nodebugf((...)) is syntactic sugar to make it easy to turn debugfs
X * off without having to remove them or go through convolutions
X * setting a debug control variable.
X *
X * debugshow
X *
X * debugshow(var,fmt) produces the quintessential debugging output
X * VARIABLE=VALUE_OF_VARIABLE. fmt is the format string you would use
X * in printf, without the double quotes.
X *
X * Example: int Ingrid=77; debugshow(Ingrid,%d); Produces Debug in file
X * XXX line NNN: Ingrid = 77
X *
X * _debugf
X *
X * _debugf is the name of the function to be used to print the debugging
X * output, printf by default. It can be changed at any time to
X * another varargs function. eprintf is useful - just
X * fprintf(stderr...) although it must be rewritten as a function due
X * to the weaknesses of C. Logging functions, and the like, can also
X * be useful.
X *
X * DebugCondition
X *
X * There are actually several layers of indirection in this macro
X * system:
X *
X * debugshow -> debugf -> _ifdebugf -> _debugf
X *
X * _ifdebugf should not be changed; but the condition DebugCondition
X * which it tests can usefully be changed. By default DebugCondition
X * is defined as (1); it is often nice to set it to a variable that
X * you can patch
X *
X * #define DebugCondition DebugVar int DebugVar = 0; #define DEBUG 1
X * #include "debug.h"
X *
X * I would have made a variable the default except for awkwardnesses
X * some people have about adding modules to the standard C library.
X *
X * Some people like having multiple debug levels, although I don't.
X * These can also be stacked.
X *
X * The function name __FUNC__ should be printed out as soon as the C
X * compiler is fixed.
X *
X * manual
X */
X
X/*
X * DEBUGcode(sl) / DEBUGdecl(decl): expand to their argument when DEBUG is
X * defined and to nothing otherwise, so debug-only statements and
X * declarations vanish from non-debug builds.
X */
X#if defined(DEBUG)
X# define DEBUGcode( sl ) sl
X# define DEBUGdecl( decl ) decl
X#else
X# define DEBUGcode( sl )
X# define DEBUGdecl( decl )
X#endif
X
X/* The "no" variants always expand to nothing, whatever DEBUG is. */
X#define noDEBUGcode( sl )
X#define noDEBUGdecl( sl )
X
X/*
X * double brackets about _debugf's parmlist so that you can do
X * #define _debugf(v) (printf v,uprintf v) which is useful in the
X * kernel
X */
X# if defined(DEBUG)
X/* _debugf defaults to printf unless the includer defined it first. */
X# if !defined(_debugf)
X# define _debugf(parmlist) (printf parmlist)
X# endif
X
X/* DebugCondition can be controlled by the user */
X/* _ifdebugf prints only when DebugCondition is true, otherwise yields 0.
X   DebugCondition is looked up when _ifdebugf is expanded, so the default
X   supplied just below is still seen by every use. */
X# define _ifdebugf(parmlist) ( DebugCondition ? _debugf(parmlist) : 0 )
X# if !defined(DebugCondition)
X# define DebugCondition (1)
X# endif
X# endif /* DEBUG */
X
X/*
X * debugf((fmt,args...)): with DEBUG defined, a comma expression that first
X * prints a "Debug in file F line N " prefix and then the caller's message,
X * each through _ifdebugf.  Without DEBUG it expands to nothing.
X */
X# if defined(DEBUG)
X# define debugf(parmlist) \
X ( _ifdebugf(("Debug in file %s line %d ",__FILE__,__LINE__)), \
X _ifdebugf(parmlist) \
X )
X# else
X# define debugf(parmlist)
X# endif
X/* nodebugf always expands to nothing: a quick way to silence one debugf. */
X# define nodebugf(parmlist)
X
X/*
X * debugshow(var,fmt) - print "var = <value of var>" through debugf.
X *
X *   var  the expression to display; its spelling is used as the label
X *   fmt  the printf conversion for var, written WITHOUT double quotes
X *        (eg. debugshow(Ingrid,%d)); "s cannot be used in the arguments
X *
X * Pre-ANSI preprocessors substitute macro parameters inside string
X * literals, which the original one-line definition depended on.  ANSI
X * preprocessors do not, so when __STDC__ is defined the format string is
X * built with the # stringizing operator instead.
X *
X * Without DEBUG, debugshow expands to nothing.  nodebugshow always
X * expands to nothing.
X */
X#ifdef DEBUG
X# if defined(__STDC__)
X#  define debugshow(var,fmt) debugf((#var " = " #fmt "\n",var))
X# else
X#  define debugshow(var,fmt) debugf(("var = fmt\n",var))
X# endif
X#else
X# define debugshow(var,fmt)
X#endif
X#define nodebugshow(var,fmt)
X
X
X#endif /* DEBUG_H */
X
END_OF_FILE
if test 4128 -ne `wc -c <'debug.h'`; then
echo shar: \"'debug.h'\" unpacked with wrong size!
fi
# end of 'debug.h'
fi
if test -f 'getnumber.c' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'getnumber.c'\"
else
echo shar: Extracting \"'getnumber.c'\" \(899 characters\)
sed "s/^X//" >'getnumber.c' <<'END_OF_FILE'
X/* User level wrapper for Dgetnumber */
X
Xvoid exit();
X
Xint Igetnumber();
Xint Dgetnumber();
X
Xint DorI = 'D';
X
Xchar *format = "%g";
X
Xmain(argc,argv)
X int argc;
X char **argv;
X{
X double dres;
X int ires;
X
X for(;*++argv;) {
X if( !strcmp(*argv,"-i") ) {
X DorI = 'I';
X format = "%d";
X }
X else if( !strcmp(*argv,"-d") ) {
X DorI = 'D';
X format = "%g";
X }
X else if( !strcmp(*argv,"-format") ) {
X format = *++argv;
X }
X else {
X switch( DorI ) {
X default:
X exit(-1);
X case 'D':
X if( Dgetnumber(*argv,&dres) == -1 ) {
X (void)printf("invalid\n");
X }
X else {
X (void)printf(format,dres);
X (void)printf("\n");
X }
X break;
X case 'I':
X if( Igetnumber(*argv,&ires) == -1 ) {
X (void)printf("invalid\n");
X }
X else {
X (void)printf(format,ires);
X (void)printf("\n");
X }
X break;
X }
X }
X }
X exit(0);
X /*NOTREACHED*/
X}
X
X
END_OF_FILE
if test 899 -ne `wc -c <'getnumber.c'`; then
echo shar: \"'getnumber.c'\" unpacked with wrong size!
fi
# end of 'getnumber.c'
fi
if test -f 'getnumber.man' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'getnumber.man'\"
else
echo shar: Extracting \"'getnumber.man'\" \(2080 characters\)
sed "s/^X//" >'getnumber.man' <<'END_OF_FILE'
X.nf
X NAME:
X getnumber \- read a number in arbitrary notation
X
X SYNOPSIS:
X getnumber [-i|-d|-format '%printf-format'] number-string ...
X
X DESCRIPTION:
X getnumber is a program wrapped around the Dgetnumber and Igetnumber
X family of routines (see their man pages). Getnumber converts a number
X given in an almost arbitrary string representation, and prints it
X on stdout as a decimal integer or double precision value,
X or using a printf-like format string.
X
X getnumber is intended to be used in shell scripts that would like
X to be able to recognize numbers typed in by the user in their natural
X format. Eg.
X ARG=0.5M-1
X ...
X value=`getnumber -i $ARG`
X
XINTERFACE:
X Getnumber processes its command line arguments and prints to stdout.
X Stdin is not used.
X
X The command line option
X
X -i -- convert to an integer, using %d format to print
X -d -- convert to a C double, using %g format to print
X
X -format %printf-format
X -- format to be used in printing the result.
X
X Exits with error status -1 on a command line error.
X Prints "invalid" on conversion errors, but continues to process.
X
XBACKGROUND:
X See the man pages for the Dgetnumber and Igetnumber family
X for more details (man number).
X
X The intention is to be able to freely recognize just about any
X format number:
X
X Decimal 1342334
X Hex 0xAB43
X Octal 01377
X Binary 0b100100011
X Arbitrary Radix rrr#vvvvvvvv
X H:M:S 1:20:33
X Real 1.45
X "Meg" 4M
X Expressions (4M-1)*2
X Exponential 1.2E6
X
X Expressions currently include:
X infix binary: | ^ & << >> + - * / % **(exponent)
X prefix unary: - + ~
X midfix grouping: () [] {}
X and it is similarly easy to add new notations.
X
X All number representations and expressions can be intermixed:
X [(2M-1)*4]>>0x03
X
X NOTES:
X
X AUTHOR:
X Andy Glew (aglew at uiuc.edu)
X
X HISTORY:
X Originally written by Andy Glew at McGill University, 1983
X
X BUGS:
X
END_OF_FILE
if test 2080 -ne `wc -c <'getnumber.man'`; then
echo shar: \"'getnumber.man'\" unpacked with wrong size!
fi
# end of 'getnumber.man'
fi
if test -f 'number.c' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'number.c'\"
else
echo shar: Extracting \"'number.c'\" \(19062 characters\)
sed "s/^X//" >'number.c' <<'END_OF_FILE'
X/*
X * number
X *
X * This is a directory for a family of routines that convert
X * a string to a number.
X *
X *
X * The intention is to be able to freely recognize just about any
X * format number:
X *
X * Decimal 1342334
X * Hex 0xAB43
X * Octal 01377
X * Binary 0b100100011
X * Arbitrary Radix rrr#vvvvvvvv
X * H:M:S 1:20:33
X * Real 1.45
X * "Meg" 4M
X * Expressions (4M-1)*2
X * Exponential 1.2E6
X *
X * Because people often want to provide a special format over and above
X * those that are already provided
X *
X * Eg. Hex 'ABC'Z
X * Decimal 10.
X * Ignore _ 100_677_888
X *
X * the intent is to define a, possibly parametrized, recognizer function
X * for each format, and then to pass a list of desired recognizer functions
X * for your specific recognizer.
X *
X * This is not intended to be fast, only general.
X *
X * All recognizers are of the form:
X *
X * success = RECOGNIZER( string, resultptr )
X * int success; { -1 indicates failure }
X * char *string;
X * double *result;
X *
X * Recognition is done bottom up instead of top-down;
X * instead of having a grammar that constrains notation,
X * everything is passed to low-level recognizers that try to recognize
X * the string, perhaps recursively, passing off to other recognizer
X * in case of failure.
X *
X * The current recognizers are:
X *
X * Octal0 0<octal> eg. 0377 = 0xFF
X * SimpleDecimalString <decimal> eg. 10 = 0xA
X * Binary0b 0b<binary> eg. 0b011 = 3
X * Decimal0d 0d<decimal> eg. 0d10 = 0xA
X * Hex0x 0x<hex> eg. 0xA = 10
X * ArbitraryRadix <decimalbase>#<digits> eg. 3#22 = 8
X * colon60 M:S eg. 1:20 = 80
X * colon60colon60 H:M:S eg. 2:1:20 = 7280
X * PowersOf2 <real>[KMG] eg. 0.5K = 512
X * RealDecimalString <real> eg. 0.5
X * Expressions eg. 0.5M-1
X *
X * Expressions currently include:
X * infix binary: | ^ & << >> + - * / % **(exponent)
X * prefix unary: - + ~
X * midfix grouping: () [] {}
X * and it is similarly easy to add new notations.
X *
X * All number representations and expressions can be intermixed:
X * [(2M-1)*4]>>0x03
X *
X * There are some functions useful in building other recognizers, like
X * RadixString(), and the expression building functions.
X *
X * There are two top level recognizers,
X * Dgetnumber(str,res)
X * and Igetnumber(str,res);
X * the "I" version is basically a call to the "D" version, which rounds,
X * and errors if the rounded integer value is more than int_threshold
X * away from the non-int value.
X *
X * Initially, this was integer only, but in Jan 89 I changed it
X * to produce a floating point result - if you want integer, just
X * integerize.
X * This will have some lossage if your floating point format
X * cannot represent all integer values exactly. Sorry - in that
X * case, you'll just have to go back to the old routine.
X * It has the advantage of one family of routines being able
X * to handle intermediate cases - like 0.5M.
X * It has the advantage of, on a system with decent floating
X * point, being able to trap on overflow or underflow.
X * Interim: add this for Motorola systems.
X * If you can, use your system dependent way of trapping on inexact.
X * Interim: if you have IEEE floating point, it would be nice to
X * have this same routine read in NaNs.
X */
X
X#include <limits.h>
X#include <math.h>
X#include <stdio.h>
X#include <string.h>
X
Xextern double pow();
Xextern double floor();
Xextern double fabs();
Xextern char *malloc();
Xextern void free();
X
X#define DEBUG 1
X#include "debug.h"
X
X#define index strchr
Xchar *index();
X#define rindex strrchr
Xchar *rindex();
X
Xvoid exit();
X
X/* Interim: no tree building. all recursive in place. */
X
Xint MidfixExpression(str,res,preStr,postStr,MidfixFunc)
X char *str;
X double *res;
X char *preStr, *postStr;
X double (*MidfixFunc)();
X{
X char *midStr;
X double midVal;
X int strLen = strlen(str);
X int preStrLen = strlen(preStr);
X int postStrLen = strlen(postStr);
X int match;
X
X nodebugf(("In midfix <%s> <%s> <%s>\n", str, preStr, postStr));
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( strncmp(str,preStr,preStrLen) != 0 ) {
X return -1;
X }
X if( strcmp(str+strLen-postStrLen, postStr) != 0 ) {
X return -1;
X }
X
X midStr = malloc((unsigned)(strLen-preStrLen-postStrLen+1));
X if( !midStr ) {
X (void)fprintf(stderr,"Error malloc'ing memory for expressions\n");
X exit(1);
X }
X (void)strncpy(midStr,str+preStrLen,strLen-preStrLen-postStrLen);
X midStr[strLen-preStrLen-postStrLen] = '\0';
X nodebugf(("midStr=<%s>\n",midStr));
X match = (Dgetnumber(midStr,&midVal) != -1);
X free(midStr);
X if( !match ) {
X return -1;
X }
X *res = MidfixFunc(midVal);
X return 0;
X}
Xdouble Dnop(a) double a; { return a; }
X
X
Xint PrefixExpression(str,res,PrefixStr,PrefixFunc)
X char *str;
X double *res;
X char *PrefixStr;
X double (*PrefixFunc)();
X{
X double Darg;
X int PrefixStrLen = strlen(PrefixStr);
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( strncmp(str,PrefixStr,PrefixStrLen) != 0 ) {
X return -1;
X }
X else {
X if( Dgetnumber(str+PrefixStrLen,&Darg) == -1 ) {
X return -1;
X }
X *res = PrefixFunc(Darg);
X return 0;
X }
X}
Xdouble Dplussign(a) double a; { return a; }
Xdouble Dnegsign(a) double a; { return -a; }
Xdouble Dinvert(a) double a; { return (double)~(unsigned)a; }
X
X
Xint InfixExpression(str,res,InfixStr,InfixFunc)
X char *str;
X double *res;
X char *InfixStr;
X double (*InfixFunc)();
X{
X char *left, *right;
X double Dleft, Dright;
X char *p1;
X int match;
X int InfixStrLen = strlen(InfixStr);
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X /* Try splitting into subexpressions at all occurrences of the operator,
X moving from left to handle 1-1-1. Retries will handle subexpressions.
X */
X for( p1 = str + strlen(str);;p1--) {
X if( p1 < str ) {
X return -1;
X }
X if( !strncmp(p1,InfixStr,InfixStrLen) ) {
X left = malloc((unsigned)(p1-str+1));
X right = malloc((unsigned)(strlen(p1+InfixStrLen))+1);
X /* Bug JDP1: found by jdp at tarpon.att.com,
X Joseph Pepin - did not malloc enough space for RHS,
X missing +1 above. -100e-2 test case. */
X if( !left || !right ) {
X (void)fprintf(stderr,"Error malloc'ing memory for expressions\n");
X exit(1);
X }
X (void)strncpy(left,str,p1-str);
X left[p1-str] = '\0';
X (void)strcpy(right, p1+InfixStrLen );
X match = Dgetnumber(left,&Dleft) != -1
X && Dgetnumber(right,&Dright) != -1;
X free(left);
X free(right);
X if( match ) {
X *res = InfixFunc(Dleft,Dright);
X return 0;
X }
X }
X }
X}
X
Xdouble Dplus(a,b) double a,b; { return a+b; }
Xdouble Dsub(a,b) double a,b; { return a-b; }
Xdouble Dtimes(a,b) double a,b; { return a*b; }
Xdouble Ddivide(a,b) double a,b; { return a/b; }
X/* Interim: need to indicate loss of info on these "pesudo-integer" ops */
Xdouble Dor(a,b) double a,b; { return (double)((unsigned)a | (unsigned)b); }
Xdouble Dand(a,b) double a,b; { return (double)((unsigned)a & (unsigned)b); }
Xdouble Dxor(a,b) double a,b; { return (double)((unsigned)a ^ (unsigned)b); }
Xdouble Dremainder(a,b) double a,b; { return (double)((unsigned)a % (unsigned)b); }
Xdouble Dlsh(a,b) double a,b; { return (double)((unsigned)a << (unsigned)b); }
Xdouble Drsh(a,b) double a,b; { return (double)((unsigned)a >> (unsigned)b); }
Xdouble Dexponent(a,b) double a,b; { extern double pow(); return pow(a,b); }
X
X
Xint Expression(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( InfixExpression(str,res,"|",Dor) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"^",Dxor) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"&",Dand) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"<<",Dlsh) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,">>",Drsh) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"+",Dplus) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"-",Dsub) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"*",Dtimes) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"/",Ddivide) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"%",Dremainder) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"**",Dexponent) == 0 ) {
X return 0;
X }
X if( PrefixExpression(str,res,"+",Dplussign) == 0 ) {
X return 0;
X }
X if( PrefixExpression(str,res,"-",Dnegsign) == 0 ) {
X return 0;
X }
X if( PrefixExpression(str,res,"~",Dinvert) == 0 ) {
X return 0;
X }
X if( MidfixExpression(str,res,"(",")",Dnop) == 0 ) {
X return 0;
X }
X if( MidfixExpression(str,res,"[","]",Dnop) == 0 ) {
X return 0;
X }
X if( MidfixExpression(str,res,"{","}",Dnop) == 0 ) {
X return 0;
X }
X return -1;
X}
X
X
Xint SimpleDecimalString(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X return RadixString(str,10,res);
X}
X
Xint Binary0b(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( str[0] == '0' && (str[1] == 'b' || str[1] == 'B') ) {
X return RadixString(str+2,2,res);
X }
X return -1;
X}
X
Xint Octal0(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( str[0] == '0' ) {
X return RadixString(str+1,8,res);
X }
X return -1;
X}
X
Xint Decimal0d(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( str[0] == '0' && (str[1] == 'd' || str[1] == 'D') ) {
X return RadixString(str+2,10,res);
X }
X return -1;
X}
X
Xint Hex0x(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( str[0] == '0' && (str[1] == 'x' || str[1] == 'X') ) {
X return RadixString(str+2,16,res);
X }
X return -1;
X}
X
Xint ArbitraryRadix(str,res)
X char *str;
X double *res;
X{
X int radix;
X int digval;
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X for( radix = 0; (digval = DECIMAL_DIGIT_VALUE(*str)) != -1; str++ ) {
X radix = radix*10 + digval;
X }
X if( *str != '#' )
X return -1;
X return RadixString(str+1,radix,res);
X}
X
Xint RadixString(str,radix,res)
X char *str;
X int radix;
X double *res;
X{
X int val;
X int digval;
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X val = 0;
X for(;;) {
X if( !IGNORE(*str) ) {
X digval = DIGIT_VALUE(*str);
X if( digval < 0 || digval >= radix )
X return -1;
X val = val*radix + digval;
X }
X str++;
X if( *str == '\0' )
X break;
X }
X *res = val;
X
X return 0;
X}
X
X/* Recognisers for M:S and H:M:S forms */
X
X/*
X * PRECOLON() - shared front half of the colon recognizers.
X *
X * Expands in place inside a recognizer and uses that function's own
X * variables: it reads str and sets colonpos, precolon_len, precolon_str
X * and precolon_val.  The text in front of the first ':' is copied into
X * precolon_str and converted with SimpleDecimalString().
X *
X * The macro executes "return -1" from the enclosing function when there
X * is no ':', when the text before it does not fit in 128-1 characters,
X * when it is not a simple decimal string, or when its value is negative
X * or not equal to itself converted to int.
X */
X#define PRECOLON() { \
X if( (colonpos = index(str,':')) == 0 ) { \
X return -1; \
X } \
X precolon_len = colonpos-str; \
X if( precolon_len > 128-1 ) { \
X /* string too long */ \
X return -1; \
X } \
X /* Copy the precolon string and interpret as a decimal number */ \
X (void)strncpy( precolon_str, str, precolon_len ); \
X precolon_str[precolon_len] = '\0'; \
X \
X if( SimpleDecimalString(precolon_str,&precolon_val) == -1 ) { \
X return -1; \
X } \
X if( precolon_val != (int) precolon_val || precolon_val < 0 ) { \
X return -1; \
X } \
X}
X
Xint colon60(str,res)
X char *str;
X double *res;
X{
X char *colonpos;
X char precolon_str[128];
X int precolon_len;
X double precolon_val;
X double postcolon_val;
X
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X PRECOLON();
X
X if( DecimalString(colonpos+1,&postcolon_val) == -1 ) {
X return -1;
X }
X if( postcolon_val < 0 || 60 <= postcolon_val ) {
X return -1;
X }
X
X *res = precolon_val*60 + postcolon_val;
X return 0;
X}
Xint colon60colon60(str,res)
X char *str;
X double *res;
X{
X char *colonpos;
X char precolon_str[128];
X int precolon_len;
X double precolon_val;
X double postcolon_val;
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X PRECOLON();
X
X if( colon60(colonpos+1,&postcolon_val) == -1 ) {
X return -1;
X }
X if( postcolon_val < 0 || 60*60 <= postcolon_val) {
X return -1;
X }
X
X *res = precolon_val*(60*60) + postcolon_val;
X return 0;
X}
X
X/* Recognize common powers of 2: 2^10=K, 2^20=M, 2^30=G
X Interim: would handle 2^40=T if it didn't overflow */
X
Xint PowersOf2(str,res)
X char *str;
X double *res;
X{
X int sl = strlen(str);
X double val;
X char buf[128];
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( sl <= 0 || sl > 128-1 ) {
X return -1;
X }
X (void)strcpy(buf,str);
X buf[sl-1] = '\0';
X
X if( str[sl-1] == 'K' ) {
X if( DecimalString(buf,&val) == -1 ) {
X return -1;
X }
X *res = val * 1024;
X return 0;
X }
X else if( str[sl-1] == 'M' ) {
X if( DecimalString(buf,&val) == -1 ) {
X return -1;
X }
X *res = val * 1024*1024;
X return 0;
X }
X else if( str[sl-1] == 'G' ) {
X if( DecimalString(buf,&val) == -1 ) {
X return -1;
X }
X *res = val * 1024*1024*1024;
X return 0;
X }
X return -1;
X}
X
X/* Floating point recognizer.
X Interim: decimal floating point only.
X iii.fffff form only.
X Interim: should recognize scientific notation iii.fffEeeee */
X
Xint RealDecimalString(str,res)
X char *str;
X double *res;
X{
X double val;
X int sl;
X int infraction;
X int scale;
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X sl = strlen(str);
X
X if( sl <= 0 ) {
X return -1;
X }
X
X if( index(str,'.') == 0 ) {
X return -1;
X }
X
X /* accumulation of value down from least significant end
X first, to reduce errors */
X val = 0;
X infraction = 1;
X scale = 1;
X for(;sl-->0;) {
X int d = DECIMAL_DIGIT_VALUE(str[sl]);
X if( d == -1 ) {
X if( str[sl] == '.' ) {
X infraction = 0;
X continue;
X }
X else {
X return -1;
X }
X }
X if( infraction ) {
X val += d;
X val /= 10.0;
X }
X else {
X val = d*scale + val;
X scale = scale*10;
X }
X }
X *res = val;
X return 0;
X}
X
X
X
X
Xint DecimalString(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( SimpleDecimalString(str,res) == -1
X && RealDecimalString(str,res) == -1 ) {
X return -1;
X }
X return 0;
X}
X
X
X/* Generic routine for signs */
Xint Signed(func,str,res)
X int (*func) ();
X char *str;
X double *res;
X{
X double value;
X int ret;
X int neg;
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X switch( str[0] ) {
X case '+':
X neg = 0;
X ret = func(str+1,&value);
X break;
X case '-':
X neg = 1;
X ret = func(str+1,&value);
X break;
X default:
X neg = 0;
X ret = func(str,&value);
X break;
X }
X if( ret == -1 ) {
X return -1;
X }
X else {
X if( neg ) value = -value;
X *res = value;
X return 0;
X }
X
X}
X
X
X
X/* Scientific notation recognizer */
X
Xint ScientificExponentialNotation(str,res)
X char *str;
X double *res;
X{
X char *dupptr;
X char *expptr;
X double exponent;
X double mantissa;
X int retval;
X extern char *strdup();
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( (dupptr = strdup(str)) == 0 ) {
X (void)fprintf(stderr,"insufficient free memory to duplicate string\n");
X exit(1);
X }
X if( (expptr = strchr(dupptr,'E')) == 0
X && (expptr = strchr(dupptr,'e')) == 0
X ) {
X retval = -1;
X goto cleanup;
X }
X
X *expptr++ = '\0';
X
X /* dupptr is now a pointer to the mantissa, expptr to the exponent,
X both null terminated strings */
X if( Signed(SimpleDecimalString,expptr,&exponent) == -1 ) {
X retval = -1;
X goto cleanup;
X }
X
X if( Signed(RealDecimalString,dupptr,&mantissa) == -1
X && Signed(SimpleDecimalString,dupptr,&mantissa) == -1 ) {
X retval = -1;
X goto cleanup;
X }
X
X *res = mantissa * pow(10.0,exponent);
X retval = 0;
X
X cleanup:
X free(dupptr);
X
X return retval;
X}
X
X
X/* Utility functions */
Xchar *IGNORE_CHARS = "_";
X
Xint IGNORE(c)
X char c;
X{
X return index(IGNORE_CHARS,c) != 0;
X}
X
Xint DIGIT_VALUE(d)
X char d;
X{
X int val;
X
X switch( d ) {
X default: val = -1; break;
X case '0': val = 0; break;
X case '1': val = 1; break;
X case '2': val = 2; break;
X case '3': val = 3; break;
X case '4': val = 4; break;
X case '5': val = 5; break;
X case '6': val = 6; break;
X case '7': val = 7; break;
X case '8': val = 8; break;
X case '9': val = 9; break;
X case 'a': case 'A': val = 0xA; break;
X case 'b': case 'B': val = 0xB; break;
X case 'c': case 'C': val = 0xC; break;
X case 'd': case 'D': val = 0xD; break;
X case 'e': case 'E': val = 0xE; break;
X case 'f': case 'F': val = 0xF; break;
X }
X return val;
X}
X
Xint DECIMAL_DIGIT_VALUE(d)
X char d;
X{
X int val = DIGIT_VALUE(d);
X if( val < 0 || val >= 10 )
X return -1;
X else
X return val;
X}
X
X
X
X
X/* A recognizer takes (string, result pointer) and returns -1 on failure. */
Xtypedef int (*Recognizer)();
X
X/*
X * The recognizers used by Dgetnumber(), terminated by a null entry.
X * Dgetnumber() passes PRIO_RESOLVE_AMBIGUITY, under which
X * DgetnumberList() stops at the first entry that accepts the string, so
X * the order here is the order of preference.
X */
XRecognizer DefaultRecognizers[] = {
X Octal0,
X SimpleDecimalString,
X Binary0b,
X Decimal0d,
X Hex0x,
X ArbitraryRadix,
X colon60,
X colon60colon60,
X PowersOf2,
X RealDecimalString,
X Expression,
X ScientificExponentialNotation,
X 0
X};
X
X/* Flags for DgetnumberList */
X#define PRIO_RESOLVE_AMBIGUITY 1
X
Xint DgetnumberList(str,res,flags,RecognizerList)
X char *str;
X double *res;
X int flags;
X Recognizer RecognizerList[];
X{
X int found;
X double oldval = 0; /* to silence lint "oldval may be used before set" */
X double newval;
X Recognizer *fp;
X
X /* Special test for null strings.
X All recognizers should really handle this, but...
X */
X if( str == 0 || str[0] == 0 ) {
X return -1;
X }
X
X for(fp = RecognizerList, found=0; *fp; fp++) {
X if( (*fp)(str,&newval) != -1 ) {
X if( flags & PRIO_RESOLVE_AMBIGUITY ) {
X found = 1;
X break;
X }
X if( found ) {
X if( newval != oldval ) {
X return -1;
X }
X }
X else {
X oldval = newval;
X found = 1;
X }
X }
X }
X if( found ) {
X *res = newval;
X return 0;
X }
X else {
X return -1;
X }
X}
X
X
X/* Recognizer form of the above, with the default list */
X
Xint Dgetnumber(str,res)
X char *str;
X double *res;
X{
X if( DgetnumberList(str,res,
X PRIO_RESOLVE_AMBIGUITY,
X DefaultRecognizers)
X == -1 ) {
X return -1;
X }
X return 0;
X}
X
X
X/* Integer version of the above.
X Includes a threshold because arithmetic may be inexact (sigh) */
X/* Interim: should this be parametrized for "D" function to call,
X threshold, and list of recognizers? I'm not sure. */
X
Xdouble int_threshold = 0.00000001;
X
Xint Igetnumber(str,res)
X char *str;
X int *res;
X{
X extern double floor();
X extern double fabs();
X double dres;
X double delta;
X
X if( Dgetnumber(str,&dres) == -1 ) {
X return -1;
X }
X *res = (int)floor(dres+0.5);
X delta = (double)*res - dres;
X if( fabs(delta) > int_threshold ) {
X return -1;
X }
X return 0;
X}
X
X
X
X/* interim: strdup for BSD. remove if you already have it */
Xchar *strdup(s)
X char *s;
X{
X char *mp = malloc(strlen(s)+1);
X if( mp == 0 ) {
X (void)fprintf(stderr,"Error malloc'ing in strdup\n");
X exit(1);
X }
X (void)strcpy(mp,s);
X return mp;
X}
X
END_OF_FILE
if test 19062 -ne `wc -c <'number.c'`; then
echo shar: \"'number.c'\" unpacked with wrong size!
fi
# end of 'number.c'
fi
if test -f 'number.man' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'number.man'\"
else
echo shar: Extracting \"'number.man'\" \(5263 characters\)
sed "s/^X//" >'number.man' <<'END_OF_FILE'
X.nf
X NAME:
X Dgetnumber, Igetnumber \- a family of string to number conversion routines
X
X SYNOPSIS:
X
X /* Default double precision recognizer */
X success = Dgetnumber( string, resultptr )
X int success; { -1 indicates failure }
X char *string;
X double *result;
X
X /* Default integer recognizer */
X success = Igetnumber( string, resultptr )
X int success; { -1 indicates failure }
X char *string;
X int *result;
X
X /* Threshold used for integerizing double precision values. */
X double int_threshold;
X
X DESCRIPTION:
X
X Dgetnumber and Igetnumber are two representatives (probably all that a typical
X user may ever encounter) of a family of routines for converting string
X representations of numbers to numbers in machine internal representation.
X
X They were written out of frustration with programs and routines that seldom
X accept all of the "natural" representations of numbers for a problem --
X disk utilities that require decimal numbers as input, while disk error loggers
X produce hex numbers on output, times that need to be converted from H:M:S
X before they can be used, etc.
X
X The intention is to be able to freely recognize just about any
X format number:
X
X Decimal 1342334
X Hex 0xAB43
X Octal 01377
X Binary 0b100100011
X Arbitrary Radix rrr#vvvvvvvv
X H:M:S 1:20:33
X Real 1.45
X "Meg" 4M
X Expressions (4M-1)*2
X Exponential 1.2E6
X
X Because people often want to provide a special format over and above
X those that are already provided
X
X Eg. Hex 'ABC'Z
X Decimal 10.
X Ignore _ 100_677_888
X
X the intent is to define a, possibly parametrized, recognizer function
X for each format, and then to pass a list of desired recognizer functions
X for your specific recognizer.
X
X This is not intended to be fast, only general.
X
X All recognizers are of the form:
X
X success = RECOGNIZER( string, resultptr )
X int success; { -1 indicates failure }
X char *string;
X double *resultptr;
X
X Recognition is done bottom up instead of top-down;
X instead of having a grammar that constrains notation,
X everything is passed to low-level recognizers that try to recognize
X the string, perhaps recursively, passing off to other recognizers
X in case of failure.
X
X The current recognizers are:
X
X Octal0 0<octal> eg. 0377 = 0xFF
X SimpleDecimalString <decimal> eg. 10 = 0xA
X Binary0b 0b<binary> eg. 0b011 = 3
X Decimal0d 0d<decimal> eg. 0d10 = 0xA
X Hex0x 0x<hex> eg. 0xA = 10
X ArbitraryRadix <decimalbase>#<radix> eg. 3#22 = 8
X colon60 M:S eg. 1:20 = 80
X colon60colon60 H:M:S eg. 2:1:20 = 7280
X PowersOf2 <real>[KMG] eg. 0.5K = 512
X RealDecimalString <real> eg. 0.5
X Expressions eg. 0.5M-1
X
X Expressions currently include:
X infix binary: | ^ & << >> + - * / % **(exponent)
X prefix unary: - + ~
X midfix grouping: () [] {}
X and it is similarly easy to add new notations.
X
X All number representations and expressions can be intermixed:
X [(2M-1)*4]>>0x03
X
X There are some functions useful in building other recognizers, like
X RadixString(), and the expression building functions.
X
X There are two top level recognizers,
X Dgetnumber(str,res)
X and Igetnumber(str,res);
X the "I" version basically calls the "D" version, rounds the result to
X the nearest integer, and returns -1 (failure) if the rounded value is
X more than int_threshold away from the non-int value.
X
X These use an internal function
X
X typedef int (*Recognizer)();
X
X int DgetnumberList(str,res,flags,RecognizerList)
X char *str;
X double *res;
X int flags;
X Recognizer RecognizerList[];
X
X which is called by default with
X
X Recognizer DefaultRecognizers[]
X
X An easy way for users to customize these routines is to
X create a private list of recognizers, deleting standard recognizers
X that are undesired, and adding user coded recognizers that have
X not been provided (eg. nnCnnTnnB cylinder/track/block notation)
X and then call DgetnumberList() from their own top-level wrapper.
X
X (Internal detail: a flag controls whether conflicting matches
X should be an error or not).
X
X NOTES:
X Initially, this was integer only, but in Jan 89 I changed it
X to produce a floating point result - if you want integer, just
X integerize.
X This will have some lossage if your floating point format
X cannot represent all integer values exactly. Sorry - in that
X case, you'll just have to go back to the old routine.
X It has the advantage of one family of routines being able
X to handle intermediate cases - like 0.5M.
X It has the advantage of, on a system with decent floating
X point, being able to trap on overflow or underflow.
X But this is not added.
X If you can, use your system dependent way of trapping on inexact.
X Interim: if you have IEEE floating point, it would be nice to
X have this same routine read in NaNs.
X
X AUTHOR:
X Andy Glew (aglew at uiuc.edu)
X
X HISTORY:
X Originally written by Andy Glew at McGill University, 1983
X
X BUGS:
X
END_OF_FILE
if test 5263 -ne `wc -c <'number.man'`; then
echo shar: \"'number.man'\" unpacked with wrong size!
fi
# end of 'number.man'
fi
if test -f 'test.c' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'test.c'\"
else
echo shar: Extracting \"'test.c'\" \(4677 characters\)
sed "s/^X//" >'test.c' <<'END_OF_FILE'
Xvoid exit();
X
Xint verbose = 1;
X
Xint StopOnError = 1;
X
Xmain(argc,argv)
X int argc;
X char **argv;
X{
X int aval;
X int gval;
X int gret;
X
X if( argc == 1 ) {
X AutoTests();
X }
X else for(;*++argv;) {
X if(0) ;
X else if( !strcmp(*argv,"-auto") ) {
X AutoTests();
X }
X else if( !strcmp(*argv,"-verbose") ) {
X verbose = 1;
X }
X else if( !strcmp(*argv,"-noverbose") ) {
X verbose = 0;
X }
X else if( !strcmp(*argv,"-stoponerror") ) {
X StopOnError = 1;
X }
X else if( !strcmp(*argv,"-nostoponerror") ) {
X StopOnError = 0;
X }
X else if( !strcmp(*argv,"-test") ) {
X gret = Igetnumber(argv[1],&gval);
X
X if( !strcmp(argv[2],"invalid") ) {
X if( gret == -1 )
X exit(0);
X else {
X (void)printf("<%s> <%s> failed - invalid\n",argv[1],argv[2]);
X exit(1);
X }
X }
X else {
X aval = atoi(argv[2]);
X if( gret == -1 ) {
X (void)printf("<%s> invalid - should be <%s> %d\n",argv[1],argv[2],aval);
X exit(1);
X }
X else {
X if( aval != gval ) {
X (void)printf("<%s> %d should be <%s> %d\n",
X argv[1],gval,argv[2],aval);
X exit(1);
X }
X else
X exit(0);
X }
X }
X }
X else {
X (void)printf("Unknown argument <%s>\n",*argv);
X exit(1);
X }
X }
X exit(0);
X /*NOTREACHED*/
X}
X
Xstruct TestVec { /* one hand-written test case, walked by AutoTests */
X char *str; /* input text handed to Igetnumber */
X int value; /* expected result when valid is nonzero */
X int valid; /* nonzero: must be accepted; zero: Igetnumber must return -1 */
X} TV[] = {
X { "-100e-2", -1, 1 }, /* Bug JDP1:
X found by jdp at tarpon.att.com,
X Joseph Pepin - did not malloc
X enough space for RHS
X */
X { "-111E0A", 0, 0 },
X { "-111E0", -111, 1 },
X { "+121E0", 121, 1 },
X { "100E-2", 1, 1 },
X { "-100E-2", -1, 1 },
X { "+100E-2", 1, 1 },
X { "1E+1", 10, 1 },
X { "1.2E1", 12, 1 },
X { "0.5E6", 500000, 1 },
X { "-111e0A", 0, 0 },
X { "-111e0", -111, 1 },
X { "+121e0", 121, 1 },
X { "100e-2", 1, 1 },
X { "-100e-2", -1, 1 },
X { "+100e-2", 1, 1 },
X { "1e+1", 10, 1 },
X { "1.2e1", 12, 1 },
X { "0.5e6", 500000, 1 },
X { "2**4+1", 17, 1 },
X { "(0xFF>>2)+1", 64, 1 },
X { "(1<<2)+1", 5, 1 },
X { "1<<2+1", 8, 1 }, /* expected 8 = 1<<(2+1): + binds tighter than << */
X /* replicated because of an old stateful error */
X { "-(-(-(-(-(-3)))))", 3, 1 },
X { "-(-(-(-(-(-3)))))", 3, 1 },
X { "-(-(-(-(-(-3)))))", 3, 1 },
X { "[1+(2*3)]*{1+2}", 21, 1 },
X { "1+(2+3)", 6, 1 },
X { "1-1-1", -1, 1 },
X { "(0)", 0, 1 },
X { "(1K)+1", 1025, 1 },
X { "2*(1K)+1", 2049, 1 },
X { "2*(2K+1)+(2M/1K)", 6146, 1 },
X { "(0)", 0, 1 },
X { "(0)", 0, 1 },
X { "0b0000", 0, 1 },
X { "0b11", 3, 1 },
X { "-0b101", -5, 1 },
X { "-5K", -5120, 1 },
X { "0.5K", 512, 1 },
X { "13M", 13631488, 1 },
X { "1G", 1073741824, 1 },
X { "1:20", 80, 1 },
X { "2:1:20", 7280, 1 },
X { "I-0b101", 0, 0 },
X { "10.3", 0, 0 },
X { "x0", 0, 0 },
X { "0x", 0, 0 },
X { "-1-", 0, 0 },
X { "-1+", 0, 0 },
X { "-", 0, 0 },
X { "+", 0, 0 },
X { "-4+1", -3, 1 },
X { "-4*-3", 12, 1 },
X { "4*-3", -12, 1 },
X { "-4*3", -12, 1 },
X { 0, 0 } /* sentinel: the null str ends the table walk in AutoTests */
X};
X
XAutoTests()
X{ /*
X * Built-in self-test.  First every entry of TV is run through
X * Igetnumber and checked against the entry; then a range of integers
X * is passed to TestAllPatterns.  For the table part, each mismatch is
X * printed and, when StopOnError is nonzero, the first one exits with
X * status 1; passing entries are printed only when verbose is nonzero.
X */
X int i;
X struct TestVec *tv;
X
X for( tv=TV; tv->str; tv++ ) { /* stop at the entry whose str is null */
X int val;
X if( Igetnumber(tv->str,&val) == -1 ) { /* the string was rejected */
X if( tv->valid ) { /* ... but it should have been accepted */
X (void)printf("Error: <%s> invalid, should be %d\n",
X tv->str, tv->value );
X if( StopOnError ) {
X exit(1);
X }
X }
X else {
X if( verbose ) (void)printf("Passed: <%s> invalid\n",tv->str);
X }
X }
X else { /* the string was accepted; val holds the result */
X if( tv->valid ) {
X if( val != tv->value ) { /* accepted, but with the wrong value */
X (void)printf("Error: <%s> %d, should be %d\n",
X tv->str, val, tv->value );
X if( StopOnError ) {
X exit(1);
X }
X }
X else {
X if( verbose ) (void)printf("Passed: <%s> %d\n", tv->str, val);
X }
X }
X else { /* accepted a string that the table marks as invalid */
X (void)printf("Error: <%s> %d, should be invalid\n",
X tv->str, val );
X if( StopOnError ) {
X exit(1);
X }
X }
X }
X }
X
X
X for(i=0;i<100;i++) /* every value from 0 through 99 */
X TestAllPatterns(i);
X for(i=132;i<1000000000;i+=12331) /* then larger values, 12331 apart */
X TestAllPatterns(i);
X
X}
X
XTestAllPatterns(i)
X{
X TestFormat(i,"%d");
X TestFormat(i,"0%o");
X TestFormat(i,"0x%x");
X TestFormat(i,"0d%d");
X}
XTestFormat(i,fstr)
X int i;
X char *fstr;
X{ /*
X * Print i into a buffer using the printf-style format fstr, then hand
X * the text to TestBufferValue three times: unsigned (expecting i),
X * with a leading plus sign (expecting i) and with a leading minus
X * sign (expecting -i).
X */
X char buf[128];
X
X /* With no sign */
X (void)sprintf(buf+1,fstr,i); /* text starts at buf+1; buf[0] is kept for the sign */
X TestBufferValue(buf+1,i);
X /* With sign */
X buf[0]='+';
X TestBufferValue(buf,i);
X buf[0]='-';
X TestBufferValue(buf,-i);
X}
XTestBufferValue(buf,i)
X char *buf;
X int i;
X{ /*
X * Convert the string buf with Igetnumber and compare the result
X * with i.  A rejected string or a wrong value is reported on stdout
X * and, when StopOnError is nonzero, the program exits with status 1.
X * A match is printed only when verbose is nonzero.
X */
X int val;
X if( Igetnumber(buf,&val) == -1 ) { /* the string was rejected */
X (void)printf("error - <%s> invalid, should be %d\n",buf,i);
X if( StopOnError ) {
X exit(1);
X }
X }
X else if( val != i ) { /* accepted, but with the wrong value */
X (void)printf("error - <%s> %d, should be %d\n",buf,val,i);
X if( StopOnError ) {
X exit(1);
X }
X } else {
X if( verbose ) (void)printf("Passed: <%s> %d\n",buf,i);
X }
X}
X
X
X
END_OF_FILE
if test 4677 -ne `wc -c <'test.c'`; then
echo shar: \"'test.c'\" unpacked with wrong size!
fi
# end of 'test.c'
fi
echo shar: End of shell archive.
exit 0
--
Andy Glew, aglew at uiuc.edu
More information about the Alt.sources
mailing list