number recognition tools
Andy Glew
aglew at oberon.csg.uiuc.edu
Tue Jan 16 04:53:09 AEST 1990
(My apologies if this is messy - I am unsure if this newsreader
emails to the moderator for posts to moderated groups. Please ACK).
The following is a set of tools for recognizing almost arbitrary
number strings, both library functions and shell level command.
It differs from atoi(), etc. in that it already supports natural
notations like "0.5M-1", and can be easily customized for application
specific notations.
Man pages for the library routines and a shell level wrapper are provided.
This package has been used for many years on V7, System V, and BSD systems.
The posted version has recently been used on SUN3s and Motorola System V boxes.
Porting problems should be minor.
No install script is provided (because I have never been able to rely
on a standard version of install in my travels).
#! /bin/sh
# This is a shell archive. Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file". To overwrite existing
# files, type "sh file -c". You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g.. If this archive is complete, you
# will see the following message at the end:
# "End of shell archive."
# Contents: Makefile README debug.h getnumber.c getnumber.man number.c
# number.man test.c
# Wrapped by aglew at rigel.csg.uiuc.edu on Mon Jan 15 11:58:45 1990
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'Makefile' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'Makefile'\"
else
echo shar: Extracting \"'Makefile'\" \(236 characters\)
sed "s/^X//" >'Makefile' <<'END_OF_FILE'
XCFLAGS=-g
XCC=FP='' /bin/cc
Xall: test number.o getnumber
Xtests: test
Xtest: test.o number.o
X ${CC} -g -DTEST -o test test.o number.o -lm
Xgetnumber: getnumber.o number.o
X ${CC} -g -o getnumber getnumber.o number.o -lm
Xnumber.o: number.c
END_OF_FILE
if test 236 -ne `wc -c <'Makefile'`; then
echo shar: \"'Makefile'\" unpacked with wrong size!
fi
# end of 'Makefile'
fi
if test -f 'README' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'README'\"
else
echo shar: Extracting \"'README'\" \(966 characters\)
sed "s/^X//" >'README' <<'END_OF_FILE'
X/*
X * number
X *
X * This is a directory for a family of routines that convert
X * a string to an integer. The intention is to be able to freely
X * recognize just about any format integer:
X *
X * Decimal 1342334
X * Hex 0xAB43
X * Octal 01377
X * Binary 0b100100011
X * Arbitrary Radix rrr#vvvvvvvv
X *
X * Because people often want to provide a special format over and above
X * those that are already provided
X *
X * Eg. Hex 'ABC'Z
X * Decimal 10.
X * Ignore _ 100_677_888
X *
X * the intent is to define a, possibly parametrized, recognizer function
X * for each format, and then to pass a list of desired recognizer functions
X * for your specific recognizer.
X *
X * This is not intended to be fast, only general.
X *
X * All recognizers are of the form:
X *
X * success = RECOGNIZER( string, resultptr )
X * int success; /* -1 indicates failure */
X * char *string;
X * int *result;
X *
X */
X
X
X
X
END_OF_FILE
if test 966 -ne `wc -c <'README'`; then
echo shar: \"'README'\" unpacked with wrong size!
fi
# end of 'README'
fi
if test -f 'debug.h' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'debug.h'\"
else
echo shar: Extracting \"'debug.h'\" \(4127 characters\)
sed "s/^X//" >'debug.h' <<'END_OF_FILE'
X#ifndef DEBUG_H
X
X#define DEBUG_H 1
X
X#ifdef DEBUG
Xint DEBUG_was_defined;
X#endif
X
X/*
X * manual NAME debug.h - Andy Glew's debug header
X *
X * USAGE #define DEBUG 1 #include "debug.h" ... debugf((fmt,vars...));
X *
X * DESCRIPTION
X *
X * The header file "debug.h" may be found in a directory near you. It
X * contains macros to make the production of debugging messages more
X * pleasant.
X *
X * debugf
X *
X * The most important/useful of these macros is debugf((...)). Debugf
X * is a printf (to stdout by default, optionally to stderr or
X * elsewhere). It accepts standard printf format strings and a
X * variable number of arguments. The only syntactic difference is the
X * necessity of double parentheses about the parameter list
X * (necessary because C macros can't have variable numbers of
X * arguments). Debugf usually produces one line of output per call,
X * with a distinctive mark like "Debug in file XXX line NNN".
X *
X * Example: while( ... ) { debugf(("in loop\n")); ... }
X *
X * DEBUG
X *
X * To use debugf: #define DEBUG 1 #include "debug.h" DEBUG must be
X * defined before debug.h is included, either in code or in a -DDEBUG
X * flag when compiled. If DEBUG is not defined when debug.h is
X * included, debugf and other debugging macros occupy no space in
X * your program.
X *
X * nodebugf
X *
X * nodebugf((...)) is syntactic sugar to make it easy to turn debugfs
X * off without having to remove them or go through convolutions
X * setting a debug control variable.
X *
X * debugshow
X *
X * debugshow(var,fmt) produces the quintessential debugging output
X * VARIABLE=VALUE_OF_VARIABLE. fmt is the format string you would use
X * in printf, without the double quotes.
X *
X * Example: int Ingrid=77; debugshow(Ingrid,%d); Produces Debug in file
X * XXX line NNN: Ingrid = 77
X *
X * _debugf
X *
X * _debugf is the name of the function to be used to print the debugging
X * output, printf by default. It can be changed at any time to
X * another varargs function. eprintf is useful - just
X * fprintf(stderr...) although it must be rewritten as a function due
X * to the weaknesses of C. Logging functions, and the like, can also
X * be useful.
X *
X * DebugCondition
X *
X * There are actually several layers of indirection in this macro
X * system:
X *
X * debugshow -> debugf -> _ifdebugf -> _debugf
X *
X * _ifdebugf should not be changed; but the condition DebugCondition
X * which it tests can usefully be changed. By default DebugCondition
X * is defined as (1); it is often nice to set it to a variable that
X * you can patch
X *
X * #define DebugCondition DebugVar int DebugVar = 0; #define DEBUG 1
X * #include "debug.h"
X *
X * I would have made a variable the default except for awkwardnesses
X * some people have about adding modules to the standard C library.
X *
X * Some people like having multiple debug levels, although I don't.
X * These can also be stacked.
X *
X * The function name __FUNC__ should be printed out as soon as the C
X * compiler is fixed.
X *
X * manual
X */
X
X#if defined(DEBUG)
X# define DEBUGcode( sl ) sl
X# define DEBUGdecl( decl ) decl
X#else
X# define DEBUGcode( sl )
X# define DEBUGdecl( decl )
X#endif
X
X#define noDEBUGcode( sl )
X#define noDEBUGdecl( sl )
X
X/*
X * double brackets about _debugf's parmlist so that you can do
X * #define _debugf(v) (printf v,uprintf v) which is useful in the
X * kernel
X */
X# if defined(DEBUG)
X# if !defined(_debugf)
X# define _debugf(parmlist) (printf parmlist)
X# endif
X
X/* DebugCondition can be controlled by the user */
X# define _ifdebugf(parmlist) ( DebugCondition ? _debugf(parmlist) : 0 )
X# if !defined(DebugCondition)
X# define DebugCondition (1)
X# endif
X# endif /* DEBUG */
X
X# if defined(DEBUG)
X# define debugf(parmlist) \
X ( _ifdebugf(("Debug in file %s line %d ",__FILE__,__LINE__)), \
X _ifdebugf(parmlist) \
X )
X# else
X# define debugf(parmlist)
X# endif
X# define nodebugf(parmlist)
X
X/*
X * debugshow(var,fmt) - print "var = <value of var>" through debugf.
X * fmt is the printf conversion for var written WITHOUT double quotes,
X * eg. debugshow(count,%d).  nodebugshow() always expands to nothing.
X *
X * Pre-ANSI preprocessors substitute macro parameters inside string
X * literals, which is what the "var = fmt\n" form relies on.  ANSI
X * preprocessors do not, so when __STDC__ is defined the format string is
X * built with the # stringification operator instead.
X */
X#ifdef DEBUG
X# if defined(__STDC__)
X#  define debugshow(var,fmt) debugf((#var " = " #fmt "\n",var))
X# else
X#  define debugshow(var,fmt) debugf(("var = fmt\n",var))
X# endif
X# define nodebugshow(var,fmt)
X#else
X# define debugshow(var,fmt)
X# define nodebugshow(var,fmt)
X#endif
X
X
X#endif /* DEBUG_H */
END_OF_FILE
if test 4127 -ne `wc -c <'debug.h'`; then
echo shar: \"'debug.h'\" unpacked with wrong size!
fi
# end of 'debug.h'
fi
if test -f 'getnumber.c' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'getnumber.c'\"
else
echo shar: Extracting \"'getnumber.c'\" \(898 characters\)
sed "s/^X//" >'getnumber.c' <<'END_OF_FILE'
X/* User level wrapper for Dgetnumber */
X
Xvoid exit();
X
Xint Igetnumber();
Xint Dgetnumber();
X
Xint DorI = 'D';
X
Xchar *format = "%g";
X
Xmain(argc,argv)
X int argc;
X char **argv;
X{
X double dres;
X int ires;
X
X for(;*++argv;) {
X if( !strcmp(*argv,"-i") ) {
X DorI = 'I';
X format = "%d";
X }
X else if( !strcmp(*argv,"-d") ) {
X DorI = 'D';
X format = "%g";
X }
X else if( !strcmp(*argv,"-format") ) {
X format = *++argv;
X }
X else {
X switch( DorI ) {
X default:
X exit(-1);
X case 'D':
X if( Dgetnumber(*argv,&dres) == -1 ) {
X (void)printf("invalid\n");
X }
X else {
X (void)printf(format,dres);
X (void)printf("\n");
X }
X break;
X case 'I':
X if( Igetnumber(*argv,&ires) == -1 ) {
X (void)printf("invalid\n");
X }
X else {
X (void)printf(format,ires);
X (void)printf("\n");
X }
X break;
X }
X }
X }
X exit(0);
X /*NOTREACHED*/
X}
X
END_OF_FILE
if test 898 -ne `wc -c <'getnumber.c'`; then
echo shar: \"'getnumber.c'\" unpacked with wrong size!
fi
# end of 'getnumber.c'
fi
if test -f 'getnumber.man' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'getnumber.man'\"
else
echo shar: Extracting \"'getnumber.man'\" \(2079 characters\)
sed "s/^X//" >'getnumber.man' <<'END_OF_FILE'
X.nf
X NAME:
X getnumber \- read a number in arbitrary notation
X
X SYNOPSIS:
X getnumber [-i|-d|-format '%printf-format'] number-string ...
X
X DESCRIPTION:
X getnumber is a program wrapped around the Dgetnumber and Igetnumber
X family of routines (see their man pages). Getnumber converts number
X in an almost arbitrary string representation, and prints the number
X out on stdout as a decimal integer or double precision value,
X or using a printf like string.
X
X getnumber is intended to be used in shell scripts that would like
X to be able to recognize numbers typed in by the user in their natural
X format. Eg.
X ARG=0.5M-1
X ...
X value=`getnumber -i $ARG`
X
XINTERFACE:
X Getnumber processes its command line arguments and prints to stdout.
X Stdin is not used.
X
X The command line option
X
X -i -- convert to an integer, using %d format to print
X -d -- convert to a C double, using %g format to print
X
X -format %printf-format
X -- format to be used in printing the result.
X
X Exits with error status -1 on a command line error.
X Prints "invalid" on conversion errors, but continues to process.
X
XBACKGROUND:
X See the man pages for the Dgetnumber and Igetnumber family
X for more details (man number).
X
X The intention is to be able to freely recognize just about any
X format number:
X
X Decimal 1342334
X Hex 0xAB43
X Octal 01377
X Binary 0b100100011
X Arbitrary Radix rrr#vvvvvvvv
X H:M:S 1:20:33
X Real 1.45
X "Meg" 4M
X Expressions (4M-1)*2
X Exponential 1.2E6
X
X Expressions currently include:
X infix binary: | ^ & << >> + - * / % **(exponent)
X prefix unary: - + ~
X midfix grouping: () [] {}
X and it is similarly easy to add new notations.
X
X All number representations and expressions can be intermixed:
X [(2M-1)*4]>>0x03
X
X NOTES:
X
X AUTHOR:
X Andy Glew (aglew at uiuc.edu)
X
X HISTORY:
X Originally written by Andy Glew at McGill University, 1983
X
X BUGS:
X END_OF_FILE
if test 2079 -ne `wc -c <'getnumber.man'`; then
echo shar: \"'getnumber.man'\" unpacked with wrong size!
fi
# end of 'getnumber.man'
fi
if test -f 'number.c' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'number.c'\"
else
echo shar: Extracting \"'number.c'\" \(18614 characters\)
sed "s/^X//" >'number.c' <<'END_OF_FILE'
X/*
X * number
X *
X * This is a directory for a family of routines that convert
X * a string to a number.
X *
X *
X * The intention is to be able to freely recognize just about any
X * format number:
X *
X * Decimal 1342334
X * Hex 0xAB43
X * Octal 01377
X * Binary 0b100100011
X * Arbitrary Radix rrr#vvvvvvvv
X * H:M:S 1:20:33
X * Real 1.45
X * "Meg" 4M
X * Expressions (4M-1)*2
X * Exponential 1.2E6
X *
X * Because people often want to provide a special format over and above
X * those that are already provided
X *
X * Eg. Hex 'ABC'Z
X * Decimal 10.
X * Ignore _ 100_677_888
X *
X * the intent is to define a, possibly parametrized, recognizer function
X * for each format, and then to pass a list of desired recognizer functions
X * for your specific recognizer.
X *
X * This is not intended to be fast, only general.
X *
X * All recognizers are of the form:
X *
X * success = RECOGNIZER( string, resultptr )
X * int success; { -1 indicates failure }
X * char *string;
X * double *result;
X *
X * Recognition is done bottom up instead of top-down;
X * instead of having a grammar that constrains notation,
X * everything is passed to low-level recognizers that try to recognize
X * the string, perhaps recursively, passing off to other recognizer
X * in case of failure.
X *
X * The current recognizers are:
X *
X * Octal0 0<octal> eg. 0377 = 0xFF
X * SimpleDecimalString <decimal> eg. 10 = 0xA
X * Binary0b 0b<binary> eg. 0b011 = 3
X * Decimal0d 0d<decimal> eg. 0d10 = 0xA
X * Hex0x 0x<hex> eg. 0xA = 10
X * ArbitraryRadix <decimalradix>#<digits> eg. 3#22 = 8
X * colon60 M:S eg. 1:20 = 80
X * colon60colon60 H:M:S eg. 2:1:20 = 7280
X * PowersOf2 <real>[KMG] eg. 0.5K = 512
X * RealDecimalString <real> eg. 0.5
X * Expressions eg. 0.5M-1
X *
X * Expressions currently include:
X * infix binary: | ^ & << >> + - * / % **(exponent)
X * prefix unary: - + ~
X * midfix grouping: () [] {}
X * and it is similarly easy to add new notations.
X *
X * All number representations and expressions can be intermixed:
X * [(2M-1)*4]>>0x03
X *
X * There are some functions useful in building other recognizers, like
X * RadixString(), and the expression building functions.
X *
X * There are two top level recognizers,
X * Dgetnumber(str,res)
X * and Igetnumber(str,res);
X * the "I" version is basically a call to the "D" version, which rounds,
X * and errors if the rounded integer value is more than int_threshold
X * away from the non-int value.
X *
X * Initially, this was integer only, but in Jan 89 I changed it
X * to produce a floating point result - if you want integer, just
X * integerize.
X * This will have some lossage if your floating point format
X * cannot represent all integer values exactly. Sorry - in that
X * case, you'll just have to go back to the old routine.
X * It has the advantage of one family of routines being able
X * to handle intermediate cases - like 0.5M.
X * It has the advantage of, on a system with decent floating
X * point, being able to trap on overflow or underflow.
X * Interim: add this for Motorola systems.
X * If you can, use your system dependent way of trapping on inexact.
X * Interim: if you have IEEE floating point, it would be nice to
X * have this same routine read in NaNs.
X */
X
X#include <limits.h>
X#include <math.h>
X#include <stdio.h>
X#include <string.h>
X
Xextern double pow();
Xextern double floor();
Xextern double fabs();
Xextern char *malloc();
Xextern void free();
X
X#define DEBUG 1
X#include "debug.h"
X
X#define index strchr
Xchar *index();
X#define rindex strrchr
Xchar *rindex();
X
Xvoid exit();
X
X/* Interim: no tree building. all recursive in place. */
X
Xint MidfixExpression(str,res,preStr,postStr,MidfixFunc)
X char *str;
X double *res;
X char *preStr, *postStr;
X double (*MidfixFunc)();
X{
X char *midStr;
X double midVal;
X int strLen = strlen(str);
X int preStrLen = strlen(preStr);
X int postStrLen = strlen(postStr);
X int match;
X
X nodebugf(("In midfix <%s> <%s> <%s>\n", str, preStr, postStr));
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( strncmp(str,preStr,preStrLen) != 0 ) {
X return -1;
X }
X if( strcmp(str+strLen-postStrLen, postStr) != 0 ) {
X return -1;
X }
X
X midStr = malloc((unsigned)(strLen-preStrLen-postStrLen+1));
X if( !midStr ) {
X (void)fprintf(stderr,"Error malloc'ing memory for expressions\n");
X exit(1);
X }
X (void)strncpy(midStr,str+preStrLen,strLen-preStrLen-postStrLen);
X midStr[strLen-preStrLen-postStrLen] = '\0';
X nodebugf(("midStr=<%s>\n",midStr));
X match = (Dgetnumber(midStr,&midVal) != -1);
X free(midStr);
X if( !match ) {
X return -1;
X }
X *res = MidfixFunc(midVal);
X return 0;
X}
X/* Identity function; Expression() passes it to MidfixExpression() as the
X   MidfixFunc for the grouping brackets. */
Xdouble Dnop(a) double a; { return a; }
X
X
Xint PrefixExpression(str,res,PrefixStr,PrefixFunc)
X char *str;
X double *res;
X char *PrefixStr;
X double (*PrefixFunc)();
X{
X double Darg;
X int PrefixStrLen = strlen(PrefixStr);
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( strncmp(str,PrefixStr,PrefixStrLen) != 0 ) {
X return -1;
X }
X else {
X if( Dgetnumber(str+PrefixStrLen,&Darg) == -1 ) {
X return -1;
X }
X *res = PrefixFunc(Darg);
X return 0;
X }
X}
X/* Unary operator helpers; Expression() passes them to PrefixExpression()
X   for the prefixes "+", "-" and "~" respectively. */
Xdouble Dplussign(a) double a; { return a; }
Xdouble Dnegsign(a) double a; { return -a; }
X/* Bitwise complement of the operand after conversion to unsigned.
X   NOTE(review): a negative or out-of-range operand makes the conversion
X   to unsigned undefined - confirm what callers may pass. */
Xdouble Dinvert(a) double a; { return (double)~(unsigned)a; }
X
X
Xint InfixExpression(str,res,InfixStr,InfixFunc)
X char *str;
X double *res;
X char *InfixStr;
X double (*InfixFunc)();
X{
X char *left, *right;
X double Dleft, Dright;
X char *p1;
X int match;
X int InfixStrLen = strlen(InfixStr);
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X /* Try splitting into subexpressions at all occurrences of the operator,
X moving from left to handle 1-1-1. Retries will handle subexpressions.
X */
X for( p1 = str + strlen(str);;p1--) {
X if( p1 < str ) {
X return -1;
X }
X if( !strncmp(p1,InfixStr,InfixStrLen) ) {
X left = malloc((unsigned)(p1-str+1));
X right = malloc((unsigned)(strlen(p1+InfixStrLen)));
X if( !left || !right ) {
X (void)fprintf(stderr,"Error malloc'ing memory for expressions\n");
X exit(1);
X }
X (void)strncpy(left,str,p1-str);
X left[p1-str] = '\0';
X (void)strcpy(right, p1+InfixStrLen );
X match = Dgetnumber(left,&Dleft) != -1
X && Dgetnumber(right,&Dright) != -1;
X free(left);
X free(right);
X if( match ) {
X *res = InfixFunc(Dleft,Dright);
X return 0;
X }
X }
X }
X}
X
X/* Binary operator helpers; Expression() passes them to InfixExpression().
X   Each takes the left and right operand values and returns the result. */
Xdouble Dplus(a,b) double a,b; { return a+b; }
Xdouble Dsub(a,b) double a,b; { return a-b; }
Xdouble Dtimes(a,b) double a,b; { return a*b; }
Xdouble Ddivide(a,b) double a,b; { return a/b; }
X/* Interim: need to indicate loss of info on these "pseudo-integer" ops.
X   Both operands are converted to unsigned before the integer operation.
X   NOTE(review): Dremainder with a right operand that converts to 0, and
X   shifts by a count of at least the width of unsigned, are not defined
X   by C - confirm whether callers can supply such operands. */
Xdouble Dor(a,b) double a,b; { return (double)((unsigned)a | (unsigned)b); }
Xdouble Dand(a,b) double a,b; { return (double)((unsigned)a & (unsigned)b); }
Xdouble Dxor(a,b) double a,b; { return (double)((unsigned)a ^ (unsigned)b); }
Xdouble Dremainder(a,b) double a,b; { return (double)((unsigned)a % (unsigned)b); }
Xdouble Dlsh(a,b) double a,b; { return (double)((unsigned)a << (unsigned)b); }
Xdouble Drsh(a,b) double a,b; { return (double)((unsigned)a >> (unsigned)b); }
X/* a raised to the power b, computed by pow(). */
Xdouble Dexponent(a,b) double a,b; { extern double pow(); return pow(a,b); }
X
X
Xint Expression(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( InfixExpression(str,res,"|",Dor) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"^",Dxor) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"&",Dand) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"<<",Dlsh) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,">>",Drsh) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"+",Dplus) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"-",Dsub) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"*",Dtimes) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"/",Ddivide) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"%",Dremainder) == 0 ) {
X return 0;
X }
X if( InfixExpression(str,res,"**",Dexponent) == 0 ) {
X return 0;
X }
X if( PrefixExpression(str,res,"+",Dplussign) == 0 ) {
X return 0;
X }
X if( PrefixExpression(str,res,"-",Dnegsign) == 0 ) {
X return 0;
X }
X if( PrefixExpression(str,res,"~",Dinvert) == 0 ) {
X return 0;
X }
X if( MidfixExpression(str,res,"(",")",Dnop) == 0 ) {
X return 0;
X }
X if( MidfixExpression(str,res,"[","]",Dnop) == 0 ) {
X return 0;
X }
X if( MidfixExpression(str,res,"{","}",Dnop) == 0 ) {
X return 0;
X }
X return -1;
X}
X
X
Xint SimpleDecimalString(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X return RadixString(str,10,res);
X}
X
Xint Binary0b(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( str[0] == '0' && (str[1] == 'b' || str[1] == 'B') ) {
X return RadixString(str+2,2,res);
X }
X return -1;
X}
X
Xint Octal0(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( str[0] == '0' ) {
X return RadixString(str+1,8,res);
X }
X return -1;
X}
X
Xint Decimal0d(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( str[0] == '0' && (str[1] == 'd' || str[1] == 'D') ) {
X return RadixString(str+2,10,res);
X }
X return -1;
X}
X
Xint Hex0x(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( str[0] == '0' && (str[1] == 'x' || str[1] == 'X') ) {
X return RadixString(str+2,16,res);
X }
X return -1;
X}
X
Xint ArbitraryRadix(str,res)
X char *str;
X double *res;
X{
X int radix;
X int digval;
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X for( radix = 0; (digval = DECIMAL_DIGIT_VALUE(*str)) != -1; str++ ) {
X radix = radix*10 + digval;
X }
X if( *str != '#' )
X return -1;
X return RadixString(str+1,radix,res);
X}
X
Xint RadixString(str,radix,res)
X char *str;
X int radix;
X double *res;
X{
X int val;
X int digval;
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X val = 0;
X for(;;) {
X if( !IGNORE(*str) ) {
X digval = DIGIT_VALUE(*str);
X if( digval < 0 || digval >= radix )
X return -1;
X val = val*radix + digval;
X }
X str++;
X if( *str == '\0' )
X break;
X }
X *res = val;
X
X return 0;
X}
X
X/* Recognisers for M:S and H:M:S forms */
X
X/*
X * PRECOLON() - shared front end of colon60() and colon60colon60().
X *
X * Expands inside a function that declares str, colonpos, precolon_str
X * (char[128]), precolon_len and precolon_val.  It finds the first ':' in
X * str, copies the text before it into precolon_str and converts that with
X * SimpleDecimalString() into precolon_val.  It makes the enclosing
X * function return -1 if there is no ':', the text before it is longer
X * than 127 characters, it is not a decimal string, or its value is
X * negative or not equal to itself cast to int.  On fall through colonpos
X * points at the ':'.
X */
X#define PRECOLON() { \
X if( (colonpos = index(str,':')) == 0 ) { \
X return -1; \
X } \
X precolon_len = colonpos-str; \
X if( precolon_len > 128-1 ) { \
X /* string too long */ \
X return -1; \
X } \
X /* Copy the precolon string and interpret as a decimal number */ \
X (void)strncpy( precolon_str, str, precolon_len ); \
X precolon_str[precolon_len] = '\0'; \
X \
X if( SimpleDecimalString(precolon_str,&precolon_val) == -1 ) { \
X return -1; \
X } \
X if( precolon_val != (int) precolon_val || precolon_val < 0 ) { \
X return -1; \
X } \
X}
X
Xint colon60(str,res)
X char *str;
X double *res;
X{
X char *colonpos;
X char precolon_str[128];
X int precolon_len;
X double precolon_val;
X double postcolon_val;
X
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X PRECOLON();
X
X if( DecimalString(colonpos+1,&postcolon_val) == -1 ) {
X return -1;
X }
X if( postcolon_val < 0 || 60 <= postcolon_val ) {
X return -1;
X }
X
X *res = precolon_val*60 + postcolon_val;
X return 0;
X}
Xint colon60colon60(str,res)
X char *str;
X double *res;
X{
X char *colonpos;
X char precolon_str[128];
X int precolon_len;
X double precolon_val;
X double postcolon_val;
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X PRECOLON();
X
X if( colon60(colonpos+1,&postcolon_val) == -1 ) {
X return -1;
X }
X if( postcolon_val < 0 || 60*60 <= postcolon_val) {
X return -1;
X }
X
X *res = precolon_val*(60*60) + postcolon_val;
X return 0;
X}
X
X/* Recognize common powers of 2: 2^10=K, 2^20=M, 2^30=G
X Interim: would handle 2^40=T if it didn't overflow */
X
Xint PowersOf2(str,res)
X char *str;
X double *res;
X{
X int sl = strlen(str);
X double val;
X char buf[128];
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( sl <= 0 || sl > 128-1 ) {
X return -1;
X }
X (void)strcpy(buf,str);
X buf[sl-1] = '\0';
X
X if( str[sl-1] == 'K' ) {
X if( DecimalString(buf,&val) == -1 ) {
X return -1;
X }
X *res = val * 1024;
X return 0;
X }
X else if( str[sl-1] == 'M' ) {
X if( DecimalString(buf,&val) == -1 ) {
X return -1;
X }
X *res = val * 1024*1024;
X return 0;
X }
X else if( str[sl-1] == 'G' ) {
X if( DecimalString(buf,&val) == -1 ) {
X return -1;
X }
X *res = val * 1024*1024*1024;
X return 0;
X }
X return -1;
X}
X
X/* Floating point recognizer.
X Interim: decimal floating point only.
X iii.fffff form only.
X Interim: should recognize scientific notation iii.fffEeeee */
X
Xint RealDecimalString(str,res)
X char *str;
X double *res;
X{
X double val;
X int sl;
X int infraction;
X int scale;
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X sl = strlen(str);
X
X if( sl <= 0 ) {
X return -1;
X }
X
X if( index(str,'.') == 0 ) {
X return -1;
X }
X
X /* accumulation of value down from least significant end
X first, to reduce errors */
X val = 0;
X infraction = 1;
X scale = 1;
X for(;sl-->0;) {
X int d = DECIMAL_DIGIT_VALUE(str[sl]);
X if( d == -1 ) {
X if( str[sl] == '.' ) {
X infraction = 0;
X continue;
X }
X else {
X return -1;
X }
X }
X if( infraction ) {
X val += d;
X val /= 10.0;
X }
X else {
X val = d*scale + val;
X scale = scale*10;
X }
X }
X *res = val;
X return 0;
X}
X
X
X
X
Xint DecimalString(str,res)
X char *str;
X double *res;
X{
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( SimpleDecimalString(str,res) == -1
X && RealDecimalString(str,res) == -1 ) {
X return -1;
X }
X return 0;
X}
X
X
X/* Generic routine for signs */
Xint Signed(func,str,res)
X int (*func) ();
X char *str;
X double *res;
X{
X double value;
X int ret;
X int neg;
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X switch( str[0] ) {
X case '+':
X neg = 0;
X ret = func(str+1,&value);
X break;
X case '-':
X neg = 1;
X ret = func(str+1,&value);
X break;
X default:
X neg = 0;
X ret = func(str,&value);
X break;
X }
X if( ret == -1 ) {
X return -1;
X }
X else {
X if( neg ) value = -value;
X *res = value;
X return 0;
X }
X
X}
X
X
X
X/* Scientific notation recognizer */
X
Xint ScientificExponentialNotation(str,res)
X char *str;
X double *res;
X{
X char *dupptr;
X char *expptr;
X double exponent;
X double mantissa;
X int retval;
X
X if( str == 0 || *str == 0) {
X return -1;
X }
X
X if( (dupptr = strdup(str)) == 0 ) {
X (void)fprintf(stderr,"insufficient free memory to duplicate string\n");
X exit(1);
X }
X if( (expptr = strchr(dupptr,'E')) == 0
X && (expptr = strchr(dupptr,'e')) == 0
X ) {
X retval = -1;
X goto cleanup;
X }
X
X *expptr++ = '\0';
X
X /* dupptr is now a pointer to the mantissa, expptr to the exponent,
X both null terminated strings */
X if( Signed(SimpleDecimalString,expptr,&exponent) == -1 ) {
X retval = -1;
X goto cleanup;
X }
X
X if( Signed(RealDecimalString,dupptr,&mantissa) == -1
X && Signed(SimpleDecimalString,dupptr,&mantissa) == -1 ) {
X retval = -1;
X goto cleanup;
X }
X
X *res = mantissa * pow(10.0,exponent);
X retval = 0;
X
X cleanup:
X free(dupptr);
X
X return retval;
X}
X
X
X/* Utility functions */
Xchar *IGNORE_CHARS = "_";
X
Xint IGNORE(c)
X char c;
X{
X return index(IGNORE_CHARS,c) != 0;
X}
X
Xint DIGIT_VALUE(d)
X char d;
X{
X int val;
X
X switch( d ) {
X default: val = -1; break;
X case '0': val = 0; break;
X case '1': val = 1; break;
X case '2': val = 2; break;
X case '3': val = 3; break;
X case '4': val = 4; break;
X case '5': val = 5; break;
X case '6': val = 6; break;
X case '7': val = 7; break;
X case '8': val = 8; break;
X case '9': val = 9; break;
X case 'a': case 'A': val = 0xA; break;
X case 'b': case 'B': val = 0xB; break;
X case 'c': case 'C': val = 0xC; break;
X case 'd': case 'D': val = 0xD; break;
X case 'e': case 'E': val = 0xE; break;
X case 'f': case 'F': val = 0xF; break;
X }
X return val;
X}
X
Xint DECIMAL_DIGIT_VALUE(d)
X char d;
X{
X int val = DIGIT_VALUE(d);
X if( val < 0 || val >= 10 )
X return -1;
X else
X return val;
X}
X
X
X
X
X/* A recognizer is called as (*r)(str,&value) and returns -1 when it does
X   not accept str. */
Xtypedef int (*Recognizer)();
X
X/* Zero terminated list of recognizers that Dgetnumber() passes to
X   DgetnumberList(), which tries them in this order.  Dgetnumber() sets
X   PRIO_RESOLVE_AMBIGUITY, so the first recognizer that accepts a string
X   decides its value; eg. Octal0 comes before SimpleDecimalString. */
XRecognizer DefaultRecognizers[] = {
X Octal0,
X SimpleDecimalString,
X Binary0b,
X Decimal0d,
X Hex0x,
X ArbitraryRadix,
X colon60,
X colon60colon60,
X PowersOf2,
X RealDecimalString,
X Expression,
X ScientificExponentialNotation,
X 0
X};
X
X/* Flags for DgetnumberList */
X#define PRIO_RESOLVE_AMBIGUITY 1
X
Xint DgetnumberList(str,res,flags,RecognizerList)
X char *str;
X double *res;
X int flags;
X Recognizer RecognizerList[];
X{
X int found;
X double oldval = 0; /* to silence lint "oldval may be used before set" */
X double newval;
X Recognizer *fp;
X
X /* Special test for null strings.
X All recognizers should really handle this, but...
X */
X if( str == 0 || str[0] == 0 ) {
X return -1;
X }
X
X for(fp = RecognizerList, found=0; *fp; fp++) {
X if( (*fp)(str,&newval) != -1 ) {
X if( flags & PRIO_RESOLVE_AMBIGUITY ) {
X found = 1;
X break;
X }
X if( found ) {
X if( newval != oldval ) {
X return -1;
X }
X }
X else {
X oldval = newval;
X found = 1;
X }
X }
X }
X if( found ) {
X *res = newval;
X return 0;
X }
X else {
X return -1;
X }
X}
X
X
X/* Recognizer form of the above, with the default list */
X
Xint Dgetnumber(str,res)
X char *str;
X double *res;
X{
X if( DgetnumberList(str,res,
X PRIO_RESOLVE_AMBIGUITY,
X DefaultRecognizers)
X == -1 ) {
X return -1;
X }
X return 0;
X}
X
X
X/* Integer version of the above.
X Includes a threshold because arithmetic may be inexact (sigh) */
X/* Interim: should this be parametrized for "D" function to call,
X threshold, and list of recognizers? I'm not sure. */
X
Xdouble int_threshold = 0.00000001;
X
Xint Igetnumber(str,res)
X char *str;
X int *res;
X{
X extern double floor();
X extern double fabs();
X double dres;
X double delta;
X
X if( Dgetnumber(str,&dres) == -1 ) {
X return -1;
X }
X *res = (int)floor(dres+0.5);
X delta = (double)*res - dres;
X if( fabs(delta) > int_threshold ) {
X return -1;
X }
X return 0;
X}
END_OF_FILE
if test 18614 -ne `wc -c <'number.c'`; then
echo shar: \"'number.c'\" unpacked with wrong size!
fi
# end of 'number.c'
fi
if test -f 'number.man' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'number.man'\"
else
echo shar: Extracting \"'number.man'\" \(5262 characters\)
sed "s/^X//" >'number.man' <<'END_OF_FILE'
X.nf
X NAME:
X Dgetnumber, Igetnumber \- a family of string to number conversion routines
X
X SYNOPSIS:
X
X /* Default double precision recognizer */
X success = Dgetnumber( string, resultptr )
X int success; { -1 indicates failure }
X char *string;
X double *result;
X
X /* Default integer recognizer */
X success = Igetnumber( string, resultptr )
X int success; { -1 indicates failure }
X char *string;
X int *result;
X
X /* Threshold used for integerizing double precision values. */
X double int_threshold;
X
X DESCRIPTION:
X
X Dgetnumber and Igetnumber are two representatives (probably all that a typical
X user will ever encounter) of a family of routines for converting string
X representations of numbers to numbers in machine internal representation.
X
X They were written out of frustration with programs and routines that seldom
X accept all of the "natural" representations of numbers for a problem --
X disk utilities that require decimal numbers as input, while disk error loggers
X produce hex numbers on output, times that need to be converted from H:M:S
X before they can be used, etc.
X
X The intention is to be able to freely recognize just about any
X format number:
X
X Decimal 1342334
X Hex 0xAB43
X Octal 01377
X Binary 0b100100011
X Arbitrary Radix rrr#vvvvvvvv
X H:M:S 1:20:33
X Real 1.45
X "Meg" 4M
X Expressions (4M-1)*2
X Exponential 1.2E6
X
X Because people often want to provide a special format over and above
X those that are already provided
X
X Eg. Hex 'ABC'Z
X Decimal 10.
X Ignore _ 100_677_888
X
X the intent is to define a, possibly parametrized, recognizer function
X for each format, and then to pass a list of desired recognizer functions
X for your specific recognizer.
X
X This is not intended to be fast, only general.
X
X All recognizers are of the form:
X
X success = RECOGNIZER( string, resultptr )
X int success; { -1 indicates failure }
X char *string;
X double *result;
X
X Recognition is done bottom up instead of top-down;
X instead of having a grammar that constrains notation,
X everything is passed to low-level recognizers that try to recognize
X the string, perhaps recursively, passing off to other recognizer
X in case of failure.
X
X The current recognizers are:
X
X Octal0 0<octal> eg. 0377 = 0xFF
X SimpleDecimalString <decimal> eg. 10 = 0xA
X Binary0b 0b<binary> eg. 0b011 = 3
X Decimal0d 0d<decimal> eg. 0d10 = 0xA
X Hex0x 0x<hex> eg. 0xA = 10
X ArbitraryRadix <decimalradix>#<digits> eg. 3#22 = 8
X colon60 M:S eg. 1:20 = 80
X colon60colon60 H:M:S eg. 2:1:20 = 7280
X PowersOf2 <real>[KMG] eg. 0.5K = 512
X RealDecimalString <real> eg. 0.5
X Expressions eg. 0.5M-1
X
X Expressions currently include:
X infix binary: | ^ & << >> + - * / % **(exponent)
X prefix unary: - + ~
X midfix grouping: () [] {}
X and it is similarly easy to add new notations.
X
X All number representations and expressions can be intermixed:
X [(2M-1)*4]>>0x03
X
X There are some functions useful in building other recognizers, like
X RadixString(), and the expression building functions.
X
X There are two top level recognizers,
X Dgetnumber(str,res)
X and Igetnumber(str,res);
X the "I" version basically calls the "D" version, rounds the result,
X and reports an error if the rounded integer value is more than
X int_threshold away from the non-integer value.
X
X These use an internal function
X
X typedef int (*Recognizer)();
X
X int DgetnumberList(str,res,flags,RecognizerList)
X char *str;
X double *res;
X int flags;
X Recognizer RecognizerList[];
X
X which is called by default with
X
X Recognizer DefaultRecognizers[]
X
X An easy way for users to customize these routines is to
X create a private list of recognizers, deleting standard recognizers
X that are undesired, and adding user coded recognizers that have
X not been provided (eg. nnCnnTnnB cylinder/track/block notation)
X and then call DgetnumberList() from their own top-level wrapper.
X
X (Internal detail: a flag controls whether conflicting matches
X should be an error or not).
X
X NOTES:
X Initially, this was integer only, but in Jan 89 I changed it
X to produce a floating point result - if you want integer, just
X integerize.
X This will have some lossage if your floating point format
X cannot represent all integer values exactly. Sorry - in that
X case, you'll just have to go back to the old routine.
X It has the advantage of one family of routines being able
X to handle intermediate cases - like 0.5M.
X It has the advantage of, on a system with decent floating
X point, being able to trap on overflow or underflow.
X But this is not added.
X If you can, use your system dependent way of trapping on inexact.
X Interim: if you have IEEE floating point, it would be nice to
X have this same routine read in NaNs.
X
X AUTHOR:
X Andy Glew (aglew at uiuc.edu)
X
X HISTORY:
X Originally written by Andy Glew at McGill University, 1983
X
X BUGS:
X END_OF_FILE
if test 5262 -ne `wc -c <'number.man'`; then
echo shar: \"'number.man'\" unpacked with wrong size!
fi
# end of 'number.man'
fi
if test -f 'test.c' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'test.c'\"
else
echo shar: Extracting \"'test.c'\" \(4519 characters\)
sed "s/^X//" >'test.c' <<'END_OF_FILE'
X/* exit() is declared by hand; this file includes no headers. */
Xvoid exit();
X
X/* Nonzero: print a "Passed" line for every test case that succeeds. */
Xint verbose = 1;
X
X/* Nonzero: exit(1) as soon as any automatic test case fails. */
Xint StopOnError = 1;
X
Xmain(argc,argv)
X int argc;
X char **argv;
X{
X int aval;
X int gval;
X int gret;
X
X if( argc == 1 ) {
X AutoTests();
X }
X else for(;*++argv;) {
X if(0) ;
X else if( !strcmp(*argv,"-auto") ) {
X AutoTests();
X }
X else if( !strcmp(*argv,"-verbose") ) {
X verbose = 1;
X }
X else if( !strcmp(*argv,"-noverbose") ) {
X verbose = 0;
X }
X else if( !strcmp(*argv,"-stoponerror") ) {
X StopOnError = 1;
X }
X else if( !strcmp(*argv,"-nostoponerror") ) {
X StopOnError = 0;
X }
X else if( !strcmp(*argv,"-test") ) {
X gret = Igetnumber(argv[1],&gval);
X
X if( !strcmp(argv[2],"invalid") ) {
X if( gret == -1 )
X exit(0);
X else {
X (void)printf("<%s> <%s> failed - invalid\n",argv[1],argv[2]);
X exit(1);
X }
X }
X else {
X aval = atoi(argv[2]);
X if( gret == -1 ) {
X (void)printf("<%s> invalid - should be <%s> %d\n",argv[1],argv[2],aval);
X exit(1);
X }
X else {
X if( aval != gval ) {
X (void)printf("<%s> %d should be <%s> %d\n",
X argv[1],gval,argv[2],aval);
X exit(1);
X }
X else
X exit(0);
X }
X }
X }
X else {
X (void)printf("Unknown argument <%s>\n",*argv);
X exit(1);
X }
X }
X exit(0);
X /*NOTREACHED*/
X}
X
X/*
X * Fixed test cases for Igetnumber(), walked in order by AutoTests().
X * Each entry pairs an input string with the integer it should produce,
X * or marks the string as one that must be rejected.
X */
Xstruct TestVec {
X char *str;	/* input string passed to Igetnumber(); null marks the end of the table */
X int value;	/* expected result; only compared when valid is nonzero */
X int valid;	/* nonzero if str must be accepted, 0 if it must be rejected */
X} TV[] = {
X { "-111E0A", 0, 0 },
X { "-111E0", -111, 1 },
X { "+121E0", 121, 1 },
X { "100E-2", 1, 1 },
X { "-100E-2", -1, 1 },
X { "+100E-2", 1, 1 },
X { "1E+1", 10, 1 },
X { "1.2E1", 12, 1 },
X { "0.5E6", 500000, 1 },
X { "-111e0A", 0, 0 },
X { "-111e0", -111, 1 },
X { "+121e0", 121, 1 },
X { "100e-2", 1, 1 },
X { "-100e-2", -1, 1 },
X { "+100e-2", 1, 1 },
X { "1e+1", 10, 1 },
X { "1.2e1", 12, 1 },
X { "0.5e6", 500000, 1 },
X { "2**4+1", 17, 1 },
X { "(0xFF>>2)+1", 64, 1 },
X { "(1<<2)+1", 5, 1 },
X { "1<<2+1", 8, 1 },
X /* replicated because of an old stateful error */
X { "-(-(-(-(-(-3)))))", 3, 1 },
X { "-(-(-(-(-(-3)))))", 3, 1 },
X { "-(-(-(-(-(-3)))))", 3, 1 },
X { "[1+(2*3)]*{1+2}", 21, 1 },
X { "1+(2+3)", 6, 1 },
X { "1-1-1", -1, 1 },
X { "(0)", 0, 1 },
X { "(1K)+1", 1025, 1 },
X { "2*(1K)+1", 2049, 1 },
X { "2*(2K+1)+(2M/1K)", 6146, 1 },
X { "(0)", 0, 1 },
X { "(0)", 0, 1 },
X { "0b0000", 0, 1 },
X { "0b11", 3, 1 },
X { "-0b101", -5, 1 },
X { "-5K", -5120, 1 },
X { "0.5K", 512, 1 },
X { "13M", 13631488, 1 },
X { "1G", 1073741824, 1 },
X { "1:20", 80, 1 },
X { "2:1:20", 7280, 1 },
X { "I-0b101", 0, 0 },
X { "10.3", 0, 0 },
X { "x0", 0, 0 },
X { "0x", 0, 0 },
X { "-1-", 0, 0 },
X { "-1+", 0, 0 },
X { "-", 0, 0 },
X { "+", 0, 0 },
X { "-4+1", -3, 1 },
X { "-4*-3", 12, 1 },
X { "4*-3", -12, 1 },
X { "-4*3", -12, 1 },
X /* end marker: the null str stops the scan in AutoTests() */
X { 0, 0 }
X};
X
XAutoTests()
X{
X int i;
X struct TestVec *tv;
X
X for( tv=TV; tv->str; tv++ ) {
X int val;
X if( Igetnumber(tv->str,&val) == -1 ) {
X if( tv->valid ) {
X (void)printf("Error: <%s> invalid, should be %d\n",
X tv->str, tv->value );
X if( StopOnError ) {
X exit(1);
X }
X }
X else {
X if( verbose ) (void)printf("Passed: <%s> invalid\n",tv->str);
X }
X }
X else {
X if( tv->valid ) {
X if( val != tv->value ) {
X (void)printf("Error: <%s> %d, should be %d\n",
X tv->str, val, tv->value );
X if( StopOnError ) {
X exit(1);
X }
X }
X else {
X if( verbose ) (void)printf("Passed: <%s> %d\n", tv->str, val);
X }
X }
X else {
X (void)printf("Error: <%s> %d, should be invalid\n",
X tv->str, val );
X if( StopOnError ) {
X exit(1);
X }
X }
X }
X }
X
X
X for(i=0;i<100;i++)
X TestAllPatterns(i);
X for(i=132;i<1000000000;i+=12331)
X TestAllPatterns(i);
X
X}
X
XTestAllPatterns(i)
X{
X TestFormat(i,"%d");
X TestFormat(i,"0%o");
X TestFormat(i,"0x%x");
X TestFormat(i,"0d%d");
X}
XTestFormat(i,fstr)
X int i;
X char *fstr;
X{
X char buf[128];
X
X /* With no sign */
X (void)sprintf(buf+1,fstr,i);
X TestBufferValue(buf+1,i);
X /* With sign */
X buf[0]='+';
X TestBufferValue(buf,i);
X buf[0]='-';
X TestBufferValue(buf,-i);
X}
XTestBufferValue(buf,i)
X char *buf;
X int i;
X{
X int val;
X if( Igetnumber(buf,&val) == -1 ) {
X (void)printf("error - <%s> invalid, should be %d\n",buf,i);
X if( StopOnError ) {
X exit(1);
X }
X }
X else if( val != i ) {
X (void)printf("error - <%s> %d, should be %d\n",buf,val,i);
X if( StopOnError ) {
X exit(1);
X }
X } else {
X if( verbose ) (void)printf("Passed: <%s> %d\n",buf,i);
X }
X}
X
X
END_OF_FILE
if test 4519 -ne `wc -c <'test.c'`; then
echo shar: \"'test.c'\" unpacked with wrong size!
fi
# end of 'test.c'
fi
echo shar: End of shell archive.
exit 0
--
Andy Glew, aglew at uiuc.edu
More information about the Alt.sources
mailing list