printf with bounds checking (snprintf, sxprintf), minor bug in sscanf
PAD Powell
padpowell at wateng.UUCP
Sat Sep 1 00:58:42 AEST 1984
: Run this shell script with "sh" not "csh"
# Shell archive prologue: fix the search path, parse the optional "-a"
# argument, then start unpacking the first member (README).
PATH=:/bin:/usr/bin:/usr/ucb
export PATH
# all=TRUE when the first argument is exactly "-a".  Nothing in the visible
# part of this archive reads $all afterwards.
all=FALSE
# The argument is quoted so that a first argument containing blanks cannot
# break the test expression.
if [ "x$1" = "x-a" ]; then
	all=TRUE
fi
/bin/echo 'Extracting README'
sed 's/^X//' <<'//go.sysin dd *' >README
INTRODUCTION
Having been burned several times by the behaviour of sprintf,
I have just finished testing two new versions, called snprintf
and sxprintf, which have the exact functionality of sprintf,
but do bounds checking.
In doing this, I discovered a couple of minor coding glitches in the
_doprnt() routine. It was written in assembler, and the authors
have my sympathy. I have the funniest feeling that many of the
sections of the conversion routines were DEC VMS sources.
It uses many of the very baroque VAX instructions...
In the following directory, the file doprnt.s.diff was generated using
diff -e /usr/src/lib/libc/vax/stdio/doprnt.s doprnt.s >doprnt.s.diff
The new doprnt.s can be regenerated using
ed /usr.....doprnt.s <doprnt.s.diff
COPY the ORIGINAL doprnt.s before you run the ed, it gets trampled on.
In addition, note a minor bug fix in sscanf: the IORD flag is not needed,
and setting it will cause REAL problems.
Patrick Powell,
U. Waterloo, VLSI Design Group,
Waterloo Ont.
SUMMARY
snprintf( count, str, format, args )
int count; char *str, *format, ...;
Exact functionality of sprintf, but will only generate count characters,
including the trailing 0. If it fails, it returns NULL, otherwise it
returns str.
sxprintf( count, str, format, args )
int count; char *str, *format, ...;
This has the exact functionality of snprintf, in that it does bound checking.
It does not append a trailing 0, and allows very nice reformatting of
fixed field items.
INSTALLATION
1. copy the snprintf.c,sxprintf.c,sprintf.c to /usr/src/lib/libc/stdio
NOTE: save the old versions, you might want them.
2. copy doprnt.s to /usr/src/lib/libc/vax/stdio
3. update the lint library, by copying the llib-lc to
/usr/src/usr.lib/lint/llib-lc, and then making the new lint libs
4. compile the lc library (moan).
Actually, you can shorten this by using the make file,
which has a quick "update" entry. This copies /lib/libc.a and
/usr/lib/libc_p.a and does an "ar cr *.o" of the new object files into the copies.
What are the benefits?
1. A bombproof version of the stdio library routines that does bounds
checking.
Patrick ("I hate sprintf") Powell
//go.sysin dd *
# Post-extraction step for README: set its mode and list it.
made=TRUE
if test "$made" = TRUE
then
	/bin/chmod 644 README
	/bin/echo -n ' '
	/bin/ls -ld README
fi
# Next member: Makefile.  sed removes one leading X from each payload line.
/bin/echo 'Extracting Makefile'
sed 's/^X//' <<'//go.sysin dd *' >Makefile
#
# MAKEFILE for the fixes to the stdio library
# Only makes the updates to the distribution.
#
# The "update" entry allows the /lib/libc.a library to be modified
# without building everything
#
CFLAGS= -O
OBJS= printf.o scanf.o fprintf.o sprintf.o snprintf.o sxprintf.o doprnt.o

# Each C source is compiled twice: once with -p (the result is moved into
# profiled/) and once without.  The profiled/ directory must already exist.
X.c.o:
	${CC} -p ${CFLAGS} -c $*.c
	-ld -X -r $*.o
	mv a.out profiled/$*.o
	${CC} ${CFLAGS} -c $*.c
	-ld -x -r $*.o
	mv a.out $*.o

# Archive the normal objects into stdiolib and the profiled/ objects into
# stdiolib_p.
stdiolib stdiolib_p: ${OBJS}
	@echo "building profiled stdiolib"
	@cd profiled; ar cru ../stdiolib_p ${OBJS}
	@echo "building normal stdiolib"
	@ar cru stdiolib ${OBJS}

clean:
	-rm -f stdiolib stdiolib_p ${OBJS} profiled/*.o libc.a libc_p.a

# doprnt.s is run through the C preprocessor (via a temporary doprnt.c)
# before being assembled.  The first pass is filtered through mcount.sed,
# which is not among the files this archive extracts -- presumably it comes
# from the libc source directory; confirm before building.
doprnt.o: doprnt.s
	cp doprnt.s doprnt.c
	cc -E doprnt.c | sed -f mcount.sed | as -o doprnt.o
	-ld -x -r doprnt.o
	mv a.out profiled/doprnt.o
	cc -E doprnt.c | as -o doprnt.o
	-ld -x -r doprnt.o
	mv a.out doprnt.o
	rm -f doprnt.c

# Build local copies libc.a and libc_p.a: start from the installed
# libraries and replace their members with the objects from stdiolib and
# stdiolib_p.  Nothing is installed; the copies stay in this directory.
update: stdiolib stdiolib_p
	rm *.o
	cp /lib/libc.a libc.a
	ar x stdiolib
	ar cr libc.a *.o
	ranlib libc.a
	rm *.o
	cp /usr/lib/libc_p.a libc_p.a
	ar x stdiolib_p
	ar cr libc_p.a *.o
	ranlib libc_p.a
//go.sysin dd *
# Post-extraction step for Makefile: set its mode and list it.
made=TRUE
if test "$made" = TRUE
then
	/bin/chmod 644 Makefile
	/bin/echo -n ' '
	/bin/ls -ld Makefile
fi
# Next member: fprintf.c.  sed removes one leading X from each payload line.
/bin/echo 'Extracting fprintf.c'
sed 's/^X//' <<'//go.sysin dd *' >fprintf.c
X/* @(#)fprintf.c 4.1 (Berkeley) 12/21/80 */
#include <stdio.h>
/*
 * fprintf: formatted output to the stream iop.
 * The address of the first argument after fmt is handed to _doprnt.
 * Returns EOF when _doprnt reports a negative result or when the stream's
 * error indicator is set; returns 0 otherwise.
 */
fprintf(iop, fmt, args)
FILE *iop;
char *fmt;
{
	if (_doprnt(fmt, &args, iop) < 0)
		return (EOF);
	if (ferror(iop))
		return (EOF);
	return (0);
}
//go.sysin dd *
# Post-extraction step for fprintf.c: set its mode and list it.
made=TRUE
if test "$made" = TRUE
then
	/bin/chmod 644 fprintf.c
	/bin/echo -n ' '
	/bin/ls -ld fprintf.c
fi
# Next member: printf.c.  sed removes one leading X from each payload line.
/bin/echo 'Extracting printf.c'
sed 's/^X//' <<'//go.sysin dd *' >printf.c
X/* @(#)printf.c 4.1 (Berkeley) 12/21/80 */
#include <stdio.h>
/*
 * printf: formatted output to stdout.
 * The address of the first argument after fmt is handed to _doprnt.
 * Returns EOF when _doprnt reports a negative result or when stdout's
 * error indicator is set; returns 0 otherwise.
 */
printf(fmt, args)
char *fmt;
{
	if (_doprnt(fmt, &args, stdout) < 0)
		return (EOF);
	if (ferror(stdout))
		return (EOF);
	return (0);
}
//go.sysin dd *
# Post-extraction step for printf.c: set its mode and list it.
made=TRUE
if test "$made" = TRUE
then
	/bin/chmod 644 printf.c
	/bin/echo -n ' '
	/bin/ls -ld printf.c
fi
# Next member: scanf.c.  sed removes one leading X from each payload line.
/bin/echo 'Extracting scanf.c'
sed 's/^X//' <<'//go.sysin dd *' >scanf.c
X/* @(#)scanf.c 4.2 (Waterloo) 8/28/84 */
#include <stdio.h>
/*
 * scanf: formatted input from stdin.
 * Passes the address of the first argument after fmt to _doscan and
 * returns whatever _doscan returns.
 */
scanf(fmt, args)
char *fmt;
{
	int result;

	result = _doscan(stdin, fmt, &args);
	return (result);
}
/*
 * fscanf: formatted input from the stream iop.
 * Passes the address of the first argument after fmt to _doscan and
 * returns whatever _doscan returns.
 */
fscanf(iop, fmt, args)
FILE *iop;
char *fmt;
{
	int result;

	result = _doscan(iop, fmt, &args);
	return (result);
}
/*
 * sscanf: formatted input from the 0-terminated string str.
 * A temporary FILE is set up over the string: _ptr and _base point at its
 * first byte, _cnt and _bufsiz hold its length (terminator excluded), and
 * the only flag set is _IOSTRG.  Returns whatever _doscan returns.
 */
sscanf(str, fmt, args)
register char *str;
char *fmt;
{
	FILE src;
	register int len;

	/* length of str, not counting the terminating 0 */
	for (len = 0; str[len] != '\0'; len++)
		;

	src._flag = _IOSTRG;
	src._base = str;
	src._ptr = str;
	src._cnt = len;
	src._bufsiz = len;
	return (_doscan(&src, fmt, &args));
}
//go.sysin dd *
# Post-extraction step for scanf.c: set its mode and list it.
made=TRUE
if test "$made" = TRUE
then
	/bin/chmod 644 scanf.c
	/bin/echo -n ' '
	/bin/ls -ld scanf.c
fi
# Next member: snprintf.c.  sed removes one leading X from each payload line.
/bin/echo 'Extracting snprintf.c'
sed 's/^X//' <<'//go.sysin dd *' >snprintf.c
X/* @(#)snprintf.c 4.2 (Waterloo) 19/08/84 */
#include <stdio.h>
char *snprintf(count, str, fmt, args)
int count;
char *str, *fmt;
{
int s;
struct _iobuf _strbuf;
_strbuf._flag = _IOSTRG;
_strbuf._ptr = str;
_strbuf._cnt = count;
s = _doprnt(fmt, &args, &_strbuf);
if( s >= 0 ){
if( _strbuf._cnt > 0 ){
*_strbuf._ptr = 0;
} else {
s = -1;
}
}
return(s < 0? NULL : str );
}
//go.sysin dd *
# Post-extraction step for snprintf.c: set its mode and list it.
# The mode is 644, the same as every other member of this archive; a C
# source file has no use for execute permission.
made=TRUE
if [ "$made" = TRUE ]; then
	/bin/chmod 644 snprintf.c
	/bin/echo -n ' '; /bin/ls -ld snprintf.c
fi
# Next member: sprintf.c.  sed removes one leading X from each payload line.
/bin/echo 'Extracting sprintf.c'
sed 's/^X//' <<'//go.sysin dd *' >sprintf.c
X/* @(#)sprintf.c 4.2 (Waterloo) 19/08/84 */
#include <stdio.h>
char *sprintf(str, fmt, args)
char *str, *fmt;
{
int s;
struct _iobuf _strbuf;
_strbuf._flag = _IOSTRG;
_strbuf._ptr = str;
_strbuf._cnt = 32767;
s = _doprnt(fmt, &args, &_strbuf);
if( s >= 0 ){
if( _strbuf._cnt > 0 ){
*_strbuf._ptr = 0;
} else {
s = -1;
}
}
return(s < 0? NULL : str );
}
//go.sysin dd *
# Post-extraction step for sprintf.c: set its mode and list it.
made=TRUE
if test "$made" = TRUE
then
	/bin/chmod 644 sprintf.c
	/bin/echo -n ' '
	/bin/ls -ld sprintf.c
fi
# Next member: sxprintf.c.  sed removes one leading X from each payload line.
/bin/echo 'Extracting sxprintf.c'
sed 's/^X//' <<'//go.sysin dd *' >sxprintf.c
X/* @(#)sxprintf.c 4.2 (Waterloo) 19/08/84 */
#include <stdio.h>
char *sxprintf(count, str, fmt, args)
int count;
char *str, *fmt;
{
int s;
struct _iobuf _strbuf;
_strbuf._flag = _IOSTRG;
_strbuf._ptr = str;
_strbuf._cnt = count;
s = _doprnt(fmt, &args, &_strbuf);
return(s < 0? NULL : str );
}
//go.sysin dd *
# Post-extraction step for sxprintf.c: set its mode and list it.
made=TRUE
if test "$made" = TRUE
then
	/bin/chmod 644 sxprintf.c
	/bin/echo -n ' '
	/bin/ls -ld sxprintf.c
fi
# Next member: test.c.  sed removes one leading X from each payload line.
/bin/echo 'Extracting test.c'
sed 's/^X//' <<'//go.sysin dd *' >test.c
#include <stdio.h>
/*
 * Smoke test for snprintf: format several strings into a 100-byte buffer
 * and, after each call, print the value snprintf returned followed by the
 * text now in the buffer.
 */
main()
{
	char str[100];
	int s;	/* receives snprintf's result, printed with %d below */

	printf("%d\n", 1);
	s = snprintf( 100, str, "%s\n", "test this" );
	/* every result line prints both the return value and the buffer */
	printf( "%d: %s\n", s, str );
	/* NOTE(review): %d is given a string pointer here -- presumably a
	   deliberate mismatch to exercise the formatter; confirm. */
	s = snprintf( 100, str, "%d\n", "test this" );
	printf( "%d: %s\n", s, str );
	s = snprintf( 100, str, "%30s\n", "test this" );
	printf( "%d: %s\n", s, str );
	s = snprintf( 100, str, "%-30s\n", "test this" );
	printf( "%d: %s\n", s, str );
	s = snprintf( 100, str, "%-30s\n", "test % this" );
	printf( "%d: %s\n", s, str );
}
//go.sysin dd *
# Post-extraction step for test.c: set its mode and list it.
made=TRUE
if test "$made" = TRUE
then
	/bin/chmod 644 test.c
	/bin/echo -n ' '
	/bin/ls -ld test.c
fi
# Next member: doprnt.s.diff.  sed removes one leading X from each payload line.
/bin/echo 'Extracting doprnt.s.diff'
sed 's/^X//' <<'//go.sysin dd *' >doprnt.s.diff
616c
movl exp,sexp # save from destruction
X.
597c
ashl $-1,r7,r0 # displ to last byte
X.
587,593d
584,585c
snarro: subl3 $18,r7,r0 # rounding position
ashp r0,$18,(sp),$5,r7,16(sp) # shift and round
X.
581,582c
cmpl r7,$31 # expressible in packed decimal?
jleq snarro # yes
X.
561d
558c
subl2 r2,r0 # truncate, willy-nilly
X.
554c
# check bounds on users who say %.300f (see %d format for expl)
X.
552c
movl ndigit,r0 # need any more?
X.
547c
cmpl r0,r6 # limit on packed length
X.
531c
movl exp,r0 # need any more?
X.
528c
f4: subl2 r0,r6 # eat some digits
X.
525c
f2: cmpl r0,r6 # limit on packed length
X.
491c
pushab prnum # goto prnum after fall-through call to fedit
X.
474,480d
458a
X/*
"%f" "%F"
*/
X.
446a
X/*
"%*" - width or precision specified by the next parameter
width
if the parameter is > 0, then normal
if < 0, then left adjustment
*/
X.
442a
X/*
"%." - set precision flag
*/
X.
431a
X/*
"%0" - a numeric field
field width or 0 fill indication
"%12346789" - a numeric field
- width or precision
*/
X.
429a
X/*
"%-" - left justification flag
*/
X.
427a
X/*
"%+" - always print sign flag
*/
X.
425a
X/*
"%#" - self identifying flag
*/
X.
423a
X/*
"% " - space flag
*/
X.
414,415c
pone: .byte 0x1C # packed 1
X/*
"%c"
*/
X.
410,411c
jbs $31,nchar,errdone # error flag set
addl2 llafx,sp # deallocate
movl (sp)+,r1 # recover non-pad addr
X.
407,408c
movl llafx,r0 # length
movl sp,r1 # addr
X.
402,405c
subl2 r0,width # pad width decreases minimum width
pushl r1 # save non-pad addr
movl r0,llafx # remember width of pad
subl2 r0,sp # allocate
X.
391c
movl width,r0 # size of pad
X.
380,389c
decl r0 # yes; adjust count
movzbl (r1)+,r2 # fetch byte
movq *fdesc,r4 # output buffer descriptor
sobgeq r4,p2 # room at the out [inn] ?
bsbw strout2 # no; force it, then try rest
jbs $31,nchar,errdone # error flag set
jbr p3 # here we go 'round the mullberry bush, ...
p2: movb r2,(r5)+ # hand-deposit the percent or null
incl nchar # count it
movq r4,*fdesc # store output descriptor
jbr p1 # what an expensive hiccup!
X.
376,378c
subl2 ndigit,width # root reduces minimum width
movl ndigit,r0 # root length
p1: bsbw strout # transfer to output buffer
jbs $31,nchar,errdone # error flag set
X.
372,374c
bsbb padz # zero pad on left
padnlz: # end of extension for left zero padding
X.
366,369c
jleq padnlx # left zero pad requires left affix first
subl2 r0,ndigit # part of total length will be transferred
subl2 r0,width # and will account for part of minimum width
bsbw strout # left affix
jbs $31,nchar,errdone # error flag set
X.
361,362c
# this bsbb needed even if %0 flag extension is removed
bsbb padb # blank pad on left
X.
359c
# extension for %0 flag causing left zero padding to field width
X.
357c
jleq padlno # in particular, no left padding
X.
352,355c
X/*
prstr:
print the string, or at least place it in the buffer
*/
prstr:
# r1=addr first byte; r5=addr last byte +1
# width=minimum width; llafx=len. left affix
# ndigit=<avail>
subl3 r1,r5,ndigit # raw width
X.
346c
# move the digits down by the extra number needed
X.
341,344c
movab 32(r5)[ndigit],r2 # calculate addr[last_digit]+totallen+32
subl2 fp,r2 # see if this hits the frame pointer
jlss prn5 # no, so OK
subl2 r2,ndigit # truncate by the overflow
X.
339c
pushl r1 # movcx gobbles registers
X.
337c
jleq prstr # none
X.
335c
subl2 llafx,lrafx # number of digits - size_signs
X.
331,332c
prn4: incl llafx # and note that we have fudged signs
prn3: jbs $prec,flags,prn1 # if precision set, use specified value
X.
329c
prn2: jbc $blank,flags,prn3 # if blank flag set, want a blank for sign
X.
325,326c
jneq prn3 # already some left affix, dont fuss
jbc $plssgn,flags,prn2 # if plssng flag set, move in a sign
X.
322,323c
X/*
prnum:
Print a number, with sign, and other ugly things
*/
prnum: # r1=addr first byte, r5=addr last byte +1, llafx=size of signs
# -1(r1) vacant, for forced sign
X.
314a
X/*
"%d", "%D"
*/
X.
298a
X/*
"%u", "%U"
*/
X.
296,297c
L12a: movb $'0,-(r1) # leading zero for octal is digit, not an affix
jbr prn3 # omit sign (plus, blank) massaging
X.
278,290c
addl2 $4,r5 # room for left affix (2) and slop [forced sign?]
movl (ap)+,r0 # fetch arg
L11: extzv r2,r3,r0,r1 # pull out a digit
movb (llafx)[r1],(r5)+ # convert to character
L12: acbl $0,r6,r2,L11 # continue until done
clrq r6 # lrafx, llafx
clrb (r5) # flag end
skpc $'0,$11,4(sp) # skip over leading zeroes (note: r1,r0 set)
jbc $numsgn,flags,prn3 # if no sign or afix, skip
tstl -4(ap) # original value
jeql prn3 # no affix on 0, for some reason
cmpl r3,$4 # were we doing hex or octal?
jneq L12a # octal
X.
276c
L10: mnegl r3,r6 # r6 = -(field width)
X.
274c
jbc $caps,flags,L10 # if upper case, use upper case table
X.
271,272c
movl $28,r2 # init position (28-32)
movl $4,r3 # field width (4 bits)
X.
269a
X/*
"%x" "%X"
1. set up index for first character (i.e.- bit 28-32)
2. set field width
3. if "%X" use capital letters for hex
NOTE: much of the actions here are to set up the
extzv instruction- extract field (with zero fill)
*/
X.
263a
X/*
"%o" "%O"
1. set up the index for the first character
2. set up translation table
(note, this is the same for the decimal, hex, and other fields)
*/
X.
250a
X/*
"%s" - string format
1. see if a max length specified.
2. truncate if too long.
*/
X.
249c
jbr L4 # and try again
X.
245a
X/*
"%<CAPITAL>" -> "%<lowercase>"
1. Set the "caps" flag
2. lower case the format
*/
X.
240a
X/*
"%r"
Handle the "remote arglist" facility
1. Replace the arglist (ap) with the indicated arglist
*/
X.
235a
X/*
"%<garbage>"
Handle bad format characters:
1. put at end of temp buffer (stack)
2. print them
*/
X.
138,141c
movl sp,r5 # reset output buffer pointer
clrq r9 # width; flags
clrq r6 # lrafx,llafx
longorunsg: # we can ignore both of these distinctions
X.
131,136c
bsbw strout # copy to output, stop at null or percent
movl r1,r11 # new fmt
jbs $31,nchar,errdone # error flag set
jbc $vbit,r2,errdone # if no escape, then an error
tstb (r11)+ # escape; null or percent?
jeql prdone # null means end of fmt
X.
121,129c
movzwl $65535,r0 # pseudo length of -1
movl r11,r1 # fmt addr
X/*
Loop over the input, until a format is hit
*/
X.
119c
clrl nchar # number of chars transferred
X.
115c
movab -WRKSZ(sp),sp # work space
X.
111a
X/*
* _doprnt entry point
*/
X.
109c
movl nchar,r0 # set up return value
X.
107c
mnegl $1,nchar # set nchar to -1 for error indication
X.
105a
X/*
* Exit from _doprnt()
*/
X.
100,104d
97c
tstl r0 # no; check for full output buffer
beql stresc # you have nothing left
X/*
* Search terminated by a non-escape, and you have something left.
* This means that you have a buffer full, and should output it.
* check the output buffer, then go for it
*/
strmor: movzbl (r1)+,r2 # get next char, advance pointer
decl r0 # and decrement count
tstb strtab[r2] # translate
jneq strout2 # bad guy in disguise (outbuf is full)
incl r0 # fix the length
decl r1 # and the addr
movl $1<vbit,r2 # fake condition codes
jbr stresc
X/*
* Force the write of a full buffer
*/
strout2: # enter here to force out r2; r0,r1 must be saved
incl nchar # count the char
pushr $3 # save r0, r1, r2
movl fdesc,r0 # get the file descriptor address
bitl $_IOSNPRNT,_flags(r0) # check to see if snprintf/printf
jneq L3 # Whoops! Don't write it
pushl fdesc # FILE
pushl r2 # the char
calls $2,__flsbuf # please empty the buffer and handle 1 char
tstl r0 # successful?
jlss L3 # no, neqative returned
popr $3 # get input descriptor back
jbr strout # and output again
L3: mnegl $1,nchar # set nchar to -1 for error indication
popr $3 # get input descriptor back
X/*
* found escape or end of string
*/
X.
90,95c
/******* End bogus movtuc workaround ***/
X/*
* Finished moving the characters, now check to see how many
*/
movdon: movq r4,*fdesc # update output descriptor
subl2 r0,nchar # correct count
X.
86,87c
L1:
movb r3,(r5)+ # copy to end of destination
X.
82,84c
bneq L1
mnegl $1,r2 # set r2 to -1 to fake escape return code
decl r1 # adjust r1, back up
X.
80c
movzbl (r1)+,r3 # get the next byte, and check it
X.
78c
tstl r4 # if no dest bytes left, terminated
X.
73,75c
X/******* Start bogus movtuc workaround *****
movtuc r0,(r1),$0,(r3),r4,(r5)
movpsl r2 /* squirrel away condition codes */
X/*
After execution:
r0 = #bytes remaining in source, including escape
r0 == 0 only if entire string was translated with no escape
r1 = addr of byte causing escape, or one beyond end of string
r2 = 0;
r3 = addr of table
r4 = number of bytes in destination string
r5 = addr of next byte in destination string
*/
clrl r2 # r2 = 0
tstl r0 # if no source bytes left, terminated
X.
70,71c
tstl r0 # if r0 >= 0, you are OK
bgeq strd
clrl r0 # set count to 0 if negative
strd:
X.
49,66c
X/*
* strout(r0,r1)
* r0 = count, r1 = string
* Copy the string to the end of the buffer,
* checking for a null or escape character
* If the buffer overflows and the output mode is set, force output,
* otherwise set an error and return.
*
* Note: this was supposed to use the MOVTUC instruction
* Note: the strout will always return a "escape" indication
* for success. This makes error handling a little easier
*/
X.
29a
X/*
* strtab: table used with the MOVTUC instruction
*/
X.
9,27c
X/*
* flag register bits
*/
#define ndfnd 0 /* number from format field being processed */
#define prec 1 /* "%." encountered- precision */
#define zfill 2 /* "%0" - zero fill */
#define minsgn 3 /* "%-" - minus sign, can be left justify flag */
#define plssgn 4 /* "%+" - plus sign, force signed output */
#define numsgn 5 /* format- looks like you have to do fancy formatting */
#define caps 6 /* format- capitalized version of the format field */
#define blank 7 /* "% " - constant width e format, no sign */
#define gflag 8 /* "%g" format */
#define dpflag 9 /* format- decimal point needed */
#define width r9 /* width of field */
#define ndigit r8 /* number of digits of fill, various ugly uses */
#define llafx r7 /* temporary, ugly name */
#define lrafx r6 /* temporary, ugly name */
X/*
* when invoked, doprnt will create a temporary area on the stack,
* whose top is pointed to by (fp); this area is used for local variables
*/
#define fdesc -4(fp) /* the FILE *file parameter */
#define exp -8(fp) /* temporary */
#define sexp -12(fp) /* temporary */
#define nchar -16(fp) /* number of characters placed in buffer/output */
#define sign -17(fp) /* temporary */
X.
7a
X/*
* working register definitions
*/
X.
6a
X/* set size of working area to 1024+128 bytes */
#define WRKSZ 1152
X/* get this definition from the stdio.h definitions */
#define _IOSNPRNT 01000
X/* offset of the _flag field within the _iobuf structure */
#define _flags 16
X/* position of the V bit in a register, when the PSW is copied into it */
X.
3a
This implementation of the stdio library _doprnt has been edited
and changed a little bit to handle provisions for snprintf(),
a printf with fixed length output buffer.
Patrick Powell,
VLSI Research Group,
University of Waterloo
In addition, some minor botches in the code have been changed.
As a guide to the reader, the STDIO library uses the _iobuf
structure, which is defined as follows:
define BUFSIZ 1024
define _NFILE 20
OFFSET extern struct _iobuf {
0 int _cnt; # number chars left in buffer
4 char *_ptr; # pointer to next free char in buffer
8 char *_base; # buffer
12 int _bufsiz;# buffer size
16 short _flag; # contains flags
18 char _file; # contains fcb, should really be an int
} _iob[_NFILE];
Originally the VAX MOVTUC (Move Translated Until Character)
instruction was going to be used to do the format string
interpretation, but something seems to have gone wrong.
There was originally a really messy bit of code to check for
a fairly weird set of conditions that were hardware dependent.
This does not seem to be a problem any more, and the code has
been eliminated.
1. A working area is allocated on the top of the stack.
This area was originally 256 bytes long, and
is the source of the "cannot handle long fields"
comment in the printf(3s) man page.
This bound (set by WRKSZ) has been increased to
(1024+128) bytes, and should handle fields up to 1024 bytes.
2. The format string is scanned until a flag ('%' or a NUL byte) is found.
If a '%', the necessary formatting of a field is done.
During this process, output is copied to an output buffer,
and if the buffer is full, the "_flsbuf" routine is called.
If the "IOSTRG" flag is set in the FCB, then an error
condition is raised, and _doprnt will return a negative
integer.
3. This version of _doprnt will return the number of output characters
placed in the output stream/buffer.
*/
X.
1,2c
X/* @(#)doprnt.s 4.3 (WATERLOO) 8/20/84 */
X/*
_doprnt( fmt, args, file )
char *fmt; void **args; FILE *file;
X.
w
q
//go.sysin dd *
# Post-extraction step for doprnt.s.diff: set its mode and list it.
made=TRUE
if test "$made" = TRUE
then
	/bin/chmod 644 doprnt.s.diff
	/bin/echo -n ' '
	/bin/ls -ld doprnt.s.diff
fi
# All members unpacked; stop before any trailing non-script text.
exit
More information about the Comp.sources.unix
mailing list