UUCP sequence number bogs
utzoo!decvax!ittvax!swatt
utzoo!decvax!ittvax!swatt
Thu Jul 1 13:07:20 AEST 1982
Some time ago several people mentioned that sequence numbers can
wrap around too quickly on busy systems and possibly generate
duplicate names. Decvax simply went to hex for the sequence
numbers and ran into no problems. I wrote some routines to
manage the 4-digit sequence number in base 62 to be absolutely
sure wraparound would be a long time coming.
As a side effect of all this, I noticed that uux, in particular
is very stupid about the way it grabs sequence numbers: it gets 6
sequence numbers for the typical command (such as used by mail
and news), but only uses 3. Since the sequence number file has
to be locked for each new number, uux spends a lot of time just
getting sequence numbers.
With sequence numbers in base 62, I decided to look into grabbing
numbers in a HUNK, instead of 1 at a time. Below are some
comparisons (run during day on VAX780 running 4.1bsd, RP06
disks):
First, the time to generate 100 sequence numbers the old way:
# time oldgen 100
0.6u 4.3s 0:30 16% 9+9k 1+614io 1pf+0w
%time cumsecs #call ms/call name
26.7 1.23 _unlink
21.5 2.21 _creat
15.3 2.91 _link
8.0 3.28 _write
7.3 3.61 _close
5.5 3.86 _open
3.6 4.03 _chmod
2.5 4.14 _read
2.0 4.24 _malloc
1.5 4.30 __doprnt
1.5 4.37 _sprintf
0.7 4.40 _IEH3exit
0.7 4.44 _fopen
0.4 4.45 __doscan
0.4 4.47 __filbuf
0.4 4.49 __innum
0.4 4.50 _calloc
0.4 4.52 _fclose
0.4 4.54 _freopen
0.4 4.55 100 0.17 _getseq
0.4 4.57 _strcmp
0.3 4.58 _fflush
0.1 4.59 __flsbuf
0.0 4.59 100 0.00 _gename
0.0 4.59 1 0.00 _main
Now 100 sequence numbers in hunks of 10:
# time newgen 100
0.2u 0.4s 0:04 15% 7+8k 1+73io 1pf+0w
%time cumsecs #call ms/call name
16.7 0.08 _creat
13.3 0.15 440 0.15 _tstoa
10.0 0.20 _close
7.5 0.24 _link
6.7 0.27 _open
6.7 0.30 _read
6.7 0.34 _unlink
6.7 0.37 udiv
5.8 0.40 _fgets
3.3 0.42 __doprnt
3.3 0.43 __filbuf
3.3 0.45 _chmod
3.3 0.47 100 0.17 _getseq
3.3 0.48 _malloc
3.3 0.50 _write
0.0 0.50 10 0.00 _atots
0.0 0.50 100 0.00 _gename
0.0 0.50 1 0.00 _main
As you can see, the old way takes 30 seconds real time verses 4
seconds the new way. The difference in I/O is staggering,
although the the time command is that of csh, and I never have
been able to figure out what the io information means.
We have been using the new scheme since June 21, and have noticed
no problems talking with 4.1bsd, V7, and ONYX sites. It means
the sequence numbers will have the character set [0-9A-Za-z], but
UUCP seems to not care about that. By the way our current
sequence number is: 0DJe, or 51189 decimal, or 5190 typical uux
calls. As far as I can tell, hunks larger than 10 are wasteful
for all but a few uucp commands, although with 14 million to play
with, you can afford a little waste.
Note that when compiled with -DTEST, it uses a lockfile in the
current area. The tests I ran whose results are included above
were in a relatively small directory (< 20 entries). In the real
system, the sequence lockfile is in the spool directory, and I
suspect the savings of chuncking sequence numbers will be even
greater. You can probably also move the lockfile to somewhere
else and save even more overhead.
Here are the same tests run when the current directory has been
filled to contain 1000 empty entries:
# time oldgen 100
0.6u 8.9s 0:36 26% 9+9k 3+618io 1pf+0w
%time cumsecs #call ms/call name
34.5 3.22 _creat
30.6 6.07 _link
14.6 7.43 _unlink
4.5 7.85 _write
3.6 8.18 _close
2.9 8.45 _open
1.6 8.60 _chmod
1.4 8.73 _sprintf
1.3 8.84 _read
0.7 8.91 __doprnt
0.5 8.96 _IEH3exit
0.5 9.01 udiv
0.5 9.06 _strlen
0.4 9.09 _calloc
0.4 9.12 _fclose
0.4 9.16 _strcpy
0.3 9.19 __innum
0.3 9.21 _malloc
0.2 9.23 100 0.21 _getseq
0.2 9.25 _free
0.2 9.26 _freopen
0.2 9.28 _sbrk
0.1 9.29 __filbuf
0.1 9.31 100 0.13 _gename
0.1 9.31 __cleanup
0.1 9.32 _fflush
0.0 9.33 __instr
0.0 9.33 _atof
0.0 9.34 _getppid
0.0 9.34 _longjmp
0.0 9.34 1 0.00 _main
# time newgen 100
0.2u 0.9s 0:04 26% 7+8k 0+67io 1pf+0w
%time cumsecs #call ms/call name
32.2 0.30 _creat
21.4 0.50 _link
12.5 0.62 _unlink
7.1 0.68 _fgets
3.6 0.72 __doprnt
3.6 0.75 _chmod
3.6 0.78 _malloc
3.6 0.82 _open
1.8 0.83 _close
1.8 0.85 100 0.17 _getseq
1.8 0.87 1 16.68 _main
1.8 0.88 _sbrk
1.8 0.90 _sprintf
1.8 0.92 udiv
1.8 0.93 urem
0.0 0.93 10 0.00 _atots
0.0 0.93 100 0.00 _gename
0.0 0.93 440 0.00 _tstoa
Here is the code for gename.c, simply replace your existing file
with the new version. You can experiment with the included test
main yourself. The original also had a bug such that 9999
wrapped around to 1000 instead of 0.
- Alan S. Watt
=================================================================
/* gename 3.4 01/15/82 (ITT) */
#ifdef TEST
# define finish(x) exit(x)
# define DEBUG(a,b,c) ;
# define SEQLOCK "SEQLCK"
# define SEQFILE "SEQFILE"
# include "uucplock.c"
#else !TEST
# include "uucp.h"
# ifdef ITTVAX
# define TSSEQ
# define SEQHUNK 10
# define BASE 62
# endif ITTVAX
#endif TEST
/*******
* gename(pre, sys, grade, file) generate file name
* char grade, *sys, pre, *file;
*
* return codes: none
*/
gename(pre, sys, grade, file)
char pre, *sys, grade, *file;
{
char sqnum[5];
getseq(sqnum);
sprintf(file, "%c.%.7s%c%.4s", pre, sys, grade, sqnum);
DEBUG(4, "file - %s\n", file);
return;
}
#define SLOCKTIME 10L
#define SLOCKTRIES 5
#define SEQLEN 4
/*******
* getseq(snum) get next sequence number
* char *snum;
*
* return codes: none
*
* Fri Jan 15 15:34:02 EST 1982 ittvax!swatt:
* if "TSSEQ" is defined, use "ts" routines to keep sequence numbers
* in either base 36 or 62.
* if "SEQHUNK" is defined, then sequence numbers are allocated in
* hunks of that size. Using SEQHUNK on very busy systems is not
* advisable unless TSSEQ is also enabled.
*
* Also fix so wraparound is correct; the old code did 9999=>1000
*
* The hunk idea saves a lot of filesystem activity. To get a
* new sequence number from the sequnce file invovles:
*
* 1) Lock the file (creat + link)
* 2) Open file
* 3) Read from file
* 4) Reopen file for writing
* 5) Change mode to 0666
* 6) Write to file
* 7) Unlock file (unlink)
*/
#ifndef SEQHUNK
# define SEQHUNK 1
#endif !SEQHUNK
getseq(snum)
char *snum;
{
FILE *fp;
#ifdef TSSEQ
typedef unsigned long int ts_t;
ts_t atots();
char *tstoa();
#else !TSSEQ
typedef int ts_t;
#endif TSSEQ
char tseqbuf[64];
static ts_t n;
static int nseq = 0;
if (nseq <= 0) {
for (n = 0; n < SLOCKTRIES; n++) {
if (!ulockf( SEQLOCK, SLOCKTIME))
break;
sleep(5);
}
ASSERT(n < SLOCKTRIES, "CAN NOT GET", SEQLOCK, 0);
/* @@@(ittvax!swatt):
* can save something by using "r+" on fopen
*/
if ((fp = fopen(SEQFILE, "r")) != NULL) {
#ifdef TSSEQ
fgets (tseqbuf, sizeof tseqbuf, fp);
n = atots (tseqbuf);
#else !TSSEQ
/* read sequence number file */
fscanf(fp, "%4d", &n);
#endif TSSEQ
fp = freopen(SEQFILE, "w", fp);
ASSERT(fp != NULL, "CAN NOT OPEN", SEQFILE, 0);
chmod(SEQFILE, 0666);
}
else {
/* can not read file - create a new one */
if ((fp = fopen(SEQFILE, "w")) == NULL)
/* can not write new seqeunce file */
return(FAIL);
chmod(SEQFILE, 0666);
n = 0;
}
#ifdef TSSEQ
tstoa (n+SEQHUNK, tseqbuf, SEQLEN);
#else !TSSEQ
sprintf (tseqbuf, "%04d", n+SEQHUNK);
#endif TSSEQ
/* discard high order digits on overflow */
while (strlen (tseqbuf) > SEQLEN)
strcpy (tseqbuf, tseqbuf+1);
fprintf (fp, "%s", tseqbuf);
fclose (fp);
rmlock (SEQLOCK);
nseq = SEQHUNK;
}
#ifdef TSSEQ
/* Convert n to base 36 (or base 62) digit string */
tstoa (n, tseqbuf, SEQLEN);
#else !TSSEQ
sprintf (tseqbuf, "%04d", n);
#endif TSSEQ
/* discard high-order digits on overflow */
while (strlen (tseqbuf) > SEQLEN)
strcpy (tseqbuf, tseqbuf+1);
strcpy (snum, tseqbuf);
++n;
--nseq;
return(0);
}
#ifdef TSSEQ
/*********************************************************************
function: ts
description: Convert between binary integers and base thirty-six
strings (hence the name "ts")
programmer: Alan S. Watt
history:
01/14/82 original version
*********************************************************************/
/* Alphanumeric string-to-integer conversion routines */
/* BASE is either 36 or 62
* Base 36 allows magnitudes from 0 to 1679615
* Base 62 allows magnitudes from 0 to 14776365
*/
#ifndef BASE
# define BASE 62
#endif BASE
#define EOS '\0'
static char tsdigits[] =
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
#include <ctype.h>
/* Ordinal values for characters
* Change for non-ASCII
*/
#define DIGORD(c) (c-'0')
#define UCORD(c) (c-'A')
#define LCORD(c) (c-'a')
typedef unsigned long int ts_t;
/* Convert TS string to binary integer */
ts_t
atots (str)
register char *str;
{
register ts_t ret;
register digit;
for (ret = 0; *str != EOS; str++) {
if (isupper (*str))
digit = UCORD(*str) + 10;
else if (islower (*str))
digit = LCORD(*str) + 10 + (BASE-36);
else if (isdigit (*str))
digit = DIGORD(*str);
else
break;
ret = ret * BASE + digit;
}
return (ret);
}
/* Convert binary integer to TS string.
* String is left-padded with zeros up to the
* width given by 'prec'. Truncation does not
* take place here; calling routine must do it
* if required.
*/
char *
tstoa (num, buf, prec)
register ts_t num;
register char *buf;
int prec;
{
register ts_t quot = num / BASE;
if (--prec > 0 || quot != 0)
buf = tstoa (quot, buf, prec);
*buf++ = tsdigits[num % BASE];
*buf = EOS;
return (buf);
}
#endif TSSEQ
#ifdef TEST
#include <stdio.h>
main (argc, argv)
int argc;
char **argv;
{
char buf[128];
int nseq;
if (argc > 1)
nseq = atoi (argv[1]);
else
nseq = 10;
while (nseq-- > 0) {
gename ('C', "ittvax", 'n', buf);
if (argc < 3)
printf ("%s\n", buf);
}
}
#endif TEST
=================================================================
More information about the Net.bugs
mailing list