brl-vgr Bug Report
dpk at BRL-VGR.ARPA
Sat Nov 24 13:31:50 AEST 1984
Subject: Tar is inefficient, ignores blocksize
Index: bin/tar.c 4.2BSD 2.9BSD SysV (and others) Fix
Description:
While technically not a bug, one can always hope that
such unnecessary inefficiency is removed from the system.
A letter on the net mentioning tar's habit of always
reading or writing files in 512-byte blocks prompted me to
analyze it and see just how bad it was.  It was quite bad:
tar always reads and writes in blocks of 512 bytes, which
is quite inefficient on systems with a blocksize greater
than 512 bytes.  What's more, it spends an inordinate
amount of time in the bcopy() routine.  First, when
extracting files it is totally unnecessary to bcopy() the
data around, since it can be written straight to disk.
Some further work allowed me to buffer the data going to
disk in blocks of max(diskblocksize, tapeblocksize).  For
creating tar archives some bcopy()s are unavoidable, but
their number can be greatly reduced by letting the tape
writes and the disk reads "get into sync", so that reading
and writing proceed in tapeblocksize units and all further
bcopy()s are avoided.
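(For illustration only, and not part of the original report or
patch: a minimal sketch of the kind of buffered read loop the
reworked tar uses, reading max(st_blksize, tape block size)
bytes per read() instead of 512.  The TBLOCK and NBLOCK values
and the little program itself are assumptions.)

/*
 * Sketch: copy a file to stdout reading max(st_blksize,
 * tapeblocksize) bytes per read() instead of 512 at a time.
 * TBLOCK/NBLOCK here are assumed, illustrative values.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>

#define TBLOCK  512
#define NBLOCK  20                      /* assumed blocking factor */
#define max(a,b) ((a) > (b) ? (a) : (b))

int main(int argc, char **argv)
{
    struct stat st;
    char *buf;
    long bufsize;
    ssize_t n;
    int fd;

    if (argc != 2 || (fd = open(argv[1], O_RDONLY)) < 0) {
        fprintf(stderr, "usage: copy file\n");
        return 1;
    }
    fstat(fd, &st);
    /* one big buffer instead of a 512-byte one */
    bufsize = max((long)st.st_blksize, (long)NBLOCK * TBLOCK);
    if ((buf = malloc(bufsize)) == NULL)
        return 1;
    while ((n = read(fd, buf, bufsize)) > 0)
        write(1, buf, n);
    free(buf);
    close(fd);
    return 0;
}

Reading a full filesystem or tape block at a time amortizes the
per-read system call overhead that the timings below reflect.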
I analyzed the behavior before and after the improvements
were made, and as you would expect the greatest benefit is
achieved when the files involved are significantly larger
than the tapeblocksize.  The following figures summarize the
behavior of three versions of tar: Ntar is the new tar, Tar
is the 4.2BSD tar, and Tar5 is the System V version of tar
under the BRL/SYSV package.
Create an archive of large files (1 Meg):
    Ntar    0.3u    1.9s
    Tar     1.8u    4.9s
    Tar5    4.7u    4.8s
Create an archive of small files (1 Meg):
    Ntar    4.2u    6.8s
    Tar     4.4u    9.4s
    Tar5    7.7u    9.1s
Create an archive of /bin (4.2BSD):
    Ntar    1.4u    2.9s
    Tar     2.3u    5.5s
    Tar5    5.2u    6.7s
Extract an archive of large files (created above):
    Ntar    0.2u    4.2s
    Tar     1.6u   14.0s
Extract an archive of small files (created above):
    Ntar    2.6u   17.1s
    Tar     4.4u   25.5s
    Tar5    7.4u   29.9s
This inefficiency is also present in earlier TARs and is
worse in most cases because the "bcopy()" routine (probably
named something else or included in the {read/write}tape()
routines) is not nearly as efficient as the Berkeley version.
Repeat-By:
Time tar during extraction and creation; the times are significant.
Gprof or prof the tar program and analyze the results.  You
will find that it spends a large portion of its time in
bcopy(), and that the number of read/write system calls it
makes on disk files is also unreasonably high.
Fix:
There were several basic changes:
- readtape was replaced with a routine readtbuf, which is
passed a pointer to a pointer and the maximum number of bytes
it will accept; it returns a pointer to the new data and the
number of bytes available there.
- a small readtape() routine emulates the old behavior.
- a writetbuf routine takes a pointer to the data and a count
of blocks.  It is smart enough to detect when the buffer
contains at least a tape block's worth of characters and to
issue the write without bcopying.  It also returns the number
of blocks still needed to fill the tape block buffer, which is
what callers use to "get into sync" (see the sketch after this
list).
- the big buffers are dynamically allocated, and care is taken
to keep them page aligned.
- the putfile and extract functions were changed to take
advantage of the above.
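For readers without the diffs handy, here is a much simplified
sketch of the writetbuf "sync" idea described above.  It is not
the code from the patch: tapewrite() is a hypothetical stand-in
for the real tape write, and the TBLOCK/NBLOCK values are
assumed.

/*
 * Simplified sketch of the "get into sync" write path; not the
 * code from the diffs below.  tapewrite() stands in for the
 * real write(mt, ...); TBLOCK/NBLOCK are assumed values.
 */
#include <stdio.h>
#include <string.h>

#define TBLOCK  512
#define NBLOCK  20                      /* assumed blocking factor */

static char tbuf[NBLOCK * TBLOCK];      /* staging buffer */
static int  recno;                      /* records currently staged */

static void tapewrite(char *p, int nbytes)
{
    /* stand-in for the real tape write */
    printf("tape write of %d bytes from %p\n", nbytes, (void *)p);
}

/*
 * Accept n TBLOCK-sized records.  When the staging buffer is
 * empty and a full tape block is on hand, write straight from
 * the caller's buffer and skip the copy.  Return how many
 * records the caller should hand over next time to land exactly
 * on a tape block boundary ("getting into sync").
 */
int writetbuf(char *buffer, int n)
{
    while (recno == 0 && n >= NBLOCK) { /* big win: no copy */
        tapewrite(buffer, NBLOCK * TBLOCK);
        n -= NBLOCK;
        buffer += NBLOCK * TBLOCK;
    }
    while (n-- > 0) {                   /* residual goes via staging buffer */
        memcpy(tbuf + recno * TBLOCK, buffer, TBLOCK);
        buffer += TBLOCK;
        if (++recno >= NBLOCK) {
            tapewrite(tbuf, NBLOCK * TBLOCK);
            recno = 0;
        }
    }
    return NBLOCK - recno;              /* hint: records needed to refill */
}

int main(void)
{
    char data[25 * TBLOCK];
    int hint;

    memset(data, 0, sizeof data);
    hint = writetbuf(data, 25);         /* 20 go direct, 5 are staged */
    printf("write %d records next to get into sync\n", hint);
    return 0;
}

In the actual patch, the hint returned by writetbuf is used by
putfile to size its next read, so that subsequent calls start
with an empty staging buffer and take the no-copy path.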
A "diff -e" editor script follows based on the original 4.2BSD
tar and a regular diff follows that for other sites. The changes
should also be almost directly applicable to other versions of TAR.
ARPA sites for whom BRL has copies of your ATT source license,
can obtain a copy by sending a message to "~dpk at vgr".
####################################
diff -e tar.c.old tar.c
1109c
while (n-- > 0) {
bcopy(buffer, (char *)&tbuf[recno++], TBLOCK);
buffer += TBLOCK;
if (recno >= nblock) {
if (write(mt, tbuf, TBLOCK*nblock) < 0) {
fprintf(stderr, "tar: tape write error\n");
done(2);
}
recno = 0;
}
}
/* Tell the user how much to write to get in sync */
return (nblock - recno);
.
1107c
n -= nblock;
buffer += (nblock * TBLOCK);
.
1101,1103c
/*
* Special case: We have an empty tape buffer, and the
* users data size is >= the tape block size: Avoid
* the bcopy and dma direct to tape. BIG WIN. Add the
* residual to the tape buffer.
*/
while (recno == 0 && n >= nblock) {
if (write(mt, buffer, TBLOCK*nblock) < 0) {
.
1090,1091c
writetbuf(buffer, n)
register char *buffer;
register int n;
.
1086,1087c
if (size > ((nblock-recno)*TBLOCK))
size = (nblock-recno)*TBLOCK;
*bufpp = (char *)&tbuf[recno];
recno += (size/TBLOCK);
return (size);
.
1064a
char *bufp;
int nread;
readtbuf (&bufp, TBLOCK);
bcopy(bufp, buffer, TBLOCK);
return(TBLOCK);
}
readtbuf(bufpp, size)
char **bufpp;
int size;
{
.
1062c
readtape (buffer)
.
710a
bytes -= nread;
blocks -= (((nread-1)/TBLOCK)+1);
.
703,705c
} else if (write(ofile, bufp, (int) bytes) < 0) {
.
694,697c
for (; blocks > 0;) {
register int nread;
char *bufp;
register int nwant;
nwant = NBLOCK*TBLOCK;
if (nwant > (blocks*TBLOCK))
nwant = (blocks*TBLOCK);
nread = readtbuf(&bufp, nwant);
if (bytes > nread) {
if (write(ofile, bufp, nread) < 0) {
.
609d
590a
if (bigbuf != buf)
#ifndef vax
free(bigbuf);
#else
free(origbuf);
#endif
.
589a
#endif vax
while ((i = read(infile, bigbuf, min((hint*TBLOCK), maxread))) > 0
&& blocks > 0) {
register int nblks;
nblks = ((i-1)/TBLOCK)+1;
if (nblks > blocks)
nblks = blocks;
hint = writetbuf(bigbuf, nblks);
blocks -= nblks;
}
.
586,588c
if ((origbuf = malloc(maxread+pagesize)) == 0) {
maxread = TBLOCK;
bigbuf = buf;
} else {
bigbuf = (char *)(((int)origbuf+pagesize)&~(pagesize-1));
}
.
584c
hint = writetape((char *)&dblock);
maxread = max(stbuf.st_blksize, (nblock * TBLOCK));
#ifndef vax
if ((bigbuf = malloc(maxread)) == 0) {
maxread = TBLOCK;
bigbuf = buf;
}
#else
/*
* The following is for 4.2BSD and related systems to force
* the buffer to be page aligned. The kernel will avoid
* bcopy()'s on disk IO this way by manipulating the page tables.
*/
{
int pagesize = getpagesize();
.
534a
close(infile);
.
416a
int maxread;
int hint; /* amount to write to get "in sync" */
.
410a
#ifdef vax
char *origbuf;
#endif
char *bigbuf;
.
400c
readtbuf(&bufp, TBLOCK);
.
391c
char *bufp;
.
222a
#else
/*
* The following is for 4.2BSD and related systems to force
* the buffer to be page aligned. The kernel will avoid
* bcopy()'s on disk IO this way by manipulating the page tables.
*/
{
int pagesize = getpagesize();
tbuf = (union hblock *)malloc((nblock*TBLOCK)+pagesize);
tbuf = (union hblock *)(((int)tbuf+pagesize)&~(pagesize-1));
}
#endif vax
.
221a
#ifndef vax
.
21a
#define writetape(b) writetbuf(b, 1)
#define min(a,b) ((a) < (b) ? (a) : (b))
#define max(a,b) ((a) > (b) ? (a) : (b))
.
1a
static char RCSid[] = "@(#)$Header: tar.c,v 1.4 84/11/14 00:08:15 root Exp $";
#endif
#ifndef lint
.
0a
/*
* T A R . C
*
* $Revision: 1.4 $
*
* $Log: tar.c,v $
* Revision 1.4 84/11/14 00:08:15 root
* New more efficient version. Minimizes the number of bcopys
* and maximizes block buffering. Page aligns block buffers.
*
* Revision 1.3 84/02/23 20:24:42 dpk
* Added missing close(infile) to prevent running out of fd's
*
* Revision 1.2 84/02/23 20:17:02 dpk
* Added distinctive RCS header
*
*/
.
###################### cut here #########################
diff tar.c.old tar.c
0a1,17
> /*
> * T A R . C
> *
> * $Revision: 1.4 $
> *
> * $Log: tar.c,v $
> * Revision 1.4 84/11/14 00:08:15 root
> * New more efficient version. Minimizes the number of bcopys
> * and maximizes block buffering. Page aligns block buffers.
> *
> * Revision 1.3 84/02/23 20:24:42 dpk
> * Added missing close(infile) to prevent running out of fd's
> *
> * Revision 1.2 84/02/23 20:17:02 dpk
> * Added distinctive RCS header
> *
> */
1a19,22
> static char RCSid[] = "@(#)$Header: tar.c,v 1.4 84/11/14 00:08:15 root Exp $";
> #endif
>
> #ifndef lint
21a43,46
> #define writetape(b) writetbuf(b, 1)
> #define min(a,b) ((a) < (b) ? (a) : (b))
> #define max(a,b) ((a) > (b) ? (a) : (b))
>
221a247
> #ifndef vax
222a249,261
> #else
> /*
> * The following is for 4.2BSD and related systems to force
> * the buffer to be page aligned. The kernel will avoid
> * bcopy()'s on disk IO this way by manipulating the page tables.
> */
> {
> int pagesize = getpagesize();
>
> tbuf = (union hblock *)malloc((nblock*TBLOCK)+pagesize);
> tbuf = (union hblock *)(((int)tbuf+pagesize)&~(pagesize-1));
> }
> #endif vax
391c430
< char buf[TBLOCK];
---
> char *bufp;
400c439
< readtape(buf);
---
> readtbuf(&bufp, TBLOCK);
410a450,453
> #ifdef vax
> char *origbuf;
> #endif
> char *bigbuf;
416a460,461
> int maxread;
> int hint; /* amount to write to get "in sync" */
534a580
> close(infile);
584c630,644
< writetape((char *)&dblock);
---
> hint = writetape((char *)&dblock);
> maxread = max(stbuf.st_blksize, (nblock * TBLOCK));
> #ifndef vax
> if ((bigbuf = malloc(maxread)) == 0) {
> maxread = TBLOCK;
> bigbuf = buf;
> }
> #else
> /*
> * The following is for 4.2BSD and related systems to force
> * the buffer to be page aligned. The kernel will avoid
> * bcopy()'s on disk IO this way by manipulating the page tables.
> */
> {
> int pagesize = getpagesize();
586,588c646,651
< while ((i = read(infile, buf, TBLOCK)) > 0 && blocks > 0) {
< writetape(buf);
< blocks--;
---
> if ((origbuf = malloc(maxread+pagesize)) == 0) {
> maxread = TBLOCK;
> bigbuf = buf;
> } else {
> bigbuf = (char *)(((int)origbuf+pagesize)&~(pagesize-1));
> }
589a653,664
> #endif vax
>
> while ((i = read(infile, bigbuf, min((hint*TBLOCK), maxread))) > 0
> && blocks > 0) {
> register int nblks;
>
> nblks = ((i-1)/TBLOCK)+1;
> if (nblks > blocks)
> nblks = blocks;
> hint = writetbuf(bigbuf, nblks);
> blocks -= nblks;
> }
590a666,671
> if (bigbuf != buf)
> #ifndef vax
> free(bigbuf);
> #else
> free(origbuf);
> #endif
609d689
< char buf[TBLOCK];
694,697c774,784
< for (; blocks-- > 0; bytes -= TBLOCK) {
< readtape(buf);
< if (bytes > TBLOCK) {
< if (write(ofile, buf, TBLOCK) < 0) {
---
> for (; blocks > 0;) {
> register int nread;
> char *bufp;
> register int nwant;
>
> nwant = NBLOCK*TBLOCK;
> if (nwant > (blocks*TBLOCK))
> nwant = (blocks*TBLOCK);
> nread = readtbuf(&bufp, nwant);
> if (bytes > nread) {
> if (write(ofile, bufp, nread) < 0) {
703,705c790
< continue;
< }
< if (write(ofile, buf, (int) bytes) < 0) {
---
> } else if (write(ofile, bufp, (int) bytes) < 0) {
710a796,797
> bytes -= nread;
> blocks -= (((nread-1)/TBLOCK)+1);
1062c1149
< readtape(buffer)
---
> readtape (buffer)
1064a1152,1163
> char *bufp;
> int nread;
>
> readtbuf (&bufp, TBLOCK);
> bcopy(bufp, buffer, TBLOCK);
> return(TBLOCK);
> }
>
> readtbuf(bufpp, size)
> char **bufpp;
> int size;
> {
1086,1087c1185,1189
< bcopy((char *)&tbuf[recno++], buffer, TBLOCK);
< return (TBLOCK);
---
> if (size > ((nblock-recno)*TBLOCK))
> size = (nblock-recno)*TBLOCK;
> *bufpp = (char *)&tbuf[recno];
> recno += (size/TBLOCK);
> return (size);
1090,1091c1192,1194
< writetape(buffer)
< char *buffer;
---
> writetbuf(buffer, n)
> register char *buffer;
> register int n;
1101,1103c1204,1212
< bcopy(buffer, (char *)&tbuf[recno++], TBLOCK);
< if (recno >= nblock) {
< if (write(mt, tbuf, TBLOCK*nblock) < 0) {
---
>
> /*
> * Special case: We have an empty tape buffer, and the
> * users data size is >= the tape block size: Avoid
> * the bcopy and dma direct to tape. BIG WIN. Add the
> * residual to the tape buffer.
> */
> while (recno == 0 && n >= nblock) {
> if (write(mt, buffer, TBLOCK*nblock) < 0) {
1107c1216,1217
< recno = 0;
---
> n -= nblock;
> buffer += (nblock * TBLOCK);
1109c1219,1233
< return (TBLOCK);
---
>
> while (n-- > 0) {
> bcopy(buffer, (char *)&tbuf[recno++], TBLOCK);
> buffer += TBLOCK;
> if (recno >= nblock) {
> if (write(mt, tbuf, TBLOCK*nblock) < 0) {
> fprintf(stderr, "tar: tape write error\n");
> done(2);
> }
> recno = 0;
> }
> }
>
> /* Tell the user how much to write to get in sync */
> return (nblock - recno);
################### End of Bug Report ####################