v15i083: Awk program for putting a set of files on minimum floppies.
Susan Thomas
thomas%randvax.UUCP at usc.edu
Mon Dec 17 07:57:35 AEST 1990
Posting-number: Volume 15, Issue 83
Submitted-by: thomas%randvax.UUCP at usc.edu (Susan Thomas)
Archive-name: greedy/part01
[Assuming I've finally figured out the latest rearrangement at uunet, I'm going
to post some 30 submissions now, then notify the new moderator. When I get
confirmation form him/her, I will post anything remaining in the queue and send
a message notifying the net, then redirect the aliases. ++bsa]
(I'm posting this for a friend)
This is a awk implementation of the "greedy" bin-packing algorithm
as applied to the problem of spreading a set of files over a set
of floppies in such a way as to require the least number of
floppies. It takes a list of files and generates copying scripts
which embody the optimal allocation of files.
Greedy is not the optimal algorithm. It's just a easily implemented
heuristic.
The archive contains a read.me with more information.
---------------------------------------------------------------------------
Submitted-by: ajayshah at usc.edu
Archive-name: greedy/part01
---- Cut Here and unpack ----
#!/bin/sh
# This is greedy, a shell archive (shar 3.32)
# made 10/22/1990 07:17 UTC by ajayshah at usc.edu
# Source directory /tmp/PACKING
#
# existing files WILL be overwritten
#
# This shar contains:
# length mode name
# ------ ---------- ------------------------------------------
# 1691 -rw-r--r-- copying.bat.good
# 1983 -rw-r--r-- lookup.table.good
# 3968 -rw-r--r-- pack.awk
# 1864 -rw-r--r-- read.me
# 1281 -rw-r--r-- testdata
#
if touch 2>&1 | fgrep 'amc' > /dev/null
then TOUCH=touch
else TOUCH=true
fi
# ============= copying.bat.good ==============
echo "x - extracting copying.bat.good (Text)"
sed 's/^X//' << 'SHAR_EOF' > copying.bat.good &&
X at echo off
Xecho This set of files will need 3 floppies.
Xecho Make sure you have so many HD formatted floppies handy.
Xecho and then hit ENTER.
Xpause
Xecho Insert Floppy Number 1 and hit ENTER:
Xpause
Xcopy ./PC/C/backlog.c b:
Xcopy ./PC/C/boss01.zip b:
Xcopy ./PC/C/boss02a.zip b:
Xcopy ./PC/C/boss02b.zip b:
Xcopy ./PC/C/cc03.arc b:
Xcopy ./PC/C/ccc1053b.arc b:
Xcopy ./PC/C/ccc1053c.arc b:
Xcopy ./PC/C/cnews005.arc b:
Xcopy ./PC/C/cnews009.arc b:
Xecho Insert Floppy Number 2 and hit ENTER:
Xpause
Xcopy ./PC/BORLAND/bgidriv.arc b:
Xcopy ./PC/BORLAND/bgifont.arc b:
Xcopy ./PC/BORLAND/bgifonts.arc b:
Xcopy ./PC/BORLAND/bgiherc.arc b:
Xcopy ./PC/BORLAND/bgvga256.arc b:
Xcopy ./PC/BORLAND/tcppt1.zip b:
Xcopy ./PC/C/abmake14.arc b:
Xcopy ./PC/C/ansi-c.arc b:
Xcopy ./PC/C/apm.arc b:
Xcopy ./PC/C/boss03.zip b:
Xcopy ./PC/C/bplus11.arc b:
Xcopy ./PC/C/c-flow.arc b:
Xcopy ./PC/C/c4window.arc b:
Xcopy ./PC/C/cc01.arc b:
Xcopy ./PC/C/cc02.arc b:
Xcopy ./PC/C/cc04.arc b:
Xcopy ./PC/C/ccc1053a.arc b:
Xcopy ./PC/C/ccompile.arc b:
Xcopy ./PC/C/cephes.arc b:
Xcopy ./PC/C/clp_v11.arc b:
Xcopy ./PC/C/cnews003.arc b:
Xcopy ./PC/C/cnews004.arc b:
Xcopy ./PC/C/cnews006.arc b:
Xcopy ./PC/C/cnews008.arc b:
Xcopy ./PC/C/cnews010.arc b:
Xecho Insert Floppy Number 3 and hit ENTER:
Xpause
Xcopy ./PC/BORLAND/00-index.txt b:
Xcopy ./PC/BORLAND/td1pat.arc b:
Xcopy ./PC/C/00-index.txt b:
Xcopy ./PC/C/64colors.zip b:
Xcopy ./PC/C/advc11.arc b:
Xcopy ./PC/C/argtest.c b:
Xcopy ./PC/C/asyncpec.arc b:
Xcopy ./PC/C/bituudec.c b:
Xcopy ./PC/C/break.arc b:
Xcopy ./PC/C/btoa.arc b:
Xcopy ./PC/C/c_check.arc b:
Xcopy ./PC/C/cbooks.arc b:
Xcopy ./PC/C/cdecl.arc b:
Xcopy ./PC/C/cnews001.arc b:
Xcopy ./PC/C/cnews002.arc b:
Xcopy ./PC/C/cnews007.arc b:
Xecho Toodles.
SHAR_EOF
$TOUCH -am 1022000890 copying.bat.good &&
chmod 0644 copying.bat.good ||
echo "restore of copying.bat.good failed"
set `wc -c copying.bat.good`;Wc_c=$1
if test "$Wc_c" != "1691"; then
echo original size 1691, current size $Wc_c
fi
# ============= lookup.table.good ==============
echo "x - extracting lookup.table.good (Text)"
sed 's/^X//' << 'SHAR_EOF' > lookup.table.good &&
X./PC/BORLAND/00-index.txt --> Floppy Number 3
X./PC/BORLAND/bgidriv.arc --> Floppy Number 2
X./PC/BORLAND/bgifont.arc --> Floppy Number 2
X./PC/BORLAND/bgifonts.arc --> Floppy Number 2
X./PC/BORLAND/bgiherc.arc --> Floppy Number 2
X./PC/BORLAND/bgvga256.arc --> Floppy Number 2
X./PC/BORLAND/tcppt1.zip --> Floppy Number 2
X./PC/BORLAND/td1pat.arc --> Floppy Number 3
X./PC/C/00-index.txt --> Floppy Number 3
X./PC/C/64colors.zip --> Floppy Number 3
X./PC/C/abmake14.arc --> Floppy Number 2
X./PC/C/advc11.arc --> Floppy Number 3
X./PC/C/ansi-c.arc --> Floppy Number 2
X./PC/C/apm.arc --> Floppy Number 2
X./PC/C/argtest.c --> Floppy Number 3
X./PC/C/asyncpec.arc --> Floppy Number 3
X./PC/C/backlog.c --> Floppy Number 1
X./PC/C/bituudec.c --> Floppy Number 3
X./PC/C/boss01.zip --> Floppy Number 1
X./PC/C/boss02a.zip --> Floppy Number 1
X./PC/C/boss02b.zip --> Floppy Number 1
X./PC/C/boss03.zip --> Floppy Number 2
X./PC/C/bplus11.arc --> Floppy Number 2
X./PC/C/break.arc --> Floppy Number 3
X./PC/C/btoa.arc --> Floppy Number 3
X./PC/C/c-flow.arc --> Floppy Number 2
X./PC/C/c4window.arc --> Floppy Number 2
X./PC/C/c_check.arc --> Floppy Number 3
X./PC/C/cbooks.arc --> Floppy Number 3
X./PC/C/cc01.arc --> Floppy Number 2
X./PC/C/cc02.arc --> Floppy Number 2
X./PC/C/cc03.arc --> Floppy Number 1
X./PC/C/cc04.arc --> Floppy Number 2
X./PC/C/ccc1053a.arc --> Floppy Number 2
X./PC/C/ccc1053b.arc --> Floppy Number 1
X./PC/C/ccc1053c.arc --> Floppy Number 1
X./PC/C/ccompile.arc --> Floppy Number 2
X./PC/C/cdecl.arc --> Floppy Number 3
X./PC/C/cephes.arc --> Floppy Number 2
X./PC/C/clp_v11.arc --> Floppy Number 2
X./PC/C/cnews001.arc --> Floppy Number 3
X./PC/C/cnews002.arc --> Floppy Number 3
X./PC/C/cnews003.arc --> Floppy Number 2
X./PC/C/cnews004.arc --> Floppy Number 2
X./PC/C/cnews005.arc --> Floppy Number 1
X./PC/C/cnews006.arc --> Floppy Number 2
X./PC/C/cnews007.arc --> Floppy Number 3
X./PC/C/cnews008.arc --> Floppy Number 2
X./PC/C/cnews009.arc --> Floppy Number 1
X./PC/C/cnews010.arc --> Floppy Number 2
SHAR_EOF
$TOUCH -am 1022000890 lookup.table.good &&
chmod 0644 lookup.table.good ||
echo "restore of lookup.table.good failed"
set `wc -c lookup.table.good`;Wc_c=$1
if test "$Wc_c" != "1983"; then
echo original size 1983, current size $Wc_c
fi
# ============= pack.awk ==============
echo "x - extracting pack.awk (Text)"
sed 's/^X//' << 'SHAR_EOF' > pack.awk &&
X
X# This awk program packs a set of files over a set of floppies
X# using the "greedy bin-packing algorithm".
X
X# It is hardcoded for 3.5" HD floppies being written to b:
X# To change the "b:" assumption goto line 106.
X# To change the 3.5" HD assumption goto lines 16-17.
X
XBEGIN {
X name = 1; #
X size = 2; #
X taken = 3; #
X FID = 4; #
X # Ignore these four defns.
X
X FCAPACITY = 1457664; # change this for floppies other than DSHD 3.5"
X CLUSTERSIZE = 512; # ditto.
X
X stderr = "/dev/tty";
X }
X
X{table[NR, name] = $1;
X table[NR, size] = $2;
X table[NR, taken] = 0; # 0 for not yet taken, 1 for taken.
X table[NR, FID] = 0; #allocating the FID column now tends to make it faster.
X
X if (table[NR, size] > FCAPACITY) {
X print "File " table[NR, name] " on line " NR " is too large for one floppy.";
X goch = 1;
X exit 1
X }
X}
X
XEND {
X if (goch == 1) exit 1;
X #So far, all that has been done is setup table.
X print "Finished reading in all data." > stderr;
X
X floppy = 0;
X todo = NR;
X done = 0;
X while (done < todo) {
X # If there are more files to be done, then open a
X # new floppy.
X print "done = " done " todo = " todo > stderr;
X
X floppy++; spaceleft = FCAPACITY;
X trymore = 0;
X while (trymore == 0) {
X # look for a file to put into this floppy.
X bestfile = 0; bestsize = 0;
X for (i = 1; i <= todo; i++) {
X # Linear scan over all files looking for best fit.
X if (table[i, taken] == 0) {
X #this file has not yet been taken
X if ((table[i, size]+CLUSTERSIZE) < spaceleft) {
X #this file can (in principle) fit in the slot
X if (table[i, size] > bestsize) {
X #this file does it better than the best
X #file we've found so far!
X bestfile = i;
X bestsize = table[i, size];
X }
X }
X }
X }
X #End of linear scan.
X if (bestfile == 0) {
X #we didn't find a file to fit this space
X trymore = 1 #quit trying further with this floppy
X print "Wasted space on floppy " floppy " = " spaceleft;
X }
X else {
X print "Putting file " table[bestfile, name] " of size " table[bestfile, size] " into floppy " floppy "." > stderr;
X table[bestfile, FID] = floppy;
X table[bestfile, taken] = 1;
X spaceleft = spaceleft - table[bestfile, size] - CLUSTERSIZE;
X # CLUSTERSIZE bytes is worstcase wastage of space
X # with clusters of CLUSTERSIZE bytes.
X done++
X }
X }
X }
X # End of greedy algorithm.
X
X # Once you land here, you've finished with all files.
X # We generate two things now:
X # First, generate the MS-DOS batch file which'll do the copying.
X # Next, generate the lookup table which maps a file into it's
X # FID (floppy ID)
X
X # Printing batch file to StdOut:
X msdosbat = "copying.bat";
X print "@echo off" > msdosbat;
X print "echo This set of files will need " floppy " floppies." > msdosbat;
X print "echo Make sure you have so many HD formatted floppies handy." > msdosbat;
X print "echo and then hit ENTER." > msdosbat;
X print "pause" > msdosbat;
X for (f = 1; f <= floppy; f++) {
X # running over every floppy
X print "echo Insert Floppy Number " f " and hit ENTER:" > msdosbat;
X print "pause" > msdosbat;
X # Now generate code for the copying:
X for (i = 1; i <= todo; i++) {
X if (table[i, FID] == f) {
X print "copy " table[i, name] " b:" > msdosbat;
X } #\
X } # \
X } # \__ Change this for other drives.
X print "echo Toodles." > msdosbat;
X
X # Now to generate the lookup table to file "lookup.table"
X for (i = 1; i <= todo; i++) {
X print table[i, name] " --> Floppy Number " table[i, FID] > "lookup.table"
X }
X
X for (i=1; i<=5; i++) print "" > stderr;
X print "I have created two files for you." > stderr;
X print "" > stderr;
X print "The file copying.bat is a MessDos batch file which does the copying." > stderr;
X print "The file lookup.table is a lookup table mapping a file to it's floppy." > stderr;
X print "" > stderr;
X print "Toodles!" > stderr;
X}
X
SHAR_EOF
$TOUCH -am 1022001690 pack.awk &&
chmod 0644 pack.awk ||
echo "restore of pack.awk failed"
set `wc -c pack.awk`;Wc_c=$1
if test "$Wc_c" != "3968"; then
echo original size 3968, current size $Wc_c
fi
# ============= read.me ==============
echo "x - extracting read.me (Text)"
sed 's/^X//' << 'SHAR_EOF' > read.me &&
X
XWHAT?
X
XThis is a awk implementation of the "greedy" bin-packing algorithm
Xas applied to the problem of spreading a set of files over a set
Xof floppies in such a way as to require the least number of
Xfloppies. It takes a list of files and generates copying scripts
Xwhich embody the optimal allocation of files.
X
XGreedy is not the optimal algorithm. It's just a easily implemented
Xheuristic.
X
X---------------------------------------------------------------------------
X
X
X
XHOW?
X
XIt needs an input file of the form
X
X./PC/BORLAND/00-index.txt 866
X./PC/BORLAND/bgidriv.arc 101151
X
Xetc., where the first field is the filename and the 2nd field is
Xthe filesize.
X
XUsage:
X
X nawk -f pack.awk filelist
X or
X gawk -f pack.awk filelist
X
XThe program talks a lot on /dev/tty about it's activities. Finally,
Xit generates two files: a MS-DOS batch file which does the copying
Xand a lookup table mapping filenames to floppy numbers. Modifications
Xon what is generated, etc., are not difficult.
X
XThe file 'testdata' is for demo purposes. Try saying
X
X nawk -f pack.awk testdata
X
XIf you want a doublecheck, then the "correct" results are files
X'copying.bat.good' and 'lookup.table.good'.
X
XIt does NOT work with awk; you must have either nawk or gawk.
X
X---------------------------------------------------------------------------
X
X
X
XIt's horrendously slow. I only plan to be using it once in a while
Xso it's fine by me. If someone ports it to C or so, I'd like to get
Xa copy!
X
XIf someone implements a smarter heuristic, then I'd be even more
Xinterested!
X
X_______________________________________________________________________________
XAjay Shah, ajayshah at usc.edu ajayshah%rcc%rand.org
XApt: (213) 734-3930 RAND Corporation: (213) 393-0411 x7281
X_______________________________________________________________________________
X
SHAR_EOF
$TOUCH -am 1022001290 read.me &&
chmod 0644 read.me ||
echo "restore of read.me failed"
set `wc -c read.me`;Wc_c=$1
if test "$Wc_c" != "1864"; then
echo original size 1864, current size $Wc_c
fi
# ============= testdata ==============
echo "x - extracting testdata (Text)"
sed 's/^X//' << 'SHAR_EOF' > testdata &&
X./PC/BORLAND/00-index.txt 866
X./PC/BORLAND/bgidriv.arc 101151
X./PC/BORLAND/bgifont.arc 81908
X./PC/BORLAND/bgifonts.arc 87716
X./PC/BORLAND/bgiherc.arc 58540
X./PC/BORLAND/bgvga256.arc 28922
X./PC/BORLAND/tcppt1.zip 40323
X./PC/BORLAND/td1pat.arc 11443
X./PC/C/00-index.txt 11904
X./PC/C/64colors.zip 12382
X./PC/C/abmake14.arc 62897
X./PC/C/advc11.arc 14336
X./PC/C/ansi-c.arc 15213
X./PC/C/apm.arc 86050
X./PC/C/argtest.c 1442
X./PC/C/asyncpec.arc 6186
X./PC/C/backlog.c 3797
X./PC/C/bituudec.c 7191
X./PC/C/boss01.zip 139935
X./PC/C/boss02a.zip 241954
X./PC/C/boss02b.zip 200308
X./PC/C/boss03.zip 81988
X./PC/C/bplus11.arc 22618
X./PC/C/break.arc 8204
X./PC/C/btoa.arc 6182
X./PC/C/c-flow.arc 62706
X./PC/C/c4window.arc 53248
X./PC/C/c_check.arc 21504
X./PC/C/cbooks.arc 12874
X./PC/C/cc01.arc 4073
X./PC/C/cc02.arc 93940
X./PC/C/cc03.arc 117176
X./PC/C/cc04.arc 113452
X./PC/C/ccc1053a.arc 36508
X./PC/C/ccc1053b.arc 227761
X./PC/C/ccc1053c.arc 309945
X./PC/C/ccompile.arc 49024
X./PC/C/cdecl.arc 15088
X./PC/C/cephes.arc 58368
X./PC/C/clp_v11.arc 26056
X./PC/C/cnews001.arc 7995
X./PC/C/cnews002.arc 7393
X./PC/C/cnews003.arc 25600
X./PC/C/cnews004.arc 67584
X./PC/C/cnews005.arc 115712
X./PC/C/cnews006.arc 40020
X./PC/C/cnews007.arc 13312
X./PC/C/cnews008.arc 73748
X./PC/C/cnews009.arc 96256
X./PC/C/cnews010.arc 72437
SHAR_EOF
$TOUCH -am 1021235690 testdata &&
chmod 0644 testdata ||
echo "restore of testdata failed"
set `wc -c testdata`;Wc_c=$1
if test "$Wc_c" != "1281"; then
echo original size 1281, current size $Wc_c
fi
exit 0
More information about the Comp.sources.misc
mailing list