Shuffle generation
Joe Keane
jgk at osc.COM
Thu May 16 10:46:05 AEST 1991
In article <8c=sAju00Vp_Ihxl4r at andrew.cmu.edu> dd2i+ at andrew.cmu.edu (Dee Dee
Two Eye) writes:
>To swap N items in an array A[0..(n-1)], the following O(N) algorithm works:
>
>for (i=0; i < N; i++) {
> swap(i, i + random(N - i));
>}
>
>Where random(r) is a function which returns a random integer in 0..(r-1)
>and swap(a, b) swaps the stuff in array A[a] with A[b].
This algorithm is much better than the naive one, which repeatedly swaps two
randomly chosen elements. At the end of the loop, all permutations are
equally probable, assuming the random function is uniform. The naive
algorithm only approaches this asymptotically.
Note that if you seed your random-number generator with a 32-bit integer, then
there are only 2^32 possible permutations, no matter how big the array is. If
you worry about such things, like i do, then you should make multiple passes
over the array.
Following i've included `shuffle.c', a program i wrote a while ago which uses
the algorithm described above. I've found that it's a useful utility, so you
may want to `clip and save' it.
--
Joe Keane, professional C++ programmer
jgk at osc.com (...!uunet!stratus!osc!jgk)
--
#include <stdio.h>
#include <sys/time.h>
#define DEBUG 0
extern char* malloc();
extern char* realloc();
struct line
{
int length;
char* ptr;
};
int main (argc, argv)
int argc;
char* argv[];
{
struct line* master_ptr;
int master_size;
#if DEBUG
fputs("shuffle: reading lines...\n", stderr);
#endif
{
int master_capacity;
char *buffer_ptr;
int buffer_capacity;
master_capacity = 256;
master_ptr = (struct line*)malloc(master_capacity * sizeof(struct line));
if (!master_ptr)
goto out_of_memory;
master_size = 0;
buffer_capacity = 256;
buffer_ptr = malloc(buffer_capacity);
if (!buffer_ptr)
goto out_of_memory;
for (;;)
{
int c;
int buffer_size;
char* line_ptr;
c = getchar();
if (c == EOF)
goto eof;
if (master_size >= master_capacity)
{
master_capacity *= 2;
master_ptr = (struct line*)realloc(master_ptr, master_capacity * sizeof (struct line));
if (!master_ptr)
goto out_of_memory;
}
buffer_size = 0;
while (c != '\n')
{
if (buffer_size >= buffer_capacity)
{
buffer_capacity *= 2;
buffer_ptr = realloc(buffer_ptr, buffer_capacity);
}
buffer_ptr[buffer_size] = c;
buffer_size++;
c = getchar();
if (c == EOF)
{
fputs("shuffle: adding newline at end of file\n", stderr);
break;
}
}
line_ptr = malloc(buffer_size);
if (!line_ptr)
goto out_of_memory;
memcpy(line_ptr, buffer_ptr, buffer_size);
master_ptr[master_size].length = buffer_size;
master_ptr[master_size].ptr = line_ptr;
master_size++;
if (c == EOF)
goto eof;
}
eof:
free(buffer_ptr);
}
#if DEBUG
fprintf(stderr, "shuffle: total of %d lines read\n", master_size);
#endif
{
int pass;
for (pass = 0; pass < 16; pass++)
{
struct timeval tv;
int line_number;
gettimeofday(&tv, 0);
#if DEBUG
fprintf(stderr, "shuffle: doing pass %d, time is %d seconds, %d microseconds...\n", pass, tv.tv_sec, tv.tv_usec);
#endif
srandom(tv.tv_sec ^ tv.tv_usec);
for (line_number = 0; line_number < master_size; line_number++)
{
int other_line;
struct line temp;
other_line = random() % (master_size - line_number) + line_number;
temp = master_ptr[line_number];
master_ptr[line_number] = master_ptr[other_line];
master_ptr[other_line] = temp;
}
}
}
#if DEBUG
fputs("shuffle: writing lines...\n", stderr);
#endif
{
int line_number;
for (line_number = 0; line_number < master_size; line_number++)
{
char* ptr;
char* end;
ptr = master_ptr[line_number].ptr;
end = ptr + master_ptr[line_number].length;
while (ptr < end)
{
putchar(*ptr);
ptr++;
}
putchar('\n');
}
}
#if DEBUG
fputs("shuffle: all done\n", stderr);
#endif
return 0;
out_of_memory:
fputs("shuffle: out of memory\n", stderr);
return 1;
}
More information about the Comp.lang.c
mailing list