unbatcher out of sync?
Eamonn McManus
em at dce.ie
Wed Jan 16 10:34:28 AEST 1991
henry at zoo.toronto.edu (Henry Spencer) writes:
>It means "something's wrong with your batch": relaynews did not find a
>"#! rnews nnnnn" line where one should have been. Typically this means
>garbling during preparation or transmission. One notorious trouble spot
>is that the batch format cannot tolerate transformations of newlines to
>CR-LF pairs; the byte counts in the "#! rnews" lines must be spot-on.
We had `unbatcher out of sync' problems at a site I was involved in, which
was fed its news by mail from a VMS site (ugh). The VMS mailer (PMDF) got
confused when lines exceeded 256 characters, as References lines often do,
and would make a total hash of the header when this happened. As a result,
the "#! rnews" count would always be off by a small amount for the affected
article. C News resyncs at the next "#! rnews" line, but if the count is
too long for the actual article contents it will have missed the start of
the article following the garbled one.
To kludge around this problem I wrote a program `patchbatch' which zips
through a news batch looking for "#! rnews" lines with incorrect counts.
If it finds one, it hunts back and forth a small amount for the next "#!
rnews" line and adjusts the incorrect one to point to it. This was
surprisingly effective: while it was running I believe it never failed to
correct a munged batch.
I'm including the source of patchbatch in case it is of use to the original
poster, or anyone else.
,
Eamonn
/* patchbatch.c - patch a news batch. */
/* By Eamonn McManus <emcmanus at cs.tcd.ie>, February 1990.
* This program is not copyrighted.
*
* Blast through a news batch checking the offsets after `#! rnews'.
* If we find that the offset does not lead to another `#! rnews' line
* or EOF, we search around for the line somewhere in the vicinity. If
* it is found, we go back and patch the original offset to point to the
* correct place. This is useful for example on systems where long lines
* get truncated or split in transmission, since in this case the stated
* offset will be wrong.
*
* This is the hackiest program I have written in a long time.
*/
#include <stdio.h>
#include <stdlib.h> /* For strtol(), exit(). */
#include <string.h>
#include <unistd.h> /* For getopt(), read(), write(), lseek(), close(). */
#include <sys/fcntl.h> /* For O_RDWR. */
#include <sys/types.h>
#include <sys/stat.h>
/* Old-style declaration of the C library's strtol(), which patchbatch()
 * uses to parse the byte count that follows `#! rnews '. */
extern long strtol();
/* Set non-zero by the -v option; when set, patchbatch() reports every
 * article count it rewrites. */
char verbose;
/* Index of the first non-option argument, maintained by getopt(). */
extern int optind;
int patchbatch(char *name); /* Defined later in this file. */

/* main - patch every news batch named on the command line.
 *
 * Options:
 *   -v  verbose: report each article count that gets rewritten.
 *
 * Exit status is 0 if every file was processed successfully, 1 if
 * patchbatch() failed for at least one file, and 2 on a usage error
 * (unknown option or no file arguments).
 */
int main(int argc, char **argv)
{
    int opt, i;
    int status = 0;

    while ((opt = getopt(argc, argv, "v")) != -1) {
        switch (opt) {
        case 'v':
            verbose = 1;
            break;
        default:
            goto usage;
        }
    }
    if (optind == argc)
        goto usage;

    /* Keep going after a failure so one bad batch does not stop the rest. */
    for (i = optind; i < argc; i++) {
        if (patchbatch(argv[i]) < 0)
            status = 1;
    }
    return status;

usage:
    fprintf(stderr, "Usage: patchbatch [-v] file [...]\n");
    return 2;
}
/* Marker that begins every article header line in a batch; the decimal
 * byte count of the article follows it, then a newline. */
static char lead[] = "#! rnews ";
/* Length of the marker, not counting the terminating NUL. */
#define LEADLEN (sizeof lead - 1)
/* How many bytes before the expected header position the search window
 * starts.  Signed, so that file-offset arithmetic never wraps. */
#define FUDGE ((long)(2 * sizeof lead))

enum {
    PB_BUFSIZE = 64,             /* Size of the scratch buffer. */
    PB_WINLEN = PB_BUFSIZE - 1   /* Bytes read per window; 1 left for NUL. */
};

/* Read up to PB_WINLEN bytes starting at file offset pos into buf, which
 * must hold PB_BUFSIZE bytes, and NUL-terminate what was read.
 * Returns the number of bytes read (short only at end of file), or -1 if
 * lseek() or read() failed (errno is set by the failing call). */
static long pb_read_at(int fd, long pos, char *buf)
{
    long got = 0;

    if (lseek(fd, (off_t)pos, SEEK_SET) < 0)
        return -1;
    while (got < PB_WINLEN) {
        ssize_t n = read(fd, buf + got, (size_t)(PB_WINLEN - got));

        if (n < 0)
            return -1;
        if (n == 0)
            break; /* End of file. */
        got += (long)n;
    }
    buf[got] = '\0';
    return got;
}

/* Return the index of the first complete lead marker in buf[0..len), or
 * -1 if there is none.  Length-based, so NUL bytes in the article text do
 * not cut the search short. */
static long pb_find_lead(const char *buf, long len)
{
    long i;

    for (i = 0; i + (long)LEADLEN <= len; i++) {
        if (memcmp(buf + i, lead, LEADLEN) == 0)
            return i;
    }
    return -1;
}

/* Overwrite the numsize-character count field of the header at file
 * offset here with offset, zero-padded on the left to exactly numsize
 * characters so that nothing after the field moves.
 * Returns 1 if the field was rewritten, 0 if offset needs more than
 * numsize characters (nothing is written), or -1 on an I/O error. */
static int pb_write_count(int fd, long here, int numsize, long offset)
{
    char numstr[PB_BUFSIZE + 24]; /* Room for any padding plus any long. */
    int len, done = 0;

    len = snprintf(numstr, sizeof numstr, "%0*ld", numsize, offset);
    if (len < 0 || (size_t)len >= sizeof numstr)
        return -1;
    if (len > numsize)
        return 0;
    if (lseek(fd, (off_t)(here + (long)LEADLEN), SEEK_SET) < 0)
        return -1;
    while (done < numsize) {
        ssize_t n = write(fd, numstr + done, (size_t)(numsize - done));

        if (n <= 0)
            return -1;
        done += (int)n;
    }
    return 1;
}

/* patchbatch - repair the article byte counts of the news batch `name'.
 *
 * Walks the file from header to header.  For each `#! rnews N' line the
 * byte N after the article start must be either end of file or the next
 * `#! rnews ' marker.  When it is not, a PB_WINLEN-byte window starting
 * FUDGE bytes before the expected position is searched for the marker and
 * the count is rewritten in place to point at it.  A count that runs past
 * end of file, or that leaves too little data for another article, is
 * rewritten so the article ends exactly at end of file.
 *
 * The count is overwritten in place and never changes width: shorter
 * values are zero-padded; if the corrected value needs more digits than
 * the field has, a message is printed, the file is left unchanged at that
 * spot, and scanning continues from the corrected position.
 *
 * Diagnostics go to stderr.  Returns 0 when the whole batch was walked,
 * -1 on an I/O error or when the batch could not be parsed.  The file
 * descriptor is closed on every path.
 */
int patchbatch(char *name)
{
    char buf[PB_BUFSIZE];
    struct stat st;
    long here = 0; /* File offset of the header currently in buf. */
    long got;
    int status = -1;
    int fd;

    if ((fd = open(name, O_RDWR)) < 0) {
        perror(name);
        return -1;
    }
    if (fstat(fd, &st) < 0) {
        perror(name);
        goto out;
    }
    got = pb_read_at(fd, 0L, buf);
    if (got < 0) {
        perror(name);
        goto out;
    }
    if (got < PB_WINLEN) {
        fprintf(stderr, "%s: too short for a news batch\n", name);
        goto out;
    }
    if (strncmp(buf, lead, LEADLEN) != 0) {
        fprintf(stderr, "%s: not a news batch (should start with %s)\n",
                name, lead);
        goto out;
    }

    for (;;) {
        char *end;
        long offset;    /* Count stated in the header. */
        long newoffset; /* Count the header ought to state. */
        long artstart;  /* File offset of the first byte of the article. */
        long remain;    /* Bytes from artstart to end of file. */
        long newpos;
        int numsize;    /* Width of the count field in the file. */
        int patched;

        /* Reject missing, zero and negative counts: a non-positive count
         * would stop the scan from advancing through the file. */
        offset = strtol(buf + LEADLEN, &end, 10);
        if (end == buf + LEADLEN || offset <= 0) {
            fprintf(stderr,
                    "%s: bad value after %s, file offset %ld\n",
                    name, lead, here);
            goto out;
        }
        numsize = (int)(end - (buf + LEADLEN));
        artstart = here + (long)LEADLEN + numsize + 1 /*\n*/;
        remain = (long)st.st_size - artstart;

        /* Compare against remain rather than forming artstart + offset
         * first, so an absurdly large count cannot overflow. */
        if (offset == remain) {
            status = 0; /* Last article ends exactly at end of file. */
            goto out;
        }
        if (offset > remain) {
            /* Count runs past end of file: this must be the last article. */
            newoffset = remain;
        } else {
            long winstart, idx, found;

            newpos = artstart + offset;
            /* Never look at or before artstart: a marker there is the
             * current header, not the next one. */
            winstart = newpos - FUDGE;
            if (winstart < artstart + 1)
                winstart = artstart + 1;
            idx = newpos - winstart; /* Expected marker position in buf. */

            got = pb_read_at(fd, winstart, buf);
            if (got < 0) {
                perror(name);
                goto out;
            }
            if (got < idx + (PB_WINLEN - FUDGE)) {
                /* Too little data after the expected position to hold
                 * another article; treat this one as running to EOF. */
                newoffset = remain;
            } else if (memcmp(buf + idx, lead, LEADLEN) == 0) {
                /* Count is right.  Slide the next header to the front of
                 * buf (regions overlap, hence memmove) and carry on. */
                memmove(buf, buf + idx, (size_t)(got - idx) + 1);
                here = newpos;
                continue;
            } else {
                found = pb_find_lead(buf, got);
                if (found < 0) {
                    fprintf(stderr, "%s: can't find next article "
                            "with offset %ld from file pos %ld\n",
                            name, offset, here);
                    goto out;
                }
                newoffset = winstart + found - artstart;
            }
        }

        if (newoffset <= 0) {
            /* Header runs right up to end of file; there is no article. */
            fprintf(stderr, "%s: last article too short\n", name);
            goto out;
        }

        patched = pb_write_count(fd, here, numsize, newoffset);
        if (patched < 0) {
            perror(name);
            goto out;
        }
        if (patched == 0)
            fprintf(stderr, "%s: no room to change article "
                    "length to %ld, file offset %ld\n",
                    name, newoffset, here);
        else if (verbose)
            fprintf(stderr, "%s: changed article length to "
                    "%ld, file offset %ld\n", name, newoffset, here);

        /* Continue from the corrected position even if the header could
         * not be rewritten, so later articles still get checked. */
        newpos = artstart + newoffset;
        if (newpos >= (long)st.st_size) {
            status = 0;
            goto out;
        }
        got = pb_read_at(fd, newpos, buf);
        if (got < 0) {
            perror(name);
            goto out;
        }
        if (got < PB_WINLEN) {
            fprintf(stderr, "%s: last article too short\n", name);
            goto out;
        }
        here = newpos;
    }

out:
    if (close(fd) < 0 && status == 0) {
        perror(name);
        status = -1;
    }
    return status;
}
More information about the Alt.sources
mailing list