fix for 'refer' sort problem
cons at sdccsu3.UUCP
cons at sdccsu3.UUCP
Sat Aug 4 00:31:18 AEST 1984
Here is a description and fix for a 'refer' sort problem
packaged as a shar archive.
#! /bin/sh
# The rest of this file is a shell script which will extract:
# Sendbug02 refer2.c refer5.c
# Each file is written by cat from a here-document. The delimiter is
# quoted, so the shell copies the text verbatim (no $ or ` expansion).
echo x - Sendbug02
cat >Sendbug02 <<'!Funky!Stuff!'
Subject: REFER mis-sorts reference lists if duplicate citations occur.
Index: usr.bin/refer/refer2.c 4.2BSD
usr.bin/refer/refer5.c 4.2BSD
Description:
The putsig routine in refer5.c is responsible for placing
signals (usually superscripts, or author-date labels) in the
body of a document. Putsig also makes the signals available
for printing in the reference list by emitting strings such as
".ds [F signal" into the reference list.
If a reference is cited more than once, it is only put on the
reference list the first time it is cited. However putsig
persists in emitting the ".ds [F ..." string for duplicate
citations.
This causes a problem if the reference list is sorted because
the extraneous ".ds [F ..." material appears at the beginning
of the next non-duplicate reference where it obscures the
sortkey. Thus the reference list is mis-sorted.
Repeat-By:
Create a document which cites a reference twice, then cites a
new reference. Process the document using REFER with the -s
option. Notice the extra ".ds [F ..." line in the REFER
output. If the two references came out sorted properly, you
were lucky; reverse their roles and you will see the failure.
Fix:
Putsig is called at two places in refer2.c, in one context a
duplicate citation is being processed and in the other a new
citation is being processed. Add a flag parameter to putsig to
distinguish the two calls. Modify the routine to suppress the
emission of ".ds [F ..." when it is called for a
duplicate citation. (See sources posted to net.sources)
Rick Accurso
UUCP: ...!ucbvax!sdcsvax!sdccsu3!accurso
ARPA: sdcsvax!sdccsu3!accurso at nosc
!Funky!Stuff!
echo x - refer2.c
cat >refer2.c <<'!Funky!Stuff!'
#ifndef lint
static char *sccsid = "@(#)refer2.c 4.1 (Berkeley) 5/6/83";
#endif
#include "refer..c"
/* Capacity of the flds[] pointer array filled by tabs() in doref. */
#define NFLD 80
/* Size of the temp buffer that receives the output of each "hunt". */
#define TLEN 512
/* Input stream; doref tests it with feof() in biblio mode. */
extern FILE *in;
/* Reference data for the current hit, filled in by the "deliv" coroutine. */
char one[ANSLEN];
int onelen = ANSLEN;
/* Passed as the fourth argument of the "deliv" corout calls; always empty here. */
static char dr [100] = "";
/*
** doref - Process a citation.
**
** line1 is the line that introduced the citation.  The remaining
** lines are read with input() up to the terminating ".]" (or, in
** biblio mode, up to a blank line or a "%<*convert>" line).  Lines seen
** before the first control line ('.' or '%') are query keywords and
** are collected in buff; lines from the first control line onward are
** field data and are collected in dbuff.
**
** The keywords are normalised, each search source in rdata..search is
** hunted until one produces a hit, and the resulting reference is
** emitted through putkey/putsig/putref.  A query of "$LIST$" dumps
** the accumulated reference list instead.
**
** err() is defined elsewhere; it is presumably fatal (TODO confirm).
** If it returns, processing continues exactly as it used to.
*/
doref(line1)
char *line1;
{
	char buff[QLEN];	/* query keywords */
	char dbuff[3*QLEN];	/* field data supplied in citation */
	char answer[ANSLEN], temp[TLEN], line[BUFSIZ];
	char *p, **sr, *flds[NFLD], *r;
	int nf, nr, query = 0, alph, digs;

again:
	buff[0] = dbuff[0] = 0;
	if (biblio && Iline == 1 && line1[0] == '%') {
		/*
		** In biblio mode first line of input file may begin
		** with % and contain field data.  Hold it in dbuff.
		*/
		if (strlen(line1) >= 3 * QLEN)
			err("record at line %d too long", Iline);
		strcat(dbuff, line1);
	}
	while (input(line)) {	/* get query */
		Iline++;
		if (prefix(".]", line))
			/* end of citation */
			break;
		if (biblio && line[0] == '\n')
			/*
			** In biblio mode a blank line indicates
			** the end of the reference.
			*/
			break;
		if (biblio && line[0] == '%' && line[1] == *convert)
			break;
		if (control(line[0]))
			query = 1;
		/*
		** Store lines of query keys in buff;
		** store lines of field data in dbuff.
		** The length is checked BEFORE appending so that the
		** diagnostic fires before the buffer is overrun; room
		** for the terminating NUL is included in the test.
		*/
		if (query) {
			if (strlen(dbuff) + strlen(line) >= 3 * QLEN)
				err("record at line %d too long", Iline-1);
			strcat(dbuff, line);
		}
		else {
			if (strlen(buff) + strlen(line) >= QLEN)
				err("query too long (%d)",
				    (int)(strlen(buff) + strlen(line)));
			strcat(buff, line);
		}
	}
	if (biblio && line[0] == '\n' && feof(in))
		return;
	if (strcmp(buff, "$LIST$\n")==0) {
		/*
		** Produce the list of accumulated references.
		*/
		assert (dbuff[0] == 0);
		dumpold();
		return;
	}
	answer[0] = 0;
	/*
	** Refine the query keywords in buff.
	*/
	for (p = buff; *p; p++) {
		if (isupper(*p))
			/* Convert to lowercase. */
			*p |= 040;
	}
	alph = digs = 0;
	for (p = buff; *p; p++) {
		if (isalpha(*p))
			alph++;
		else
		if (isdigit(*p))
			digs++;
		else {
			/* Word boundary: terminate the word temporarily. */
			*p = 0;
			if ((alph+digs < 3) || common(p-alph)) {
				r = p-alph;
				/*
				** Blank out unacceptable
				** keywords (too short, common etc.)
				*/
				while (r < p)
					*r++ = ' ';
			}
			if (alph == 0 && digs > 0) {
				r = p-digs;
				if (digs != 4 || atoi(r)/100 != 19) {
					/*
					** Blank out numbers
					** which are not in
					** 1900-1999
					*/
					while (r < p)
						*r++ = ' ';
				}
			}
			/* Every separator (newlines included) becomes a blank. */
			*p = ' ';
			alph = digs = 0;
		}
	}
	one[0] = 0;
	if (buff[0]) {	/* do not search if no query */
		for (sr = rdata; sr < search; sr++) {
			temp[0] = 0;
			corout(buff, temp, "hunt", *sr, TLEN);
			assert(strlen(temp) < TLEN);
			if (strlen(temp)+strlen(answer) > BUFSIZ)
				err("Accumulated answers too large",0);
			strcat(answer, temp);
			if (strlen(answer)>BUFSIZ)
				err("answer too long (%d)", strlen(answer));
			/* Stop at the first source that yields a hit. */
			if (newline(answer) > 0)
				break;
		}
	}
	assert(strlen(one) < ANSLEN);
	assert(strlen(answer) < ANSLEN);
	/*
	** If a search was done, the number of newlines in answer
	** indicates how many hits were found.
	*/
	if (buff[0])
		switch (newline(answer)) {
		case 0:
			fprintf(stderr, "No such paper: %s\n", buff);
			return;
		default:
			fprintf(stderr, "Too many hits: %s\n", trimnl(buff));
			choices(answer);
			/*
			** Cut buff after its first line, if it still has
			** one.  The keyword loop above turned separators
			** into blanks, so the scan must also stop at the
			** end of the string rather than run off the buffer.
			*/
			p = buff;
			while (*p != 0 && *p != '\n')
				p++;
			if (*p == '\n')
				*++p = 0;
			/* fall through: carry on as for a single hit */
		case 1:
			/*
			** Search found one hit, success!
			*/
			if (endpush)
				/*
				** References are being produced
				** in a $LIST$ rather than as
				** footnotes.
				*/
				if (nr = chkdup(answer)) {
					/*
					** This reference has already
					** been cited.
					*/
					if (bare < 2) {
						/*
						** Signals in the text
						** are desired (no -b)
						*/
						nf = tabs(flds, one);
						nf += tabs(flds+nf, dbuff);
						assert(nf < NFLD);
						putsig(nf,flds,nr,line1,line,1);
					}
					/*
					** Since it's a dup, no need to
					** putkey or putref.
					*/
					return;
				}
			if (one[0] == 0)
				/*
				** Place the reference data for the hit
				** indicated by answer in one.
				*/
				corout(answer, one, "deliv", dr, QLEN);
			break;
		}
	assert(strlen(buff) < QLEN);
	assert(strlen(one) < ANSLEN);
	/*
	** Set the flds[] pointers at the beginning of each
	** field of reference data in one and dbuff.
	*/
	nf = tabs(flds, one);
	nf += tabs(flds+nf, dbuff);
	assert(nf < NFLD);
	refnum++;
	/*
	** The stream "fo" is written to by putkey
	** and putref.  If references are being produced in the
	** form of footnotes, fo is stdout.  If references are
	** being printed as a list at the end (endpush), then
	** fo is a temp file.  In the endpush case each reference
	** is written to fo as one long line.  If the list is
	** to be sorted, putkey places the sort key on the front
	** of the line.
	*/
	if (sort)
		putkey(nf, flds, refnum, keystr);
	if (bare < 2)
		putsig(nf, flds, refnum, line1, line, 0);
	else
		flout();
	putref(nf, flds);
	/* In biblio mode a blank line separates records: process the next. */
	if (biblio && line[0] == '\n')
		goto again;
	if (biblio && line[0] == '%' && line[1] == *convert)
		fprintf(fo, "%s%c%s", convert+1, sep, line+3);
}
/*
** newline - return the number of '\n' characters in the
** NUL-terminated string s.
*/
newline(s)
char *s;
{
	int count = 0;
	char *scan;

	for (scan = s; *scan != 0; scan++) {
		if (*scan == '\n')
			count++;
	}
	return(count);
}
/*
** choices - print the titles associated with the hits in buff.
**
** buff holds one hit per line.  Each newline is overwritten with NUL
** (so buff is modified), the hit is handed to the "deliv" coroutine,
** and the first title line (".T" or "%T" at the start of a line) of
** the delivered text is written to stderr, truncated to 70 columns.
** When no title line is found, "??? at <hit>" is printed instead.
*/
choices(buff)
char *buff;
{
	char ob[BUFSIZ], *p, *r, *q, *t;
	int nl;

	for (r = p = buff; *p; p++) {
		if (*p == '\n') {
			*p++ = 0;
			corout(r, ob, "deliv", dr, BUFSIZ);
			nl = 1;		/* at the start of a line in ob */
			for (q = ob; *q; q++) {
				if (nl && (q[0]=='.'||q[0]=='%') && q[1]=='T') {
					/*
					** Skip ".T " to reach the title text;
					** do not step over the terminator
					** when the line is just ".T".
					*/
					q += (q[2] != 0) ? 3 : 2;
					for (t = q; *t && *t != '\n'; t++)
						;
					*t = 0;
					fprintf(stderr, "%.70s\n", q);
					q = 0;	/* marks "title was printed" */
					break;
				}
				nl = *q == '\n';
			}
			if (q)
				fprintf(stderr, "??? at %s\n",r);
			r=p;
			/*
			** p already points one past the newline.  If that
			** is the end of the list, stop here: the loop's own
			** p++ would otherwise step beyond the terminator.
			*/
			if (*p == 0)
				break;
		}
	}
}
/*
** control - return 1 when c is one of the characters that starts a
** control line ('.' or '%'), otherwise 0.
*/
control(c)
int c;
{
	return(c == '.' || c == '%');
}
!Funky!Stuff!
echo x - refer5.c
cat >refer5.c <<'!Funky!Stuff!'
/*
* $Log: refer5.c,v $
* Revision 1.5 84/07/09 16:12:23 cons
* Putsig now refrains from putting out ".ds [F" info when the citation
* is a duplicate. The extraneous ".ds [F" info fouled-up sortkeys
* for subsequent non-duplicate reference. Accurso
*
* Revision 1.4 84/07/05 15:30:12 cons
* Fixed keylet() so that disambiguating letters a, b, c, ...
* will be issued instead of control characters ^A, ^B, ^C, ... Accurso
*
* Revision 1.3 84/07/05 15:16:50 cons
* Added comments. Accurso
*
*/
#ifndef lint
static char *rcsid = "$Header: refer5.c,v 1.5 84/07/09 16:12:23 cons Exp $";
#endif
#include "refer..c"
#define SAME 0 /* strcmp() result for equal strings */
#define NFLAB 3000 /* size of bflab, the pool of plain signals */
#define NLABC 1000 /* size of sig and of the labtab/labc tables */
static char sig[NLABC]; /* Signal text accumulated by putsig until it
** is put out.
*/
static char bflab[NFLAB]; /* Record of plain signals issued.
** "Plain signals" have not had
** disambiguating letter appended,
** miller84 vs. miller84a.
*/
static char *labtab[NLABC]; /* Array of pointers to plain signals;
** indexed by nref.
*/
static char *lbp = bflab; /* Next free position in bflab. */
static char labc[NLABC]; /* Array of disambiguating
** characters issued; indexed by nref.
*/
static char stbuff[50]; /* putsig's copy of the line that started the
** current run of citations.
*/
static int prevsig; /* Nonzero once stbuff has been filled for the
** current run; cleared when the signals are put out.
*/
/* putsig
**
**	Imbed a signal indicating a citation in the text.
**	Also may supply the signal for printing in a
**	reference list (.ds [F signal).
**
**	nf, flds  Number of fields and the fields of reference data.
**		  nf == 0 means a repeat citation whose previously
**		  issued label is reused.
**	nref	  Reference number; also the index into labtab/labc.
**	nstline	  Line which started the citation (trimmed in place).
**	endline	  Line which ended the citation (trimmed in place).
**	dupl	  Nonzero for a repeat citation; ".ds [F" is then
**		  not written to the reference list.
**
**	Signals of consecutive citations are accumulated in sig and put
**	out together once no further ".[" follows immediately.
**
**	CONDENSE facility which converts consecutive numeric signals (4,5,6,7)
**	to a range (4-7) does not handle sorted reference lists.
**
**	NOTE(review): t, t1 and sig are fixed-size and written with
**	unbounded sprintf/strcat; a long run of citations can overflow
**	them.  The result of fopen(hidenam, "r") is also not checked.
*/
putsig (nf, flds, nref, nstline, endline, dupl)
char *flds[];	/* Fields of reference data */
char *nstline;	/* Line which indicated start of citation.
		** Usually ".[".  In biblio mode could be blank or
		** start with "%".
		*/
char *endline;	/* Line which indicated end of citation.
		** Usually ".]".  Blank in biblio mode.
		*/
int dupl;	/* dupl==0 implies new citation;
		** dupl==1 implies repeat citation.
		*/
{
	char t[100], t1[100], t2[100], format[10], *sd, *stline;
	int addon, another = 0;
	static FILE *fhide = 0;
#ifdef CONDENSE
	static int *wref = NULL;
	static int wcnt = 0;
	static int wsize = 50;

	if (wref == NULL)
		wref = calloc(wsize, sizeof(int));
#endif
	if (labels) {	/* User specified -l, -k, or -S option. */
		if (nf == 0)	/* Repeat citation of a reference.
				** Reuse previously issued signal.
				*/
			sprintf(t, "%s%c", labtab[nref], labc[nref]);
		else {
			*t = 0;
			if (keywant)	/* -k option, use signal
					** supplied in reference data.
					*/
				sprintf(t, "%s", fpar(nf,flds,t1,keywant,1,0));
			if (science && t[0] == 0) {
				/* -S option and no -k, produce
				** signal such as (Miller, 1984).
				*/
				sd = fpar(nf, flds, t2, 'D', 1, 0);
				sprintf(t, "%s, %s", fpar(nf,flds,t1,'A',1,0),
					sd);
			}
			else if (t[0] == 0) {
				/* -l option, produce a signal such
				** as Miller1984 or Mil84.
				*/
				sprintf(format,
					nmlen>0 ? "%%.%ds%%s" : "%%s%%s",
					nmlen);
				/* format is %s%s for default labels */
				/* or %.3s%s eg if wanted */
				sd = fpar(nf, flds, t2, 'D', 1, 0);
				if (dtlen > 0) {
					/* Keep only the last dtlen
					** characters of the date.
					*/
					char *sdb;
					for (sdb = sd; *sd; sd++)
						;
					sd = sd - dtlen;
					if (sd < sdb)
						sd = sdb;
				}
				sprintf(t, format, fpar(nf,flds,t1,'A',1,0),
					sd);
			}
			if (keywant) {
				/* Check user supplied signal,
				** if final character is '-',
				** user wants disambiguating
				** character as necessary.
				*/
				addon = 0;
				for (sd = t; *sd; sd++)
					;
				if (*--sd == '-') {
					addon = 1;
					*sd = 0;
				}
			}
			/* Add plain signal to record of issued
			** signals.  Append a disambiguating letter
			** to this instance as necessary.
			*/
			if ((!keywant || addon) && !science) {
				addch(t, keylet(t, nref));
			}
			else {
				tokeytab (t,nref);
			}
		}
	} /* end (labels) */
	else {
		/* Use numbers for signals */
		if (sort)
			/* Surround reference number by FLAG so
			** that it can be found for renumbering
			** after sort.
			*/
			sprintf(t, "%c%d%c", FLAG, nref, FLAG);
		else {
			/* The plain number belongs to the unsorted case
			** only; it must not overwrite the FLAG form
			** built above.
			*/
			if (nref > 0) {
#ifdef CONDENSE
				/* Grow while one slot is still free:
				** wref[wcnt] is written later as the
				** terminating 0.
				*/
				if ((++wcnt>=wsize) &&
				 ((wref=realloc(wref, (wsize+=50)*sizeof(int))) == NULL)
				) {
					fprintf(stderr, "Ref cond out of memory.");
					exit(1);
				}
				wref[wcnt-1] = nref;
#endif
			}
			sprintf(t, "%d", nref);
		}
	}
	/* Does another citation follow immediately? */
	another = prefix (".[", sd=lookat());
	if (another && (strcmp(".[\n", sd) != SAME))
		fprintf(stderr, "File %s line %d: punctuation ignored from: %s",
			Ifile, Iline, sd);
	strcat(sig, t);
#if EBUG
	fprintf(stderr, "sig is now %s leng %d\n",sig,strlen(sig));
#endif
	/* Arrange stline and endline so that they point to
	** appropriate signal bracketing strings.
	*/
	trimnl(nstline);
	trimnl(endline);
	stline = stbuff;
	if (prevsig == 0) {
		/* First citation of a run: remember its start line. */
		strcpy (stline, nstline);
		prevsig=1;
	}
	if (stline[2] || endline[2]) {
		/* Text follows ".[" or ".]": use it as the bracketing. */
		stline += 2;
		endline += 2;
	}
	else {
		stline = "\\*([.";
		endline = "\\*(.]";
	}
	if (science) {
		stline = " (";
		endline = ")";
	}
	if (bare == 0) {	/* We are putting signals in text. */
		if (!another) {
			/* No more citations for the moment.
			** Prepare accumulated signals (do condensing
			** and bracketing); put signals out.
			*/
#ifdef CONDENSE
			wref[wcnt] = 0;
			if (!labels && !sort && wcnt > 1)
				condense(wref,wcnt,sig);
			wcnt = 0;
#endif
			sprintf(t1, "%s%s%s\n", stline, sig, endline);
			append(t1);
			flout();
			sig[0] = 0;
			prevsig = 0;
			if (fo == fhide) {
				/* Copy the hidden reference text back
				** to the real temp file.
				*/
				int ch;
				fclose(fhide);
				fhide = fopen(hidenam, "r");
				fo = ftemp;
				while ((ch = getc(fhide)) != EOF)
					putc(ch, fo);
				fclose(fhide);
				unlink(hidenam);
			}
		} /* end (!another) */
		else {
			/* Another citation follows immediately.
			*/
			strcat(sig, ",\\|");
			if (fo == ftemp) {	/* hide if need be */
				sprintf(hidenam, "/tmp/rj%dc", getpid());
#if EBUG
				fprintf(stderr, "hiding in %s\n", hidenam);
#endif
				fhide = fopen(hidenam, "w");
				if (fhide == NULL)
					err("Can't get scratch file %s",
						hidenam);
				fo = fhide;
			}
		} /* end (another) */
	} /* end (bare == 0) -- putting signals in text */
	/* Only a new citation that carries field data contributes a
	** ".ds [F" string to the reference list; for a duplicate the
	** stray string would end up in front of the next sort key.
	*/
	if (bare < 2)
		if (nf > 0)
			if ( ! dupl )
				fprintf(fo,".ds [F %s%c",t,sep);
	if (bare > 0)
		flout();
#if EBUG
	fprintf(stderr, "sig is now %s\n",sig);
#endif
}
/*
** fpar - extract a short form of a field for use in labels and keys.
**
** Finds the seq'th field in flds[0..nf-1] whose key letter
** (second character) is c and copies a piece of it to out:
**   'T', 'J'  text after a leading "A ", "An ", "The ", cut to 20
**             characters with blanks turned into '-' (mycpy2);
**   'L'       the field text as it stands;
**   others    the last blank-separated word; for 'A' a trailing
**             suffix after a comma or before '(' is skipped and,
**             when prepend is set, initials are appended (initadd).
** Returns out, or 0 when no such field exists.
*/
char *
fpar (nf, flds, out, c, seq, prepend)
char *flds[], *out;
{
	char *cur, *start, *field;
	int idx, hits = 0;

	for (idx = 0; idx < nf; idx++) {
		field = flds[idx];
		if (field[1] != c)
			continue;
		hits++;
		if (hits < seq)
			continue;

		/* for titles use first word otherwise last */
		if (c == 'T' || c == 'J') {
			cur = field + 3;
			if (prefix("A ", cur))
				cur += 2;
			if (prefix("An ", cur))
				cur += 3;
			if (prefix("The ", cur))
				cur += 4;
			mycpy2(out, cur, 20);
			return(out);
		}

		/* if its not 'L' then use just the last word */
		start = field + 2;
		cur = start;
		if (c != 'L') {
			while (*cur)
				cur++;
			while (cur > start && *cur != ' ')
				cur--;
		}

		/* special wart for authors */
		if (c == 'A' && (cur[-1] == ',' || cur[1] == '(')) {
			cur--;
			while (cur > start && *cur != ' ')
				cur--;
			mycpy(out, cur + 1);
		}
		else {
			strcpy(out, cur + 1);
		}

		if (c == 'A' && prepend)
			initadd(out, start, cur);
		return(out);
	}
	return(0);
}
/*
** putkey - write the sort key for reference nref to fo.
**
** The key starts with .\" .  With no field data (nf <= 0) it is the
** label recorded for nref followed by sep.  Otherwise each character
** of keystr names a field letter; the text after that character is
** read as a count ('+' meaning up to 999), at least one.  The
** fpar() form of each occurrence, less a leading article, is written
** followed by '-'.  The key ends with the FLAG-wrapped number and sep.
*/
putkey(nf, flds, nref, keystr)
char *flds[], *keystr;
{
	char piece[50], *text;
	int fieldc, n, limit;

	fprintf(fo, ".\\\"");
	if (nf > 0) {
		for (fieldc = *keystr++; fieldc != 0; fieldc = *keystr++) {
			limit = (*keystr == '+') ? 999 : atoi(keystr);
			if (limit <= 0)
				limit = 1;
			n = 1;
			while (n <= limit) {
				text = fpar(nf, flds, piece, fieldc, n, 1);
				if (text == 0)
					break;
				text = artskp(text);
				fprintf(fo, "%s%c", text, '-');
				n++;
			}
		}
		fprintf(fo, "%c%d%c%c", FLAG, nref, FLAG, sep);
	}
	else {
		fprintf(fo, "%s%c%c", labtab[nref], labc[nref], sep);
	}
}
tokeytab (t, nref)
char *t;
{
strcpy(labtab[nref]=lbp, t);
while (*lbp++)
;
}
/*
** keylet - record plain signal t for reference nref and return the
** disambiguating character for this instance.
**
** Returns 0 when no earlier reference (1..nref-1) used the same
** plain signal, 'a' when the latest such reference was put out plain,
** and otherwise the character after the one it was given.  The result
** is also stored in labc[nref].
**
** The capacity checks are made before the tables are written;
** err() is presumably fatal (TODO confirm).
*/
keylet(t, nref)
char *t;
{
	int i;
	int x = -1;
	char *end;

	if (nref < 0 || nref >= NLABC)
		err("nref in labc overflow (%d)", NLABC);
	for (end = t; *end; end++)
		;
	if ((lbp - bflab) + (end - t) + 1 > NFLAB)
		err("bflab overflow (%d)", NFLAB);
	/* The latest earlier use of the same plain signal wins. */
	for(i = 1; i < nref; i++) {
		if (strcmp(labtab[i], t) == 0)
			x = labc[i];
	}
	tokeytab (t, nref);
#if EBUG
	fprintf(stderr, "lbp up to %d of %d\n", lbp-bflab, NFLAB);
#endif
	if (x == 0)	/* The last reference to use this signal
			** was put out plain; this reference
			** needs disambiguating character 'a'.
			*/
		x = 'a'-1;
	return(labc[nref] = x+1);
}
/*
** mycpy - copy t to s up to, but not including, the first comma,
** blank or NUL, and terminate s.
*/
mycpy(s, t)
char *s, *t;
{
	for (; *t != 0 && *t != ',' && *t != ' '; t++)
		*s++ = *t;
	*s = 0;
}
/*
** mycpy2 - copy at most n characters of t to s, writing '-' for
** each blank, and terminate s.  Copying also stops at the first
** character whose value is not positive (the NUL included).
*/
mycpy2(s, t, n)
char *s, *t;
{
	int ch;

	for (; n != 0; n--) {
		ch = *t++;
		if (ch <= 0)
			break;
		*s++ = (ch == ' ') ? '-' : ch;
	}
	*s = 0;
}
/*
** initadd - append initials to the string in to.
**
** The text from "from" up to (not including) "stop" is scanned.
** A '.' is appended at the end of every run of letters, and the
** first letter of every run is appended.  The scan starts as if it
** were already inside a run, so the letters of a run that begins at
** "from" contribute only the '.'.
*/
initadd(to, from, stop)
char *to, *from, *stop;
{
	int ch;
	int inword = 1;

	to += strlen(to);
	for (; from < stop; from++) {
		ch = *from;
		if (isalpha(ch)) {
			if (!inword)
				*to++ = ch;
			inword = 1;
		}
		else {
			if (inword)
				*to++ = '.';
			inword = 0;
		}
	}
	*to = 0;
}
/* Leading articles ignored in sort keys; each ends with a blank. */
static char *articles[] = {
	"the ", "an ", "a ", 0
};

/*
** artskp - skip over an initial "a ", "an " or "the " in s.
**
** The comparison ignores bit 040, which makes it case-insensitive for
** letters.  Returns a pointer to the text after the article when s
** starts with one and something follows it, otherwise s itself.
*/
char *
artskp(s)	/* skips over initial "a ", "an ", "the " in s */
char *s;
{
	char **p, *r1, *r2;

	for (p = articles; *p; p++) {
		r2 = s;
		/*
		** Stop at the end of either string: NUL and ' ' differ
		** only in bit 040, so without these tests the loop
		** would run on past both terminators.
		*/
		for (r1 = *p; *r1 && *r2 && ((*r1 ^ *r2) & ~040 ) == 0; r1++)
			r2++;
		if (*r1 == 0 && *r2 != 0)
			return(r2);
	}
	return(s);
}
!Funky!Stuff!
More information about the Comp.sources.unix
mailing list