v02i097: sources for refer => BibTeX conversion

Sat Apr 16 01:22:11 AEST 1988

comp.sources.misc: Volume 2, Issue 97
Submitted-By: "David Kotz" <dfk at duke.cs.duke.edu>
Archive-Name: refer2tex

#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	refer2tex
#	bibfix.sed
#	r2bib.c
# This archive created: Fri Apr 15 09:17:47 1988
export PATH; PATH=/bin:$PATH
if test -f 'refer2tex'
then
	echo shar: will not over-write existing file "'refer2tex'"
else
cat << \SHAR_EOF > 'refer2tex'
#!/bin/csh -f
#
# refer2tex: translate a refer bibliography to a BibTeX one.
#  Sort of. It may need some manual help but this is an approximation.
#
# David Kotz
#
# usage: refer2tex bibfile
# produces bibfile.bib, in BibTeX format
#

# A missing argument is an error, so report it with a nonzero status.
if ($#argv < 1) then
    echo usage: refer2tex bibfile
    exit 1
endif

# Name the scratch file after the last path component of the input, so
# that an argument such as dir/refs does not point into /tmp/dir.
set tmp=/tmp/${1:t}.fix$$

# Arm the interrupt handler only once there is a scratch file to remove.
onintr cleanup

# Clean up the refer source, sort it by author and date (r2bib relies on
# that order to spot repeated keys), then convert it.
sed -f /usr/local/new/bib/bibfix.sed $1 > $tmp
sortbib -sA+D $tmp | r2bib > $1.bib

cleanup:
rm -f $tmp
SHAR_EOF
chmod +x 'refer2tex'
fi # end of overwriting check
if test -f 'bibfix.sed'
then
	echo shar: will not over-write existing file "'bibfix.sed'"
else
cat << \SHAR_EOF > 'bibfix.sed'
# Fix any cutoff lines
s/\(..*\)%A/\1 (CUT OFF BY LOOKBIB)\
\
%A/

# Change some [nt]roff garbage into latex equivalent
s/\\(co/\\copyright/g
s/\\fI/{\\em /g
s/\\fB/{\\bf /g
# \fR (roman) and \fP (previous font) both end an italic or bold stretch
s/\\f[RP]/}/g
s/\\(em/---/g
s/\\\*-/---/g

# these must be protected against happening twice
# this changes & to \&
# (the first form handles an & in column one, which has no preceding
# character for the second form to match)
s/^&/\\\&/
s/\([^\\]\)&/\1\\\&/g
# this changes $ to \$
s/^\$/\\\$/
s/\([^\\]\)\$/\1\\\$/g
# this changes - to -- in pages lines: the refer %P lines this script
# sees when run from refer2tex, and any line that mentions pages
/^%[Pp]/s/\([^-]\)-\([^-]\)/\1--\2/g
/pages/s/\([^-]\)-\([^-]\)/\1--\2/g
SHAR_EOF
fi # end of overwriting check
if test -f 'r2bib.c'
then
	echo shar: will not over-write existing file "'r2bib.c'"
else
cat << \SHAR_EOF > 'r2bib.c'
/* r2bib - convert refer input files to bibtex .bib files
   Author - Rusty Wright, Center for Music Experiment, UCSD
   Modified by - Rod Oldehoeft, LLNL & Colorado State University
   (rro at lll-crg.ARPA):
      1.  Accept a lower-case refer letter code as well as upper case
      2.  Map "%X" refer entry to "note=" bibtex entry
      3.  A "%B" entry results in "@inbook" result
      4.  Use {} instead of "" to bracket output fields
      5.  Map "%M" to "month=" bibtex entry
      6.  Map "%Y" to "year=" bibtex entry
      7.  Try to make bibtex entry key from author initials and year
   Modified by David Kotz, Duke University Computer Science (dfk at cs.duke.edu):
      1.  Fixed a bug (indirect through NULL) found when run on Suns.
      2.  Make the keyword generator smarter about dates and repeated entries.
          To use this effectively do a 'sortbib -sA+D' on the file before
          sending it through here.
      3.  Map %K to keywords and %X to abstract instead of note. %O maps to note.
*/
# include <ctype.h>
# include <stdio.h>

/*
 * One row per refer field: the key letter that follows '%' in the
 * input, the bibtex field name written for it, whether the field is
 * written at all, and the text collected for the current entry.
 */
struct rb {
	char	rb_kl;		/* refer key letter */
	char	*rb_kw;		/* bibtex field name */
	char	rb_emit;	/* 0 means collect the data but never print it */
	char	*rb_data;	/* collected refer data, NULL when there is none */
};

/*
 * The field table, closed by an all-zero row.  The 'H' (commentary1)
 * row is deliberately left out.
 */
struct rb rb[] = {
	{ 'A', "author",      1, NULL },
	{ 'B', "booktitle",   1, NULL },
	{ 'C', "address",     1, NULL },
	{ 'D', "year",        1, NULL },
	{ 'E', "editor",      1, NULL },
	{ 'I', "publisher",   1, NULL },
	{ 'J', "journal",     1, NULL },
	{ 'K', "keywords",    1, NULL },
	{ 'L', "label",       0, NULL },	/* use as bibtex key */
	{ 'M', "month",       1, NULL },
	{ 'N', "number",      1, NULL },
	{ 'O', "note",        1, NULL },
	{ 'P', "pages",       1, NULL },
	{ 'Q', "institution", 1, NULL },
	{ 'R', "report",      0, NULL },
	{ 'S', "series",      1, NULL },
	{ 'T', "title",       1, NULL },
	{ 'V', "volume",      1, NULL },
	{ 'X', "abstract",    1, NULL },
	{ 'Y', "year",        1, NULL },
	{ 0,   0,             0, 0    }
};

/*
 * Maps the presence of a refer field onto a bibtex entry type.
 */
struct bmap {
	char	bm_kl;		/* refer key letter that selects the type */
	char	*bm_entry;	/* bibtex entry type to write */
};

/*
 * entries are in order of precedence.
 * any entry with a 'J' field must be
 * an article, but anything with an 'I'
 * field doesn't have to be a book (if
 * an entry has both 'J' and 'I' it is
 * considered to be an article).
 * 'B' is tried before 'I' because a
 * piece inside a book normally names
 * the publisher too; with 'I' first
 * such an entry came out as a book.
 */
struct bmap	bmap[] = {
	{ 'J',	"article"	},
	{ 'R',	"techreport"	},
	{ 'B',	"inbook"	},
	{ 'I',	"book"	},
	{ 0,	0		}
};

/*
 * main - convert every file named on the command line, or standard
 * input when none is named, through r2bib().
 *
 * Exits with status 1 when a file could not be opened or r2bib()
 * reported a problem with any input, and with status 0 otherwise.
 */
int
main(argc, argv)
	int		argc;
	char		**argv;
{
	register FILE	*fid;
	register int	i;
	int		err;

	err = 0;

	if (argc > 1) {
		for (i = 1; i < argc; i++) {
			if ((fid = fopen(argv[i], "r")) == NULL) {
				fprintf(stderr, "fopen: ");
				perror(argv[i]);
				/* an unreadable input must show in the exit status */
				err++;
				continue;
			}
			err += r2bib(argv[i], fid);
			/* release the stream; many inputs would otherwise use up descriptors */
			(void) fclose(fid);
		}
	}
	else
		err += r2bib("stdin", stdin);

	if (err)
		exit(1);

	exit(0);
}

/*
 * r2bib - read refer entries from fid and write each one out through
 * dumprb().
 *
 * file is used only in diagnostics.  An entry is a run of non-blank
 * lines; a line starting with '%' begins a new field and any other line
 * continues the field before it.
 *
 * Returns 1 if any line had an unknown key letter or could not be
 * placed in an entry, 0 otherwise.
 */
int
r2bib(file, fid)
	char		*file;
	FILE		*fid;
{
	extern char	*sanz();
	register char	*cp;
	struct rb	*lrb;		/* field that continuation lines extend */
	int		inrec;		/* nonzero while an entry is being collected */
	int		kl;		/* key letter of this line, upper case */
	int		line;
	char		buf[BUFSIZ];
	int		err;

	lrb = NULL;
	inrec = 0;
	err = 0;
	line = 0;

	while (fgets(buf, sizeof(buf), fid) != NULL) {
		line++;

		/* a blank line closes the entry being collected */
		if ((cp = sanz(buf)) == NULL) {
			if (inrec) {
				dumprb();
				inrec = 0;
				lrb = NULL;
			}
			continue;
		}

		/*
		 * if the first letter is a % then it's
		 * a new field, otherwise it's a continuation
		 * of the previous one.
		 */
		if (cp[0] == '%') {
			inrec = 1;

			/* the ctype calls need an unsigned char value */
			kl = (unsigned char) cp[1];
			if (islower(kl))
				kl = toupper(kl);

			for (lrb = &rb[0]; lrb->rb_kl != 0; lrb++) {
				if (lrb->rb_kl == kl)
					break;
			}

			if (lrb->rb_kl == 0) {
				fprintf(stderr, "r2b: %s: line %d: unknown key letter %c, ignoring\n", file, line, cp[1]);
				err = 1;
				/*
				 * do not leave lrb on the end-of-table
				 * row: continuation lines would be stored
				 * there and never printed or freed.
				 */
				lrb = NULL;
			}
			else if (cp[2] != '\0')
				stuffrb(lrb, &cp[2]);
		}
		else if (!inrec) {
			fprintf(stderr, "r2b: %s: line %d: bad format, ignoring\n", file, line);
			err = 1;
		}
		else if (lrb != NULL)
			stuffrb(lrb, &cp[0]);
		/* otherwise the line continues an ignored field; drop it too */
	}

	/* the last entry need not be followed by a blank line */
	if (inrec)
		dumprb();

	return(err);
}

#define KEYSIZ 100			/* hopefully long enough */

/*
 * dumprb - write the entry collected in rb[] to standard output as one
 * bibtex entry, then free the collected text so that rb[] is empty for
 * the next entry.
 *
 * The entry type comes from the first bmap[] row whose key letter has
 * data; "misc" is used when none has.
 *
 * In order of precedence; how to determine the bibtex key:
 *	1. use capital letters from %A, followed if possible by the
 *	   last two digits of the first four-digit number in the %D
 *	   field, or in the %Y field when %D supplies none.
 *	2. otherwise just use the string "keyN" where N is the count
 *	   of this bibliographic entry in the refer file.
 * A key equal to the one written just before it gets a letter ('a',
 * 'b', ...) appended.  At most KEYSIZ - 1 characters of key are kept.
 */
dumprb() {
	register struct rb	*trb;
	register struct bmap	*bm;
	static int		key;		/* entries written so far */
	static char		lastkey[KEYSIZ];	/* the previous key we output */
	static int		repeat = 0;	/* repeats of lastkey so far */
	char			thiskey[KEYSIZ];	/* key we are now building */
	char			*bibkey;	/* next free slot in thiskey */
	char			*limit;		/* initials must stop here */
	char			*year;		/* four-digit run found, or NULL */
	char			*cp;
	int			first;

	/*
	 * first, figure out what type of entry this
	 * is.
	 */
	for (bm = &bmap[0]; bm->bm_kl != 0; bm++) {
		for (trb = &rb[0]; trb->rb_kl != 0; trb++) {
			if ((trb->rb_kl == bm->bm_kl) && (trb->rb_data != NULL))
				break;
		}
		if (trb->rb_kl != 0)
			break;
	}

	if (bm->bm_kl != 0)
		printf("@%s{", bm->bm_entry);
	else
		printf("@misc{");

	/*
	 * build the key.  this depends on the 'A' row coming before
	 * the 'D' and 'Y' rows in rb[].
	 */
	key++;
	bibkey = thiskey;
	/* keep room for two year digits and the terminating null */
	limit = &thiskey[KEYSIZ - 3];

	for (trb = &rb[0]; trb->rb_kl != 0; trb++) {
		if (trb->rb_kl == 'A') {
			if (trb->rb_data == NULL)
				break;		/* no author: falls back to keyN */

			for (cp = trb->rb_data; *cp != '\0' && bibkey < limit; cp++) {
				if (isupper((unsigned char) *cp))
					*bibkey++ = *cp;
			}
		}
		else if ((trb->rb_kl == 'D') || (trb->rb_kl == 'Y')) {
			year = NULL;

			for (cp = trb->rb_data; cp != NULL && *cp != '\0'; cp++) {
				if (isdigit((unsigned char) cp[0]) &&
				    isdigit((unsigned char) cp[1]) &&
				    isdigit((unsigned char) cp[2]) &&
				    isdigit((unsigned char) cp[3])) {
					year = cp;
					break;
				}
			}

			/* no year here: go on, so that %Y is tried after %D */
			if (year == NULL)
				continue;

			*bibkey++ = year[2];
			*bibkey++ = year[3];
			break;
		}
	}
	*bibkey = '\0';

	/* an empty key is not usable, so number the entry instead */
	if (thiskey[0] == '\0')
		sprintf(thiskey, "key%d", key);

	printf("%s", thiskey);

	if (strcmp(thiskey, lastkey) == 0) {
		/* key was the same as previous; add a letter */
		printf("%c", 'a' + repeat++);
	}
	else {
		/* key differed from previous, but remember it */
		strcpy(lastkey, thiskey);
		repeat = 0;
	}

	printf(",\n");

	first = 1;

	for (trb = &rb[0]; trb->rb_kl != 0; trb++) {
		if (trb->rb_data == NULL)
			continue;

		if (trb->rb_emit != 0) {
			/*
			 * clank,
			 * this is so that things will line up.
			 */
			if (strlen(trb->rb_kw) < 6)
				cp = "\t\t";
			else
				cp = "\t";

			if (! first)
				printf(",\n");

			printf("\t%s =%s{%s}", trb->rb_kw, cp, trb->rb_data);
			first = 0;
		}

		/* fields that are not printed are still released here */
		(void) free(trb->rb_data);
		trb->rb_data = NULL;
	}

	printf("\n}\n\n");
}

/*
 * stuffrb - add the text cp to the data collected for field lrb.
 *
 * cp is trimmed with sanz() first; an empty field is ignored.  The
 * first piece of text for a field is copied; later pieces are appended
 * after a blank, or after " and " for the author field so that every
 * %A line becomes one more bibtex author.  Author text is passed
 * through andfix() first.
 *
 * Exits with status 1 if memory cannot be obtained.
 */
stuffrb(lrb, cp)
	struct rb	*lrb;
	char		*cp;
{
	extern char	*andfix();
	extern char	*sanz();
	extern char	*malloc();
	extern char	*realloc();
	char		*text;		/* trimmed, and for authors fixed, data */
	char		*fixed;		/* buffer made by andfix, else NULL */
	char		*grown;
	char		*conj;

	/* empty data field */
	if (*cp == '\0' || (text = sanz(cp)) == NULL)
		return;

	/*
	 * andfix hands back either its argument or a new buffer; a new
	 * buffer is ours to free once its text has been copied.
	 */
	fixed = NULL;
	if (lrb->rb_kl == 'A') {
		fixed = andfix(text);
		if (fixed == text)
			fixed = NULL;
		else
			text = fixed;
	}

	if (lrb->rb_data == NULL) {
		if ((lrb->rb_data = malloc(strlen(text) + 1)) == NULL) {
			perror("malloc");
			exit(1);
		}

		strcpy(lrb->rb_data, text);
	}
	else {
		if (lrb->rb_kl == 'A')
			conj = " and ";
		else
			conj = " ";

		/* keep the old pointer until realloc is known to have worked */
		grown = realloc(lrb->rb_data, strlen(lrb->rb_data) + strlen(text) + strlen(conj) + 1);
		if (grown == NULL) {
			perror("realloc");
			exit(1);
		}

		lrb->rb_data = grown;
		strcat(lrb->rb_data, conj);
		strcat(lrb->rb_data, text);
	}

	if (fixed != NULL)
		(void) free(fixed);
}

/*
 * andfix - protect every " and " inside an author's name by rewriting
 * it as " {and} ".
 *
 * Returns string itself when it holds no " and ".  Otherwise returns a
 * newly allocated copy with every occurrence rewritten; string is left
 * untouched and the caller owns the copy.
 *
 * Exits with status 1 if memory cannot be obtained.
 */
char *
andfix(string)
	register char	*string;
{
	extern char	*malloc();
	register char	*src;
	register char	*dst;
	char		*fixed;
	int		hits;

	hits = 0;
	for (src = string; *src != '\0'; src++) {
		if (strncmp(src, " and ", 5) == 0)
			hits++;
	}

	if (hits == 0)
		return(string);

	/*
	 * +2 for the curly braces around each "{and}",
	 * +1 for the null at the end.
	 */
	if ((fixed = malloc(strlen(string) + 2 * hits + 1)) == NULL) {
		perror("malloc");
		exit(1);
	}

	dst = fixed;
	src = string;
	while (*src != '\0') {
		if (strncmp(src, " and ", 5) == 0) {
			strcpy(dst, " {and}");
			dst += 6;
			/*
			 * step over " and" only: the blank after it is
			 * copied next, or starts the next " and ".
			 */
			src += 4;
		}
		else
			*dst++ = *src++;
	}
	*dst = '\0';

	return(fixed);
}

/*
 * sanz - strip leading and trailing white space from bp.
 *
 * Trailing white space is cut off by storing a null into bp itself.
 * Returns a pointer to the first non-space character of bp, or NULL
 * when bp is empty or holds nothing but white space.
 */
char *
sanz(bp)
	char		*bp;
{
	register char	*end;

	/*
	 * back up over any trailing space chars.  the bound is tested
	 * before the character is read, so an empty or all-blank string
	 * is never read in front of its first byte.
	 */
	end = bp + strlen(bp);
	while ((end > bp) && isspace((unsigned char) end[-1]))
		end--;

	if (end == bp)
		return(NULL);	/* empty line */

	*end = '\0';

	/* a non-space char lies before end, so this stops in time */
	while (isspace((unsigned char) *bp))
		bp++;

	return(bp);
}
SHAR_EOF
fi # end of overwriting check
#	End of shell archive
exit 0
-- 
Department of Computer Science, Duke University, Durham, NC 27706
ARPA:	dfk at cs.duke.edu
CSNET:	dfk at duke        
UUCP:	{ihnp4!decvax}!duke!dfk