dejunk - automatic handling of junked news
Johan Finnved
jf at sal.UUCP
Fri Jan 25 14:47:13 AEST 1985
At our site we had a somewhat outdated active file so we got a
lot of articles in newsgroup junk.
I wrote a program to move articles from junk to their proper
newsgroups, as though the newsgroups had been there when the articles
were received.
Ambition level:
- If the articles were received to *some* newsgroups
duplicates are avoided.
- If newsgroups are missing a question is asked *once* for
each missing newsgroup if you want to create it. If the
answer is 'no' you don't get repeated questions even
if there are a lot of articles for the unwanted newsgroup.
- Xref: header lines are fixed to reflect the new situation.
- If there is no problem the articles are unlinked from
the junk directory.
Does anybody recognize the problem?
Perhaps someone has already written such a program?
My program (entered from scratch today, so there may be some bugs left)
is posted to net.sources.
Johan Finnved
jf at sal.UUCP
...!decvax!mcvax!enea!sal!jf
------- cut here to get dejunk.c ------
/*
* De-junker
* dejunk version 1.0 18-Jan-85 Johan Finnved
*
* Program to move articles from junk to their proper
* newsgroups, as though the newsgroups had been there when the articles
* were received.
* Ambition level:
* - If the articles were received to *some* newsgroups
* duplicates are avoided.
* - If newsgroups are missing a question is asked *once* for
* each missing newsgroup if you want to create it. If the
* answer is 'no' you don't get repeated questions even
* if there are a lot of articles for the unwanted newsgroup.
* - Xref: header lines are fixed to reflect the new situation.
* - If there is no problem the articles are unlinked from
* the junk directory.
*
*
* The program is tested only on our site running V7 and news version 2.10.1
*
* On our site it is sufficient to have dejunk setuid news
* our kernel allows setuid(geteuid())
*
* Possible porting problems:
* The program assumes that the d_ino fields in the spool
* directories uniquely identify the articles.
* That is, if an article appears in several places they
* are *hard-linked* to the same article.
* (This is a problem with eunice isn't it?)
*
* Program assumes index() and rindex()
*
* Program relies on relatively easy headers generated by inews
* (no continuation lines etc...)
*
* Almost all data areas are static, you may want to
* have a smarter malloc scheme.
* */
#include <ctype.h>
#include <whoami.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/dir.h>
#include <sys/stat.h>
#define MAXNG 1000 /* Max number of newsgroups */
#define ARTNGMAX 40 /* Max number of newsgroups in one article */
#define HDRMAX 3000 /* Max size (in bytes) of header */
#define HLINEMAX 50 /* Max header lines */
char ACTIVE[]= "/usr/lib/news/active" ;
char SPOOLDIR[]= "/usr/spool/news" ;
char * tmpname ;
char *strcpy(), *strcat(), *index(), *rindex(), *mktemp() ;
/*
 * Substitute for the directory-reading library, compiled only when
 * READDIR is not defined.  A directory is opened as an ordinary file
 * and read as a sequence of struct direct records.
 */
#ifndef READDIR
#define N_D 3 /* Number of directories that may be open at the same time */
struct dirsim {
FILE * D_fp ; /* Stream open on the directory; NULL marks a free slot */
struct direct D_entry ; /* Buffer for the entry most recently read */
} dirsimtab[N_D] ;
typedef struct dirsim DIR ;
/*
 * opendir - open directory `name' for reading with readdir().
 *
 * Takes the first unused slot of dirsimtab.  Returns a pointer to that
 * slot, or NULL when the directory cannot be opened.  When every slot
 * is already in use the program is aborted.
 */
DIR *
opendir(name)
char *name ;
{
	register int slot ;

	for(slot = 0 ; slot < N_D ; slot++) {
		if(dirsimtab[slot].D_fp != NULL)
			continue ;	/* slot is busy, try the next one */
		dirsimtab[slot].D_fp = fopen(name,"r") ;
		if(dirsimtab[slot].D_fp == NULL)
			return(NULL) ;
		return(&dirsimtab[slot]) ;
	}
	fprintf(stderr,"diropen out of slots\n") ;
	abort() ;
}
/*
 * closedir - close a directory obtained from opendir() and release
 * its dirsimtab slot for reuse.
 */
closedir(dp)
register DIR *dp ;
{
	register FILE *fp ;

	fp = dp->D_fp ;
	dp->D_fp = NULL ;	/* slot is free again */
	fclose(fp) ;
}
/*
 * readdir - return the next used entry of directory `dp'.
 *
 * Entries whose d_ino is zero are skipped.  The result points at the
 * slot's own buffer and is overwritten by the next call on the same
 * `dp'.  Returns NULL when no further entry can be read.
 */
struct direct *
readdir(dp)
register DIR *dp ;
{
	for(;;) {
		if(fread(&dp->D_entry,sizeof(struct direct),1,dp->D_fp) != 1)
			return(NULL) ;
		if(dp->D_entry.d_ino != 0)
			return(&dp->D_entry) ;
	}
}
#endif
/* Forward declarations of routines that do not return int */
DIR * dirallo() ;
char * ngtodir() ;
/*
 * One entry per known newsgroup.  The first ini_ng entries come from the
 * active file and are sorted by name; entries from ini_ng up to tot_ng
 * are added by lookng() while the program runs and are not sorted.
 */
struct act {
char *ac_name ; /* Name of newsgroup */
long ac_rnd ; /* Byte offset in the active file of the
* sequence number field (used with fseek) */
long ac_seq ; /* Local sequence number or
* -1 if newsgroup shouldn't be
* created */
} acttab[MAXNG] ;
int ini_ng ; /* Initial number of newsgroups */
int tot_ng ; /* Total number of names in acttab */
int new_mod, new_uid, new_gid ; /* Protection info for news directory */
FILE * actfp ; /* Active file, open for update */
FILE * artfp ; /* Article currently being processed */
int nang ; /* Number of newsgroups in this article */
int nhlines ; /* Number of header lines */
long artxref[ARTNGMAX] ; /* Xref numbers found in article */
long newxref[ARTNGMAX] ; /* Xref numbers that are found by searching */
int artng[ARTNGMAX] ; /* Newsgroups in article (index in acttab) */
char *ng, *xref, *title ; /* Interesting header pointers */
char hdrbuf[HDRMAX] ; /* Buffer to store header */
char * hdrlines[HLINEMAX] ; /* Array of line starts */
char myname[] = sysname ; /* Name of this site, as it appears first on Xref: lines */
int mynamez ; /* strlen(myname), set at the start of main() */
/*
 * namecmp - qsort() comparison routine for acttab.
 * Orders two entries by newsgroup name; the result has the same sign
 * convention as strcmp().
 */
namecmp(ap1,ap2)
struct act *ap1, *ap2 ;
{
	register char *left, *right ;

	left = ap1->ac_name ;
	right = ap2->ac_name ;
	return(strcmp(left,right)) ;
}
main()
{
register char *cp, *cp1, *cpe ;
register i, j ;
char line[100] ;
char junkname[100] ;
DIR * junkdp ;
DIR * chkdp ;
FILE * tfp ;
struct stat sbuf ;
struct direct * dirp ;
ino_t artino ;
int goodart, badart ;
register struct act * ap ;
setgid(getegid()) ; setuid(geteuid()) ; /* If the system allows it */
mynamez = strlen(myname) ;
if((actfp = fopen(ACTIVE,"r+w")) == NULL) {
fprintf(stderr,"Unable to open active file\n") ;
done(1) ;
}
if(fstat(fileno(actfp),&sbuf) < 0) {
fprintf(stderr,"Unable to fstat active file\n") ;
done(1) ;
}
if((cp1 = cp = (char *) malloc((int) sbuf.st_size)) == NULL) {
fprintf(stderr,"Unable to allocate in-core active file copy") ;
done(1) ;
}
cpe = cp + sbuf.st_size ;
if(fread(cp,1,cpe-cp,actfp) != cpe-cp) {
fprintf(stderr,"Unable to read active file\n") ;
done(1) ;
}
while(cp < cpe) {
acttab[ini_ng].ac_name = cp ;
while(*cp++ != ' ' && cp < cpe)
;
cp[-1] = '\0' ;
acttab[ini_ng].ac_rnd = cp - cp1 ; /* rnd adr of seq */
acttab[ini_ng].ac_seq = atol(cp) ;
if(cp[5] != '\n') {
fprintf(stderr,"Strange line in active ng=%s\n",
acttab[ini_ng].ac_name) ;
done(1) ;
}
cp += 6 ;
if(ini_ng++ >= MAXNG) {
fprintf(stderr,"Too many newsgroups\n") ;
done(1) ;
}
}
qsort(acttab,tot_ng=ini_ng,sizeof(struct act),namecmp) ;
sprintf(line,"%s/junk",SPOOLDIR) ;
if(stat(line,&sbuf) < 0) {
fprintf(stderr,"Unable to stat junk directory") ;
done(1) ;
}
new_mod = sbuf.st_mode & 0777 ;
new_uid = sbuf.st_uid ; new_gid = sbuf.st_gid ;
if((junkdp = opendir(line)) == NULL) {
fprintf(stderr,"Unable to open %s directory\n",line) ;
done(1) ;
}
while((dirp = readdir(junkdp)) != NULL) {
if(!islegal(dirp->d_name))
continue ;
artino = dirp->d_ino ;
sprintf(junkname,"%s/junk/%s",SPOOLDIR,dirp->d_name) ;
if(stat(junkname,&sbuf) < 0
|| (sbuf.st_mode & S_IFMT) != S_IFREG)
continue ;
if((artfp = fopen(junkname,"r")) == NULL)
continue ;
if(hread()== NULL) { /* Get the article */
fprintf(stderr,"%s garbled\n",junkname) ;
fclose(artfp) ;
continue ;
}
for(nang = 0 , cp = strcpy(line,ng); *cp ;) {
cp1 = cp ;
artxref[nang] = 0 ;
newxref[nang] = 0 ;
while(*cp && *cp !=',') cp++ ;
if (*cp == ',') *cp++ = '\0' ;
artng[nang++] = lookng(cp1,1) ;
}
if(xref != NULL && !strncmp(xref,myname,mynamez)) {
for(cp = strcpy(line,xref+mynamez+1) ; *cp ;) {
if((cp1 = index(cp,':')) == NULL)
break ;
*cp1++ = '\0' ;
if((j = lookng(cp,0)) >= 0)
for(i = 0 ; i < nang ; i++)
if(artng[i] == j) {
artxref[i] = atol(cp1) ;
break ;
}
for(cp=cp1 ; *cp && *cp++ != ' ' ; )
;
}
}
printf("%s: %s\n",dirp->d_name,title) ;
j = 0 ;
goodart = 0 ; badart = 0 ;
for(i = 0 ; i < nang ; i++) {
ap = &acttab[artng[i]] ;
if(++j > 2) {
j = 1 ;
printf("\n") ;
}
printf("\t%s:",ap->ac_name) ;
if(ap->ac_seq < 0l) { /* Inactive newsgroup */
printf("Skipped") ;
/* Note that badart
* is not incremented
* since we don't want this newsgroup */
continue ;
}
if((chkdp = dirallo(ngtodir(ap->ac_name))) == NULL){
printf("no directory") ;
badart++ ;
continue ;
}
while((dirp = readdir(chkdp)) != NULL) {
if(!islegal(dirp->d_name))
continue ;
if(dirp->d_ino == artino) {
printf("Ok(%s)",dirp->d_name) ;
newxref[i] = atol(dirp->d_name) ;
goodart++ ;
goto nextgrp ;
}
}
/* Install missing news */
if(install(ap,junkname) < 0) {
printf("Missing") ;
badart++ ;
} else {
newxref[i] = ap->ac_seq ;
printf("Installed(%ld)",ap->ac_seq) ;
goodart++ ;
}
nextgrp:
closedir(chkdp) ;
}
/* Check Xrefs */
for(i = 0 ; i < nang ; i++)
if(artxref[i] != ((goodart>1) ? newxref[i] : 0l))
break ;
if(i < nang) {
line[0] = '\0' ;
for(i = 0 ; i < nang ; i++)
if(newxref[i] > 0l) {
if(line[0] == '\0')
sprintf(line,
"Xref: %s",myname) ;
sprintf(line+strlen(line)," %s:%ld",
acttab[artng[i]].ac_name,
newxref[i]) ;
}
printf("\nModified->\t%s",line) ;
if(tmpname == NULL)
tmpname = mktemp("/tmp/dejunkXXXXXX") ;
if((tfp = fopen(tmpname,"w+r")) == NULL) {
fprintf(stderr,"Unable to make tmp copy") ;
done(1) ;
}
for(i = 0 ; i < nhlines ; i++)
if(strncmp(cp=hdrlines[i],"Xref:",5)!=0)
fprintf(tfp,"%s\n",cp) ;
if(line[0] != '\0')
fprintf(tfp,"%s\n",line) ;
putc('\n',tfp) ;
while(fgets(line,sizeof line,artfp))
fprintf(tfp,"%s",line) ;
fclose(artfp) ;
fflush(tfp) ;
if(ferror(tfp)) {
fprintf(stderr,"Error writing temp article") ;
done(1) ;
}
if((artfp = fopen(junkname,"w")) == NULL) {
fprintf(stderr,
"Unable to reopen article for write\n") ;
done(1) ;
}
rewind(tfp) ;
while(fgets(line,sizeof line,tfp))
fprintf(artfp,"%s",line) ;
fclose(tfp) ;
}
printf("\n") ;
fclose(artfp) ;
if(badart == 0) {
unlink(junkname) ;
}
}
done(0) ;
}
/*
 * done - leave the program with exit status `rt'.
 * If a temporary article copy was created it is removed first.
 */
done(rt)
{
	register char *scratch ;

	scratch = tmpname ;
	if(scratch != NULL)
		unlink(scratch) ;
	exit(rt) ;
}
/*
 * install - give article file `name' the next sequence number of the
 * newsgroup described by `ap'.
 *
 * The sequence number stored in the active file is first read back and
 * compared with ap->ac_seq.  It is then overwritten in place with the
 * incremented value, and the article is hard-linked into the newsgroup
 * directory under that number.
 *
 * Returns 0 on success (ap->ac_seq is then the number used) and -1 on
 * failure.  A write error on the active file ends the program.
 *
 * Once the active file has been updated ap->ac_seq is advanced even if
 * the link then fails: otherwise the in-core number would disagree
 * with the file and every later check-read for this newsgroup would
 * fail.
 */
int
install(ap,name)
register struct act *ap ;
char *name ;
{
	register char *cp ;
	long newseq ;
	long atol() ;
	/* Room for ngtodir()'s 100 byte result, a '/' and the number */
	char destname[128] ;
	char numbuf[10] ;

	newseq = ap->ac_seq+1 ;
	/* The field in the active file is exactly five digits wide; a
	 * longer number would overwrite the newline that follows it */
	if(newseq > 99999l) {
		fprintf(stderr,"Sequence number overflow in %s\n",ap->ac_name) ;
		return(-1) ;
	}
	sprintf(destname,"%s/%ld",ngtodir(ap->ac_name),newseq) ;

	/* Check-read the current number */
	fseek(actfp,ap->ac_rnd,0) ;
	if(fgets(numbuf,sizeof numbuf,actfp) == NULL
	|| (cp = index(numbuf,'\n')) == NULL)
		goto rdfault ;
	*cp = '\0' ;
	if(ftell(actfp) != ap->ac_rnd + 6
	|| (!islegal(numbuf))
	|| ap->ac_seq != atol(numbuf))
		goto rdfault ;

	/* Overwrite it with the new number */
	fseek(actfp,ap->ac_rnd,0) ;
	fprintf(actfp,"%05ld",newseq) ;
	fflush(actfp) ;
	if(ferror(actfp)) {
		fprintf(stderr,"Problem writing active file\n") ;
		done(1) ;
	}
	ap->ac_seq = newseq ; /* Committed to new seq number */

	if(link(name,destname) < 0) {
		perror("making link") ;
		return(-1) ;
	}
	return(0) ;

rdfault:
	fprintf(stderr,"Something wrong checkreading active\n") ;
	return(-1) ;
}
/*
 * islegal - is `name' a valid article number?
 *
 * Returns 1 when `name' consists of one to five decimal digits and
 * nothing else, 0 otherwise.  The empty string is rejected.
 */
int
islegal(name)
char *name ;
{
	register char *cp ;

	if(*name == '\0')
		return(0) ;	/* no digits at all */
	for(cp = name ; *cp && cp < name+5 ; cp++)
		if(*cp < '0' || *cp > '9')
			return(0) ;
	/* Still not at the end after five digits means too long */
	return(*cp == '\0') ;
}
/* Formerly held the "mkdir" shell command; dirallo() no longer uses
 * it, the definition is kept so the symbol stays available. */
char sysline[100] ;

/*
 * dirallo - open directory `name', creating it first if necessary.
 *
 * If the directory cannot be opened, its parent is handled the same
 * way (recursively) and the directory is then created and given the
 * mode, owner and group recorded in new_mod, new_uid and new_gid.
 * `name' is modified temporarily while the parent is processed and
 * restored afterwards.
 *
 * Returns the open directory.  Any failure ends the program through
 * done(1).
 *
 * The directory is made with mkdir(2) rather than by handing a command
 * line to the shell: the path is derived from newsgroup names taken
 * from article headers and must not be interpreted by a shell.
 */
DIR * dirallo(name)
char * name ;
{
	register char *cp ;
	register int i = 0 ;	/* non-zero once creation was attempted */
	DIR * dp ;
	struct stat sbuf ;

	for(;;) {
		if((dp = opendir(name)) != NULL)
			return(dp) ;
		if(i) {
			fprintf(stderr,"Unable to create %s\n",name) ;
			done(1) ;
		}
		cp = rindex(name,'/') ;
		if(cp == NULL || cp == name) {
			/* No parent directory left to fall back on */
			fprintf(stderr,"Unable to create %s\n",name) ;
			done(1) ;
		}
		/* Make sure the parent exists */
		*cp = '\0' ;
		if((dp = dirallo(name)) != NULL)
			closedir(dp) ;
		*cp = '/' ;
		i = mkdir(name,new_mod) ;
		printf("'mkdir %s' returns %d\n",name,i) ;
		chmod(name,new_mod) ;
		chown(name,new_uid,new_gid) ;
		/* Check that directory is correctly allocated */
		if( stat(name,&sbuf) < 0
		/* If uid is not correct - complain
		 * only if modes are different for owner
		 * and others */
		|| (sbuf.st_uid != new_uid
		 && ((new_mod & 0700) >> 6) != (new_mod & 07))
		/* If gid is not correct - complain
		 * only if modes are different for group
		 * and others */
		|| (sbuf.st_gid != new_gid
		 && ((new_mod & 070) >> 3) != (new_mod & 07))
		|| (sbuf.st_mode & 0777) != new_mod ) {
			fprintf(stderr,"Directory allocation failed\n") ;
			done(1) ;
		}
		i = 1 ; /* No more retries */
	}
}
/*
 * ngtodir - convert newsgroup name `ng' to the path of its spool
 * directory: SPOOLDIR, a '/', and the name with every '.' replaced
 * by '/'.
 *
 * The result is in a static buffer that is overwritten by the next
 * call.  A name that does not fit in the buffer is reported and ends
 * the program through done(1).
 */
char *
ngtodir(ng)
register char *ng ;
{
	static char line[100] ;
	register char *cp ;

	/* SPOOLDIR + '/' + name + terminating NUL must fit */
	if(strlen(SPOOLDIR) + 1 + strlen(ng) >= sizeof line) {
		fprintf(stderr,"Newsgroup name too long: %s\n",ng) ;
		done(1) ;
	}
	strcpy(line,SPOOLDIR) ;
	cp = line + strlen(line) ;
	*cp++ = '/' ;
	while((*cp = *ng++) != '\0')
		if(*cp++ == '.')
			cp[-1] = '/' ;
	return(line) ;
}
/*
 * lookng - find newsgroup `cp' in acttab and return its index.
 *
 * The entries read from the active file (indices below ini_ng) are
 * sorted and searched by bisection; entries added later (ini_ng up to
 * tot_ng) are searched linearly.
 *
 * When the name is unknown and `doalloc' is zero, -1 is returned.
 * When `doalloc' is non-zero a new entry is added with ac_seq -1 and
 * the user is asked on stderr whether the newsgroup should be created:
 *   y  - append "name 00000" to the active file, set ac_seq to 0 and
 *        create the spool directory;
 *   q  - leave the program;
 *   anything else (including end of input) - the entry keeps
 *        ac_seq -1.
 * In every case the question is asked only once per name, because the
 * entry is found by later calls.  The index of the new entry is
 * returned.
 */
int
lookng(cp,doalloc)
register char *cp ;
int doalloc ;
{
	register int k = 0, l, r, i ;
	register char *cp1 ;
	register int c ;
	DIR * dp ;
	char line[100] ;

	/* Binary search initial ng table.  On a match both bounds move
	 * past k in the same step, which leaves l - r == 2. */
	l = 0 ; r = ini_ng -1 ;
	while(l <= r) {
		k = (l + r) >> 1 ;
		i = strcmp(cp,acttab[k].ac_name) ;
		if (i <= 0) r = k - 1 ;
		if (i >= 0) l = k + 1 ;
	}
	if(l - r >= 2)
		return(k) ; /* Found in binary search */
	/* Linear search in additional ng table */
	for(k = ini_ng ; k < tot_ng ; k++) {
		if(!strcmp(cp,acttab[k].ac_name))
			return(k) ;
	}
	/* Not found - possibly insert */
	if(!doalloc)
		return(-1) ;
	if(k >= MAXNG) {
		fprintf(stderr,"Too many newsgroups\n") ;
		done(1) ;
	}
	if((cp1 = (char *)malloc(strlen(cp)+1)) == NULL) {
		fprintf(stderr,"Out of memory\n") ;
		done(1) ;
	}
	tot_ng = k+1 ;
	acttab[k].ac_name = strcpy(cp1,cp) ;
	acttab[k].ac_seq = (long) (-1) ;
	fprintf(stderr,"Do you wish to add newsgroup %s [yn]",cp) ;
	/* Bounded read; only the first character of the answer matters */
	line[0] = 0 ;
	if(fgets(line,sizeof line,stdin) == NULL)
		line[0] = 0 ;
	else if(index(line,'\n') == NULL) {
		/* Discard the rest of an over-long answer so that it is
		 * not taken as the reply to the next question */
		while((c = getchar()) != EOF && c != '\n')
			;
	}
	switch(line[0]) {
	case 'y':
	case 'Y':
		if(fseek(actfp,0l,2) < 0
		|| fprintf(actfp,"%s 00000\n",cp) < 0
		|| fflush(actfp) < 0
		|| ferror(actfp)) {
			fprintf(stderr,"Unable to append to active file\n") ;
			break ;
		}
		acttab[k].ac_seq = 0 ;
		/* The number starts six bytes before the end: "00000\n" */
		acttab[k].ac_rnd = ftell(actfp) - 6l ;
		if((dp = dirallo(ngtodir(cp))) != NULL) /* create dirs */
			closedir(dp) ;
		break ;
	case 'n':
	case 'N':
		break ;
	case 'q':
	case 'Q':
		done(1) ;
		break ;
	default:
		/* Any other answer counts as "no" */
		break ;
	}
	return(k) ;
}
/* Get article header (We know inews puts one header on each line */
hread()
{
register char *cp, *linep ;
register i ;
ng = NULL ; xref = NULL ; title = NULL ;
linep = hdrbuf ;
nhlines = 0 ;
for(;;) {
if(fgets(linep, &hdrbuf[HDRMAX] - linep, artfp)==NULL)
return(NULL) ;
cp = linep + strlen(linep) ;
if(cp[-1] != '\n')
return(NULL) ; /* Too big header */
cp[-1] = '\0' ; /* Clobber newline */
if(cp == linep+1)
break ; /* Empty line - end of header */
hdrlines[nhlines++] = linep ;
if(ng == NULL && strncmp(linep,"Newsgroups: ",12)==0)
ng = linep+12 ;
if(xref == NULL && strncmp(linep,"Xref: ",6) == 0)
xref = linep+6 ;
if(title == NULL && strncmp(linep,"Subject: ",9) == 0)
title = linep+9 ;
linep = cp ;
}
return(nhlines) ;
}
More information about the Comp.sources.unix
mailing list