Some improvements to "file"
Guy Harris
guy at sun.uucp
Tue Sep 3 18:17:22 AEST 1985
1) The SCCS ID contains an extraneous <ESC>, if anybody cares...
2) A new type "byte" has been added to the magic number types in /etc/magic
(the scaffolding was already there), and the numeric types now support an
optional masking operator. If a mask is specified, the value is ANDed with
the specified mask before it is compared with any test values and before it
is printed.
3) The test values for the "string" type can now contain C string escape
sequences, such as "\n" for newline.
4) The return value of "malloc" is now tested, and the internal table of
/etc/magic file entries is now "realloc"ed if it fills up; printing a
message saying to change some magic number in the source and recompile is
rather dumb, considering most S5 users outside AT&T *don't* have the source
to do that with...
5) The error messages have been cleaned up, and error messages due to
"open"s or "stat"s failing now include the error message corresponding to
the value of "errno".
6) When it tries to reset the time of last access, it now uses "utime" the
way the manual says you're supposed to. Thus, it won't fail on systems like
4.2BSD, where "st_atime" and "st_mtime" aren't contiguous. (Note that the
S3/S5 manuals go out of their way *NOT* to tell you that they're contiguous;
somebody made an effort to prevent the very sort of crap that "file",
"cpio", and "pack" do.)
7) Several bits of code, such as the code that parses /etc/magic and the
code that prints the table built from /etc/magic, have been cleaned up.
Note that the bug report (in the referenced articles) about "unsigned short"
vs. "short" may be fixed in the original S5R2 version (they may have seen
the bug in S5R1 - some of the suggested changes seem to have been made).
The version here corrects the other two complaints about no "byte" type and
half-assed checking of the type field in general.
Note that the 4.2BSD version has a number of other useful tests (like tests
against "ditroff" output) which should be tossed into the S5 version.
The original reason I put in the stuff under 2) and 3) was to make it
possible to test for a "compress"ed file (see mod.sources for "compress"
version 4 - it blows "pack" and "compact" away), and to test for block
compression and to dump the number of bits required for decompression,
without having to hack the source. Here's the entry for "compressed" files:
0 string \037\235 compressed data
>2 byte&0x80 >0 block compressed
>2 byte&0x1f x %d bits
What it means is "test the two bytes starting at location 0 in the file. If
they are equal to \037 and \235 respectively, print 'compressed data'. Then
test whether the byte at location 2, when masked with 0x80, is non-zero. If
so, print 'block compressed'. Then print that byte, masked with 0x1f, in
the form '<N> bits'."
The gory details of how to construct a "magic" file are given (sketchily,
and *sans* these new features) in "/etc/magic". This is the wrong place to
give them. The correct place to give them is MAGIC(4), which I have whipped
up and posted in another article.
Here are the changes, in "diff -c" form:
*** /arch/s5r2/usr/src/cmd/file/file.c Mon Oct 31 06:46:34 1983
--- ./file.c Tue Sep 3 00:57:47 1985
***************
*** 2,8
#include <stdio.h>
#include <ctype.h>
- #include <signal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
--- 2,7 -----
#include <stdio.h>
#include <ctype.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
***************
*** 13,21
*/
#define BYTE 0
! #define SHORT 2
! #define LONG 4
! #define STR 8
/*
** Opcodes
--- 12,20 -----
*/
#define BYTE 0
! #define SHORT 1
! #define LONG 2
! #define STR 3
#define NTYPES 4
***************
*** 17,22
#define LONG 4
#define STR 8
/*
** Opcodes
*/
--- 16,30 -----
#define LONG 2
#define STR 3
+ #define NTYPES 4
+
+ char *typenames[NTYPES] = {
+ "byte",
+ "short",
+ "long",
+ "string"
+ };
+
/*
** Opcodes
*/
***************
*** 28,33
#define ANY 4
#define SUB 64 /* or'ed in */
/*
** Misc
*/
--- 36,51 -----
#define ANY 4
#define SUB 64 /* or'ed in */
+ #define NOPS 5
+
+ char opnames[NOPS] = {
+ '=',
+ '>',
+ '<',
+ '=',
+ 'x'
+ };
+
/*
** Misc
*/
***************
*** 51,56
char e_level; /* 0 or 1 */
long e_off; /* in bytes */
char e_type;
char e_opcode;
union {
long num;
--- 69,75 -----
char e_level; /* 0 or 1 */
long e_off; /* in bytes */
char e_type;
+ long e_mask; /* if non-zero, mask value with this */
char e_opcode;
union {
long num;
***************
*** 61,67
typedef struct entry Entry;
! Entry *mtab;
char fbuf[FBSZ];
char *mfile = "/etc/magic";
char *fort[] = {
--- 80,89 -----
typedef struct entry Entry;
! Entry *mtab, *mend;
! int errno;
! int sys_nerr;
! char *sys_errlist[];
char fbuf[FBSZ];
char *mfile = "/etc/magic";
char *fort[] = {
***************
*** 76,81
char *strchr();
char *malloc();
long atolo();
int i = 0;
int fbsz;
int ifd;
--- 98,105 -----
char *strchr();
char *malloc();
long atolo();
+ char *getstr();
+ void showstr();
int i = 0;
int fbsz;
int ifd = -1;
***************
*** 78,84
long atolo();
int i = 0;
int fbsz;
! int ifd;
#define prf(x) printf("%s:%s", x, strlen(x)>6 ? "\t" : "\t\t");
--- 102,108 -----
void showstr();
int i = 0;
int fbsz;
! int ifd = -1;
#define prf(x) printf("%s:%s", x, strlen(x)>6 ? "\t" : "\t\t");
***************
*** 102,108
case 'f':
fflg++;
if ((fl = fopen(optarg, "r")) == NULL) {
! fprintf(stderr, "cannot open %s\n", optarg);
goto use;
}
break;
--- 126,133 -----
case 'f':
fflg++;
if ((fl = fopen(optarg, "r")) == NULL) {
! fprintf(stderr, "file: %s: %s\n", optarg,
! errno < sys_nerr? sys_errlist[errno]: "Can't open");
goto use;
}
break;
***************
*** 125,134
reg Entry *ep;
mkmtab(1);
! printf("level off type opcode value string\n");
! for(ep = mtab; ep->e_off != -1L; ep++) {
! printf("%d\t%d\t%d\t%d\t", ep->e_level, ep->e_off,
! ep->e_type, ep->e_opcode);
if(ep->e_type == STR)
printf("%s\t", ep->e_value.str);
else
--- 150,162 -----
reg Entry *ep;
mkmtab(1);
! printf("level\toff\ttype\topcode\tvalue\tstring\n");
! for(ep = mtab; ep < mend; ep++) {
! printf("%d\t%d\t%s", ep->e_level, ep->e_off,
! typenames[ep->e_type]);
! if(ep->e_mask != 0L)
! printf("&%#lo", ep->e_mask);
! printf("\t%c\t", opnames[ep->e_opcode & ~SUB]);
if(ep->e_type == STR)
showstr(ep->e_value.str);
else
***************
*** 130,136
printf("%d\t%d\t%d\t%d\t", ep->e_level, ep->e_off,
ep->e_type, ep->e_opcode);
if(ep->e_type == STR)
! printf("%s\t", ep->e_value.str);
else
printf("%lo\t", ep->e_value.num);
printf("%s", ep->e_str);
--- 158,164 -----
printf("&%#lo", ep->e_mask);
printf("\t%c\t", opnames[ep->e_opcode & ~SUB]);
if(ep->e_type == STR)
! showstr(ep->e_value.str);
else
printf("%lo\t", ep->e_value.num);
printf("%s", ep->e_str);
***************
*** 156,162
p = argv[optind];
prf(p);
type(p);
! if(ifd)
close(ifd);
}
exit(0);
--- 184,190 -----
p = argv[optind];
prf(p);
type(p);
! if(ifd >= 0)
close(ifd);
}
exit(0);
***************
*** 168,173
int j,nl;
char ch;
struct stat mbuf;
ifd = -1;
if(stat(file, &mbuf) < 0) {
--- 196,205 -----
int j,nl;
char ch;
struct stat mbuf;
+ struct utimbuf {
+ time_t actime;
+ time_t modtime;
+ } utb;
ifd = -1;
if(stat(file, &mbuf) < 0) {
***************
*** 171,177
ifd = -1;
if(stat(file, &mbuf) < 0) {
! printf("cannot open\n");
return;
}
switch (mbuf.st_mode & S_IFMT) {
--- 203,210 -----
ifd = -1;
if(stat(file, &mbuf) < 0) {
! printf("%s\n",
! (unsigned)errno < sys_nerr? sys_errlist[errno]: "Cannot stat");
return;
}
switch (mbuf.st_mode & S_IFMT) {
***************
*** 197,203
}
ifd = open(file, 0);
if(ifd < 0) {
! printf("cannot open for reading\n");
return;
}
fbsz = read(ifd, fbuf, FBSZ);
--- 230,237 -----
}
ifd = open(file, 0);
if(ifd < 0) {
! printf("%s\n",
! (unsigned)errno < sys_nerr? sys_errlist[errno]: "Cannot read");
return;
}
fbsz = read(ifd, fbuf, FBSZ);
***************
*** 206,212
goto out;
}
if(sccs()) {
! printf("sccs \n");
goto out;
}
if(ckmtab())
--- 240,246 -----
goto out;
}
if(sccs()) {
! printf("sccs\n");
goto out;
}
if(ckmtab())
***************
*** 318,324
for(i=0; i < fbsz; i++)
if(fbuf[i]&0200) {
if (fbuf[0]=='\100' && fbuf[1]=='\357') {
! printf("troff output\n");
goto out;
}
printf("data\n");
--- 352,358 -----
for(i=0; i < fbsz; i++)
if(fbuf[i]&0200) {
if (fbuf[0]=='\100' && fbuf[1]=='\357') {
! printf("otroff output\n");
goto out;
}
printf("data\n");
***************
*** 338,344
}
printf("\n");
out:
! utime(file, &mbuf.st_atime);
}
mkmtab(cflg)
--- 372,380 -----
}
printf("\n");
out:
! utb.actime = mbuf.st_atime;
! utb.modtime = mbuf.st_mtime;
! (void)utime(file, &utb);
}
mkmtab(cflg)
***************
*** 347,352
reg Entry *ep;
reg FILE *fp;
reg int lcnt = 0;
auto char buf[BSZ];
auto Entry *mend;
--- 383,389 -----
reg Entry *ep;
reg FILE *fp;
reg int lcnt = 0;
+ reg int i;
auto char buf[BSZ];
auto int curentry;
auto int nentries;
***************
*** 348,354
reg FILE *fp;
reg int lcnt = 0;
auto char buf[BSZ];
! auto Entry *mend;
ep = (Entry *) calloc(sizeof(Entry), NENT);
if(ep == NULL) {
--- 385,392 -----
reg int lcnt = 0;
reg int i;
auto char buf[BSZ];
! auto int curentry;
! auto int nentries;
mtab = (Entry *) malloc(sizeof(Entry)*NENT);
if(mtab == NULL) {
***************
*** 350,358
auto char buf[BSZ];
auto Entry *mend;
! ep = (Entry *) calloc(sizeof(Entry), NENT);
! if(ep == NULL) {
! fprintf(stderr, "no memory for magic table\n");
exit(2);
}
mtab = ep;
--- 388,396 -----
auto int curentry;
auto int nentries;
! mtab = (Entry *) malloc(sizeof(Entry)*NENT);
! if(mtab == NULL) {
! fprintf(stderr, "file: no memory for magic table\n");
exit(2);
}
curentry = 0;
***************
*** 355,362
fprintf(stderr, "no memory for magic table\n");
exit(2);
}
! mtab = ep;
! mend = &mtab[NENT];
fp = fopen(mfile, "r");
if(fp == NULL) {
fprintf(stderr, "cannot open magic file <%s>.\n", mfile);
--- 393,400 -----
fprintf(stderr, "file: no memory for magic table\n");
exit(2);
}
! curentry = 0;
! nentries = NENT;
fp = fopen(mfile, "r");
if(fp == NULL) {
fprintf(stderr, "file: cannot open magic file <%s>: %s\n",
***************
*** 359,365
mend = &mtab[NENT];
fp = fopen(mfile, "r");
if(fp == NULL) {
! fprintf(stderr, "cannot open magic file <%s>.\n", mfile);
exit(2);
}
while(fgets(buf, BSZ, fp) != NULL) {
--- 397,404 -----
nentries = NENT;
fp = fopen(mfile, "r");
if(fp == NULL) {
! fprintf(stderr, "file: cannot open magic file <%s>: %s\n",
! mfile, errno < sys_nerr? sys_errlist[errno]: "Can't open");
exit(2);
}
while(fgets(buf, BSZ, fp) != NULL) {
***************
*** 365,370
while(fgets(buf, BSZ, fp) != NULL) {
reg char *p = buf;
reg char *p2;
reg char opc;
if(*p == '\n' || *p == '#')
--- 404,410 -----
while(fgets(buf, BSZ, fp) != NULL) {
reg char *p = buf;
reg char *p2;
+ reg char *p3;
reg char opc;
ep = &mtab[curentry];
***************
*** 367,372
reg char *p2;
reg char opc;
if(*p == '\n' || *p == '#')
continue;
lcnt++;
--- 407,413 -----
reg char *p3;
reg char opc;
+ ep = &mtab[curentry];
if(*p == '\n' || *p == '#')
continue;
lcnt++;
***************
*** 397,409
continue;
}
*p2++ = NULL;
! if(*p == 's') {
! if(*(p+1) == 'h')
! ep->e_type = SHORT;
! else
! ep->e_type = STR;
! } else if (*p == 'l')
! ep->e_type = LONG;
while(*p2 == '\t')
*p2++;
/* OP-VALUE */
--- 438,458 -----
continue;
}
*p2++ = NULL;
! p3 = strchr(p, '&');
! if(p3 != NULL) {
! *p3++ = '\0';
! ep->e_mask = atolo(p3);
! } else
! ep->e_mask = 0L;
! for (i = 0; i < NTYPES; i++) {
! if (strcmp(p, typenames[i]) == 0)
! goto foundtype;
! }
! if(cflg)
! fprintf(stderr, "file: illegal type %s\n", p);
! continue;
! foundtype:
! ep->e_type = i;
while(*p2 == '\t')
*p2++;
/* OP-VALUE */
***************
*** 411,417
p2 = strchr(p, '\t');
if(p2 == NULL) {
if(cflg)
! fprintf(stderr, "fmt error, no tab after %son line %d\n", p, lcnt);
continue;
}
*p2++ = NULL;
--- 460,466 -----
p2 = strchr(p, '\t');
if(p2 == NULL) {
if(cflg)
! fprintf(stderr, "file: fmt error, no tab after %son line %d\n", p, lcnt);
continue;
}
*p2++ = '\0';
***************
*** 414,420
fprintf(stderr, "fmt error, no tab after %son line %d\n", p, lcnt);
continue;
}
! *p2++ = NULL;
if(ep->e_type != STR) {
opc = *p++;
switch(opc) {
--- 463,469 -----
fprintf(stderr, "file: fmt error, no tab after %son line %d\n", p, lcnt);
continue;
}
! *p2++ = '\0';
if(ep->e_type != STR) {
opc = *p++;
for (i = 0; i < NOPS; i++) {
***************
*** 417,441
*p2++ = NULL;
if(ep->e_type != STR) {
opc = *p++;
! switch(opc) {
! case '=':
! ep->e_opcode = EQ;
! break;
!
! case '>':
! ep->e_opcode = GT;
! break;
!
! case '<':
! ep->e_opcode = LT;
! break;
!
! case 'x':
! ep->e_opcode = ANY;
! break;
!
! default:
! p--;
}
}
if(ep->e_opcode != ANY) {
--- 466,474 -----
*p2++ = '\0';
if(ep->e_type != STR) {
opc = *p++;
! for (i = 0; i < NOPS; i++) {
! if (opc == opnames[i])
! goto foundop;
}
p--;
goto notfound;
***************
*** 437,442
default:
p--;
}
}
if(ep->e_opcode != ANY) {
if(ep->e_type != STR)
--- 470,479 -----
if (opc == opnames[i])
goto foundop;
}
+ p--;
+ goto notfound;
+ foundop:
+ ep->e_opcode = i;
}
notfound:
if(ep->e_opcode != ANY) {
***************
*** 438,443
p--;
}
}
if(ep->e_opcode != ANY) {
if(ep->e_type != STR)
ep->e_value.num = atolo(p);
--- 475,481 -----
foundop:
ep->e_opcode = i;
}
+ notfound:
if(ep->e_opcode != ANY) {
if(ep->e_type != STR)
ep->e_value.num = atolo(p);
***************
*** 441,450
if(ep->e_opcode != ANY) {
if(ep->e_type != STR)
ep->e_value.num = atolo(p);
! else {
! ep->e_value.str = malloc(strlen(p) + 1);
! strcpy(ep->e_value.str, p);
! }
}
while(*p2 == '\t')
*p2++;
--- 479,486 -----
if(ep->e_opcode != ANY) {
if(ep->e_type != STR)
ep->e_value.num = atolo(p);
! else
! ep->e_value.str = getstr(p);
}
while(*p2 == '\t')
*p2++;
***************
*** 450,455
*p2++;
/* STRING */
ep->e_str = malloc(strlen(p2) + 1);
p = ep->e_str;
while(*p2 != '\n') {
if(*p2 == '%')
--- 486,495 -----
*p2++;
/* STRING */
ep->e_str = malloc(strlen(p2) + 1);
+ if(ep->e_str == NULL) {
+ fprintf(stderr, "file: no memory for magic table\n");
+ exit(2);
+ }
p = ep->e_str;
while(*p2 != '\n') {
if(*p2 == '%')
***************
*** 457,466
*p++ = *p2++;
}
*p = NULL;
! ep++;
! if(ep >= mend) {
! fprintf(stderr, "file: magic tab overflow - increase NENT in file.c.\n");
! exit(2);
}
}
ep->e_off = -1L;
--- 497,510 -----
*p++ = *p2++;
}
*p = NULL;
! curentry++;
! if(curentry >= nentries) {
! mtab = (Entry *) realloc(mtab, sizeof(Entry)*NENT);
! if(mtab == NULL) {
! fprintf(stderr, "file: no memory for magic table\n");
! exit(2);
! }
! nentries += NENT;
}
}
mend = &mtab[curentry];
***************
*** 463,469
exit(2);
}
}
! ep->e_off = -1L;
}
long
--- 507,513 -----
nentries += NENT;
}
}
! mend = &mtab[curentry];
}
long
***************
*** 486,491
return(j);
}
ckmtab()
{
--- 530,661 -----
return(j);
}
+ char *
+ getstr(s)
+ reg char *s;
+ {
+ auto char *store;
+ reg char *p;
+ reg char c;
+ reg int val;
+
+ if((store = malloc(strlen(s) + 1)) == NULL) {
+ fprintf(stderr, "file: no memory for magic table\n");
+ exit(2);
+ }
+ p = store;
+ while((c = *s++) != '\0') {
+ if(c == '\\') {
+ switch(c = *s++) {
+
+ case '\0':
+ goto out;
+
+ default:
+ *p++ = c;
+ break;
+
+ case 'n':
+ *p++ = '\n';
+ break;
+
+ case 'r':
+ *p++ = '\r';
+ break;
+
+ case 'b':
+ *p++ = '\b';
+ break;
+
+ case 't':
+ *p++ = '\t';
+ break;
+
+ case 'f':
+ *p++ = '\f';
+ break;
+
+ case 'v':
+ *p++ = '\v';
+ break;
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ val = c - '0';
+ c = *s++; /* try for 2 */
+ if(c >= '0' && c <= '7') {
+ val = (val<<3) | (c - '0');
+ c = *s++; /* try for 3 */
+ if(c >= '0' && c <= '7')
+ val = (val<<3) | (c-'0');
+ else
+ --s;
+ }
+ else
+ --s;
+ *p++ = val;
+ break;
+ }
+ } else
+ *p++ = c;
+ }
+ out:
+ *p = '\0';
+ return(store);
+ }
+
+ void
+ showstr(s)
+ reg char *s;
+ {
+ reg char c;
+
+ while((c = *s++) != '\0') {
+ if(c >= 040 && c <= 0176)
+ putchar(c);
+ else {
+ putchar('\\');
+ switch (c) {
+
+ case '\n':
+ putchar('n');
+ break;
+
+ case '\r':
+ putchar('r');
+ break;
+
+ case '\b':
+ putchar('b');
+ break;
+
+ case '\t':
+ putchar('t');
+ break;
+
+ case '\f':
+ putchar('f');
+ break;
+
+ case '\v':
+ putchar('v');
+ break;
+
+ default:
+ printf("%.3o", c & 0377);
+ break;
+ }
+ }
+ }
+ putchar('\t');
+ }
+
ckmtab()
{
***************
*** 502,508
mkmtab(0);
init = 1;
}
! for(ep = mtab; ep->e_off != -1L; ep++) {
if(lev1) {
if(ep->e_level != 1)
break;
--- 672,678 -----
mkmtab(0);
init = 1;
}
! for(ep = mtab; ep < mend; ep++) {
if(lev1) {
if(ep->e_level != 1)
break;
***************
*** 534,539
val.l = (*(long *) p);
break;
}
switch(ep->e_opcode & ~SUB) {
case EQ:
#ifdef u3b
--- 704,711 -----
val.l = (*(long *) p);
break;
}
+ if(ep->e_mask)
+ val.l &= ep->e_mask;
switch(ep->e_opcode & ~SUB) {
case EQ:
#ifdef u3b
More information about the Net.bugs
mailing list