Strings(1) command for COFF, customizable as well
Piercarlo Grandi
pcg at aber-cs.UUCP
Mon Apr 17 09:35:53 AEST 1989
Here is another old source I have done, especially useful to Microport
SystemV/AT users, that implements the strings(1) command for COFF files.
Strings(1) extracts whatever looks like ASCII strings from a datafile
or an executable. If the file looks like an executable, its text portion
is not scanned.
This strings(1) clone works on COFF files. It is also more flexible
than BSD strings(1), and more easily customized to other formats and
tasks.
-------------------- cut here -----------------------------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
# strings.1
# strings.c
# This archive created: Sun Mar 26 17:40:10 1989
export PATH; PATH=/bin:$PATH
if test -f 'strings.1'
then
echo shar: will not over-write existing file "'strings.1'"
else
cat << \SHAR_EOF > 'strings.1'
.TH STRINGS 1
.ad b
.SH NAME
strings \- print strings in a file, or a COFF executable
.SH SYNOPSIS
.B strings
.BR - [ acop ]
.BR - nnnn
.RB [ - ]
[ file ... ]
.SH DESCRIPTION
This program reads the given
.IR file s
(or the standard input if none is given)
and prints out all strings in it whose length is at least
.IR nnnn ,
by default 4.
.LP
By default the file is tested to see if it is an executable or relocatable,
or an archive, and if so only the strings in the data section(s) are
considered. Note that if it is an archive then strings are printed for
.B all
the members of the archive.
.LP
The meaning of the options is:
.IP "\fB-a\fP"
Print all strings in the file regardless of its type.
.IP "\fB-c\fP"
Print only NUL terminated strings (or NL terminated ones), i.e. only
bona fide C strings.
.IP "\fB-o\fP"
Print before each string its offset in hex.
.IP "\fB-p\fP"
Print before each string the pathname of the file it is in.
.IP "\fB-\fP"
After this null option all the arguments are assumed to be filenames;
it can be used if the first filename begins with a dash.
.SH AUTHOR
(C) 1988 Piercarlo Grandi.
.SH BUGS
Just like with the BSD
.IR strings (1),
options may be specified only before any file name.
.LP
The maximum string size is a compile time constant; longer strings are
truncated when printed (this size is typically 512, so it is not a problem).
.LP
The file offset is printed in hex, and thus not exactly compatible
with BSD
.IR strings (1).
SHAR_EOF
fi # end of overwriting check
if test -f 'strings.c'
then
echo shar: will not over-write existing file "'strings.c'"
else
cat << \SHAR_EOF > 'strings.c'
/*
$Header: /aware0/aware/piercarl/Src./Commands./strings.c,v 1.8 89/03/26 17:39:13 piercarl Exp $
*/
static char Notice[] =
"Copyright (C) 1988 Piercarlo Grandi. All rights reserved.";
/*
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 1, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You may have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
An introduction to this program.
This is a clone of the strings(1) program present under BSD. It is
much more general, and most importantly supports COFF style
executables.
It is easy to modify this program along two different lines, one way
is for other types of executables, and another is for a task different
from printing strings.
This is possible because scanning a COFF file and printing strings are
respectively a functional and a function; the scanner may be passed
any suitable function (e.g. a disassembler), and the function may be
used with many different scanners.
It is not an exact clone of strings(1); two new options are allowed,
-p to print the pathname of the file before each string, and -c to
print only null terminated strings, that is C strings.
*/
#define public /* extern */
#define private static
#define reg register
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
extern long ftell();
#define StringsTRACE 0
#define StringsCOFF 1 /* COFF executable format */
#define StringsBSD 0 /* BSD executables */
#define StringsV7 0 /* V7 executables */
#define StringsX86 0 /* Xenix/Intel executables */
#define StringsMAX (512)
#if (StringsAOUT)
# include "a.out.h"
#endif
#if (StringsCOFF)
# include "filehdr.h"
# include "scnhdr.h"
# include "ldfcn.h"
#endif
/*
The options. All but the last are boolean flags; they are set by the
option parser in main() and only read afterwards.
*/
short unsigned dasha = 0; /* -a: scan the whole file, even if it is an executable (tested in main) */
short unsigned dasho = 0; /* -o: print the hex file offset before each string */
short unsigned dashp = 0; /* -p: print the pathname of the file before each string */
short unsigned dashc = 0; /* -c: accept only NUL terminated strings; newline then no longer ends a string */
short unsigned dashmin = 4; /* -nnnn: minimum length of a string to print; main never lets it go below 1 */
/*
    Tell whether the object open on the given stream, and reachable
    under the given path, is an executable image (or an archive of
    executable images), i.e. something whose data sections can be
    picked out instead of scanning the whole file.

    file    stream already open on the object; only used for fstat(2)
    path    name of the same object; used for diagnostics and to open
            it a second time through ldopen(3X)

    Returns non zero if the object is a regular file that ldopen(3X)
    accepts and that is an archive or has a COFF magic number, zero in
    every other case (including errors, which are reported on stderr).
    When COFF support is compiled out the answer is always zero.
*/
private short unsigned StringsAout(file,path)
    FILE *file;
    char *path;
{
    /*
        Our first test is that the given file is a regular file, because
        we MUST be able to lseek(2) an executable (admittedly we could
        relax this precondition, but the great care and effort needed
        are probably not worth the utility).
    */
    checkForFile:
    {
        struct stat status;

        if (fstat(fileno(file),&status) < 0)
        {
            perror(path);
            return 0;
        }

#       if (StringsTRACE)
            fprintf(stderr,"Aout: mode 0%o\n",(unsigned) status.st_mode);
#       endif

        if ((status.st_mode&S_IFMT) != S_IFREG)
            return 0;
    }

#   if (StringsCOFF)
    hasCoffFormat:
    {
        extern int errno;
        LDFILE *ldfile;
        short unsigned aout;

        /*
            A failed ldopen() is reported only if errno is set, which
            presumably means it can also fail without touching errno
            (e.g. when the file simply is not an object file). Clear
            errno first, so that a value left behind by some earlier,
            unrelated call is not printed as if it were ours.
        */
        errno = 0;
        if ((ldfile = ldopen(path,NULL)) == NULL)
        {
            if (errno != 0)
                perror(path);
            return 0;
        }

#       if (StringsTRACE)
            fprintf(stderr,"Aout: TYPE 0%o, f_magic 0%o\n",
                TYPE(ldfile),HEADER(ldfile).f_magic);
#       endif

        aout = (TYPE(ldfile) == ARTYPE) || ISCOFF(HEADER(ldfile).f_magic);

        /* We only peeked at the headers; close unconditionally. */
        ldaclose(ldfile);

        return aout;
    }
#   else
    /*
        No executable format is compiled in: nothing can be recognized,
        so every file is to be scanned as plain data.
    */
    return 0;
#   endif
}
/*
    Apply the extractor to the data section(s) of the executable (or of
    every member of the archive) found at the given path.

    file        stream open on the same object; not used by the COFF
                scanner, which reopens the object by name
    path        name of the object, used for ldopen(3X) and diagnostics
    extractor   called as (*extractor)(stream,path,size) once for each
                data section, with the stream positioned at the start
                of the section and size taken from the section header

    Because the object is reopened by name, the caller must only pass a
    path that really names the file to be scanned (so never for data
    arriving on the original standard input).

    Failures to read a section header or to seek to a section are
    reported with perror() and that section is skipped.
*/
private void StringsData(file,path,extractor)
    FILE *file;
    char *path;
    void (*extractor)(/*FILE *,char *,long*/);
#if (StringsCOFF)
{
    LDFILE *ldfile;

    /*
        The usual ldopen/ldclose walk: as long as ldclose() does not
        report SUCCESS, ldopen() is called again with the old handle to
        move on (presumably to the next archive member -- see
        ldopen(3X)/ldclose(3X)).
    */
    scanAllArchiveMembers:
    for
    (
        ldfile = ldopen(path,NULL);
        ldfile != NULL;
        ldfile = (ldclose(ldfile) == SUCCESS) ? NULL : ldopen(path,ldfile)
    )
        if (ISCOFF(HEADER(ldfile).f_magic))
        {
            short unsigned section;
            SCNHDR scnhdr;

#           if (StringsTRACE)
                fprintf(stderr,"Data: f_nscns %u\n",HEADER(ldfile).f_nscns);
#           endif

            /* Section numbers handed to ldshread() start at 1. */
            ForAllSections:
            for (section = 1; section <= HEADER(ldfile).f_nscns; section++)
                if (ldshread(ldfile,section,&scnhdr) != SUCCESS)
                    perror(path);
                else
                {
#                   if (StringsTRACE)
                        fprintf(stderr,"Data: section %u, s_flags 0%lo\n",
                            section,scnhdr.s_flags);
#                   endif

                    /* Only regular sections flagged as data are of interest. */
                    skipNonDataSection:
                    if ((scnhdr.s_flags & 0x0000000f) != STYP_REG
                        || !(scnhdr.s_flags & STYP_DATA))
                        continue;

#                   if (StringsTRACE)
                        fprintf(stderr,"Data: s_scnptr 0x%08lx, s_size 0x%08lx\n",
                            scnhdr.s_scnptr,scnhdr.s_size);
#                   endif

                    /*
                        Test against SUCCESS, exactly as for ldshread()
                        above; a "< 0" test would never notice a failed
                        seek if failure is reported as FAILURE (zero).
                    */
                    seekForSectionStart:
                    if (ldsseek(ldfile,section) != SUCCESS)
                    {
                        perror(path);
                        continue;
                    }

                    extractFromSection:
                    (*extractor)(IOPTR(ldfile),path,scnhdr.s_size);
                }
        }
}
#else
{
    /*
        No executable format is compiled in, so there is no section
        table to walk: hand the whole stream to the extractor, with the
        same "practically unlimited" byte count main() uses.
    */
    (*extractor)(file,path,0x0fffffffL);
}
#endif
/*
This defines which characters are part of a good string: any printable
character, plus anything isspace() accepts.
The argument is expanded twice, so it must have no side effects, and it
must be a value the <ctype.h> tests accept (the scanner passes what
getc() returned, after having excluded EOF).
*/
#define StringsGOOD(c) (isprint(c) || isspace(c))
/*
This accumulates a good string, starting from the current file
position for the given number of bytes. We consider a string
terminated either by a newline or by a non string character, or if
strict C type strings are wanted, by a null character, as we assume
that non newline, non null terminated strings are spurious.
*/
private void StringsScan(file,path,bytes)
FILE *file;
char *path;
long bytes;
{
static char string[StringsMAX];
reg char *s;
reg int c;
reg short unsigned l;
# if (StringsTRACE)
fprintf(stderr,"Scan: ftell 0x%08lx, bytes 0x%08lx\n",ftell(file),bytes);
# endif
for (s = string, l = 0; bytes != 0 && (c = getc(file)) != EOF; --bytes)
{
/*
A potential string is terminated either by a newline or by a
non string character. If this is a non terminal character,
we add it to the string as long as the string max length is
not overflowed.
Note that if we only accept null terminated strings, newline
is no longer considered to be a string terminator.
*/
if (StringsGOOD(c) && (c != '\n' || dashc))
{
if (s < (string + sizeof string - 1))
*s++ = c;
l++;
}
/*
The string has been terminated; if it is non empty, and
longer than the prescribed minimum, and (only when flag -c
is on) zero terminated, it is a bona fide string and it will
be printed.
Notice that with the following condition we will print also
zero length strings IFF they are newline terminated; with a
zero dashmin we would otherwise recognize a null string
between every couple of bad characters.
*/
else if (s != string || c == '\n')
{
*s++ = '\n'; if (c == '\n') l++; *s++ = '\0';
if (l >= dashmin && (!dashc || c == '\0'))
{
if (dashp) printf("%s: ",path);
if (dasho) printf("0x%08lx: ",ftell(file)-1-l);
fputs(string,stdout);
}
s = string, l = 0;
}
}
if (ferror(stdin))
perror(path);
}
/*
    Our job is easy; first we collect the options, then we apply the
    extractor, directly or thru the executable scanner, to each of the
    files whose names remain as arguments, or to the standard input if
    none remains.

    Options are accepted only before the first file name. A lone "-"
    ends the options and is itself discarded, so that a following
    argument is taken as a file name even if it begins with a dash.

    Exits with status 1 on a syntax error or if a file cannot be
    opened, with status 0 otherwise.
*/
/*ARGSUSED*/
public int main(argc,argv,envp)
    int argc;
    char **argv;
    char **envp;
{
    char *path;
    short unsigned fromStdin;

    parseOptions:
    for
    (
        --argc,argv++;
        argc > 0 && (*argv)[0] == '-' && (*argv)[1] != '\0';
        --argc, argv++
    )
    {
        register char *flags;

        for (flags = *argv + 1; *flags != '\0'; flags++)
            switch (*flags)
            {
            case 'o': dasho = 1; break;
            case 'a': dasha = 1; break;
            case 'p': dashp = 1; break;
            case 'c': dashc = 1; break;

            default:
                /* Anything else must be the start of the minimum length. */
                if (!isdigit((unsigned char) *flags))
                {
                    fputs("Syntax: strings [ -[acop] ] [ -nnnn ] [ - ] [ file ...]\n",stderr);
                    exit(1);
                }

                dashmin = *flags-'0';
                for (flags++; isdigit((unsigned char) *flags); flags++)
                    dashmin = dashmin*10 + (*flags-'0');
                /* Step back: the enclosing for will advance again. */
                --flags;

                if (dashmin < 1)
                    dashmin = 1;
            }
    }

    /*
        The loop above stops at a lone "-" without consuming it; drop
        it here, or it would be opened as a file named "-".
    */
    if (argc > 0 && (*argv)[0] == '-' && (*argv)[1] == '\0')
        --argc, argv++;

    /*
        With no file arguments we read the standard input as it was
        given to us. The name "stdin" is used for messages only; it is
        kept in a local, as the argument vector has no slot for it.
    */
    fromStdin = (argc <= 0);

    /*
        A do because we must loop at least once on stdin, even if no arg
        path is given.
    */
    processInputs:
    do
    {
        if (fromStdin)
            path = "stdin";
        else
        {
            path = *argv;
            --argc, argv++;

            if (freopen(path,"r",stdin) == NULL)
            {
                perror(path);
                exit(1);
            }
        }

        /*
            If this is an actual named file and is an executable image
            (and option a is not specified) we will scan just the data
            sections, else we will scan all the file.

            The executable scanner reopens its input by name, so it is
            never tried on the original standard input, for which we
            have no real name; for named files Aout() ensures that
            StringsData is called ONLY on a regular file to which
            lseek(2) can be applied.
        */
        if (dasha || fromStdin || !StringsAout(stdin,path))
            StringsScan(stdin,path,0x0fffffffL);
        else
            StringsData(stdin,path,StringsScan);
    }
    while (argc > 0);

    fclose(stdin);

    exit(0);
    /*NOTREACHED*/
}
SHAR_EOF
fi # end of overwriting check
# End of shell archive
exit 0
--
Piercarlo "Peter" Grandi | ARPA: pcg%cs.aber.ac.uk at nsfnet-relay.ac.uk
Dept of CS, UCW Aberystwyth | UUCP: ...!mcvax!ukc!aber-cs!pcg
Penglais, Aberystwyth SY23 3BZ, UK | INET: pcg at cs.aber.ac.uk
More information about the Alt.sources
mailing list