atype.c & ctype.c -- simple text statistics
Tyrannosaurus Wombat
rsk at j.cc.purdue.edu
Mon Oct 13 14:06:35 AEST 1986
The following two short programs calculate simple text statistics,
and occasionally come in handy; I'm sending these out in net.sources
in the hopes of garnering useful comments on them. They tend to
be useful in debugging from time to time.
Atype prints a table of ASCII occurrences like this...
468 nul 4 soh 3 stx 1 etx 0 eot 0 enq 0 ack 3 bel
0 bs 0 ht 0 nl 0 vt 2 np 0 cr 0 so 0 si
5 dle 0 dc1 0 dc2 0 dc3 0 dc4 0 nak 0 syn 0 etb
0 can 0 em 0 sub 0 esc 0 fs 0 gs 0 rs 0 us
0 sp 0 ! 0 " 0 # 0 $ 0 % 0 & 0 '
0 ( 0 ) 0 * 0 + 0 , 0 - 6 . 0 /
0 0 1 1 0 2 0 3 0 4 0 5 0 6 0 7
0 8 0 9 0 : 0 ; 0 < 0 = 0 > 0 ?
0 @ 0 A 0 B 0 C 0 D 0 E 0 F 0 G
1 H 0 I 0 J 0 K 0 L 0 M 0 N 0 O
0 P 0 Q 0 R 0 S 0 T 0 U 0 V 0 W
1 X 0 Y 0 Z 0 [ 0 \ 0 ] 0 ^ 0 _
0 ` 2 a 0 b 3 c 0 d 3 e 0 f 0 g
0 h 0 i 0 j 0 k 0 l 0 m 0 n 0 o
3 p 0 q 0 r 0 s 3 t 0 u 0 v 0 w
0 x 3 y 0 z 0 { 0 | 0 } 0 ~ 0 del
...and reads either stdin or whatever file arguments are provided.
Ctype prints a table of ctype(3) occurrences like this...
ascii cntrl print space punct alnum digit alpha upper lower
510 487 25 18 7 17 1 0 17 3
...and reads either stdin or whatever file arguments are provided.
Both work on 4.2bsd.
One shortcoming of each is known: very large input can cause the printed
output fields to overflow, making the display messy.
A future release (in mod.sources) will include appropriate manual pages,
and whatever enhancements result from comments made by readers.
--------------------------------------------------
#include <stdio.h>
/* Atype.c find numbers of different types of characters in
* a file...Rich Kulawiec, 8/2/82 revised 10/86
* Note that characters 200-377 octal are mapped down onto 000-177
* (only the low seven bits of each character are used).
*/
/*
 * Three-character display label for every 7-bit ASCII code.
 * maptable[r][c] is the label for code r*8 + c, so each of the 16 rows
 * is one printed line of the report and each of the 8 columns one entry
 * on that line.  The octal range covered by a row is noted beside it.
 */
char *maptable[16][8] = {
    /* 000-007 */ { "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel" },
    /* 010-017 */ { "bs ", "ht ", "nl ", "vt ", "np ", "cr ", "so ", "si " },
    /* 020-027 */ { "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb" },
    /* 030-037 */ { "can", "em ", "sub", "esc", "fs ", "gs ", "rs ", "us " },
    /* 040-047 */ { "sp ", " ! ", " \" ", " # ", " $ ", " % ", " & ", " ' " },
    /* 050-057 */ { " ( ", " ) ", " * ", " + ", " , ", " - ", " . ", " / " },
    /* 060-067 */ { " 0 ", " 1 ", " 2 ", " 3 ", " 4 ", " 5 ", " 6 ", " 7 " },
    /* 070-077 */ { " 8 ", " 9 ", " : ", " ; ", " < ", " = ", " > ", " ? " },
    /* 100-107 */ { " @ ", " A ", " B ", " C ", " D ", " E ", " F ", " G " },
    /* 110-117 */ { " H ", " I ", " J ", " K ", " L ", " M ", " N ", " O " },
    /* 120-127 */ { " P ", " Q ", " R ", " S ", " T ", " U ", " V ", " W " },
    /* 130-137 */ { " X ", " Y ", " Z ", " [ ", " \\ ", " ] ", " ^ ", " _ " },
    /* 140-147 */ { " ` ", " a ", " b ", " c ", " d ", " e ", " f ", " g " },
    /* 150-157 */ { " h ", " i ", " j ", " k ", " l ", " m ", " n ", " o " },
    /* 160-167 */ { " p ", " q ", " r ", " s ", " t ", " u ", " v ", " w " },
    /* 170-177 */ { " x ", " y ", " z ", " { ", " | ", " } ", " ~ ", "del" }
};
/*
 * Occurrence counts.  A character with 7-bit code v is counted in
 * count[v % 8][v / 8], i.e. first index = column, second index = row
 * of the printed table.
 */
int count[8][16];
/* Stream currently being read: stdin, or the file argument being processed. */
FILE *fp;
/* Old-style (no prototype) declaration of the stdio fopen() routine. */
FILE *fopen();
/*
 * Read `in' to end of file, adding each character to the count table.
 * Only the low seven bits of a character are used, so codes 0200-0377
 * are counted together with the ASCII code that shares those bits.
 */
static void
count_stream(in)
FILE *in;
{
    int c, code;

    while ((c = getc(in)) != EOF) {
        code = c & 0177;
        count[code % 8][code / 8]++;
    }
}

/*
 * atype: print a table (16 lines of 8 entries) giving the number of
 * occurrences of each ASCII code in the input.
 *
 * With no arguments the input is stdin; otherwise every argument is
 * opened as a file and all files are accumulated into one table.  A
 * file that cannot be opened is reported on stderr and skipped.
 *
 * Returns 0 when every input was read, 1 when at least one file could
 * not be opened, so that callers get a well-defined exit status.
 */
int
main(argc, argv)
int argc;
char *argv[];
{
    int i, row, col;
    int status = 0;

    if (argc == 1) {
        fp = stdin;
        count_stream(fp);
    } else {
        for (i = 1; i < argc; i++) {
            if ((fp = fopen(argv[i], "r")) == NULL) {
                (void) fprintf(stderr, "atype: can't open %s\n", argv[i]);
                status = 1;
                continue;
            }
            count_stream(fp);
            (void) fclose(fp);
        }
    }

    /* count[] is indexed [column][row]; maptable[] is indexed [row][column]. */
    for (row = 0; row < 16; row++) {
        for (col = 0; col < 8; col++)
            (void) printf("%5d %s", count[col][row], maptable[row][col]);
        (void) printf("\n");
    }
    return status;
}
--------------------------------------------------
#include <stdio.h>
#include <ctype.h>
/* Ctype.c find numbers of different types of characters in
* a file...Rich Kulawiec, 4/20/81 revised 10/86
*/
/* Stream currently being read: stdin, or the file argument being processed. */
FILE *fp;
/* Old-style (no prototype) declaration of the stdio fopen() routine. */
FILE *fopen();
/* Forward declaration; tally() is defined after main(). */
void tally();
/*
 * Indices into class[], one per <ctype.h> test applied by tally().
 * Note that this numeric order is NOT the left-to-right column order of
 * the header line that main() prints (there "space" is the fourth column).
 */
#define NASCII 0	/* isascii() */
#define NCNTRL 1	/* iscntrl() */
#define NPRINT 2	/* isprint() */
#define NALNUM 3	/* isalnum() */
#define NPUNCT 4	/* ispunct() */
#define NALPHA 5	/* isalpha() */
#define NDIGIT 6	/* isdigit() */
#define NUPPER 7	/* isupper() */
#define NLOWER 8	/* islower() */
#define NSPACE 9	/* isspace() */
#define NCLASS 10	/* number of classes; dimension of class[] */
/* Running totals, one per class, accumulated over every input read. */
long class[NCLASS];
main(argc, argv)
int argc;
char *argv[];
{
int i,j;
for( j = 0; j < NCLASS; j++)
class[j] = 0L;
(void) printf("ascii\tcntrl\tprint\tspace\tpunct\talnum\tdigit\talpha\tupper\tlower\n");
if( argc == 1) {
fp = stdin;
tally(fp);
}
else {
for ( i = 1; i < argc; i++) {
if( (fp=fopen(argv[i],"r")) == NULL) {
(void) fprintf(stderr,"ctype: can't open %s\n",argv[i]);
continue;
}
tally(fp);
(void) fclose(fp);
}
}
for ( j = 0; j <NCLASS; j++)
(void) printf("%ld\t",class[j]);
(void) printf("\n");
}
/*
 * Read `filep' to end of file and add each character to the class[]
 * totals: one increment for every <ctype.h> class the character
 * belongs to (a character usually belongs to several).
 *
 * filep: an open, readable stream; it is not closed here.
 */
void
tally(filep)
FILE *filep;
{
    int c;

    while ((c = getc(filep)) != EOF) {
        /*
         * Historical ctype(3) implementations define the remaining
         * tests only for values where isascii() is true (plus EOF), so
         * a byte in the range 0200-0377 must not be passed to them.
         * Such a byte is in none of the classes counted here; in the
         * default "C" locale on current systems the tests typically
         * report the same thing, so totals there are unchanged.
         */
        if (!isascii(c))
            continue;
        class[NASCII]++;

        if (iscntrl(c))
            class[NCNTRL]++;
        if (isprint(c))
            class[NPRINT]++;
        if (isspace(c))
            class[NSPACE]++;
        if (ispunct(c))
            class[NPUNCT]++;
        if (isalnum(c))
            class[NALNUM]++;
        if (isdigit(c))
            class[NDIGIT]++;
        if (isalpha(c))
            class[NALPHA]++;
        if (isupper(c))
            class[NUPPER]++;
        if (islower(c))
            class[NLOWER]++;
    }
}
--------------------------------------------------
More information about the Comp.sources.unix
mailing list