atype.c & ctype.c -- simple text statistics

Tyrannosaurus Wombat rsk at j.cc.purdue.edu
Mon Oct 13 14:06:35 AEST 1986


The following two short programs calculate simple text statistics,
and occasionally come in handy; I'm sending these out in net.sources
in the hopes of garnering useful comments on them.  They tend to
be useful in debugging from time to time.

Atype prints a table of ASCII character occurrences like this...

  468 nul    4 soh    3 stx    1 etx    0 eot    0 enq    0 ack    3 bel
    0 bs     0 ht     0 nl     0 vt     2 np     0 cr     0 so     0 si 
    5 dle    0 dc1    0 dc2    0 dc3    0 dc4    0 nak    0 syn    0 etb
    0 can    0 em     0 sub    0 esc    0 fs     0 gs     0 rs     0 us 
    0 sp     0  !     0  "     0  #     0  $     0  %     0  &     0  ' 
    0  (     0  )     0  *     0  +     0  ,     0  -     6  .     0  / 
    0  0     1  1     0  2     0  3     0  4     0  5     0  6     0  7 
    0  8     0  9     0  :     0  ;     0  <     0  =     0  >     0  ? 
    0  @     0  A     0  B     0  C     0  D     0  E     0  F     0  G 
    1  H     0  I     0  J     0  K     0  L     0  M     0  N     0  O 
    0  P     0  Q     0  R     0  S     0  T     0  U     0  V     0  W 
    1  X     0  Y     0  Z     0  [     0  \     0  ]     0  ^     0  _ 
    0  `     2  a     0  b     3  c     0  d     3  e     0  f     0  g 
    0  h     0  i     0  j     0  k     0  l     0  m     0  n     0  o 
    3  p     0  q     0  r     0  s     3  t     0  u     0  v     0  w 
    0  x     3  y     0  z     0  {     0  |     0  }     0  ~     0 del

...and reads either stdin or whatever file arguments are provided.

Ctype prints a table of ctype(3) class occurrences like this...

ascii	cntrl	print	space	punct	alnum	digit	alpha	upper	lower
510	487	25	18	7	17	1	0	17	3	

...and reads either stdin or whatever file arguments are provided.

Both work on 4.2bsd.

One shortcoming of each is known: very large input can cause the printed
output fields to overflow, making the display messy.

A future release (in mod.sources) will include appropriate manual pages,
and whatever enhancements result from comments made by readers.

--------------------------------------------------
#include <stdio.h>

/*	Atype.c  find numbers of different types of characters in
*	a file...Rich Kulawiec, 8/2/82  revised 10/86
*	Note that characters 200-377 octal are mapped down onto 000-177
*	(the high bit is masked off before counting).
*/

/*
 * Three-character display labels for the 128 seven-bit ASCII codes,
 * arranged as 16 rows of 8.  The label for code c is
 * maptable[c / 8][c % 8]; the octal number in front of each row is the
 * code of its first entry.  Control characters and DEL use their
 * mnemonic names, graphic characters are shown with a blank on each side.
 */
char    *maptable[16][8] = {
	/* 000 */ { "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel" },
	/* 010 */ { "bs ", "ht ", "nl ", "vt ", "np ", "cr ", "so ", "si " },
	/* 020 */ { "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb" },
	/* 030 */ { "can", "em ", "sub", "esc", "fs ", "gs ", "rs ", "us " },
	/* 040 */ { "sp ", " ! ", " \" ", " # ", " $ ", " % ", " & ", " ' " },
	/* 050 */ { " ( ", " ) ", " * ", " + ", " , ", " - ", " . ", " / " },
	/* 060 */ { " 0 ", " 1 ", " 2 ", " 3 ", " 4 ", " 5 ", " 6 ", " 7 " },
	/* 070 */ { " 8 ", " 9 ", " : ", " ; ", " < ", " = ", " > ", " ? " },
	/* 100 */ { " @ ", " A ", " B ", " C ", " D ", " E ", " F ", " G " },
	/* 110 */ { " H ", " I ", " J ", " K ", " L ", " M ", " N ", " O " },
	/* 120 */ { " P ", " Q ", " R ", " S ", " T ", " U ", " V ", " W " },
	/* 130 */ { " X ", " Y ", " Z ", " [ ", " \\ ", " ] ", " ^ ", " _ " },
	/* 140 */ { " ` ", " a ", " b ", " c ", " d ", " e ", " f ", " g " },
	/* 150 */ { " h ", " i ", " j ", " k ", " l ", " m ", " n ", " o " },
	/* 160 */ { " p ", " q ", " r ", " s ", " t ", " u ", " v ", " w " },
	/* 170 */ { " x ", " y ", " z ", " { ", " | ", " } ", " ~ ", "del" }
};

/*
 * Occurrence counters for the 128 seven-bit character codes.
 * The counter for code c is count[c % 8][c / 8]: the first index is
 * the column of the printed table, the second is its row.
 */
int     count[8][16];

/* Stream currently being read: stdin, or the file argument opened last. */
FILE	*fp;

/* fopen() is declared by <stdio.h>; no local redeclaration is needed. */

/*
 * Read `in` to EOF, adding one to the count[] entry of every character
 * seen.  The high bit is masked off first, so codes 0200-0377 are
 * counted together with their seven-bit counterparts.
 */
static void
count_stream(FILE *in)
{
	int c;

	while ((c = getc(in)) != EOF) {
		c &= 0177;
		count[c % 8][c / 8]++;
	}
}

/*
 * atype: print a 16-row by 8-column table giving, for each seven-bit
 * ASCII code, the number of times it occurred in the input.
 *
 * With no arguments the input is stdin; otherwise every named file is
 * read in turn and the counts are summed.  A file that cannot be opened
 * is reported on stderr and skipped.
 *
 * Returns 0 on success, 1 if any file could not be opened.
 */
int
main(int argc, char *argv[])
{
	int i, row, col;
	int status = 0;

	if (argc == 1) {
		fp = stdin;
		count_stream(fp);
	}
	else {
		for (i = 1; i < argc; i++) {
			if ((fp = fopen(argv[i], "r")) == NULL) {
				(void) fprintf(stderr, "atype: can't open %s\n", argv[i]);
				status = 1;
				continue;
			}
			count_stream(fp);
			(void) fclose(fp);
		}
	}

	/* count[] is indexed column-first, maptable[] row-first. */
	for (row = 0; row < 16; row++) {
		for (col = 0; col < 8; col++)
			(void) printf("%5d %s", count[col][row], maptable[row][col]);
		(void) printf("\n");
	}

	return status;
}
--------------------------------------------------
#include <stdio.h>
#include <ctype.h>

/*     Ctype.c  find numbers of different types of characters in
*	a file...Rich Kulawiec, 4/20/81  revised 10/86
*/

/* Stream currently being read: stdin, or the file argument opened last. */
FILE *fp;

/* fopen() is declared by <stdio.h>; no local redeclaration is needed. */

void	tally(FILE *filep);

/*
 * Indices into class[], one per character class that is counted.
 * NOTE: these are storage slots only; their numeric order is not the
 * left-to-right order of the column headings that main() prints.
 */
#define	NASCII	0
#define	NCNTRL	1
#define	NPRINT	2
#define	NALNUM	3
#define	NPUNCT	4
#define	NALPHA	5
#define	NDIGIT	6
#define	NUPPER	7
#define	NLOWER	8
#define	NSPACE	9

/* Number of classes, i.e. the number of elements in class[]. */
#define NCLASS	10

/* Running total of characters seen in each class, indexed by the N* values. */
long	class[NCLASS];

main(argc, argv)
int argc;
char *argv[];
{
	int i,j;

	for( j = 0; j < NCLASS; j++)
		class[j] = 0L;

	(void) printf("ascii\tcntrl\tprint\tspace\tpunct\talnum\tdigit\talpha\tupper\tlower\n");

	if( argc == 1) {
		fp = stdin;
		tally(fp);
	}
	else {
		for ( i = 1; i < argc;  i++) {
			if( (fp=fopen(argv[i],"r")) == NULL) {
				(void) fprintf(stderr,"ctype: can't open %s\n",argv[i]);
				continue;
			}
			tally(fp);
			(void) fclose(fp);
		}
	}

	for ( j = 0; j <NCLASS; j++)
		(void) printf("%ld\t",class[j]);
	(void) printf("\n");
}

/*
 * Read filep to EOF and, for every character, add one to the class[]
 * total of each class the character belongs to.  A character normally
 * belongs to several classes at once.  Totals accumulate across calls;
 * the stream is left open for the caller to close.
 */
void
tally(FILE *filep)
{
	int	c;

	while ((c = getc(filep)) != EOF) {
		/*
		 * Seven-bit check, equivalent to isascii() but spelled out
		 * because isascii() is not part of ISO C.  Characters with
		 * the high bit set are skipped entirely: historical ctype(3)
		 * macros are documented as defined only where isascii() is
		 * true, and this program never calls setlocale(), so such
		 * characters are not expected to match any class below.
		 */
		if ((c & ~0177) != 0)
			continue;
		class[NASCII]++;

		if (iscntrl(c))
			class[NCNTRL]++;
		if (isprint(c))
			class[NPRINT]++;
		if (isspace(c))
			class[NSPACE]++;
		if (ispunct(c))
			class[NPUNCT]++;
		if (isalnum(c))
			class[NALNUM]++;
		if (isdigit(c))
			class[NDIGIT]++;
		if (isalpha(c))
			class[NALPHA]++;
		if (isupper(c))
			class[NUPPER]++;
		if (islower(c))
			class[NLOWER]++;
	}
}
--------------------------------------------------



More information about the Comp.sources.unix mailing list