Long waits for inodes on Power Series

Mark Bartelt sysmark at physics.utoronto.ca
Fri Nov 9 23:16:56 AEST 1990


A couple of days ago I noticed that things like "du" and "find" seemed to
be taking a ridiculously long time to run.  A bit of poking around
uncovered a weirdness that I don't understand.  Perhaps someone else has
encountered the problem, and maybe even knows what the cause is.  (IRIX
bug?  Configuration botch?  Something else?)

Basically, it seems to be taking our 4D/280 much longer to grab an inode
than it ought to.  Once a file is open, things seem to motor along quite
nicely.  The numbers below show the per-file average elapsed time and the
per-file average system time (both in milliseconds) reported by "timestat",
for the first run (cold inode cache) and for the three runs after it,
when running the following ...

        find /usr/*bin /usr/etc /usr/include -type f
        for x in 1 2 3 4; do
                timestat /usr/lib
        done

... on several Personal IRISes (4D/20s and 4D/25s) and on the Power Series
4D/280.  (The "timestat" program is appended below, in case anyone wants
to try the same test on his/her own system.)  All "timestat" does is
stat() every file in a directory and report the times.  The "find" in the
example above is an attempt to clear the inode cache of all references
to files living in the directory about to be timestat'ed.

                 ----first----    ---others---
                  etime  stime    etime  stime
        PI #1      6.31   0.51     0.17   0.17
        PI #2      6.97   1.74     0.45   0.45
        PI #3      4.46   0.96     0.18   0.18
        PI #4      4.28   0.72     0.18   0.18
        PI #5     13.70   5.29     0.72   0.72
   !!!  PS       223.83  89.29     0.31   0.31

What one finds is that, indeed, the inode caching makes things run fast,
once the inodes have been grabbed.  However, why does it take so bloody
long to get the inodes in the first place?  And why is the problem seen
only on the Power Series IRIS, and not on the PIs?
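
(For anyone who wants to see the caching effect in isolation without
running the full test: the fragment below is a minimal sketch, using the
same times()/HZ arithmetic as "timestat", which simply stats the same
path twice and prints both times.  Provided the path hasn't been touched
recently, the first stat() should be the expensive one; note that times()
only ticks HZ times per second, so the warm call will usually round to
zero.)

/*
 *	Minimal sketch: stat() the same path twice and compare.
 *	The first call may have to read the inode from disk; the
 *	second should be satisfied from the in-core inode cache.
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/times.h>
#include <sys/param.h>		/* HZ */
#include <stdio.h>

main(ac,av)
int	ac;
char **	av;
{
	struct tms	t;
	long		t0, t1, t2;
	struct stat	sb;

	if ( ac != 2 ) {
		fprintf(stderr,"Usage: %s file\n",*av);
		exit(1);
	}
	t0 = times(&t);
	stat(av[1],&sb);	/* cold: may go to disk for the inode */
	t1 = times(&t);
	stat(av[1],&sb);	/* warm: should hit the inode cache */
	t2 = times(&t);
	printf("cold %.2f ms, warm %.2f ms\n",
		(float)(t1-t0)*1000.0/(float)HZ,
		(float)(t2-t1)*1000.0/(float)HZ);
	exit(0);
}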

The results seem not to depend much on system load.  The numbers do tend
to differ somewhat between a heavily loaded system and an essentially idle
one, but the orders of magnitude stay the same.  With nobody else logged
on, six compute-bound processes running (fewer than the number of CPUs),
and no significant amount of swapping going on (a safe assumption, I
think, since the total memory used by all processes at the time was under
128MB on a 256MB machine), we get these results:

        PS        65.20  51.07     0.26   0.26

One possibly relevant bit of information is that the 4D/280 is running
IRIX 3.3.1, as is PI #5.  (Note that the times for #5 are in fact
noticeably higher than for the other PIs, though nowhere near the
magnitude of the PS numbers.)  The other PIs are still running 3.2.1.

Anyway, this is a rather major pain in the arse, since it makes things
like "find" and "du" essentially useless on large hierarchies.  Does
anyone have any suggestions as to what the problem may be?

Thanks in advance.

Mark Bartelt                                               416/978-5619
Canadian Institute for                         sysmark at cita.toronto.edu
Theoretical Astrophysics                       sysmark at cita.utoronto.ca

-------------------- Cut here for a copy of timestat.c --------------------

/*
 *	Invoke stat() on every file in a directory, and
 *	print total and average per-file elapsed time,
 *	stime, and utime.
 *
 *	We read in all the filenames first, to ensure that
 *	all we're measuring is the stat() times.
 *
 */

#include <sys/types.h>
#include <sys/times.h>
#include <sys/param.h>		/* HZ, the clock tick rate */
#include <sys/stat.h>
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>		/* malloc(), exit() */
#include <string.h>		/* strlen(), strcpy() */
#include <unistd.h>		/* chdir() */

struct list {
	struct list *	next;
	char *		name;
};

struct list *	list_head;
struct list *	list_tail;

int	nfiles;		/* number of directory entries collected */

main(ac,av)
int	ac;
char **	av;
{
	DIR *		dp;
	struct dirent *	dep;
	struct tms	start;
	long		start_time;
	struct tms	finish;
	long		finish_time;
	struct list *	lp;
	struct stat	stbuf;
	float		etime;
	float		stime;
	float		utime;

	if ( ac != 2 ) {
		fprintf(stderr,"Usage: %s dir\n",*av);
		exit(1);
	}
	if ( chdir(*++av) ) {
		fprintf(stderr,"Can't chdir to %s\n",*av);
		exit(2);
	}
	if ( (dp=opendir(".")) == NULL ) {
		fprintf(stderr,"Can't opendir(%s)\n",*av);
		exit(3);
	}
	while ( (dep=readdir(dp)) != NULL )
		add_to_list(dep->d_name);
	closedir(dp);
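	/*
	 * The timed region below does one stat() per name; stat()'s
	 * return value is deliberately ignored, since a failed lookup
	 * costs a name lookup too.
	 */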
	start_time = times(&start);
	for ( lp = list_head; lp; lp = lp->next )
		stat(lp->name,&stbuf);
	finish_time = times(&finish);
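	/* convert the tick deltas to milliseconds: ticks * 1000 / HZ */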
	etime = (float)( finish_time - start_time ) * 1000.0 / (float)HZ;
	stime = (float)( finish.tms_stime - start.tms_stime ) * 1000.0 / (float)HZ;
	utime = (float)( finish.tms_utime - start.tms_utime ) * 1000.0 / (float)HZ;
	fprintf(stdout,"%d files\n",nfiles);
	fprintf(stdout,"etime: %10.2f %10.2f\n",etime,etime/(float)nfiles);
	fprintf(stdout,"stime: %10.2f %10.2f\n",stime,stime/(float)nfiles);
	fprintf(stdout,"utime: %10.2f %10.2f\n",utime,utime/(float)nfiles);
	exit(0);
}

/*
 *	Append a copy of filename p to the global list, and
 *	count it in nfiles.
 */
add_to_list(p)
char *	p;
{
	struct list *	lp;

	if ( (lp=(struct list *)malloc(sizeof(struct list))) == NULL
	  || (lp->name=(char *)malloc(strlen(p)+1)) == NULL ) {
		fprintf(stderr,"malloc failure\n");
		exit(4);
	}
	lp->next = NULL;	/* terminate the list; malloc() doesn't zero memory */
	if ( !list_head )
		list_head = lp;
	else
		list_tail->next = lp;
	list_tail = lp;
	strcpy(lp->name,p);
	++nfiles;
}


