problem with fread/fwrite
Chris Torek
chris at mimsy.umd.edu
Tue Nov 13 20:48:37 AEST 1990
In article <2677 at cirrusl.UUCP> dhesi%cirrusl at oliveb.ATC.olivetti.com
(Rahul Dhesi) writes:
>I believe the requirement to call fseek (etc.) when switching arises
>out of the need to make stdio fast. Due to buffering, alternating
>reads and writes can confuse each other. The only way the stdio
>library could automatically protect you against this would be for it to
>explicitly test for internal state before every read and write.
Although this is (effectively) the reason the V7 Unix stdio and all its
descendents (and, presumably, whatever predecessor eventually became
the USG stdio and thence the System V stdio, though I have not looked
closer than determining that the SVR3 stdio was absolutely horrid
inside) ... where was I? Oh yes, the reason most Unix stdios do not
check. Right.
Your average out-of-the-box Unix stdio has, for efficiency, two
particular state variables in each FILE. One is a pointer into a
current buffer, and the other is a count. For `getc' operations, if
the count is positive, one decrements it and fetches through the
pointer, which is then incremented. For `putc' operations, if the
count is positive, one decrements it and stores through the pointer,
which is then incremented. This means that buffered I/O, which
typically stores somewhere between 512 and 65536 characters in each
buffer, can handle somewhere between 511 and 65535 `calls' to `getc' or
`putc' within an inline macro expansion. Unfortunately, it also means
that
fp = fopen("foo", "w+");
...
putc(' ', fp);
c = getc(fp);
tends to `get' a random value (whatever happened to be in the current
buffer).
This particular `feature' is easy to fix without sacrificing
efficiency. Instead of carrying one count and one pointer, stdio can
carry *two* counts (and, as it turns out, one pointer). The current
read or write state is then stored implicitly in the two counts (as
well as explicitly elsewhere, of course). The following extracts from
my <stdio.h> should give you the idea.
/*
 * Stdio buffers.
 *
 * A (base pointer, size) pair describing one buffer.  FILE embeds three
 * of these: _bf (the main buffer), _ub (the ungetc buffer) and _lb (the
 * fgetline buffer).
 */
struct __sbuf {
unsigned char *_base; /* buffer storage; NULL means no buffer is attached */
int _size; /* presumably the size of that storage in bytes -- TODO confirm */
};
/*
 * Stdio state variables.
 *
 * The following always hold:
 *
 *	if (_flags&(__SLBF|__SWR)) == (__SLBF|__SWR),
 *		_lbfsize is -_bf._size, else _lbfsize is 0
 *	if _flags&__SRD, _w is 0
 *	if _flags&__SWR, _r is 0
 *
 * This ensures that the getc and putc macros (or inline functions) never
 * try to write to a file that is in `read' mode, or to read from a file
 * that is in `write' mode: with the relevant count held at 0, the
 * decrement in __sputc/__sgetc goes negative and the operation is handed
 * to __swbuf()/__srget() instead of touching the buffer directly.
 * (Moreover, they can, and do, automatically switch from read mode to
 * write mode, and back, on "r+" and "w+" files.)
 *
 * _lbfsize is used only to make the inline line-buffered output stream
 * code as compact as possible.
 *
 * _ub, _up, and _ur are used when ungetc() pushes back more characters
 * than fit in the current _bf, or when ungetc() pushes back a character
 * that does not match the previous one in _bf. When this happens,
 * _ub._base becomes non-nil (i.e., a stream has ungetc() data iff
 * _ub._base!=NULL) and _up and _ur save the current values of _p and _r.
 */
typedef struct __sFILE {
/* hot state: the only members the inline getc/putc paths touch, besides _lbfsize */
unsigned char *_p; /* current position in (some) buffer */
int _r; /* read space left for getc() */
int _w; /* write space left for putc() */
short _flags; /* flags, below; this FILE is free if 0 */
short _file; /* fileno, if Unix descriptor, else -1 */
struct __sbuf _bf; /* the buffer (at least 1 byte, if !NULL) */
int _lbfsize; /* 0 or -_bf._size, for inline putc */
/* operations */
/* each I/O function receives _cookie as its first argument */
void *_cookie; /* cookie passed to io functions */
#if __STDC__ || c_plusplus
int (*_read)(void *_cookie, char *_buf, int _n);
int (*_write)(void *_cookie, const char *_buf, int _n);
fpos_t (*_seek)(void *_cookie, fpos_t _offset, int _whence);
int (*_close)(void *_cookie);
#else
/* pre-ANSI compilers: same four functions, declared without prototypes */
int (*_read)();
int (*_write)();
fpos_t (*_seek)();
int (*_close)();
#endif
/* separate buffer for long sequences of ungetc() */
struct __sbuf _ub; /* ungetc buffer */
unsigned char *_up; /* saved _p when _p is doing ungetc data */
int _ur; /* saved _r when _r is counting ungetc data */
/* tricks to meet minimum requirements even when malloc() fails */
unsigned char _ubuf[3]; /* guarantee an ungetc() buffer */
unsigned char _nbuf[1]; /* guarantee a getc() buffer */
/* separate buffer for fgetline() when line crosses buffer boundary */
struct __sbuf _lb; /* buffer for fgetline() */
/* Unix stdio files get aligned to block boundaries on fseek() */
int _blksize; /* stat.st_blksize (may be != _bf._size) */
int _offset; /* current lseek offset */
} FILE;
/* array of FILE objects defined elsewhere in the library; its length is not visible here */
extern FILE __sF[];
/*
 * Bit values for the _flags member of FILE.  A _flags value of 0 marks
 * the FILE as free, so every flag here is a distinct non-zero bit.
 */
#define __SLBF 0x0001 /* line buffered */
#define __SNBF 0x0002 /* unbuffered */
#define __SRD 0x0004 /* OK to read */
#define __SWR 0x0008 /* OK to write */
/* RD and WR are never simultaneously asserted */
#define __SRW 0x0010 /* open for reading & writing */
#define __SEOF 0x0020 /* found EOF */
#define __SERR 0x0040 /* found error */
#define __SMBF 0x0080 /* _buf is from malloc (FILE has no _buf; presumably _bf._base) */
#define __SAPP 0x0100 /* fdopen()ed in append mode */
#define __SSTR 0x0200 /* this is an sprintf/snprintf string */
#define __SOPT 0x0400 /* do fseek() optimisation */
#define __SNPT 0x0800 /* do not do fseek() optimisation */
#define __SOFF 0x1000 /* set iff _offset is in fact correct */
#define __SMOD 0x2000 /* true => fgetline modified _p text */
[much deleted]
/*
 * The __sfoo macros are here so that we can
 * define function versions in the C library.
 *
 * __sgetc(p) uses up one unit of the read count _r.  While the count
 * stays non-negative, the byte at _p is returned as an int and _p is
 * advanced; once the count goes negative the stream is handed to
 * __srget() and its result is returned instead.  The argument p is
 * evaluated more than once.
 */
#define __sgetc(p) (--(p)->_r >= 0 ? (int)(*(p)->_p++) : __srget(p))
#ifdef __GNUC__
/*
 * Inline putc() primitive.
 *
 * One unit of the write count _w is consumed on every call.  The
 * character is stored straight through _p (which is then advanced) when
 *   - the count is still non-negative after the decrement, or
 *   - the count is negative but not below _lbfsize, and the character
 *     is not a newline.
 * In every other case the character is passed to __swbuf().
 */
static __inline int __sputc(int _c, FILE *_p) {
	/* The decrement is the first operand so that it happens on every call. */
	if (--_p->_w < 0 && (_p->_w < _p->_lbfsize || (char)_c == '\n'))
		return (__swbuf(_c, _p));
	return (*_p->_p++ = _c);
}
#else
/*
 * Macro form of the same decision, for compilers without inline
 * functions.  Unlike the inline version, the character is stored at *_p
 * before it is compared with '\n'; _p is advanced only when the stored
 * byte is not a newline.  Both c and p appear several times in the
 * expansion, but c is evaluated exactly once on each path.
 *
 * This has been tuned to generate reasonable code on the vax using pcc
 */
#define __sputc(c, p) \
	(--(p)->_w < 0 ? \
	    ((p)->_w >= (p)->_lbfsize ? \
		(*(p)->_p = (c), \
		    (*(p)->_p != '\n' ? \
			(int)*(p)->_p++ : \
			__swbuf('\n', p))) : \
		__swbuf((int)(c), p)) : \
	    (*(p)->_p = (c), (int)*(p)->_p++))
#endif
--
In-Real-Life: Chris Torek, Univ of MD Comp Sci Dept (+1 301 405 2750)
Domain: chris at cs.umd.edu Path: uunet!mimsy!chris
More information about the Comp.lang.c
mailing list