mailing list of musl libc
 help / color / mirror / code / Atom feed
* Removing stupid, spurious UB in stdio (bikeshed time)
@ 2016-04-26 22:18 Rich Felker
  2016-04-27  7:56 ` Alexander Monakov
  0 siblings, 1 reply; 4+ messages in thread
From: Rich Felker @ 2016-04-26 22:18 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 3136 bytes --]

There's a lot of nonsense-UB in stdio due to buffer comparisons along
the lines of "f->rpos < f->rend". The intent of these comparisons is
to simultaneously check that the buffer is initialized for the proper
mode (read or write) and that there's data left in it (for reading) or
space left (to write) or buffered data to be written out (for write),
etc.

Unfortunately, when the buffer is uninitialized for the mode being
checked, the comparison becomes NULL<NULL, and while this should
obviously be false (since < implies !=), NULL<NULL is actually UB.

The attached patch makes a stab at fixing the problem by using !=
instead of < or >. This is correct because in the places where we're
testing for <, > is not a possibility; to be true, > would require OOB
pointer arithmetic (thus UB) already to have been performed.

Unfortunately there are still places where UB is left: subtraction,
such as this line in ftell:

	return pos - (f->rend - f->rpos) + (f->wpos - f->wbase);

Adding branches to test for all the null buffer possibilities here
would uglify and pessimize the code (compilers are too stupid to
optimize them out).

So another possible idea is abolishing use of null pointers here, and
using a different sentinel, e.g. f->buf (start of buffer). Then
instead of disabling write buffer via:

	f->wpos = f->wbase = f->wend = 0;

we could do something like:

	f->wpos = f->wbase = f->wend = f->buf;

Unfortunately there are several places that explicitly check
predicates like !f->rpos, and in some places they're treated
differently than end-of-buffer, such as ungetc:

	if (!f->rpos) __toread(f);
	if (!f->rpos || f->rpos <= f->buf - UNGET) {

Here, empty buffer after __toread is expected, but null pointer
indicates an error entering read mode. This could possibly be handled
by using f->buf-UNGET (which would naturally inhibit ungetc) as the
sentinel for read mode off.

The other major user of null sentinels is code in fseek.c and fflush.c
that checks for write failure; after f->write(), f->wpos==0 is
interpreted as an error. f->wpos==f->wend can't in general be used as
an error conditional (it's always true for unbuffered output) but it
could be used in these specific cases since the code path is only
reachable when there's buffered data to be flushed.

Also related: vsnprintf does some wacky UB stuff in order to support
direct writes into a buffer of unknown size, and strto* use similar
hacks to read from buffers of unknown size. So the right long-term
solution might be to eliminate the whole system of end pointers and
instead compare pos-start against a size. And on top of that I guess
we need to stop using null so pos-start is defined.

So what to do? I'm not aware of any immediate need to fix these
issues, but I want to future-proof the code and make it friendly to
analysis tools that catch the undefined behavior. I think a good place
to start might be coming up with and documenting a clear model for how
stdio's buffer internals are supposed to work, what operations are
allowed, what invariants hold, etc. based on the above analysis of
current UB issues and what the code is doing.

Rich

[-- Attachment #2: stdio-buff-compare.diff --]
[-- Type: text/plain, Size: 4875 bytes --]

diff --git a/src/internal/stdio_impl.h b/src/internal/stdio_impl.h
index 7cdf729..1f14809 100644
--- a/src/internal/stdio_impl.h
+++ b/src/internal/stdio_impl.h
@@ -84,10 +84,10 @@ void __ofl_unlock(void);
 #define ferror(f) ((f)->flags & F_ERR)
 
 #define getc_unlocked(f) \
-	( ((f)->rpos < (f)->rend) ? *(f)->rpos++ : __uflow((f)) )
+	( ((f)->rpos != (f)->rend) ? *(f)->rpos++ : __uflow((f)) )
 
 #define putc_unlocked(c, f) \
-	( ((unsigned char)(c)!=(f)->lbf && (f)->wpos<(f)->wend) \
+	( ((unsigned char)(c)!=(f)->lbf && (f)->wpos != (f)->wend) \
 	? *(f)->wpos++ = (c) : __overflow((f),(c)) )
 
 /* Caller-allocated FILE * operations */
diff --git a/src/stdio/__overflow.c b/src/stdio/__overflow.c
index 3bb3792..e65a594 100644
--- a/src/stdio/__overflow.c
+++ b/src/stdio/__overflow.c
@@ -4,7 +4,7 @@ int __overflow(FILE *f, int _c)
 {
 	unsigned char c = _c;
 	if (!f->wend && __towrite(f)) return EOF;
-	if (f->wpos < f->wend && c != f->lbf) return *f->wpos++ = c;
+	if (f->wpos != f->wend && c != f->lbf) return *f->wpos++ = c;
 	if (f->write(f, &c, 1)!=1) return EOF;
 	return c;
 }
diff --git a/src/stdio/__stdio_exit.c b/src/stdio/__stdio_exit.c
index 191b445..210e768 100644
--- a/src/stdio/__stdio_exit.c
+++ b/src/stdio/__stdio_exit.c
@@ -9,8 +9,8 @@ static void close_file(FILE *f)
 {
 	if (!f) return;
 	FFINALLOCK(f);
-	if (f->wpos > f->wbase) f->write(f, 0, 0);
-	if (f->rpos < f->rend) f->seek(f, f->rpos-f->rend, SEEK_CUR);
+	if (f->wpos != f->wbase) f->write(f, 0, 0);
+	if (f->rpos != f->rend) f->seek(f, f->rpos-f->rend, SEEK_CUR);
 }
 
 void __stdio_exit(void)
diff --git a/src/stdio/__toread.c b/src/stdio/__toread.c
index 35f67b8..faef8c2 100644
--- a/src/stdio/__toread.c
+++ b/src/stdio/__toread.c
@@ -3,7 +3,7 @@
 int __toread(FILE *f)
 {
 	f->mode |= f->mode-1;
-	if (f->wpos > f->wbase) f->write(f, 0, 0);
+	if (f->wpos != f->wbase) f->write(f, 0, 0);
 	f->wpos = f->wbase = f->wend = 0;
 	if (f->flags & F_NORD) {
 		f->flags |= F_ERR;
diff --git a/src/stdio/fflush.c b/src/stdio/fflush.c
index 3f462c8..00fe06a 100644
--- a/src/stdio/fflush.c
+++ b/src/stdio/fflush.c
@@ -3,13 +3,13 @@
 static int __fflush_unlocked(FILE *f)
 {
 	/* If writing, flush output */
-	if (f->wpos > f->wbase) {
+	if (f->wpos != f->wbase) {
 		f->write(f, 0, 0);
 		if (!f->wpos) return EOF;
 	}
 
 	/* If reading, sync position, per POSIX */
-	if (f->rpos < f->rend) f->seek(f, f->rpos-f->rend, SEEK_CUR);
+	if (f->rpos != f->rend) f->seek(f, f->rpos-f->rend, SEEK_CUR);
 
 	/* Clear read and write modes */
 	f->wpos = f->wbase = f->wend = 0;
@@ -37,7 +37,7 @@ int fflush(FILE *f)
 
 	for (f=*__ofl_lock(); f; f=f->next) {
 		FLOCK(f);
-		if (f->wpos > f->wbase) r |= __fflush_unlocked(f);
+		if (f->wpos != f->wbase) r |= __fflush_unlocked(f);
 		FUNLOCK(f);
 	}
 	__ofl_unlock();
diff --git a/src/stdio/fgetwc.c b/src/stdio/fgetwc.c
index e455cfe..0c9faa1 100644
--- a/src/stdio/fgetwc.c
+++ b/src/stdio/fgetwc.c
@@ -12,7 +12,7 @@ static wint_t __fgetwc_unlocked_internal(FILE *f)
 	size_t l;
 
 	/* Convert character from buffer if possible */
-	if (f->rpos < f->rend) {
+	if (f->rpos != f->rend) {
 		l = mbrtowc(&wc, (void *)f->rpos, f->rend - f->rpos, &st);
 		if (l+2 >= 2) {
 			f->rpos += l + !l; /* l==0 means 1 byte, null */
diff --git a/src/stdio/fread.c b/src/stdio/fread.c
index aef75f7..6f5ac01 100644
--- a/src/stdio/fread.c
+++ b/src/stdio/fread.c
@@ -13,7 +13,7 @@ size_t fread(void *restrict destv, size_t size, size_t nmemb, FILE *restrict f)
 
 	f->mode |= f->mode-1;
 
-	if (f->rend - f->rpos > 0) {
+	if (f->rpos != f->rend) {
 		/* First exhaust the buffer. */
 		k = MIN(f->rend - f->rpos, l);
 		memcpy(dest, f->rpos, k);
diff --git a/src/stdio/fseek.c b/src/stdio/fseek.c
index b160b74..4b7030a 100644
--- a/src/stdio/fseek.c
+++ b/src/stdio/fseek.c
@@ -6,7 +6,7 @@ int __fseeko_unlocked(FILE *f, off_t off, int whence)
 	if (whence == SEEK_CUR) off -= f->rend - f->rpos;
 
 	/* Flush write buffer, and report error on failure. */
-	if (f->wpos > f->wbase) {
+	if (f->wpos != f->wbase) {
 		f->write(f, 0, 0);
 		if (!f->wpos) return -1;
 	}
diff --git a/src/stdio/ftell.c b/src/stdio/ftell.c
index bb62897..5ff2178 100644
--- a/src/stdio/ftell.c
+++ b/src/stdio/ftell.c
@@ -5,7 +5,7 @@
 off_t __ftello_unlocked(FILE *f)
 {
 	off_t pos = f->seek(f, 0,
-		(f->flags & F_APP) && f->wpos > f->wbase
+		(f->flags & F_APP) && f->wpos != f->wbase
 		? SEEK_END : SEEK_CUR);
 	if (pos < 0) return pos;
 
diff --git a/src/stdio/vfwscanf.c b/src/stdio/vfwscanf.c
index 223aad4..1fd6b71 100644
--- a/src/stdio/vfwscanf.c
+++ b/src/stdio/vfwscanf.c
@@ -77,7 +77,7 @@ static int in_set(const wchar_t *set, int c)
 #if 1
 #undef getwc
 #define getwc(f) \
-	((f)->rpos < (f)->rend && *(f)->rpos < 128 ? *(f)->rpos++ : (getwc)(f))
+	((f)->rpos != (f)->rend && *(f)->rpos < 128 ? *(f)->rpos++ : (getwc)(f))
 
 #undef ungetwc
 #define ungetwc(c,f) \

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-04-27 18:39 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-26 22:18 Removing stupid, spurious UB in stdio (bikeshed time) Rich Felker
2016-04-27  7:56 ` Alexander Monakov
2016-04-27 18:17   ` Rich Felker
2016-04-27 18:39     ` Alexander Monakov

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).