mailing list of musl libc
 help / color / mirror / code / Atom feed
* [PATCH v7] stdio: implement fopencookie(3)
@ 2017-11-16  6:00 William Pitcock
  2017-11-17 17:35 ` Rich Felker
  0 siblings, 1 reply; 4+ messages in thread
From: William Pitcock @ 2017-11-16  6:00 UTC (permalink / raw)
  To: musl; +Cc: William Pitcock

The fopencookie(3) function allows the programmer to create a custom
stdio implementation, using four hook functions which operate on a
"cookie" data type.

Changelog:

v7:
- include GNU typedefs for cookie i/o functions

v6:
- remove pointer arithmetic instead using a structure to contain the parent FILE
  object
- set F_ERR flag where appropriate
- style fixes
- fix stdio readahead to handle single-byte read case (as pointed out by dalias,
  tested by custom fuzzer)
- handle seek return values correctly (found by fuzzing)

v5:
- implement stdio readahead buffering support

v4:
- remove parameter names from header function declarations

v3:
- remove spurious `struct winsize`
- make f->lock unconditionally 0

v2:
- properly implement stdio buffering

v1:
- initial proof of concept
---
 include/stdio.h         |  14 +++++
 src/stdio/fopencookie.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 155 insertions(+)
 create mode 100644 src/stdio/fopencookie.c

diff --git a/include/stdio.h b/include/stdio.h
index 884d2e6a..2932c76f 100644
--- a/include/stdio.h
+++ b/include/stdio.h
@@ -182,6 +182,20 @@ int vasprintf(char **, const char *, __isoc_va_list);
 #ifdef _GNU_SOURCE
 char *fgets_unlocked(char *, int, FILE *);
 int fputs_unlocked(const char *, FILE *);
+
+typedef ssize_t (cookie_read_function_t)(void *, char *, size_t);
+typedef ssize_t (cookie_write_function_t)(void *, const char *, size_t);
+typedef int (cookie_seek_function_t)(void *, off_t *, int);
+typedef int (cookie_close_function_t)(void *);
+
+typedef struct {
+	cookie_read_function_t *read;
+	cookie_write_function_t *write;
+	cookie_seek_function_t *seek;
+	cookie_close_function_t *close;
+} cookie_io_functions_t;
+
+FILE *fopencookie(void *, const char *, cookie_io_functions_t);
 #endif
 
 #if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
diff --git a/src/stdio/fopencookie.c b/src/stdio/fopencookie.c
new file mode 100644
index 00000000..bcf42c10
--- /dev/null
+++ b/src/stdio/fopencookie.c
@@ -0,0 +1,141 @@
+#define _GNU_SOURCE
+#include "stdio_impl.h"
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+
+struct fcookie {
+	void *cookie;
+	cookie_io_functions_t iofuncs;
+};
+
+struct cookie_FILE {
+	FILE f;
+	struct fcookie fc;
+	unsigned char buf[UNGET+BUFSIZ];
+};
+
+static size_t cookieread(FILE *f, unsigned char *buf, size_t len)
+{
+	struct fcookie *fc = f->cookie;
+	ssize_t ret = -1;
+	size_t remain = len, readlen = 0;
+
+	if (!fc->iofuncs.read) goto bail;
+
+	ret = fc->iofuncs.read(fc->cookie, (char *) buf, len > 1 ? (len - 1) : 1);
+	if (ret <= 0) goto bail;
+
+	readlen += ret;
+	remain -= ret;
+
+	f->rpos = f->buf;
+	ret = fc->iofuncs.read(fc->cookie, (char *) f->rpos, f->buf_size);
+	if (ret <= 0) goto bail;
+	f->rend = f->rpos + ret;
+
+	if (remain > 0) {
+		if (remain > f->buf_size) remain = f->buf_size;
+		memcpy(buf + readlen, f->rpos, remain);
+		readlen += remain;
+		f->rpos += remain;
+	}
+
+	return readlen;
+
+bail:
+	f->flags |= F_EOF ^ ((F_ERR^F_EOF) & ret);
+	f->rpos = f->rend = f->buf;
+	return readlen;
+}
+
+static size_t cookiewrite(FILE *f, const unsigned char *buf, size_t len)
+{
+	struct fcookie *fc = f->cookie;
+	ssize_t ret;
+	size_t len2 = f->wpos - f->wbase;
+	if (!fc->iofuncs.write) return len;
+	if (len2) {
+		f->wpos = f->wbase;
+		if (cookiewrite(f, f->wpos, len2) < len2) return 0;
+	}
+	ret = fc->iofuncs.write(fc->cookie, (const char *) buf, len);
+	if (ret < 0) {
+		f->wpos = f->wbase = f->wend = 0;
+		f->flags |= F_ERR;
+		return 0;
+	}
+	return ret;
+}
+
+static off_t cookieseek(FILE *f, off_t off, int whence)
+{
+	struct fcookie *fc = f->cookie;
+	int res;
+	if (whence > 2) {
+		errno = EINVAL;
+		return -1;
+	}
+	if (!fc->iofuncs.seek) {
+		errno = ENOTSUP;
+		return -1;
+	}
+	res = fc->iofuncs.seek(fc->cookie, &off, whence);
+	if (res < 0)
+		return res;
+	return off;
+}
+
+static int cookieclose(FILE *f)
+{
+	struct fcookie *fc = f->cookie;
+	if (fc->iofuncs.close) return fc->iofuncs.close(fc->cookie);
+	return 0;
+}
+
+FILE *fopencookie(void *cookie, const char *mode, cookie_io_functions_t iofuncs)
+{
+	struct cookie_FILE *f;
+
+	/* Check for valid initial mode character */
+	if (!strchr("rwa", *mode)) {
+		errno = EINVAL;
+		return 0;
+	}
+
+	/* Allocate FILE+fcookie+buffer or fail */
+	if (!(f=malloc(sizeof *f))) return 0;
+
+	/* Zero-fill only the struct, not the buffer */
+	memset(f, 0, sizeof(FILE));
+
+	/* Impose mode restrictions */
+	if (!strchr(mode, '+')) f->f.flags = (*mode == 'r') ? F_NOWR : F_NORD;
+
+	/* Set up our fcookie */
+	f->fc.cookie = cookie;
+	f->fc.iofuncs.read = iofuncs.read;
+	f->fc.iofuncs.write = iofuncs.write;
+	f->fc.iofuncs.seek = iofuncs.seek;
+	f->fc.iofuncs.close = iofuncs.close;
+
+	f->f.fd = -1;
+	f->f.cookie = &f->fc;
+	f->f.buf = f->buf;
+	f->f.buf_size = BUFSIZ;
+	f->f.lbf = EOF;
+
+	/* enable opportunistic stdio locking */
+	f->f.lock = 0;
+
+	/* Initialize op ptrs. No problem if some are unneeded. */
+	f->f.read = cookieread;
+	f->f.write = cookiewrite;
+	f->f.seek = cookieseek;
+	f->f.close = cookieclose;
+
+	/* Add new FILE to open file list */
+	return __ofl_add(&f->f);
+}
-- 
2.15.0



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v7] stdio: implement fopencookie(3)
  2017-11-16  6:00 [PATCH v7] stdio: implement fopencookie(3) William Pitcock
@ 2017-11-17 17:35 ` Rich Felker
  2017-11-17 17:57   ` Rich Felker
  2017-11-17 19:52   ` William Pitcock
  0 siblings, 2 replies; 4+ messages in thread
From: Rich Felker @ 2017-11-17 17:35 UTC (permalink / raw)
  To: musl

On Thu, Nov 16, 2017 at 06:00:55AM +0000, William Pitcock wrote:
> The fopencookie(3) function allows the programmer to create a custom
> stdio implementation, using four hook functions which operate on a
> "cookie" data type.
>
> [...]
> +
> +struct fcookie {
> +	void *cookie;
> +	cookie_io_functions_t iofuncs;
> +};
> +
> +struct cookie_FILE {
> +	FILE f;
> +	struct fcookie fc;
> +	unsigned char buf[UNGET+BUFSIZ];
> +};
> +
> +static size_t cookieread(FILE *f, unsigned char *buf, size_t len)
> +{
> +	struct fcookie *fc = f->cookie;
> +	ssize_t ret = -1;
> +	size_t remain = len, readlen = 0;
> +
> +	if (!fc->iofuncs.read) goto bail;
> +
> +	ret = fc->iofuncs.read(fc->cookie, (char *) buf, len > 1 ? (len - 1) : 1);
> +	if (ret <= 0) goto bail;
> +
> +	readlen += ret;
> +	remain -= ret;
> +
> +	f->rpos = f->buf;
> +	ret = fc->iofuncs.read(fc->cookie, (char *) f->rpos, f->buf_size);
> +	if (ret <= 0) goto bail;
> +	f->rend = f->rpos + ret;
> +
> +	if (remain > 0) {
> +		if (remain > f->buf_size) remain = f->buf_size;
> +		memcpy(buf + readlen, f->rpos, remain);
> +		readlen += remain;
> +		f->rpos += remain;
> +	}
> +
> +	return readlen;
> +
> +bail:
> +	f->flags |= F_EOF ^ ((F_ERR^F_EOF) & ret);
> +	f->rpos = f->rend = f->buf;
> +	return readlen;
> +}

At least the following bugs are still present:

1. If len==1, the read func is called twice, and might block the
   second time even though the requested read of 1 byte succeeded.

2. The return value is wrong. It must be between 0 and len and
   represent the amount of data made available to the caller. Buffer
   filling is not supposed to be included in that since it's an
   internal matter, not something the caller sees.

3. A return value <=0 from buffer filling step is not cause to set the
   EOF or error indicators, since the part the caller requested
   succeeded. But see below...

Regarding issues 1 and 3, I think the right fix is to make the first
call conditional on len>1, and always use len-1. This makes it so the
second (possibly only) read call is never redundant/always part of the
caller-requested read, so that you can honor its EOF or error result.
Otherwise errors could be lost.

Note that you also need to be able to handle the case where there is
no buffer, in case setvbuf was called to disable buffering for the
stream. In that case only the first read should happen and it should
have full length len, not len-1.

Really all of this logic should be identical to what's in
__stdio_read.c, but with calls to the read backend rather than readv.

> +static size_t cookiewrite(FILE *f, const unsigned char *buf, size_t len)
> +{
> +	struct fcookie *fc = f->cookie;
> +	ssize_t ret;
> +	size_t len2 = f->wpos - f->wbase;
> +	if (!fc->iofuncs.write) return len;
> +	if (len2) {
> +		f->wpos = f->wbase;
> +		if (cookiewrite(f, f->wpos, len2) < len2) return 0;
> +	}
> +	ret = fc->iofuncs.write(fc->cookie, (const char *) buf, len);
> +	if (ret < 0) {
> +		f->wpos = f->wbase = f->wend = 0;
> +		f->flags |= F_ERR;
> +		return 0;
> +	}
> +	return ret;
> +}

I don't see any bugs here.

> +static off_t cookieseek(FILE *f, off_t off, int whence)
> +{
> +	struct fcookie *fc = f->cookie;
> +	int res;
> +	if (whence > 2) {

Needs to be 2U. Otherwise you don't catch negative.

> +		errno = EINVAL;
> +		return -1;
> +	}
> +	if (!fc->iofuncs.seek) {
> +		errno = ENOTSUP;
> +		return -1;
> +	}

Is there a reason to prefer ENOTSUP here? Generally ESPIPE is the
error for non-seekable FILE types, but maybe there's a reason
something else is preferred here.

> +FILE *fopencookie(void *cookie, const char *mode, cookie_io_functions_t iofuncs)
> +{
> +	struct cookie_FILE *f;
> +
> +	/* Check for valid initial mode character */
> +	if (!strchr("rwa", *mode)) {
> +		errno = EINVAL;
> +		return 0;
> +	}
> +
> +	/* Allocate FILE+fcookie+buffer or fail */
> +	if (!(f=malloc(sizeof *f))) return 0;
> +
> +	/* Zero-fill only the struct, not the buffer */
> +	memset(f, 0, sizeof(FILE));

Largely style, but I would prefer memset(&f->f, 0, sizeof f->f) (i.e.
not encoding a hidden assumption that f->f is first member and always
using sizeof the object pointed to by first arg rather than a type to
ensure consistency);

> +
> +	/* Impose mode restrictions */
> +	if (!strchr(mode, '+')) f->f.flags = (*mode == 'r') ? F_NOWR : F_NORD;
> +
> +	/* Set up our fcookie */
> +	f->fc.cookie = cookie;
> +	f->fc.iofuncs.read = iofuncs.read;
> +	f->fc.iofuncs.write = iofuncs.write;
> +	f->fc.iofuncs.seek = iofuncs.seek;
> +	f->fc.iofuncs.close = iofuncs.close;
> +
> +	f->f.fd = -1;
> +	f->f.cookie = &f->fc;
> +	f->f.buf = f->buf;

Must be f->buf+UNGET. Otherwise ungetc will underflow.

> +	f->f.buf_size = BUFSIZ;
> +	f->f.lbf = EOF;
> +
> +	/* enable opportunistic stdio locking */
> +	f->f.lock = 0;

This comment seems wrong. I think the line is unnecessary/redundant
since you memset, probably leftover from when you had it as -1.

Rich


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v7] stdio: implement fopencookie(3)
  2017-11-17 17:35 ` Rich Felker
@ 2017-11-17 17:57   ` Rich Felker
  2017-11-17 19:52   ` William Pitcock
  1 sibling, 0 replies; 4+ messages in thread
From: Rich Felker @ 2017-11-17 17:57 UTC (permalink / raw)
  To: musl

On Fri, Nov 17, 2017 at 12:35:41PM -0500, Rich Felker wrote:
> On Thu, Nov 16, 2017 at 06:00:55AM +0000, William Pitcock wrote:
> > The fopencookie(3) function allows the programmer to create a custom
> > stdio implementation, using four hook functions which operate on a
> > "cookie" data type.
> >
> > [...]
> > +
> > +struct fcookie {
> > +	void *cookie;
> > +	cookie_io_functions_t iofuncs;
> > +};
> > +
> > +struct cookie_FILE {
> > +	FILE f;
> > +	struct fcookie fc;
> > +	unsigned char buf[UNGET+BUFSIZ];
> > +};
> > +
> > +static size_t cookieread(FILE *f, unsigned char *buf, size_t len)
> > +{
> > +	struct fcookie *fc = f->cookie;
> > +	ssize_t ret = -1;
> > +	size_t remain = len, readlen = 0;
> > +
> > +	if (!fc->iofuncs.read) goto bail;
> > +
> > +	ret = fc->iofuncs.read(fc->cookie, (char *) buf, len > 1 ? (len - 1) : 1);
> > +	if (ret <= 0) goto bail;
> > +
> > +	readlen += ret;
> > +	remain -= ret;
> > +
> > +	f->rpos = f->buf;
> > +	ret = fc->iofuncs.read(fc->cookie, (char *) f->rpos, f->buf_size);
> > +	if (ret <= 0) goto bail;
> > +	f->rend = f->rpos + ret;
> > +
> > +	if (remain > 0) {
> > +		if (remain > f->buf_size) remain = f->buf_size;
> > +		memcpy(buf + readlen, f->rpos, remain);
> > +		readlen += remain;
> > +		f->rpos += remain;
> > +	}
> > +
> > +	return readlen;
> > +
> > +bail:
> > +	f->flags |= F_EOF ^ ((F_ERR^F_EOF) & ret);
> > +	f->rpos = f->rend = f->buf;
> > +	return readlen;
> > +}
> 
> At least the following bugs are still present:
> 
> 1. If len==1, the read func is called twice, and might block the
>    second time even though the requested read of 1 byte succeeded.
> 
> 2. The return value is wrong. It must be between 0 and len and
>    represent the amount of data made available to the caller. Buffer
>    filling is not supposed to be included in that since it's an
>    internal matter, not something the caller sees.
> 
> 3. A return value <=0 from buffer filling step is not cause to set the
>    EOF or error indicators, since the part the caller requested
>    succeeded. But see below...

Also:

4. The return value of the second read call is basically ignored,
   except checking for error/eof. The code just assumes at least
   `f->buf_size` bytes were read successfully, copying junk out of the
   buffer back to the caller if fewer were read.

Rich


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v7] stdio: implement fopencookie(3)
  2017-11-17 17:35 ` Rich Felker
  2017-11-17 17:57   ` Rich Felker
@ 2017-11-17 19:52   ` William Pitcock
  1 sibling, 0 replies; 4+ messages in thread
From: William Pitcock @ 2017-11-17 19:52 UTC (permalink / raw)
  To: musl

Hi,

On Fri, Nov 17, 2017 at 11:35 AM, Rich Felker <dalias@libc.org> wrote:
> On Thu, Nov 16, 2017 at 06:00:55AM +0000, William Pitcock wrote:
>> The fopencookie(3) function allows the programmer to create a custom
>> stdio implementation, using four hook functions which operate on a
>> "cookie" data type.
>>
>> [...]
>> +
>> +struct fcookie {
>> +     void *cookie;
>> +     cookie_io_functions_t iofuncs;
>> +};
>> +
>> +struct cookie_FILE {
>> +     FILE f;
>> +     struct fcookie fc;
>> +     unsigned char buf[UNGET+BUFSIZ];
>> +};
>> +
>> +static size_t cookieread(FILE *f, unsigned char *buf, size_t len)
>> +{
>> +     struct fcookie *fc = f->cookie;
>> +     ssize_t ret = -1;
>> +     size_t remain = len, readlen = 0;
>> +
>> +     if (!fc->iofuncs.read) goto bail;
>> +
>> +     ret = fc->iofuncs.read(fc->cookie, (char *) buf, len > 1 ? (len - 1) : 1);
>> +     if (ret <= 0) goto bail;
>> +
>> +     readlen += ret;
>> +     remain -= ret;
>> +
>> +     f->rpos = f->buf;
>> +     ret = fc->iofuncs.read(fc->cookie, (char *) f->rpos, f->buf_size);
>> +     if (ret <= 0) goto bail;
>> +     f->rend = f->rpos + ret;
>> +
>> +     if (remain > 0) {
>> +             if (remain > f->buf_size) remain = f->buf_size;
>> +             memcpy(buf + readlen, f->rpos, remain);
>> +             readlen += remain;
>> +             f->rpos += remain;
>> +     }
>> +
>> +     return readlen;
>> +
>> +bail:
>> +     f->flags |= F_EOF ^ ((F_ERR^F_EOF) & ret);
>> +     f->rpos = f->rend = f->buf;
>> +     return readlen;
>> +}
>
> At least the following bugs are still present:
>
> 1. If len==1, the read func is called twice, and might block the
>    second time even though the requested read of 1 byte succeeded.
>
> 2. The return value is wrong. It must be between 0 and len and
>    represent the amount of data made available to the caller. Buffer
>    filling is not supposed to be included in that since it's an
>    internal matter, not something the caller sees.
>
> 3. A return value <=0 from buffer filling step is not cause to set the
>    EOF or error indicators, since the part the caller requested
>    succeeded. But see below...
>
> Regarding issues 1 and 3, I think the right fix is to make the first
> call conditional on len>1, and always use len-1. This makes it so the
> second (possibly only) read call is never redundant/always part of the
> caller-requested read, so that you can honor its EOF or error result.
> Otherwise errors could be lost.
>
> Note that you also need to be able to handle the case where there is
> no buffer, in case setvbuf was called to disable buffering for the
> stream. In that case only the first read should happen and it should
> have full length len, not len-1.
>
> Really all of this logic should be identical to what's in
> __stdio_read.c, but with calls to the read backend rather than readv.

I believe I have reworked the read logic to match __stdio_read.c more closely.

I will be sending v8 shortly, which I hope to be the final version.

>
>> +static size_t cookiewrite(FILE *f, const unsigned char *buf, size_t len)
>> +{
>> +     struct fcookie *fc = f->cookie;
>> +     ssize_t ret;
>> +     size_t len2 = f->wpos - f->wbase;
>> +     if (!fc->iofuncs.write) return len;
>> +     if (len2) {
>> +             f->wpos = f->wbase;
>> +             if (cookiewrite(f, f->wpos, len2) < len2) return 0;
>> +     }
>> +     ret = fc->iofuncs.write(fc->cookie, (const char *) buf, len);
>> +     if (ret < 0) {
>> +             f->wpos = f->wbase = f->wend = 0;
>> +             f->flags |= F_ERR;
>> +             return 0;
>> +     }
>> +     return ret;
>> +}
>
> I don't see any bugs here.
>
>> +static off_t cookieseek(FILE *f, off_t off, int whence)
>> +{
>> +     struct fcookie *fc = f->cookie;
>> +     int res;
>> +     if (whence > 2) {
>
> Needs to be 2U. Otherwise you don't catch negative.

Done.

>> +             errno = EINVAL;
>> +             return -1;
>> +     }
>> +     if (!fc->iofuncs.seek) {
>> +             errno = ENOTSUP;
>> +             return -1;
>> +     }
>
> Is there a reason to prefer ENOTSUP here? Generally ESPIPE is the
> error for non-seekable FILE types, but maybe there's a reason
> something else is preferred here.

FreeBSD's implementation[1] used ENOTSUP for this case.

[1]: https://github.com/freebsd/freebsd/blob/master/lib/libc/stdio/fopencookie.c#L140-L143

glibc does not seem to set any errno at all for this case.

I think it is good to follow established convention though, since the
FreeBSD implementation does set errno, by matching theirs.

>
>> +FILE *fopencookie(void *cookie, const char *mode, cookie_io_functions_t iofuncs)
>> +{
>> +     struct cookie_FILE *f;
>> +
>> +     /* Check for valid initial mode character */
>> +     if (!strchr("rwa", *mode)) {
>> +             errno = EINVAL;
>> +             return 0;
>> +     }
>> +
>> +     /* Allocate FILE+fcookie+buffer or fail */
>> +     if (!(f=malloc(sizeof *f))) return 0;
>> +
>> +     /* Zero-fill only the struct, not the buffer */
>> +     memset(f, 0, sizeof(FILE));
>
> Largely style, but I would prefer memset(&f->f, 0, sizeof f->f) (i.e.
> not encoding a hidden assumption that f->f is first member and always
> using sizeof the object pointed to by first arg rather than a type to
> ensure consistency);

Done.

>
>> +
>> +     /* Impose mode restrictions */
>> +     if (!strchr(mode, '+')) f->f.flags = (*mode == 'r') ? F_NOWR : F_NORD;
>> +
>> +     /* Set up our fcookie */
>> +     f->fc.cookie = cookie;
>> +     f->fc.iofuncs.read = iofuncs.read;
>> +     f->fc.iofuncs.write = iofuncs.write;
>> +     f->fc.iofuncs.seek = iofuncs.seek;
>> +     f->fc.iofuncs.close = iofuncs.close;
>> +
>> +     f->f.fd = -1;
>> +     f->f.cookie = &f->fc;
>> +     f->f.buf = f->buf;
>
> Must be f->buf+UNGET. Otherwise ungetc will underflow.

Done.

>
>> +     f->f.buf_size = BUFSIZ;
>> +     f->f.lbf = EOF;
>> +
>> +     /* enable opportunistic stdio locking */
>> +     f->f.lock = 0;
>
> This comment seems wrong. I think the line is unnecessary/redundant
> since you memset, probably leftover from when you had it as -1.

I removed it from v8.

William


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-11-17 19:52 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-16  6:00 [PATCH v7] stdio: implement fopencookie(3) William Pitcock
2017-11-17 17:35 ` Rich Felker
2017-11-17 17:57   ` Rich Felker
2017-11-17 19:52   ` William Pitcock

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).