mailing list of musl libc
 help / color / mirror / Atom feed
* [musl] [PATCH v5 1/3] glob: introduce context struct for do_glob
@ 2021-08-27 20:49 Ismael Luceno
  2021-08-27 20:49 ` [musl] [PATCH v5 2/3] add internal aliases __opendir, __readdir and __closedir Ismael Luceno
  2021-08-27 20:49 ` [musl] [PATCH v5 3/3] glob: implement GLOB_ALTDIRFUNC et al Ismael Luceno
  0 siblings, 2 replies; 3+ messages in thread
From: Ismael Luceno @ 2021-08-27 20:49 UTC (permalink / raw)
  To: musl; +Cc: Rich Felker, Ismael Luceno

this reduces the stack frame of do_glob by passing the state shared across
the recursion (flags, errfunc, tail) through a single context pointer, and
produces a slightly smaller object file with GCC 10.3 on x86_64:

   text    data     bss     dec     hex filename
   2303       0       0    2303     8ff glob-ctx.lo
   2356       0       0    2356     934 glob-noctx.lo

Signed-off-by: Ismael Luceno <ismael@iodev.co.uk>
---

Notes:
    Changes since v1:
    
    - Make context struct constant

 src/regex/glob.c | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/src/regex/glob.c b/src/regex/glob.c
index 7780e21ee113..9491eaeef266 100644
--- a/src/regex/glob.c
+++ b/src/regex/glob.c
@@ -32,10 +32,16 @@ static int append(struct match **tail, const char *name, size_t len, int mark)
 	return 0;
 }
 
-static int do_glob(char *buf, size_t pos, int type, char *pat, int flags, int (*errfunc)(const char *path, int err), struct match **tail)
+struct glob_ctx {
+	struct match **tail;
+	int flags;
+	int (*errfunc)(const char *path, int err);
+};
+
+static int do_glob(char *buf, size_t pos, int type, char *pat, const struct glob_ctx *restrict ctx)
 {
 	/* If GLOB_MARK is unused, we don't care about type. */
-	if (!type && !(flags & GLOB_MARK)) type = DT_REG;
+	if (!type && !(ctx->flags & GLOB_MARK)) type = DT_REG;
 
 	/* Special-case the remaining pattern being all slashes, in
 	 * which case we can use caller-passed type if it's a dir. */
@@ -55,7 +61,7 @@ static int do_glob(char *buf, size_t pos, int type, char *pat, int flags, int (*
 			break;
 		} else if (pat[i] == '[') {
 			in_bracket = 1;
-		} else if (pat[i] == '\\' && !(flags & GLOB_NOESCAPE)) {
+		} else if (pat[i] == '\\' && !(ctx->flags & GLOB_NOESCAPE)) {
 			/* Backslashes inside a bracket are (at least by
 			 * our interpretation) non-special, so if next
 			 * char is ']' we have a complete expression. */
@@ -100,23 +106,23 @@ static int do_glob(char *buf, size_t pos, int type, char *pat, int flags, int (*
 		 * or if that fails, use lstat for determining existence to
 		 * avoid false negatives in the case of broken symlinks. */
 		struct stat st;
-		if ((flags & GLOB_MARK) && (!type||type==DT_LNK) && !stat(buf, &st)) {
+		if ((ctx->flags & GLOB_MARK) && (!type||type==DT_LNK) && !stat(buf, &st)) {
 			if (S_ISDIR(st.st_mode)) type = DT_DIR;
 			else type = DT_REG;
 		}
 		if (!type && lstat(buf, &st)) {
-			if (errno!=ENOENT && (errfunc(buf, errno) || (flags & GLOB_ERR)))
+			if (errno!=ENOENT && (ctx->errfunc(buf, errno) || (ctx->flags & GLOB_ERR)))
 				return GLOB_ABORTED;
 			return 0;
 		}
-		if (append(tail, buf, pos, (flags & GLOB_MARK) && type==DT_DIR))
+		if (append(ctx->tail, buf, pos, (ctx->flags & GLOB_MARK) && type==DT_DIR))
 			return GLOB_NOSPACE;
 		return 0;
 	}
 	char *p2 = strchr(pat, '/'), saved_sep = '/';
 	/* Check if the '/' was escaped and, if so, remove the escape char
 	 * so that it will not be unpaired when passed to fnmatch. */
-	if (p2 && !(flags & GLOB_NOESCAPE)) {
+	if (p2 && !(ctx->flags & GLOB_NOESCAPE)) {
 		char *p;
 		for (p=p2; p>pat && p[-1]=='\\'; p--);
 		if ((p2-p)%2) {
@@ -126,7 +132,7 @@ static int do_glob(char *buf, size_t pos, int type, char *pat, int flags, int (*
 	}
 	DIR *dir = opendir(pos ? buf : ".");
 	if (!dir) {
-		if (errfunc(buf, errno) || (flags & GLOB_ERR))
+		if (ctx->errfunc(buf, errno) || (ctx->flags & GLOB_ERR))
 			return GLOB_ABORTED;
 		return 0;
 	}
@@ -142,22 +148,22 @@ static int do_glob(char *buf, size_t pos, int type, char *pat, int flags, int (*
 
 		if (p2) *p2 = 0;
 
-		int fnm_flags= ((flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0)
-			| ((!(flags & GLOB_PERIOD)) ? FNM_PERIOD : 0);
+		int fnm_flags= ((ctx->flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0)
+			| ((!(ctx->flags & GLOB_PERIOD)) ? FNM_PERIOD : 0);
 
 		if (fnmatch(pat, de->d_name, fnm_flags))
 			continue;
 
 		/* With GLOB_PERIOD, don't allow matching . or .. unless
 		 * fnmatch would match them with FNM_PERIOD rules in effect. */
-		if (p2 && (flags & GLOB_PERIOD) && de->d_name[0]=='.'
+		if (p2 && (ctx->flags & GLOB_PERIOD) && de->d_name[0]=='.'
 		    && (!de->d_name[1] || de->d_name[1]=='.' && !de->d_name[2])
 		    && fnmatch(pat, de->d_name, fnm_flags | FNM_PERIOD))
 			continue;
 
 		memcpy(buf+pos, de->d_name, l+1);
 		if (p2) *p2 = saved_sep;
-		int r = do_glob(buf, pos+l, de->d_type, p2 ? p2 : "", flags, errfunc, tail);
+		int r = do_glob(buf, pos+l, de->d_type, p2 ? p2 : "", ctx);
 		if (r) {
 			closedir(dir);
 			return r;
@@ -166,7 +172,7 @@ static int do_glob(char *buf, size_t pos, int type, char *pat, int flags, int (*
 	int readerr = errno;
 	if (p2) *p2 = saved_sep;
 	closedir(dir);
-	if (readerr && (errfunc(buf, errno) || (flags & GLOB_ERR)))
+	if (readerr && (ctx->errfunc(buf, errno) || (ctx->flags & GLOB_ERR)))
 		return GLOB_ABORTED;
 	errno = old_errno;
 	return 0;
@@ -248,8 +254,14 @@ int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, i
 		char *s = p;
 		if ((flags & (GLOB_TILDE | GLOB_TILDE_CHECK)) && *p == '~')
 			error = expand_tilde(&s, buf, &pos);
-		if (!error)
-			error = do_glob(buf, pos, 0, s, flags, errfunc, &tail);
+		if (!error) {
+			const struct glob_ctx ctx = {
+				.tail = &tail,
+				.flags = flags,
+				.errfunc = errfunc,
+			};
+			error = do_glob(buf, pos, 0, s, &ctx);
+		}
 		free(p);
 	}
 
-- 
2.33.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [musl] [PATCH v5 2/3] add internal aliases __opendir, __readdir and __closedir
  2021-08-27 20:49 [musl] [PATCH v5 1/3] glob: introduce context struct for do_glob Ismael Luceno
@ 2021-08-27 20:49 ` Ismael Luceno
  2021-08-27 20:49 ` [musl] [PATCH v5 3/3] glob: implement GLOB_ALTDIRFUNC et al Ismael Luceno
  1 sibling, 0 replies; 3+ messages in thread
From: Ismael Luceno @ 2021-08-27 20:49 UTC (permalink / raw)
  To: musl; +Cc: Rich Felker, Ismael Luceno

Signed-off-by: Ismael Luceno <ismael@iodev.co.uk>
---
 src/dirent/closedir.c |  4 +++-
 src/dirent/opendir.c  |  4 +++-
 src/dirent/readdir.c  |  5 +++--
 src/include/dirent.h  | 10 ++++++++++
 4 files changed, 19 insertions(+), 4 deletions(-)
 create mode 100644 src/include/dirent.h

diff --git a/src/dirent/closedir.c b/src/dirent/closedir.c
index e794ae9ca44b..f4249f56e210 100644
--- a/src/dirent/closedir.c
+++ b/src/dirent/closedir.c
@@ -3,9 +3,11 @@
 #include <stdlib.h>
 #include "__dirent.h"
 
-int closedir(DIR *dir)
+int __closedir(DIR *dir)
 {
 	int ret = close(dir->fd);
 	free(dir);
 	return ret;
 }
+
+weak_alias(__closedir, closedir);
diff --git a/src/dirent/opendir.c b/src/dirent/opendir.c
index 5cb84e303fee..4123c81994cd 100644
--- a/src/dirent/opendir.c
+++ b/src/dirent/opendir.c
@@ -5,7 +5,7 @@
 #include "__dirent.h"
 #include "syscall.h"
 
-DIR *opendir(const char *name)
+DIR *__opendir(const char *name)
 {
 	int fd;
 	DIR *dir;
@@ -19,3 +19,5 @@ DIR *opendir(const char *name)
 	dir->fd = fd;
 	return dir;
 }
+
+weak_alias(__opendir, opendir);
diff --git a/src/dirent/readdir.c b/src/dirent/readdir.c
index 569fc7057737..cb34a258569c 100644
--- a/src/dirent/readdir.c
+++ b/src/dirent/readdir.c
@@ -7,7 +7,7 @@
 typedef char dirstream_buf_alignment_check[1-2*(int)(
 	offsetof(struct __dirstream, buf) % sizeof(off_t))];
 
-struct dirent *readdir(DIR *dir)
+struct dirent *__readdir(DIR *dir)
 {
 	struct dirent *de;
 	
@@ -26,4 +26,5 @@ struct dirent *readdir(DIR *dir)
 	return de;
 }
 
-weak_alias(readdir, readdir64);
+weak_alias(__readdir, readdir64);
+weak_alias(__readdir, readdir);
diff --git a/src/include/dirent.h b/src/include/dirent.h
new file mode 100644
index 000000000000..918e123566d4
--- /dev/null
+++ b/src/include/dirent.h
@@ -0,0 +1,10 @@
+#ifndef DIRENT_H
+#define DIRENT_H
+
+#include "../../include/dirent.h"
+
+hidden int            __closedir(DIR *);
+hidden DIR           *__opendir(const char *);
+hidden struct dirent *__readdir(DIR *);
+
+#endif
-- 
2.33.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [musl] [PATCH v5 3/3] glob: implement GLOB_ALTDIRFUNC et al
  2021-08-27 20:49 [musl] [PATCH v5 1/3] glob: introduce context struct for do_glob Ismael Luceno
  2021-08-27 20:49 ` [musl] [PATCH v5 2/3] add internal aliases __opendir, __readdir and __closedir Ismael Luceno
@ 2021-08-27 20:49 ` Ismael Luceno
  1 sibling, 0 replies; 3+ messages in thread
From: Ismael Luceno @ 2021-08-27 20:49 UTC (permalink / raw)
  To: musl; +Cc: Rich Felker, Ismael Luceno

Signed-off-by: Ismael Luceno <ismael@iodev.co.uk>
---

Notes:
    Changes since v4:
    
    - Rebased on top of do_glob context patch
    - Added alternate definitions of glob_t for BSD/GNU vs POSIX
    - Added a static check to ensure that the memory layout of both glob_t
      versions matches, plus a wrapper for the POSIX glob function (necessary?)
    
    Changes since v3:
    
    - Wrap pointers used by GLOB_ALTDIRFUNC in the header to protect against
      namespace contamination. GNU and BSD code shouldn't be using "dirent", so
      it should be safe.
    
    Changes since v2:
    
    - Rebased
    
    Changes since v1:
    
    - Avoid overwriting the function pointers in glob_t
    - Wrap {open,read}dir too

 include/glob.h   | 18 +++++++++++-
 src/regex/glob.c | 73 ++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 84 insertions(+), 7 deletions(-)

diff --git a/include/glob.h b/include/glob.h
index 0ff70bdfeef2..32c03610020b 100644
--- a/include/glob.h
+++ b/include/glob.h
@@ -11,12 +11,27 @@ extern "C" {
 
 #include <bits/alltypes.h>
 
+#if defined(_BSD_SOURCE) || defined(_GNU_SOURCE)
+#define __dirent dirent
+#define __stat stat
+#else
+#define glob __posix_glob
+#endif
+
+struct __dirent;
+struct __stat;
+
 typedef struct {
 	size_t gl_pathc;
 	char **gl_pathv;
 	size_t gl_offs;
 	int __dummy1;
-	void *__dummy2[5];
+
+	void (*gl_closedir)(void *);
+	struct __dirent *(*gl_readdir)(void *);
+	void *(*gl_opendir)(const char *);
+	int (*gl_lstat)(const char *__restrict, struct __stat *__restrict);
+	int (*gl_stat)(const char *__restrict, struct __stat *__restrict);
 } glob_t;
 
 int  glob(const char *__restrict, int, int (*)(const char *, int), glob_t *__restrict);
@@ -31,6 +46,7 @@ void globfree(glob_t *);
 #define GLOB_NOESCAPE 0x40
 #define	GLOB_PERIOD   0x80
 
+#define GLOB_ALTDIRFUNC  0x0200
 #define GLOB_NOMAGIC     0x0800
 #define GLOB_TILDE       0x1000
 #define GLOB_TILDE_CHECK 0x4000
diff --git a/src/regex/glob.c b/src/regex/glob.c
index 9491eaeef266..be7be0b62da9 100644
--- a/src/regex/glob.c
+++ b/src/regex/glob.c
@@ -36,6 +36,13 @@ struct glob_ctx {
 	struct match **tail;
 	int flags;
 	int (*errfunc)(const char *path, int err);
+
+	/* for GLOB_ALTDIRFUNC */
+	void (*closedir)(void *);
+	struct dirent *(*readdir)(void *);
+	void *(*opendir)(const char *);
+	int (*lstat)(const char *restrict, struct stat *restrict);
+	int (*stat)(const char *restrict, struct stat *restrict);
 };
 
 static int do_glob(char *buf, size_t pos, int type, char *pat, const struct glob_ctx *restrict ctx)
@@ -106,11 +113,11 @@ static int do_glob(char *buf, size_t pos, int type, char *pat, const struct glob
 		 * or if that fails, use lstat for determining existence to
 		 * avoid false negatives in the case of broken symlinks. */
 		struct stat st;
-		if ((ctx->flags & GLOB_MARK) && (!type||type==DT_LNK) && !stat(buf, &st)) {
+		if ((ctx->flags & GLOB_MARK) && (!type||type==DT_LNK) && !ctx->stat(buf, &st)) {
 			if (S_ISDIR(st.st_mode)) type = DT_DIR;
 			else type = DT_REG;
 		}
-		if (!type && lstat(buf, &st)) {
+		if (!type && ctx->lstat(buf, &st)) {
 			if (errno!=ENOENT && (ctx->errfunc(buf, errno) || (ctx->flags & GLOB_ERR)))
 				return GLOB_ABORTED;
 			return 0;
@@ -130,7 +137,7 @@ static int do_glob(char *buf, size_t pos, int type, char *pat, const struct glob
 			saved_sep = '\\';
 		}
 	}
-	DIR *dir = opendir(pos ? buf : ".");
+	DIR *dir = ctx->opendir(pos ? buf : ".");
 	if (!dir) {
 		if (ctx->errfunc(buf, errno) || (ctx->flags & GLOB_ERR))
 			return GLOB_ABORTED;
@@ -138,7 +145,7 @@ static int do_glob(char *buf, size_t pos, int type, char *pat, const struct glob
 	}
 	int old_errno = errno;
 	struct dirent *de;
-	while (errno=0, de=readdir(dir)) {
+	while (errno=0, de=ctx->readdir(dir)) {
 		/* Quickly skip non-directories when there's pattern left. */
 		if (p2 && de->d_type && de->d_type!=DT_DIR && de->d_type!=DT_LNK)
 			continue;
@@ -165,13 +172,13 @@ static int do_glob(char *buf, size_t pos, int type, char *pat, const struct glob
 		if (p2) *p2 = saved_sep;
 		int r = do_glob(buf, pos+l, de->d_type, p2 ? p2 : "", ctx);
 		if (r) {
-			closedir(dir);
+			ctx->closedir(dir);
 			return r;
 		}
 	}
 	int readerr = errno;
 	if (p2) *p2 = saved_sep;
-	closedir(dir);
+	ctx->closedir(dir);
 	if (readerr && (ctx->errfunc(buf, errno) || (ctx->flags & GLOB_ERR)))
 		return GLOB_ABORTED;
 	errno = old_errno;
@@ -230,6 +237,10 @@ static int expand_tilde(char **pat, char *buf, size_t *pos)
 	return 0;
 }
 
+static void wrap_closedir(void *p) { __closedir(p); }
+static struct dirent *wrap_readdir(void *d) { return __readdir(d); }
+static void *wrap_opendir(const char *path) { return __opendir(path); }
+
 int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, int err), glob_t *restrict g)
 {
 	struct match head = { .next = NULL }, *tail = &head;
@@ -259,6 +270,11 @@ int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, i
 				.tail = &tail,
 				.flags = flags,
 				.errfunc = errfunc,
+				.closedir = (flags & GLOB_ALTDIRFUNC) ? g->gl_closedir : wrap_closedir,
+				.readdir = (flags & GLOB_ALTDIRFUNC) ? g->gl_readdir : wrap_readdir,
+				.opendir = (flags & GLOB_ALTDIRFUNC) ? g->gl_opendir : wrap_opendir,
+				.lstat = (flags & GLOB_ALTDIRFUNC) ? g->gl_lstat : lstat,
+				.stat = (flags & GLOB_ALTDIRFUNC) ? g->gl_stat : stat,
 			};
 			error = do_glob(buf, pos, 0, s, &ctx);
 		}
@@ -326,3 +342,48 @@ void globfree(glob_t *g)
 
 weak_alias(glob, glob64);
 weak_alias(globfree, globfree64);
+
+/*
+ * Following code exists to work-around UB by different versions of glob_t
+ * (BSD/GNU vs POSIX), which are required to avoid namespace contamination.
+ */
+#undef __dirent
+#undef __stat
+struct __dirent;
+struct __stat;
+typedef struct {
+	size_t gl_pathc;
+	char **gl_pathv;
+	size_t gl_offs;
+	int __dummy1;
+
+	void (*gl_closedir)(void *);
+	struct __dirent *(*gl_readdir)(void *);
+	void *(*gl_opendir)(const char *);
+	int (*gl_lstat)(const char *__restrict, struct __stat *__restrict);
+	int (*gl_stat)(const char *__restrict, struct __stat *__restrict);
+} __posix_glob_t;
+
+#define CMP_MEMB(A, B, memb) ( \
+	offsetof(A, memb) == offsetof(B, memb) && \
+	sizeof(((A*)0)->memb) == sizeof(((B*)0)->memb))
+
+int __posix_glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, int err), __posix_glob_t *restrict g)
+{
+	/* statically check size and member offsets */
+	switch(1) {
+	case 0:
+	case (sizeof(__posix_glob_t) == sizeof(glob_t) &&
+	      CMP_MEMB(__posix_glob_t, glob_t, gl_pathc) &&
+	      CMP_MEMB(__posix_glob_t, glob_t, gl_pathv) &&
+	      CMP_MEMB(__posix_glob_t, glob_t, gl_offs) &&
+	      CMP_MEMB(__posix_glob_t, glob_t, gl_closedir) &&
+	      CMP_MEMB(__posix_glob_t, glob_t, gl_readdir) &&
+	      CMP_MEMB(__posix_glob_t, glob_t, gl_opendir) &&
+	      CMP_MEMB(__posix_glob_t, glob_t, gl_lstat) &&
+	      CMP_MEMB(__posix_glob_t, glob_t, gl_stat)):
+		break;
+	}
+
+	return glob(pat, flags, errfunc, (glob_t *)g);
+}
-- 
2.33.0


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-08-27 20:50 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-27 20:49 [musl] [PATCH v5 1/3] glob: introduce context struct for do_glob Ismael Luceno
2021-08-27 20:49 ` [musl] [PATCH v5 2/3] add internal aliases __opendir, __readdir and __closedir Ismael Luceno
2021-08-27 20:49 ` [musl] [PATCH v5 3/3] glob: implement GLOB_ALTDIRFUNC et al Ismael Luceno

mailing list of musl libc

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.vuxu.org/musl

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 musl musl/ https://inbox.vuxu.org/musl \
		musl@inbox.vuxu.org
	public-inbox-index musl

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.vuxu.org/vuxu.archive.musl


code repositories for the project(s) associated with this inbox:

	https://git.vuxu.org/mirror/musl/

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git