mailing list of musl libc
 help / color / mirror / code / Atom feed
* nftw miscalculates FTW.base when pathnames end in /
@ 2016-05-04 13:42 Joakim Sindholt
  2016-05-14  3:01 ` Rich Felker
  0 siblings, 1 reply; 3+ messages in thread
From: Joakim Sindholt @ 2016-05-04 13:42 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 1675 bytes --]

Running the test program from the glibc ftw man page[1]:

+zhasha@wirbelwind /home/zhasha ; ./6.out test
d    0    4096   test                                     0 test
d    1    4096   test/a                                   5 a
f    2       0   test/a/1                                 7 1
d    1    4096   test/b                                   5 b
f    2       0   test/b/1                                 7 1
f    2       0   test/b/2                                 7 2
d    1    4096   test/ddd                                 5 ddd
f    2       0   test/ddd/5                               9 5
d    1    4096   test/cc                                  5 cc
f    2       0   test/cc/3                                8 3
+zhasha@wirbelwind /home/zhasha ; ./6.out test/
d    0    4096   test/                                    4 /
d    1    4096   test/a                                   6
f    2       0   test/a/1                                 7 1
d    1    4096   test/b                                   6
f    2       0   test/b/1                                 7 1
f    2       0   test/b/2                                 7 2
d    1    4096   test/ddd                                 6 dd
f    2       0   test/ddd/5                               9 5
d    1    4096   test/cc                                  6 c
f    2       0   test/cc/3                                8 3

The final 2 columns are the file name length and path + ftw->base
respectively, which is off by one when the path ends in /. It also
seems to confuse the initial dir name.

I have attached a fix but I'm not sure it's a particularly good one.

[1] http://linux.die.net/man/3/ftw

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: nftw.patch --]
[-- Type: text/x-patch, Size: 563 bytes --]

diff --git a/src/misc/nftw.c b/src/misc/nftw.c
index efb2b89..7c2c0d3 100644
--- a/src/misc/nftw.c
+++ b/src/misc/nftw.c
@@ -108,11 +108,13 @@ int nftw(const char *path, int (*fn)(const char *, const struct stat *, int, str
 	if (fd_limit <= 0) return 0;
 
 	l = strlen(path);
+	while (l && path[l-1]=='/') --l;
 	if (l > PATH_MAX) {
 		errno = ENAMETOOLONG;
 		return -1;
 	}
-	memcpy(pathbuf, path, l+1);
+	memcpy(pathbuf, path, l);
+	pathbuf[l]='\0';
 	
 	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
 	r = do_nftw(pathbuf, fn, fd_limit, flags, NULL);

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: nftw miscalculates FTW.base when pathnames end in /
  2016-05-04 13:42 nftw miscalculates FTW.base when pathnames end in / Joakim Sindholt
@ 2016-05-14  3:01 ` Rich Felker
  2016-07-12  9:41   ` Joakim Sindholt
  0 siblings, 1 reply; 3+ messages in thread
From: Rich Felker @ 2016-05-14  3:01 UTC (permalink / raw)
  To: musl

On Wed, May 04, 2016 at 03:42:19PM +0200, Joakim Sindholt wrote:
> Running the test program from the glibc ftw man page[1]:
> 
> +zhasha@wirbelwind /home/zhasha ; ./6.out test
> d    0    4096   test                                     0 test
> d    1    4096   test/a                                   5 a
> f    2       0   test/a/1                                 7 1
> d    1    4096   test/b                                   5 b
> f    2       0   test/b/1                                 7 1
> f    2       0   test/b/2                                 7 2
> d    1    4096   test/ddd                                 5 ddd
> f    2       0   test/ddd/5                               9 5
> d    1    4096   test/cc                                  5 cc
> f    2       0   test/cc/3                                8 3
> +zhasha@wirbelwind /home/zhasha ; ./6.out test/
> d    0    4096   test/                                    4 /
> d    1    4096   test/a                                   6
> f    2       0   test/a/1                                 7 1
> d    1    4096   test/b                                   6
> f    2       0   test/b/1                                 7 1
> f    2       0   test/b/2                                 7 2
> d    1    4096   test/ddd                                 6 dd
> f    2       0   test/ddd/5                               9 5
> d    1    4096   test/cc                                  6 c
> f    2       0   test/cc/3                                8 3
> 
> The final 2 columns are the file name length and path + ftw->base
> respectively, which is off by one when the path ends in /. It also
> seems to confuse the initial dir name.
> 
> I have attached a fix but I'm not sure it's a particularly good one.
> 
> [1] http://linux.die.net/man/3/ftw

> diff --git a/src/misc/nftw.c b/src/misc/nftw.c
> index efb2b89..7c2c0d3 100644
> --- a/src/misc/nftw.c
> +++ b/src/misc/nftw.c
> @@ -108,11 +108,13 @@ int nftw(const char *path, int (*fn)(const char *, const struct stat *, int, str
>  	if (fd_limit <= 0) return 0;
>  
>  	l = strlen(path);
> +	while (l && path[l-1]=='/') --l;
>  	if (l > PATH_MAX) {
>  		errno = ENAMETOOLONG;
>  		return -1;
>  	}
> -	memcpy(pathbuf, path, l+1);
> +	memcpy(pathbuf, path, l);
> +	pathbuf[l]='\0';
>  	
>  	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
>  	r = do_nftw(pathbuf, fn, fd_limit, flags, NULL);

Thanks for reminding me to look at this today. I've actually read the
code, and while your patch should avoid the problem, I agree it's
probably not the right fix (interpreting and alterring the part of the
path passed by the application). Instead, I think the error is in the
computations of new.base and lev.base.

new.base should be equal to the offset at which de->d_name is pasted
into the buffer, which is j+1, not l+1; these only differ when path
ends in a slash.

lev.base is computed separately depending on whether or not there's a
previous history; for the no-history case, it should be computed by
skipping all trailing slashes, then backing up until the previous
slash or beginning of string, whichever is hit first. But rather than
special-casing !h in do_nftw, the top-level nftw function should
probably set up a fake "-1 level" history structure to pass in. This
gathers the special top-level logic all in one place.

Can you try these ideas and see if they work for you? If not I'll give
them a try soon, but I've got a few other things I need to do first.

Rich


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: nftw miscalculates FTW.base when pathnames end in /
  2016-05-14  3:01 ` Rich Felker
@ 2016-07-12  9:41   ` Joakim Sindholt
  0 siblings, 0 replies; 3+ messages in thread
From: Joakim Sindholt @ 2016-07-12  9:41 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 1584 bytes --]

On Sat, May 14, 2016 at 5:01 AM, Rich Felker <dalias@libc.org> wrote:
> lev.base is computed separately depending on whether or not there's a
> previous history; for the no-history case, it should be computed by
> skipping all trailing slashes, then backing up until the previous
> slash or beginning of string, whichever is hit first. But rather than
> special-casing !h in do_nftw, the top-level nftw function should
> probably set up a fake "-1 level" history structure to pass in. This
> gathers the special top-level logic all in one place.

I had a look at copying the entire path over and adjusting the first
.base accordingly. The problem then became that the root dir would come
out as being named something////// depending on how many slashed I
added. This runs counter to the glibc behavior which also just hacks off
the slashes. In the end I think it's cleaner to just remove trailing
slashes than to have them included in all but the root dir.

As per your recommendation I also set up a fake -1 history level and
removed the special casing for !h, but I'm unsure what default values
should be used for dev and ino.

Then, as we talked about on IRC, I tried to reduce the stack usage of
do_nftw, which was 296 B when all was said and done. By moving a lot of
the context stuff explicitly into nftw it got all the way down to 88 on
clang and 104 on gcc. Depending on a few minor details those two numbers
would switch around. I'm not sure why. In any case it's a third of what
it was and I would consider that a substantial win.
The code isn't exactly beautiful though.



[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2.1: 0002-optimize-nftw-for-stack-usage.patch --]
[-- Type: text/x-patch, Size: 4929 bytes --]

From 5244c0e3fec2af28bf3922b4f0e45b209880f27e Mon Sep 17 00:00:00 2001
From: Joakim Sindholt <opensource@zhasha.com>
Date: Tue, 12 Jul 2016 11:12:24 +0200
Subject: [PATCH 2/2] optimize nftw for stack usage

Before this we would see 296 bytes of stack used per frame on amd64
using clang, and after it's all the way down to 88 bytes per frame. Less
than a third of what it was and much more reasonable.
---
 src/misc/nftw.c | 91 ++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 58 insertions(+), 33 deletions(-)

diff --git a/src/misc/nftw.c b/src/misc/nftw.c
index 7df7226..6057d89 100644
--- a/src/misc/nftw.c
+++ b/src/misc/nftw.c
@@ -8,6 +8,18 @@
 #include <pthread.h>
 #include "libc.h"
 
+struct context
+{
+	char *path;
+	int (*fn)(const char *, const struct stat *, int, struct FTW *);
+	int fd_limit;
+	int flags;
+
+	size_t l;
+	struct stat st;
+	struct FTW lev;
+};
+
 struct history
 {
 	struct history *chain;
@@ -20,68 +32,70 @@ struct history
 #undef dirfd
 #define dirfd(d) (*(int *)d)
 
-static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, struct FTW *), int fd_limit, int flags, struct history *h)
+static int do_nftw(struct context *c, struct history *h)
 {
-	size_t l = strlen(path), j = l && path[l-1]=='/' ? l-1 : l;
-	struct stat st;
 	struct history new;
 	int type;
 	int r;
-	struct FTW lev;
-	char *name;
 
-	if ((flags & FTW_PHYS) ? lstat(path, &st) : stat(path, &st) < 0) {
-		if (!(flags & FTW_PHYS) && errno==ENOENT && !lstat(path, &st))
+	if ((c->flags & FTW_PHYS) ? lstat(c->path, &c->st) : stat(c->path, &c->st) < 0) {
+		if (!(c->flags & FTW_PHYS) && errno==ENOENT && !lstat(c->path, &c->st))
 			type = FTW_SLN;
 		else if (errno != EACCES) return -1;
 		else type = FTW_NS;
-	} else if (S_ISDIR(st.st_mode)) {
-		if (access(path, R_OK) < 0) type = FTW_DNR;
-		else if (flags & FTW_DEPTH) type = FTW_DP;
+	} else if (S_ISDIR(c->st.st_mode)) {
+		if (access(c->path, R_OK) < 0) type = FTW_DNR;
+		else if (c->flags & FTW_DEPTH) type = FTW_DP;
 		else type = FTW_D;
-	} else if (S_ISLNK(st.st_mode)) {
-		if (flags & FTW_PHYS) type = FTW_SL;
+	} else if (S_ISLNK(c->st.st_mode)) {
+		if (c->flags & FTW_PHYS) type = FTW_SL;
 		else type = FTW_SLN;
 	} else {
 		type = FTW_F;
 	}
 
-	if ((flags & FTW_MOUNT) && st.st_dev != h->dev)
+	if ((c->flags & FTW_MOUNT) && c->st.st_dev != h->dev)
 		return 0;
-	
+
 	new.chain = h;
-	new.dev = st.st_dev;
-	new.ino = st.st_ino;
+	new.dev = c->st.st_dev;
+	new.ino = c->st.st_ino;
 	new.level = h->level+1;
-	new.base = j+1;
+	new.base = c->l+1;
 	
-	lev.level = new.level;
-	lev.base = h->base;
-
-	if (!(flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev)))
-		return r;
+	if (!(c->flags & FTW_DEPTH)) {
+		c->lev.level = new.level;
+		c->lev.base = h->base;
+		if ((r=c->fn(c->path, &c->st, type, &c->lev)))
+			return r;
+	}
 
 	for (; h; h = h->chain)
-		if (h->dev == st.st_dev && h->ino == st.st_ino)
+		if (h->dev == c->st.st_dev && h->ino == c->st.st_ino)
 			return 0;
 
-	if ((type == FTW_D || type == FTW_DP) && fd_limit) {
-		DIR *d = opendir(path);
+	if ((type == FTW_D || type == FTW_DP) && c->fd_limit) {
+		DIR *d = opendir(c->path);
 		if (d) {
 			struct dirent *de;
 			while ((de = readdir(d))) {
+				size_t dl;
 				if (de->d_name[0] == '.'
 				 && (!de->d_name[1]
 				  || (de->d_name[1]=='.'
 				   && !de->d_name[2]))) continue;
-				if (strlen(de->d_name) >= PATH_MAX-l) {
+				if ((dl=strlen(de->d_name)) > PATH_MAX-new.base) {
 					errno = ENAMETOOLONG;
 					closedir(d);
 					return -1;
 				}
-				path[j]='/';
-				strcpy(path+j+1, de->d_name);
-				if ((r=do_nftw(path, fn, fd_limit-1, flags, &new))) {
+				c->l = new.base+dl;
+				c->path[new.base-1]='/';
+				memcpy(c->path+new.base, de->d_name, dl+1);
+				--c->fd_limit;
+				r=do_nftw(c, &new);
+				++c->fd_limit;
+				if (r) {
 					closedir(d);
 					return r;
 				}
@@ -91,16 +105,21 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
 			return -1;
 		}
 	}
+	c->path[new.base-1]=0;
 
-	path[l] = 0;
-	if ((flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev)))
-		return r;
+	if (c->flags & FTW_DEPTH) {
+		c->lev.level = new.level;
+		c->lev.base = h->base;
+		if ((r=c->fn(c->path, &c->st, type, &c->lev)))
+			return r;
+	}
 
 	return 0;
 }
 
 int nftw(const char *path, int (*fn)(const char *, const struct stat *, int, struct FTW *), int fd_limit, int flags)
 {
+	struct context c;
 	struct history h;
 	int r, cs;
 	size_t l;
@@ -126,8 +145,14 @@ int nftw(const char *path, int (*fn)(const char *, const struct stat *, int, str
 	while (h.base && pathbuf[h.base-1]!='/')
 		--h.base;
 
+	c.path = pathbuf;
+	c.fn = fn;
+	c.fd_limit = fd_limit;
+	c.flags = flags;
+	c.l = l;
+
 	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
-	r = do_nftw(pathbuf, fn, fd_limit, flags, &h);
+	r = do_nftw(&c, &h);
 	pthread_setcancelstate(cs, 0);
 	return r;
 }
-- 
2.7.3


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2.2: 0001-ignore-trailing-slashes-in-nftw-path.patch --]
[-- Type: text/x-patch, Size: 2034 bytes --]

From 9896c027a6b261e09dff68d61993bb60313e9166 Mon Sep 17 00:00:00 2001
From: Joakim Sindholt <opensource@zhasha.com>
Date: Tue, 12 Jul 2016 10:14:34 +0200
Subject: [PATCH 1/2] ignore trailing slashes in nftw path

---
 src/misc/nftw.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/misc/nftw.c b/src/misc/nftw.c
index efb2b89..7df7226 100644
--- a/src/misc/nftw.c
+++ b/src/misc/nftw.c
@@ -46,17 +46,17 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
 		type = FTW_F;
 	}
 
-	if ((flags & FTW_MOUNT) && h && st.st_dev != h->dev)
+	if ((flags & FTW_MOUNT) && st.st_dev != h->dev)
 		return 0;
 	
 	new.chain = h;
 	new.dev = st.st_dev;
 	new.ino = st.st_ino;
-	new.level = h ? h->level+1 : 0;
-	new.base = l+1;
+	new.level = h->level+1;
+	new.base = j+1;
 	
 	lev.level = new.level;
-	lev.base = h ? h->base : (name=strrchr(path, '/')) ? name-path : 0;
+	lev.base = h->base;
 
 	if (!(flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev)))
 		return r;
@@ -101,6 +101,7 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
 
 int nftw(const char *path, int (*fn)(const char *, const struct stat *, int, struct FTW *), int fd_limit, int flags)
 {
+	struct history h;
 	int r, cs;
 	size_t l;
 	char pathbuf[PATH_MAX+1];
@@ -108,14 +109,25 @@ int nftw(const char *path, int (*fn)(const char *, const struct stat *, int, str
 	if (fd_limit <= 0) return 0;
 
 	l = strlen(path);
+	while (l && path[l-1]=='/')
+		--l;
 	if (l > PATH_MAX) {
 		errno = ENAMETOOLONG;
 		return -1;
 	}
-	memcpy(pathbuf, path, l+1);
+	memcpy(pathbuf, path, l);
+	pathbuf[l] = 0;
 	
+	h.chain = NULL;
+	h.dev = (dev_t)-1;
+	h.ino = (ino_t)-1;
+	h.level = -1;
+	h.base = l;
+	while (h.base && pathbuf[h.base-1]!='/')
+		--h.base;
+
 	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
-	r = do_nftw(pathbuf, fn, fd_limit, flags, NULL);
+	r = do_nftw(pathbuf, fn, fd_limit, flags, &h);
 	pthread_setcancelstate(cs, 0);
 	return r;
 }
-- 
2.7.3


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-07-12  9:41 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-04 13:42 nftw miscalculates FTW.base when pathnames end in / Joakim Sindholt
2016-05-14  3:01 ` Rich Felker
2016-07-12  9:41   ` Joakim Sindholt

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).