mailing list of musl libc
 help / color / mirror / code / Atom feed
* fix nftw when called with paths ending in slash(es)
@ 2017-03-07  9:47 Joakim Sindholt
  2017-09-07  0:08 ` Rich Felker
  0 siblings, 1 reply; 2+ messages in thread
From: Joakim Sindholt @ 2017-03-07  9:47 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 2910 bytes --]

I sent a version of this patch to the list previously but I believe the
opinion was that nftw should preserve slashes as best it could, so
here's one that does that.

Using the test program from man nftw:

$ ./a.out ftw
d    0    4096   ftw                                      0 ftw
d    1    4096   ftw/a                                    4 a
f    2       0   ftw/a/f                                  6 f
d    2    4096   ftw/a/d                                  6 d
d    3    4096   ftw/a/d/e                                8 e
d    1    4096   ftw/b                                    4 b
f    2       0   ftw/b/g                                  6 g
f    1       0   ftw/c                                    4 c
$ ./a.out ftw/
d    0    4096   ftw                                      0 ftw
d    1    4096   ftw/a                                    4 a
f    2       0   ftw/a/f                                  6 f
d    2    4096   ftw/a/d                                  6 d
d    3    4096   ftw/a/d/e                                8 e
d    1    4096   ftw/b                                    4 b
f    2       0   ftw/b/g                                  6 g
f    1       0   ftw/c                                    4 c
$ ./a.out ftw//
d    0    4096   ftw                                      0 ftw
d    1    4096   ftw//a                                   5 a
f    2       0   ftw//a/f                                 7 f
d    2    4096   ftw//a/d                                 7 d
d    3    4096   ftw//a/d/e                               9 e
d    1    4096   ftw//b                                   5 b
f    2       0   ftw//b/g                                 7 g
f    1       0   ftw//c                                   5 c

And on the root

$ ./a.out / | sed 4q
d    0    4096   /                                        0 /
d    1    4096   /projects                                1 projects
d    2    4096   /projects/p9.git                         10 p9.git
d    3    4096   /projects/p9.git/hooks                   17 hooks
$ ./a.out // | sed 4q
d    0    4096   //                                       1 /
d    1    4096   //projects                               2 projects
d    2    4096   //projects/p9.git                        11 p9.git
d    3    4096   //projects/p9.git/hooks                  18 hooks

However glibc does this:

$ ./a.out / | sed 4q
d    0    4096   /                                        1
d    1    4096   /projects                                1 projects
d    2    4096   /projects/p9.git                         10 p9.git
d    3    4096   /projects/p9.git/hooks                   17 hooks

The standard just says:
 > The value of base is the offset of the object's filename in the
 > pathname passed as the first argument to fn

Which I believe should be interpreted as the root dir being called /
but I'm not sure. It's a simple change either way.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-fix-nftw-when-called-with-paths-ending-in-slash-es.patch --]
[-- Type: text/x-patch, Size: 1758 bytes --]

From 579501476c880bff651ec3ea5d3116ae26c9941e Mon Sep 17 00:00:00 2001
From: Joakim Sindholt <opensource@zhasha.com>
Date: Tue, 7 Mar 2017 10:31:37 +0100
Subject: [PATCH] fix nftw when called with paths ending in slash(es)

---
 src/misc/nftw.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/misc/nftw.c b/src/misc/nftw.c
index efb2b89..d82b909 100644
--- a/src/misc/nftw.c
+++ b/src/misc/nftw.c
@@ -22,13 +22,13 @@ struct history
 
 static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, struct FTW *), int fd_limit, int flags, struct history *h)
 {
-	size_t l = strlen(path), j = l && path[l-1]=='/' ? l-1 : l;
+	size_t l = strlen(path), j = l && path[l-1]=='/' ? l-1 : l, k;
 	struct stat st;
 	struct history new;
 	int type;
 	int r;
 	struct FTW lev;
-	char *name;
+	char *end = 0;
 
 	if ((flags & FTW_PHYS) ? lstat(path, &st) : stat(path, &st) < 0) {
 		if (!(flags & FTW_PHYS) && errno==ENOENT && !lstat(path, &st))
@@ -53,13 +53,29 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
 	new.dev = st.st_dev;
 	new.ino = st.st_ino;
 	new.level = h ? h->level+1 : 0;
-	new.base = l+1;
+	new.base = j+1;
 	
 	lev.level = new.level;
-	lev.base = h ? h->base : (name=strrchr(path, '/')) ? name-path : 0;
+	if (h) {
+		lev.base = h->base;
+	} else {
+		lev.base = k = j;
+		while (k > 0 && path[k] == '/')
+			--k;
+		if (k > 0) {
+			end = path+k+1;
+			while (k > 0 && path[k-1] != '/')
+				--k;
+			lev.base = k;
+		}
+	}
 
+	if (end)
+		*end = '\0';
 	if (!(flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev)))
 		return r;
+	if (end)
+		*end = '/';
 
 	for (; h; h = h->chain)
 		if (h->dev == st.st_dev && h->ino == st.st_ino)
-- 
2.10.2


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: fix nftw when called with paths ending in slash(es)
  2017-03-07  9:47 fix nftw when called with paths ending in slash(es) Joakim Sindholt
@ 2017-09-07  0:08 ` Rich Felker
  0 siblings, 0 replies; 2+ messages in thread
From: Rich Felker @ 2017-09-07  0:08 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 6164 bytes --]

On Tue, Mar 07, 2017 at 10:47:38AM +0100, Joakim Sindholt wrote:
> I sent a version of this patch to the list previously but I believe the
> opinion was that nftw should preserve slashes as best it could, so
> here's one that does that.
> 
> Using the test program from man nftw:
> 
> $ ./a.out ftw
> d    0    4096   ftw                                      0 ftw
> d    1    4096   ftw/a                                    4 a
> f    2       0   ftw/a/f                                  6 f
> d    2    4096   ftw/a/d                                  6 d
> d    3    4096   ftw/a/d/e                                8 e
> d    1    4096   ftw/b                                    4 b
> f    2       0   ftw/b/g                                  6 g
> f    1       0   ftw/c                                    4 c
> $ ./a.out ftw/
> d    0    4096   ftw                                      0 ftw
> d    1    4096   ftw/a                                    4 a
> f    2       0   ftw/a/f                                  6 f
> d    2    4096   ftw/a/d                                  6 d
> d    3    4096   ftw/a/d/e                                8 e
> d    1    4096   ftw/b                                    4 b
> f    2       0   ftw/b/g                                  6 g
> f    1       0   ftw/c                                    4 c
> $ ./a.out ftw//
> d    0    4096   ftw                                      0 ftw
> d    1    4096   ftw//a                                   5 a
> f    2       0   ftw//a/f                                 7 f
> d    2    4096   ftw//a/d                                 7 d
> d    3    4096   ftw//a/d/e                               9 e
> d    1    4096   ftw//b                                   5 b
> f    2       0   ftw//b/g                                 7 g
> f    1       0   ftw//c                                   5 c
> 
> And on the root
> 
> $ ./a.out / | sed 4q
> d    0    4096   /                                        0 /
> d    1    4096   /projects                                1 projects
> d    2    4096   /projects/p9.git                         10 p9.git
> d    3    4096   /projects/p9.git/hooks                   17 hooks
> $ ./a.out // | sed 4q
> d    0    4096   //                                       1 /
> d    1    4096   //projects                               2 projects
> d    2    4096   //projects/p9.git                        11 p9.git
> d    3    4096   //projects/p9.git/hooks                  18 hooks
> 
> However glibc does this:
> 
> $ ./a.out / | sed 4q
> d    0    4096   /                                        1
> d    1    4096   /projects                                1 projects
> d    2    4096   /projects/p9.git                         10 p9.git
> d    3    4096   /projects/p9.git/hooks                   17 hooks
> 
> The standard just says:
> > The value of base is the offset of the object's filename in the
> > pathname passed as the first argument to fn
> 
> Which I believe should be interpreted as the root dir being called /
> but I'm not sure. It's a simple change either way.

> >From 579501476c880bff651ec3ea5d3116ae26c9941e Mon Sep 17 00:00:00 2001
> From: Joakim Sindholt <opensource@zhasha.com>
> Date: Tue, 7 Mar 2017 10:31:37 +0100
> Subject: [PATCH] fix nftw when called with paths ending in slash(es)
> 
> ---
>  src/misc/nftw.c | 24 ++++++++++++++++++++----
>  1 file changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/src/misc/nftw.c b/src/misc/nftw.c
> index efb2b89..d82b909 100644
> --- a/src/misc/nftw.c
> +++ b/src/misc/nftw.c
> @@ -22,13 +22,13 @@ struct history
>  
>  static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, struct FTW *), int fd_limit, int flags, struct history *h)
>  {
> -	size_t l = strlen(path), j = l && path[l-1]=='/' ? l-1 : l;
> +	size_t l = strlen(path), j = l && path[l-1]=='/' ? l-1 : l, k;
>  	struct stat st;
>  	struct history new;
>  	int type;
>  	int r;
>  	struct FTW lev;
> -	char *name;
> +	char *end = 0;
>  
>  	if ((flags & FTW_PHYS) ? lstat(path, &st) : stat(path, &st) < 0) {
>  		if (!(flags & FTW_PHYS) && errno==ENOENT && !lstat(path, &st))
> @@ -53,13 +53,29 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
>  	new.dev = st.st_dev;
>  	new.ino = st.st_ino;
>  	new.level = h ? h->level+1 : 0;
> -	new.base = l+1;
> +	new.base = j+1;
>  	
>  	lev.level = new.level;
> -	lev.base = h ? h->base : (name=strrchr(path, '/')) ? name-path : 0;
> +	if (h) {
> +		lev.base = h->base;
> +	} else {
> +		lev.base = k = j;
> +		while (k > 0 && path[k] == '/')
> +			--k;
> +		if (k > 0) {
> +			end = path+k+1;
> +			while (k > 0 && path[k-1] != '/')
> +				--k;
> +			lev.base = k;
> +		}
> +	}
>  
> +	if (end)
> +		*end = '\0';
>  	if (!(flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev)))
>  		return r;
> +	if (end)
> +		*end = '/';
>  
>  	for (; h; h = h->chain)
>  		if (h->dev == st.st_dev && h->ino == st.st_ino)
> -- 
> 2.10.2
> 

A couple things I noticed finally giving this proper review attention:

1. The logic to remove trailing slashes from the top-level start path
   when reporting to fn() (i.e. *end=0) only runs when FTW_DEPTH is
   clear. The trailing slash(es) are visible when FTW_DEPTH is set
   since fn() is called from a different place in that case.

2. Normalizing the start path by removing all trailing slashes except
   possibly the first one in the case of the root dir results in //
   being normalized to /. This is mostly harmless on Linux, but in
   theory POSIX allows // to be special and distinct from /, and I
   don't think we assume elsewhere in musl that //=/.

What do you think of the attached simplified version of your patch
which just fixes the bug (wrong struct FTW base) but doesn't remove
any trailing slashes from the input path string?

I don't see a reason trailing slashes that match the input are wrong
when reporting the top-level of the tree, but if you think it's better
to suppress them, perhaps we should special-case the root, either
doing what glibc does and reporting a zero-length base name, or always
reporting the full input sequence of slashes.

Rich

[-- Attachment #2: nftw.diff --]
[-- Type: text/plain, Size: 955 bytes --]

diff --git a/src/misc/nftw.c b/src/misc/nftw.c
index efb2b89..eb9014b 100644
--- a/src/misc/nftw.c
+++ b/src/misc/nftw.c
@@ -28,7 +28,6 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
 	int type;
 	int r;
 	struct FTW lev;
-	char *name;
 
 	if ((flags & FTW_PHYS) ? lstat(path, &st) : stat(path, &st) < 0) {
 		if (!(flags & FTW_PHYS) && errno==ENOENT && !lstat(path, &st))
@@ -53,10 +52,17 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
 	new.dev = st.st_dev;
 	new.ino = st.st_ino;
 	new.level = h ? h->level+1 : 0;
-	new.base = l+1;
+	new.base = j+1;
 	
 	lev.level = new.level;
-	lev.base = h ? h->base : (name=strrchr(path, '/')) ? name-path : 0;
+	if (h) {
+		lev.base = h->base;
+	} else {
+		size_t k;
+		for (k=j; k && path[k]=='/'; k--);
+		for (; k && path[k-1]!='/'; k--);
+		lev.base = k;
+	}
 
 	if (!(flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev)))
 		return r;

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-09-07  0:08 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-07  9:47 fix nftw when called with paths ending in slash(es) Joakim Sindholt
2017-09-07  0:08 ` Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).