mailing list of musl libc
 help / color / mirror / code / Atom feed
From: Rich Felker <dalias@libc.org>
To: musl@lists.openwall.com
Subject: Re: fix nftw when called with paths ending in slash(es)
Date: Wed, 6 Sep 2017 20:08:19 -0400	[thread overview]
Message-ID: <20170907000819.GP1627@brightrain.aerifal.cx> (raw)
In-Reply-To: <1488880058.29564.0@mail.zhasha.com>

[-- Attachment #1: Type: text/plain, Size: 6164 bytes --]

On Tue, Mar 07, 2017 at 10:47:38AM +0100, Joakim Sindholt wrote:
> I sent a version of this patch to the list previously but I believe the
> opinion was that nftw should preserve slashes as best it could, so
> here's one that does that.
> 
> Using the test program from man nftw:
> 
> $ ./a.out ftw
> d    0    4096   ftw                                      0 ftw
> d    1    4096   ftw/a                                    4 a
> f    2       0   ftw/a/f                                  6 f
> d    2    4096   ftw/a/d                                  6 d
> d    3    4096   ftw/a/d/e                                8 e
> d    1    4096   ftw/b                                    4 b
> f    2       0   ftw/b/g                                  6 g
> f    1       0   ftw/c                                    4 c
> $ ./a.out ftw/
> d    0    4096   ftw                                      0 ftw
> d    1    4096   ftw/a                                    4 a
> f    2       0   ftw/a/f                                  6 f
> d    2    4096   ftw/a/d                                  6 d
> d    3    4096   ftw/a/d/e                                8 e
> d    1    4096   ftw/b                                    4 b
> f    2       0   ftw/b/g                                  6 g
> f    1       0   ftw/c                                    4 c
> $ ./a.out ftw//
> d    0    4096   ftw                                      0 ftw
> d    1    4096   ftw//a                                   5 a
> f    2       0   ftw//a/f                                 7 f
> d    2    4096   ftw//a/d                                 7 d
> d    3    4096   ftw//a/d/e                               9 e
> d    1    4096   ftw//b                                   5 b
> f    2       0   ftw//b/g                                 7 g
> f    1       0   ftw//c                                   5 c
> 
> And on the root
> 
> $ ./a.out / | sed 4q
> d    0    4096   /                                        0 /
> d    1    4096   /projects                                1 projects
> d    2    4096   /projects/p9.git                         10 p9.git
> d    3    4096   /projects/p9.git/hooks                   17 hooks
> $ ./a.out // | sed 4q
> d    0    4096   //                                       1 /
> d    1    4096   //projects                               2 projects
> d    2    4096   //projects/p9.git                        11 p9.git
> d    3    4096   //projects/p9.git/hooks                  18 hooks
> 
> However glibc does this:
> 
> $ ./a.out / | sed 4q
> d    0    4096   /                                        1
> d    1    4096   /projects                                1 projects
> d    2    4096   /projects/p9.git                         10 p9.git
> d    3    4096   /projects/p9.git/hooks                   17 hooks
> 
> The standard just says:
> > The value of base is the offset of the object's filename in the
> > pathname passed as the first argument to fn
> 
> Which I believe should be interpreted as the root dir being called /
> but I'm not sure. It's a simple change either way.

> >From 579501476c880bff651ec3ea5d3116ae26c9941e Mon Sep 17 00:00:00 2001
> From: Joakim Sindholt <opensource@zhasha.com>
> Date: Tue, 7 Mar 2017 10:31:37 +0100
> Subject: [PATCH] fix nftw when called with paths ending in slash(es)
> 
> ---
>  src/misc/nftw.c | 24 ++++++++++++++++++++----
>  1 file changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/src/misc/nftw.c b/src/misc/nftw.c
> index efb2b89..d82b909 100644
> --- a/src/misc/nftw.c
> +++ b/src/misc/nftw.c
> @@ -22,13 +22,13 @@ struct history
>  
>  static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, struct FTW *), int fd_limit, int flags, struct history *h)
>  {
> -	size_t l = strlen(path), j = l && path[l-1]=='/' ? l-1 : l;
> +	size_t l = strlen(path), j = l && path[l-1]=='/' ? l-1 : l, k;
>  	struct stat st;
>  	struct history new;
>  	int type;
>  	int r;
>  	struct FTW lev;
> -	char *name;
> +	char *end = 0;
>  
>  	if ((flags & FTW_PHYS) ? lstat(path, &st) : stat(path, &st) < 0) {
>  		if (!(flags & FTW_PHYS) && errno==ENOENT && !lstat(path, &st))
> @@ -53,13 +53,29 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
>  	new.dev = st.st_dev;
>  	new.ino = st.st_ino;
>  	new.level = h ? h->level+1 : 0;
> -	new.base = l+1;
> +	new.base = j+1;
>  	
>  	lev.level = new.level;
> -	lev.base = h ? h->base : (name=strrchr(path, '/')) ? name-path : 0;
> +	if (h) {
> +		lev.base = h->base;
> +	} else {
> +		lev.base = k = j;
> +		while (k > 0 && path[k] == '/')
> +			--k;
> +		if (k > 0) {
> +			end = path+k+1;
> +			while (k > 0 && path[k-1] != '/')
> +				--k;
> +			lev.base = k;
> +		}
> +	}
>  
> +	if (end)
> +		*end = '\0';
>  	if (!(flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev)))
>  		return r;
> +	if (end)
> +		*end = '/';
>  
>  	for (; h; h = h->chain)
>  		if (h->dev == st.st_dev && h->ino == st.st_ino)
> -- 
> 2.10.2
> 

A couple things I noticed finally giving this proper review attention:

1. The logic to remove trailing slashes from the top-level start path
   when reporting to fn() (i.e. *end=0) only runs when FTW_DEPTH is
   clear. The trailing slash(es) are visible when FTW_DEPTH is set
   since fn() is called from a different place in that case.

2. Normalizing the start path by removing all trailing slashes except
   possibly the first one in the case of the root dir results in //
   being normalized to /. This is mostly harmless on Linux, but in
   theory POSIX allows // to be special and distinct from /, and I
   don't think we assume elsewhere in musl that //=/.

What do you think of the attached simplified version of your patch
which just fixes the bug (wrong struct FTW base) but doesn't remove
any trailing slashes from the input path string?

I don't see a reason trailing slashes that match the input are wrong
when reporting the top-level of the tree, but if you think it's better
to suppress them, perhaps we should special-case the root, either
doing what glibc does and reporting a zero-length base name, or always
reporting the full input sequence of slashes.

Rich

[-- Attachment #2: nftw.diff --]
[-- Type: text/plain, Size: 955 bytes --]

diff --git a/src/misc/nftw.c b/src/misc/nftw.c
index efb2b89..eb9014b 100644
--- a/src/misc/nftw.c
+++ b/src/misc/nftw.c
@@ -28,7 +28,6 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
 	int type;
 	int r;
 	struct FTW lev;
-	char *name;
 
 	if ((flags & FTW_PHYS) ? lstat(path, &st) : stat(path, &st) < 0) {
 		if (!(flags & FTW_PHYS) && errno==ENOENT && !lstat(path, &st))
@@ -53,10 +52,17 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int,
 	new.dev = st.st_dev;
 	new.ino = st.st_ino;
 	new.level = h ? h->level+1 : 0;
-	new.base = l+1;
+	new.base = j+1;
 	
 	lev.level = new.level;
-	lev.base = h ? h->base : (name=strrchr(path, '/')) ? name-path : 0;
+	if (h) {
+		lev.base = h->base;
+	} else {
+		size_t k;
+		for (k=j; k && path[k]=='/'; k--);
+		for (; k && path[k-1]!='/'; k--);
+		lev.base = k;
+	}
 
 	if (!(flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev)))
 		return r;

      reply	other threads:[~2017-09-07  0:08 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-07  9:47 Joakim Sindholt
2017-09-07  0:08 ` Rich Felker [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170907000819.GP1627@brightrain.aerifal.cx \
    --to=dalias@libc.org \
    --cc=musl@lists.openwall.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).