mailing list of musl libc
 help / color / mirror / code / Atom feed
* [RFC PATCH] micro-optimize __procfdname
@ 2016-02-21 11:41 Alexander Monakov
  2016-03-05  5:24 ` Rich Felker
  0 siblings, 1 reply; 9+ messages in thread
From: Alexander Monakov @ 2016-02-21 11:41 UTC (permalink / raw)
  To: musl

Hello,

I've noticed that internal function __procfdname can be slightly cleaned up by
filling the supplied buffer right-to-left and returning the last filled
position.  The patch below implements what I have in mind, and changes one
call site to demonstrate (I'll be happy to submit a patch that converts all
calls, if overall this change is desirable).

diff --git a/src/internal/procfdname.c b/src/internal/procfdname.c
index 697e0bd..cfb3f90 100644
--- a/src/internal/procfdname.c
+++ b/src/internal/procfdname.c
@@ -1,13 +1,9 @@
-void __procfdname(char *buf, unsigned fd)
+char *__procfdname_impl(char *buf, unsigned fd)
 {
-	unsigned i, j;
-	for (i=0; (buf[i] = "/proc/self/fd/"[i]); i++);
-	if (!fd) {
-		buf[i] = '0';
-		buf[i+1] = 0;
-		return;
-	}
-	for (j=fd; j; j/=10, i++);
-	buf[i] = 0;
-	for (; fd; fd/=10) buf[--i] = '0' + fd%10;
+	*buf = 0;
+	do *--buf = '0' + fd % 10;
+	while (fd /= 10);
+	for (int i = 13; i >= 0; i--)
+		*--buf = "/proc/self/fd/"[i];
+	return buf;
 }
diff --git a/src/internal/procfdname.h b/src/internal/procfdname.h
index e69de29..6d3c6e2 100644
--- a/src/internal/procfdname.h
+++ b/src/internal/procfdname.h
@@ -0,0 +1,9 @@
+#ifndef PROCFDNAME_H
+#define PROCFDNAME_H
+
+char *__procfdname_impl(char *, unsigned);
+
+#define procfdbufsize sizeof "/proc/self/fd/0123456789" + (3 * (sizeof(int)-4))
+#define procfdname(buf, fd) __procfdname_impl(buf + procfdbufsize - 1, fd)
+
+#endif
diff --git a/src/process/fexecve.c b/src/process/fexecve.c
index 6507b42..88e6b9d 100644
--- a/src/process/fexecve.c
+++ b/src/process/fexecve.c
@@ -1,13 +1,11 @@
 #include <unistd.h>
 #include <errno.h>
-
-void __procfdname(char *, unsigned);
+#include "procfdname.h"
 
 int fexecve(int fd, char *const argv[], char *const envp[])
 {
-	char buf[15 + 3*sizeof(int)];
-	__procfdname(buf, fd);
-	execve(buf, argv, envp);
+	char buf[procfdbufsize];
+	execve(procfdname(buf, fd), argv, envp);
 	if (errno == ENOENT) errno = EBADF;
 	return -1;
 }


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH] micro-optimize __procfdname
  2016-02-21 11:41 [RFC PATCH] micro-optimize __procfdname Alexander Monakov
@ 2016-03-05  5:24 ` Rich Felker
  2016-03-05  5:42   ` Alexander Monakov
  0 siblings, 1 reply; 9+ messages in thread
From: Rich Felker @ 2016-03-05  5:24 UTC (permalink / raw)
  To: musl

On Sun, Feb 21, 2016 at 02:41:21PM +0300, Alexander Monakov wrote:
> Hello,
> 
> I've noticed that internal function __procfdname can be slightly cleaned up by
> filling the supplied buffer right-to-left and returning the last filled
> position.  The patch below implements what I have in mind, and changes one
> call site to demonstrate (I'll be happy to submit a patch that converts all
> calls, if overall this change is desirable).

I really doubt this makes any major improvement, but it might help
size a bit and it might be cleaner/more readable, so it's interesting.

> diff --git a/src/internal/procfdname.c b/src/internal/procfdname.c
> index 697e0bd..cfb3f90 100644
> --- a/src/internal/procfdname.c
> +++ b/src/internal/procfdname.c
> @@ -1,13 +1,9 @@
> -void __procfdname(char *buf, unsigned fd)
> +char *__procfdname_impl(char *buf, unsigned fd)
>  {
> -	unsigned i, j;
> -	for (i=0; (buf[i] = "/proc/self/fd/"[i]); i++);
> -	if (!fd) {
> -		buf[i] = '0';
> -		buf[i+1] = 0;
> -		return;
> -	}
> -	for (j=fd; j; j/=10, i++);
> -	buf[i] = 0;
> -	for (; fd; fd/=10) buf[--i] = '0' + fd%10;
> +	*buf = 0;
> +	do *--buf = '0' + fd % 10;
> +	while (fd /= 10);
> +	for (int i = 13; i >= 0; i--)
> +		*--buf = "/proc/self/fd/"[i];
> +	return buf;
>  }
> diff --git a/src/internal/procfdname.h b/src/internal/procfdname.h
> index e69de29..6d3c6e2 100644
> --- a/src/internal/procfdname.h
> +++ b/src/internal/procfdname.h
> @@ -0,0 +1,9 @@
> +#ifndef PROCFDNAME_H
> +#define PROCFDNAME_H
> +
> +char *__procfdname_impl(char *, unsigned);
> +
> +#define procfdbufsize sizeof "/proc/self/fd/0123456789" + (3 * (sizeof(int)-4))

What is the motivation behind changing the size expression to use the
"012...9" part? It's nonobvious to me.

> +#define procfdname(buf, fd) __procfdname_impl(buf + procfdbufsize - 1, fd)

I suppose the idea of putting the offset to the end in a macro in the
header rather than in the callee is both optimization and allowing the
compiler to detect out-of-bounds pointer arithmetic?

> +
> +#endif
> diff --git a/src/process/fexecve.c b/src/process/fexecve.c
> index 6507b42..88e6b9d 100644
> --- a/src/process/fexecve.c
> +++ b/src/process/fexecve.c
> @@ -1,13 +1,11 @@
>  #include <unistd.h>
>  #include <errno.h>
> -
> -void __procfdname(char *, unsigned);
> +#include "procfdname.h"
>  
>  int fexecve(int fd, char *const argv[], char *const envp[])
>  {
> -	char buf[15 + 3*sizeof(int)];
> -	__procfdname(buf, fd);
> -	execve(buf, argv, envp);
> +	char buf[procfdbufsize];
> +	execve(procfdname(buf, fd), argv, envp);
>  	if (errno == ENOENT) errno = EBADF;
>  	return -1;
>  }

Here using the return value directly is nice but at some other call
points might we need to introduce a pointer variable to store the
pointer returned? I haven't checked yet.

Rich


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH] micro-optimize __procfdname
  2016-03-05  5:24 ` Rich Felker
@ 2016-03-05  5:42   ` Alexander Monakov
  2016-03-05  5:56     ` Rich Felker
  0 siblings, 1 reply; 9+ messages in thread
From: Alexander Monakov @ 2016-03-05  5:42 UTC (permalink / raw)
  To: musl

On Sat, 5 Mar 2016, Rich Felker wrote:
> I really doubt this makes any major improvement, but it might help
> size a bit and it might be cleaner/more readable, so it's interesting.

Yeah, this precedes a syscall so speed-wise it doesn't matter; I just
noticed two div-10 loops and saw a chance to improve size.

> > +char *__procfdname_impl(char *, unsigned);
> > +
> > +#define procfdbufsize sizeof "/proc/self/fd/0123456789" + (3 * (sizeof(int)-4))
> 
> What is the motivation behind changing the size expression to use the
> "012...9" part? It's nonobvious to me.

It just makes it obvious that there are 10 decimal places, which is how much a
32-bit unsigned int can occupy at most. I don't mind using any other style.

> > +#define procfdname(buf, fd) __procfdname_impl(buf + procfdbufsize - 1, fd)
> 
> I suppose the idea of putting the offset to the end in a macro in the
> header rather than in the callee is both optimization and allowing the
> compiler to detect out-of-bounds pointer arithmetic?

Hm, the latter is rather theoretical given the uses, right? I just made it to
make it really obvious that __procfdname_impl fills in reverse; it might be a
very minor size optimization. I don't mind dropping this and adjusting buf
with '+= procfdbufsize - 1' in the callee.

> Here using the return value directly is nice but at some other call
> points might we need to introduce a pointer variable to store the
> pointer returned? I haven't checked yet.

Yes, I went through the call sites and they are all easy to adjust; I think a
couple needed a pointer, like you said.

Alexander


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH] micro-optimize __procfdname
  2016-03-05  5:42   ` Alexander Monakov
@ 2016-03-05  5:56     ` Rich Felker
  2016-03-05  6:14       ` Alexander Monakov
  0 siblings, 1 reply; 9+ messages in thread
From: Rich Felker @ 2016-03-05  5:56 UTC (permalink / raw)
  To: musl

On Sat, Mar 05, 2016 at 08:42:16AM +0300, Alexander Monakov wrote:
> On Sat, 5 Mar 2016, Rich Felker wrote:
> > I really doubt this makes any major improvement, but it might help
> > size a bit and it might be cleaner/more readable, so it's interesting.
> 
> Yeah, this precedes a syscall so speed-wise it doesn't matter; I just
> noticed two div-10 loops and saw a chance to improve size.

*nod*

> > > +char *__procfdname_impl(char *, unsigned);
> > > +
> > > +#define procfdbufsize sizeof "/proc/self/fd/0123456789" + (3 * (sizeof(int)-4))
> > 
> > What is the motivation behind changing the size expression to use the
> > "012...9" part? It's nonobvious to me.
> 
> It just makes it obvious that there are 10 decimal places, which is how much a
> 32-bit unsigned int can occupy at most. I don't mind using any other style.

I tend to like 3*sizeof(int) just because it's an idiom I know
(pessimistic bound as if each byte could hold 0...999 range rather
than just 0...255) but your version is slightly sharper.

> > > +#define procfdname(buf, fd) __procfdname_impl(buf + procfdbufsize - 1, fd)
> > 
> > I suppose the idea of putting the offset to the end in a macro in the
> > header rather than in the callee is both optimization and allowing the
> > compiler to detect out-of-bounds pointer arithmetic?
> 
> Hm, the latter is rather theoretical given the uses, right? I just made it to

I meant I thought the compiler might be able to catch if a caller
accidentally used the wrong buffer size. Shouldn't happen anyway, but
it'd be nice to have an extra layer of verification.

> make it really obvious that __procfdname_impl fills in reverse; it might be a
> very minor size optimization. I don't mind dropping this add adjusting buf
> with '+= procfdbufsize - 1' in the callee.

Yes, making it obvious what's going on is nice too.

Actually it would be even nicer if we could use a compound literal
inside the macro as the buffer, but that would pessimize with
unnecessary initialization and eliminate a lot of the code-size
benefit, I think.

> > Here using the return value directly is nice but at some other call
> > points might we need to introduce a pointer variable to store the
> > pointer returned? I haven't checked yet.
> 
> Yes, I went through the call sites and they are all easy to adjust; I think a
> couple needed a pointer, like you said.

OK.

Rich


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH] micro-optimize __procfdname
  2016-03-05  5:56     ` Rich Felker
@ 2016-03-05  6:14       ` Alexander Monakov
  2016-03-05  6:20         ` Rich Felker
  0 siblings, 1 reply; 9+ messages in thread
From: Alexander Monakov @ 2016-03-05  6:14 UTC (permalink / raw)
  To: musl

On Sat, 5 Mar 2016, Rich Felker wrote:
> > make it really obvious that __procfdname_impl fills in reverse; it might be a
> > very minor size optimization. I don't mind dropping this add adjusting buf
> > with '+= procfdbufsize - 1' in the callee.
> 
> Yes, making it obvious what's going on is nice too.

I'm going to keep that adjustment in the macro for now, then.

> Actually it would be even nicer if we could use a compound literal
> inside the macro as the buffer, but that would pessimize with
> unnecessary initialization and eliminate a lot of the code-size
> benefit, I think.

Yep, I did consider that and arrived at a similar conclusion. Well, there's an
option of using alloca as long as no use is in a loop, but that's a bit uglier,
and as I recall it wasn't optimized to a static stack allocation.

I forgot to ask before, shouldn't __procfdname_impl have a visibility
annotation?

And likewise for other internal functions.  There are some internal functions
without hidden/internal visibility annotation, visible outside of libc.so.
That seems unintended and slightly harmful.

Alexander


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH] micro-optimize __procfdname
  2016-03-05  6:14       ` Alexander Monakov
@ 2016-03-05  6:20         ` Rich Felker
  2016-03-05  6:35           ` Alexander Monakov
  2016-03-05 15:13           ` Alexander Monakov
  0 siblings, 2 replies; 9+ messages in thread
From: Rich Felker @ 2016-03-05  6:20 UTC (permalink / raw)
  To: musl

On Sat, Mar 05, 2016 at 09:14:57AM +0300, Alexander Monakov wrote:
> On Sat, 5 Mar 2016, Rich Felker wrote:
> > > make it really obvious that __procfdname_impl fills in reverse; it might be a
> > > very minor size optimization. I don't mind dropping this add adjusting buf
> > > with '+= procfdbufsize - 1' in the callee.
> > 
> > Yes, making it obvious what's going on is nice too.
> 
> I'm going to keep that adjustment in the macro for now, then.

OK.

> > Actually it would be even nicer if we could use a compound literal
> > inside the macro as the buffer, but that would pessimize with
> > unnecessary initialization and eliminate a lot of the code-size
> > benefit, I think.
> 
> Yep, I did consider that and arrived to a similar conclusion. Well, there's an
> option of using alloca as long as no use is in a loop, but that's a bit uglier,
> and as I recall it wasn't optimized to a static stack allocation.

Yeah, alloca is a lot uglier, an extra extension we don't currently
use, and not something I would want to add.

> I forgot to ask before, shouldn't __procfdname_impl have a visibility
> annotation?
> 
> And likewise for other internal functions.  There are some internal functions
> without hidden/internal visibility annotation, visible outside of libc.so.
> That seems unintended and slightly harmful.

Yes, I'd go with only _slightly_ harmful because visibility does not
help with static linking. But of course in the static case you don't
have to worry about ABI-compat with future libc versions.

I've thought about having libc.h define a macro simply named "hidden"
for declaring things with hidden visibility; then files needing it
could just include libc.h and do stuff like:

hidden char *__procfdname_impl(...);

This would be more a matter of de-uglification than abstraction. Does
anyone particularly like or hate this idea?

Rich


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH] micro-optimize __procfdname
  2016-03-05  6:20         ` Rich Felker
@ 2016-03-05  6:35           ` Alexander Monakov
  2016-03-05 15:13           ` Alexander Monakov
  1 sibling, 0 replies; 9+ messages in thread
From: Alexander Monakov @ 2016-03-05  6:35 UTC (permalink / raw)
  To: musl

On Sat, 5 Mar 2016, Rich Felker wrote:
> I've thought about having libc.h define a macro simply named "hidden"
> for declaring things with hidden visibility; then files needing it
> could just include libc.h and do stuff like:
> 
> hidden char *__procfdname_impl(...);
> 
> This would be more a matter of de-uglification than abstraction. Does
> anyone particularly like or hate this idea?

I like it, with a further note that 'internal' would be more precise: a libc
does not hand off pointers to its internal functions to callers.

(well, 'internal' might be subtly broken in a toolchain due to zero real-world
use, but that's unlikely; it's probably handled exactly like 'hidden', and
does not help optimizers today)

Alexander


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH] micro-optimize __procfdname
  2016-03-05  6:20         ` Rich Felker
  2016-03-05  6:35           ` Alexander Monakov
@ 2016-03-05 15:13           ` Alexander Monakov
  2016-03-05 17:30             ` Alexander Monakov
  1 sibling, 1 reply; 9+ messages in thread
From: Alexander Monakov @ 2016-03-05 15:13 UTC (permalink / raw)
  To: musl

On Sat, 5 Mar 2016, Rich Felker wrote:
> > > Actually it would be even nicer if we could use a compound literal
> > > inside the macro as the buffer, but that would pessimize with
> > > unnecessary initialization and eliminate a lot of the code-size
> > > benefit, I think.
> > 
> > Yep, I did consider that and arrived to a similar conclusion. Well, there's an
> > option of using alloca as long as no use is in a loop, but that's a bit uglier,
> > and as I recall it wasn't optimized to a static stack allocation.
> 
> Yeah, alloca is a lot uglier, an extra extension we don't currently
> use, and not something I would want to add.

There's an option of returning a struct containing both a buffer and a
pointer, but it's a bit worse code-size-wise and may be too magic:

struct procfdname_ret {
	char *ptr, buf[procfdsize];
} __procfdname_impl(unsigned fd);

#define procfdname(fd) __procfdname_impl(fd).ptr

... and in __procfdname_impl assign a pointer to last filled position in
retval.buf to retval.ptr.

OTOH this allows further cleanup of call sites. WDYT?

Alexander


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH] micro-optimize __procfdname
  2016-03-05 15:13           ` Alexander Monakov
@ 2016-03-05 17:30             ` Alexander Monakov
  0 siblings, 0 replies; 9+ messages in thread
From: Alexander Monakov @ 2016-03-05 17:30 UTC (permalink / raw)
  To: musl

On Sat, 5 Mar 2016, Alexander Monakov wrote:
> There's an option of returning a struct containing both a buffer and a
> pointer, but it's a bit worse code-size-wise and may be too magic:
> 
> struct procfdname_ret {
> 	char *ptr, buf[procfdsize];
> } __procfdname_impl(unsigned fd);
> 
> #define procfdname(fd) __procfdname_impl(fd).ptr
> 
> ... and in __procfdname_impl assign a pointer to last filled position in
> retval.buf to retval.ptr.

Rich explained on IRC that it's not correct (thanks!), and I don't see a way
to unbreak it. Sorry for the noise.

Alexander


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-03-05 17:30 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-21 11:41 [RFC PATCH] micro-optimize __procfdname Alexander Monakov
2016-03-05  5:24 ` Rich Felker
2016-03-05  5:42   ` Alexander Monakov
2016-03-05  5:56     ` Rich Felker
2016-03-05  6:14       ` Alexander Monakov
2016-03-05  6:20         ` Rich Felker
2016-03-05  6:35           ` Alexander Monakov
2016-03-05 15:13           ` Alexander Monakov
2016-03-05 17:30             ` Alexander Monakov

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).