mailing list of musl libc
 help / color / mirror / code / Atom feed
* un-UBify-strings
@ 2018-09-23  0:35 Rich Felker
  2018-09-23  2:11 ` un-UBify-strings Pascal Cuoq
  0 siblings, 1 reply; 9+ messages in thread
From: Rich Felker @ 2018-09-23  0:35 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 413 bytes --]

I've had this patch sitting around since 2016, and just updated it to
apply cleanly. Any objections? Since I killed the stdio UB in this
release cycle I'd like to go ahead and eliminate all the
string-function UB that can be eliminated (there's still aligned read
past end of string that's unfixable without an attribute that
explicitly allows it, or asm; it might turn out that asm would make
sense here).

Rich

[-- Attachment #2: un-UB-strings.diff --]
[-- Type: text/plain, Size: 5088 bytes --]

diff --git a/src/string/memccpy.c b/src/string/memccpy.c
index 7c233d5..5c8b672 100644
--- a/src/string/memccpy.c
+++ b/src/string/memccpy.c
@@ -11,19 +11,21 @@ void *memccpy(void *restrict dest, const void *restrict src, int c, size_t n)
 {
 	unsigned char *d = dest;
 	const unsigned char *s = src;
-	size_t *wd, k;
-	const size_t *ws;
 
 	c = (unsigned char)c;
+#ifdef __GNUC__
+	size_t __attribute__((__may_alias__)) *wd;
+	const size_t __attribute__((__may_alias__)) *ws;
 	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
 		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s)!=c; n--, s++, d++);
 		if ((uintptr_t)s & ALIGN) goto tail;
-		k = ONES * c;
+		size_t k = ONES * c;
 		wd=(void *)d; ws=(const void *)s;
 		for (; n>=sizeof(size_t) && !HASZERO(*ws^k);
 		       n-=sizeof(size_t), ws++, wd++) *wd = *ws;
 		d=(void *)wd; s=(const void *)ws;
 	}
+#endif
 	for (; n && (*d=*s)!=c; n--, s++, d++);
 tail:
 	if (*s==c) return d+1;
diff --git a/src/string/memchr.c b/src/string/memchr.c
index 4daff7b..1038ce6 100644
--- a/src/string/memchr.c
+++ b/src/string/memchr.c
@@ -12,12 +12,14 @@ void *memchr(const void *src, int c, size_t n)
 {
 	const unsigned char *s = src;
 	c = (unsigned char)c;
+
+#ifdef __GNUC__
 	for (; ((uintptr_t)s & ALIGN) && n && *s != c; s++, n--);
-	if (n && *s != c) {
-		const size_t *w;
-		size_t k = ONES * c;
-		for (w = (const void *)s; n>=SS && !HASZERO(*w^k); w++, n-=SS);
-		for (s = (const void *)w; n && *s != c; s++, n--);
-	}
+	const __attribute__((__may_alias__)) size_t *w;
+	size_t k = ONES * c;
+	for (w = (const void *)s; n>=SS && !HASZERO(*w^k); w++, n-=SS);
+	s = (const void *)w;
+#endif
+	for (; n && *s != c; s++, n--);
 	return n ? (void *)s : 0;
 }
diff --git a/src/string/stpcpy.c b/src/string/stpcpy.c
index 54cf9ca..f115d16 100644
--- a/src/string/stpcpy.c
+++ b/src/string/stpcpy.c
@@ -9,9 +9,9 @@
 
 char *__stpcpy(char *restrict d, const char *restrict s)
 {
-	size_t *wd;
-	const size_t *ws;
-
+#ifdef __GNUC__
+	size_t __attribute__((__may_alias__)) *wd;
+	const size_t __attribute__((__may_alias__)) *ws;
 	if ((uintptr_t)s % ALIGN == (uintptr_t)d % ALIGN) {
 		for (; (uintptr_t)s % ALIGN; s++, d++)
 			if (!(*d=*s)) return d;
@@ -19,6 +19,7 @@ char *__stpcpy(char *restrict d, const char *restrict s)
 		for (; !HASZERO(*ws); *wd++ = *ws++);
 		d=(void *)wd; s=(const void *)ws;
 	}
+#endif
 	for (; (*d=*s); s++, d++);
 
 	return d;
diff --git a/src/string/stpncpy.c b/src/string/stpncpy.c
index d6d92ff..099d77c 100644
--- a/src/string/stpncpy.c
+++ b/src/string/stpncpy.c
@@ -9,9 +9,9 @@
 
 char *__stpncpy(char *restrict d, const char *restrict s, size_t n)
 {
-	size_t *wd;
-	const size_t *ws;
-
+#ifdef __GNUC__
+	size_t __attribute__((__may_alias__)) *wd;
+	const size_t __attribute__((__may_alias__)) *ws;
 	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
 		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s); n--, s++, d++);
 		if (!n || !*s) goto tail;
@@ -20,6 +20,7 @@ char *__stpncpy(char *restrict d, const char *restrict s, size_t n)
 		       n-=sizeof(size_t), ws++, wd++) *wd = *ws;
 		d=(void *)wd; s=(const void *)ws;
 	}
+#endif
 	for (; n && (*d=*s); n--, s++, d++);
 tail:
 	memset(d, 0, n);
diff --git a/src/string/strchrnul.c b/src/string/strchrnul.c
index f2b9ae1..6875ae0 100644
--- a/src/string/strchrnul.c
+++ b/src/string/strchrnul.c
@@ -9,16 +9,18 @@
 
 char *__strchrnul(const char *s, int c)
 {
-	size_t *w, k;
-
 	c = (unsigned char)c;
 	if (!c) return (char *)s + strlen(s);
 
+#ifdef __GNUC__
+	size_t __attribute__((__may_alias__)) *w;
 	for (; (uintptr_t)s % ALIGN; s++)
 		if (!*s || *(unsigned char *)s == c) return (char *)s;
-	k = ONES * c;
+	size_t k = ONES * c;
 	for (w = (void *)s; !HASZERO(*w) && !HASZERO(*w^k); w++);
-	for (s = (void *)w; *s && *(unsigned char *)s != c; s++);
+	s = (void *)w;
+#endif
+	for (; *s && *(unsigned char *)s != c; s++);
 	return (char *)s;
 }
 
diff --git a/src/string/strlcpy.c b/src/string/strlcpy.c
index dcb22f6..a76b7b2 100644
--- a/src/string/strlcpy.c
+++ b/src/string/strlcpy.c
@@ -12,9 +12,10 @@ size_t strlcpy(char *d, const char *s, size_t n)
 {
 	char *d0 = d;
 	size_t *wd;
-	const size_t *ws;
 
 	if (!n--) goto finish;
+#ifdef __GNUC__
+	const __attribute__((__may_alias__)) size_t *ws;
 	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
 		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s); n--, s++, d++);
 		if (n && *s) {
@@ -24,6 +25,7 @@ size_t strlcpy(char *d, const char *s, size_t n)
 			d=(void *)wd; s=(const void *)ws;
 		}
 	}
+#endif
 	for (; n && (*d=*s); n--, s++, d++);
 	*d = 0;
 finish:
diff --git a/src/string/strlen.c b/src/string/strlen.c
index 929ddcb..27b6d37 100644
--- a/src/string/strlen.c
+++ b/src/string/strlen.c
@@ -10,9 +10,12 @@
 size_t strlen(const char *s)
 {
 	const char *a = s;
-	const size_t *w;
+#ifdef __GNUC__
+	const __attribute__((__may_alias__)) size_t *w;
 	for (; (uintptr_t)s % ALIGN; s++) if (!*s) return s-a;
 	for (w = (const void *)s; !HASZERO(*w); w++);
-	for (s = (const void *)w; *s; s++);
+	s = (const void *)w;
+#endif
+	for (; *s; s++);
 	return s-a;
 }

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: un-UBify-strings
  2018-09-23  0:35 un-UBify-strings Rich Felker
@ 2018-09-23  2:11 ` Pascal Cuoq
  2018-09-23  2:32   ` un-UBify-strings Rich Felker
  0 siblings, 1 reply; 9+ messages in thread
From: Pascal Cuoq @ 2018-09-23  2:11 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 1784 bytes --]

Hello Rich,

On 23 Sep 2018, at 02:35, Rich Felker <dalias@libc.org<mailto:dalias@libc.org>> wrote:

I've had this patch sitting around since 2016, and just updated it to
apply cleanly. Any objections?

Your patch contains:

...
size_t __attribute__((__may_alias__)) *wd;
const size_t __attribute__((__may_alias__)) *ws;
...

In my experience, this use of __may_alias__ does not do anything. See function f in the example below, which both GCC and Clang optimize as if the programmer had not used __may_alias__ at all: https://gcc.godbolt.org/z/Um4NU7

You should use a typdef for the aliasing type, as shown for function g (in with GCC and Clang do not apply the optimization).

The example in GCC's documentation for __may_alias__ also uses a typedef: https://gcc.gnu.org/onlinedocs/gcc-4.0.4/gcc/Type-Attributes.html

Pascal

__________________________
#include<string.h>

doubleX,Y;

voidf(void*d, constvoid*s) {
size_t __attribute__((__may_alias__)) *wd;
wd = d;
X = 1.0;
*wd = 1;
Y = X;
}

typedefsize_t __attribute__((__may_alias__)) aliasing_word;
voidg(void*d, constvoid*s) {
aliasing_word *wd;
wd = d;
X = 1.0;
*wd = 1;
Y = X;
}

Clang 6.0.0 -O3 -fomit-frame-pointer:
f:# @f
movabsq$4607182418800017408, %rax# imm = 0x3FF0000000000000
movq%rax, X(%rip)
movq$1, (%rdi)
movq%rax, Y(%rip)
retq
g:# @g
movabsq$4607182418800017408, %rax# imm = 0x3FF0000000000000
movq%rax, X(%rip)
movq$1, (%rdi)
movqX(%rip), %rax
movq%rax, Y(%rip)
retq

GCC 8.2 -O3 -fomit-frame-pointer:
f:
movsd.LC0(%rip), %xmm0
movsd%xmm0, X(%rip)
movq$1, (%rdi)
movsd%xmm0, Y(%rip)
ret
g:
movq.LC0(%rip), %rax
movq%rax, X(%rip)
movq$1, (%rdi)
movsdX(%rip), %xmm0
movsd%xmm0, Y(%rip)
ret
.LC0:
.long0
.long1072693248









[-- Attachment #2: Type: text/html, Size: 10152 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: un-UBify-strings
  2018-09-23  2:11 ` un-UBify-strings Pascal Cuoq
@ 2018-09-23  2:32   ` Rich Felker
  2018-09-23  2:45     ` un-UBify-strings Rich Felker
  0 siblings, 1 reply; 9+ messages in thread
From: Rich Felker @ 2018-09-23  2:32 UTC (permalink / raw)
  To: musl

On Sun, Sep 23, 2018 at 02:11:42AM +0000, Pascal Cuoq wrote:
> Hello Rich,
> 
> On 23 Sep 2018, at 02:35, Rich Felker <dalias@libc.org<mailto:dalias@libc.org>> wrote:
> 
> I've had this patch sitting around since 2016, and just updated it to
> apply cleanly. Any objections?
> 
> Your patch contains:
> 
> ....
> size_t __attribute__((__may_alias__)) *wd;
> const size_t __attribute__((__may_alias__)) *ws;
> ....
> 
> In my experience, this use of __may_alias__ does not do anything.
> See function f in the example below, which both GCC and Clang
> optimize as if the programmer had not used __may_alias__ at all:
> https://gcc.godbolt.org/z/Um4NU7
> 
> You should use a typdef for the aliasing type, as shown for function
> g (in with GCC and Clang do not apply the optimization).
> 
> The example in GCC's documentation for __may_alias__ also uses a
> typedef:
> https://gcc.gnu.org/onlinedocs/gcc-4.0.4/gcc/Type-Attributes.html

Thanks, this is very helpful. I'll prepare an updated version.

Rich


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: un-UBify-strings
  2018-09-23  2:32   ` un-UBify-strings Rich Felker
@ 2018-09-23  2:45     ` Rich Felker
  2018-09-23  3:10       ` un-UBify-strings Pascal Cuoq
  0 siblings, 1 reply; 9+ messages in thread
From: Rich Felker @ 2018-09-23  2:45 UTC (permalink / raw)
  To: musl

On Sat, Sep 22, 2018 at 10:32:34PM -0400, Rich Felker wrote:
> On Sun, Sep 23, 2018 at 02:11:42AM +0000, Pascal Cuoq wrote:
> > Hello Rich,
> > 
> > On 23 Sep 2018, at 02:35, Rich Felker <dalias@libc.org<mailto:dalias@libc.org>> wrote:
> > 
> > I've had this patch sitting around since 2016, and just updated it to
> > apply cleanly. Any objections?
> > 
> > Your patch contains:
> > 
> > ....
> > size_t __attribute__((__may_alias__)) *wd;
> > const size_t __attribute__((__may_alias__)) *ws;
> > ....
> > 
> > In my experience, this use of __may_alias__ does not do anything.
> > See function f in the example below, which both GCC and Clang
> > optimize as if the programmer had not used __may_alias__ at all:
> > https://gcc.godbolt.org/z/Um4NU7
> > 
> > You should use a typdef for the aliasing type, as shown for function
> > g (in with GCC and Clang do not apply the optimization).
> > 
> > The example in GCC's documentation for __may_alias__ also uses a
> > typedef:
> > https://gcc.gnu.org/onlinedocs/gcc-4.0.4/gcc/Type-Attributes.html
> 
> Thanks, this is very helpful. I'll prepare an updated version.

While I've got your attention, I'm also trying to fix the UB in
address range checks for implementing memmove as memcpy, etc. Is this
correct:

	if ((uintptr_t)s-(uintptr_t)d-n <= -2*n) return memcpy(d, s, n);

?

Rich


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: un-UBify-strings
  2018-09-23  2:45     ` un-UBify-strings Rich Felker
@ 2018-09-23  3:10       ` Pascal Cuoq
  2018-09-23  3:15         ` un-UBify-strings Rich Felker
  0 siblings, 1 reply; 9+ messages in thread
From: Pascal Cuoq @ 2018-09-23  3:10 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 972 bytes --]


On 23 Sep 2018, at 04:45, Rich Felker <dalias@libc.org<mailto:dalias@libc.org>> wrote:
I'm also trying to fix the UB in
address range checks for implementing memmove as memcpy, etc. Is this
correct:

if ((uintptr_t)s-(uintptr_t)d-n <= -2*n) return memcpy(d, s, n);

?

It looks okay to me. You want to test whether (uintptr_t)s-(uintptr_t)d, computed as a mathematical integer, is between -n and n, and since uintptr_t is unsigned, you are using the well-known trick of aligning one of the bounds with 0 so that both inequalities can be tested in one instruction.

It would seen more natural to me to work on the right-hand side of zero, that it, to compute (uintptr_t)s-(uintptr_t)d+n and to check whether that is <= 2*n (overlap) or > 2*n (no overlap). The generated code may even be one instruction shorter. Apart from that, as long as we have the hypothesis that n <= UINTPTR_MAX/2, I cannot immediately see any reason why it would not work.

Pascal


[-- Attachment #2: Type: text/html, Size: 5904 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: un-UBify-strings
  2018-09-23  3:10       ` un-UBify-strings Pascal Cuoq
@ 2018-09-23  3:15         ` Rich Felker
  2018-09-23  3:44           ` un-UBify-strings Pascal Cuoq
  2018-09-23  3:45           ` un-UBify-strings Rich Felker
  0 siblings, 2 replies; 9+ messages in thread
From: Rich Felker @ 2018-09-23  3:15 UTC (permalink / raw)
  To: musl

On Sun, Sep 23, 2018 at 03:10:14AM +0000, Pascal Cuoq wrote:
> 
> On 23 Sep 2018, at 04:45, Rich Felker <dalias@libc.org<mailto:dalias@libc.org>> wrote:
> I'm also trying to fix the UB in
> address range checks for implementing memmove as memcpy, etc. Is this
> correct:
> 
> if ((uintptr_t)s-(uintptr_t)d-n <= -2*n) return memcpy(d, s, n);
> 
> ?
> 
> It looks okay to me. You want to test whether
> (uintptr_t)s-(uintptr_t)d, computed as a mathematical integer, is
> between -n and n, and since uintptr_t is unsigned, you are using the
> well-known trick of aligning one of the bounds with 0 so that both
> inequalities can be tested in one instruction.

Right.

> It would seen more natural to me to work on the right-hand side of
> zero, that it, to compute (uintptr_t)s-(uintptr_t)d+n and to check
> whether that is <= 2*n (overlap) or > 2*n (no overlap). The
> generated code may even be one instruction shorter. Apart from that,
> as long as we have the hypothesis that n <= UINTPTR_MAX/2, I cannot
> immediately see any reason why it would not work.

dist(s,d)==n is a no-overlap case. Otherwise I think this is correct
and we can use:

	if ((uintptr_t)s-(uintptr_t)d+n >= 2*n) return memcpy(d, s, n);

Yes?

Rich


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: un-UBify-strings
  2018-09-23  3:15         ` un-UBify-strings Rich Felker
@ 2018-09-23  3:44           ` Pascal Cuoq
  2018-09-23  4:02             ` un-UBify-strings Rich Felker
  2018-09-23  3:45           ` un-UBify-strings Rich Felker
  1 sibling, 1 reply; 9+ messages in thread
From: Pascal Cuoq @ 2018-09-23  3:44 UTC (permalink / raw)
  To: musl


> On 23 Sep 2018, at 05:15, Rich Felker <dalias@libc.org> wrote:
> 
> dist(s,d)==n is a no-overlap case.

In this case the formula I proposed has the drawback of rejecting the case where (uintptr_t)s-(uintptr_t)d is exactly -n. This case may be the justification for the way the original comparison was expressed:

> (uintptr_t)s-(uintptr_t)d-n <= -2*n

(uintptr_t)s-(uintptr_t)d = -n   ==> comparison true by LHS and RHS being equal

(uintptr_t)s-(uintptr_t)d = n    ==> comparison true by LHS being zero

(uintptr_t)s-(uintptr_t)d > -n and (uintptr_t)s-(uintptr_t)d < n  ==> comparison false



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: un-UBify-strings
  2018-09-23  3:15         ` un-UBify-strings Rich Felker
  2018-09-23  3:44           ` un-UBify-strings Pascal Cuoq
@ 2018-09-23  3:45           ` Rich Felker
  1 sibling, 0 replies; 9+ messages in thread
From: Rich Felker @ 2018-09-23  3:45 UTC (permalink / raw)
  To: musl

On Sat, Sep 22, 2018 at 11:15:02PM -0400, Rich Felker wrote:
> On Sun, Sep 23, 2018 at 03:10:14AM +0000, Pascal Cuoq wrote:
> > 
> > On 23 Sep 2018, at 04:45, Rich Felker <dalias@libc.org<mailto:dalias@libc.org>> wrote:
> > I'm also trying to fix the UB in
> > address range checks for implementing memmove as memcpy, etc. Is this
> > correct:
> > 
> > if ((uintptr_t)s-(uintptr_t)d-n <= -2*n) return memcpy(d, s, n);
> > 
> > ?
> > 
> > It looks okay to me. You want to test whether
> > (uintptr_t)s-(uintptr_t)d, computed as a mathematical integer, is
> > between -n and n, and since uintptr_t is unsigned, you are using the
> > well-known trick of aligning one of the bounds with 0 so that both
> > inequalities can be tested in one instruction.
> 
> Right.
> 
> > It would seen more natural to me to work on the right-hand side of
> > zero, that it, to compute (uintptr_t)s-(uintptr_t)d+n and to check
> > whether that is <= 2*n (overlap) or > 2*n (no overlap). The
> > generated code may even be one instruction shorter. Apart from that,
> > as long as we have the hypothesis that n <= UINTPTR_MAX/2, I cannot
> > immediately see any reason why it would not work.
> 
> dist(s,d)==n is a no-overlap case. Otherwise I think this is correct
> and we can use:
> 
> 	if ((uintptr_t)s-(uintptr_t)d+n >= 2*n) return memcpy(d, s, n);
> 
> Yes?

BTW just below there's a conditional if (d<s) that, as far as I can
tell, does not need any fixing. If we reach that point (if we don't
just call memcpy for the non-overlapping case) then, assuming n is
valid, d and s necessarily point into the same array, and therefore
d<s is well-defined.

Rich


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: un-UBify-strings
  2018-09-23  3:44           ` un-UBify-strings Pascal Cuoq
@ 2018-09-23  4:02             ` Rich Felker
  0 siblings, 0 replies; 9+ messages in thread
From: Rich Felker @ 2018-09-23  4:02 UTC (permalink / raw)
  To: musl

On Sun, Sep 23, 2018 at 03:44:46AM +0000, Pascal Cuoq wrote:
> 
> > On 23 Sep 2018, at 05:15, Rich Felker <dalias@libc.org> wrote:
> > 
> > dist(s,d)==n is a no-overlap case.
> 
> In this case the formula I proposed has the drawback of rejecting
> the case where (uintptr_t)s-(uintptr_t)d is exactly -n. This case
> may be the justification for the way the original comparison was
> expressed:

I think that's fixable just by subtracting 1 on both sides, but then
it's probably less efficient rather than more efficient.

> > (uintptr_t)s-(uintptr_t)d-n <= -2*n
> 
> (uintptr_t)s-(uintptr_t)d = -n   ==> comparison true by LHS and RHS being equal
> 
> (uintptr_t)s-(uintptr_t)d = n    ==> comparison true by LHS being zero
> 
> (uintptr_t)s-(uintptr_t)d > -n and (uintptr_t)s-(uintptr_t)d < n  ==> comparison false

Yes. It's interesting that the choice of using left or right of 0
depends on whether you want > or >=.

Rich


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-09-23  4:02 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-23  0:35 un-UBify-strings Rich Felker
2018-09-23  2:11 ` un-UBify-strings Pascal Cuoq
2018-09-23  2:32   ` un-UBify-strings Rich Felker
2018-09-23  2:45     ` un-UBify-strings Rich Felker
2018-09-23  3:10       ` un-UBify-strings Pascal Cuoq
2018-09-23  3:15         ` un-UBify-strings Rich Felker
2018-09-23  3:44           ` un-UBify-strings Pascal Cuoq
2018-09-23  4:02             ` un-UBify-strings Rich Felker
2018-09-23  3:45           ` un-UBify-strings Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).