mailing list of musl libc
 help / color / mirror / code / Atom feed
* [PATCH] un-UBify string functions
@ 2016-04-27 22:01 Rich Felker
  0 siblings, 0 replies; only message in thread
From: Rich Felker @ 2016-04-27 22:01 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 971 bytes --]

The attached patch is a first draft of an attempt to get rid of UB in
src/string/* by using the may_alias attribute correctly, conditional on
__GNUC__. I've tried to structure it so that it's obvious there are no
semantic changes in the __GNUC__ case (memchr.c had some slight
structural changes, but the condition removed was already implied by
the for loop conditions), which unfortunately leaves everything pretty
ugly and with inconsistent style. We should probably pick some
canonical, well-styled files, write the fully-general (byte-match &&
size-limit end conditions) versions of both search and copy, and then
either write all the simpler versions in matching style, or write a
pair of common templates that optimize down to individual functions
like strlen, stpcpy, etc.

At least memmove needs to be done separately (it doesn't fit the
pattern of the others), and there might be other omissions too. But
this is at least a good starting point for review.

Rich

[-- Attachment #2: un-UB-strings.diff --]
[-- Type: text/plain, Size: 5514 bytes --]

diff --git a/src/string/memccpy.c b/src/string/memccpy.c
index 7c233d5..5c8b672 100644
--- a/src/string/memccpy.c
+++ b/src/string/memccpy.c
@@ -11,19 +11,21 @@ void *memccpy(void *restrict dest, const void *restrict src, int c, size_t n)
 {
 	unsigned char *d = dest;
 	const unsigned char *s = src;
-	size_t *wd, k;
-	const size_t *ws;
 
 	c = (unsigned char)c;
+#ifdef __GNUC__
+	size_t __attribute__((__may_alias__)) *wd;
+	const size_t __attribute__((__may_alias__)) *ws;
 	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
 		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s)!=c; n--, s++, d++);
 		if ((uintptr_t)s & ALIGN) goto tail;
-		k = ONES * c;
+		size_t k = ONES * c;
 		wd=(void *)d; ws=(const void *)s;
 		for (; n>=sizeof(size_t) && !HASZERO(*ws^k);
 		       n-=sizeof(size_t), ws++, wd++) *wd = *ws;
 		d=(void *)wd; s=(const void *)ws;
 	}
+#endif
 	for (; n && (*d=*s)!=c; n--, s++, d++);
 tail:
 	if (*s==c) return d+1;
diff --git a/src/string/memchr.c b/src/string/memchr.c
index 4daff7b..1038ce6 100644
--- a/src/string/memchr.c
+++ b/src/string/memchr.c
@@ -12,12 +12,14 @@ void *memchr(const void *src, int c, size_t n)
 {
 	const unsigned char *s = src;
 	c = (unsigned char)c;
+
+#ifdef __GNUC__
 	for (; ((uintptr_t)s & ALIGN) && n && *s != c; s++, n--);
-	if (n && *s != c) {
-		const size_t *w;
-		size_t k = ONES * c;
-		for (w = (const void *)s; n>=SS && !HASZERO(*w^k); w++, n-=SS);
-		for (s = (const void *)w; n && *s != c; s++, n--);
-	}
+	const __attribute__((__may_alias__)) size_t *w;
+	size_t k = ONES * c;
+	for (w = (const void *)s; n>=SS && !HASZERO(*w^k); w++, n-=SS);
+	s = (const void *)w;
+#endif
+	for (; n && *s != c; s++, n--);
 	return n ? (void *)s : 0;
 }
diff --git a/src/string/stpcpy.c b/src/string/stpcpy.c
index 06623c4..3670826 100644
--- a/src/string/stpcpy.c
+++ b/src/string/stpcpy.c
@@ -10,9 +10,9 @@
 
 char *__stpcpy(char *restrict d, const char *restrict s)
 {
-	size_t *wd;
-	const size_t *ws;
-
+#ifdef __GNUC__
+	size_t __attribute__((__may_alias__)) *wd;
+	const size_t __attribute__((__may_alias__)) *ws;
 	if ((uintptr_t)s % ALIGN == (uintptr_t)d % ALIGN) {
 		for (; (uintptr_t)s % ALIGN; s++, d++)
 			if (!(*d=*s)) return d;
@@ -20,6 +20,7 @@ char *__stpcpy(char *restrict d, const char *restrict s)
 		for (; !HASZERO(*ws); *wd++ = *ws++);
 		d=(void *)wd; s=(const void *)ws;
 	}
+#endif
 	for (; (*d=*s); s++, d++);
 
 	return d;
diff --git a/src/string/stpncpy.c b/src/string/stpncpy.c
index 1f57a4d..1f53ae8 100644
--- a/src/string/stpncpy.c
+++ b/src/string/stpncpy.c
@@ -10,9 +10,9 @@
 
 char *__stpncpy(char *restrict d, const char *restrict s, size_t n)
 {
-	size_t *wd;
-	const size_t *ws;
-
+#ifdef __GNUC__
+	size_t __attribute__((__may_alias__)) *wd;
+	const size_t __attribute__((__may_alias__)) *ws;
 	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
 		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s); n--, s++, d++);
 		if (!n || !*s) goto tail;
@@ -21,6 +21,7 @@ char *__stpncpy(char *restrict d, const char *restrict s, size_t n)
 		       n-=sizeof(size_t), ws++, wd++) *wd = *ws;
 		d=(void *)wd; s=(const void *)ws;
 	}
+#endif
 	for (; n && (*d=*s); n--, s++, d++);
 tail:
 	memset(d, 0, n);
diff --git a/src/string/strchrnul.c b/src/string/strchrnul.c
index 05700ad..b8e3b54 100644
--- a/src/string/strchrnul.c
+++ b/src/string/strchrnul.c
@@ -10,16 +10,18 @@
 
 char *__strchrnul(const char *s, int c)
 {
-	size_t *w, k;
-
 	c = (unsigned char)c;
 	if (!c) return (char *)s + strlen(s);
 
+#ifdef __GNUC__
+	size_t __attribute__((__may_alias__)) *w;
 	for (; (uintptr_t)s % ALIGN; s++)
 		if (!*s || *(unsigned char *)s == c) return (char *)s;
-	k = ONES * c;
+	size_t k = ONES * c;
 	for (w = (void *)s; !HASZERO(*w) && !HASZERO(*w^k); w++);
-	for (s = (void *)w; *s && *(unsigned char *)s != c; s++);
+	s = (void *)w;
+#endif
+	for (; *s && *(unsigned char *)s != c; s++);
 	return (char *)s;
 }
 
diff --git a/src/string/strcpy.c b/src/string/strcpy.c
index f7e3ba3..2883e93 100644
--- a/src/string/strcpy.c
+++ b/src/string/strcpy.c
@@ -4,13 +4,6 @@ char *__stpcpy(char *, const char *);
 
 char *strcpy(char *restrict dest, const char *restrict src)
 {
-#if 1
 	__stpcpy(dest, src);
 	return dest;
-#else
-	const unsigned char *s = src;
-	unsigned char *d = dest;
-	while ((*d++ = *s++));
-	return dest;
-#endif
 }
diff --git a/src/string/strlcpy.c b/src/string/strlcpy.c
index 193d724..0a27b35 100644
--- a/src/string/strlcpy.c
+++ b/src/string/strlcpy.c
@@ -13,9 +13,10 @@ size_t strlcpy(char *d, const char *s, size_t n)
 {
 	char *d0 = d;
 	size_t *wd;
-	const size_t *ws;
 
 	if (!n--) goto finish;
+#ifdef __GNUC__
+	const __attribute__((__may_alias__)) size_t *ws;
 	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
 		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s); n--, s++, d++);
 		if (n && *s) {
@@ -25,6 +26,7 @@ size_t strlcpy(char *d, const char *s, size_t n)
 			d=(void *)wd; s=(const void *)ws;
 		}
 	}
+#endif
 	for (; n && (*d=*s); n--, s++, d++);
 	*d = 0;
 finish:
diff --git a/src/string/strlen.c b/src/string/strlen.c
index 929ddcb..27b6d37 100644
--- a/src/string/strlen.c
+++ b/src/string/strlen.c
@@ -10,9 +10,12 @@
 size_t strlen(const char *s)
 {
 	const char *a = s;
-	const size_t *w;
+#ifdef __GNUC__
+	const __attribute__((__may_alias__)) size_t *w;
 	for (; (uintptr_t)s % ALIGN; s++) if (!*s) return s-a;
 	for (w = (const void *)s; !HASZERO(*w); w++);
-	for (s = (const void *)w; *s; s++);
+	s = (const void *)w;
+#endif
+	for (; *s; s++);
 	return s-a;
 }

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2016-04-27 22:01 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-27 22:01 [PATCH] un-UBify string functions Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).