From mboxrd@z Thu Jan 1 00:00:00 1970 X-Msuck: nntp://news.gmane.org/gmane.linux.lib.musl.general/9961 Path: news.gmane.org!not-for-mail From: Rich Felker Newsgroups: gmane.linux.lib.musl.general Subject: [PATCH] un-UBify string functions Date: Wed, 27 Apr 2016 18:01:05 -0400 Message-ID: <20160427220105.GA12289@brightrain.aerifal.cx> Reply-To: musl@lists.openwall.com NNTP-Posting-Host: plane.gmane.org Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="VbJkn9YxBvnuCH5J" X-Trace: ger.gmane.org 1461794493 25169 80.91.229.3 (27 Apr 2016 22:01:33 GMT) X-Complaints-To: usenet@ger.gmane.org NNTP-Posting-Date: Wed, 27 Apr 2016 22:01:33 +0000 (UTC) To: musl@lists.openwall.com Original-X-From: musl-return-9974-gllmg-musl=m.gmane.org@lists.openwall.com Thu Apr 28 00:01:25 2016 Return-path: Envelope-to: gllmg-musl@m.gmane.org Original-Received: from mother.openwall.net ([195.42.179.200]) by plane.gmane.org with smtp (Exim 4.69) (envelope-from ) id 1avXWO-0004oN-Vg for gllmg-musl@m.gmane.org; Thu, 28 Apr 2016 00:01:25 +0200 Original-Received: (qmail 24388 invoked by uid 550); 27 Apr 2016 22:01:22 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-ID: Original-Received: (qmail 24356 invoked from network); 27 Apr 2016 22:01:17 -0000 Content-Disposition: inline User-Agent: Mutt/1.5.21 (2010-09-15) Original-Sender: Rich Felker Xref: news.gmane.org gmane.linux.lib.musl.general:9961 Archived-At: --VbJkn9YxBvnuCH5J Content-Type: text/plain; charset=us-ascii Content-Disposition: inline The attached patch is a first draft of an attempt to get rid of UB in src/string/* by using the may_alias attribute correctly conditional on __GNUC__. I've tried to structure it so that it's obvious there are no semantic changes in the __GNUC__ case (memchr.c had some slight structural changes, but the condition removed was already implied by the for loop conditions), which unfortunately leaves everything pretty ugly and with inconsistent style. We should probably pick some canonical, well-styled files, write the fully-general (byte-match && size-limit end conditions) versions of both search and copy, and then either write all the simpler versions in matching style, or write a pair of common templates that optimize-down to individual functions like strlen, stpcpy, etc. At least memmove needs to be done separately (it doesn't fit the pattern of the others), and there might be other omissions too. But this is at least a good starting point for review. Rich --VbJkn9YxBvnuCH5J Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="un-UB-strings.diff" diff --git a/src/string/memccpy.c b/src/string/memccpy.c index 7c233d5..5c8b672 100644 --- a/src/string/memccpy.c +++ b/src/string/memccpy.c @@ -11,19 +11,21 @@ void *memccpy(void *restrict dest, const void *restrict src, int c, size_t n) { unsigned char *d = dest; const unsigned char *s = src; - size_t *wd, k; - const size_t *ws; c = (unsigned char)c; +#ifdef __GNUC__ + size_t __attribute__((__may_alias__)) *wd; + const size_t __attribute__((__may_alias__)) *ws; if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) { for (; ((uintptr_t)s & ALIGN) && n && (*d=*s)!=c; n--, s++, d++); if ((uintptr_t)s & ALIGN) goto tail; - k = ONES * c; + size_t k = ONES * c; wd=(void *)d; ws=(const void *)s; for (; n>=sizeof(size_t) && !HASZERO(*ws^k); n-=sizeof(size_t), ws++, wd++) *wd = *ws; d=(void *)wd; s=(const void *)ws; } +#endif for (; n && (*d=*s)!=c; n--, s++, d++); tail: if (*s==c) return d+1; diff --git a/src/string/memchr.c b/src/string/memchr.c index 4daff7b..1038ce6 100644 --- a/src/string/memchr.c +++ b/src/string/memchr.c @@ -12,12 +12,14 @@ void *memchr(const void *src, int c, size_t n) { const unsigned char *s = src; c = (unsigned char)c; + +#ifdef __GNUC__ for (; ((uintptr_t)s & ALIGN) && n && *s != c; s++, n--); - if (n && *s != c) { - const size_t *w; - size_t k = ONES * c; - for (w = (const void *)s; n>=SS && !HASZERO(*w^k); w++, n-=SS); - for (s = (const void *)w; n && *s != c; s++, n--); - } + const __attribute__((__may_alias__)) size_t *w; + size_t k = ONES * c; + for (w = (const void *)s; n>=SS && !HASZERO(*w^k); w++, n-=SS); + s = (const void *)w; +#endif + for (; n && *s != c; s++, n--); return n ? (void *)s : 0; } diff --git a/src/string/stpcpy.c b/src/string/stpcpy.c index 06623c4..3670826 100644 --- a/src/string/stpcpy.c +++ b/src/string/stpcpy.c @@ -10,9 +10,9 @@ char *__stpcpy(char *restrict d, const char *restrict s) { - size_t *wd; - const size_t *ws; - +#ifdef __GNUC__ + size_t __attribute__((__may_alias__)) *wd; + const size_t __attribute__((__may_alias__)) *ws; if ((uintptr_t)s % ALIGN == (uintptr_t)d % ALIGN) { for (; (uintptr_t)s % ALIGN; s++, d++) if (!(*d=*s)) return d; @@ -20,6 +20,7 @@ char *__stpcpy(char *restrict d, const char *restrict s) for (; !HASZERO(*ws); *wd++ = *ws++); d=(void *)wd; s=(const void *)ws; } +#endif for (; (*d=*s); s++, d++); return d; diff --git a/src/string/stpncpy.c b/src/string/stpncpy.c index 1f57a4d..1f53ae8 100644 --- a/src/string/stpncpy.c +++ b/src/string/stpncpy.c @@ -10,9 +10,9 @@ char *__stpncpy(char *restrict d, const char *restrict s, size_t n) { - size_t *wd; - const size_t *ws; - +#ifdef __GNUC__ + size_t __attribute__((__may_alias__)) *wd; + const size_t __attribute__((__may_alias__)) *ws; if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) { for (; ((uintptr_t)s & ALIGN) && n && (*d=*s); n--, s++, d++); if (!n || !*s) goto tail; @@ -21,6 +21,7 @@ char *__stpncpy(char *restrict d, const char *restrict s, size_t n) n-=sizeof(size_t), ws++, wd++) *wd = *ws; d=(void *)wd; s=(const void *)ws; } +#endif for (; n && (*d=*s); n--, s++, d++); tail: memset(d, 0, n); diff --git a/src/string/strchrnul.c b/src/string/strchrnul.c index 05700ad..b8e3b54 100644 --- a/src/string/strchrnul.c +++ b/src/string/strchrnul.c @@ -10,16 +10,18 @@ char *__strchrnul(const char *s, int c) { - size_t *w, k; - c = (unsigned char)c; if (!c) return (char *)s + strlen(s); +#ifdef __GNUC__ + size_t __attribute__((__may_alias__)) *w; for (; (uintptr_t)s % ALIGN; s++) if (!*s || *(unsigned char *)s == c) return (char *)s; - k = ONES * c; + size_t k = ONES * c; for (w = (void *)s; !HASZERO(*w) && !HASZERO(*w^k); w++); - for (s = (void *)w; *s && *(unsigned char *)s != c; s++); + s = (void *)w; +#endif + for (; *s && *(unsigned char *)s != c; s++); return (char *)s; } diff --git a/src/string/strcpy.c b/src/string/strcpy.c index f7e3ba3..2883e93 100644 --- a/src/string/strcpy.c +++ b/src/string/strcpy.c @@ -4,13 +4,6 @@ char *__stpcpy(char *, const char *); char *strcpy(char *restrict dest, const char *restrict src) { -#if 1 __stpcpy(dest, src); return dest; -#else - const unsigned char *s = src; - unsigned char *d = dest; - while ((*d++ = *s++)); - return dest; -#endif } diff --git a/src/string/strlcpy.c b/src/string/strlcpy.c index 193d724..0a27b35 100644 --- a/src/string/strlcpy.c +++ b/src/string/strlcpy.c @@ -13,9 +13,10 @@ size_t strlcpy(char *d, const char *s, size_t n) { char *d0 = d; size_t *wd; - const size_t *ws; if (!n--) goto finish; +#ifdef __GNUC__ + const __attribute__((__may_alias__)) size_t *ws; if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) { for (; ((uintptr_t)s & ALIGN) && n && (*d=*s); n--, s++, d++); if (n && *s) { @@ -25,6 +26,7 @@ size_t strlcpy(char *d, const char *s, size_t n) d=(void *)wd; s=(const void *)ws; } } +#endif for (; n && (*d=*s); n--, s++, d++); *d = 0; finish: diff --git a/src/string/strlen.c b/src/string/strlen.c index 929ddcb..27b6d37 100644 --- a/src/string/strlen.c +++ b/src/string/strlen.c @@ -10,9 +10,12 @@ size_t strlen(const char *s) { const char *a = s; - const size_t *w; +#ifdef __GNUC__ + const __attribute__((__may_alias__)) size_t *w; for (; (uintptr_t)s % ALIGN; s++) if (!*s) return s-a; for (w = (const void *)s; !HASZERO(*w); w++); - for (s = (const void *)w; *s; s++); + s = (const void *)w; +#endif + for (; *s; s++); return s-a; } --VbJkn9YxBvnuCH5J--