mailing list of musl libc
 help / color / mirror / code / Atom feed
* [RFC PATCH 0/5] Add explicit_bzero, vectorize and 'normalize' various string functions
@ 2017-07-15 19:55 Nathan McSween
  2017-07-15 19:55 ` [RFC PATCH 1/5] string: vectorize various functions Nathan McSween
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Nathan McSween @ 2017-07-15 19:55 UTC (permalink / raw)
  To: musl; +Cc: Nathan McSween

I have boot tested this with no issues.

Interesting or questionable parts include:
* The return after alignment.
* str[n]casecmp wordwise.
* the new hidden strscpy and memsset functions.
* memsset instead of making a standalone explicit_bzero.
* using macros for various gcc attributes, etc.

Nathan McSween (5):
  string: vectorize various functions
  string: modify wordwise functions to match new style
  string: add strscpy and modify functions to use strscpy
  string: use strchrnul in strcasestr instead of bytewise iteration
  string: add memsset a 'secure' memset and bsd explicit_bzero

 src/string/explicit_bzero.c |  9 ++++++++
 src/string/memccpy.c        | 51 ++++++++++++++++++++++++---------------------
 src/string/memchr.c         | 37 ++++++++++++++++++--------------
 src/string/memcmp.c         | 28 +++++++++++++++++++++----
 src/string/memrchr.c        | 29 ++++++++++++++++++++------
 src/string/memsset.c        | 13 ++++++++++++
 src/string/stpcpy.c         | 37 +++++++++++++++++---------------
 src/string/stpncpy.c        | 32 +++++++++-------------------
 src/string/strcasecmp.c     | 34 ++++++++++++++++++++++++++----
 src/string/strcasestr.c     | 12 ++++++++---
 src/string/strchrnul.c      | 33 ++++++++++++++++-------------
 src/string/strcmp.c         | 27 +++++++++++++++++++++---
 src/string/strlcpy.c        | 27 +++---------------------
 src/string/strlen.c         | 27 ++++++++++++++----------
 src/string/strncasecmp.c    | 37 ++++++++++++++++++++++++++++----
 src/string/strncat.c        | 10 ++++-----
 src/string/strncmp.c        | 28 +++++++++++++++++++++++--
 src/string/strscpy.c        | 37 ++++++++++++++++++++++++++++++++
 18 files changed, 348 insertions(+), 160 deletions(-)
 create mode 100644 src/string/explicit_bzero.c
 create mode 100644 src/string/memsset.c
 create mode 100644 src/string/strscpy.c

-- 
2.13.2



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [RFC PATCH 1/5] string: vectorize various functions
  2017-07-15 19:55 [RFC PATCH 0/5] Add explicit_bzero, vectorize and 'normalize' various string functions Nathan McSween
@ 2017-07-15 19:55 ` Nathan McSween
  2017-07-15 19:55 ` [RFC PATCH 2/5] string: modify wordwise functions to match new style Nathan McSween
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Nathan McSween @ 2017-07-15 19:55 UTC (permalink / raw)
  To: musl; +Cc: Nathan McSween

Text sizes with gcc version 6.3.0
Before | After
 49  |   153  memcmp.lo
 38  |   294  memrchr.lo
120  |   376  strcasecmp.lo
 96  |   305  strncmp.lo
155  |   454  strncasecmp.lo
 96  |   305  strncmp.lo
861  |  1887  (TOTALS)

The size increase is mainly due to vectorizing but also to both the
__may_alias__ attribute and the conditional before the alignment loop for
functions that take a size argument.

The str[n]casecmp macros are of particular interest: they work by tagging the
high bit of every byte in the range A-Z and then right-shifting that high bit by
two (0x80 >> 2 == 0x20) to give a-z.
---
 src/string/memcmp.c      | 28 ++++++++++++++++++++++++----
 src/string/memrchr.c     | 29 +++++++++++++++++++++++------
 src/string/strcasecmp.c  | 34 ++++++++++++++++++++++++++++++----
 src/string/strcmp.c      | 27 ++++++++++++++++++++++++---
 src/string/strncasecmp.c | 37 +++++++++++++++++++++++++++++++++----
 src/string/strncmp.c     | 28 ++++++++++++++++++++++++++--
 6 files changed, 160 insertions(+), 23 deletions(-)

diff --git a/src/string/memcmp.c b/src/string/memcmp.c
index bdbce9f0..7d2e8077 100644
--- a/src/string/memcmp.c
+++ b/src/string/memcmp.c
@@ -1,8 +1,28 @@
 #include <string.h>
+#include <stdint.h>
 
-int memcmp(const void *vl, const void *vr, size_t n)
+#define aliases __attribute__((__may_alias__))
+
+int memcmp(const void *_l, const void *_r, size_t n)
 {
-	const unsigned char *l=vl, *r=vr;
-	for (; n && *l == *r; n--, l++, r++);
-	return n ? *l-*r : 0;
+	const unsigned char *l = _l, *r = _r;
+	const size_t aliases *wl, aliases *wr;
+
+	if (n < sizeof(size_t) * 3 || ((uintptr_t)l | (uintptr_t)r)
+	    & sizeof(size_t) - 1) goto bytewise;
+
+	for (; (uintptr_t)l & sizeof(size_t) - 1 && *l == *r; l++, r++, n--);
+	if ((uintptr_t)l & sizeof(size_t) -1) return *l - *r;
+
+	wl = (const void *)l;
+	wr = (const void *)r;
+	for (; n >= sizeof(size_t) && *wl == *wr
+	     ; wl++, wr++, n -= sizeof(size_t));
+	l = (const void *)wl;
+	r = (const void *)wr;
+
+bytewise:
+	for (; n && *l == *r; l++, r++, n--);
+
+	return n ? *l - *r : 0;
 }
diff --git a/src/string/memrchr.c b/src/string/memrchr.c
index a78e9d6c..165c422b 100644
--- a/src/string/memrchr.c
+++ b/src/string/memrchr.c
@@ -1,12 +1,29 @@
 #include <string.h>
-#include "libc.h"
+#include <stdint.h>
 
-void *__memrchr(const void *m, int c, size_t n)
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
+#define weak_alias(o, n) extern __typeof__(o) n __attribute__((weak, alias(#o)))
+
+void *__memrchr(const void *_s, int i, size_t n)
 {
-	const unsigned char *s = m;
-	c = (unsigned char)c;
-	while (n--) if (s[n]==c) return (void *)(s+n);
-	return 0;
+	i = (unsigned char )i;
+	const unsigned char *s = _s;
+	const size_t wi = byte_repeat(i);
+
+	if (n < sizeof(size_t) * 3) goto bytewise;
+
+	for (; (uintptr_t)(s + n) & sizeof(size_t) - 1 && s[n - 1] != i; n--);
+	if ((uintptr_t)(s + n) & sizeof(size_t) - 1) return (void *)(s + n - 1);
+
+	for (; n >= sizeof(size_t) &&
+	       !word_has_zero(*(size_t *)(s + n - sizeof(size_t)) ^ wi)
+	     ; n -= sizeof(size_t));
+
+bytewise:
+	for (; n && s[n - 1] != i; n--);
+
+	return n ? (void *)(s + n - 1) : 0;
 }
 
 weak_alias(__memrchr, memrchr);
diff --git a/src/string/strcasecmp.c b/src/string/strcasecmp.c
index 3cd5f2d0..07d56c5d 100644
--- a/src/string/strcasecmp.c
+++ b/src/string/strcasecmp.c
@@ -1,15 +1,41 @@
 #include <strings.h>
 #include <ctype.h>
-#include "libc.h"
+#include <stdint.h>
+
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
+#define word_tag_range(x, t, s, e) ((((x) | byte_repeat(t)) \
+- byte_repeat(s) & ~((x) | byte_repeat(t)) - \
+byte_repeat((e) + 1)) & (~(x) & byte_repeat(t)))
+#define word_to_lower(x) ((x) - (word_tag_range((x), 'a', 'z', 0x80)) >> 2)
+#define weak_alias(o, n) extern __typeof__(o) n __attribute__((weak, alias(#o)))
 
 int strcasecmp(const char *_l, const char *_r)
 {
-	const unsigned char *l=(void *)_l, *r=(void *)_r;
-	for (; *l && *r && (*l == *r || tolower(*l) == tolower(*r)); l++, r++);
+	const unsigned char *l = (const void *)_l, *r = (const void *)_r;
+	const size_t aliases *wl, aliases *wr;
+
+	if (((uintptr_t)l | (uintptr_t)r) & sizeof(size_t) - 1) goto bytewise;
+
+	for (;(uintptr_t)l & sizeof(size_t) - 1 && *l
+	     && tolower(*l) == tolower(*r); l++, r++);
+	if ((uintptr_t)l & sizeof(size_t) - 1) return tolower(*l) - tolower(*r);
+
+	wl = (const void *)l;
+	wr = (const void *)r;
+	for (; !word_has_zero(*wl) && word_to_lower(*wl) == word_to_lower(*wr)
+	     ; wl++, wr++);
+	l = (const void *)wl;
+	r = (const void *)wr;
+
+bytewise:
+	for (; *l && tolower(*l) == tolower(*r); l++, r++);
+
 	return tolower(*l) - tolower(*r);
 }
 
-int __strcasecmp_l(const char *l, const char *r, locale_t loc)
+int __strcasecmp_l(const char *l, const char *r, locale_t unused)
 {
 	return strcasecmp(l, r);
 }
diff --git a/src/string/strcmp.c b/src/string/strcmp.c
index 808bd837..a2f358a9 100644
--- a/src/string/strcmp.c
+++ b/src/string/strcmp.c
@@ -1,7 +1,28 @@
 #include <string.h>
+#include <stdint.h>
 
-int strcmp(const char *l, const char *r)
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
+
+int strcmp(const char *_l, const char *_r)
 {
-	for (; *l==*r && *l; l++, r++);
-	return *(unsigned char *)l - *(unsigned char *)r;
+	const unsigned char *l = (const void *)_l, *r = (const void *)_r;
+	const size_t aliases *wl, aliases *wr;
+
+	if (((uintptr_t)l | (uintptr_t)r) & sizeof(size_t) - 1) goto bytewise;
+
+	for (; (uintptr_t)l & sizeof(size_t) - 1 && *l && *l == *r; l++, r++);
+	if ((uintptr_t)l & sizeof(size_t) - 1) return *l - *r;
+
+	wl = (const void *)l;
+	wr = (const void *)r;
+	for (; !word_has_zero(*wl) && *wl == *wr; wl++, wr++);
+	l = (const void *)wl;
+	r = (const void *)wr;
+
+bytewise:
+	for (; *l && *l == *r; l++, r++);
+
+	return *l - *r;
 }
diff --git a/src/string/strncasecmp.c b/src/string/strncasecmp.c
index 3af53008..e1a9454a 100644
--- a/src/string/strncasecmp.c
+++ b/src/string/strncasecmp.c
@@ -1,16 +1,45 @@
 #include <strings.h>
 #include <ctype.h>
-#include "libc.h"
+#include <stdint.h>
+
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
+#define word_tag_range(x, t, s, e) ((((x) | byte_repeat(t)) \
+- byte_repeat(s) & ~((x) | byte_repeat(t)) - \
+byte_repeat((e) + 1)) & (~(x) & byte_repeat(t)))
+#define word_to_tolower(x) ((x) - (word_tag_range((x), 'a', 'z', 0x80)) >> 2)
+#define weak_alias(o, n) extern __typeof__(o) n __attribute__((weak, alias(#o)))
 
 int strncasecmp(const char *_l, const char *_r, size_t n)
 {
-	const unsigned char *l=(void *)_l, *r=(void *)_r;
+	const unsigned char *l = (const void *)_l, *r = (const void *)_r;
+	const size_t aliases *wl, aliases *wr;
+
 	if (!n--) return 0;
-	for (; *l && *r && n && (*l == *r || tolower(*l) == tolower(*r)); l++, r++, n--);
+
+	if (n < sizeof(size_t) * 3 || ((uintptr_t)l | (uintptr_t)r)
+	    & sizeof(size_t) - 1) goto bytewise;
+
+	for (;(uintptr_t)l & sizeof(size_t) - 1 && *l
+	     && tolower(*l) == tolower(*r); l++, r++, n--);
+	if ((uintptr_t)l & sizeof(size_t) - 1) return tolower(*l) - tolower(*r);
+
+	wl = (const void *)l;
+	wr = (const void *)r;
+	for (; n >= sizeof(size_t) && !word_has_zero(*wl)
+	       && word_to_tolower(*wl) == word_to_tolower(*wr)
+	     ; wl++, wr++, n -= sizeof(size_t));
+	l = (const void *)wl;
+	r = (const void *)wr;
+
+bytewise:
+	for (; n && *l && tolower(*l) == tolower(*r); l++, r++, n--);
+
 	return tolower(*l) - tolower(*r);
 }
 
-int __strncasecmp_l(const char *l, const char *r, size_t n, locale_t loc)
+int __strncasecmp_l(const char *l, const char *r, size_t n, locale_t unused)
 {
 	return strncasecmp(l, r, n);
 }
diff --git a/src/string/strncmp.c b/src/string/strncmp.c
index e228843f..667498e1 100644
--- a/src/string/strncmp.c
+++ b/src/string/strncmp.c
@@ -1,9 +1,33 @@
 #include <string.h>
+#include <stdint.h>
+
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
 
 int strncmp(const char *_l, const char *_r, size_t n)
 {
-	const unsigned char *l=(void *)_l, *r=(void *)_r;
+	const unsigned char *l = (const void *)_l, *r = (const void *)_r;
+	const size_t aliases *wl, aliases *wr;
+
 	if (!n--) return 0;
-	for (; *l && *r && n && *l == *r ; l++, r++, n--);
+
+	if (n < sizeof(size_t) * 3 || ((uintptr_t)l | (uintptr_t)r)
+	    & sizeof(size_t) - 1) goto bytewise;
+
+	for (; (uintptr_t)l & sizeof(size_t) - 1 && *l && *l == *r
+	     ; l++, r++, n--);
+	if ((uintptr_t)l & sizeof(size_t) - 1) return *l - *r;
+
+	wl = (const void *)l;
+	wr = (const void *)r;
+	for (; n >= sizeof(size_t) && !word_has_zero(*wl) && *wl == *wr
+	     ; wl++, wr++, n -= sizeof(size_t));
+	l = (const void *)wl;
+	r = (const void *)wr;
+
+bytewise:
+	for (; n && *l && *l == *r; l++, r++, n--);
+
 	return *l - *r;
 }
-- 
2.13.2



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [RFC PATCH 2/5] string: modify wordwise functions to match new style
  2017-07-15 19:55 [RFC PATCH 0/5] Add explicit_bzero, vectorize and 'normalize' various string functions Nathan McSween
  2017-07-15 19:55 ` [RFC PATCH 1/5] string: vectorize various functions Nathan McSween
@ 2017-07-15 19:55 ` Nathan McSween
  2017-07-15 19:55 ` [RFC PATCH 3/5] string: add strscpy and modify functions to use strscpy Nathan McSween
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Nathan McSween @ 2017-07-15 19:55 UTC (permalink / raw)
  To: musl; +Cc: Nathan McSween

Text sizes w/ gcc 6.3.0:
Before | After
 307 |  248 memccpy.lo
 234 |  225 memchr.lo
 177 |  183 stpcpy.lo
 228 |  310 stpncpy.lo
 234 |  269 strchrnul.lo
 121 |  131 strlen.lo
1301 | 1366 (TOTALS)

Size increases are due to any of:
* __may_alias__ use.
* skipping to bytewise when element count is less than sizeof(size_t) * 3.
* early return after alignment loop.
---
 src/string/memccpy.c   | 51 ++++++++++++++++++++++++++------------------------
 src/string/memchr.c    | 37 ++++++++++++++++++++----------------
 src/string/stpcpy.c    | 37 +++++++++++++++++++-----------------
 src/string/stpncpy.c   | 40 ++++++++++++++++++++-------------------
 src/string/strchrnul.c | 33 +++++++++++++++++---------------
 src/string/strlen.c    | 27 +++++++++++++++-----------
 6 files changed, 123 insertions(+), 102 deletions(-)

diff --git a/src/string/memccpy.c b/src/string/memccpy.c
index 7c233d5e..068f6a2d 100644
--- a/src/string/memccpy.c
+++ b/src/string/memccpy.c
@@ -1,31 +1,34 @@
 #include <string.h>
 #include <stdint.h>
-#include <limits.h>
 
-#define ALIGN (sizeof(size_t)-1)
-#define ONES ((size_t)-1/UCHAR_MAX)
-#define HIGHS (ONES * (UCHAR_MAX/2+1))
-#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
 
-void *memccpy(void *restrict dest, const void *restrict src, int c, size_t n)
+void *memccpy(void *restrict _d, const void *restrict _s, int i, size_t n)
 {
-	unsigned char *d = dest;
-	const unsigned char *s = src;
-	size_t *wd, k;
-	const size_t *ws;
+	i = (unsigned char)i;
+	unsigned char *d = _d;
+	const unsigned char *s = _s;
+	size_t aliases *wd;
+	const size_t aliases *ws, wi = byte_repeat(i);
 
-	c = (unsigned char)c;
-	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
-		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s)!=c; n--, s++, d++);
-		if ((uintptr_t)s & ALIGN) goto tail;
-		k = ONES * c;
-		wd=(void *)d; ws=(const void *)s;
-		for (; n>=sizeof(size_t) && !HASZERO(*ws^k);
-		       n-=sizeof(size_t), ws++, wd++) *wd = *ws;
-		d=(void *)wd; s=(const void *)ws;
-	}
-	for (; n && (*d=*s)!=c; n--, s++, d++);
-tail:
-	if (*s==c) return d+1;
-	return 0;
+	if (n < sizeof(size_t) * 3 || ((uintptr_t)d | (uintptr_t)s)
+	    & sizeof(size_t) - 1) goto bytewise;
+
+	for (; (uintptr_t)s & sizeof(size_t) - 1 && *s != i
+	     ; d++, s++, n--) *d = *s;
+	if ((uintptr_t)s & sizeof(size_t) - 1) return d + 1;
+
+	wd = (void *)d;
+	ws = (const void *)s;
+	for (;  n >= sizeof(size_t) && !word_has_zero(*ws ^ wi)
+	     ; *wd++ = *ws++, n -= sizeof(size_t));
+	d = (void *)wd;
+	s = (const void *)ws;
+
+bytewise:
+	for (; n && *d != i; *d++ = *s++, n--);
+
+	return n ? d + 1 : 0;
 }
diff --git a/src/string/memchr.c b/src/string/memchr.c
index 4daff7bb..0b3d3d80 100644
--- a/src/string/memchr.c
+++ b/src/string/memchr.c
@@ -1,23 +1,28 @@
 #include <string.h>
 #include <stdint.h>
-#include <limits.h>
 
-#define SS (sizeof(size_t))
-#define ALIGN (sizeof(size_t)-1)
-#define ONES ((size_t)-1/UCHAR_MAX)
-#define HIGHS (ONES * (UCHAR_MAX/2+1))
-#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
 
-void *memchr(const void *src, int c, size_t n)
+void *memchr(const void *_s, int i, size_t n)
 {
-	const unsigned char *s = src;
-	c = (unsigned char)c;
-	for (; ((uintptr_t)s & ALIGN) && n && *s != c; s++, n--);
-	if (n && *s != c) {
-		const size_t *w;
-		size_t k = ONES * c;
-		for (w = (const void *)s; n>=SS && !HASZERO(*w^k); w++, n-=SS);
-		for (s = (const void *)w; n && *s != c; s++, n--);
-	}
+	i = (unsigned char)i;
+	const unsigned char *s = _s;
+	const size_t aliases *ws, wi = byte_repeat(i);
+
+	if (n < sizeof(size_t) * 3) goto bytewise;
+
+	for (; (uintptr_t)s & sizeof(size_t) - 1 && *s != i; s++, n--);
+	if ((uintptr_t)s & sizeof(size_t) - 1) return (void *)s;
+
+	ws = (const void *)s;
+	for (; n >= sizeof(size_t) && !word_has_zero(*ws ^ wi)
+	     ; ws++, n -= sizeof(size_t));
+	s = (const void *)ws;
+
+bytewise:
+	for (; n && *s != i; s++, n--);
+
 	return n ? (void *)s : 0;
 }
diff --git a/src/string/stpcpy.c b/src/string/stpcpy.c
index 06623c44..0c4eb21a 100644
--- a/src/string/stpcpy.c
+++ b/src/string/stpcpy.c
@@ -1,26 +1,29 @@
 #include <string.h>
 #include <stdint.h>
-#include <limits.h>
-#include "libc.h"
 
-#define ALIGN (sizeof(size_t))
-#define ONES ((size_t)-1/UCHAR_MAX)
-#define HIGHS (ONES * (UCHAR_MAX/2+1))
-#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
+#define weak_alias(o, n) extern __typeof__(o) n __attribute__((weak, alias(#o)))
 
 char *__stpcpy(char *restrict d, const char *restrict s)
 {
-	size_t *wd;
-	const size_t *ws;
-
-	if ((uintptr_t)s % ALIGN == (uintptr_t)d % ALIGN) {
-		for (; (uintptr_t)s % ALIGN; s++, d++)
-			if (!(*d=*s)) return d;
-		wd=(void *)d; ws=(const void *)s;
-		for (; !HASZERO(*ws); *wd++ = *ws++);
-		d=(void *)wd; s=(const void *)ws;
-	}
-	for (; (*d=*s); s++, d++);
+	size_t aliases *wd;
+	const size_t aliases *ws;
+
+	if (((uintptr_t)d | (uintptr_t)s) & sizeof(size_t) - 1) goto bytewise;
+
+	for (; (uintptr_t)s & sizeof(size_t) - 1 && (*d = *s); d++, s++);
+	if ((uintptr_t)s & sizeof(size_t) - 1) return d;
+
+	wd = (void *)d;
+	ws = (const void *)s;
+	for (; !word_has_zero(*ws); wd++, ws++) *wd = *ws;
+	d = (void *)wd;
+	s = (const void *)ws;
+
+bytewise:
+	for (; *d = *s; d++, s++);
 
 	return d;
 }
diff --git a/src/string/stpncpy.c b/src/string/stpncpy.c
index 1f57a4dd..0b37da5d 100644
--- a/src/string/stpncpy.c
+++ b/src/string/stpncpy.c
@@ -1,31 +1,33 @@
 #include <string.h>
 #include <stdint.h>
-#include <limits.h>
-#include "libc.h"
 
-#define ALIGN (sizeof(size_t)-1)
-#define ONES ((size_t)-1/UCHAR_MAX)
-#define HIGHS (ONES * (UCHAR_MAX/2+1))
-#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
+#define weak_alias(o, n) extern __typeof__(o) n __attribute__((weak, alias(#o)))
 
 char *__stpncpy(char *restrict d, const char *restrict s, size_t n)
 {
 	size_t *wd;
 	const size_t *ws;
 
-	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
-		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s); n--, s++, d++);
-		if (!n || !*s) goto tail;
-		wd=(void *)d; ws=(const void *)s;
-		for (; n>=sizeof(size_t) && !HASZERO(*ws);
-		       n-=sizeof(size_t), ws++, wd++) *wd = *ws;
-		d=(void *)wd; s=(const void *)ws;
-	}
-	for (; n && (*d=*s); n--, s++, d++);
-tail:
-	memset(d, 0, n);
-	return d;
+	if (n < sizeof(size_t) * 3 || ((uintptr_t)d | (uintptr_t)s)
+	    & sizeof(size_t) - 1) goto bytewise;
+
+	for (; ((uintptr_t)s & sizeof(size_t) - 1) && (*d = *s); d++, s++, n--);
+	if (!*s) return memset(d, 0, n);
+
+	wd = (void *)d;
+	ws = (const void *)s;
+	for (; n >= sizeof(size_t) && !word_has_zero(*ws)
+	     ; n -= sizeof(size_t), wd++, ws++) *wd = *ws;
+	d = (void *)wd;
+	s = (const void *)ws;
+
+bytewise:
+	for (; n && (*d = *s); d++, s++, n--);
+
+	return memset(d, 0, n);
 }
 
 weak_alias(__stpncpy, stpncpy);
-
diff --git a/src/string/strchrnul.c b/src/string/strchrnul.c
index 05700ad6..80405595 100644
--- a/src/string/strchrnul.c
+++ b/src/string/strchrnul.c
@@ -1,25 +1,28 @@
 #include <string.h>
 #include <stdint.h>
-#include <limits.h>
-#include "libc.h"
 
-#define ALIGN (sizeof(size_t))
-#define ONES ((size_t)-1/UCHAR_MAX)
-#define HIGHS (ONES * (UCHAR_MAX/2+1))
-#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
+#define weak_alias(o, n) extern __typeof__(o) n __attribute__((weak, alias(#o)))
 
-char *__strchrnul(const char *s, int c)
+char *__strchrnul(const char *_s, int i)
 {
-	size_t *w, k;
+	i = (unsigned char)i;
+	const unsigned char *s = (const void *)_s;
+	const size_t aliases *ws, wi = byte_repeat(i);
 
-	c = (unsigned char)c;
-	if (!c) return (char *)s + strlen(s);
+	if (!i) return (char *)s + strlen((char *)s);
+
+	for (; (uintptr_t)s & sizeof(size_t) - 1 && *s && *s != i; s++);
+	if ((uintptr_t)s & sizeof(size_t) - 1) return (char *)s;
+
+	ws = (const void *)s;
+	for (; !word_has_zero(*ws) && !word_has_zero(*ws ^ wi); ws++);
+	s = (const void *)ws;
+
+	for (; *s && *s != i; s++);
 
-	for (; (uintptr_t)s % ALIGN; s++)
-		if (!*s || *(unsigned char *)s == c) return (char *)s;
-	k = ONES * c;
-	for (w = (void *)s; !HASZERO(*w) && !HASZERO(*w^k); w++);
-	for (s = (void *)w; *s && *(unsigned char *)s != c; s++);
 	return (char *)s;
 }
 
diff --git a/src/string/strlen.c b/src/string/strlen.c
index 929ddcbc..19ba310a 100644
--- a/src/string/strlen.c
+++ b/src/string/strlen.c
@@ -1,18 +1,23 @@
 #include <string.h>
 #include <stdint.h>
-#include <limits.h>
 
-#define ALIGN (sizeof(size_t))
-#define ONES ((size_t)-1/UCHAR_MAX)
-#define HIGHS (ONES * (UCHAR_MAX/2+1))
-#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
 
 size_t strlen(const char *s)
 {
-	const char *a = s;
-	const size_t *w;
-	for (; (uintptr_t)s % ALIGN; s++) if (!*s) return s-a;
-	for (w = (const void *)s; !HASZERO(*w); w++);
-	for (s = (const void *)w; *s; s++);
-	return s-a;
+	const char *const s0 = s;
+	const size_t aliases *ws;
+
+	for (; (uintptr_t)s & sizeof(size_t) - 1 && *s; s++);
+	if (!*s) return s - s0;
+
+	ws = (const void *)s;
+	for (; !word_has_zero(*ws); ws++);
+	s = (const void *)ws;
+
+	for (; *s; s++);
+
+	return s - s0;
 }
-- 
2.13.2



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [RFC PATCH 3/5] string: add strscpy and modify functions to use strscpy
  2017-07-15 19:55 [RFC PATCH 0/5] Add explicit_bzero, vectorize and 'normalize' various string functions Nathan McSween
  2017-07-15 19:55 ` [RFC PATCH 1/5] string: vectorize various functions Nathan McSween
  2017-07-15 19:55 ` [RFC PATCH 2/5] string: modify wordwise functions to match new style Nathan McSween
@ 2017-07-15 19:55 ` Nathan McSween
  2017-07-15 19:55 ` [RFC PATCH 4/5] string: use strchrnul in strcasestr instead of bytewise iteration Nathan McSween
  2017-07-15 19:55 ` [RFC PATCH 5/5] string: add memsset a 'secure' memset and bsd explicit_bzero Nathan McSween
  4 siblings, 0 replies; 6+ messages in thread
From: Nathan McSween @ 2017-07-15 19:55 UTC (permalink / raw)
  To: musl; +Cc: Nathan McSween

Text sizes w/ gcc 6.3.0
Before | After
309 |  37 strlcpy.lo
 73 |  42 strncat.lo
240 |  85 stpncpy.lo
N/A | 260 strscpy.lo
622 | 424 (TOTALS)

strscpy is almost the same as strlcpy except it doesn't add strlen(src) to the result.
---
 src/string/stpncpy.c | 28 +++++++---------------------
 src/string/strlcpy.c | 27 +++------------------------
 src/string/strncat.c | 10 +++++-----
 src/string/strscpy.c | 37 +++++++++++++++++++++++++++++++++++++
 4 files changed, 52 insertions(+), 50 deletions(-)
 create mode 100644 src/string/strscpy.c

diff --git a/src/string/stpncpy.c b/src/string/stpncpy.c
index 0b37da5d..6eaa4078 100644
--- a/src/string/stpncpy.c
+++ b/src/string/stpncpy.c
@@ -1,33 +1,19 @@
 #include <string.h>
-#include <stdint.h>
 
-#define aliases __attribute__((__may_alias__))
-#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
-#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
 #define weak_alias(o, n) extern __typeof__(o) n __attribute__((weak, alias(#o)))
 
+size_t __strscpy(char *, const char *, size_t);
+
 char *__stpncpy(char *restrict d, const char *restrict s, size_t n)
 {
-	size_t *wd;
-	const size_t *ws;
-
-	if (n < sizeof(size_t) * 3 || ((uintptr_t)d | (uintptr_t)s)
-	    & sizeof(size_t) - 1) goto bytewise;
-
-	for (; ((uintptr_t)s & sizeof(size_t) - 1) && (*d = *s); d++, s++, n--);
-	if (!*s) return memset(d, 0, n);
+	if (!n) return d;
 
-	wd = (void *)d;
-	ws = (const void *)s;
-	for (; n >= sizeof(size_t) && !word_has_zero(*ws)
-	     ; n -= sizeof(size_t), wd++, ws++) *wd = *ws;
-	d = (void *)wd;
-	s = (const void *)ws;
+	size_t r = __strscpy(d, s, n) + 1;
 
-bytewise:
-	for (; n && (*d = *s); d++, s++, n--);
+	if (s[r - 1]) d[r - 1] = s[r - 1];
+	else memset(d + r, 0, n - r);
 
-	return memset(d, 0, n);
+	return d + r;
 }
 
 weak_alias(__stpncpy, stpncpy);
diff --git a/src/string/strlcpy.c b/src/string/strlcpy.c
index 193d7241..bb8ebb47 100644
--- a/src/string/strlcpy.c
+++ b/src/string/strlcpy.c
@@ -1,32 +1,11 @@
 #define _BSD_SOURCE
 #include <string.h>
-#include <stdint.h>
-#include <limits.h>
-#include "libc.h"
 
-#define ALIGN (sizeof(size_t)-1)
-#define ONES ((size_t)-1/UCHAR_MAX)
-#define HIGHS (ONES * (UCHAR_MAX/2+1))
-#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+size_t __strscpy(char *, const char *, size_t);
 
 size_t strlcpy(char *d, const char *s, size_t n)
 {
-	char *d0 = d;
-	size_t *wd;
-	const size_t *ws;
+	const size_t r = __strscpy(d, s, n);
 
-	if (!n--) goto finish;
-	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
-		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s); n--, s++, d++);
-		if (n && *s) {
-			wd=(void *)d; ws=(const void *)s;
-			for (; n>=sizeof(size_t) && !HASZERO(*ws);
-			       n-=sizeof(size_t), ws++, wd++) *wd = *ws;
-			d=(void *)wd; s=(const void *)ws;
-		}
-	}
-	for (; n && (*d=*s); n--, s++, d++);
-	*d = 0;
-finish:
-	return d-d0 + strlen(s);
+	return r + strlen(s + r);
 }
diff --git a/src/string/strncat.c b/src/string/strncat.c
index 01ca2a23..f86dce8e 100644
--- a/src/string/strncat.c
+++ b/src/string/strncat.c
@@ -1,10 +1,10 @@
 #include <string.h>
 
+size_t __strscpy(char *, const char *, size_t);
+
 char *strncat(char *restrict d, const char *restrict s, size_t n)
 {
-	char *a = d;
-	d += strlen(d);
-	while (n && *s) n--, *d++ = *s++;
-	*d++ = 0;
-	return a;
+	__strscpy(d + strlen(d), s, n + 1);
+
+	return d;
 }
diff --git a/src/string/strscpy.c b/src/string/strscpy.c
new file mode 100644
index 00000000..c2e25b7c
--- /dev/null
+++ b/src/string/strscpy.c
@@ -0,0 +1,37 @@
+#include <stddef.h>
+#include <stdint.h>
+
+#define hidden __attribute__((visibility("hidden")))
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
+
+hidden
+size_t __strscpy(char *restrict d, const char *restrict s, size_t n)
+{
+	const char *const d0 = d;
+	size_t aliases *wd;
+	const size_t aliases *ws;
+
+	if (!n--) return 0;
+
+	if ( n < sizeof(size_t) * 3 || ((uintptr_t)d | (uintptr_t)s)
+	     & sizeof(size_t) - 1) goto bytewise;
+
+	for (; (uintptr_t)s & sizeof(size_t) - 1 && (*d = *s); d++, s++, n--);
+	if ((uintptr_t)s & sizeof(size_t) - 1) return d - d0;
+
+	wd = (void *)d;
+	ws = (const void *)s;
+	for (; !word_has_zero(*ws) && n >= sizeof(size_t)
+	     ; wd++, ws++, n -= sizeof(size_t)) *wd = *ws;
+	d = (void *)wd;
+	s = (const void *)ws;
+
+bytewise:
+	for (; n && (*d = *s); d++, s++, n--);
+
+	*d = 0;
+
+	return d - d0;
+}
-- 
2.13.2



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [RFC PATCH 4/5] string: use strchrnul in strcasestr instead of bytewise iteration
  2017-07-15 19:55 [RFC PATCH 0/5] Add explicit_bzero, vectorize and 'normalize' various string functions Nathan McSween
                   ` (2 preceding siblings ...)
  2017-07-15 19:55 ` [RFC PATCH 3/5] string: add strscpy and modify functions to use strscpy Nathan McSween
@ 2017-07-15 19:55 ` Nathan McSween
  2017-07-15 19:55 ` [RFC PATCH 5/5] string: add memsset a 'secure' memset and bsd explicit_bzero Nathan McSween
  4 siblings, 0 replies; 6+ messages in thread
From: Nathan McSween @ 2017-07-15 19:55 UTC (permalink / raw)
  To: musl; +Cc: Nathan McSween

---
 src/string/strcasestr.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/string/strcasestr.c b/src/string/strcasestr.c
index af109f36..090b78fd 100644
--- a/src/string/strcasestr.c
+++ b/src/string/strcasestr.c
@@ -3,7 +3,13 @@
 
 char *strcasestr(const char *h, const char *n)
 {
-	size_t l = strlen(n);
-	for (; *h; h++) if (!strncasecmp(h, n, l)) return (char *)h;
-	return 0;
+	const size_t nl= strlen(n);
+
+	h = strchrnul(h, *n);
+
+	if (!*n) return (char *)h;
+
+	for (; *h && strncasecmp(h, n, nl); h = strchrnul(h + 1, *n));
+
+	return *h ? (char *)h : 0;
 }
-- 
2.13.2



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [RFC PATCH 5/5] string: add memsset a 'secure' memset and bsd explicit_bzero
  2017-07-15 19:55 [RFC PATCH 0/5] Add explicit_bzero, vectorize and 'normalize' various string functions Nathan McSween
                   ` (3 preceding siblings ...)
  2017-07-15 19:55 ` [RFC PATCH 4/5] string: use strchrnul in strcasestr instead of bytewise iteration Nathan McSween
@ 2017-07-15 19:55 ` Nathan McSween
  4 siblings, 0 replies; 6+ messages in thread
From: Nathan McSween @ 2017-07-15 19:55 UTC (permalink / raw)
  To: musl; +Cc: Nathan McSween

---
 src/string/explicit_bzero.c |  9 +++++++++
 src/string/memsset.c        | 13 +++++++++++++
 2 files changed, 22 insertions(+)
 create mode 100644 src/string/explicit_bzero.c
 create mode 100644 src/string/memsset.c

diff --git a/src/string/explicit_bzero.c b/src/string/explicit_bzero.c
new file mode 100644
index 00000000..497dd2bc
--- /dev/null
+++ b/src/string/explicit_bzero.c
@@ -0,0 +1,9 @@
+#define _BSD_SOURCE
+#include <string.h>
+
+void *__memsset(void *, int, size_t);
+
+void explicit_bzero(void *d, size_t n)
+{
+	__memsset(d, 0, n);
+}
diff --git a/src/string/memsset.c b/src/string/memsset.c
new file mode 100644
index 00000000..d4ab72a4
--- /dev/null
+++ b/src/string/memsset.c
@@ -0,0 +1,13 @@
+#include <string.h>
+
+#define hidden __attribute__((visibility("hidden")))
+
+hidden
+void *__memsset(void *d, int i, size_t n)
+{
+	memset(d, i, n);
+
+	__asm__ __volatile__("" : "=r" (d): "0" (d) : "memory");
+
+	return d;
+}
-- 
2.13.2



^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-07-15 19:55 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-15 19:55 [RFC PATCH 0/5] Add explicit_bzero, vectorize and 'normalize' various string functions Nathan McSween
2017-07-15 19:55 ` [RFC PATCH 1/5] string: vectorize various functions Nathan McSween
2017-07-15 19:55 ` [RFC PATCH 2/5] string: modify wordwise functions to match new style Nathan McSween
2017-07-15 19:55 ` [RFC PATCH 3/5] string: add strscpy and modify functions to use strscpy Nathan McSween
2017-07-15 19:55 ` [RFC PATCH 4/5] string: use strchrnul in strcasestr instead of bytewise iteration Nathan McSween
2017-07-15 19:55 ` [RFC PATCH 5/5] string: add memsset a 'secure' memset and bsd explicit_bzero Nathan McSween

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).