[PATCH 0/4] Refactor and expand string functions.

mailing list of musl libc
 help / color / mirror / code / Atom feed

* [PATCH 0/4] Refactor and expand string functions.
@ 2013-02-04  0:12 Nathan McSween
  2013-02-04  0:12 ` [PATCH 1/4] Internal: Add word.h - word-at-a-time fns / macros Nathan McSween
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Nathan McSween @ 2013-02-04  0:12 UTC (permalink / raw)
  To: musl; +Cc: Nathan McSween

memchr - refactor
memcmp - word-at-a-time
memset - refactor
strcmp - word-at-a-time
strlcpy - refactor and always terminate string
strlen - refactor
strncmp - word-at-a-time

A simple wc -l on asm lines for changed files gives:
91 new_memchr.s
106 musl_memchr.s
65 new_memcmp.s
32 musl_memcmp.s
118 new_memset.s
121 musl_memset.s
64 new_strcmp.s
26 musl_strcmp.s
98 new_strlcpy.s
124 musl_strlcpy.s
55 new_strlen.s
55 musl_strlen.s
66 new_strncmp.s
45 musl_strncmp.s

Bikeshed over inline documentation welcome.

Nathan McSween (4):
  Internal: Add word.h - word-at-a-time fns / macros
  String: refactor to utilize word.h and optimize
  String: expand to word-at-a-time
  String: refactor to utilize word.h and always terminate string

 src/internal/word.h  | 39 ++++++++++++++++++++++++++++++++++++
 src/string/memchr.c  | 42 ++++++++++++++++++++++-----------------
 src/string/memcmp.c  | 38 +++++++++++++++++++++++++++++++----
 src/string/memset.c  | 39 +++++++++++++++++++++---------------
 src/string/strcmp.c  | 35 +++++++++++++++++++++++++++++---
 src/string/strlcpy.c | 56 ++++++++++++++++++++++++++++++----------------------
 src/string/strlen.c  | 29 +++++++++++++++------------
 src/string/strncmp.c | 36 ++++++++++++++++++++++++++++-----
 8 files changed, 231 insertions(+), 83 deletions(-)
 create mode 100644 src/internal/word.h

-- 
1.7.11.4



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/4] Internal: Add word.h - word-at-a-time fns / macros
  2013-02-04  0:12 [PATCH 0/4] Refactor and expand string functions Nathan McSween
@ 2013-02-04  0:12 ` Nathan McSween
  2013-02-04  0:12 ` [PATCH 2/4] String: refactor to utilize word.h and optimize Nathan McSween
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Nathan McSween @ 2013-02-04  0:12 UTC (permalink / raw)
  To: musl; +Cc: Nathan McSween

---
 src/internal/word.h | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 src/internal/word.h

diff --git a/src/internal/word.h b/src/internal/word.h
new file mode 100644
index 0000000..395f802
--- /dev/null
+++ b/src/internal/word.h
@@ -0,0 +1,39 @@
+/**
+ * _INTERNAL_WORD_H - various word size functions / macros
+ */
+#ifndef _MYOSIN_WORD_H
+#define _MYOSIN_WORD_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * WORD_LSB_ONE - Set low bit of each byte on arch word size to one.
+ */
+#define WORD_LSB_ONE ((size_t)-1 / (unsigned char)-1)
+
+/**
+ * WORD_MSB_ONE - Set high bit of each byte on arch word size to one.
+ */
+#define WORD_MSB_ONE (WORD_LSB_ONE * ((unsigned char)-1 / 2 + 1))
+
+/**
+ * word_has_zero - Word has a zero character
+ * @w: Word
+ */
+static inline char word_has_zero(size_t w)
+{
+	return !!((w - WORD_LSB_ONE) & (~w & WORD_MSB_ONE));
+}
+
+/**
+ * word_has_char - Word has a character
+ * @w: Word
+ */
+static inline char word_has_char(size_t w, char c)
+{
+	return !!((w - WORD_LSB_ONE)
+		  & ((~w & WORD_MSB_ONE)^(WORD_LSB_ONE * c)));
+}
+
+#endif /* !_INTERNAL_WORD_H */
-- 
1.7.11.4



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 2/4] String: refactor to utilize word.h and optimize
  2013-02-04  0:12 [PATCH 0/4] Refactor and expand string functions Nathan McSween
  2013-02-04  0:12 ` [PATCH 1/4] Internal: Add word.h - word-at-a-time fns / macros Nathan McSween
@ 2013-02-04  0:12 ` Nathan McSween
  2013-02-04  0:12 ` [PATCH 3/4] String: expand to word-at-a-time Nathan McSween
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Nathan McSween @ 2013-02-04  0:12 UTC (permalink / raw)
  To: musl; +Cc: Nathan McSween

---
 src/string/memchr.c | 42 ++++++++++++++++++++++++------------------
 src/string/memset.c | 39 +++++++++++++++++++++++----------------
 src/string/strlen.c | 29 ++++++++++++++++-------------
 3 files changed, 63 insertions(+), 47 deletions(-)

diff --git a/src/string/memchr.c b/src/string/memchr.c
index a0472f7..559facc 100644
--- a/src/string/memchr.c
+++ b/src/string/memchr.c
@@ -1,24 +1,30 @@
-#include <string.h>
-#include <stdlib.h>
+#include <stddef.h>
 #include <stdint.h>
-#include <limits.h>
-
-#define SS (sizeof(size_t))
-#define ALIGN (sizeof(size_t)-1)
-#define ONES ((size_t)-1/UCHAR_MAX)
-#define HIGHS (ONES * (UCHAR_MAX/2+1))
-#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+#include <string.h>
+#include "word.h"
 
-void *memchr(const void *src, int c, size_t n)
+/**
+ * memchr - Word sized c standard memchr.
+ * @s: Source
+ * @c: Character
+ * @n: Max size of @s
+ */
+void *memchr(const void *s, int c, size_t n)
 {
-	const unsigned char *s = src;
+	const unsigned char *cs = (const unsigned char *)s;
+	const size_t *w;
+
 	c = (unsigned char)c;
-	for (; ((uintptr_t)s & ALIGN) && n && *s != c; s++, n--);
-	if (n && *s != c) {
-		const size_t *w;
-		size_t k = ONES * c;
-		for (w = (const void *)s; n>=SS && !HASZERO(*w^k); w++, n-=SS);
-		for (s = (const void *)w; n && *s != c; s++, n--);
+
+	for (; (uintptr_t)cs % sizeof(size_t); cs++, n--) {
+		if (!n) return NULL;
+		if (*cs == c) return (void *)cs;
 	}
-	return n ? (void *)s : 0;
+
+	for (w = (const size_t *)cs; !word_has_char(*w, c); w++, n--)
+		if (!n) return NULL;
+	for (cs = (const unsigned char *)w; *cs != c; cs++, n--)
+		if (!n) return NULL;
+
+	return (void *)cs;
 }
diff --git a/src/string/memset.c b/src/string/memset.c
index 20e47c4..8c87548 100644
--- a/src/string/memset.c
+++ b/src/string/memset.c
@@ -1,21 +1,28 @@
-#include <string.h>
-#include <stdlib.h>
+#include <stddef.h>
 #include <stdint.h>
-#include <limits.h>
-
-#define SS (sizeof(size_t))
-#define ALIGN (sizeof(size_t)-1)
-#define ONES ((size_t)-1/UCHAR_MAX)
+#include <string.h>
+#include "word.h"
 
-void *memset(void *dest, int c, size_t n)
+/**
+ * memset - Word sized c standard memset.
+ * @d: Destination
+ * @c: Charater to set
+ * @n: Max size to set to @c in @d
+ */
+void *memset(void *d, int c, size_t n)
 {
-	unsigned char *s = dest;
+	unsigned char *cd = (unsigned char *)d;
+	const size_t wc = WORD_LSB_ONE * (unsigned char)c;
+	size_t *wd;
+
 	c = (unsigned char)c;
-	for (; ((uintptr_t)s & ALIGN) && n; n--) *s++ = c;
-	if (n) {
-		size_t *w, k = ONES * c;
-		for (w = (void *)s; n>=SS; n-=SS, w++) *w = k;
-		for (s = (void *)w; n; n--, s++) *s = c;
-	}
-	return dest;
+
+	for (; (uintptr_t)cd % sizeof(size_t); *cd++ = c, n--)
+		if (!n) return d;
+
+	for (wd = (size_t *)cd; n >= sizeof(size_t)
+	     ; *wd++ = wc, n -= sizeof(size_t));
+	for (cd = (unsigned char *)wd; n; *cd++ = c, n--);
+
+	return d;
 }
diff --git a/src/string/strlen.c b/src/string/strlen.c
index d6f8631..2b356f0 100644
--- a/src/string/strlen.c
+++ b/src/string/strlen.c
@@ -1,19 +1,22 @@
-#include <string.h>
-#include <stdlib.h>
+#include <stddef.h>
 #include <stdint.h>
-#include <limits.h>
-
-#define ALIGN (sizeof(size_t))
-#define ONES ((size_t)-1/UCHAR_MAX)
-#define HIGHS (ONES * (UCHAR_MAX/2+1))
-#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+#include <string.h>
+#include "word.h"
 
+/**
+ * strlen - Word sized c standard strlen.
+ * @s: Source
+ */
 size_t strlen(const char *s)
 {
-	const char *a = s;
+	const char *z = s;
 	const size_t *w;
-	for (; (uintptr_t)s % ALIGN; s++) if (!*s) return s-a;
-	for (w = (const void *)s; !HASZERO(*w); w++);
-	for (s = (const void *)w; *s; s++);
-	return s-a;
+
+	for (; (uintptr_t)s % sizeof(size_t); s++)
+		if (!*s) return s - z;
+
+	for (w = (const size_t *)s; !word_has_zero(*w); w++);
+	for (s = (const char *)w; *s; s++);
+
+	return s - z;
 }
-- 
1.7.11.4



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 3/4] String: expand to word-at-a-time
  2013-02-04  0:12 [PATCH 0/4] Refactor and expand string functions Nathan McSween
  2013-02-04  0:12 ` [PATCH 1/4] Internal: Add word.h - word-at-a-time fns / macros Nathan McSween
  2013-02-04  0:12 ` [PATCH 2/4] String: refactor to utilize word.h and optimize Nathan McSween
@ 2013-02-04  0:12 ` Nathan McSween
  2013-02-04  2:24   ` Isaac Dunham
  2013-02-04  0:12 ` [PATCH 4/4] String: refactor to utilize word.h and always terminate string Nathan McSween
  2013-02-05  4:25 ` [PATCH 0/4] Refactor and expand string functions Nathan McSween
  4 siblings, 1 reply; 11+ messages in thread
From: Nathan McSween @ 2013-02-04  0:12 UTC (permalink / raw)
  To: musl; +Cc: Nathan McSween

---
 src/string/memcmp.c  | 38 ++++++++++++++++++++++++++++++++++----
 src/string/strcmp.c  | 35 ++++++++++++++++++++++++++++++++---
 src/string/strncmp.c | 36 +++++++++++++++++++++++++++++++-----
 3 files changed, 97 insertions(+), 12 deletions(-)

diff --git a/src/string/memcmp.c b/src/string/memcmp.c
index bdbce9f..c5e8e59 100644
--- a/src/string/memcmp.c
+++ b/src/string/memcmp.c
@@ -1,8 +1,38 @@
+#include <stddef.h>
 #include <string.h>
+#include "word.h"
 
-int memcmp(const void *vl, const void *vr, size_t n)
+/**
+ * memcmp - Word sized c standard memcmp.
+ * @s: Source
+ * @c: Comparative
+ * @n: Max size of @s
+ */
+int memcmp(const void *s, const void *c, size_t n)
 {
-	const unsigned char *l=vl, *r=vr;
-	for (; n && *l == *r; n--, l++, r++);
-	return n ? *l-*r : 0;
+	const unsigned char *cs = (const unsigned char *)s;
+	const unsigned char *cc = (const unsigned char *)c;
+	const size_t *ws, *wc;
+
+
+	if ((uintptr_t)cs % sizeof(size_t) != (uintptr_t)cc % sizeof(size_t))
+		goto misaligned;
+
+	for (; (uintptr_t)cs % sizeof(size_t); cs++, cc++, n--) {
+		if (!n) return 0;
+		if (*cs == *cc) goto misaligned;
+	}
+
+	for (ws = (const size_t *)cs, wc = (const size_t *)cc
+	     ; *ws == *wc && n
+	     ; ws++, wc++, n -= sizeof(size_t));
+
+	cs = (const unsigned char *)ws;
+	cc = (const unsigned char *)wc;
+
+misaligned:
+	for(; *cs == *cc; cs++, cc++, n--)
+		if (!n) return 0;
+
+	return *cs - *cc;
 }
diff --git a/src/string/strcmp.c b/src/string/strcmp.c
index 91eb740..2a6983c 100644
--- a/src/string/strcmp.c
+++ b/src/string/strcmp.c
@@ -1,7 +1,36 @@
+#include <stddef.h>
+#include <stdint.h>
 #include <string.h>
+#include "word.h"
 
-int strcmp(const char *l, const char *r)
+/**
+ * strcmp - Word sized c standard strcmp.
+ * @c: Comparative
+ * @s: Source
+ */
+#undef strcmp
+int strcmp(const char *c, const char *s)
 {
-	for (; *l==*r && *l && *r; l++, r++);
-	return *(unsigned char *)l - *(unsigned char *)r;
+	const size_t *wc, *ws;
+
+	if ((uintptr_t)c % sizeof(size_t) != (uintptr_t)s % sizeof(size_t))
+		goto misaligned;
+
+	for (; (uintptr_t)c % sizeof(size_t); c++, s++) {
+		if (*c != *s || !*c || !*s)
+			return *(const unsigned char *)c
+			        - *(const unsigned char *)s;
+	}
+
+	for (wc = (const size_t *)c, ws = (const size_t *)s
+	     ; (!word_has_zero(*wc) || !word_has_zero(*ws)) && *wc == *ws
+	     ; wc++, ws++);
+
+	c = (const char *)wc;
+	s = (const char *)ws;
+
+misaligned:
+	for(; *c == *s && *c && *s; c++, s++);
+
+	return *(const unsigned char *)c - *(const unsigned char *)s;
 }
diff --git a/src/string/strncmp.c b/src/string/strncmp.c
index e228843..5c60895 100644
--- a/src/string/strncmp.c
+++ b/src/string/strncmp.c
@@ -1,9 +1,35 @@
+#include <stdint.h>
 #include <string.h>
+#include "word.h"
 
-int strncmp(const char *_l, const char *_r, size_t n)
+/**
+ * strncmp - Word sized c standard strncmp.
+ * @c: Comparative
+ * @s: Source
+ * @n: Max size of @s
+ */
+int strncmp(const char *c, const char *s, size_t n)
 {
-	const unsigned char *l=(void *)_l, *r=(void *)_r;
-	if (!n--) return 0;
-	for (; *l && *r && n && *l == *r ; l++, r++, n--);
-	return *l - *r;
+	const size_t *wc, *ws;
+
+	if ((uintptr_t)c % sizeof(size_t) != (uintptr_t)s % sizeof(size_t))
+		goto misaligned;
+
+	for (; (uintptr_t)c % sizeof(size_t); c++, s++, n--) {
+		if (*c != *s || !*c || !*s || !n)
+			return *(const unsigned char *)c
+				- *(const unsigned char *)s;
+	}
+
+	for (wc = (const size_t *)c, ws = (const size_t *)s
+	     ; !word_has_zero(*wc) || !word_has_zero(*ws)
+	     ; wc++, ws++);
+
+	c = (const char *)wc;
+	s = (const char *)ws;
+
+misaligned:
+	for(; *c == *s && *c && *s && n; c++, s++, n--);
+
+	return *(const unsigned char *)c - *(const unsigned char *)s;
 }
-- 
1.7.11.4



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 4/4] String: refactor to utilize word.h and always terminate string
  2013-02-04  0:12 [PATCH 0/4] Refactor and expand string functions Nathan McSween
                   ` (2 preceding siblings ...)
  2013-02-04  0:12 ` [PATCH 3/4] String: expand to word-at-a-time Nathan McSween
@ 2013-02-04  0:12 ` Nathan McSween
  2013-02-05  4:25 ` [PATCH 0/4] Refactor and expand string functions Nathan McSween
  4 siblings, 0 replies; 11+ messages in thread
From: Nathan McSween @ 2013-02-04  0:12 UTC (permalink / raw)
  To: musl; +Cc: Nathan McSween

---
 src/string/strlcpy.c | 56 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 32 insertions(+), 24 deletions(-)

diff --git a/src/string/strlcpy.c b/src/string/strlcpy.c
index 4d3ff92..bc26441 100644
--- a/src/string/strlcpy.c
+++ b/src/string/strlcpy.c
@@ -1,32 +1,40 @@
-#include <string.h>
-#include <stdlib.h>
+#include <stddef.h>
 #include <stdint.h>
-#include <limits.h>
-#include "libc.h"
-
-#define ALIGN (sizeof(size_t)-1)
-#define ONES ((size_t)-1/UCHAR_MAX)
-#define HIGHS (ONES * (UCHAR_MAX/2+1))
-#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
+#include <string.h>
+#include "word.h"
 
+/**
+ * strlcpy - Word sized bsd strlcpy.
+ * @d: Destination
+ * @s: Source
+ * @n: Max @s
+ */
 size_t strlcpy(char *d, const char *s, size_t n)
 {
-	char *d0 = d;
+	char *z = d;
 	size_t *wd;
 	const size_t *ws;
 
-	if (!n--) goto finish;
-	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
-		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s); n--, s++, d++);
-		if (n && *s) {
-			wd=(void *)d; ws=(const void *)s;
-			for (; n>=sizeof(size_t) && !HASZERO(*ws);
-			       n-=sizeof(size_t), ws++, wd++) *wd = *ws;
-			d=(void *)wd; s=(const void *)ws;
-		}
-	}
-	for (; n && (*d=*s); n--, s++, d++);
-	*d = 0;
-finish:
-	return d-d0 + strlen(s);
+	/* A byte for nul */
+	if (!n--) goto terminate;
+
+	if ((uintptr_t)d % sizeof(size_t) != (uintptr_t)s % sizeof(size_t))
+		goto misaligned;
+
+	for (; (uintptr_t)s % sizeof(size_t); *d++ = *s++, n--)
+		if (!*s || !n) goto terminate;
+
+	for (wd = (size_t *)d, ws= (const size_t *)s
+	     ; !word_has_zero(*ws) && n >= sizeof(size_t)
+	     ; *wd = *ws, wd++, ws++, n -= sizeof(size_t))
+
+	d = (char *)wd;
+	s = (const char *)ws;
+
+misaligned:
+	for (; (*d = *s) && n; d++, s++, n--);
+terminate:
+	*d = '\0';
+
+	return d - z + strlen(s);
 }
-- 
1.7.11.4



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/4] String: expand to word-at-a-time
  2013-02-04  0:12 ` [PATCH 3/4] String: expand to word-at-a-time Nathan McSween
@ 2013-02-04  2:24   ` Isaac Dunham
  2013-02-04  2:55     ` nwmcsween
  0 siblings, 1 reply; 11+ messages in thread
From: Isaac Dunham @ 2013-02-04  2:24 UTC (permalink / raw)
  To: musl

On Mon,  4 Feb 2013 00:12:14 +0000
Nathan McSween <nwmcsween@gmail.com> wrote:

> -int strcmp(const char *l, const char *r)
> +/**
> + * strcmp - Word sized c standard strcmp.
> + * @c: Comparative
> + * @s: Source
> + */
I have a few comments:

1: Why "comparative" and "source" instead of left/right?
Source implies that there's copying going on, which there isn't (at least in terms of the API).
s1/s2 or l/r is much more understandable.

2: For that many lines, you might as well explain what's going on:
/* Indicate whether string c is less than, greater than, or
equal to string s */
The comments are useless without the standard, and superfluous 
with it.

> +#undef strcmp
Huh?
I don't see why this is needed, unless you messed up your build environment.

> +int strcmp(const char *c, const char *s)

What's the effect of all this on size of the binary?
I see that all this ends up adding ~150 lines.

Is shortening the length of the variable names really useful?

-- 
Isaac Dunham <idunham@lavabit.com>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/4] String: expand to word-at-a-time
  2013-02-04  2:24   ` Isaac Dunham
@ 2013-02-04  2:55     ` nwmcsween
  0 siblings, 0 replies; 11+ messages in thread
From: nwmcsween @ 2013-02-04  2:55 UTC (permalink / raw)
  To: musl


On Feb 3, 2013, at 6:24 PM, Isaac Dunham <idunham@lavabit.com> wrote:

> On Mon,  4 Feb 2013 00:12:14 +0000
> Nathan McSween <nwmcsween@gmail.com> wrote:
> 
>> -int strcmp(const char *l, const char *r)
>> +/**
>> + * strcmp - Word sized c standard strcmp.
>> + * @c: Comparative
>> + * @s: Source
>> + */
> I have a few comments:
> 
> 1: Why "comparative" and "source" instead of left/right?
> Source implies that there's copying going on, which there isn't (at least in terms of the API).
> s1/s2 or l/r is much more understandable.
> 2: For that many lines, you might as well explain what's going on:
> /* Indicate whether string c is less than, greater than, or
> equal to string s */
> The comments are useless without the standard, and superfluous 
> with it.
Just personal preference really. It's nicer imo to have short args names and a blurb on what they stand for

>> +#undef strcmp
> Huh?
> I don't see why this is needed, unless you messed up your build environment.
> 
>> +int strcmp(const char *c, const char *s)
> 
> What's the effect of all this on size of the binary?
> I see that all this ends up adding ~150 lines.
With the expanded to word-at-a-time fns it adds a bit with just the refactorings it removes a bit. Not at my workstation atm though.

> Is shortening the length of the variable names really useful?
Preference, the only reason for the longer names in the args is usually due to say void * to unsigned char * for comparison, etc purposes
> -- 
> Isaac Dunham <idunham@lavabit.com>
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/4] Refactor and expand string functions.
  2013-02-04  0:12 [PATCH 0/4] Refactor and expand string functions Nathan McSween
                   ` (3 preceding siblings ...)
  2013-02-04  0:12 ` [PATCH 4/4] String: refactor to utilize word.h and always terminate string Nathan McSween
@ 2013-02-05  4:25 ` Nathan McSween
  2013-02-05 11:19   ` Szabolcs Nagy
  4 siblings, 1 reply; 11+ messages in thread
From: Nathan McSween @ 2013-02-05  4:25 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 1528 bytes --]

On 2/3/2013 4:12 PM, Nathan McSween wrote:
> memchr - refactor
> memcmp - word-at-a-time
> memset - refactor
> strcmp - word-at-a-time
> strlcpy - refactor and always terminate string
> strlen - refactor
> strncmp - word-at-a-time
>
> A simple wc -l on asm lines for changed files gives:
> 91 new_memchr.s
> 106 musl_memchr.s
> 65 new_memcmp.s
> 32 musl_memcmp.s
> 118 new_memset.s
> 121 musl_memset.s
> 64 new_strcmp.s
> 26 musl_strcmp.s
> 98 new_strlcpy.s
> 124 musl_strlcpy.s
> 55 new_strlen.s
> 55 musl_strlen.s
> 66 new_strncmp.s
> 45 musl_strncmp.s
>
> Bikeshed over inline documentation welcome.
>
> Nathan McSween (4):
>    Internal: Add word.h - word-at-a-time fns / macros
>    String: refactor to utilize word.h and optimize
>    String: expand to word-at-a-time
>    String: refactor to utilize word.h and always terminate string
>
>   src/internal/word.h  | 39 ++++++++++++++++++++++++++++++++++++
>   src/string/memchr.c  | 42 ++++++++++++++++++++++-----------------
>   src/string/memcmp.c  | 38 +++++++++++++++++++++++++++++++----
>   src/string/memset.c  | 39 +++++++++++++++++++++---------------
>   src/string/strcmp.c  | 35 +++++++++++++++++++++++++++++---
>   src/string/strlcpy.c | 56 ++++++++++++++++++++++++++++++----------------------
>   src/string/strlen.c  | 29 +++++++++++++++------------
>   src/string/strncmp.c | 36 ++++++++++++++++++++++++++++-----
>   8 files changed, 231 insertions(+), 83 deletions(-)
>   create mode 100644 src/internal/word.h
>
Attached are files of the functions changed.

[-- Attachment #2: memchr.c --]
[-- Type: text/plain, Size: 630 bytes --]

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "word.h"

/**
 * memchr - Word sized c standard memchr.
 * @s: Source
 * @c: Character
 * @n: Max size of @s
 */
void *memchr(const void *s, int c, size_t n)
{
	const unsigned char *cs = (const unsigned char *)s;
	const size_t *w;

	c = (unsigned char)c;

	for (; (uintptr_t)cs % sizeof(size_t); cs++, n--) {
		if (!n) return NULL;
		if (*cs == c) return (void *)cs;
	}

	for (w = (const size_t *)cs; !word_has_char(*w, c); w++, n--)
		if (!n) return NULL;
	for (cs = (const unsigned char *)w; *cs != c; cs++, n--)
		if (!n) return NULL;

	return (void *)cs;
}

[-- Attachment #3: memcmp.c --]
[-- Type: text/plain, Size: 837 bytes --]

#include <stddef.h>
#include <string.h>
#include "word.h"

/**
 * memcmp - Word sized c standard memcmp.
 * @s: Source
 * @c: Comparative
 * @n: Max size of @s
 */
int memcmp(const void *s, const void *c, size_t n)
{
	const unsigned char *cs = (const unsigned char *)s;
	const unsigned char *cc = (const unsigned char *)c;
	const size_t *ws, *wc;


	if ((uintptr_t)cs % sizeof(size_t) != (uintptr_t)cc % sizeof(size_t))
		goto misaligned;

	for (; (uintptr_t)cs % sizeof(size_t); cs++, cc++, n--) {
		if (!n) return 0;
		if (*cs == *cc) goto misaligned;
	}

	for (ws = (const size_t *)cs, wc = (const size_t *)cc
	     ; *ws == *wc && n
	     ; ws++, wc++, n -= sizeof(size_t));

	cs = (const unsigned char *)ws;
	cc = (const unsigned char *)wc;

misaligned:
	for(; *cs == *cc; cs++, cc++, n--)
		if (!n) return 0;

	return *cs - *cc;
}

[-- Attachment #4: memset.c --]
[-- Type: text/plain, Size: 608 bytes --]

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "word.h"

/**
 * memset - Word sized c standard memset.
 * @d: Destination
 * @c: Charater to set
 * @n: Max size to set to @c in @d
 */
void *memset(void *d, int c, size_t n)
{
	unsigned char *cd = (unsigned char *)d;
	const size_t wc = WORD_LSB_ONE * (unsigned char)c;
	size_t *wd;

	c = (unsigned char)c;

	for (; (uintptr_t)cd % sizeof(size_t); *cd++ = c, n--)
		if (!n) return d;

	for (wd = (size_t *)cd; n >= sizeof(size_t)
	     ; *wd++ = wc, n -= sizeof(size_t));
	for (cd = (unsigned char *)wd; n; *cd++ = c, n--);

	return d;
}

[-- Attachment #5: strcmp.c --]
[-- Type: text/plain, Size: 801 bytes --]

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "word.h"

/**
 * strcmp - Word sized c standard strcmp.
 * @c: Comparative
 * @s: Source
 */
#undef strcmp
int strcmp(const char *c, const char *s)
{
	const size_t *wc, *ws;

	if ((uintptr_t)c % sizeof(size_t) != (uintptr_t)s % sizeof(size_t))
		goto misaligned;

	for (; (uintptr_t)c % sizeof(size_t); c++, s++) {
		if (*c != *s || !*c || !*s)
			return *(const unsigned char *)c
			        - *(const unsigned char *)s;
	}

	for (wc = (const size_t *)c, ws = (const size_t *)s
	     ; (!word_has_zero(*wc) || !word_has_zero(*ws)) && *wc == *ws
	     ; wc++, ws++);

	c = (const char *)wc;
	s = (const char *)ws;

misaligned:
	for(; *c == *s && *c && *s; c++, s++);

	return *(const unsigned char *)c - *(const unsigned char *)s;
}

[-- Attachment #6: strlcpy.c --]
[-- Type: text/plain, Size: 795 bytes --]

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "word.h"

/**
 * strlcpy - Word sized bsd strlcpy.
 * @d: Destination
 * @s: Source
 * @n: Max @s
 */
size_t strlcpy(char *d, const char *s, size_t n)
{
	char *z = d;
	size_t *wd;
	const size_t *ws;

	/* A byte for nul */
	if (!n--) goto terminate;

	if ((uintptr_t)d % sizeof(size_t) != (uintptr_t)s % sizeof(size_t))
		goto misaligned;

	for (; (uintptr_t)s % sizeof(size_t); *d++ = *s++, n--)
		if (!*s || !n) goto terminate;

	for (wd = (size_t *)d, ws= (const size_t *)s
	     ; !word_has_zero(*ws) && n >= sizeof(size_t)
	     ; *wd = *ws, wd++, ws++, n -= sizeof(size_t))

	d = (char *)wd;
	s = (const char *)ws;

misaligned:
	for (; (*d = *s) && n; d++, s++, n--);
terminate:
	*d = '\0';

	return d - z + strlen(s);
}

[-- Attachment #7: strlen.c --]
[-- Type: text/plain, Size: 393 bytes --]

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "word.h"

/**
 * strlen - Word sized c standard strlen.
 * @s: Source
 */
size_t strlen(const char *s)
{
	const char *z = s;
	const size_t *w;

	for (; (uintptr_t)s % sizeof(size_t); s++)
		if (!*s) return s - z;

	for (w = (const size_t *)s; !word_has_zero(*w); w++);
	for (s = (const char *)w; *s; s++);

	return s - z;
}

[-- Attachment #8: strncmp.c --]
[-- Type: text/plain, Size: 800 bytes --]

#include <stdint.h>
#include <string.h>
#include "word.h"

/**
 * strncmp - Word sized c standard strncmp.
 * @c: Comparative
 * @s: Source
 * @n: Max size of @s
 */
int strncmp(const char *c, const char *s, size_t n)
{
	const size_t *wc, *ws;

	if ((uintptr_t)c % sizeof(size_t) != (uintptr_t)s % sizeof(size_t))
		goto misaligned;

	for (; (uintptr_t)c % sizeof(size_t); c++, s++, n--) {
		if (*c != *s || !*c || !*s || !n)
			return *(const unsigned char *)c
				- *(const unsigned char *)s;
	}

	for (wc = (const size_t *)c, ws = (const size_t *)s
	     ; !word_has_zero(*wc) || !word_has_zero(*ws)
	     ; wc++, ws++);

	c = (const char *)wc;
	s = (const char *)ws;

misaligned:
	for(; *c == *s && *c && *s && n; c++, s++, n--);

	return *(const unsigned char *)c - *(const unsigned char *)s;
}

[-- Attachment #9: word.h --]
[-- Type: text/plain, Size: 825 bytes --]

/**
 * _INTERNAL_WORD_H - various word size functions / macros
 */
#ifndef _MYOSIN_WORD_H
#define _MYOSIN_WORD_H

#include <stddef.h>
#include <stdint.h>

/**
 * WORD_LSB_ONE - Set low bit of each byte on arch word size to one.
 */
#define WORD_LSB_ONE ((size_t)-1 / (unsigned char)-1)

/**
 * WORD_MSB_ONE - Set high bit of each byte on arch word size to one.
 */
#define WORD_MSB_ONE (WORD_LSB_ONE * ((unsigned char)-1 / 2 + 1))

/**
 * word_has_zero - Word has a zero character
 * @w: Word
 */
static inline char word_has_zero(size_t w)
{
	return !!((w - WORD_LSB_ONE) & (~w & WORD_MSB_ONE));
}

/**
 * word_has_char - Word has a character
 * @w: Word
 */
static inline char word_has_char(size_t w, char c)
{
	return !!((w - WORD_LSB_ONE)
		  & ((~w & WORD_MSB_ONE)^(WORD_LSB_ONE * c)));
}

#endif /* !_INTERNAL_WORD_H */

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Re: [PATCH 0/4] Refactor and expand string functions.
  2013-02-05  4:25 ` [PATCH 0/4] Refactor and expand string functions Nathan McSween
@ 2013-02-05 11:19   ` Szabolcs Nagy
  2013-02-05 14:05     ` Rich Felker
  0 siblings, 1 reply; 11+ messages in thread
From: Szabolcs Nagy @ 2013-02-05 11:19 UTC (permalink / raw)
  To: musl

* Nathan McSween <nwmcsween@gmail.com> [2013-02-04 20:25:53 -0800]:
> /**
>  * memchr - Word sized c standard memchr.
>  * @s: Source
>  * @c: Character
>  * @n: Max size of @s
>  */
> void *memchr(const void *s, int c, size_t n)
> {
> 	const unsigned char *cs = (const unsigned char *)s;
> 	const size_t *w;
> 
> 	c = (unsigned char)c;
> 
> 	for (; (uintptr_t)cs % sizeof(size_t); cs++, n--) {
> 		if (!n) return NULL;
> 		if (*cs == c) return (void *)cs;
> 	}
> 
> 	for (w = (const size_t *)cs; !word_has_char(*w, c); w++, n--)
> 		if (!n) return NULL;
> 	for (cs = (const unsigned char *)w; *cs != c; cs++, n--)
> 		i

did you test this code?

w++ but n-- does not seem right

> #define WORD_LSB_ONE ((size_t)-1 / (unsigned char)-1)
> #define WORD_MSB_ONE (WORD_LSB_ONE * ((unsigned char)-1 / 2 + 1))
> 
> /**
>  * word_has_zero - Word has a zero character
>  * @w: Word
>  */
> static inline char word_has_zero(size_t w)
> {
> 	return !!((w - WORD_LSB_ONE) & (~w & WORD_MSB_ONE));
> }
> 
> /**
>  * word_has_char - Word has a character
>  * @w: Word
>  */
> static inline char word_has_char(size_t w, char c)
> {
> 	return !!((w - WORD_LSB_ONE)
> 		  & ((~w & WORD_MSB_ONE)^(WORD_LSB_ONE

the code is mostly ok: don't use "char c" argument, use
unsigned char or int, same for return value

but i don't like the style of this

maybe it's just me but the WORD_LSB_ONE is a long name
that does not help me understand what it is, it could
be W1 vs W128 and the same amount of explanation would
be embedded in the name (but much shorter and thus easier
to recognize, things fit on one line etc, it assumes 8bit
char though)

the comments are bad (lot of text with no content)
and the code actually does magic that may need explanation
it's rather frustrating when you try to understand what's
going on and have to read lot of useless comments..

i dont see how word_has_char works, the name suggests that
it tests if w has c in it, but that's not what it does
(and the comment is actively misleading: only shows one arg,
which demonstrates why i dislike these kinds of comments,
thye cause more harm than good)


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Re: [PATCH 0/4] Refactor and expand string functions.
  2013-02-05 11:19   ` Szabolcs Nagy
@ 2013-02-05 14:05     ` Rich Felker
  2013-02-05 15:05       ` Szabolcs Nagy
  0 siblings, 1 reply; 11+ messages in thread
From: Rich Felker @ 2013-02-05 14:05 UTC (permalink / raw)
  To: musl

On Tue, Feb 05, 2013 at 12:19:10PM +0100, Szabolcs Nagy wrote:
> * Nathan McSween <nwmcsween@gmail.com> [2013-02-04 20:25:53 -0800]:
> > /**
> >  * memchr - Word sized c standard memchr.
> >  * @s: Source
> >  * @c: Character
> >  * @n: Max size of @s
> >  */
> > void *memchr(const void *s, int c, size_t n)
> > {
> > 	const unsigned char *cs = (const unsigned char *)s;
> > 	const size_t *w;
> > 
> > 	c = (unsigned char)c;
> > 
> > 	for (; (uintptr_t)cs % sizeof(size_t); cs++, n--) {
> > 		if (!n) return NULL;
> > 		if (*cs == c) return (void *)cs;
> > 	}
> > 
> > 	for (w = (const size_t *)cs; !word_has_char(*w, c); w++, n--)
> > 		if (!n) return NULL;
> > 	for (cs = (const unsigned char *)w; *cs != c; cs++, n--)
> > 		i
> 
> did you test this code?
> 
> w++ but n-- does not seem right

You mean it should be n -= sizeof(size_t) or whatever?

> > #define WORD_LSB_ONE ((size_t)-1 / (unsigned char)-1)
> > #define WORD_MSB_ONE (WORD_LSB_ONE * ((unsigned char)-1 / 2 + 1))
> > 
> > /**
> >  * word_has_zero - Word has a zero character
> >  * @w: Word
> >  */
> > static inline char word_has_zero(size_t w)
> > {
> > 	return !!((w - WORD_LSB_ONE) & (~w & WORD_MSB_ONE));
> > }
> > 
> > /**
> >  * word_has_char - Word has a character
> >  * @w: Word
> >  */
> > static inline char word_has_char(size_t w, char c)
> > {
> > 	return !!((w - WORD_LSB_ONE)
> > 		  & ((~w & WORD_MSB_ONE)^(WORD_LSB_ONE
> 
> the code is mostly ok: don't use "char c" argument, use
> unsigned char or int, same for return value
> 
> but i don't like the style of this
> 
> maybe it's just me but the WORD_LSB_ONE is a long name
> that does not help me understand what it is, it could
> be W1 vs W128 and the same amount of explanation would
> be embedded in the name (but much shorter and thus easier
> to recognize, things fit on one line etc, it assumes 8bit
> char though)

Honestly I would prefer the whole inline function be replaced with
just a single-line macro that could be pasted where needed. IMO this
makes the code more readable because you don't have to go looking at
other functions.

> the comments are bad (lot of text with no content)

They're doxygen format I believe.

> and the code actually does magic that may need explanation
> it's rather frustrating when you try to understand what's
> going on and have to read lot of useless comments..

I agree. I don't like comments except to explain (in natural language)
what's going on in a part of the code where the *reason* for doing
something is non-obvious. The only thing non-obvious in these files is
word_has_char, but it's obvious what it does from the name. (Still, I
prefer the original macro names, but that's just me; dunno what others
think.)

> i dont see how word_has_char works, the name suggests that
> it tests if w has c in it, but that's not what it does

That's what it's supposed to do.

> (and the comment is actively misleading: only shows one arg,
> which demonstrates why i dislike these kinds of comments,
> thye cause more harm than good)

:)

Rich


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Re: [PATCH 0/4] Refactor and expand string functions.
  2013-02-05 14:05     ` Rich Felker
@ 2013-02-05 15:05       ` Szabolcs Nagy
  0 siblings, 0 replies; 11+ messages in thread
From: Szabolcs Nagy @ 2013-02-05 15:05 UTC (permalink / raw)
  To: musl

* Rich Felker <dalias@aerifal.cx> [2013-02-05 09:05:35 -0500]:
> On Tue, Feb 05, 2013 at 12:19:10PM +0100, Szabolcs Nagy wrote:
> > * Nathan McSween <nwmcsween@gmail.com> [2013-02-04 20:25:53 -0800]:
> > > 	for (; (uintptr_t)cs % sizeof(size_t); cs++, n--) {
> > > 		if (!n) return NULL;
> > > 		if (*cs == c) return (void *)cs;
> > > 	}
> > > 
> > > 	for (w = (const size_t *)cs; !word_has_char(*w, c); w++, n--)
> > > 		if (!n) return NULL;
> > 
> > w++ but n-- does not seem right
> 
> You mean it should be n -= sizeof(size_t) or whatever?
> 

yes, or n -= sizeof *w if you want to save some chars :)

and the if (!n) return NULL; is wrong as well once you
go by words

> Honestly I would prefer the whole inline function be replaced with
> just a single-line macro that could be pasted where needed. IMO this
> makes the code more readable because you don't have to go looking at
> other functions.
> 

> word_has_char, but it's obvious what it does from the name. (Still, I
> prefer the original macro names, but that's just me; dunno what others
> think.)
> 

i agree

> > i dont see how word_has_char works, the name suggests that
> > it tests if w has c in it, but that's not what it does
> 
> That's what it's supposed to do.
> 

then the grouping is wrong

- return !!((w - WORD_LSB_ONE) & ((~w & WORD_MSB_ONE)^(WORD_LSB_ONE * c)));
+ return !!(((w - WORD_LSB_ONE) & ~w & WORD_MSB_ONE) ^ (WORD_LSB_ONE * c));

but yes the original macros were cleaner


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2013-02-05 15:05 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-04  0:12 [PATCH 0/4] Refactor and expand string functions Nathan McSween
2013-02-04  0:12 ` [PATCH 1/4] Internal: Add word.h - word-at-a-time fns / macros Nathan McSween
2013-02-04  0:12 ` [PATCH 2/4] String: refactor to utilize word.h and optimize Nathan McSween
2013-02-04  0:12 ` [PATCH 3/4] String: expand to word-at-a-time Nathan McSween
2013-02-04  2:24   ` Isaac Dunham
2013-02-04  2:55     ` nwmcsween
2013-02-04  0:12 ` [PATCH 4/4] String: refactor to utilize word.h and always terminate string Nathan McSween
2013-02-05  4:25 ` [PATCH 0/4] Refactor and expand string functions Nathan McSween
2013-02-05 11:19   ` Szabolcs Nagy
2013-02-05 14:05     ` Rich Felker
2013-02-05 15:05       ` Szabolcs Nagy

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).