mailing list of musl libc
 help / color / mirror / code / Atom feed
* [musl] Changes for strcspn(), strspn(), strtok() and strtok_r()
@ 2021-07-13 12:02 Stefan Kanthak
  2021-07-13 14:22 ` Érico Nogueira
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Stefan Kanthak @ 2021-07-13 12:02 UTC (permalink / raw)
  To: musl

<https://git.musl-libc.org/cgit/musl/plain/src/string/strcspn.c>

 #include <string.h>
 
 #define BITOP(a,b,op) \
  ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
 
-size_t strcspn(const char *s, const char *c)
+size_t strcspn(const char *restrict s, const char *c)
 {
-        const char *a = s;
+        const char *a;
-        size_t byteset[32/sizeof(size_t)];
+        size_t byteset[32/sizeof(size_t)] = { 0 };
 
-        if (!c[0] || !c[1]) return __strchrnul(s, *c)-a;
+        if (!c[0] || !c[1]) return __strchrnul(a=s, *c)-a;
 
-        memset(byteset, 0, sizeof byteset);
         for (; *c && BITOP(byteset, *(unsigned char *)c, |=); c++);
-        for (; *s && !BITOP(byteset, *(unsigned char *)s, &); s++);
-        return s-a;
+        for (a=s; *a && !BITOP(byteset, *(unsigned char *)a, &); a++);
+        return a-s;
 }

After this change, musl's favorite compiler (GCC) no longer calls memset()
to initialize byteset[]; instead it zeroes the array with inline stores,
just as it already does for strspn(). For x86-64, the generated code looks
like the following:

strcspn:
...
        xor    %eax, %eax
        movq   %rax, byteset(%rsp)
        movq   %rax, byteset+8(%rsp)
        movq   %rax, byteset+16(%rsp)
        movq   %rax, byteset+24(%rsp)
...

<https://git.musl-libc.org/cgit/musl/plain/src/string/strspn.c>

 #include <string.h>
 
 #define BITOP(a,b,op) \
  ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
 
-size_t strspn(const char *s, const char *c)
+size_t strspn(const char *restrict s, const char *c)
 {
-        const char *a = s;
+        const char *a;
         size_t byteset[32/sizeof(size_t)] = { 0 };
 
         if (!c[0]) return 0;
         if (!c[1]) {
-                for (; *s == *c; s++);
-                return s-a;
+                for (a=s; *a == *c; a++);
+                return a-s;
         }
 
         for (; *c && BITOP(byteset, *(unsigned char *)c, |=); c++);
-        for (; *s && BITOP(byteset, *(unsigned char *)s, &); s++);
-        return s-a;
+        for (a=s; *a && BITOP(byteset, *(unsigned char *)a, &); a++);
+        return a-s;
 }

<https://git.musl-libc.org/cgit/musl/plain/src/string/strtok.c>

 #include <string.h>
 
 char *strtok(char *restrict s, const char *restrict sep)
 {
         static char *p;
+        return strtok_r(s, sep, &p);
-        if (!s && !(s = p)) return NULL;
-        s += strspn(s, sep);
-        if (!*s) return p = 0;
-        p = s + strcspn(s, sep);
-        if (*p) *p++ = 0;
-        else p = 0;
-        return s;
 }

<https://git.musl-libc.org/cgit/musl/plain/src/string/strtok_r.c>

 #include <string.h>
 
 char *strtok_r(char *restrict s, const char *restrict sep, char **restrict p)
 {
         if (!s && !(s = *p)) return NULL;
         s += strspn(s, sep);
-        if (!*s) return *p = 0;
+        if (!*s) return *p = NULL;
         *p = s + strcspn(s, sep);
         if (**p) *(*p)++ = 0;
-        else *p = 0;
+        else *p = NULL;
         return s;
 }

If you want to go a step further, avoid building the same byteset twice:

<https://git.musl-libc.org/cgit/musl/plain/src/string/strtok_r.c>

 #include <string.h>
 
 char *strtok_r(char *restrict s, const char *restrict sep, char **restrict p)
 {
+        size_t byteset[32/sizeof(size_t)] = { 0 };
+
         if (!s && !(s = *p)) return NULL;
+        if (!*s) return *p = NULL;
+        if (!*sep) return *p = NULL, *s;
-        s += strspn(s, sep);
+        for (; *c && BITOP(byteset, *(unsigned char *)c, |=); c++);
+        for (; *s && BITOP(byteset, *(unsigned char *)s, &); s++);
-        if (!*s) return *p = 0;
+        if (!*s) return *p = NULL;
-        *p = s + strcspn(s, sep);
-        if (**p) *(*p)++ = 0;
-        else *p = 0;
-        return s;
+        sep = s;
+        for (; *s && !BITOP(byteset, *(unsigned char *)s, &); s++);
+        if (*s) *s++ = 0;
+        else *s = NULL;
+        *p = s;
+        return sep;
 }

Stefan

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-07-13 17:40 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-13 12:02 [musl] Changes for strcspn(), strspn(), strtok() and strtok_r() Stefan Kanthak
2021-07-13 14:22 ` Érico Nogueira
2021-07-13 14:49 ` Érico Nogueira
2021-07-13 15:45 ` Rich Felker
2021-07-13 16:31   ` Stefan Kanthak
2021-07-13 17:40     ` Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).