From mboxrd@z Thu Jan 1 00:00:00 1970 X-Msuck: nntp://news.gmane.org/gmane.linux.lib.musl.general/11731 Path: news.gmane.org!.POSTED!not-for-mail From: Nathan McSween Newsgroups: gmane.linux.lib.musl.general Subject: [RFC PATCH 1/5] string: vectorize various functions Date: Sat, 15 Jul 2017 19:55:37 +0000 Message-ID: <20170715195541.3136-2-nwmcsween@gmail.com> References: <20170715195541.3136-1-nwmcsween@gmail.com> Reply-To: musl@lists.openwall.com NNTP-Posting-Host: blaine.gmane.org X-Trace: blaine.gmane.org 1500148715 30752 195.159.176.226 (15 Jul 2017 19:58:35 GMT) X-Complaints-To: usenet@blaine.gmane.org NNTP-Posting-Date: Sat, 15 Jul 2017 19:58:35 +0000 (UTC) Cc: Nathan McSween To: musl@lists.openwall.com Original-X-From: musl-return-11744-gllmg-musl=m.gmane.org@lists.openwall.com Sat Jul 15 21:58:30 2017 Return-path: Envelope-to: gllmg-musl@m.gmane.org Original-Received: from mother.openwall.net ([195.42.179.200]) by blaine.gmane.org with smtp (Exim 4.84_2) (envelope-from ) id 1dWTCt-0007cp-EQ for gllmg-musl@m.gmane.org; Sat, 15 Jul 2017 21:58:27 +0200 Original-Received: (qmail 3340 invoked by uid 550); 15 Jul 2017 19:58:24 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-ID: Original-Received: (qmail 3088 invoked from network); 15 Jul 2017 19:58:21 -0000 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=BA+HcTcPHMv4lKHUuvltXh77ybd1a4rB485Q4zP52cM=; b=F35ydCGn2a7rUclnN6JMlYAWjaJ+OYQ9bEfmoLzZn8wh5VMl5TD464EqcE/TFM0ANI Yd7O/PAdHU20eqyIJE2S0WqUOtSvk33uIuGZ+K233vcN1XXSVF0wIsVk2wni9UTsSIst Behnm19ii51Q71nhxPyDchIWT8ceuCIE9erO0Vti43gZo9ngEYShhwFi0Bkza2jUXBDS 9qtChM+3Ndv9E4frRjVrie2UChuIvDdMx9drgZSeU/89ZULV1HstNlRiupdYzuFclHMX 5qtUcNEWfJ/4LBWOkp6hOy5R2zxNB8keQd99TEUVP64PO64SeSRlz87H/iSkOIBhiE6Q 1aLQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=BA+HcTcPHMv4lKHUuvltXh77ybd1a4rB485Q4zP52cM=; b=OSkiZ5jsUbeWIv4l4vjGlIM7yF3gjZAnMEr+RVlp/kkhdWYVXjC3OfBa7GOaSUnZqQ DOqeedbU87CwWQc+4MCe+WJ4bGpddgw/HQShtRjMzW5bYwQMu3SZzUUxhEzG8drh2uz4 VIMR+7Aj++/HUlIOrmbMoUihfSHRtLCnkyH5DwSAJFEsnhIa2Rpj0FMWZ7PZWUGxsq+7 8FkX+zSDUVZDURpaHcK0PVn+Sd/n6nhmTKzAMkHpKkZpMA/Oxw+yOMFkGkQxHBQGhqe/ qc+iq+SfEHDVL1Ij/VtQjPHHHkOfaM0PBHSESakvR7LUcC87PG6e6ZxiBB67Q0BgNxY0 ewFg== X-Gm-Message-State: AIVw112/6btFmiCtu9xaakL6eUdxxS6/a9Ux4848lvL0h/Id1v14Wn05 zola8bgEB/9KSYyW X-Received: by 10.98.57.4 with SMTP id g4mr11445319pfa.155.1500148689718; Sat, 15 Jul 2017 12:58:09 -0700 (PDT) X-Mailer: git-send-email 2.13.2 In-Reply-To: <20170715195541.3136-1-nwmcsween@gmail.com> Xref: news.gmane.org gmane.linux.lib.musl.general:11731 Archived-At: Text sizes with gcc version 6.3.0 Before | After 49 | 153 memcmp.lo 38 | 294 memrchr.lo 120 | 376 strcasecmp.lo 96 | 305 strncmp.lo 155 | 454 strncasecmp.lo 96 | 305 strncmp.lo 861 | 1887 (TOTALS) The size increase is mainly due to vectorizing but also to both the __may_alias__ attribute and the conditional before the alignment loop for functions that take a size argument. The str[n]casecmp macros are of particular interest and work via tagging the high bit for any character in the range of A-Z and rhs said high bit two to give a-z. --- src/string/memcmp.c | 28 ++++++++++++++++++++++++---- src/string/memrchr.c | 29 +++++++++++++++++++++++------ src/string/strcasecmp.c | 34 ++++++++++++++++++++++++++++++---- src/string/strcmp.c | 27 ++++++++++++++++++++++++--- src/string/strncasecmp.c | 37 +++++++++++++++++++++++++++++++++---- src/string/strncmp.c | 28 ++++++++++++++++++++++++++-- 6 files changed, 160 insertions(+), 23 deletions(-) diff --git a/src/string/memcmp.c b/src/string/memcmp.c index bdbce9f0..7d2e8077 100644 --- a/src/string/memcmp.c +++ b/src/string/memcmp.c @@ -1,8 +1,28 @@ #include +#include -int memcmp(const void *vl, const void *vr, size_t n) +#define aliases __attribute__((__may_alias__)) + +int memcmp(const void *_l, const void *_r, size_t n) { - const unsigned char *l=vl, *r=vr; - for (; n && *l == *r; n--, l++, r++); - return n ? *l-*r : 0; + const unsigned char *l = _l, *r = _r; + const size_t aliases *wl, aliases *wr; + + if (n < sizeof(size_t) * 3 || ((uintptr_t)l | (uintptr_t)r) + & sizeof(size_t) - 1) goto bytewise; + + for (; (uintptr_t)l & sizeof(size_t) - 1 && *l == *r; l++, r++, n--); + if ((uintptr_t)l & sizeof(size_t) -1) return *l - *r; + + wl = (const void *)l; + wr = (const void *)r; + for (; n >= sizeof(size_t) && *wl == *wr + ; wl++, wr++, n -= sizeof(size_t)); + l = (const void *)wl; + r = (const void *)wr; + +bytewise: + for (; n && *l == *r; l++, r++, n--); + + return n ? *l - *r : 0; } diff --git a/src/string/memrchr.c b/src/string/memrchr.c index a78e9d6c..165c422b 100644 --- a/src/string/memrchr.c +++ b/src/string/memrchr.c @@ -1,12 +1,29 @@ #include -#include "libc.h" +#include -void *__memrchr(const void *m, int c, size_t n) +#define byte_repeat(x) ((size_t)~0 / 0xff * (x)) +#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80)) +#define weak_alias(o, n) extern __typeof__(o) n __attribute__((weak, alias(#o))) + +void *__memrchr(const void *_s, int i, size_t n) { - const unsigned char *s = m; - c = (unsigned char)c; - while (n--) if (s[n]==c) return (void *)(s+n); - return 0; + i = (unsigned char )i; + const unsigned char *s = _s; + const size_t wi = byte_repeat(i); + + if (n < sizeof(size_t) * 3) goto bytewise; + + for (; (uintptr_t)(s + n) & sizeof(size_t) - 1 && s[n - 1] != i; n--); + if ((uintptr_t)(s + n) & sizeof(size_t) - 1) return (void *)(s + n - 1); + + for (; n >= sizeof(size_t) && + !word_has_zero(*(size_t *)(s + n - sizeof(size_t)) ^ wi) + ; n -= sizeof(size_t)); + +bytewise: + for (; n && s[n - 1] != i; n--); + + return n ? (void *)(s + n - 1) : 0; } weak_alias(__memrchr, memrchr); diff --git a/src/string/strcasecmp.c b/src/string/strcasecmp.c index 3cd5f2d0..07d56c5d 100644 --- a/src/string/strcasecmp.c +++ b/src/string/strcasecmp.c @@ -1,15 +1,41 @@ #include #include -#include "libc.h" +#include + +#define aliases __attribute__((__may_alias__)) +#define byte_repeat(x) ((size_t)~0 / 0xff * (x)) +#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80)) +#define word_tag_range(x, t, s, e) ((((x) | byte_repeat(t)) \ +- byte_repeat(s) & ~((x) | byte_repeat(t)) - \ +byte_repeat((e) + 1)) & (~(x) & byte_repeat(t))) +#define word_to_lower(x) ((x) - (word_tag_range((x), 'a', 'z', 0x80)) >> 2) +#define weak_alias(o, n) extern __typeof__(o) n __attribute__((weak, alias(#o))) int strcasecmp(const char *_l, const char *_r) { - const unsigned char *l=(void *)_l, *r=(void *)_r; - for (; *l && *r && (*l == *r || tolower(*l) == tolower(*r)); l++, r++); + const unsigned char *l = (const void *)_l, *r = (const void *)_r; + const size_t aliases *wl, aliases *wr; + + if (((uintptr_t)l | (uintptr_t)r) & sizeof(size_t) - 1) goto bytewise; + + for (;(uintptr_t)l & sizeof(size_t) - 1 && *l + && tolower(*l) == tolower(*r); l++, r++); + if ((uintptr_t)l & sizeof(size_t) - 1) return tolower(*l) - tolower(*r); + + wl = (const void *)l; + wr = (const void *)r; + for (; !word_has_zero(*wl) && word_to_lower(*wl) == word_to_lower(*wr) + ; wl++, wr++); + l = (const void *)wl; + r = (const void *)wr; + +bytewise: + for (; *l && tolower(*l) == tolower(*r); l++, r++); + return tolower(*l) - tolower(*r); } -int __strcasecmp_l(const char *l, const char *r, locale_t loc) +int __strcasecmp_l(const char *l, const char *r, locale_t unused) { return strcasecmp(l, r); } diff --git a/src/string/strcmp.c b/src/string/strcmp.c index 808bd837..a2f358a9 100644 --- a/src/string/strcmp.c +++ b/src/string/strcmp.c @@ -1,7 +1,28 @@ #include +#include -int strcmp(const char *l, const char *r) +#define aliases __attribute__((__may_alias__)) +#define byte_repeat(x) ((size_t)~0 / 0xff * (x)) +#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80)) + +int strcmp(const char *_l, const char *_r) { - for (; *l==*r && *l; l++, r++); - return *(unsigned char *)l - *(unsigned char *)r; + const unsigned char *l = (const void *)_l, *r = (const void *)_r; + const size_t aliases *wl, aliases *wr; + + if (((uintptr_t)l | (uintptr_t)r) & sizeof(size_t) - 1) goto bytewise; + + for (; (uintptr_t)l & sizeof(size_t) - 1 && *l && *l == *r; l++, r++); + if ((uintptr_t)l & sizeof(size_t) - 1) return *l - *r; + + wl = (const void *)l; + wr = (const void *)r; + for (; !word_has_zero(*wl) && *wl == *wr; wl++, wr++); + l = (const void *)wl; + r = (const void *)wr; + +bytewise: + for (; *l && *l == *r; l++, r++); + + return *l - *r; } diff --git a/src/string/strncasecmp.c b/src/string/strncasecmp.c index 3af53008..e1a9454a 100644 --- a/src/string/strncasecmp.c +++ b/src/string/strncasecmp.c @@ -1,16 +1,45 @@ #include #include -#include "libc.h" +#include + +#define aliases __attribute__((__may_alias__)) +#define byte_repeat(x) ((size_t)~0 / 0xff * (x)) +#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80)) +#define word_tag_range(x, t, s, e) ((((x) | byte_repeat(t)) \ +- byte_repeat(s) & ~((x) | byte_repeat(t)) - \ +byte_repeat((e) + 1)) & (~(x) & byte_repeat(t))) +#define word_to_tolower(x) ((x) - (word_tag_range((x), 'a', 'z', 0x80)) >> 2) +#define weak_alias(o, n) extern __typeof__(o) n __attribute__((weak, alias(#o))) int strncasecmp(const char *_l, const char *_r, size_t n) { - const unsigned char *l=(void *)_l, *r=(void *)_r; + const unsigned char *l = (const void *)_l, *r = (const void *)_r; + const size_t aliases *wl, aliases *wr; + if (!n--) return 0; - for (; *l && *r && n && (*l == *r || tolower(*l) == tolower(*r)); l++, r++, n--); + + if (n < sizeof(size_t) * 3 || ((uintptr_t)l | (uintptr_t)r) + & sizeof(size_t) - 1) goto bytewise; + + for (;(uintptr_t)l & sizeof(size_t) - 1 && *l + && tolower(*l) == tolower(*r); l++, r++, n--); + if ((uintptr_t)l & sizeof(size_t) - 1) return tolower(*l) - tolower(*r); + + wl = (const void *)l; + wr = (const void *)r; + for (; n >= sizeof(size_t) && !word_has_zero(*wl) + && word_to_tolower(*wl) == word_to_tolower(*wr) + ; wl++, wr++, n -= sizeof(size_t)); + l = (const void *)wl; + r = (const void *)wr; + +bytewise: + for (; n && *l && tolower(*l) == tolower(*r); l++, r++, n--); + return tolower(*l) - tolower(*r); } -int __strncasecmp_l(const char *l, const char *r, size_t n, locale_t loc) +int __strncasecmp_l(const char *l, const char *r, size_t n, locale_t unused) { return strncasecmp(l, r, n); } diff --git a/src/string/strncmp.c b/src/string/strncmp.c index e228843f..667498e1 100644 --- a/src/string/strncmp.c +++ b/src/string/strncmp.c @@ -1,9 +1,33 @@ #include +#include + +#define aliases __attribute__((__may_alias__)) +#define byte_repeat(x) ((size_t)~0 / 0xff * (x)) +#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80)) int strncmp(const char *_l, const char *_r, size_t n) { - const unsigned char *l=(void *)_l, *r=(void *)_r; + const unsigned char *l = (const void *)_l, *r = (const void *)_r; + const size_t aliases *wl, aliases *wr; + if (!n--) return 0; - for (; *l && *r && n && *l == *r ; l++, r++, n--); + + if (n < sizeof(size_t) * 3 || ((uintptr_t)l | (uintptr_t)r) + & sizeof(size_t) - 1) goto bytewise; + + for (; (uintptr_t)l & sizeof(size_t) - 1 && *l && *l == *r + ; l++, r++, n--); + if ((uintptr_t)l & sizeof(size_t) - 1) return *l - *r; + + wl = (const void *)l; + wr = (const void *)r; + for (; n >= sizeof(size_t) && !word_has_zero(*wl) && *wl == *wr + ; wl++, wr++, n -= sizeof(size_t)); + l = (const void *)wl; + r = (const void *)wr; + +bytewise: + for (; n && *l && *l == *r; l++, r++, n--); + return *l - *r; } -- 2.13.2