From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on inbox.vuxu.org X-Spam-Level: X-Spam-Status: No, score=-1.5 required=5.0 tests=DKIM_INVALID,DKIM_SIGNED, MAILING_LIST_MULTI,RCVD_IN_DNSWL_LOW,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.4 Received: (qmail 8277 invoked from network); 31 May 2023 14:16:43 -0000 Received: from second.openwall.net (193.110.157.125) by inbox.vuxu.org with ESMTPUTF8; 31 May 2023 14:16:43 -0000 Received: (qmail 13844 invoked by uid 550); 31 May 2023 14:16:09 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-ID: Reply-To: musl@lists.openwall.com Received: (qmail 13689 invoked from network); 31 May 2023 14:16:06 -0000 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=inria.fr; s=dc; h=from:to:subject:date:message-id:in-reply-to:references: mime-version:content-transfer-encoding; bh=qrS74hKNy57E/OSV2ThJOysFWgNpcDbfDnAL65gVFQc=; b=m9tUXylT7oVm0+wmmWbVXBmy/OcQq+PW0msXPYyWvJsLuf7Qbg+bY3kH SCyxKZoTvC/7UIU39XM3gu1Dvmifyznh0owMa8g8IvjOg3G6k8it/Z6Ng bvX/X5UJmYbkVTdWN7GuOIsuILMJcQitGgEYjuQYpcDASNbZALVQnhQEM Q=; Authentication-Results: mail3-relais-sop.national.inria.fr; dkim=none (message not signed) header.i=none; spf=SoftFail smtp.mailfrom=Jens.Gustedt@inria.fr; dmarc=fail (p=none dis=none) d=inria.fr X-IronPort-AV: E=Sophos;i="6.00,207,1681164000"; d="scan'208";a="57463843" From: Jens Gustedt To: musl@lists.openwall.com Date: Wed, 31 May 2023 16:15:49 +0200 Message-Id: X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [musl] [C23 128 bit 3/4] C23: implement w128 and wf128 for scanf and similar These functions are possibly a bit slower with 128 bit support enabled, since the core function `__intscan` uses wider data formats and instructions in some places. There was already an optimization in place for all formats which we now extend: - first with `unsigned`, then - switch to `uint64_t` if the value extends `UINT_MAX`, then - switch to `uwide128` (an internal 128 bit structure) if that is unavoidable. If the scan is erroneous because there are more digits than expected, the error might be noticed a bit later than previously, but the `errno` number should be the same. --- src/internal/intscan.c | 105 ++++++++++++++++++++++++++++++++--------- src/internal/intscan.h | 4 +- src/stdio/vfscanf.c | 25 ++++++---- src/stdio/vfwscanf.c | 19 +++++--- src/stdlib/strtol.c | 12 ++--- src/stdlib/wcstol.c | 12 ++--- 6 files changed, 125 insertions(+), 52 deletions(-) diff --git a/src/internal/intscan.c b/src/internal/intscan.c index 57424554..c7c1e47e 100644 --- a/src/internal/intscan.c +++ b/src/internal/intscan.c @@ -1,6 +1,7 @@ #include #include #include +#include "uwide128.h" #include "shgetc.h" /* Lookup table for digit values. -1==255>=36 -> invalid */ @@ -23,15 +24,59 @@ static const unsigned char table[] = { -1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, }; -unsigned long long __intscan(FILE *f, unsigned base, int pok, unsigned long long lim) +static uwide128 const uwide128_max = { { -1, -1, } }; +static uwide128 const uwide128_zero = { { 0, 0, } }; + +static const uwide128 __uwide128_limit[37] = { + comp2(0, 0), + comp2(-1, -1), + comp2(-1, 9223372036854775807), + comp2(6148914691236517205, 6148914691236517205), + comp2(-1, 4611686018427387903), + comp2(3689348814741910323, 3689348814741910323), + comp2(-6148914691236517206, 3074457345618258602), + comp2(5270498306774157604, 2635249153387078802), + comp2(-1, 2305843009213693951), + comp2(-4099276460824344804, 2049638230412172401), + comp2(-7378697629483820647, 1844674407370955161), + comp2(8384883669867978007, 1676976733973595601), + comp2(6148914691236517205, 1537228672809129301), + comp2(4256940940086819603, 1418980313362273201), + comp2(2635249153387078802, 1317624576693539401), + comp2(1229782938247303441, 1229782938247303441), + comp2(-1, 1152921504606846975), + comp2(1085102592571150095, 1085102592571150095), + comp2(-2049638230412172402, 1024819115206086200), + comp2(-1941762534074689644, 970881267037344821), + comp2(-3689348814741910324, 922337203685477580), + comp2(-4392081922311798004, 878416384462359600), + comp2(-5030930201920786805, 838488366986797800), + comp2(4812194106185100421, 802032351030850070), + comp2(-6148914691236517206, 768614336404564650), + comp2(-6640827866535438582, 737869762948382064), + comp2(-7094901566811366007, 709490156681136600), + comp2(-1366425486941448268, 683212743470724133), + comp2(-7905747460161236407, 658812288346769700), + comp2(-3180473116156819245, 636094623231363848), + comp2(-8608480567731124088, 614891469123651720), + comp2(-8925843906633654008, 595056260442243600), + comp2(-1, 576460752303423487), + comp2(8943875914525843207, 558992244657865200), + comp2(-8680820740569200761, 542551296285575047), + comp2(8432797290838652167, 527049830677415760), + comp2(8198552921648689607, 512409557603043100), +}; + +uwide128 __intscan(FILE *f, unsigned base, int pok, uwide128 lim) { const unsigned char *val = table+1; int c, neg=0; unsigned x; - unsigned long long y; + uint64_t z; + uwide128 y; if (base > 36 || base == 1) { errno = EINVAL; - return 0; + return uwide128_zero; } while (isspace((c=shgetc(f)))); if (c=='+' || c=='-') { @@ -46,7 +91,7 @@ unsigned long long __intscan(FILE *f, unsigned base, int pok, unsigned long long shunget(f); if (pok) shunget(f); else shlim(f, 0); - return 0; + return uwide128_zero; } base = 16; } else if ((c|32)=='b' && base != 16) { @@ -55,7 +100,7 @@ unsigned long long __intscan(FILE *f, unsigned base, int pok, unsigned long long shunget(f); if (pok) shunget(f); else shlim(f, 0); - return 0; + return uwide128_zero; } base = 2; } else if (base == 0) { @@ -67,43 +112,59 @@ unsigned long long __intscan(FILE *f, unsigned base, int pok, unsigned long long shunget(f); shlim(f, 0); errno = EINVAL; - return 0; + return uwide128_zero; } } if (base == 10) { for (x=0; c-'0'<10U && x<=UINT_MAX/10-1; c=shgetc(f)) x = x*10 + (c-'0'); - for (y=x; c-'0'<10U && y<=ULLONG_MAX/10 && 10*y<=ULLONG_MAX-(c-'0'); c=shgetc(f)) - y = y*10 + (c-'0'); + for (z=x; c-'0'<10U && z<=UINT64_MAX/10-1; c=shgetc(f)) + z = z*10 + (c-'0'); + y=__uwide128_u64(x); + if (c-'0'<10U) { + uwide128 y10 = __uwide128_mul(y, 10); + while (c-'0'<10U + && __uwide128_le(y, __uwide128_limit[10]) + && __uwide128_le(y10, __uwide128_sub(uwide128_max, (c-'0')))) { + y = __uwide128_add(y10, (c-'0')); + y10 = __uwide128_mul(y, 10); + c=shgetc(f); + } + } if (c-'0'>=10U) goto done; - } else if (!(base & base-1)) { - int bs = "\0\1\2\4\7\3\6\5"[(0x17*base)>>5&7]; - for (x=0; val[c]>bs; c=shgetc(f)) - y = y<=lim) { - if (!(lim&1) && !neg) { + if (__uwide128_le(lim, y)) { + if (!(lim.v64[word64(0)]&1) && !neg) { errno = ERANGE; - return lim-1; - } else if (y>lim) { + return __uwide128_sub(lim, 1); + } else if (!__uwide128_le(y, lim)) { errno = ERANGE; return lim; } } - return (y^neg)-neg; + return neg ? __uwide128_neg(y) : y; } diff --git a/src/internal/intscan.h b/src/internal/intscan.h index ccf9f112..cf7a9d8f 100644 --- a/src/internal/intscan.h +++ b/src/internal/intscan.h @@ -1,8 +1,10 @@ #ifndef INTSCAN_H #define INTSCAN_H +#include "stdio_impl.h" #include +#include "uwide128.h" -hidden unsigned long long __intscan(FILE *, unsigned, int, unsigned long long); +hidden uwide128 __intscan(FILE *, unsigned, int, uwide128); #endif diff --git a/src/stdio/vfscanf.c b/src/stdio/vfscanf.c index f8ace92f..be922f82 100644 --- a/src/stdio/vfscanf.c +++ b/src/stdio/vfscanf.c @@ -18,25 +18,29 @@ #define SIZE_l 1 #define SIZE_L 2 #define SIZE_ll 3 +#define SIZE_128 4 -static void store_int(void *dest, int size, unsigned long long i) +static void store_int(void *dest, int size, uwide128 i) { if (!dest) return; switch (size) { case SIZE_hh: - *(char *)dest = i; + *(char *)dest = i.v64[lo64]; break; case SIZE_h: - *(short *)dest = i; + *(short *)dest = i.v64[lo64]; break; case SIZE_def: - *(int *)dest = i; + *(int *)dest = i.v64[lo64]; break; case SIZE_l: - *(long *)dest = i; + *(long *)dest = i.v64[lo64]; break; case SIZE_ll: - *(long long *)dest = i; + *(long long *)dest = i.v64[lo64]; + break; + case SIZE_128: + *(uwide128 *)dest = i; break; } } @@ -77,7 +81,7 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap) void *dest=NULL; int invert; int matches=0; - unsigned long long x; + uwide128 x; long double y; off_t pos = 0; unsigned char scanset[257]; @@ -178,6 +182,7 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap) #else case 64: size = SIZE_ll; break; #endif + case 128: size = SIZE_128; break; } break; case 'b': case 'd': case 'i': case 'o': case 'u': case 'x': @@ -206,7 +211,7 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap) case '[': break; case 'n': - store_int(dest, size, pos); + store_int(dest, size, __uwide128_u64(pos)); /* do not increment match count, etc! */ continue; default: @@ -326,9 +331,9 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap) case 'i': base = 0; int_common: - x = __intscan(f, base, 0, ULLONG_MAX); + x = __intscan(f, base, 0, UWIDE128_MAX); if (!shcnt(f)) goto match_fail; - if (t=='p' && dest) *(void **)dest = (void *)(uintptr_t)x; + if (t=='p' && dest) *(void **)dest = (void *)(uintptr_t)x.v64[lo64]; else store_int(dest, size, x); break; case 'a': case 'A': diff --git a/src/stdio/vfwscanf.c b/src/stdio/vfwscanf.c index 6fe2749c..f70d83a1 100644 --- a/src/stdio/vfwscanf.c +++ b/src/stdio/vfwscanf.c @@ -18,25 +18,29 @@ #define SIZE_l 1 #define SIZE_L 2 #define SIZE_ll 3 +#define SIZE_128 4 -static void store_int(void *dest, int size, unsigned long long i) +static void store_int(void *dest, int size, uwide128 i) { if (!dest) return; switch (size) { case SIZE_hh: - *(char *)dest = i; + *(char *)dest = i.v64[lo64]; break; case SIZE_h: - *(short *)dest = i; + *(short *)dest = i.v64[lo64]; break; case SIZE_def: - *(int *)dest = i; + *(int *)dest = i.v64[lo64]; break; case SIZE_l: - *(long *)dest = i; + *(long *)dest = i.v64[lo64]; break; case SIZE_ll: - *(long long *)dest = i; + *(long long *)dest = i.v64[lo64]; + break; + case SIZE_128: + *(uwide128 *)dest = i; break; } } @@ -201,6 +205,7 @@ int vfwscanf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap) #else case 64: size = SIZE_ll; break; #endif + case 128: size = SIZE_128; break; } break; case 'b': case 'd': case 'i': case 'o': case 'u': case 'x': @@ -234,7 +239,7 @@ int vfwscanf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap) switch (t) { case 'n': - store_int(dest, size, pos); + store_int(dest, size, __uwide128_i64(pos)); /* do not increment match count, etc! */ continue; diff --git a/src/stdlib/strtol.c b/src/stdlib/strtol.c index bfefea69..db5acadb 100644 --- a/src/stdlib/strtol.c +++ b/src/stdlib/strtol.c @@ -5,12 +5,12 @@ #include #include -static unsigned long long strtox(const char *s, char **p, int base, unsigned long long lim) +static unsigned long long strtox(const char *s, char **p, int base, uwide128 lim) { FILE f; sh_fromstring(&f, s); shlim(&f, 0); - unsigned long long y = __intscan(&f, base, 1, lim); + unsigned long long y = __intscan(&f, base, 1, lim).v64[lo64]; if (p) { size_t cnt = shcnt(&f); *p = (char *)s + cnt; @@ -20,22 +20,22 @@ static unsigned long long strtox(const char *s, char **p, int base, unsigned lon unsigned long long strtoull(const char *restrict s, char **restrict p, int base) { - return strtox(s, p, base, ULLONG_MAX); + return strtox(s, p, base, __uwide128_u64(ULLONG_MAX)); } long long strtoll(const char *restrict s, char **restrict p, int base) { - return strtox(s, p, base, LLONG_MIN); + return strtox(s, p, base, __uwide128_u64(0ULL+LLONG_MIN)); } unsigned long strtoul(const char *restrict s, char **restrict p, int base) { - return strtox(s, p, base, ULONG_MAX); + return strtox(s, p, base, __uwide128_u64(ULONG_MAX)); } long strtol(const char *restrict s, char **restrict p, int base) { - return strtox(s, p, base, 0UL+LONG_MIN); + return strtox(s, p, base, __uwide128_u64(0UL+LONG_MIN)); } intmax_t strtoimax(const char *restrict s, char **restrict p, int base) diff --git a/src/stdlib/wcstol.c b/src/stdlib/wcstol.c index 1eeb495f..d7662562 100644 --- a/src/stdlib/wcstol.c +++ b/src/stdlib/wcstol.c @@ -29,7 +29,7 @@ static size_t do_read(FILE *f, unsigned char *buf, size_t len) return 0; } -static unsigned long long wcstox(const wchar_t *s, wchar_t **p, int base, unsigned long long lim) +static unsigned long long wcstox(const wchar_t *s, wchar_t **p, int base, uwide128 lim) { wchar_t *t = (wchar_t *)s; unsigned char buf[64]; @@ -42,7 +42,7 @@ static unsigned long long wcstox(const wchar_t *s, wchar_t **p, int base, unsign while (iswspace(*t)) t++; f.cookie = (void *)t; shlim(&f, 0); - unsigned long long y = __intscan(&f, base, 1, lim); + unsigned long long y = __intscan(&f, base, 1, lim).v64[lo64]; if (p) { size_t cnt = shcnt(&f); *p = cnt ? t + cnt : (wchar_t *)s; @@ -52,22 +52,22 @@ static unsigned long long wcstox(const wchar_t *s, wchar_t **p, int base, unsign unsigned long long wcstoull(const wchar_t *restrict s, wchar_t **restrict p, int base) { - return wcstox(s, p, base, ULLONG_MAX); + return wcstox(s, p, base, __uwide128_u64(ULLONG_MAX)); } long long wcstoll(const wchar_t *restrict s, wchar_t **restrict p, int base) { - return wcstox(s, p, base, LLONG_MIN); + return wcstox(s, p, base, __uwide128_u64(0ULL+LLONG_MIN)); } unsigned long wcstoul(const wchar_t *restrict s, wchar_t **restrict p, int base) { - return wcstox(s, p, base, ULONG_MAX); + return wcstox(s, p, base, __uwide128_u64(ULONG_MAX)); } long wcstol(const wchar_t *restrict s, wchar_t **restrict p, int base) { - return wcstox(s, p, base, 0UL+LONG_MIN); + return wcstox(s, p, base, __uwide128_u64(0UL+LONG_MIN)); } intmax_t wcstoimax(const wchar_t *restrict s, wchar_t **restrict p, int base) -- 2.34.1