[musl] [C23 128 bit 2/4] C23: implement w128 and wf128 support for printf

mailing list of musl libc
 help / color / mirror / code / Atom feed

From: Jens Gustedt <Jens.Gustedt@inria.fr>
To: musl@lists.openwall.com
Subject: [musl] [C23 128 bit 2/4] C23: implement w128 and wf128 support for printf
Date: Wed, 31 May 2023 16:15:48 +0200	[thread overview]
Message-ID: <8fea6ddefeac5257eccd7d9d7c9597f95ff587a0.1685536608.git.Jens.Gustedt@inria.fr> (raw)
In-Reply-To: <cover.1685536608.git.Jens.Gustedt@inria.fr>

C23 now allows extended integer types wider than `intmax_t` for the
case that they are used to implement some fixed-width integer
type. The length specifier wN can then be used for `printf` and friends
to print the type.

GCC and Clang have provided `__int128` types for many architectures (in
particular x86_64) for a long time, and adding these types
"officially" is a recurrent user request. They are particularly nice
to have for bitsets.

Implementing w128 (and wf128) is a first step to provide `int128_t`
and `uint128_t` natively, regardless of whether a compiler already
implements these types.

This implementation hopefully only adds a very mild overhead in size
and processing time for those architectures where this type is
present. The impact is

- one extra state array for the 128 bit type (some 60 static bytes or so)
- a widening of the local buffer needed to collect digits (512 bytes
  on the stack)
- widening of the static format functions to accept the 128 bit type
  (probably a few bytes for some extra load instructions)

These functions are then possibly a bit slower, since they use wider
instructions and/or combine several instructions. There was already an
optimization in place for the 'u' format, because division and modulo
by 10 are needed there, which can be a bit costly. We now apply
similar tricks for the other integer formats to avoid entering the
128 bit emulation whenever possible.
---
 src/stdio/vfprintf.c  | 113 ++++++++++++++++++++++++++----------------
 src/stdio/vfwprintf.c |  45 ++++++++++-------
 2 files changed, 97 insertions(+), 61 deletions(-)

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index a531a513..aac065fc 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -1,4 +1,5 @@
 #include "stdio_impl.h"
+#include "uwide128.h"
 #include <errno.h>
 #include <ctype.h>
 #include <limits.h>
@@ -35,10 +36,12 @@ enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
 	ZTPRE, JPRE, WPRE,
 	STOP,
+	WWPRE,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
 	SHORT, USHORT, CHAR, UCHAR,
 	LLONG, SIZET, IMAX, UMAX, PDIFF, UIPTR,
+	INT128, UINT128,
 	DBL, LDBL,
 	NOARG,
 	MAXSTATE
@@ -114,7 +117,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 
 union arg
 {
-	uintmax_t i;
+	uwide128 i;
 	long double f;
 	void *p;
 };
@@ -122,22 +125,24 @@ union arg
 static void pop_arg(union arg *arg, int type, va_list *ap)
 {
 	switch (type) {
-	       case PTR:	arg->p = va_arg(*ap, void *);
-	break; case INT:	arg->i = va_arg(*ap, int);
-	break; case UINT:	arg->i = va_arg(*ap, unsigned int);
-	break; case LONG:	arg->i = va_arg(*ap, long);
-	break; case ULONG:	arg->i = va_arg(*ap, unsigned long);
-	break; case ULLONG:	arg->i = va_arg(*ap, unsigned long long);
-	break; case SHORT:	arg->i = (short)va_arg(*ap, int);
-	break; case USHORT:	arg->i = (unsigned short)va_arg(*ap, int);
-	break; case CHAR:	arg->i = (signed char)va_arg(*ap, int);
-	break; case UCHAR:	arg->i = (unsigned char)va_arg(*ap, int);
-	break; case LLONG:	arg->i = va_arg(*ap, long long);
-	break; case SIZET:	arg->i = va_arg(*ap, size_t);
-	break; case IMAX:	arg->i = va_arg(*ap, intmax_t);
-	break; case UMAX:	arg->i = va_arg(*ap, uintmax_t);
-	break; case PDIFF:	arg->i = va_arg(*ap, ptrdiff_t);
-	break; case UIPTR:	arg->i = (uintptr_t)va_arg(*ap, void *);
+		case PTR:	arg->p = va_arg(*ap, void *);
+	break; case INT:	arg->i = __uwide128_i64(va_arg(*ap, int));
+	break; case UINT:	arg->i = __uwide128_u64(va_arg(*ap, unsigned int));
+	break; case LONG:	arg->i = __uwide128_i64(va_arg(*ap, long));
+	break; case ULONG:	arg->i = __uwide128_u64(va_arg(*ap, unsigned long));
+	break; case ULLONG:	arg->i = __uwide128_u64(va_arg(*ap, unsigned long long));
+	break; case SHORT:	arg->i = __uwide128_i64((short)va_arg(*ap, int));
+	break; case USHORT:	arg->i = __uwide128_u64((unsigned short)va_arg(*ap, int));
+	break; case CHAR:	arg->i = __uwide128_i64((signed char)va_arg(*ap, int));
+	break; case UCHAR:	arg->i = __uwide128_u64((unsigned char)va_arg(*ap, int));
+	break; case LLONG:	arg->i = __uwide128_i64(va_arg(*ap, long long));
+	break; case SIZET:	arg->i = __uwide128_u64(va_arg(*ap, size_t));
+	break; case IMAX:	arg->i = __uwide128_i64(va_arg(*ap, intmax_t));
+	break; case UMAX:	arg->i = __uwide128_u64(va_arg(*ap, uintmax_t));
+	break; case PDIFF:	arg->i = __uwide128_i64(va_arg(*ap, ptrdiff_t));
+	break; case UIPTR:	arg->i = __uwide128_u64((uintptr_t)va_arg(*ap, void *));
+	break; case INT128:	arg->i = __uwide128_pop(ap);
+	break; case UINT128:	arg->i = __uwide128_pop(ap);
 	break; case DBL:	arg->f = va_arg(*ap, double);
 	break; case LDBL:	arg->f = va_arg(*ap, long double);
 	}
@@ -163,29 +168,51 @@ static const char xdigits[16] = {
 	"0123456789ABCDEF"
 };
 
-static char *fmt_x(uintmax_t x, char *s, int lower)
+static char *fmt_u(unsigned long long x, char *s)
 {
-	for (; x; x>>=4) *--s = xdigits[(x&15)]|lower;
+	unsigned long y;
+	for (   ; x>ULONG_MAX; x/=10) *--s = '0' + x%10;
+	for (y=x;           y; y/=10) *--s = '0' + y%10;
 	return s;
 }
 
-static char *fmt_b(uintmax_t x, char *s)
+static char *fmt128_x(uwide128 x, char *s, int lower)
 {
-	for (; x; x>>=1) *--s = '0' + (x&1);
+	uint32_t y;
+	uint64_t z;
+	while (x.v64[hi64]) *--s = xdigits[__uwide128_div16(&x)]|lower;
+	for (z = x.v64[lo64]; z>UINT32_MAX; z>>=4) *--s = xdigits[(z&15)]|lower;
+	for (y=z;           y; y>>=4) *--s = xdigits[(y&15)]|lower;
 	return s;
 }
 
-static char *fmt_o(uintmax_t x, char *s)
+static char *fmt128_b(uwide128 x, char *s)
 {
-	for (; x; x>>=3) *--s = '0' + (x&7);
+	uint64_t z;
+	uint32_t y;
+	while (x.v64[hi64]) *--s = '0' + __uwide128_div2(&x);
+	for (z = x.v64[lo64]; z>UINT32_MAX; z>>=1) *--s = '0' + z%2;
+	for (y=z;           y; y>>=1) *--s = '0' + y%2;
 	return s;
 }
 
-static char *fmt_u(uintmax_t x, char *s)
+static char *fmt128_o(uwide128 x, char *s)
 {
-	unsigned long y;
-	for (   ; x>ULONG_MAX; x/=10) *--s = '0' + x%10;
-	for (y=x;           y; y/=10) *--s = '0' + y%10;
+	uint64_t z;
+	uint32_t y;
+	while (x.v64[hi64]) *--s = '0' + __uwide128_div8(&x);
+	for (z = x.v64[lo64]; z>UINT32_MAX; z>>=3) *--s = '0' + z%8;
+	for (y=z;           y; y>>=3) *--s = '0' + y%8;
+	return s;
+}
+
+static char *fmt128_u(uwide128 x, char *s)
+{
+	uint64_t z;
+	uint32_t y;
+	while (x.v64[hi64]) *--s = '0' + __uwide128_div10(&x);
+	for (z = x.v64[lo64]; z>UINT32_MAX; z/=10) *--s = '0' + z%10;
+	for (y=z;           y; y/=10) *--s = '0' + y%10;
 	return s;
 }
 
@@ -456,7 +483,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 	unsigned st, ps, width=0;
 	int cnt=0, l=0;
 	size_t i;
-	char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4];
+	char buf[sizeof(uwide128)*CHAR_BIT+3+LDBL_MANT_DIG/4];
 	const char *prefix;
 	int t, pl;
 	wchar_t wc[2], *ws;
@@ -498,7 +525,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			if (isdigit(s[1]) && s[2]=='$') {
 				l10n=1;
 				if (!f) nl_type[s[1]-'0'] = INT, w = 0;
-				else w = nl_arg[s[1]-'0'].i;
+				else w = nl_arg[s[1]-'0'].i.v64[lo64];
 				s+=3;
 			} else if (!l10n) {
 				w = f ? va_arg(*ap, int) : 0;
@@ -511,7 +538,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		if (*s=='.' && s[1]=='*') {
 			if (isdigit(s[2]) && s[3]=='$') {
 				if (!f) nl_type[s[2]-'0'] = INT, p = 0;
-				else p = nl_arg[s[2]-'0'].i;
+				else p = nl_arg[s[2]-'0'].i.v64[lo64];
 				s+=4;
 			} else if (!l10n) {
 				p = f ? va_arg(*ap, int) : 0;
@@ -552,6 +579,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 #else
 		case 64:  ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break;
 #endif
+		case 128: ps = WWPRE; st = (st == UINT) ? UINT128 : ((st == INT) ? INT128 : PTR); break;
 		default:  goto inval;
 		}
 
@@ -592,6 +620,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			case HHPRE: *(unsigned char *)arg.p = cnt; break;
 			case ZTPRE: *(size_t *)arg.p = cnt; break;
 			case JPRE: *(uintmax_t *)arg.p = cnt; break;
+			case WWPRE: *(uwide128 *)arg.p = __uwide128_i64(cnt); break;
 			}
 			continue;
 		case 'p':
@@ -599,39 +628,39 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			t = 'x';
 			fl |= ALT_FORM;
 		case 'x': case 'X':
-			a = fmt_x(arg.i, z, t&32);
-			if (arg.i && (fl & ALT_FORM)) prefix+=(t>>4), pl=2;
+			a = fmt128_x(arg.i, z, t&32);
+			if (!__uwide128_iszero(arg.i) && (fl & ALT_FORM)) prefix+=(t>>4), pl=2;
 			if (0) {
 		case 'b': case 'B':
-			a = fmt_b(arg.i, z);
-			if (arg.i && (fl & ALT_FORM)) prefix = (t == 'b' ? "0b" : "0B"), pl=2;
+			a = fmt128_b(arg.i, z);
+			if (!__uwide128_iszero(arg.i) && (fl & ALT_FORM)) prefix = (t == 'b' ? "0b" : "0B"), pl=2;
 			} if (0) {
 		case 'o':
-			a = fmt_o(arg.i, z);
+			a = fmt128_o(arg.i, z);
 			if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1;
 			} if (0) {
 		case 'd': case 'i':
 			pl=1;
-			if (arg.i>INTMAX_MAX) {
-				arg.i=-arg.i;
+			if (arg.i.v64[hi64]>INT64_MAX) {
+				arg.i=__uwide128_neg(arg.i);
 			} else if (fl & MARK_POS) {
 				prefix++;
 			} else if (fl & PAD_POS) {
 				prefix+=2;
 			} else pl=0;
 		case 'u':
-			a = fmt_u(arg.i, z);
+			a = fmt128_u(arg.i, z);
 			}
 			if (xp && p<0) goto overflow;
 			if (xp) fl &= ~ZERO_PAD;
-			if (!arg.i && !p) {
+			if (__uwide128_iszero(arg.i) && !p) {
 				a=z;
 				break;
 			}
-			p = MAX(p, z-a + !arg.i);
+			p = MAX(p, z-a + __uwide128_iszero(arg.i));
 			break;
 		case 'c':
-			*(a=z-(p=1))=arg.i;
+			*(a=z-(p=1))=arg.i.v64[lo64];
 			fl &= ~ZERO_PAD;
 			break;
 		case 'm':
@@ -644,7 +673,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			fl &= ~ZERO_PAD;
 			break;
 		case 'C':
-			wc[0] = arg.i;
+			wc[0] = arg.i.v64[lo64];
 			wc[1] = 0;
 			arg.p = wc;
 			p = -1;
diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
index 3689c2d5..d510233f 100644
--- a/src/stdio/vfwprintf.c
+++ b/src/stdio/vfwprintf.c
@@ -1,4 +1,5 @@
 #include "stdio_impl.h"
+#include "uwide128.h"
 #include <errno.h>
 #include <ctype.h>
 #include <limits.h>
@@ -28,10 +29,12 @@ enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
 	ZTPRE, JPRE, WPRE,
 	STOP,
+	WWPRE,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
 	SHORT, USHORT, CHAR, UCHAR,
 	LLONG, SIZET, IMAX, UMAX, PDIFF, UIPTR,
+	INT128, UINT128,
 	DBL, LDBL,
 	NOARG,
 	MAXSTATE
@@ -107,7 +110,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 
 union arg
 {
-	uintmax_t i;
+	uwide128 i;
 	long double f;
 	void *p;
 };
@@ -116,21 +119,23 @@ static void pop_arg(union arg *arg, int type, va_list *ap)
 {
 	switch (type) {
 	       case PTR:	arg->p = va_arg(*ap, void *);
-	break; case INT:	arg->i = va_arg(*ap, int);
-	break; case UINT:	arg->i = va_arg(*ap, unsigned int);
-	break; case LONG:	arg->i = va_arg(*ap, long);
-	break; case ULONG:	arg->i = va_arg(*ap, unsigned long);
-	break; case ULLONG:	arg->i = va_arg(*ap, unsigned long long);
-	break; case SHORT:	arg->i = (short)va_arg(*ap, int);
-	break; case USHORT:	arg->i = (unsigned short)va_arg(*ap, int);
-	break; case CHAR:	arg->i = (signed char)va_arg(*ap, int);
-	break; case UCHAR:	arg->i = (unsigned char)va_arg(*ap, int);
-	break; case LLONG:	arg->i = va_arg(*ap, long long);
-	break; case SIZET:	arg->i = va_arg(*ap, size_t);
-	break; case IMAX:	arg->i = va_arg(*ap, intmax_t);
-	break; case UMAX:	arg->i = va_arg(*ap, uintmax_t);
-	break; case PDIFF:	arg->i = va_arg(*ap, ptrdiff_t);
-	break; case UIPTR:	arg->i = (uintptr_t)va_arg(*ap, void *);
+	break; case INT:	arg->i = __uwide128_i64(va_arg(*ap, int));
+	break; case UINT:	arg->i = __uwide128_u64(va_arg(*ap, unsigned int));
+	break; case LONG:	arg->i = __uwide128_i64(va_arg(*ap, long));
+	break; case ULONG:	arg->i = __uwide128_u64(va_arg(*ap, unsigned long));
+	break; case ULLONG:	arg->i = __uwide128_u64(va_arg(*ap, unsigned long long));
+	break; case SHORT:	arg->i = __uwide128_i64((short)va_arg(*ap, int));
+	break; case USHORT:	arg->i = __uwide128_u64((unsigned short)va_arg(*ap, int));
+	break; case CHAR:	arg->i = __uwide128_i64((signed char)va_arg(*ap, int));
+	break; case UCHAR:	arg->i = __uwide128_u64((unsigned char)va_arg(*ap, int));
+	break; case LLONG:	arg->i = __uwide128_i64(va_arg(*ap, long long));
+	break; case SIZET:	arg->i = __uwide128_u64(va_arg(*ap, size_t));
+	break; case IMAX:	arg->i = __uwide128_i64(va_arg(*ap, intmax_t));
+	break; case UMAX:	arg->i = __uwide128_u64(va_arg(*ap, uintmax_t));
+	break; case PDIFF:	arg->i = __uwide128_i64(va_arg(*ap, ptrdiff_t));
+	break; case UIPTR:	arg->i = __uwide128_u64((uintptr_t)va_arg(*ap, void *));
+	break; case INT128:	arg->i = __uwide128_pop(ap);
+	break; case UINT128:	arg->i = __uwide128_pop(ap);
 	break; case DBL:	arg->f = va_arg(*ap, double);
 	break; case LDBL:	arg->f = va_arg(*ap, long double);
 	}
@@ -213,7 +218,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 			if (iswdigit(s[1]) && s[2]=='$') {
 				l10n=1;
 				nl_type[s[1]-'0'] = INT;
-				w = nl_arg[s[1]-'0'].i;
+				w = nl_arg[s[1]-'0'].i.v64[lo64];
 				s+=3;
 			} else if (!l10n) {
 				w = f ? va_arg(*ap, int) : 0;
@@ -226,7 +231,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 		if (*s=='.' && s[1]=='*') {
 			if (isdigit(s[2]) && s[3]=='$') {
 				nl_type[s[2]-'0'] = INT;
-				p = nl_arg[s[2]-'0'].i;
+				p = nl_arg[s[2]-'0'].i.v64[lo64];
 				s+=4;
 			} else if (!l10n) {
 				p = f ? va_arg(*ap, int) : 0;
@@ -267,6 +272,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 #else
 		case 64:  ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break;
 #endif
+		case 128: ps = WWPRE; st = (st == UINT) ? UINT128 : ((st == INT) ? INT128 : PTR); break;
 		default:  goto inval;
 		}
 
@@ -297,13 +303,14 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 			case HHPRE: *(unsigned char *)arg.p = cnt; break;
 			case ZTPRE: *(size_t *)arg.p = cnt; break;
 			case JPRE: *(uintmax_t *)arg.p = cnt; break;
+			case WWPRE: *(uwide128 *)arg.p = __uwide128_i64(cnt); break;
 			}
 			continue;
 		case 'c':
 		case 'C':
 			if (w<1) w=1;
 			pad(f, w-1, fl);
-			out(f, &(wchar_t){t=='C' ? arg.i : btowc(arg.i)}, 1);
+			out(f, &(wchar_t){t=='C' ? arg.i.v64[lo64] : btowc(arg.i.v64[lo64])}, 1);
 			pad(f, w-1, fl^LEFT_ADJ);
 			l = w;
 			continue;
-- 
2.34.1

next prev parent reply	other threads:[~2023-05-31 14:16 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-31 14:15 [musl] [C23 128 bit 0/4] implement the library part of 128 bit support Jens Gustedt
2023-05-31 14:15 ` [musl] [C23 128 bit 1/4] add an emulation for 128 bit arithmetic as needed for C library support Jens Gustedt
2023-05-31 14:15 ` Jens Gustedt [this message]
2023-05-31 14:15 ` [musl] [C23 128 bit 3/4] C23: implement w128 and wf128 for scanf and similar Jens Gustedt
2023-05-31 14:15 ` [musl] [C23 128 bit 4/4] C23: implement proper support for int128_t and uint128_t Jens Gustedt
2023-05-31 14:27   ` Rich Felker
2023-05-31 14:29     ` Rich Felker
2023-05-31 14:36     ` Jₑₙₛ Gustedt
2023-05-31 14:41       ` Rich Felker
2023-05-31 14:55         ` Jₑₙₛ Gustedt
2023-05-31 14:57           ` Rich Felker
2023-05-31 15:07             ` Jₑₙₛ Gustedt
2023-05-31 15:14               ` Rich Felker
2023-05-31 15:37                 ` Jₑₙₛ Gustedt
2023-05-31 15:40                   ` Rich Felker
2023-05-31 15:56                     ` Jₑₙₛ Gustedt
2023-05-31 16:30                       ` Alexander Monakov
2023-05-31 16:58                         ` Jens Gustedt
2023-05-31 17:03                           ` Rich Felker
2023-05-31 17:09                           ` Alexander Monakov
2023-06-01  7:24                             ` Jₑₙₛ Gustedt
2023-05-31 14:42     ` Jₑₙₛ Gustedt
2023-05-31 14:47       ` Rich Felker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8fea6ddefeac5257eccd7d9d7c9597f95ff587a0.1685536608.git.Jens.Gustedt@inria.fr \
    --to=jens.gustedt@inria.fr \
    --cc=musl@lists.openwall.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).