[musl] [C23 printf 1/3] C23: implement the b and B printf specifiers

mailing list of musl libc
 help / color / mirror / code / Atom feed

* [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers
       [not found] <cover.1685429467.git.Jens.Gustedt@inria.fr>
@ 2023-05-30  6:55 ` Jens Gustedt
  2023-05-30  6:55 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt
  2023-05-30  6:55 ` [musl] [C23 printf 3/3] C23: implement the wfN length modifiers " Jens Gustedt
  2 siblings, 0 replies; 23+ messages in thread
From: Jens Gustedt @ 2023-05-30  6:55 UTC (permalink / raw)
  To: musl, jens.gustedt

The b specifier is mandatory for C23. It has been reserved previously,
so we may safely add it, even for older compilation modes.

The B specifier is optional, but recommended for those implementations
that didn't have it reserved previously for other purposes.

The PRIbXXX and PRIBXXX macros are mandatory if the specifiers are
supported and may serve as feature test macros for users.
---
 include/inttypes.h    | 34 ++++++++++++++++++++++++++++++++++
 src/stdio/vfprintf.c  | 19 ++++++++++++++++++-
 src/stdio/vfwprintf.c | 11 +++++++++--
 3 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/include/inttypes.h b/include/inttypes.h
index 61dcb727..73a42e32 100644
--- a/include/inttypes.h
+++ b/include/inttypes.h
@@ -120,12 +120,44 @@ uintmax_t wcstoumax(const wchar_t *__restrict, wchar_t **__restrict, int);
 #define PRIXFAST32 "X"
 #define PRIXFAST64 __PRI64 "X"
 
+#define PRIb8  "b"
+#define PRIb16 "b"
+#define PRIb32 "b"
+#define PRIb64 __PRI64 "b"
+
+#define PRIbLEAST8  "b"
+#define PRIbLEAST16 "b"
+#define PRIbLEAST32 "b"
+#define PRIbLEAST64 __PRI64 "b"
+
+#define PRIbFAST8  "b"
+#define PRIbFAST16 "b"
+#define PRIbFAST32 "b"
+#define PRIbFAST64 __PRI64 "b"
+
+#define PRIB8  "B"
+#define PRIB16 "B"
+#define PRIB32 "B"
+#define PRIB64 __PRI64 "B"
+
+#define PRIBLEAST8  "B"
+#define PRIBLEAST16 "B"
+#define PRIBLEAST32 "B"
+#define PRIBLEAST64 __PRI64 "B"
+
+#define PRIBFAST8  "B"
+#define PRIBFAST16 "B"
+#define PRIBFAST32 "B"
+#define PRIBFAST64 __PRI64 "B"
+
 #define PRIdMAX __PRI64 "d"
 #define PRIiMAX __PRI64 "i"
 #define PRIoMAX __PRI64 "o"
 #define PRIuMAX __PRI64 "u"
 #define PRIxMAX __PRI64 "x"
 #define PRIXMAX __PRI64 "X"
+#define PRIbMAX __PRI64 "b"
+#define PRIBMAX __PRI64 "B"
 
 #define PRIdPTR __PRIPTR "d"
 #define PRIiPTR __PRIPTR "i"
@@ -133,6 +165,8 @@ uintmax_t wcstoumax(const wchar_t *__restrict, wchar_t **__restrict, int);
 #define PRIuPTR __PRIPTR "u"
 #define PRIxPTR __PRIPTR "x"
 #define PRIXPTR __PRIPTR "X"
+#define PRIbPTR __PRIPTR "b"
+#define PRIBPTR __PRIPTR "B"
 
 #define SCNd8   "hhd"
 #define SCNd16  "hd"
diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index a712d80f..cbc79783 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -48,6 +48,7 @@ enum {
 
 static const unsigned char states[]['z'-'A'+1] = {
 	{ /* 0: bare types */
+		S('b') = UINT, S('B') = UINT,
 		S('d') = INT, S('i') = INT,
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -58,6 +59,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
 		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
 	}, { /* 1: l-prefixed */
+		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
 		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -65,17 +67,20 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('c') = INT, S('s') = PTR, S('n') = PTR,
 		S('l') = LLPRE,
 	}, { /* 2: ll-prefixed */
+		S('b') = ULLONG, S('B') = ULLONG,
 		S('d') = LLONG, S('i') = LLONG,
 		S('o') = ULLONG, S('u') = ULLONG,
 		S('x') = ULLONG, S('X') = ULLONG,
 		S('n') = PTR,
 	}, { /* 3: h-prefixed */
+		S('b') = USHORT, S('B') = USHORT,
 		S('d') = SHORT, S('i') = SHORT,
 		S('o') = USHORT, S('u') = USHORT,
 		S('x') = USHORT, S('X') = USHORT,
 		S('n') = PTR,
 		S('h') = HHPRE,
 	}, { /* 4: hh-prefixed */
+		S('b') = UCHAR, S('B') = UCHAR,
 		S('d') = CHAR, S('i') = CHAR,
 		S('o') = UCHAR, S('u') = UCHAR,
 		S('x') = UCHAR, S('X') = UCHAR,
@@ -85,11 +90,13 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('E') = LDBL, S('F') = LDBL, S('G') = LDBL, S('A') = LDBL,
 		S('n') = PTR,
 	}, { /* 6: z- or t-prefixed (assumed to be same size) */
+		S('b') = SIZET, S('B') = SIZET,
 		S('d') = PDIFF, S('i') = PDIFF,
 		S('o') = SIZET, S('u') = SIZET,
 		S('x') = SIZET, S('X') = SIZET,
 		S('n') = PTR,
 	}, { /* 7: j-prefixed */
+		S('b') = UMAX, S('B') = UMAX,
 		S('d') = IMAX, S('i') = IMAX,
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
@@ -156,6 +163,12 @@ static char *fmt_x(uintmax_t x, char *s, int lower)
 	return s;
 }
 
+static char *fmt_b(uintmax_t x, char *s)
+{
+	for (; x; x>>=1) *--s = '0' + (x&1);
+	return s;
+}
+
 static char *fmt_o(uintmax_t x, char *s)
 {
 	for (; x; x>>=3) *--s = '0' + (x&7);
@@ -437,7 +450,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 	unsigned st, ps;
 	int cnt=0, l=0;
 	size_t i;
-	char buf[sizeof(uintmax_t)*3+3+LDBL_MANT_DIG/4];
+	char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4];
 	const char *prefix;
 	int t, pl;
 	wchar_t wc[2], *ws;
@@ -564,6 +577,10 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			a = fmt_x(arg.i, z, t&32);
 			if (arg.i && (fl & ALT_FORM)) prefix+=(t>>4), pl=2;
 			if (0) {
+		case 'b': case 'B':
+			a = fmt_b(arg.i, z);
+			if (arg.i && (fl & ALT_FORM)) prefix = (t == 'b' ? "0b" : "0B"), pl=2;
+			} if (0) {
 		case 'o':
 			a = fmt_o(arg.i, z);
 			if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1;
diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
index 53697701..dbc93f74 100644
--- a/src/stdio/vfwprintf.c
+++ b/src/stdio/vfwprintf.c
@@ -41,6 +41,7 @@ enum {
 
 static const unsigned char states[]['z'-'A'+1] = {
 	{ /* 0: bare types */
+		S('b') = UINT, S('B') = UINT,
 		S('d') = INT, S('i') = INT,
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -51,6 +52,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
 		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
 	}, { /* 1: l-prefixed */
+		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
 		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -58,17 +60,20 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('c') = INT, S('s') = PTR, S('n') = PTR,
 		S('l') = LLPRE,
 	}, { /* 2: ll-prefixed */
+		S('b') = ULLONG, S('B') = ULLONG,
 		S('d') = LLONG, S('i') = LLONG,
 		S('o') = ULLONG, S('u') = ULLONG,
 		S('x') = ULLONG, S('X') = ULLONG,
 		S('n') = PTR,
 	}, { /* 3: h-prefixed */
+		S('b') = USHORT, S('B') = USHORT,
 		S('d') = SHORT, S('i') = SHORT,
 		S('o') = USHORT, S('u') = USHORT,
 		S('x') = USHORT, S('X') = USHORT,
 		S('n') = PTR,
 		S('h') = HHPRE,
 	}, { /* 4: hh-prefixed */
+		S('b') = UCHAR, S('B') = UCHAR,
 		S('d') = CHAR, S('i') = CHAR,
 		S('o') = UCHAR, S('u') = UCHAR,
 		S('x') = UCHAR, S('X') = UCHAR,
@@ -78,11 +83,13 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('E') = LDBL, S('F') = LDBL, S('G') = LDBL, S('A') = LDBL,
 		S('n') = PTR,
 	}, { /* 6: z- or t-prefixed (assumed to be same size) */
+		S('b') = SIZET, S('B') = SIZET,
 		S('d') = PDIFF, S('i') = PDIFF,
 		S('o') = SIZET, S('u') = SIZET,
 		S('x') = SIZET, S('X') = SIZET,
 		S('n') = PTR,
 	}, { /* 7: j-prefixed */
+		S('b') = UMAX, S('B') = UMAX,
 		S('d') = IMAX, S('i') = IMAX,
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
@@ -145,7 +152,7 @@ static int getint(wchar_t **s) {
 
 static const char sizeprefix['y'-'a'] = {
 ['a'-'a']='L', ['e'-'a']='L', ['f'-'a']='L', ['g'-'a']='L',
-['d'-'a']='j', ['i'-'a']='j', ['o'-'a']='j', ['u'-'a']='j', ['x'-'a']='j',
+['b'-'a']='j', ['d'-'a']='j', ['i'-'a']='j', ['o'-'a']='j', ['u'-'a']='j', ['x'-'a']='j',
 ['p'-'a']='j'
 };
 
@@ -321,7 +328,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 		case 'a': case 'e': case 'f': case 'g':
 			l = fprintf(f, charfmt, w, p, arg.f);
 			break;
-		case 'd': case 'i': case 'o': case 'u': case 'x': case 'p':
+		case 'b': case 'd': case 'i': case 'o': case 'u': case 'x': case 'p':
 			l = fprintf(f, charfmt, w, p, arg.i);
 			break;
 		}
-- 
2.34.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
       [not found] <cover.1685429467.git.Jens.Gustedt@inria.fr>
  2023-05-30  6:55 ` [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers Jens Gustedt
@ 2023-05-30  6:55 ` Jens Gustedt
  2023-06-02 14:38   ` Rich Felker
  2023-05-30  6:55 ` [musl] [C23 printf 3/3] C23: implement the wfN length modifiers " Jens Gustedt
  2 siblings, 1 reply; 23+ messages in thread
From: Jens Gustedt @ 2023-05-30  6:55 UTC (permalink / raw)
  To: musl, jens.gustedt

These are mandatory for C23 and concern all types for which the
platform has `int_leastN_t` and `uint_leastN_t`. For musl these types
always coincide with `intN_t` and `uintN_t` and are always present for
N equal 8, 16, 32 and 64.

They can be added for general use since all lowercase letters were
previously reserved.

Nevertheless, users that use these modifiers will see a lot of
warnings from compilers in the beginning. This is because the
compilers have not yet integrated this form of a specifier into their
correponding extensions (gcc attributes). So unfortunately also
testing this feature may be a bit noisy for the moment.

The only architecture dependend choice is the type for N == 64, which
may be `long` or `long long`. We just mimick the test that is done in
other places to compare `UINTPTR_MAX` and `UINT64_MAX` to determine
that.
---
 src/stdio/vfprintf.c  | 28 +++++++++++++++++++++++++---
 src/stdio/vfwprintf.c | 28 +++++++++++++++++++++++++---
 2 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index cbc79783..265fb7ad 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -33,7 +33,7 @@
 
 enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
-	ZTPRE, JPRE,
+	ZTPRE, JPRE, WPRE,
 	STOP,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
@@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
 		S('m') = NOARG,
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
-		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
+		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE,
 	}, { /* 1: l-prefixed */
 		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
@@ -101,6 +101,12 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
 		S('n') = PTR,
+	}, { /* 8: w-prefixed */
+		S('b') = UINT, S('B') = UINT,
+		S('d') = INT, S('i') = INT,
+		S('o') = UINT, S('u') = UINT,
+		S('x') = UINT, S('X') = UINT,
+		S('n') = PTR,
 	}
 };
 
@@ -447,7 +453,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 	int w, p, xp;
 	union arg arg;
 	int argpos;
-	unsigned st, ps;
+	unsigned st, ps, width=0;
 	int cnt=0, l=0;
 	size_t i;
 	char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4];
@@ -527,9 +533,25 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			if (OOB(*s)) goto inval;
 			ps=st;
 			st=states[st]S(*s++);
+ 			if (st == WPRE) {
+				if (*s == '0') goto inval;
+				width = getint(&s);
+                        }
 		} while (st-1<STOP);
 		if (!st) goto inval;
 
+		if (ps == WPRE) switch (width) {
+		case 8:   ps = HHPRE; st = (st == UINT) ? UCHAR  : ((st == INT) ? CHAR  : PTR); break;
+		case 16:  ps = HPRE;  st = (st == UINT) ? USHORT : ((st == INT) ? SHORT : PTR); break;
+		case 32:  ps = BARE;  break;
+#if UINTPTR_MAX >= UINT64_MAX
+		case 64:  ps = LPRE;  st = (st == UINT) ? ULONG  : ((st == INT) ? LONG  : PTR); break;
+#else
+		case 64:  ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break;
+#endif
+		default:  goto inval;
+		}
+
 		/* Check validity of argument type (nl/normal) */
 		if (st==NOARG) {
 			if (argpos>=0) goto inval;
diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
index dbc93f74..c3e81d2a 100644
--- a/src/stdio/vfwprintf.c
+++ b/src/stdio/vfwprintf.c
@@ -26,7 +26,7 @@
 
 enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
-	ZTPRE, JPRE,
+	ZTPRE, JPRE, WPRE,
 	STOP,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
@@ -50,7 +50,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
 		S('m') = NOARG,
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
-		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
+		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE,
 	}, { /* 1: l-prefixed */
 		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
@@ -94,6 +94,12 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
 		S('n') = PTR,
+	}, { /* 8: w-prefixed */
+		S('b') = UINT, S('B') = UINT,
+		S('d') = INT, S('i') = INT,
+		S('o') = UINT, S('u') = UINT,
+		S('x') = UINT, S('X') = UINT,
+		S('n') = PTR,
 	}
 };
 
@@ -163,7 +169,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 	int w, p, xp;
 	union arg arg;
 	int argpos;
-	unsigned st, ps;
+	unsigned st, ps, width=0;
 	int cnt=0, l=0;
 	int i;
 	int t;
@@ -242,9 +248,25 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 			if (OOB(*s)) goto inval;
 			ps=st;
 			st=states[st]S(*s++);
+ 			if (st == WPRE) {
+				if (*s == L'0') goto inval;
+				width = getint(&s);
+                        }
 		} while (st-1<STOP);
 		if (!st) goto inval;
 
+		if (ps == WPRE) switch (width) {
+		case 8:   ps = HHPRE; st = (st == UINT) ? UCHAR  : ((st == INT) ? CHAR  : PTR); break;
+		case 16:  ps = HPRE;  st = (st == UINT) ? USHORT : ((st == INT) ? SHORT : PTR); break;
+		case 32:  ps = BARE;  break;
+#if UINTPTR_MAX >= UINT64_MAX
+		case 64:  ps = LPRE;  st = (st == UINT) ? ULONG  : ((st == INT) ? LONG  : PTR); break;
+#else
+		case 64:  ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break;
+#endif
+		default:  goto inval;
+		}
+
 		/* Check validity of argument type (nl/normal) */
 		if (st==NOARG) {
 			if (argpos>=0) goto inval;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [musl] [C23 printf 3/3] C23: implement the wfN length modifiers for printf
       [not found] <cover.1685429467.git.Jens.Gustedt@inria.fr>
  2023-05-30  6:55 ` [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers Jens Gustedt
  2023-05-30  6:55 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt
@ 2023-05-30  6:55 ` Jens Gustedt
  2 siblings, 0 replies; 23+ messages in thread
From: Jens Gustedt @ 2023-05-30  6:55 UTC (permalink / raw)
  To: musl, jens.gustedt

Musl only has a difference between fixed-width and fastest-width
integer types for N == 16. And even here all architectures have made
the same choice, namely mapping to 32 bit types.
---
 src/stdio/vfprintf.c  | 3 +++
 src/stdio/vfwprintf.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index 265fb7ad..a531a513 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -534,8 +534,11 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			ps=st;
 			st=states[st]S(*s++);
  			if (st == WPRE) {
+                        	_Bool fast = (*s == 'f');
+                                if (fast) ++s;
 				if (*s == '0') goto inval;
 				width = getint(&s);
+                                if (fast && width == 16) width = 32;
                         }
 		} while (st-1<STOP);
 		if (!st) goto inval;
diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
index c3e81d2a..3689c2d5 100644
--- a/src/stdio/vfwprintf.c
+++ b/src/stdio/vfwprintf.c
@@ -249,8 +249,11 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 			ps=st;
 			st=states[st]S(*s++);
  			if (st == WPRE) {
+                        	_Bool fast = (*s == L'f');
+                                if (fast) ++s;
 				if (*s == L'0') goto inval;
 				width = getint(&s);
+                                if (fast && width == 16) width = 32;
                         }
 		} while (st-1<STOP);
 		if (!st) goto inval;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-30  6:55 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt
@ 2023-06-02 14:38   ` Rich Felker
  2023-06-02 15:09     ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 23+ messages in thread
From: Rich Felker @ 2023-06-02 14:38 UTC (permalink / raw)
  To: Jens Gustedt; +Cc: musl, jens.gustedt

On Tue, May 30, 2023 at 08:55:27AM +0200, Jens Gustedt wrote:
> These are mandatory for C23 and concern all types for which the
> platform has `int_leastN_t` and `uint_leastN_t`. For musl these types
> always coincide with `intN_t` and `uintN_t` and are always present for
> N equal 8, 16, 32 and 64.
> 
> They can be added for general use since all lowercase letters were
> previously reserved.
> 
> Nevertheless, users that use these modifiers will see a lot of
> warnings from compilers in the beginning. This is because the
> compilers have not yet integrated this form of a specifier into their
> correponding extensions (gcc attributes). So unfortunately also
> testing this feature may be a bit noisy for the moment.
> 
> The only architecture dependend choice is the type for N == 64, which
> may be `long` or `long long`. We just mimick the test that is done in
> other places to compare `UINTPTR_MAX` and `UINT64_MAX` to determine
> that.
> ---
>  src/stdio/vfprintf.c  | 28 +++++++++++++++++++++++++---
>  src/stdio/vfwprintf.c | 28 +++++++++++++++++++++++++---
>  2 files changed, 50 insertions(+), 6 deletions(-)
> 
> diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
> index cbc79783..265fb7ad 100644
> --- a/src/stdio/vfprintf.c
> +++ b/src/stdio/vfprintf.c
> @@ -33,7 +33,7 @@
>  
>  enum {
>  	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
> -	ZTPRE, JPRE,
> +	ZTPRE, JPRE, WPRE,
>  	STOP,
>  	PTR, INT, UINT, ULLONG,
>  	LONG, ULONG,
> @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = {
>  		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
>  		S('m') = NOARG,
>  		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
> -		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
> +		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE,
>  	}, { /* 1: l-prefixed */
>  		S('b') = ULONG, S('B') = ULONG,
>  		S('d') = LONG, S('i') = LONG,
> @@ -101,6 +101,12 @@ static const unsigned char states[]['z'-'A'+1] = {
>  		S('o') = UMAX, S('u') = UMAX,
>  		S('x') = UMAX, S('X') = UMAX,
>  		S('n') = PTR,
> +	}, { /* 8: w-prefixed */
> +		S('b') = UINT, S('B') = UINT,
> +		S('d') = INT, S('i') = INT,
> +		S('o') = UINT, S('u') = UINT,
> +		S('x') = UINT, S('X') = UINT,
> +		S('n') = PTR,
>  	}
>  };
>  
> @@ -447,7 +453,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
>  	int w, p, xp;
>  	union arg arg;
>  	int argpos;
> -	unsigned st, ps;
> +	unsigned st, ps, width=0;
>  	int cnt=0, l=0;
>  	size_t i;
>  	char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4];
> @@ -527,9 +533,25 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
>  			if (OOB(*s)) goto inval;
>  			ps=st;
>  			st=states[st]S(*s++);
> + 			if (st == WPRE) {
> +				if (*s == '0') goto inval;
> +				width = getint(&s);
> +                        }
>  		} while (st-1<STOP);
>  		if (!st) goto inval;
>  
> +		if (ps == WPRE) switch (width) {
> +		case 8:   ps = HHPRE; st = (st == UINT) ? UCHAR  : ((st == INT) ? CHAR  : PTR); break;
> +		case 16:  ps = HPRE;  st = (st == UINT) ? USHORT : ((st == INT) ? SHORT : PTR); break;
> +		case 32:  ps = BARE;  break;
> +#if UINTPTR_MAX >= UINT64_MAX
> +		case 64:  ps = LPRE;  st = (st == UINT) ? ULONG  : ((st == INT) ? LONG  : PTR); break;
> +#else
> +		case 64:  ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break;
> +#endif

The logic here for whether w64 is LPRE or LLPRE is wrong. The
condition for whether [u]int64_t is long or long long is not whether
uintptr_t or long long would be too wide (they never are, in our ABIs)
but whether long is long enough. All our [u]intN_t are the lowest-rank
type of the desired size, and this seems to be consistent with what
other implementations like glibc do.

Short of a compelling reason not to, though, I plan to just go with
the fix-up of my v2 patch for this functionality. Its ordering avoids
duplication of logic (the ?: branches in the cases above) and the need
for maintaining extra side state (the width variable) thru the state
machine.

Rich

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-06-02 14:38   ` Rich Felker
@ 2023-06-02 15:09     ` Jₑₙₛ Gustedt
  2023-06-02 15:16       ` Rich Felker
  0 siblings, 1 reply; 23+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-06-02 15:09 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl, jens.gustedt

[-- Attachment #1: Type: text/plain, Size: 1732 bytes --]

Rich,

on Fri, 2 Jun 2023 10:38:00 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> The logic here for whether w64 is LPRE or LLPRE is wrong. The
> condition for whether [u]int64_t is long or long long is not whether
> uintptr_t or long long would be too wide (they never are, in our ABIs)
> but whether long is long enough. All our [u]intN_t are the lowest-rank
> type of the desired size, and this seems to be consistent with what
> other implementations like glibc do.

I probably copied that logic wrong from somewhere. Yes, right thing is
probably just to compare to the new `ULONG_WIDTH` macro or so.

One thing to think about could be to always map 64 to `long long`,
just for this function here. This would always have the right size and
representation. And since this is an implementation of the C library,
we make the rules ;-)

> Short of a compelling reason not to, though, I plan to just go with
> the fix-up of my v2 patch for this functionality. Its ordering avoids
> duplication of logic (the ?: branches in the cases above) and the need
> for maintaining extra side state (the width variable) thru the state
> machine.

I thought a bit of having the state only in the width variable, but
since we also need to track signedness and the extra cases for FP,
this becomes quickly quite messy.

So yes, looks good (with some minor remarks in up-thread).

Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-06-02 15:09     ` Jₑₙₛ Gustedt
@ 2023-06-02 15:16       ` Rich Felker
  2023-06-02 15:37         ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 23+ messages in thread
From: Rich Felker @ 2023-06-02 15:16 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl, jens.gustedt

On Fri, Jun 02, 2023 at 05:09:26PM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Fri, 2 Jun 2023 10:38:00 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > The logic here for whether w64 is LPRE or LLPRE is wrong. The
> > condition for whether [u]int64_t is long or long long is not whether
> > uintptr_t or long long would be too wide (they never are, in our ABIs)
> > but whether long is long enough. All our [u]intN_t are the lowest-rank
> > type of the desired size, and this seems to be consistent with what
> > other implementations like glibc do.
> 
> I probably copied that logic wrong from somewhere. Yes, right thing is
> probably just to compare to the new `ULONG_WIDTH` macro or so.
> 
> One thing to think about could be to always map 64 to `long long`,
> just for this function here. This would always have the right size and
> representation. And since this is an implementation of the C library,
> we make the rules ;-)

Well, in musl we generally want the code to have well-defined behavior
even if it's inlined via LTO, where the types would mismatch. Note
that this is kinda a problem for scanf where va_arg is used with
void*, except that POSIX, conveniently, explicitly requires the C
(compiler) implementation accept mismatched va_arg types when both
types are pointers. This is a big hammer but it seems to be a
necessary assumption to admit implementing the %n$ form of scanf,
which POSIX requires.

Rich

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-06-02 15:16       ` Rich Felker
@ 2023-06-02 15:37         ` Jₑₙₛ Gustedt
  0 siblings, 0 replies; 23+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-06-02 15:37 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl, jens.gustedt

[-- Attachment #1: Type: text/plain, Size: 1570 bytes --]

Rich,

on Fri, 2 Jun 2023 11:16:40 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> Note that this is kinda a problem for scanf where va_arg is used
> with void*, except that POSIX, conveniently, explicitly requires the
> C (compiler) implementation accept mismatched va_arg types when both
> types are pointers.

I think that was a change for C23, we here have something similar in
7.16.1.1

  — one type is pointer to qualified or unqualified `void` and the
    other is a pointer to a qualified or unqualified character type;

Unfortunately, C can not go easily further than that, because other
than in POSIX there might be data pointers that have different
representations. (We could have said something like pointers with the
same width and representation, though.)

Also for the new `nullptr_t` type we now have

  — or, the type of the next argument is `nullptr_t` and type is a
    pointer type that has the same representation and alignment
    requirements as a pointer to a character type.

Which then makes it defined behavior to pass in a `nullptr` sentinel,
where it previously was UB when `NULL` was for example `OL` or so, or
when the application passed in an explicit zero instead of `NULL`.

Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-06-02 14:55                   ` Jₑₙₛ Gustedt
@ 2023-06-02 15:07                     ` Rich Felker
  0 siblings, 0 replies; 23+ messages in thread
From: Rich Felker @ 2023-06-02 15:07 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Fri, Jun 02, 2023 at 04:55:47PM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Fri, 2 Jun 2023 10:29:37 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > ...
> 
> > OK, some numbers. This alt version (with a couple bug fixes and
> > support for wf added), vs the full state machine version. On i386,
> > using this instead of the full state machine (which is still missing
> > 'wf') adds 246 bytes of .text but saves 440 bytes of .rodata. With wf
> > added, it will be even more of a win. So I think it makes more sense
> > to go with this version.
> 
> Ok, sounds promissing. Some remarks
> 
>  - I'd still prefer to name the constant `WPRE` instead of `PLAIN` for
>    documentation reasons.

Well it's not used for all w prefixes, only for w32/wf32/wf16. It's
really a state for "we already encountered some explicit prefix that
means plain int". I could call it W32PRE or something but that seems
misleading or at least incomplete too...?

>  - It seems that your version doesn't capture the leading 0 case. That
>    would still need an `if` condition that leads to `invalid`, I
>    guess.

It only enters the processing

	if (*s=='w' && s[1]-'1'<9u)

which is false for w0...

The subsequent automaton then rejects the unknown 'w'.

In the fixed up version with wf support, I separated the 2 conditions
(and fixed the failure to advance s before passing it to getint), so
it now just has inside the 'w' processing block:

	if (*++s-'1'>=9u) goto inval;

I'll send the full v2 shortly.

> > As an aside, since think the state table is the same for wide printf
> > as for normal, and since wide printf already depends on normal printf,
> > we could make the states table external (but hidden, of course) and
> > let wide printf reuse it, for some decent size savings and elimination
> > of source-level redundancy (DRY).
> 
> Same holds for the `pop_arg` function, which is also repeated.

Nice find, thanks! Deduplicating this requires also moving the union
type to a place it can be shared, but this is reasonable I think and
gives a place to declare the functions too.

Rich

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-06-02 14:29                 ` Rich Felker
@ 2023-06-02 14:55                   ` Jₑₙₛ Gustedt
  2023-06-02 15:07                     ` Rich Felker
  0 siblings, 1 reply; 23+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-06-02 14:55 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 1500 bytes --]

Rich,

on Fri, 2 Jun 2023 10:29:37 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> ...

> OK, some numbers. This alt version (with a couple bug fixes and
> support for wf added), vs the full state machine version. On i386,
> using this instead of the full state machine (which is still missing
> 'wf') adds 246 bytes of .text but saves 440 bytes of .rodata. With wf
> added, it will be even more of a win. So I think it makes more sense
> to go with this version.

Ok, sounds promissing. Some remarks

 - I'd still prefer to name the constant `WPRE` instead of `PLAIN` for
   documentation reasons.

 - It seems that your version doesn't capture the leading 0 case. That
   would still need an `if` condition that leads to `invalid`, I
   guess.

> As an aside, since think the state table is the same for wide printf
> as for normal, and since wide printf already depends on normal printf,
> we could make the states table external (but hidden, of course) and
> let wide printf reuse it, for some decent size savings and elimination
> of source-level redundancy (DRY).

Same holds for the `pop_arg` function, which is also repeated.

Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-30  1:48               ` Rich Felker
  2023-05-30  6:46                 ` Jₑₙₛ Gustedt
@ 2023-06-02 14:29                 ` Rich Felker
  2023-06-02 14:55                   ` Jₑₙₛ Gustedt
  1 sibling, 1 reply; 23+ messages in thread
From: Rich Felker @ 2023-06-02 14:29 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Mon, May 29, 2023 at 09:48:22PM -0400, Rich Felker wrote:
> On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote:
> > Rich,
> > 
> > on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker <dalias@libc.org>)
> > wrote:
> > 
> > > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:
> > > > Rich,
> > > > 
> > > > on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker
> > > > <dalias@libc.org>) wrote:
> > > >   
> > > > > I think you need an extra state that's "plain but not bare" that
> > > > > duplicates only the integer transitions out of it, like the l, ll,
> > > > > etc. prefix states do.  
> > > > 
> > > > Hm, the problem is that for the other prefixes the table entries
> > > > then encode the concrete type that is to be expected. We could not
> > > > do this here because the type depends on the requested width. So we
> > > > would then need to "repair" that type after the loop. A `switch` to
> > > > do that would look substantially similar to what is there, now. Do
> > > > you think that would be better?  
> > > 
> > > OK I think I can communicate better with code than natural language
> > > text, so here's a diff, completely untested, of what I had in mind.
> > 
> > that's ... ugh ... not so prety, I think
> > 
> > In my current version I track the desired width, if there is w
> > specifier, and then do the adjustments after the loop. That takes
> > indeed care of undefined character sequences.
> > 
> > I find that much better readable, and also easier to extend (later
> > there comes the `wf` case and the `128`, and perhaps some day `256`)
> 
> It sounds like the core issue is that you don't like the state machine
> approach to how musl's printf processes format specifiers. Personally,
> I like it because there's an obvious structured way to validate that
> it's accepting exactly the right things and nothing else, vs an
> approach like what you tried where you ended up accepting a lot of
> bogus specifiers.
> 
> One alternative I would consider is doing something like what you did,
> but moving it outside of/before the state machine loop, so it's not
> mixing the w* processing with the state machine. This avoids accepting
> bogus repeated w32 prefixes and similar (because there is no loop) and
> lets you get by with just adding one PLAIN state to have it start in
> (rather than BARE) after w32. I expect the overall size would be
> similar. Concept attached.
> 
> Rich

> diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
> index a712d80f..a9e4d638 100644
> --- a/src/stdio/vfprintf.c
> +++ b/src/stdio/vfprintf.c
> @@ -33,7 +33,7 @@
>  
>  enum {
>  	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
> -	ZTPRE, JPRE,
> +	ZTPRE, JPRE, PLAIN,
>  	STOP,
>  	PTR, INT, UINT, ULLONG,
>  	LONG, ULONG,
> @@ -44,9 +44,10 @@ enum {
>  	MAXSTATE
>  };
>  
> -#define S(x) [(x)-'A']
> +#define ST_BASE 'A'
> +#define S(x) [(x)-ST_BASE]
>  
> -static const unsigned char states[]['z'-'A'+1] = {
> +static const unsigned char states[]['z'-ST_BASE+1] = {
>  	{ /* 0: bare types */
>  		S('d') = INT, S('i') = INT,
>  		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
> @@ -94,10 +95,15 @@ static const unsigned char states[]['z'-'A'+1] = {
>  		S('o') = UMAX, S('u') = UMAX,
>  		S('x') = UMAX, S('X') = UMAX,
>  		S('n') = PTR,
> +	}, { /* 8: explicit-width-prefixed bare equivalent */
> +		S('d') = INT, S('i') = INT,
> +		S('o') = UINT, S('u') = UINT,
> +		S('x') = UINT, S('X') = UINT,
> +		S('n') = PTR,
>  	}
>  };
>  
> -#define OOB(x) ((unsigned)(x)-'A' > 'z'-'A')
> +#define OOB(x) ((unsigned)(x)-ST_BASE > 'z'-ST_BASE)
>  
>  union arg
>  {
> @@ -510,6 +516,13 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
>  
>  		/* Format specifier state machine */
>  		st=0;
> +		if (*s=='w' && s[1]-'1'<9u) switch (getint(&s)) {
> +		case 8:  st = HHPRE; break;
> +		case 16: st = HPRE; break;
> +		case 32: st = PLAIN; break;
> +		case 64: st = LLONG_MAX > LONG_MAX ? LLPRE : LPRE; break;
> +		default: goto inval;
> +		}
>  		do {
>  			if (OOB(*s)) goto inval;
>  			ps=st;
> @@ -547,6 +560,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
>  		switch(t) {
>  		case 'n':
>  			switch(ps) {
> +			case PLAIN:
>  			case BARE: *(int *)arg.p = cnt; break;
>  			case LPRE: *(long *)arg.p = cnt; break;
>  			case LLPRE: *(long long *)arg.p = cnt; break;

OK, some numbers. This alt version (with a couple bug fixes and
support for wf added), vs the full state machine version. On i386,
using this instead of the full state machine (which is still missing
'wf') adds 246 bytes of .text but saves 440 bytes of .rodata. With wf
added, it will be even more of a win. So I think it makes more sense
to go with this version.

As an aside, since think the state table is the same for wide printf
as for normal, and since wide printf already depends on normal printf,
we could make the states table external (but hidden, of course) and
let wide printf reuse it, for some decent size savings and elimination
of source-level redundancy (DRY).

Rich

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-31 14:04 [musl] [C23 printf 0/3] to be replaced Jens Gustedt
@ 2023-05-31 14:04 ` Jens Gustedt
  0 siblings, 0 replies; 23+ messages in thread
From: Jens Gustedt @ 2023-05-31 14:04 UTC (permalink / raw)
  To: musl

These are mandatory for C23 and concern all types for which the
platform has `int_leastN_t` and `uint_leastN_t`. For musl these types
always coincide with `intN_t` and `uintN_t` and are always present for
N equal 8, 16, 32 and 64.

They can be added for general use since all lowercase letters were
previously reserved.

Nevertheless, users that use these modifiers will see a lot of
warnings from compilers in the beginning. This is because the
compilers have not yet integrated this form of a specifier into their
correponding extensions (gcc attributes). So unfortunately also
testing this feature may be a bit noisy for the moment.

The only architecture dependend choice is the type for N == 64, which
may be `long` or `long long`. We just mimick the test that is done in
other places to compare `UINTPTR_MAX` and `UINT64_MAX` to determine
that.
---
 src/stdio/vfprintf.c  | 28 +++++++++++++++++++++++++---
 src/stdio/vfwprintf.c | 28 +++++++++++++++++++++++++---
 2 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index cbc79783..265fb7ad 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -33,7 +33,7 @@
 
 enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
-	ZTPRE, JPRE,
+	ZTPRE, JPRE, WPRE,
 	STOP,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
@@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
 		S('m') = NOARG,
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
-		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
+		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE,
 	}, { /* 1: l-prefixed */
 		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
@@ -101,6 +101,12 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
 		S('n') = PTR,
+	}, { /* 8: w-prefixed */
+		S('b') = UINT, S('B') = UINT,
+		S('d') = INT, S('i') = INT,
+		S('o') = UINT, S('u') = UINT,
+		S('x') = UINT, S('X') = UINT,
+		S('n') = PTR,
 	}
 };
 
@@ -447,7 +453,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 	int w, p, xp;
 	union arg arg;
 	int argpos;
-	unsigned st, ps;
+	unsigned st, ps, width=0;
 	int cnt=0, l=0;
 	size_t i;
 	char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4];
@@ -527,9 +533,25 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			if (OOB(*s)) goto inval;
 			ps=st;
 			st=states[st]S(*s++);
+ 			if (st == WPRE) {
+				if (*s == '0') goto inval;
+				width = getint(&s);
+                        }
 		} while (st-1<STOP);
 		if (!st) goto inval;
 
+		if (ps == WPRE) switch (width) {
+		case 8:   ps = HHPRE; st = (st == UINT) ? UCHAR  : ((st == INT) ? CHAR  : PTR); break;
+		case 16:  ps = HPRE;  st = (st == UINT) ? USHORT : ((st == INT) ? SHORT : PTR); break;
+		case 32:  ps = BARE;  break;
+#if UINTPTR_MAX >= UINT64_MAX
+		case 64:  ps = LPRE;  st = (st == UINT) ? ULONG  : ((st == INT) ? LONG  : PTR); break;
+#else
+		case 64:  ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break;
+#endif
+		default:  goto inval;
+		}
+
 		/* Check validity of argument type (nl/normal) */
 		if (st==NOARG) {
 			if (argpos>=0) goto inval;
diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
index dbc93f74..c3e81d2a 100644
--- a/src/stdio/vfwprintf.c
+++ b/src/stdio/vfwprintf.c
@@ -26,7 +26,7 @@
 
 enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
-	ZTPRE, JPRE,
+	ZTPRE, JPRE, WPRE,
 	STOP,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
@@ -50,7 +50,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
 		S('m') = NOARG,
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
-		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
+		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE,
 	}, { /* 1: l-prefixed */
 		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
@@ -94,6 +94,12 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
 		S('n') = PTR,
+	}, { /* 8: w-prefixed */
+		S('b') = UINT, S('B') = UINT,
+		S('d') = INT, S('i') = INT,
+		S('o') = UINT, S('u') = UINT,
+		S('x') = UINT, S('X') = UINT,
+		S('n') = PTR,
 	}
 };
 
@@ -163,7 +169,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 	int w, p, xp;
 	union arg arg;
 	int argpos;
-	unsigned st, ps;
+	unsigned st, ps, width=0;
 	int cnt=0, l=0;
 	int i;
 	int t;
@@ -242,9 +248,25 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 			if (OOB(*s)) goto inval;
 			ps=st;
 			st=states[st]S(*s++);
+ 			if (st == WPRE) {
+				if (*s == L'0') goto inval;
+				width = getint(&s);
+                        }
 		} while (st-1<STOP);
 		if (!st) goto inval;
 
+		if (ps == WPRE) switch (width) {
+		case 8:   ps = HHPRE; st = (st == UINT) ? UCHAR  : ((st == INT) ? CHAR  : PTR); break;
+		case 16:  ps = HPRE;  st = (st == UINT) ? USHORT : ((st == INT) ? SHORT : PTR); break;
+		case 32:  ps = BARE;  break;
+#if UINTPTR_MAX >= UINT64_MAX
+		case 64:  ps = LPRE;  st = (st == UINT) ? ULONG  : ((st == INT) ? LONG  : PTR); break;
+#else
+		case 64:  ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break;
+#endif
+		default:  goto inval;
+		}
+
 		/* Check validity of argument type (nl/normal) */
 		if (st==NOARG) {
 			if (argpos>=0) goto inval;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-30 17:28                   ` Rich Felker
  2023-05-30 18:00                     ` Rich Felker
@ 2023-05-31  7:59                     ` Jₑₙₛ Gustedt
  1 sibling, 0 replies; 23+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-05-31  7:59 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 1736 bytes --]

Rich,

on Tue, 30 May 2023 13:28:33 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> On Tue, May 30, 2023 at 08:46:36AM +0200, Jₑₙₛ Gustedt wrote:
> > Rich,
> > 
> > on Mon, 29 May 2023 21:48:22 -0400 you (Rich Felker
> > <dalias@libc.org>) wrote:
> >   
> > > On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote:  
>  [...]  
>  [...]  
> > >  [...]  
> > >  [...]  
> > >  [...]    
>  [...]  
>  [...]  
> > > 
> > > It sounds like the core issue is that you don't like the state
> > > machine approach to how musl's printf processes format
> > > specifiers.  
> > 
> > It is well suited for simple grammars, I agree with that, but here
> > the grammar is becomming more complex. Be it just for the fact that
> > you'd have to enlargen the set of possible values to match decimal
> > digits.  
> 
> I don't think it's really any more complex. …

It seems that we don't converge for this one. So it would probably
better if you do a complete patch for this that satisfies you
personally.

I need the complete functionality to build on my 128 bit patches and
to have a complete C23 system for my tests. So I will keep my variant
in my series for the moment, and rebase on top of yours once that is
ready and tested.

(Same for `memset` by the way or any other patch that you guys think
can be implemented better than I have foreseen.)

Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-30 17:28                   ` Rich Felker
@ 2023-05-30 18:00                     ` Rich Felker
  2023-05-31  7:59                     ` Jₑₙₛ Gustedt
  1 sibling, 0 replies; 23+ messages in thread
From: Rich Felker @ 2023-05-30 18:00 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Tue, May 30, 2023 at 01:28:33PM -0400, Rich Felker wrote:
> On Tue, May 30, 2023 at 08:46:36AM +0200, Jₑₙₛ Gustedt wrote:
> > Rich,
> > 
> > on Mon, 29 May 2023 21:48:22 -0400 you (Rich Felker <dalias@libc.org>)
> > wrote:
> > 
> > > On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote:
> > > > Rich,
> > > > 
> > > > on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker
> > > > <dalias@libc.org>) wrote:
> > > >   
> > > > > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:  
> > >  [...]  
> > >  [...]  
> > >  [...]  
> > > > > 
> > > > > OK I think I can communicate better with code than natural
> > > > > language text, so here's a diff, completely untested, of what I
> > > > > had in mind.  
> > > > 
> > > > that's ... ugh ... not so prety, I think
> > > > 
> > > > In my current version I track the desired width, if there is w
> > > > specifier, and then do the adjustments after the loop. That takes
> > > > indeed care of undefined character sequences.
> > > > 
> > > > I find that much better readable, and also easier to extend (later
> > > > there comes the `wf` case and the `128`, and perhaps some day
> > > > `256`)  
> > > 
> > > It sounds like the core issue is that you don't like the state machine
> > > approach to how musl's printf processes format specifiers.
> > 
> > It is well suited for simple grammars, I agree with that, but here the
> > grammar is becomming more complex. Be it just for the fact that you'd
> > have to enlargen the set of possible values to match decimal digits.
> 
> I don't think it's really any more complex. It's just a few gratuitous
> aliases that have a very small number of edge paths. The wf ones
> almost entirely collapse with the w ones, and if we wanted to get rid
> of the gratuitous separate hh/h loading, they'd entirely collapse. But
> the version I posted as code is probably enough smaller to be
> perferable. I guess I should take a look at that and see...

Ah, now I remember why we handle h/hh despite them seeing useless.
They're needed for %n, and once you distinguish them for that, there's
hardly any point in trying to treat them the same as bare elsewhere.

Rich

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-30  6:46                 ` Jₑₙₛ Gustedt
@ 2023-05-30 17:28                   ` Rich Felker
  2023-05-30 18:00                     ` Rich Felker
  2023-05-31  7:59                     ` Jₑₙₛ Gustedt
  0 siblings, 2 replies; 23+ messages in thread
From: Rich Felker @ 2023-05-30 17:28 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Tue, May 30, 2023 at 08:46:36AM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Mon, 29 May 2023 21:48:22 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote:
> > > Rich,
> > > 
> > > on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker
> > > <dalias@libc.org>) wrote:
> > >   
> > > > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:  
> >  [...]  
> >  [...]  
> >  [...]  
> > > > 
> > > > OK I think I can communicate better with code than natural
> > > > language text, so here's a diff, completely untested, of what I
> > > > had in mind.  
> > > 
> > > that's ... ugh ... not so prety, I think
> > > 
> > > In my current version I track the desired width, if there is w
> > > specifier, and then do the adjustments after the loop. That takes
> > > indeed care of undefined character sequences.
> > > 
> > > I find that much better readable, and also easier to extend (later
> > > there comes the `wf` case and the `128`, and perhaps some day
> > > `256`)  
> > 
> > It sounds like the core issue is that you don't like the state machine
> > approach to how musl's printf processes format specifiers.
> 
> It is well suited for simple grammars, I agree with that, but here the
> grammar is becomming more complex. Be it just for the fact that you'd
> have to enlargen the set of possible values to match decimal digits.

I don't think it's really any more complex. It's just a few gratuitous
aliases that have a very small number of edge paths. The wf ones
almost entirely collapse with the w ones, and if we wanted to get rid
of the gratuitous separate hh/h loading, they'd entirely collapse. But
the version I posted as code is probably enough smaller to be
perferable. I guess I should take a look at that and see...

> > Personally,
> > I like it because there's an obvious structured way to validate that
> > it's accepting exactly the right things and nothing else, vs an
> > approach like what you tried where you ended up accepting a lot of
> > bogus specifiers.
> > 
> > One alternative I would consider is doing something like what you did,
> > but moving it outside of/before the state machine loop, so it's not
> > mixing the w* processing with the state machine. This avoids accepting
> > bogus repeated w32 prefixes and similar (because there is no loop) and
> > lets you get by with just adding one PLAIN state to have it start in
> > (rather than BARE) after w32. I expect the overall size would be
> > similar. Concept attached.
> 
> I'll post what I have in a minute. It has the advantage over yours
> that it doesn't do the switch on the width inside the automaton and
> also that it doesn't have to increase the rows of the matrix.

I don't understand how that's supposed to be an improvement. It
duplicates the state machine logic for the final types with a bunch of
conditionals in code (mainly to handle %n vs integer specifiers)
rather than letting the state machine spit out the right type as its
final state naturally.

Mine didn't have any switch inside the automaton. It did it before the
automaton processing (since the 'w' can only appear at the beginning,
it can be thought of as just setting the initial state).

Rich

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-30  1:48               ` Rich Felker
@ 2023-05-30  6:46                 ` Jₑₙₛ Gustedt
  2023-05-30 17:28                   ` Rich Felker
  2023-06-02 14:29                 ` Rich Felker
  1 sibling, 1 reply; 23+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-05-30  6:46 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 2567 bytes --]

Rich,

on Mon, 29 May 2023 21:48:22 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote:
> > Rich,
> > 
> > on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker
> > <dalias@libc.org>) wrote:
> >   
> > > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:  
>  [...]  
>  [...]  
>  [...]  
> > > 
> > > OK I think I can communicate better with code than natural
> > > language text, so here's a diff, completely untested, of what I
> > > had in mind.  
> > 
> > that's ... ugh ... not so prety, I think
> > 
> > In my current version I track the desired width, if there is w
> > specifier, and then do the adjustments after the loop. That takes
> > indeed care of undefined character sequences.
> > 
> > I find that much better readable, and also easier to extend (later
> > there comes the `wf` case and the `128`, and perhaps some day
> > `256`)  
> 
> It sounds like the core issue is that you don't like the state machine
> approach to how musl's printf processes format specifiers.

It is well suited for simple grammars, I agree with that, but here the
grammar is becomming more complex. Be it just for the fact that you'd
have to enlargen the set of possible values to match decimal digits.

> Personally,
> I like it because there's an obvious structured way to validate that
> it's accepting exactly the right things and nothing else, vs an
> approach like what you tried where you ended up accepting a lot of
> bogus specifiers.
> 
> One alternative I would consider is doing something like what you did,
> but moving it outside of/before the state machine loop, so it's not
> mixing the w* processing with the state machine. This avoids accepting
> bogus repeated w32 prefixes and similar (because there is no loop) and
> lets you get by with just adding one PLAIN state to have it start in
> (rather than BARE) after w32. I expect the overall size would be
> similar. Concept attached.

I'll post what I have in a minute. It has the advantage over yours
that it doesn't do the switch on the width inside the automaton and
also that it doesn't have to increase the rows of the matrix.


Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-29 19:21             ` Jₑₙₛ Gustedt
@ 2023-05-30  1:48               ` Rich Felker
  2023-05-30  6:46                 ` Jₑₙₛ Gustedt
  2023-06-02 14:29                 ` Rich Felker
  0 siblings, 2 replies; 23+ messages in thread
From: Rich Felker @ 2023-05-30  1:48 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 2279 bytes --]

On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:
> > > Rich,
> > > 
> > > on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker
> > > <dalias@libc.org>) wrote:
> > >   
> > > > I think you need an extra state that's "plain but not bare" that
> > > > duplicates only the integer transitions out of it, like the l, ll,
> > > > etc. prefix states do.  
> > > 
> > > Hm, the problem is that for the other prefixes the table entries
> > > then encode the concrete type that is to be expected. We could not
> > > do this here because the type depends on the requested width. So we
> > > would then need to "repair" that type after the loop. A `switch` to
> > > do that would look substantially similar to what is there, now. Do
> > > you think that would be better?  
> > 
> > OK I think I can communicate better with code than natural language
> > text, so here's a diff, completely untested, of what I had in mind.
> 
> that's ... ugh ... not so prety, I think
> 
> In my current version I track the desired width, if there is w
> specifier, and then do the adjustments after the loop. That takes
> indeed care of undefined character sequences.
> 
> I find that much better readable, and also easier to extend (later
> there comes the `wf` case and the `128`, and perhaps some day `256`)

It sounds like the core issue is that you don't like the state machine
approach to how musl's printf processes format specifiers. Personally,
I like it because there's an obvious structured way to validate that
it's accepting exactly the right things and nothing else, vs an
approach like what you tried where you ended up accepting a lot of
bogus specifiers.

One alternative I would consider is doing something like what you did,
but moving it outside of/before the state machine loop, so it's not
mixing the w* processing with the state machine. This avoids accepting
bogus repeated w32 prefixes and similar (because there is no loop) and
lets you get by with just adding one PLAIN state to have it start in
(rather than BARE) after w32. I expect the overall size would be
similar. Concept attached.

Rich

[-- Attachment #2: printf-wprefix-alt.diff --]
[-- Type: text/plain, Size: 1829 bytes --]

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index a712d80f..a9e4d638 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -33,7 +33,7 @@
 
 enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
-	ZTPRE, JPRE,
+	ZTPRE, JPRE, PLAIN,
 	STOP,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
@@ -44,9 +44,10 @@ enum {
 	MAXSTATE
 };
 
-#define S(x) [(x)-'A']
+#define ST_BASE 'A'
+#define S(x) [(x)-ST_BASE]
 
-static const unsigned char states[]['z'-'A'+1] = {
+static const unsigned char states[]['z'-ST_BASE+1] = {
 	{ /* 0: bare types */
 		S('d') = INT, S('i') = INT,
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
@@ -94,10 +95,15 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
 		S('n') = PTR,
+	}, { /* 8: explicit-width-prefixed bare equivalent */
+		S('d') = INT, S('i') = INT,
+		S('o') = UINT, S('u') = UINT,
+		S('x') = UINT, S('X') = UINT,
+		S('n') = PTR,
 	}
 };
 
-#define OOB(x) ((unsigned)(x)-'A' > 'z'-'A')
+#define OOB(x) ((unsigned)(x)-ST_BASE > 'z'-ST_BASE)
 
 union arg
 {
@@ -510,6 +516,13 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 
 		/* Format specifier state machine */
 		st=0;
+		if (*s=='w' && s[1]-'1'<9u) switch (getint(&s)) {
+		case 8:  st = HHPRE; break;
+		case 16: st = HPRE; break;
+		case 32: st = PLAIN; break;
+		case 64: st = LLONG_MAX > LONG_MAX ? LLPRE : LPRE; break;
+		default: goto inval;
+		}
 		do {
 			if (OOB(*s)) goto inval;
 			ps=st;
@@ -547,6 +560,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		switch(t) {
 		case 'n':
 			switch(ps) {
+			case PLAIN:
 			case BARE: *(int *)arg.p = cnt; break;
 			case LPRE: *(long *)arg.p = cnt; break;
 			case LLPRE: *(long long *)arg.p = cnt; break;

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-29 15:46           ` Rich Felker
@ 2023-05-29 19:21             ` Jₑₙₛ Gustedt
  2023-05-30  1:48               ` Rich Felker
  0 siblings, 1 reply; 23+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-05-29 19:21 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 1692 bytes --]

Rich,

on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:
> > Rich,
> > 
> > on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker
> > <dalias@libc.org>) wrote:
> >   
> > > I think you need an extra state that's "plain but not bare" that
> > > duplicates only the integer transitions out of it, like the l, ll,
> > > etc. prefix states do.  
> > 
> > Hm, the problem is that for the other prefixes the table entries
> > then encode the concrete type that is to be expected. We could not
> > do this here because the type depends on the requested width. So we
> > would then need to "repair" that type after the loop. A `switch` to
> > do that would look substantially similar to what is there, now. Do
> > you think that would be better?  
> 
> OK I think I can communicate better with code than natural language
> text, so here's a diff, completely untested, of what I had in mind.

that's ... ugh ... not so prety, I think

In my current version I track the desired width, if there is w
specifier, and then do the adjustments after the loop. That takes
indeed care of undefined character sequences.

I find that much better readable, and also easier to extend (later
there comes the `wf` case and the `128`, and perhaps some day `256`)

Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-29  7:14         ` Jₑₙₛ Gustedt
@ 2023-05-29 15:46           ` Rich Felker
  2023-05-29 19:21             ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 23+ messages in thread
From: Rich Felker @ 2023-05-29 15:46 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 856 bytes --]

On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > I think you need an extra state that's "plain but not bare" that
> > duplicates only the integer transitions out of it, like the l, ll,
> > etc. prefix states do.
> 
> Hm, the problem is that for the other prefixes the table entries then
> encode the concrete type that is to be expected. We could not do this
> here because the type depends on the requested width. So we would then
> need to "repair" that type after the loop. A `switch` to do that would
> look substantially similar to what is there, now. Do you think that
> would be better?

OK I think I can communicate better with code than natural language
text, so here's a diff, completely untested, of what I had in mind.

Rich

[-- Attachment #2: printf-wprefix.diff --]
[-- Type: text/plain, Size: 1690 bytes --]

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index a712d80f..a751fbdf 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -33,7 +33,7 @@
 
 enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
-	ZTPRE, JPRE,
+	ZTPRE, JPRE, PLAIN, WPRE, W1PRE, W3PRE, W6PRE,
 	STOP,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
@@ -44,9 +44,10 @@ enum {
 	MAXSTATE
 };
 
-#define S(x) [(x)-'A']
+#define ST_BASE '1'
+#define S(x) [(x)-ST_BASE]
 
-static const unsigned char states[]['z'-'A'+1] = {
+static const unsigned char states[]['z'-ST_BASE+1] = {
 	{ /* 0: bare types */
 		S('d') = INT, S('i') = INT,
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
@@ -94,10 +95,24 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
 		S('n') = PTR,
+	}, { /* 8: explicit-width-prefixed bare equivalent */
+		S('d') = INT, S('i') = INT,
+		S('o') = UINT, S('u') = UINT,
+		S('x') = UINT, S('X') = UINT,
+		S('n') = PTR,
+	}, { /* 9: w-prefixed */
+		S('1') = W1PRE, S('3') = W3PRE,
+		S('6') = W6PRE, S('8') = HHPRE,
+	}, { /* 10: w1-prefixed */
+		S('6') = HPRE,
+	}, { /* 11: w3-prefixed */
+		S('2') = PLAIN,
+	}, { /* 12: w6-prefixed */
+		S('4') = LLONG_MAX > LONG_MAX ? LLPRE : LPRE,
 	}
 };
 
-#define OOB(x) ((unsigned)(x)-'A' > 'z'-'A')
+#define OOB(x) ((unsigned)(x)-ST_BASE > 'z'-ST_BASE)
 
 union arg
 {
@@ -547,6 +562,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		switch(t) {
 		case 'n':
 			switch(ps) {
+			case PLAIN:
 			case BARE: *(int *)arg.p = cnt; break;
 			case LPRE: *(long *)arg.p = cnt; break;
 			case LLPRE: *(long long *)arg.p = cnt; break;

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-26 21:03       ` Rich Felker
@ 2023-05-29  7:14         ` Jₑₙₛ Gustedt
  2023-05-29 15:46           ` Rich Felker
  0 siblings, 1 reply; 23+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-05-29  7:14 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 962 bytes --]

Rich,

on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> I think you need an extra state that's "plain but not bare" that
> duplicates only the integer transitions out of it, like the l, ll,
> etc. prefix states do.

Hm, the problem is that for the other prefixes the table entries then
encode the concrete type that is to be expected. We could not do this
here because the type depends on the requested width. So we would then
need to "repair" that type after the loop. A `switch` to do that would
look substantially similar to what is there, now. Do you think that
would be better?

Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-26 20:51     ` Jₑₙₛ Gustedt
@ 2023-05-26 21:03       ` Rich Felker
  2023-05-29  7:14         ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 23+ messages in thread
From: Rich Felker @ 2023-05-26 21:03 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Fri, May 26, 2023 at 10:51:19PM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Fri, 26 May 2023 16:31:07 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > On Fri, May 26, 2023 at 09:41:03PM +0200, Jens Gustedt wrote:
> > > These are mandatory for C23 and concern all types for which the
> > > platform has `int_leastN_t` and `uint_leastN_t`. For musl these
> > > types always coincide with `intN_t` and `uintN_t` and are always
> > > present for N equal 8, 16, 32 and 64.
> > > 
> > > They can be added for general use since all lowercase letters were
> > > previously reserved.
> > > 
> > > Nevertheless, users that use these modifiers will see a lot of
> > > warnings from compilers in the beginning. This is because the
> > > compilers have not yet integrated this form of a specifier into
> > > their correponding extensions (gcc attributes). So unfortunately
> > > also testing this feature may be a bit noisy for the moment.
> > > 
> > > The only architecture dependend choice is the type for N == 64,
> > > which may be `long` or `long long`. We just mimick the test that is
> > > done in other places to compare `UINTPTR_MAX` and `UINT64_MAX` to
> > > determine that.
> > > ---
> > >  src/stdio/vfprintf.c  | 18 ++++++++++++++++--
> > >  src/stdio/vfwprintf.c | 18 ++++++++++++++++--
> > >  2 files changed, 32 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
> > > index cbc79783..1a516663 100644
> > > --- a/src/stdio/vfprintf.c
> > > +++ b/src/stdio/vfprintf.c
> > > @@ -33,7 +33,7 @@
> > >  
> > >  enum {
> > >  	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
> > > -	ZTPRE, JPRE,
> > > +	ZTPRE, JPRE, WPRE,
> > >  	STOP,
> > >  	PTR, INT, UINT, ULLONG,
> > >  	LONG, ULONG,
> > > @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = {
> > >  		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n')
> > > = PTR, S('m') = NOARG,
> > >  		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
> > > -		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
> > > +		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
> > > S('w') = WPRE, }, { /* 1: l-prefixed */
> > >  		S('b') = ULONG, S('B') = ULONG,
> > >  		S('d') = LONG, S('i') = LONG,
> > > @@ -525,8 +525,22 @@ static int printf_core(FILE *f, const char
> > > *fmt, va_list *ap, union arg *nl_arg, st=0;
> > >  		do {
> > >  			if (OOB(*s)) goto inval;
> > > +		wpre:
> > >  			ps=st;
> > >  			st=states[st]S(*s++);
> > > +			if (st == WPRE) {
> > > +				switch (getint(&s)) {
> > > +				case 8:  st = HHPRE; goto wpre;
> > > +				case 16: st = HPRE; goto wpre;
> > > +				case 32: st = BARE; goto wpre;
> > > +#if UINTPTR_MAX >= UINT64_MAX
> > > +				case 64: st = LPRE; goto wpre;
> > > +#else
> > > +				case 64: st = LLPRE; goto wpre;
> > > +#endif
> > > +				default: goto inval;
> > > +				}
> > > +			}
> > >  		} while (st-1<STOP);
> > >  		if (!st) goto inval;  
> > 
> > I don't see how this works. While you're in this new WPRE state,
> > you're accesing an element of the states[] array with a potentially
> > out-of-bounds index, because you skipped over the bounds check to
> > ensure that the index is valid.
> 
> ah, ok, the `wpre` should probably move up a line.

OK, I was thinking you just want a break, but you're trying to avoid
a transition to BARE state getting treated as invalid by the loop
condition. However, the underlying problem here is that you can't
reuse the BARE state for something that's not BARE. If you do, formats
like %w32w32w32w32w32w32d or %w32lld or even %w32f will be allowed.

I think you need an extra state that's "plain but not bare" that
duplicates only the integer transitions out of it, like the l, ll,
etc. prefix states do.

> > I'm not clear why you're doing that
> > instead of just continuing the loop.
> > 
> > My preference would be not adding any code at all here and using the
> > existing state machine, adding state transitions for the new prefixes
> > to it, but that would require expanding the states stable to start at
> > '1' instead of 'A', and to have a couple more intermediate states. I'm
> > not sure how large that would get. There's a good chance it's
> > comparable to the size of any added code, though.
> 
> I am not sure that I understand the alternative that you are proposing.
> 
> The difficulty that lead to this, is the "BARE" state which now
> becomes accessible with a "w32" prefix. Then later, the "BARE" state
> assumes (for the stop condition of the loop) that there had not been a
> prefix, I think.

Yes, but I don't think you've solved that. It should be solvable as
above.

Rich

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-26 20:31   ` Rich Felker
@ 2023-05-26 20:51     ` Jₑₙₛ Gustedt
  2023-05-26 21:03       ` Rich Felker
  0 siblings, 1 reply; 23+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-05-26 20:51 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 4331 bytes --]

Rich,

on Fri, 26 May 2023 16:31:07 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> On Fri, May 26, 2023 at 09:41:03PM +0200, Jens Gustedt wrote:
> > These are mandatory for C23 and concern all types for which the
> > platform has `int_leastN_t` and `uint_leastN_t`. For musl these
> > types always coincide with `intN_t` and `uintN_t` and are always
> > present for N equal 8, 16, 32 and 64.
> > 
> > They can be added for general use since all lowercase letters were
> > previously reserved.
> > 
> > Nevertheless, users that use these modifiers will see a lot of
> > warnings from compilers in the beginning. This is because the
> > compilers have not yet integrated this form of a specifier into
> > their correponding extensions (gcc attributes). So unfortunately
> > also testing this feature may be a bit noisy for the moment.
> > 
> > The only architecture dependend choice is the type for N == 64,
> > which may be `long` or `long long`. We just mimick the test that is
> > done in other places to compare `UINTPTR_MAX` and `UINT64_MAX` to
> > determine that.
> > ---
> >  src/stdio/vfprintf.c  | 18 ++++++++++++++++--
> >  src/stdio/vfwprintf.c | 18 ++++++++++++++++--
> >  2 files changed, 32 insertions(+), 4 deletions(-)
> > 
> > diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
> > index cbc79783..1a516663 100644
> > --- a/src/stdio/vfprintf.c
> > +++ b/src/stdio/vfprintf.c
> > @@ -33,7 +33,7 @@
> >  
> >  enum {
> >  	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
> > -	ZTPRE, JPRE,
> > +	ZTPRE, JPRE, WPRE,
> >  	STOP,
> >  	PTR, INT, UINT, ULLONG,
> >  	LONG, ULONG,
> > @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = {
> >  		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n')
> > = PTR, S('m') = NOARG,
> >  		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
> > -		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
> > +		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
> > S('w') = WPRE, }, { /* 1: l-prefixed */
> >  		S('b') = ULONG, S('B') = ULONG,
> >  		S('d') = LONG, S('i') = LONG,
> > @@ -525,8 +525,22 @@ static int printf_core(FILE *f, const char
> > *fmt, va_list *ap, union arg *nl_arg, st=0;
> >  		do {
> >  			if (OOB(*s)) goto inval;
> > +		wpre:
> >  			ps=st;
> >  			st=states[st]S(*s++);
> > +			if (st == WPRE) {
> > +				switch (getint(&s)) {
> > +				case 8:  st = HHPRE; goto wpre;
> > +				case 16: st = HPRE; goto wpre;
> > +				case 32: st = BARE; goto wpre;
> > +#if UINTPTR_MAX >= UINT64_MAX
> > +				case 64: st = LPRE; goto wpre;
> > +#else
> > +				case 64: st = LLPRE; goto wpre;
> > +#endif
> > +				default: goto inval;
> > +				}
> > +			}
> >  		} while (st-1<STOP);
> >  		if (!st) goto inval;  
> 
> I don't see how this works. While you're in this new WPRE state,
> you're accesing an element of the states[] array with a potentially
> out-of-bounds index, because you skipped over the bounds check to
> ensure that the index is valid.

ah, ok, the `wpre` should probably move up a line.

> I'm not clear why you're doing that
> instead of just continuing the loop.
> 
> My preference would be not adding any code at all here and using the
> existing state machine, adding state transitions for the new prefixes
> to it, but that would require expanding the states stable to start at
> '1' instead of 'A', and to have a couple more intermediate states. I'm
> not sure how large that would get. There's a good chance it's
> comparable to the size of any added code, though.

I am not sure that I understand the alternative that you are proposing.

The difficulty that lead to this, is the "BARE" state which now
becomes accessible with a "w32" prefix. Then later, the "BARE" state
assumes (for the stop condition of the loop) that there had not been a
prefix, I think.

> One minor thing with the implementation using getint(): it accepts
> leading zeros, which are not valid here.

right, we should catch that.

Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-26 19:41 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt
@ 2023-05-26 20:31   ` Rich Felker
  2023-05-26 20:51     ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 23+ messages in thread
From: Rich Felker @ 2023-05-26 20:31 UTC (permalink / raw)
  To: Jens Gustedt; +Cc: musl

On Fri, May 26, 2023 at 09:41:03PM +0200, Jens Gustedt wrote:
> These are mandatory for C23 and concern all types for which the
> platform has `int_leastN_t` and `uint_leastN_t`. For musl these types
> always coincide with `intN_t` and `uintN_t` and are always present for
> N equal 8, 16, 32 and 64.
> 
> They can be added for general use since all lowercase letters were
> previously reserved.
> 
> Nevertheless, users that use these modifiers will see a lot of
> warnings from compilers in the beginning. This is because the
> compilers have not yet integrated this form of a specifier into their
> correponding extensions (gcc attributes). So unfortunately also
> testing this feature may be a bit noisy for the moment.
> 
> The only architecture dependend choice is the type for N == 64, which
> may be `long` or `long long`. We just mimick the test that is done in
> other places to compare `UINTPTR_MAX` and `UINT64_MAX` to determine
> that.
> ---
>  src/stdio/vfprintf.c  | 18 ++++++++++++++++--
>  src/stdio/vfwprintf.c | 18 ++++++++++++++++--
>  2 files changed, 32 insertions(+), 4 deletions(-)
> 
> diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
> index cbc79783..1a516663 100644
> --- a/src/stdio/vfprintf.c
> +++ b/src/stdio/vfprintf.c
> @@ -33,7 +33,7 @@
>  
>  enum {
>  	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
> -	ZTPRE, JPRE,
> +	ZTPRE, JPRE, WPRE,
>  	STOP,
>  	PTR, INT, UINT, ULLONG,
>  	LONG, ULONG,
> @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = {
>  		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
>  		S('m') = NOARG,
>  		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
> -		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
> +		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE,
>  	}, { /* 1: l-prefixed */
>  		S('b') = ULONG, S('B') = ULONG,
>  		S('d') = LONG, S('i') = LONG,
> @@ -525,8 +525,22 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
>  		st=0;
>  		do {
>  			if (OOB(*s)) goto inval;
> +		wpre:
>  			ps=st;
>  			st=states[st]S(*s++);
> +			if (st == WPRE) {
> +				switch (getint(&s)) {
> +				case 8:  st = HHPRE; goto wpre;
> +				case 16: st = HPRE; goto wpre;
> +				case 32: st = BARE; goto wpre;
> +#if UINTPTR_MAX >= UINT64_MAX
> +				case 64: st = LPRE; goto wpre;
> +#else
> +				case 64: st = LLPRE; goto wpre;
> +#endif
> +				default: goto inval;
> +				}
> +			}
>  		} while (st-1<STOP);
>  		if (!st) goto inval;

I don't see how this works. While you're in this new WPRE state,
you're accesing an element of the states[] array with a potentially
out-of-bounds index, because you skipped over the bounds check to
ensure that the index is valid. I'm not clear why you're doing that
instead of just continuing the loop.

My preference would be not adding any code at all here and using the
existing state machine, adding state transitions for the new prefixes
to it, but that would require expanding the states stable to start at
'1' instead of 'A', and to have a couple more intermediate states. I'm
not sure how large that would get. There's a good chance it's
comparable to the size of any added code, though.

One minor thing with the implementation using getint(): it accepts
leading zeros, which are not valid here.

Rich

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-26 19:41 [musl] [C23 printf 0/3] Jens Gustedt
@ 2023-05-26 19:41 ` Jens Gustedt
  2023-05-26 20:31   ` Rich Felker
  0 siblings, 1 reply; 23+ messages in thread
From: Jens Gustedt @ 2023-05-26 19:41 UTC (permalink / raw)
  To: musl

These are mandatory for C23 and concern all types for which the
platform has `int_leastN_t` and `uint_leastN_t`. For musl these types
always coincide with `intN_t` and `uintN_t` and are always present for
N equal 8, 16, 32 and 64.

They can be added for general use since all lowercase letters were
previously reserved.

Nevertheless, users that use these modifiers will see a lot of
warnings from compilers in the beginning. This is because the
compilers have not yet integrated this form of a specifier into their
correponding extensions (gcc attributes). So unfortunately also
testing this feature may be a bit noisy for the moment.

The only architecture dependend choice is the type for N == 64, which
may be `long` or `long long`. We just mimick the test that is done in
other places to compare `UINTPTR_MAX` and `UINT64_MAX` to determine
that.
---
 src/stdio/vfprintf.c  | 18 ++++++++++++++++--
 src/stdio/vfwprintf.c | 18 ++++++++++++++++--
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index cbc79783..1a516663 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -33,7 +33,7 @@
 
 enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
-	ZTPRE, JPRE,
+	ZTPRE, JPRE, WPRE,
 	STOP,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
@@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
 		S('m') = NOARG,
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
-		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
+		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE,
 	}, { /* 1: l-prefixed */
 		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
@@ -525,8 +525,22 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		st=0;
 		do {
 			if (OOB(*s)) goto inval;
+		wpre:
 			ps=st;
 			st=states[st]S(*s++);
+			if (st == WPRE) {
+				switch (getint(&s)) {
+				case 8:  st = HHPRE; goto wpre;
+				case 16: st = HPRE; goto wpre;
+				case 32: st = BARE; goto wpre;
+#if UINTPTR_MAX >= UINT64_MAX
+				case 64: st = LPRE; goto wpre;
+#else
+				case 64: st = LLPRE; goto wpre;
+#endif
+				default: goto inval;
+				}
+			}
 		} while (st-1<STOP);
 		if (!st) goto inval;
 
diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
index dbc93f74..4320761a 100644
--- a/src/stdio/vfwprintf.c
+++ b/src/stdio/vfwprintf.c
@@ -26,7 +26,7 @@
 
 enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
-	ZTPRE, JPRE,
+	ZTPRE, JPRE, WPRE,
 	STOP,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
@@ -50,7 +50,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
 		S('m') = NOARG,
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
-		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
+		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE,
 	}, { /* 1: l-prefixed */
 		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
@@ -240,8 +240,22 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 		st=0;
 		do {
 			if (OOB(*s)) goto inval;
+		wpre:
 			ps=st;
 			st=states[st]S(*s++);
+			if (st == WPRE) {
+				switch (getint(&s)) {
+				case 8:  st = HHPRE; goto wpre;
+				case 16: st = HPRE; goto wpre;
+				case 32: st = BARE; goto wpre;
+#if UINTPTR_MAX >= UINT64_MAX
+				case 64: st = LPRE; goto wpre;
+#else
+				case 64: st = LLPRE; goto wpre;
+#endif
+				default: goto inval;
+				}
+			}
 		} while (st-1<STOP);
 		if (!st) goto inval;
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2023-06-02 15:37 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <cover.1685429467.git.Jens.Gustedt@inria.fr>
2023-05-30  6:55 ` [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers Jens Gustedt
2023-05-30  6:55 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt
2023-06-02 14:38   ` Rich Felker
2023-06-02 15:09     ` Jₑₙₛ Gustedt
2023-06-02 15:16       ` Rich Felker
2023-06-02 15:37         ` Jₑₙₛ Gustedt
2023-05-30  6:55 ` [musl] [C23 printf 3/3] C23: implement the wfN length modifiers " Jens Gustedt
2023-05-31 14:04 [musl] [C23 printf 0/3] to be replaced Jens Gustedt
2023-05-31 14:04 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt
  -- strict thread matches above, loose matches on Subject: below --
2023-05-26 19:41 [musl] [C23 printf 0/3] Jens Gustedt
2023-05-26 19:41 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt
2023-05-26 20:31   ` Rich Felker
2023-05-26 20:51     ` Jₑₙₛ Gustedt
2023-05-26 21:03       ` Rich Felker
2023-05-29  7:14         ` Jₑₙₛ Gustedt
2023-05-29 15:46           ` Rich Felker
2023-05-29 19:21             ` Jₑₙₛ Gustedt
2023-05-30  1:48               ` Rich Felker
2023-05-30  6:46                 ` Jₑₙₛ Gustedt
2023-05-30 17:28                   ` Rich Felker
2023-05-30 18:00                     ` Rich Felker
2023-05-31  7:59                     ` Jₑₙₛ Gustedt
2023-06-02 14:29                 ` Rich Felker
2023-06-02 14:55                   ` Jₑₙₛ Gustedt
2023-06-02 15:07                     ` Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).