mailing list of musl libc
 help / color / mirror / code / Atom feed
* [musl] [C23 printf 0/3]
@ 2023-05-26 19:41 Jens Gustedt
  2023-05-26 19:41 ` [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers Jens Gustedt
                   ` (2 more replies)
  0 siblings, 3 replies; 22+ messages in thread
From: Jens Gustedt @ 2023-05-26 19:41 UTC (permalink / raw)
  To: musl

The changes that are mandatory for printf, excluding the 128 bit parts
for the moment.

Caution: this is not yet merged with the patch for the "b" specifier
that was already circulating.

Jens Gustedt (3):
  C23: implement the b and B printf specifiers
  C23: implement the wN length specifiers for printf
  C23: implement the wfN length modifiers for printf

 include/inttypes.h    | 34 +++++++++++++++++++++++++++++++++
 src/stdio/vfprintf.c  | 44 ++++++++++++++++++++++++++++++++++++++++---
 src/stdio/vfwprintf.c | 36 +++++++++++++++++++++++++++++++----
 3 files changed, 107 insertions(+), 7 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers
  2023-05-26 19:41 [musl] [C23 printf 0/3] Jens Gustedt
@ 2023-05-26 19:41 ` Jens Gustedt
  2023-05-26 19:41 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt
  2023-05-26 19:41 ` [musl] [C23 printf 3/3] C23: implement the wfN length modifiers " Jens Gustedt
  2 siblings, 0 replies; 22+ messages in thread
From: Jens Gustedt @ 2023-05-26 19:41 UTC (permalink / raw)
  To: musl

The b specifier is mandatory for C23. It has been reserved previously,
so we may safely add it, even for older compilation modes.

The B specifier is optional, but recommended for those implementations
that didn't have it reserved previously for other purposes.

The PRIbXXX and PRIBXXX macros are mandatory if the specifiers are
supported and may serve as feature test macros for users.
---
 include/inttypes.h    | 34 ++++++++++++++++++++++++++++++++++
 src/stdio/vfprintf.c  | 19 ++++++++++++++++++-
 src/stdio/vfwprintf.c | 11 +++++++++--
 3 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/include/inttypes.h b/include/inttypes.h
index 61dcb727..73a42e32 100644
--- a/include/inttypes.h
+++ b/include/inttypes.h
@@ -120,12 +120,44 @@ uintmax_t wcstoumax(const wchar_t *__restrict, wchar_t **__restrict, int);
 #define PRIXFAST32 "X"
 #define PRIXFAST64 __PRI64 "X"
 
+#define PRIb8  "b"
+#define PRIb16 "b"
+#define PRIb32 "b"
+#define PRIb64 __PRI64 "b"
+
+#define PRIbLEAST8  "b"
+#define PRIbLEAST16 "b"
+#define PRIbLEAST32 "b"
+#define PRIbLEAST64 __PRI64 "b"
+
+#define PRIbFAST8  "b"
+#define PRIbFAST16 "b"
+#define PRIbFAST32 "b"
+#define PRIbFAST64 __PRI64 "b"
+
+#define PRIB8  "B"
+#define PRIB16 "B"
+#define PRIB32 "B"
+#define PRIB64 __PRI64 "B"
+
+#define PRIBLEAST8  "B"
+#define PRIBLEAST16 "B"
+#define PRIBLEAST32 "B"
+#define PRIBLEAST64 __PRI64 "B"
+
+#define PRIBFAST8  "B"
+#define PRIBFAST16 "B"
+#define PRIBFAST32 "B"
+#define PRIBFAST64 __PRI64 "B"
+
 #define PRIdMAX __PRI64 "d"
 #define PRIiMAX __PRI64 "i"
 #define PRIoMAX __PRI64 "o"
 #define PRIuMAX __PRI64 "u"
 #define PRIxMAX __PRI64 "x"
 #define PRIXMAX __PRI64 "X"
+#define PRIbMAX __PRI64 "b"
+#define PRIBMAX __PRI64 "B"
 
 #define PRIdPTR __PRIPTR "d"
 #define PRIiPTR __PRIPTR "i"
@@ -133,6 +165,8 @@ uintmax_t wcstoumax(const wchar_t *__restrict, wchar_t **__restrict, int);
 #define PRIuPTR __PRIPTR "u"
 #define PRIxPTR __PRIPTR "x"
 #define PRIXPTR __PRIPTR "X"
+#define PRIbPTR __PRIPTR "b"
+#define PRIBPTR __PRIPTR "B"
 
 #define SCNd8   "hhd"
 #define SCNd16  "hd"
diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index a712d80f..cbc79783 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -48,6 +48,7 @@ enum {
 
 static const unsigned char states[]['z'-'A'+1] = {
 	{ /* 0: bare types */
+		S('b') = UINT, S('B') = UINT,
 		S('d') = INT, S('i') = INT,
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -58,6 +59,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
 		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
 	}, { /* 1: l-prefixed */
+		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
 		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -65,17 +67,20 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('c') = INT, S('s') = PTR, S('n') = PTR,
 		S('l') = LLPRE,
 	}, { /* 2: ll-prefixed */
+		S('b') = ULLONG, S('B') = ULLONG,
 		S('d') = LLONG, S('i') = LLONG,
 		S('o') = ULLONG, S('u') = ULLONG,
 		S('x') = ULLONG, S('X') = ULLONG,
 		S('n') = PTR,
 	}, { /* 3: h-prefixed */
+		S('b') = USHORT, S('B') = USHORT,
 		S('d') = SHORT, S('i') = SHORT,
 		S('o') = USHORT, S('u') = USHORT,
 		S('x') = USHORT, S('X') = USHORT,
 		S('n') = PTR,
 		S('h') = HHPRE,
 	}, { /* 4: hh-prefixed */
+		S('b') = UCHAR, S('B') = UCHAR,
 		S('d') = CHAR, S('i') = CHAR,
 		S('o') = UCHAR, S('u') = UCHAR,
 		S('x') = UCHAR, S('X') = UCHAR,
@@ -85,11 +90,13 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('E') = LDBL, S('F') = LDBL, S('G') = LDBL, S('A') = LDBL,
 		S('n') = PTR,
 	}, { /* 6: z- or t-prefixed (assumed to be same size) */
+		S('b') = SIZET, S('B') = SIZET,
 		S('d') = PDIFF, S('i') = PDIFF,
 		S('o') = SIZET, S('u') = SIZET,
 		S('x') = SIZET, S('X') = SIZET,
 		S('n') = PTR,
 	}, { /* 7: j-prefixed */
+		S('b') = UMAX, S('B') = UMAX,
 		S('d') = IMAX, S('i') = IMAX,
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
@@ -156,6 +163,12 @@ static char *fmt_x(uintmax_t x, char *s, int lower)
 	return s;
 }
 
+static char *fmt_b(uintmax_t x, char *s)
+{
+	for (; x; x>>=1) *--s = '0' + (x&1);
+	return s;
+}
+
 static char *fmt_o(uintmax_t x, char *s)
 {
 	for (; x; x>>=3) *--s = '0' + (x&7);
@@ -437,7 +450,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 	unsigned st, ps;
 	int cnt=0, l=0;
 	size_t i;
-	char buf[sizeof(uintmax_t)*3+3+LDBL_MANT_DIG/4];
+	char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4];
 	const char *prefix;
 	int t, pl;
 	wchar_t wc[2], *ws;
@@ -564,6 +577,10 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			a = fmt_x(arg.i, z, t&32);
 			if (arg.i && (fl & ALT_FORM)) prefix+=(t>>4), pl=2;
 			if (0) {
+		case 'b': case 'B':
+			a = fmt_b(arg.i, z);
+			if (arg.i && (fl & ALT_FORM)) prefix = (t == 'b' ? "0b" : "0B"), pl=2;
+			} if (0) {
 		case 'o':
 			a = fmt_o(arg.i, z);
 			if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1;
diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
index 53697701..dbc93f74 100644
--- a/src/stdio/vfwprintf.c
+++ b/src/stdio/vfwprintf.c
@@ -41,6 +41,7 @@ enum {
 
 static const unsigned char states[]['z'-'A'+1] = {
 	{ /* 0: bare types */
+		S('b') = UINT, S('B') = UINT,
 		S('d') = INT, S('i') = INT,
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -51,6 +52,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
 		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
 	}, { /* 1: l-prefixed */
+		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
 		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -58,17 +60,20 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('c') = INT, S('s') = PTR, S('n') = PTR,
 		S('l') = LLPRE,
 	}, { /* 2: ll-prefixed */
+		S('b') = ULLONG, S('B') = ULLONG,
 		S('d') = LLONG, S('i') = LLONG,
 		S('o') = ULLONG, S('u') = ULLONG,
 		S('x') = ULLONG, S('X') = ULLONG,
 		S('n') = PTR,
 	}, { /* 3: h-prefixed */
+		S('b') = USHORT, S('B') = USHORT,
 		S('d') = SHORT, S('i') = SHORT,
 		S('o') = USHORT, S('u') = USHORT,
 		S('x') = USHORT, S('X') = USHORT,
 		S('n') = PTR,
 		S('h') = HHPRE,
 	}, { /* 4: hh-prefixed */
+		S('b') = UCHAR, S('B') = UCHAR,
 		S('d') = CHAR, S('i') = CHAR,
 		S('o') = UCHAR, S('u') = UCHAR,
 		S('x') = UCHAR, S('X') = UCHAR,
@@ -78,11 +83,13 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('E') = LDBL, S('F') = LDBL, S('G') = LDBL, S('A') = LDBL,
 		S('n') = PTR,
 	}, { /* 6: z- or t-prefixed (assumed to be same size) */
+		S('b') = SIZET, S('B') = SIZET,
 		S('d') = PDIFF, S('i') = PDIFF,
 		S('o') = SIZET, S('u') = SIZET,
 		S('x') = SIZET, S('X') = SIZET,
 		S('n') = PTR,
 	}, { /* 7: j-prefixed */
+		S('b') = UMAX, S('B') = UMAX,
 		S('d') = IMAX, S('i') = IMAX,
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
@@ -145,7 +152,7 @@ static int getint(wchar_t **s) {
 
 static const char sizeprefix['y'-'a'] = {
 ['a'-'a']='L', ['e'-'a']='L', ['f'-'a']='L', ['g'-'a']='L',
-['d'-'a']='j', ['i'-'a']='j', ['o'-'a']='j', ['u'-'a']='j', ['x'-'a']='j',
+['b'-'a']='j', ['d'-'a']='j', ['i'-'a']='j', ['o'-'a']='j', ['u'-'a']='j', ['x'-'a']='j',
 ['p'-'a']='j'
 };
 
@@ -321,7 +328,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 		case 'a': case 'e': case 'f': case 'g':
 			l = fprintf(f, charfmt, w, p, arg.f);
 			break;
-		case 'd': case 'i': case 'o': case 'u': case 'x': case 'p':
+		case 'b': case 'd': case 'i': case 'o': case 'u': case 'x': case 'p':
 			l = fprintf(f, charfmt, w, p, arg.i);
 			break;
 		}
-- 
2.34.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-26 19:41 [musl] [C23 printf 0/3] Jens Gustedt
  2023-05-26 19:41 ` [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers Jens Gustedt
@ 2023-05-26 19:41 ` Jens Gustedt
  2023-05-26 20:31   ` Rich Felker
  2023-05-26 19:41 ` [musl] [C23 printf 3/3] C23: implement the wfN length modifiers " Jens Gustedt
  2 siblings, 1 reply; 22+ messages in thread
From: Jens Gustedt @ 2023-05-26 19:41 UTC (permalink / raw)
  To: musl

These are mandatory for C23 and concern all types for which the
platform has `int_leastN_t` and `uint_leastN_t`. For musl these types
always coincide with `intN_t` and `uintN_t` and are always present for
N equal 8, 16, 32 and 64.

They can be added for general use since all lowercase letters were
previously reserved.

Nevertheless, users that use these modifiers will see a lot of
warnings from compilers in the beginning. This is because the
compilers have not yet integrated this form of a specifier into their
corresponding extensions (gcc attributes). So unfortunately also
testing this feature may be a bit noisy for the moment.

The only architecture-dependent choice is the type for N == 64, which
may be `long` or `long long`. We just mimic the test that is done in
other places to compare `UINTPTR_MAX` and `UINT64_MAX` to determine
that.
---
 src/stdio/vfprintf.c  | 18 ++++++++++++++++--
 src/stdio/vfwprintf.c | 18 ++++++++++++++++--
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index cbc79783..1a516663 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -33,7 +33,7 @@
 
 enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
-	ZTPRE, JPRE,
+	ZTPRE, JPRE, WPRE,
 	STOP,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
@@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
 		S('m') = NOARG,
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
-		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
+		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE,
 	}, { /* 1: l-prefixed */
 		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
@@ -525,8 +525,22 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		st=0;
 		do {
 			if (OOB(*s)) goto inval;
+		wpre:
 			ps=st;
 			st=states[st]S(*s++);
+			if (st == WPRE) {
+				switch (getint(&s)) {
+				case 8:  st = HHPRE; goto wpre;
+				case 16: st = HPRE; goto wpre;
+				case 32: st = BARE; goto wpre;
+#if UINTPTR_MAX >= UINT64_MAX
+				case 64: st = LPRE; goto wpre;
+#else
+				case 64: st = LLPRE; goto wpre;
+#endif
+				default: goto inval;
+				}
+			}
 		} while (st-1<STOP);
 		if (!st) goto inval;
 
diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
index dbc93f74..4320761a 100644
--- a/src/stdio/vfwprintf.c
+++ b/src/stdio/vfwprintf.c
@@ -26,7 +26,7 @@
 
 enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
-	ZTPRE, JPRE,
+	ZTPRE, JPRE, WPRE,
 	STOP,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
@@ -50,7 +50,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
 		S('m') = NOARG,
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
-		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
+		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE,
 	}, { /* 1: l-prefixed */
 		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
@@ -240,8 +240,22 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 		st=0;
 		do {
 			if (OOB(*s)) goto inval;
+		wpre:
 			ps=st;
 			st=states[st]S(*s++);
+			if (st == WPRE) {
+				switch (getint(&s)) {
+				case 8:  st = HHPRE; goto wpre;
+				case 16: st = HPRE; goto wpre;
+				case 32: st = BARE; goto wpre;
+#if UINTPTR_MAX >= UINT64_MAX
+				case 64: st = LPRE; goto wpre;
+#else
+				case 64: st = LLPRE; goto wpre;
+#endif
+				default: goto inval;
+				}
+			}
 		} while (st-1<STOP);
 		if (!st) goto inval;
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [musl] [C23 printf 3/3] C23: implement the wfN length modifiers for printf
  2023-05-26 19:41 [musl] [C23 printf 0/3] Jens Gustedt
  2023-05-26 19:41 ` [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers Jens Gustedt
  2023-05-26 19:41 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt
@ 2023-05-26 19:41 ` Jens Gustedt
  2023-05-26 20:33   ` Rich Felker
  2 siblings, 1 reply; 22+ messages in thread
From: Jens Gustedt @ 2023-05-26 19:41 UTC (permalink / raw)
  To: musl

Musl only has a difference between fixed-width and fastest-width
integer types for N == 16. And even here all architectures have made
the same choice, namely mapping to 32 bit types.
---
 src/stdio/vfprintf.c  | 9 ++++++++-
 src/stdio/vfwprintf.c | 9 ++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index 1a516663..9f02a594 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -529,9 +529,16 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			ps=st;
 			st=states[st]S(*s++);
 			if (st == WPRE) {
+				// See if "fast" is requested. Difference is only
+				// relevant for a fast type of minimum width 16.
+				int fast16 = HPRE;
+				if (*s == 'f') {
+					fast16 = BARE;
+					++s;
+				}
 				switch (getint(&s)) {
 				case 8:  st = HHPRE; goto wpre;
-				case 16: st = HPRE; goto wpre;
+				case 16: st = fast16; goto wpre;
 				case 32: st = BARE; goto wpre;
 #if UINTPTR_MAX >= UINT64_MAX
 				case 64: st = LPRE; goto wpre;
diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
index 4320761a..4e9ce63a 100644
--- a/src/stdio/vfwprintf.c
+++ b/src/stdio/vfwprintf.c
@@ -244,9 +244,16 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 			ps=st;
 			st=states[st]S(*s++);
 			if (st == WPRE) {
+				// See if "fast" is requested. Difference is only
+				// relevant for a fast type of minimum width 16.
+				int fast16 = HPRE;
+				if (*s == 'f') {
+					fast16 = BARE;
+					++s;
+				}
 				switch (getint(&s)) {
 				case 8:  st = HHPRE; goto wpre;
-				case 16: st = HPRE; goto wpre;
+				case 16: st = fast16; goto wpre;
 				case 32: st = BARE; goto wpre;
 #if UINTPTR_MAX >= UINT64_MAX
 				case 64: st = LPRE; goto wpre;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-26 19:41 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt
@ 2023-05-26 20:31   ` Rich Felker
  2023-05-26 20:51     ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 22+ messages in thread
From: Rich Felker @ 2023-05-26 20:31 UTC (permalink / raw)
  To: Jens Gustedt; +Cc: musl

On Fri, May 26, 2023 at 09:41:03PM +0200, Jens Gustedt wrote:
> These are mandatory for C23 and concern all types for which the
> platform has `int_leastN_t` and `uint_leastN_t`. For musl these types
> always coincide with `intN_t` and `uintN_t` and are always present for
> N equal 8, 16, 32 and 64.
> 
> They can be added for general use since all lowercase letters were
> previously reserved.
> 
> Nevertheless, users that use these modifiers will see a lot of
> warnings from compilers in the beginning. This is because the
> compilers have not yet integrated this form of a specifier into their
> correponding extensions (gcc attributes). So unfortunately also
> testing this feature may be a bit noisy for the moment.
> 
> The only architecture dependend choice is the type for N == 64, which
> may be `long` or `long long`. We just mimick the test that is done in
> other places to compare `UINTPTR_MAX` and `UINT64_MAX` to determine
> that.
> ---
>  src/stdio/vfprintf.c  | 18 ++++++++++++++++--
>  src/stdio/vfwprintf.c | 18 ++++++++++++++++--
>  2 files changed, 32 insertions(+), 4 deletions(-)
> 
> diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
> index cbc79783..1a516663 100644
> --- a/src/stdio/vfprintf.c
> +++ b/src/stdio/vfprintf.c
> @@ -33,7 +33,7 @@
>  
>  enum {
>  	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
> -	ZTPRE, JPRE,
> +	ZTPRE, JPRE, WPRE,
>  	STOP,
>  	PTR, INT, UINT, ULLONG,
>  	LONG, ULONG,
> @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = {
>  		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
>  		S('m') = NOARG,
>  		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
> -		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
> +		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE,
>  	}, { /* 1: l-prefixed */
>  		S('b') = ULONG, S('B') = ULONG,
>  		S('d') = LONG, S('i') = LONG,
> @@ -525,8 +525,22 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
>  		st=0;
>  		do {
>  			if (OOB(*s)) goto inval;
> +		wpre:
>  			ps=st;
>  			st=states[st]S(*s++);
> +			if (st == WPRE) {
> +				switch (getint(&s)) {
> +				case 8:  st = HHPRE; goto wpre;
> +				case 16: st = HPRE; goto wpre;
> +				case 32: st = BARE; goto wpre;
> +#if UINTPTR_MAX >= UINT64_MAX
> +				case 64: st = LPRE; goto wpre;
> +#else
> +				case 64: st = LLPRE; goto wpre;
> +#endif
> +				default: goto inval;
> +				}
> +			}
>  		} while (st-1<STOP);
>  		if (!st) goto inval;

I don't see how this works. While you're in this new WPRE state,
you're accessing an element of the states[] array with a potentially
out-of-bounds index, because you skipped over the bounds check to
ensure that the index is valid. I'm not clear why you're doing that
instead of just continuing the loop.

My preference would be not adding any code at all here and using the
existing state machine, adding state transitions for the new prefixes
to it, but that would require expanding the states table to start at
'1' instead of 'A', and to have a couple more intermediate states. I'm
not sure how large that would get. There's a good chance it's
comparable to the size of any added code, though.

One minor thing with the implementation using getint(): it accepts
leading zeros, which are not valid here.

Rich

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 3/3] C23: implement the wfN length modifiers for printf
  2023-05-26 19:41 ` [musl] [C23 printf 3/3] C23: implement the wfN length modifiers " Jens Gustedt
@ 2023-05-26 20:33   ` Rich Felker
  2023-05-26 21:00     ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 22+ messages in thread
From: Rich Felker @ 2023-05-26 20:33 UTC (permalink / raw)
  To: Jens Gustedt; +Cc: musl

On Fri, May 26, 2023 at 09:41:04PM +0200, Jens Gustedt wrote:
> Musl only has a difference between fixed-width and fastest-width
> integer types for N == 16. And even here all architectures have made
> the same choice, namely mapping to 32 bit types.
> ---
>  src/stdio/vfprintf.c  | 9 ++++++++-
>  src/stdio/vfwprintf.c | 9 ++++++++-
>  2 files changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
> index 1a516663..9f02a594 100644
> --- a/src/stdio/vfprintf.c
> +++ b/src/stdio/vfprintf.c
> @@ -529,9 +529,16 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
>  			ps=st;
>  			st=states[st]S(*s++);
>  			if (st == WPRE) {
> +				// See if "fast" is requested. Difference is only
> +				// relevant for a fast type of minimum width 16.
> +				int fast16 = HPRE;
> +				if (*s == 'f') {
> +					fast16 = BARE;
> +					++s;
> +				}
>  				switch (getint(&s)) {
>  				case 8:  st = HHPRE; goto wpre;
> -				case 16: st = HPRE; goto wpre;
> +				case 16: st = fast16; goto wpre;
>  				case 32: st = BARE; goto wpre;
>  #if UINTPTR_MAX >= UINT64_MAX
>  				case 64: st = LPRE; goto wpre;
> diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
> index 4320761a..4e9ce63a 100644
> --- a/src/stdio/vfwprintf.c
> +++ b/src/stdio/vfwprintf.c
> @@ -244,9 +244,16 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
>  			ps=st;
>  			st=states[st]S(*s++);
>  			if (st == WPRE) {
> +				// See if "fast" is requested. Difference is only
> +				// relevant for a fast type of minimum width 16.
> +				int fast16 = HPRE;
> +				if (*s == 'f') {
> +					fast16 = BARE;
> +					++s;
> +				}
>  				switch (getint(&s)) {
>  				case 8:  st = HHPRE; goto wpre;
> -				case 16: st = HPRE; goto wpre;
> +				case 16: st = fast16; goto wpre;
>  				case 32: st = BARE; goto wpre;
>  #if UINTPTR_MAX >= UINT64_MAX
>  				case 64: st = LPRE; goto wpre;
> -- 
> 2.34.1

It may just work to have w8, w16, wf8, and wf16 all resolve to BARE
unconditionally, as the default argument promotions for variadic functions
force these all to be passed as int. In the current code, for h and hh
prefixes we cast down and discard any high bits, but unless the caller
invoked UB by passing an argument with mismatched type, the cast is
guaranteed not to change the value.

Rich

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-26 20:31   ` Rich Felker
@ 2023-05-26 20:51     ` Jₑₙₛ Gustedt
  2023-05-26 21:03       ` Rich Felker
  0 siblings, 1 reply; 22+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-05-26 20:51 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 4331 bytes --]

Rich,

on Fri, 26 May 2023 16:31:07 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> On Fri, May 26, 2023 at 09:41:03PM +0200, Jens Gustedt wrote:
> > These are mandatory for C23 and concern all types for which the
> > platform has `int_leastN_t` and `uint_leastN_t`. For musl these
> > types always coincide with `intN_t` and `uintN_t` and are always
> > present for N equal 8, 16, 32 and 64.
> > 
> > They can be added for general use since all lowercase letters were
> > previously reserved.
> > 
> > Nevertheless, users that use these modifiers will see a lot of
> > warnings from compilers in the beginning. This is because the
> > compilers have not yet integrated this form of a specifier into
> > their correponding extensions (gcc attributes). So unfortunately
> > also testing this feature may be a bit noisy for the moment.
> > 
> > The only architecture dependend choice is the type for N == 64,
> > which may be `long` or `long long`. We just mimick the test that is
> > done in other places to compare `UINTPTR_MAX` and `UINT64_MAX` to
> > determine that.
> > ---
> >  src/stdio/vfprintf.c  | 18 ++++++++++++++++--
> >  src/stdio/vfwprintf.c | 18 ++++++++++++++++--
> >  2 files changed, 32 insertions(+), 4 deletions(-)
> > 
> > diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
> > index cbc79783..1a516663 100644
> > --- a/src/stdio/vfprintf.c
> > +++ b/src/stdio/vfprintf.c
> > @@ -33,7 +33,7 @@
> >  
> >  enum {
> >  	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
> > -	ZTPRE, JPRE,
> > +	ZTPRE, JPRE, WPRE,
> >  	STOP,
> >  	PTR, INT, UINT, ULLONG,
> >  	LONG, ULONG,
> > @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = {
> >  		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n')
> > = PTR, S('m') = NOARG,
> >  		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
> > -		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
> > +		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
> > S('w') = WPRE, }, { /* 1: l-prefixed */
> >  		S('b') = ULONG, S('B') = ULONG,
> >  		S('d') = LONG, S('i') = LONG,
> > @@ -525,8 +525,22 @@ static int printf_core(FILE *f, const char
> > *fmt, va_list *ap, union arg *nl_arg, st=0;
> >  		do {
> >  			if (OOB(*s)) goto inval;
> > +		wpre:
> >  			ps=st;
> >  			st=states[st]S(*s++);
> > +			if (st == WPRE) {
> > +				switch (getint(&s)) {
> > +				case 8:  st = HHPRE; goto wpre;
> > +				case 16: st = HPRE; goto wpre;
> > +				case 32: st = BARE; goto wpre;
> > +#if UINTPTR_MAX >= UINT64_MAX
> > +				case 64: st = LPRE; goto wpre;
> > +#else
> > +				case 64: st = LLPRE; goto wpre;
> > +#endif
> > +				default: goto inval;
> > +				}
> > +			}
> >  		} while (st-1<STOP);
> >  		if (!st) goto inval;  
> 
> I don't see how this works. While you're in this new WPRE state,
> you're accesing an element of the states[] array with a potentially
> out-of-bounds index, because you skipped over the bounds check to
> ensure that the index is valid.

ah, ok, the `wpre` should probably move up a line.

> I'm not clear why you're doing that
> instead of just continuing the loop.
> 
> My preference would be not adding any code at all here and using the
> existing state machine, adding state transitions for the new prefixes
> to it, but that would require expanding the states stable to start at
> '1' instead of 'A', and to have a couple more intermediate states. I'm
> not sure how large that would get. There's a good chance it's
> comparable to the size of any added code, though.

I am not sure that I understand the alternative that you are proposing.

The difficulty that led to this is the "BARE" state, which now
becomes accessible with a "w32" prefix. Then later, the "BARE" state
assumes (for the stop condition of the loop) that there had not been a
prefix, I think.

> One minor thing with the implementation using getint(): it accepts
> leading zeros, which are not valid here.

right, we should catch that.

Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 3/3] C23: implement the wfN length modifiers for printf
  2023-05-26 20:33   ` Rich Felker
@ 2023-05-26 21:00     ` Jₑₙₛ Gustedt
  0 siblings, 0 replies; 22+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-05-26 21:00 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 3099 bytes --]

Rich,

on Fri, 26 May 2023 16:33:59 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> On Fri, May 26, 2023 at 09:41:04PM +0200, Jens Gustedt wrote:
> > Musl only has a difference between fixed-width and fastest-width
> > integer types for N == 16. And even here all architectures have made
> > the same choice, namely mapping to 32 bit types.
> > ---
> >  src/stdio/vfprintf.c  | 9 ++++++++-
> >  src/stdio/vfwprintf.c | 9 ++++++++-
> >  2 files changed, 16 insertions(+), 2 deletions(-)
> > 
> > diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
> > index 1a516663..9f02a594 100644
> > --- a/src/stdio/vfprintf.c
> > +++ b/src/stdio/vfprintf.c
> > @@ -529,9 +529,16 @@ static int printf_core(FILE *f, const char
> > *fmt, va_list *ap, union arg *nl_arg, ps=st;
> >  			st=states[st]S(*s++);
> >  			if (st == WPRE) {
> > +				// See if "fast" is requested.
> > Difference is only
> > +				// relevant for a fast type of
> > minimum width 16.
> > +				int fast16 = HPRE;
> > +				if (*s == 'f') {
> > +					fast16 = BARE;
> > +					++s;
> > +				}
> >  				switch (getint(&s)) {
> >  				case 8:  st = HHPRE; goto wpre;
> > -				case 16: st = HPRE; goto wpre;
> > +				case 16: st = fast16; goto wpre;
> >  				case 32: st = BARE; goto wpre;
> >  #if UINTPTR_MAX >= UINT64_MAX
> >  				case 64: st = LPRE; goto wpre;
> > diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
> > index 4320761a..4e9ce63a 100644
> > --- a/src/stdio/vfwprintf.c
> > +++ b/src/stdio/vfwprintf.c
> > @@ -244,9 +244,16 @@ static int wprintf_core(FILE *f, const wchar_t
> > *fmt, va_list *ap, union arg *nl_ ps=st;
> >  			st=states[st]S(*s++);
> >  			if (st == WPRE) {
> > +				// See if "fast" is requested.
> > Difference is only
> > +				// relevant for a fast type of
> > minimum width 16.
> > +				int fast16 = HPRE;
> > +				if (*s == 'f') {
> > +					fast16 = BARE;
> > +					++s;
> > +				}
> >  				switch (getint(&s)) {
> >  				case 8:  st = HHPRE; goto wpre;
> > -				case 16: st = HPRE; goto wpre;
> > +				case 16: st = fast16; goto wpre;
> >  				case 32: st = BARE; goto wpre;
> >  #if UINTPTR_MAX >= UINT64_MAX
> >  				case 64: st = LPRE; goto wpre;
> > -- 
> > 2.34.1  
> 
> It may just work to have w8, w16, wf8, and wf16 all resolve to BARE
> unconditionally, as the default promitions for variadic functions
> force these all to be passed as int. In the current code, for h and hh
> prefixes we cast down and discard any high bits, but unless the caller
> invoked UB by passing an argument with mismatched type, the cast is
> guaranteed not to change the value.

This might be a nice simplification; I'd have to think about this. At
first glance this sounds a bit user-unfriendly to me.

Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-26 20:51     ` Jₑₙₛ Gustedt
@ 2023-05-26 21:03       ` Rich Felker
  2023-05-29  7:14         ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 22+ messages in thread
From: Rich Felker @ 2023-05-26 21:03 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Fri, May 26, 2023 at 10:51:19PM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Fri, 26 May 2023 16:31:07 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > On Fri, May 26, 2023 at 09:41:03PM +0200, Jens Gustedt wrote:
> > > These are mandatory for C23 and concern all types for which the
> > > platform has `int_leastN_t` and `uint_leastN_t`. For musl these
> > > types always coincide with `intN_t` and `uintN_t` and are always
> > > present for N equal 8, 16, 32 and 64.
> > > 
> > > They can be added for general use since all lowercase letters were
> > > previously reserved.
> > > 
> > > Nevertheless, users that use these modifiers will see a lot of
> > > warnings from compilers in the beginning. This is because the
> > > compilers have not yet integrated this form of a specifier into
> > > their corresponding extensions (gcc attributes). So unfortunately
> > > also testing this feature may be a bit noisy for the moment.
> > > 
> > > The only architecture-dependent choice is the type for N == 64,
> > > which may be `long` or `long long`. We just mimic the test that is
> > > done in other places to compare `UINTPTR_MAX` and `UINT64_MAX` to
> > > determine that.
> > > ---
> > >  src/stdio/vfprintf.c  | 18 ++++++++++++++++--
> > >  src/stdio/vfwprintf.c | 18 ++++++++++++++++--
> > >  2 files changed, 32 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
> > > index cbc79783..1a516663 100644
> > > --- a/src/stdio/vfprintf.c
> > > +++ b/src/stdio/vfprintf.c
> > > @@ -33,7 +33,7 @@
> > >  
> > >  enum {
> > >  	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
> > > -	ZTPRE, JPRE,
> > > +	ZTPRE, JPRE, WPRE,
> > >  	STOP,
> > >  	PTR, INT, UINT, ULLONG,
> > >  	LONG, ULONG,
> > > @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = {
> > >  		S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n')
> > > = PTR, S('m') = NOARG,
> > >  		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
> > > -		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
> > > +		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
> > > S('w') = WPRE, }, { /* 1: l-prefixed */
> > >  		S('b') = ULONG, S('B') = ULONG,
> > >  		S('d') = LONG, S('i') = LONG,
> > > @@ -525,8 +525,22 @@ static int printf_core(FILE *f, const char
> > > *fmt, va_list *ap, union arg *nl_arg, st=0;
> > >  		do {
> > >  			if (OOB(*s)) goto inval;
> > > +		wpre:
> > >  			ps=st;
> > >  			st=states[st]S(*s++);
> > > +			if (st == WPRE) {
> > > +				switch (getint(&s)) {
> > > +				case 8:  st = HHPRE; goto wpre;
> > > +				case 16: st = HPRE; goto wpre;
> > > +				case 32: st = BARE; goto wpre;
> > > +#if UINTPTR_MAX >= UINT64_MAX
> > > +				case 64: st = LPRE; goto wpre;
> > > +#else
> > > +				case 64: st = LLPRE; goto wpre;
> > > +#endif
> > > +				default: goto inval;
> > > +				}
> > > +			}
> > >  		} while (st-1<STOP);
> > >  		if (!st) goto inval;  
> > 
> > I don't see how this works. While you're in this new WPRE state,
> > you're accessing an element of the states[] array with a potentially
> > out-of-bounds index, because you skipped over the bounds check to
> > ensure that the index is valid.
> 
> ah, ok, the `wpre` should probably move up a line.

OK, I was thinking you just want a break, but you're trying to avoid
a transition to BARE state getting treated as invalid by the loop
condition. However, the underlying problem here is that you can't
reuse the BARE state for something that's not BARE. If you do, formats
like %w32w32w32w32w32w32d or %w32lld or even %w32f will be allowed.

I think you need an extra state that's "plain but not bare" that
duplicates only the integer transitions out of it, like the l, ll,
etc. prefix states do.

> > I'm not clear why you're doing that
> > instead of just continuing the loop.
> > 
> > My preference would be not adding any code at all here and using the
> > existing state machine, adding state transitions for the new prefixes
> > to it, but that would require expanding the states table to start at
> > '1' instead of 'A', and to have a couple more intermediate states. I'm
> > not sure how large that would get. There's a good chance it's
> > comparable to the size of any added code, though.
> 
> I am not sure that I understand the alternative that you are proposing.
> 
> The difficulty that led to this is the "BARE" state which now
> becomes accessible with a "w32" prefix. Then later, the "BARE" state
> assumes (for the stop condition of the loop) that there had not been a
> prefix, I think.

Yes, but I don't think you've solved that. It should be solvable as
above.

Rich

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-26 21:03       ` Rich Felker
@ 2023-05-29  7:14         ` Jₑₙₛ Gustedt
  2023-05-29 15:46           ` Rich Felker
  0 siblings, 1 reply; 22+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-05-29  7:14 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 962 bytes --]

Rich,

on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> I think you need an extra state that's "plain but not bare" that
> duplicates only the integer transitions out of it, like the l, ll,
> etc. prefix states do.

Hm, the problem is that for the other prefixes the table entries then
encode the concrete type that is to be expected. We could not do this
here because the type depends on the requested width. So we would then
need to "repair" that type after the loop. A `switch` to do that would
look substantially similar to what is there, now. Do you think that
would be better?

Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-29  7:14         ` Jₑₙₛ Gustedt
@ 2023-05-29 15:46           ` Rich Felker
  2023-05-29 19:21             ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 22+ messages in thread
From: Rich Felker @ 2023-05-29 15:46 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 856 bytes --]

On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > I think you need an extra state that's "plain but not bare" that
> > duplicates only the integer transitions out of it, like the l, ll,
> > etc. prefix states do.
> 
> Hm, the problem is that for the other prefixes the table entries then
> encode the concrete type that is to be expected. We could not do this
> here because the type depends on the requested width. So we would then
> need to "repair" that type after the loop. A `switch` to do that would
> look substantially similar to what is there, now. Do you think that
> would be better?

OK I think I can communicate better with code than natural language
text, so here's a diff, completely untested, of what I had in mind.

Rich

[-- Attachment #2: printf-wprefix.diff --]
[-- Type: text/plain, Size: 1690 bytes --]

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index a712d80f..a751fbdf 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -33,7 +33,7 @@
 
 enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
-	ZTPRE, JPRE,
+	ZTPRE, JPRE, PLAIN, WPRE, W1PRE, W3PRE, W6PRE,
 	STOP,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
@@ -44,9 +44,10 @@ enum {
 	MAXSTATE
 };
 
-#define S(x) [(x)-'A']
+#define ST_BASE '1'
+#define S(x) [(x)-ST_BASE]
 
-static const unsigned char states[]['z'-'A'+1] = {
+static const unsigned char states[]['z'-ST_BASE+1] = {
 	{ /* 0: bare types */
 		S('d') = INT, S('i') = INT,
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
@@ -94,10 +95,24 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
 		S('n') = PTR,
+	}, { /* 8: explicit-width-prefixed bare equivalent */
+		S('d') = INT, S('i') = INT,
+		S('o') = UINT, S('u') = UINT,
+		S('x') = UINT, S('X') = UINT,
+		S('n') = PTR,
+	}, { /* 9: w-prefixed */
+		S('1') = W1PRE, S('3') = W3PRE,
+		S('6') = W6PRE, S('8') = HHPRE,
+	}, { /* 10: w1-prefixed */
+		S('6') = HPRE,
+	}, { /* 11: w3-prefixed */
+		S('2') = PLAIN,
+	}, { /* 12: w6-prefixed */
+		S('4') = LLONG_MAX > LONG_MAX ? LLPRE : LPRE,
 	}
 };
 
-#define OOB(x) ((unsigned)(x)-'A' > 'z'-'A')
+#define OOB(x) ((unsigned)(x)-ST_BASE > 'z'-ST_BASE)
 
 union arg
 {
@@ -547,6 +562,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		switch(t) {
 		case 'n':
 			switch(ps) {
+			case PLAIN:
 			case BARE: *(int *)arg.p = cnt; break;
 			case LPRE: *(long *)arg.p = cnt; break;
 			case LLPRE: *(long long *)arg.p = cnt; break;

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-29 15:46           ` Rich Felker
@ 2023-05-29 19:21             ` Jₑₙₛ Gustedt
  2023-05-30  1:48               ` Rich Felker
  0 siblings, 1 reply; 22+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-05-29 19:21 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 1692 bytes --]

Rich,

on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:
> > Rich,
> > 
> > on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker
> > <dalias@libc.org>) wrote:
> >   
> > > I think you need an extra state that's "plain but not bare" that
> > > duplicates only the integer transitions out of it, like the l, ll,
> > > etc. prefix states do.  
> > 
> > Hm, the problem is that for the other prefixes the table entries
> > then encode the concrete type that is to be expected. We could not
> > do this here because the type depends on the requested width. So we
> > would then need to "repair" that type after the loop. A `switch` to
> > do that would look substantially similar to what is there, now. Do
> > you think that would be better?  
> 
> OK I think I can communicate better with code than natural language
> text, so here's a diff, completely untested, of what I had in mind.

that's ... ugh ... not so pretty, I think

In my current version I track the desired width, if there is a w
specifier, and then do the adjustments after the loop. That indeed
takes care of undefined character sequences.

I find that much more readable, and also easier to extend (later
there comes the `wf` case and the `128`, and perhaps some day `256`)

Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-29 19:21             ` Jₑₙₛ Gustedt
@ 2023-05-30  1:48               ` Rich Felker
  2023-05-30  6:46                 ` Jₑₙₛ Gustedt
  2023-06-02 14:29                 ` Rich Felker
  0 siblings, 2 replies; 22+ messages in thread
From: Rich Felker @ 2023-05-30  1:48 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 2279 bytes --]

On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:
> > > Rich,
> > > 
> > > on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker
> > > <dalias@libc.org>) wrote:
> > >   
> > > > I think you need an extra state that's "plain but not bare" that
> > > > duplicates only the integer transitions out of it, like the l, ll,
> > > > etc. prefix states do.  
> > > 
> > > Hm, the problem is that for the other prefixes the table entries
> > > then encode the concrete type that is to be expected. We could not
> > > do this here because the type depends on the requested width. So we
> > > would then need to "repair" that type after the loop. A `switch` to
> > > do that would look substantially similar to what is there, now. Do
> > > you think that would be better?  
> > 
> > OK I think I can communicate better with code than natural language
> > text, so here's a diff, completely untested, of what I had in mind.
> 
> that's ... ugh ... not so prety, I think
> 
> In my current version I track the desired width, if there is w
> specifier, and then do the adjustments after the loop. That takes
> indeed care of undefined character sequences.
> 
> I find that much better readable, and also easier to extend (later
> there comes the `wf` case and the `128`, and perhaps some day `256`)

It sounds like the core issue is that you don't like the state machine
approach to how musl's printf processes format specifiers. Personally,
I like it because there's an obvious structured way to validate that
it's accepting exactly the right things and nothing else, vs an
approach like what you tried where you ended up accepting a lot of
bogus specifiers.

One alternative I would consider is doing something like what you did,
but moving it outside of/before the state machine loop, so it's not
mixing the w* processing with the state machine. This avoids accepting
bogus repeated w32 prefixes and similar (because there is no loop) and
lets you get by with just adding one PLAIN state to have it start in
(rather than BARE) after w32. I expect the overall size would be
similar. Concept attached.

Rich

[-- Attachment #2: printf-wprefix-alt.diff --]
[-- Type: text/plain, Size: 1829 bytes --]

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index a712d80f..a9e4d638 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -33,7 +33,7 @@
 
 enum {
 	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
-	ZTPRE, JPRE,
+	ZTPRE, JPRE, PLAIN,
 	STOP,
 	PTR, INT, UINT, ULLONG,
 	LONG, ULONG,
@@ -44,9 +44,10 @@ enum {
 	MAXSTATE
 };
 
-#define S(x) [(x)-'A']
+#define ST_BASE 'A'
+#define S(x) [(x)-ST_BASE]
 
-static const unsigned char states[]['z'-'A'+1] = {
+static const unsigned char states[]['z'-ST_BASE+1] = {
 	{ /* 0: bare types */
 		S('d') = INT, S('i') = INT,
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
@@ -94,10 +95,15 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
 		S('n') = PTR,
+	}, { /* 8: explicit-width-prefixed bare equivalent */
+		S('d') = INT, S('i') = INT,
+		S('o') = UINT, S('u') = UINT,
+		S('x') = UINT, S('X') = UINT,
+		S('n') = PTR,
 	}
 };
 
-#define OOB(x) ((unsigned)(x)-'A' > 'z'-'A')
+#define OOB(x) ((unsigned)(x)-ST_BASE > 'z'-ST_BASE)
 
 union arg
 {
@@ -510,6 +516,13 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 
 		/* Format specifier state machine */
 		st=0;
+		if (*s=='w' && s[1]-'1'<9u) switch (getint(&s)) {
+		case 8:  st = HHPRE; break;
+		case 16: st = HPRE; break;
+		case 32: st = PLAIN; break;
+		case 64: st = LLONG_MAX > LONG_MAX ? LLPRE : LPRE; break;
+		default: goto inval;
+		}
 		do {
 			if (OOB(*s)) goto inval;
 			ps=st;
@@ -547,6 +560,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		switch(t) {
 		case 'n':
 			switch(ps) {
+			case PLAIN:
 			case BARE: *(int *)arg.p = cnt; break;
 			case LPRE: *(long *)arg.p = cnt; break;
 			case LLPRE: *(long long *)arg.p = cnt; break;

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-30  1:48               ` Rich Felker
@ 2023-05-30  6:46                 ` Jₑₙₛ Gustedt
  2023-05-30 17:28                   ` Rich Felker
  2023-06-02 14:29                 ` Rich Felker
  1 sibling, 1 reply; 22+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-05-30  6:46 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 2567 bytes --]

Rich,

on Mon, 29 May 2023 21:48:22 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote:
> > Rich,
> > 
> > on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker
> > <dalias@libc.org>) wrote:
> >   
> > > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:  
>  [...]  
>  [...]  
>  [...]  
> > > 
> > > OK I think I can communicate better with code than natural
> > > language text, so here's a diff, completely untested, of what I
> > > had in mind.  
> > 
> > that's ... ugh ... not so prety, I think
> > 
> > In my current version I track the desired width, if there is w
> > specifier, and then do the adjustments after the loop. That takes
> > indeed care of undefined character sequences.
> > 
> > I find that much better readable, and also easier to extend (later
> > there comes the `wf` case and the `128`, and perhaps some day
> > `256`)  
> 
> It sounds like the core issue is that you don't like the state machine
> approach to how musl's printf processes format specifiers.

It is well suited for simple grammars, I agree with that, but here the
grammar is becoming more complex, if only for the fact that you'd
have to enlarge the set of possible values to match decimal digits.

> Personally,
> I like it because there's an obvious structured way to validate that
> it's accepting exactly the right things and nothing else, vs an
> approach like what you tried where you ended up accepting a lot of
> bogus specifiers.
> 
> One alternative I would consider is doing something like what you did,
> but moving it outside of/before the state machine loop, so it's not
> mixing the w* processing with the state machine. This avoids accepting
> bogus repeated w32 prefixes and similar (because there is no loop) and
> lets you get by with just adding one PLAIN state to have it start in
> (rather than BARE) after w32. I expect the overall size would be
> similar. Concept attached.

I'll post what I have in a minute. It has the advantage over yours
that it doesn't do the switch on the width inside the automaton and
also that it doesn't have to increase the rows of the matrix.


Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-30  6:46                 ` Jₑₙₛ Gustedt
@ 2023-05-30 17:28                   ` Rich Felker
  2023-05-30 18:00                     ` Rich Felker
  2023-05-31  7:59                     ` Jₑₙₛ Gustedt
  0 siblings, 2 replies; 22+ messages in thread
From: Rich Felker @ 2023-05-30 17:28 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Tue, May 30, 2023 at 08:46:36AM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Mon, 29 May 2023 21:48:22 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote:
> > > Rich,
> > > 
> > > on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker
> > > <dalias@libc.org>) wrote:
> > >   
> > > > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:  
> >  [...]  
> >  [...]  
> >  [...]  
> > > > 
> > > > OK I think I can communicate better with code than natural
> > > > language text, so here's a diff, completely untested, of what I
> > > > had in mind.  
> > > 
> > > that's ... ugh ... not so prety, I think
> > > 
> > > In my current version I track the desired width, if there is w
> > > specifier, and then do the adjustments after the loop. That takes
> > > indeed care of undefined character sequences.
> > > 
> > > I find that much better readable, and also easier to extend (later
> > > there comes the `wf` case and the `128`, and perhaps some day
> > > `256`)  
> > 
> > It sounds like the core issue is that you don't like the state machine
> > approach to how musl's printf processes format specifiers.
> 
> It is well suited for simple grammars, I agree with that, but here the
> grammar is becomming more complex. Be it just for the fact that you'd
> have to enlargen the set of possible values to match decimal digits.

I don't think it's really any more complex. It's just a few gratuitous
aliases that have a very small number of edge paths. The wf ones
almost entirely collapse with the w ones, and if we wanted to get rid
of the gratuitous separate hh/h loading, they'd entirely collapse. But
the version I posted as code is probably enough smaller to be
preferable. I guess I should take a look at that and see...

> > Personally,
> > I like it because there's an obvious structured way to validate that
> > it's accepting exactly the right things and nothing else, vs an
> > approach like what you tried where you ended up accepting a lot of
> > bogus specifiers.
> > 
> > One alternative I would consider is doing something like what you did,
> > but moving it outside of/before the state machine loop, so it's not
> > mixing the w* processing with the state machine. This avoids accepting
> > bogus repeated w32 prefixes and similar (because there is no loop) and
> > lets you get by with just adding one PLAIN state to have it start in
> > (rather than BARE) after w32. I expect the overall size would be
> > similar. Concept attached.
> 
> I'll post what I have in a minute. It has the advantage over yours
> that it doesn't do the switch on the width inside the automaton and
> also that it doesn't have to increase the rows of the matrix.

I don't understand how that's supposed to be an improvement. It
duplicates the state machine logic for the final types with a bunch of
conditionals in code (mainly to handle %n vs integer specifiers)
rather than letting the state machine spit out the right type as its
final state naturally.

Mine didn't have any switch inside the automaton. It did it before the
automaton processing (since the 'w' can only appear at the beginning,
it can be thought of as just setting the initial state).

Rich

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-30 17:28                   ` Rich Felker
@ 2023-05-30 18:00                     ` Rich Felker
  2023-05-31  7:59                     ` Jₑₙₛ Gustedt
  1 sibling, 0 replies; 22+ messages in thread
From: Rich Felker @ 2023-05-30 18:00 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Tue, May 30, 2023 at 01:28:33PM -0400, Rich Felker wrote:
> On Tue, May 30, 2023 at 08:46:36AM +0200, Jₑₙₛ Gustedt wrote:
> > Rich,
> > 
> > on Mon, 29 May 2023 21:48:22 -0400 you (Rich Felker <dalias@libc.org>)
> > wrote:
> > 
> > > On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote:
> > > > Rich,
> > > > 
> > > > on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker
> > > > <dalias@libc.org>) wrote:
> > > >   
> > > > > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:  
> > >  [...]  
> > >  [...]  
> > >  [...]  
> > > > > 
> > > > > OK I think I can communicate better with code than natural
> > > > > language text, so here's a diff, completely untested, of what I
> > > > > had in mind.  
> > > > 
> > > > that's ... ugh ... not so prety, I think
> > > > 
> > > > In my current version I track the desired width, if there is w
> > > > specifier, and then do the adjustments after the loop. That takes
> > > > indeed care of undefined character sequences.
> > > > 
> > > > I find that much better readable, and also easier to extend (later
> > > > there comes the `wf` case and the `128`, and perhaps some day
> > > > `256`)  
> > > 
> > > It sounds like the core issue is that you don't like the state machine
> > > approach to how musl's printf processes format specifiers.
> > 
> > It is well suited for simple grammars, I agree with that, but here the
> > grammar is becomming more complex. Be it just for the fact that you'd
> > have to enlargen the set of possible values to match decimal digits.
> 
> I don't think it's really any more complex. It's just a few gratuitous
> aliases that have a very small number of edge paths. The wf ones
> almost entirely collapse with the w ones, and if we wanted to get rid
> of the gratuitous separate hh/h loading, they'd entirely collapse. But
> the version I posted as code is probably enough smaller to be
> perferable. I guess I should take a look at that and see...

Ah, now I remember why we handle h/hh despite them seeming useless.
They're needed for %n, and once you distinguish them for that, there's
hardly any point in trying to treat them the same as bare elsewhere.

Rich

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-30 17:28                   ` Rich Felker
  2023-05-30 18:00                     ` Rich Felker
@ 2023-05-31  7:59                     ` Jₑₙₛ Gustedt
  1 sibling, 0 replies; 22+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-05-31  7:59 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 1736 bytes --]

Rich,

on Tue, 30 May 2023 13:28:33 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> On Tue, May 30, 2023 at 08:46:36AM +0200, Jₑₙₛ Gustedt wrote:
> > Rich,
> > 
> > on Mon, 29 May 2023 21:48:22 -0400 you (Rich Felker
> > <dalias@libc.org>) wrote:
> >   
> > > On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote:  
>  [...]  
>  [...]  
> > >  [...]  
> > >  [...]  
> > >  [...]    
>  [...]  
>  [...]  
> > > 
> > > It sounds like the core issue is that you don't like the state
> > > machine approach to how musl's printf processes format
> > > specifiers.  
> > 
> > It is well suited for simple grammars, I agree with that, but here
> > the grammar is becomming more complex. Be it just for the fact that
> > you'd have to enlargen the set of possible values to match decimal
> > digits.  
> 
> I don't think it's really any more complex. …

It seems that we don't converge on this one. So it would probably be
better if you do a complete patch for this that satisfies you
personally.

I need the complete functionality to build on my 128 bit patches and
to have a complete C23 system for my tests. So I will keep my variant
in my series for the moment, and rebase on top of yours once that is
ready and tested.

(Same for `memset` by the way or any other patch that you guys think
can be implemented better than I have foreseen.)

Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-05-30  1:48               ` Rich Felker
  2023-05-30  6:46                 ` Jₑₙₛ Gustedt
@ 2023-06-02 14:29                 ` Rich Felker
  2023-06-02 14:55                   ` Jₑₙₛ Gustedt
  1 sibling, 1 reply; 22+ messages in thread
From: Rich Felker @ 2023-06-02 14:29 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Mon, May 29, 2023 at 09:48:22PM -0400, Rich Felker wrote:
> On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote:
> > Rich,
> > 
> > on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker <dalias@libc.org>)
> > wrote:
> > 
> > > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote:
> > > > Rich,
> > > > 
> > > > on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker
> > > > <dalias@libc.org>) wrote:
> > > >   
> > > > > I think you need an extra state that's "plain but not bare" that
> > > > > duplicates only the integer transitions out of it, like the l, ll,
> > > > > etc. prefix states do.  
> > > > 
> > > > Hm, the problem is that for the other prefixes the table entries
> > > > then encode the concrete type that is to be expected. We could not
> > > > do this here because the type depends on the requested width. So we
> > > > would then need to "repair" that type after the loop. A `switch` to
> > > > do that would look substantially similar to what is there, now. Do
> > > > you think that would be better?  
> > > 
> > > OK I think I can communicate better with code than natural language
> > > text, so here's a diff, completely untested, of what I had in mind.
> > 
> > that's ... ugh ... not so prety, I think
> > 
> > In my current version I track the desired width, if there is w
> > specifier, and then do the adjustments after the loop. That takes
> > indeed care of undefined character sequences.
> > 
> > I find that much better readable, and also easier to extend (later
> > there comes the `wf` case and the `128`, and perhaps some day `256`)
> 
> It sounds like the core issue is that you don't like the state machine
> approach to how musl's printf processes format specifiers. Personally,
> I like it because there's an obvious structured way to validate that
> it's accepting exactly the right things and nothing else, vs an
> approach like what you tried where you ended up accepting a lot of
> bogus specifiers.
> 
> One alternative I would consider is doing something like what you did,
> but moving it outside of/before the state machine loop, so it's not
> mixing the w* processing with the state machine. This avoids accepting
> bogus repeated w32 prefixes and similar (because there is no loop) and
> lets you get by with just adding one PLAIN state to have it start in
> (rather than BARE) after w32. I expect the overall size would be
> similar. Concept attached.
> 
> Rich

> diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
> index a712d80f..a9e4d638 100644
> --- a/src/stdio/vfprintf.c
> +++ b/src/stdio/vfprintf.c
> @@ -33,7 +33,7 @@
>  
>  enum {
>  	BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE,
> -	ZTPRE, JPRE,
> +	ZTPRE, JPRE, PLAIN,
>  	STOP,
>  	PTR, INT, UINT, ULLONG,
>  	LONG, ULONG,
> @@ -44,9 +44,10 @@ enum {
>  	MAXSTATE
>  };
>  
> -#define S(x) [(x)-'A']
> +#define ST_BASE 'A'
> +#define S(x) [(x)-ST_BASE]
>  
> -static const unsigned char states[]['z'-'A'+1] = {
> +static const unsigned char states[]['z'-ST_BASE+1] = {
>  	{ /* 0: bare types */
>  		S('d') = INT, S('i') = INT,
>  		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
> @@ -94,10 +95,15 @@ static const unsigned char states[]['z'-'A'+1] = {
>  		S('o') = UMAX, S('u') = UMAX,
>  		S('x') = UMAX, S('X') = UMAX,
>  		S('n') = PTR,
> +	}, { /* 8: explicit-width-prefixed bare equivalent */
> +		S('d') = INT, S('i') = INT,
> +		S('o') = UINT, S('u') = UINT,
> +		S('x') = UINT, S('X') = UINT,
> +		S('n') = PTR,
>  	}
>  };
>  
> -#define OOB(x) ((unsigned)(x)-'A' > 'z'-'A')
> +#define OOB(x) ((unsigned)(x)-ST_BASE > 'z'-ST_BASE)
>  
>  union arg
>  {
> @@ -510,6 +516,13 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
>  
>  		/* Format specifier state machine */
>  		st=0;
> +		if (*s=='w' && s[1]-'1'<9u) switch (getint(&s)) {
> +		case 8:  st = HHPRE; break;
> +		case 16: st = HPRE; break;
> +		case 32: st = PLAIN; break;
> +		case 64: st = LLONG_MAX > LONG_MAX ? LLPRE : LPRE; break;
> +		default: goto inval;
> +		}
>  		do {
>  			if (OOB(*s)) goto inval;
>  			ps=st;
> @@ -547,6 +560,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
>  		switch(t) {
>  		case 'n':
>  			switch(ps) {
> +			case PLAIN:
>  			case BARE: *(int *)arg.p = cnt; break;
>  			case LPRE: *(long *)arg.p = cnt; break;
>  			case LLPRE: *(long long *)arg.p = cnt; break;

OK, some numbers. This alt version (with a couple bug fixes and
support for wf added), vs the full state machine version. On i386,
using this instead of the full state machine (which is still missing
'wf') adds 246 bytes of .text but saves 440 bytes of .rodata. With wf
added, it will be even more of a win. So I think it makes more sense
to go with this version.

As an aside, since I think the state table is the same for wide printf
as for normal, and since wide printf already depends on normal printf,
we could make the states table external (but hidden, of course) and
let wide printf reuse it, for some decent size savings and elimination
of source-level redundancy (DRY).

Rich

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-06-02 14:29                 ` Rich Felker
@ 2023-06-02 14:55                   ` Jₑₙₛ Gustedt
  2023-06-02 15:07                     ` Rich Felker
  0 siblings, 1 reply; 22+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-06-02 14:55 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 1500 bytes --]

Rich,

on Fri, 2 Jun 2023 10:29:37 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> ...

> OK, some numbers. This alt version (with a couple bug fixes and
> support for wf added), vs the full state machine version. On i386,
> using this instead of the full state machine (which is still missing
> 'wf') adds 246 bytes of .text but saves 440 bytes of .rodata. With wf
> added, it will be even more of a win. So I think it makes more sense
> to go with this version.

OK, sounds promising. Some remarks:

 - I'd still prefer to name the constant `WPRE` instead of `PLAIN` for
   documentation reasons.

 - It seems that your version doesn't capture the leading 0 case. That
   would still need an `if` condition that jumps to `inval`, I
   guess.

> As an aside, since I think the state table is the same for wide printf
> as for normal, and since wide printf already depends on normal printf,
> we could make the states table external (but hidden, of course) and
> let wide printf reuse it, for some decent size savings and elimination
> of source-level redundancy (DRY).

Same holds for the `pop_arg` function, which is also repeated.

Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf
  2023-06-02 14:55                   ` Jₑₙₛ Gustedt
@ 2023-06-02 15:07                     ` Rich Felker
  0 siblings, 0 replies; 22+ messages in thread
From: Rich Felker @ 2023-06-02 15:07 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Fri, Jun 02, 2023 at 04:55:47PM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Fri, 2 Jun 2023 10:29:37 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > ...
> 
> > OK, some numbers. This alt version (with a couple bug fixes and
> > support for wf added), vs the full state machine version. On i386,
> > using this instead of the full state machine (which is still missing
> > 'wf') adds 246 bytes of .text but saves 440 bytes of .rodata. With wf
> > added, it will be even more of a win. So I think it makes more sense
> > to go with this version.
> 
> OK, sounds promising. Some remarks:
> 
>  - I'd still prefer to name the constant `WPRE` instead of `PLAIN` for
>    documentation reasons.

Well it's not used for all w prefixes, only for w32/wf32/wf16. It's
really a state for "we already encountered some explicit prefix that
means plain int". I could call it W32PRE or something but that seems
misleading or at least incomplete too...?

>  - It seems that your version doesn't capture the leading 0 case. That
>    would still need an `if` condition that jumps to `inval`, I
>    guess.

It only enters the processing

	if (*s=='w' && s[1]-'1'<9u)

which is false for w0...

The subsequent automaton then rejects the unknown 'w'.

In the fixed up version with wf support, I separated the 2 conditions
(and fixed the failure to advance s before passing it to getint), so
it now just has inside the 'w' processing block:

	if (*++s-'1'>=9u) goto inval;

I'll send the full v2 shortly.

> > As an aside, since I think the state table is the same for wide printf
> > as for normal, and since wide printf already depends on normal printf,
> > we could make the states table external (but hidden, of course) and
> > let wide printf reuse it, for some decent size savings and elimination
> > of source-level redundancy (DRY).
> 
> Same holds for the `pop_arg` function, which is also repeated.

Nice find, thanks! Deduplicating this requires also moving the union
type to a place it can be shared, but this is reasonable I think and
gives a place to declare the functions too.

Rich

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers
  2023-05-31 14:04 [musl] [C23 printf 0/3] to be replaced Jens Gustedt
@ 2023-05-31 14:04 ` Jens Gustedt
  0 siblings, 0 replies; 22+ messages in thread
From: Jens Gustedt @ 2023-05-31 14:04 UTC (permalink / raw)
  To: musl

The b specifier is mandatory for C23. It has been reserved previously,
so we may safely add it, even for older compilation modes.

The B specifier is optional, but recommended for those implementations
that didn't have it reserved previously for other purposes.

The PRIbXXX and PRIBXXX macros are mandatory if the specifiers are
supported and may serve as feature test macros for users.
---
 include/inttypes.h    | 34 ++++++++++++++++++++++++++++++++++
 src/stdio/vfprintf.c  | 19 ++++++++++++++++++-
 src/stdio/vfwprintf.c | 11 +++++++++--
 3 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/include/inttypes.h b/include/inttypes.h
index 61dcb727..73a42e32 100644
--- a/include/inttypes.h
+++ b/include/inttypes.h
@@ -120,12 +120,44 @@ uintmax_t wcstoumax(const wchar_t *__restrict, wchar_t **__restrict, int);
 #define PRIXFAST32 "X"
 #define PRIXFAST64 __PRI64 "X"
 
+#define PRIb8  "b"
+#define PRIb16 "b"
+#define PRIb32 "b"
+#define PRIb64 __PRI64 "b"
+
+#define PRIbLEAST8  "b"
+#define PRIbLEAST16 "b"
+#define PRIbLEAST32 "b"
+#define PRIbLEAST64 __PRI64 "b"
+
+#define PRIbFAST8  "b"
+#define PRIbFAST16 "b"
+#define PRIbFAST32 "b"
+#define PRIbFAST64 __PRI64 "b"
+
+#define PRIB8  "B"
+#define PRIB16 "B"
+#define PRIB32 "B"
+#define PRIB64 __PRI64 "B"
+
+#define PRIBLEAST8  "B"
+#define PRIBLEAST16 "B"
+#define PRIBLEAST32 "B"
+#define PRIBLEAST64 __PRI64 "B"
+
+#define PRIBFAST8  "B"
+#define PRIBFAST16 "B"
+#define PRIBFAST32 "B"
+#define PRIBFAST64 __PRI64 "B"
+
 #define PRIdMAX __PRI64 "d"
 #define PRIiMAX __PRI64 "i"
 #define PRIoMAX __PRI64 "o"
 #define PRIuMAX __PRI64 "u"
 #define PRIxMAX __PRI64 "x"
 #define PRIXMAX __PRI64 "X"
+#define PRIbMAX __PRI64 "b"
+#define PRIBMAX __PRI64 "B"
 
 #define PRIdPTR __PRIPTR "d"
 #define PRIiPTR __PRIPTR "i"
@@ -133,6 +165,8 @@ uintmax_t wcstoumax(const wchar_t *__restrict, wchar_t **__restrict, int);
 #define PRIuPTR __PRIPTR "u"
 #define PRIxPTR __PRIPTR "x"
 #define PRIXPTR __PRIPTR "X"
+#define PRIbPTR __PRIPTR "b"
+#define PRIBPTR __PRIPTR "B"
 
 #define SCNd8   "hhd"
 #define SCNd16  "hd"
diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index a712d80f..cbc79783 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -48,6 +48,7 @@ enum {
 
 static const unsigned char states[]['z'-'A'+1] = {
 	{ /* 0: bare types */
+		S('b') = UINT, S('B') = UINT,
 		S('d') = INT, S('i') = INT,
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -58,6 +59,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
 		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
 	}, { /* 1: l-prefixed */
+		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
 		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -65,17 +67,20 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('c') = INT, S('s') = PTR, S('n') = PTR,
 		S('l') = LLPRE,
 	}, { /* 2: ll-prefixed */
+		S('b') = ULLONG, S('B') = ULLONG,
 		S('d') = LLONG, S('i') = LLONG,
 		S('o') = ULLONG, S('u') = ULLONG,
 		S('x') = ULLONG, S('X') = ULLONG,
 		S('n') = PTR,
 	}, { /* 3: h-prefixed */
+		S('b') = USHORT, S('B') = USHORT,
 		S('d') = SHORT, S('i') = SHORT,
 		S('o') = USHORT, S('u') = USHORT,
 		S('x') = USHORT, S('X') = USHORT,
 		S('n') = PTR,
 		S('h') = HHPRE,
 	}, { /* 4: hh-prefixed */
+		S('b') = UCHAR, S('B') = UCHAR,
 		S('d') = CHAR, S('i') = CHAR,
 		S('o') = UCHAR, S('u') = UCHAR,
 		S('x') = UCHAR, S('X') = UCHAR,
@@ -85,11 +90,13 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('E') = LDBL, S('F') = LDBL, S('G') = LDBL, S('A') = LDBL,
 		S('n') = PTR,
 	}, { /* 6: z- or t-prefixed (assumed to be same size) */
+		S('b') = SIZET, S('B') = SIZET,
 		S('d') = PDIFF, S('i') = PDIFF,
 		S('o') = SIZET, S('u') = SIZET,
 		S('x') = SIZET, S('X') = SIZET,
 		S('n') = PTR,
 	}, { /* 7: j-prefixed */
+		S('b') = UMAX, S('B') = UMAX,
 		S('d') = IMAX, S('i') = IMAX,
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
@@ -156,6 +163,12 @@ static char *fmt_x(uintmax_t x, char *s, int lower)
 	return s;
 }
 
+static char *fmt_b(uintmax_t x, char *s)
+{
+	for (; x; x>>=1) *--s = '0' + (x&1);
+	return s;
+}
+
 static char *fmt_o(uintmax_t x, char *s)
 {
 	for (; x; x>>=3) *--s = '0' + (x&7);
@@ -437,7 +450,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 	unsigned st, ps;
 	int cnt=0, l=0;
 	size_t i;
-	char buf[sizeof(uintmax_t)*3+3+LDBL_MANT_DIG/4];
+	char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4];
 	const char *prefix;
 	int t, pl;
 	wchar_t wc[2], *ws;
@@ -564,6 +577,10 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			a = fmt_x(arg.i, z, t&32);
 			if (arg.i && (fl & ALT_FORM)) prefix+=(t>>4), pl=2;
 			if (0) {
+		case 'b': case 'B':
+			a = fmt_b(arg.i, z);
+			if (arg.i && (fl & ALT_FORM)) prefix = (t == 'b' ? "0b" : "0B"), pl=2;
+			} if (0) {
 		case 'o':
 			a = fmt_o(arg.i, z);
 			if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1;
diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
index 53697701..dbc93f74 100644
--- a/src/stdio/vfwprintf.c
+++ b/src/stdio/vfwprintf.c
@@ -41,6 +41,7 @@ enum {
 
 static const unsigned char states[]['z'-'A'+1] = {
 	{ /* 0: bare types */
+		S('b') = UINT, S('B') = UINT,
 		S('d') = INT, S('i') = INT,
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -51,6 +52,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
 		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
 	}, { /* 1: l-prefixed */
+		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
 		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -58,17 +60,20 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('c') = INT, S('s') = PTR, S('n') = PTR,
 		S('l') = LLPRE,
 	}, { /* 2: ll-prefixed */
+		S('b') = ULLONG, S('B') = ULLONG,
 		S('d') = LLONG, S('i') = LLONG,
 		S('o') = ULLONG, S('u') = ULLONG,
 		S('x') = ULLONG, S('X') = ULLONG,
 		S('n') = PTR,
 	}, { /* 3: h-prefixed */
+		S('b') = USHORT, S('B') = USHORT,
 		S('d') = SHORT, S('i') = SHORT,
 		S('o') = USHORT, S('u') = USHORT,
 		S('x') = USHORT, S('X') = USHORT,
 		S('n') = PTR,
 		S('h') = HHPRE,
 	}, { /* 4: hh-prefixed */
+		S('b') = UCHAR, S('B') = UCHAR,
 		S('d') = CHAR, S('i') = CHAR,
 		S('o') = UCHAR, S('u') = UCHAR,
 		S('x') = UCHAR, S('X') = UCHAR,
@@ -78,11 +83,13 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('E') = LDBL, S('F') = LDBL, S('G') = LDBL, S('A') = LDBL,
 		S('n') = PTR,
 	}, { /* 6: z- or t-prefixed (assumed to be same size) */
+		S('b') = SIZET, S('B') = SIZET,
 		S('d') = PDIFF, S('i') = PDIFF,
 		S('o') = SIZET, S('u') = SIZET,
 		S('x') = SIZET, S('X') = SIZET,
 		S('n') = PTR,
 	}, { /* 7: j-prefixed */
+		S('b') = UMAX, S('B') = UMAX,
 		S('d') = IMAX, S('i') = IMAX,
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
@@ -145,7 +152,7 @@ static int getint(wchar_t **s) {
 
 static const char sizeprefix['y'-'a'] = {
 ['a'-'a']='L', ['e'-'a']='L', ['f'-'a']='L', ['g'-'a']='L',
-['d'-'a']='j', ['i'-'a']='j', ['o'-'a']='j', ['u'-'a']='j', ['x'-'a']='j',
+['b'-'a']='j', ['d'-'a']='j', ['i'-'a']='j', ['o'-'a']='j', ['u'-'a']='j', ['x'-'a']='j',
 ['p'-'a']='j'
 };
 
@@ -321,7 +328,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 		case 'a': case 'e': case 'f': case 'g':
 			l = fprintf(f, charfmt, w, p, arg.f);
 			break;
-		case 'd': case 'i': case 'o': case 'u': case 'x': case 'p':
+		case 'b': case 'd': case 'i': case 'o': case 'u': case 'x': case 'p':
 			l = fprintf(f, charfmt, w, p, arg.i);
 			break;
 		}
-- 
2.34.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers
       [not found] <cover.1685429467.git.Jens.Gustedt@inria.fr>
@ 2023-05-30  6:55 ` Jens Gustedt
  0 siblings, 0 replies; 22+ messages in thread
From: Jens Gustedt @ 2023-05-30  6:55 UTC (permalink / raw)
  To: musl, jens.gustedt

The b specifier is mandatory for C23. It has been reserved previously,
so we may safely add it, even for older compilation modes.

The B specifier is optional, but recommended for those implementations
that didn't have it reserved previously for other purposes.

The PRIbXXX and PRIBXXX macros are mandatory if the specifiers are
supported and may serve as feature test macros for users.
---
 include/inttypes.h    | 34 ++++++++++++++++++++++++++++++++++
 src/stdio/vfprintf.c  | 19 ++++++++++++++++++-
 src/stdio/vfwprintf.c | 11 +++++++++--
 3 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/include/inttypes.h b/include/inttypes.h
index 61dcb727..73a42e32 100644
--- a/include/inttypes.h
+++ b/include/inttypes.h
@@ -120,12 +120,44 @@ uintmax_t wcstoumax(const wchar_t *__restrict, wchar_t **__restrict, int);
 #define PRIXFAST32 "X"
 #define PRIXFAST64 __PRI64 "X"
 
+#define PRIb8  "b"
+#define PRIb16 "b"
+#define PRIb32 "b"
+#define PRIb64 __PRI64 "b"
+
+#define PRIbLEAST8  "b"
+#define PRIbLEAST16 "b"
+#define PRIbLEAST32 "b"
+#define PRIbLEAST64 __PRI64 "b"
+
+#define PRIbFAST8  "b"
+#define PRIbFAST16 "b"
+#define PRIbFAST32 "b"
+#define PRIbFAST64 __PRI64 "b"
+
+#define PRIB8  "B"
+#define PRIB16 "B"
+#define PRIB32 "B"
+#define PRIB64 __PRI64 "B"
+
+#define PRIBLEAST8  "B"
+#define PRIBLEAST16 "B"
+#define PRIBLEAST32 "B"
+#define PRIBLEAST64 __PRI64 "B"
+
+#define PRIBFAST8  "B"
+#define PRIBFAST16 "B"
+#define PRIBFAST32 "B"
+#define PRIBFAST64 __PRI64 "B"
+
 #define PRIdMAX __PRI64 "d"
 #define PRIiMAX __PRI64 "i"
 #define PRIoMAX __PRI64 "o"
 #define PRIuMAX __PRI64 "u"
 #define PRIxMAX __PRI64 "x"
 #define PRIXMAX __PRI64 "X"
+#define PRIbMAX __PRI64 "b"
+#define PRIBMAX __PRI64 "B"
 
 #define PRIdPTR __PRIPTR "d"
 #define PRIiPTR __PRIPTR "i"
@@ -133,6 +165,8 @@ uintmax_t wcstoumax(const wchar_t *__restrict, wchar_t **__restrict, int);
 #define PRIuPTR __PRIPTR "u"
 #define PRIxPTR __PRIPTR "x"
 #define PRIXPTR __PRIPTR "X"
+#define PRIbPTR __PRIPTR "b"
+#define PRIBPTR __PRIPTR "B"
 
 #define SCNd8   "hhd"
 #define SCNd16  "hd"
diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index a712d80f..cbc79783 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -48,6 +48,7 @@ enum {
 
 static const unsigned char states[]['z'-'A'+1] = {
 	{ /* 0: bare types */
+		S('b') = UINT, S('B') = UINT,
 		S('d') = INT, S('i') = INT,
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -58,6 +59,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
 		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
 	}, { /* 1: l-prefixed */
+		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
 		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -65,17 +67,20 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('c') = INT, S('s') = PTR, S('n') = PTR,
 		S('l') = LLPRE,
 	}, { /* 2: ll-prefixed */
+		S('b') = ULLONG, S('B') = ULLONG,
 		S('d') = LLONG, S('i') = LLONG,
 		S('o') = ULLONG, S('u') = ULLONG,
 		S('x') = ULLONG, S('X') = ULLONG,
 		S('n') = PTR,
 	}, { /* 3: h-prefixed */
+		S('b') = USHORT, S('B') = USHORT,
 		S('d') = SHORT, S('i') = SHORT,
 		S('o') = USHORT, S('u') = USHORT,
 		S('x') = USHORT, S('X') = USHORT,
 		S('n') = PTR,
 		S('h') = HHPRE,
 	}, { /* 4: hh-prefixed */
+		S('b') = UCHAR, S('B') = UCHAR,
 		S('d') = CHAR, S('i') = CHAR,
 		S('o') = UCHAR, S('u') = UCHAR,
 		S('x') = UCHAR, S('X') = UCHAR,
@@ -85,11 +90,13 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('E') = LDBL, S('F') = LDBL, S('G') = LDBL, S('A') = LDBL,
 		S('n') = PTR,
 	}, { /* 6: z- or t-prefixed (assumed to be same size) */
+		S('b') = SIZET, S('B') = SIZET,
 		S('d') = PDIFF, S('i') = PDIFF,
 		S('o') = SIZET, S('u') = SIZET,
 		S('x') = SIZET, S('X') = SIZET,
 		S('n') = PTR,
 	}, { /* 7: j-prefixed */
+		S('b') = UMAX, S('B') = UMAX,
 		S('d') = IMAX, S('i') = IMAX,
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
@@ -156,6 +163,12 @@ static char *fmt_x(uintmax_t x, char *s, int lower)
 	return s;
 }
 
+static char *fmt_b(uintmax_t x, char *s)
+{
+	for (; x; x>>=1) *--s = '0' + (x&1);
+	return s;
+}
+
 static char *fmt_o(uintmax_t x, char *s)
 {
 	for (; x; x>>=3) *--s = '0' + (x&7);
@@ -437,7 +450,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 	unsigned st, ps;
 	int cnt=0, l=0;
 	size_t i;
-	char buf[sizeof(uintmax_t)*3+3+LDBL_MANT_DIG/4];
+	char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4];
 	const char *prefix;
 	int t, pl;
 	wchar_t wc[2], *ws;
@@ -564,6 +577,10 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			a = fmt_x(arg.i, z, t&32);
 			if (arg.i && (fl & ALT_FORM)) prefix+=(t>>4), pl=2;
 			if (0) {
+		case 'b': case 'B':
+			a = fmt_b(arg.i, z);
+			if (arg.i && (fl & ALT_FORM)) prefix = (t == 'b' ? "0b" : "0B"), pl=2;
+			} if (0) {
 		case 'o':
 			a = fmt_o(arg.i, z);
 			if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1;
diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c
index 53697701..dbc93f74 100644
--- a/src/stdio/vfwprintf.c
+++ b/src/stdio/vfwprintf.c
@@ -41,6 +41,7 @@ enum {
 
 static const unsigned char states[]['z'-'A'+1] = {
 	{ /* 0: bare types */
+		S('b') = UINT, S('B') = UINT,
 		S('d') = INT, S('i') = INT,
 		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -51,6 +52,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE,
 		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
 	}, { /* 1: l-prefixed */
+		S('b') = ULONG, S('B') = ULONG,
 		S('d') = LONG, S('i') = LONG,
 		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
@@ -58,17 +60,20 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('c') = INT, S('s') = PTR, S('n') = PTR,
 		S('l') = LLPRE,
 	}, { /* 2: ll-prefixed */
+		S('b') = ULLONG, S('B') = ULLONG,
 		S('d') = LLONG, S('i') = LLONG,
 		S('o') = ULLONG, S('u') = ULLONG,
 		S('x') = ULLONG, S('X') = ULLONG,
 		S('n') = PTR,
 	}, { /* 3: h-prefixed */
+		S('b') = USHORT, S('B') = USHORT,
 		S('d') = SHORT, S('i') = SHORT,
 		S('o') = USHORT, S('u') = USHORT,
 		S('x') = USHORT, S('X') = USHORT,
 		S('n') = PTR,
 		S('h') = HHPRE,
 	}, { /* 4: hh-prefixed */
+		S('b') = UCHAR, S('B') = UCHAR,
 		S('d') = CHAR, S('i') = CHAR,
 		S('o') = UCHAR, S('u') = UCHAR,
 		S('x') = UCHAR, S('X') = UCHAR,
@@ -78,11 +83,13 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('E') = LDBL, S('F') = LDBL, S('G') = LDBL, S('A') = LDBL,
 		S('n') = PTR,
 	}, { /* 6: z- or t-prefixed (assumed to be same size) */
+		S('b') = SIZET, S('B') = SIZET,
 		S('d') = PDIFF, S('i') = PDIFF,
 		S('o') = SIZET, S('u') = SIZET,
 		S('x') = SIZET, S('X') = SIZET,
 		S('n') = PTR,
 	}, { /* 7: j-prefixed */
+		S('b') = UMAX, S('B') = UMAX,
 		S('d') = IMAX, S('i') = IMAX,
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
@@ -145,7 +152,7 @@ static int getint(wchar_t **s) {
 
 static const char sizeprefix['y'-'a'] = {
 ['a'-'a']='L', ['e'-'a']='L', ['f'-'a']='L', ['g'-'a']='L',
-['d'-'a']='j', ['i'-'a']='j', ['o'-'a']='j', ['u'-'a']='j', ['x'-'a']='j',
+['b'-'a']='j', ['d'-'a']='j', ['i'-'a']='j', ['o'-'a']='j', ['u'-'a']='j', ['x'-'a']='j',
 ['p'-'a']='j'
 };
 
@@ -321,7 +328,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_
 		case 'a': case 'e': case 'f': case 'g':
 			l = fprintf(f, charfmt, w, p, arg.f);
 			break;
-		case 'd': case 'i': case 'o': case 'u': case 'x': case 'p':
+		case 'b': case 'd': case 'i': case 'o': case 'u': case 'x': case 'p':
 			l = fprintf(f, charfmt, w, p, arg.i);
 			break;
 		}
-- 
2.34.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2023-06-02 15:08 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-26 19:41 [musl] [C23 printf 0/3] Jens Gustedt
2023-05-26 19:41 ` [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers Jens Gustedt
2023-05-26 19:41 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt
2023-05-26 20:31   ` Rich Felker
2023-05-26 20:51     ` Jₑₙₛ Gustedt
2023-05-26 21:03       ` Rich Felker
2023-05-29  7:14         ` Jₑₙₛ Gustedt
2023-05-29 15:46           ` Rich Felker
2023-05-29 19:21             ` Jₑₙₛ Gustedt
2023-05-30  1:48               ` Rich Felker
2023-05-30  6:46                 ` Jₑₙₛ Gustedt
2023-05-30 17:28                   ` Rich Felker
2023-05-30 18:00                     ` Rich Felker
2023-05-31  7:59                     ` Jₑₙₛ Gustedt
2023-06-02 14:29                 ` Rich Felker
2023-06-02 14:55                   ` Jₑₙₛ Gustedt
2023-06-02 15:07                     ` Rich Felker
2023-05-26 19:41 ` [musl] [C23 printf 3/3] C23: implement the wfN length modifiers " Jens Gustedt
2023-05-26 20:33   ` Rich Felker
2023-05-26 21:00     ` Jₑₙₛ Gustedt
     [not found] <cover.1685429467.git.Jens.Gustedt@inria.fr>
2023-05-30  6:55 ` [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers Jens Gustedt
2023-05-31 14:04 [musl] [C23 printf 0/3] to be replaced Jens Gustedt
2023-05-31 14:04 ` [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers Jens Gustedt

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).