* [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers [not found] <cover.1685429467.git.Jens.Gustedt@inria.fr> @ 2023-05-30 6:55 ` Jens Gustedt 2023-05-30 6:55 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt 2023-05-30 6:55 ` [musl] [C23 printf 3/3] C23: implement the wfN length modifiers " Jens Gustedt 2 siblings, 0 replies; 23+ messages in thread From: Jens Gustedt @ 2023-05-30 6:55 UTC (permalink / raw) To: musl, jens.gustedt The b specifier is mandatory for C23. It has been reserved previously, so we may safely add it, even for older compilation modes. The B specifier is optional, but recommended for those implementations that didn't have it reserved previously for other purposes. The PRIbXXX and PRIBXXX macros are mandatory if the specifiers are supported and may serve as feature test macros for users. --- include/inttypes.h | 34 ++++++++++++++++++++++++++++++++++ src/stdio/vfprintf.c | 19 ++++++++++++++++++- src/stdio/vfwprintf.c | 11 +++++++++-- 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/include/inttypes.h b/include/inttypes.h index 61dcb727..73a42e32 100644 --- a/include/inttypes.h +++ b/include/inttypes.h @@ -120,12 +120,44 @@ uintmax_t wcstoumax(const wchar_t *__restrict, wchar_t **__restrict, int); #define PRIXFAST32 "X" #define PRIXFAST64 __PRI64 "X" +#define PRIb8 "b" +#define PRIb16 "b" +#define PRIb32 "b" +#define PRIb64 __PRI64 "b" + +#define PRIbLEAST8 "b" +#define PRIbLEAST16 "b" +#define PRIbLEAST32 "b" +#define PRIbLEAST64 __PRI64 "b" + +#define PRIbFAST8 "b" +#define PRIbFAST16 "b" +#define PRIbFAST32 "b" +#define PRIbFAST64 __PRI64 "b" + +#define PRIB8 "B" +#define PRIB16 "B" +#define PRIB32 "B" +#define PRIB64 __PRI64 "B" + +#define PRIBLEAST8 "B" +#define PRIBLEAST16 "B" +#define PRIBLEAST32 "B" +#define PRIBLEAST64 __PRI64 "B" + +#define PRIBFAST8 "B" +#define PRIBFAST16 "B" +#define PRIBFAST32 "B" +#define PRIBFAST64 __PRI64 "B" + #define PRIdMAX __PRI64 "d" #define PRIiMAX __PRI64 "i" #define PRIoMAX __PRI64 "o" #define PRIuMAX __PRI64 "u" #define PRIxMAX __PRI64 "x" #define PRIXMAX __PRI64 "X" +#define PRIbMAX __PRI64 "b" +#define PRIBMAX __PRI64 "B" #define PRIdPTR __PRIPTR "d" #define PRIiPTR __PRIPTR "i" @@ -133,6 +165,8 @@ uintmax_t wcstoumax(const wchar_t *__restrict, wchar_t **__restrict, int); #define PRIuPTR __PRIPTR "u" #define PRIxPTR __PRIPTR "x" #define PRIXPTR __PRIPTR "X" +#define PRIbPTR __PRIPTR "b" +#define PRIBPTR __PRIPTR "B" #define SCNd8 "hhd" #define SCNd16 "hd" diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c index a712d80f..cbc79783 100644 --- a/src/stdio/vfprintf.c +++ b/src/stdio/vfprintf.c @@ -48,6 +48,7 @@ enum { static const unsigned char states[]['z'-'A'+1] = { { /* 0: bare types */ + S('b') = UINT, S('B') = UINT, S('d') = INT, S('i') = INT, S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT, S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL, @@ -58,6 +59,7 @@ static const unsigned char states[]['z'-'A'+1] = { S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE, S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, }, { /* 1: l-prefixed */ + S('b') = ULONG, S('B') = ULONG, S('d') = LONG, S('i') = LONG, S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG, S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL, @@ -65,17 +67,20 @@ static const unsigned char states[]['z'-'A'+1] = { S('c') = INT, S('s') = PTR, S('n') = PTR, S('l') = LLPRE, }, { /* 2: ll-prefixed */ + S('b') = ULLONG, S('B') = ULLONG, S('d') = LLONG, S('i') = LLONG, S('o') = ULLONG, S('u') = ULLONG, S('x') = ULLONG, S('X') = ULLONG, S('n') = PTR, }, { /* 3: h-prefixed */ + S('b') = USHORT, S('B') = USHORT, S('d') = SHORT, S('i') = SHORT, S('o') = USHORT, S('u') = USHORT, S('x') = USHORT, S('X') = USHORT, S('n') = PTR, S('h') = HHPRE, }, { /* 4: hh-prefixed */ + S('b') = UCHAR, S('B') = UCHAR, S('d') = CHAR, S('i') = CHAR, S('o') = UCHAR, S('u') = UCHAR, S('x') = UCHAR, S('X') = UCHAR, @@ -85,11 +90,13 @@ static const unsigned char states[]['z'-'A'+1] = { S('E') = LDBL, S('F') = LDBL, S('G') = LDBL, S('A') = LDBL, S('n') = PTR, }, { /* 6: z- or t-prefixed (assumed to be same size) */ + S('b') = SIZET, S('B') = SIZET, S('d') = PDIFF, S('i') = PDIFF, S('o') = SIZET, S('u') = SIZET, S('x') = SIZET, S('X') = SIZET, S('n') = PTR, }, { /* 7: j-prefixed */ + S('b') = UMAX, S('B') = UMAX, S('d') = IMAX, S('i') = IMAX, S('o') = UMAX, S('u') = UMAX, S('x') = UMAX, S('X') = UMAX, @@ -156,6 +163,12 @@ static char *fmt_x(uintmax_t x, char *s, int lower) return s; } +static char *fmt_b(uintmax_t x, char *s) +{ + for (; x; x>>=1) *--s = '0' + (x&1); + return s; +} + static char *fmt_o(uintmax_t x, char *s) { for (; x; x>>=3) *--s = '0' + (x&7); @@ -437,7 +450,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, unsigned st, ps; int cnt=0, l=0; size_t i; - char buf[sizeof(uintmax_t)*3+3+LDBL_MANT_DIG/4]; + char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4]; const char *prefix; int t, pl; wchar_t wc[2], *ws; @@ -564,6 +577,10 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, a = fmt_x(arg.i, z, t&32); if (arg.i && (fl & ALT_FORM)) prefix+=(t>>4), pl=2; if (0) { + case 'b': case 'B': + a = fmt_b(arg.i, z); + if (arg.i && (fl & ALT_FORM)) prefix = (t == 'b' ? "0b" : "0B"), pl=2; + } if (0) { case 'o': a = fmt_o(arg.i, z); if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1; diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c index 53697701..dbc93f74 100644 --- a/src/stdio/vfwprintf.c +++ b/src/stdio/vfwprintf.c @@ -41,6 +41,7 @@ enum { static const unsigned char states[]['z'-'A'+1] = { { /* 0: bare types */ + S('b') = UINT, S('B') = UINT, S('d') = INT, S('i') = INT, S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT, S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL, @@ -51,6 +52,7 @@ static const unsigned char states[]['z'-'A'+1] = { S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE, S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, }, { /* 1: l-prefixed */ + S('b') = ULONG, S('B') = ULONG, S('d') = LONG, S('i') = LONG, S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG, S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL, @@ -58,17 +60,20 @@ static const unsigned char states[]['z'-'A'+1] = { S('c') = INT, S('s') = PTR, S('n') = PTR, S('l') = LLPRE, }, { /* 2: ll-prefixed */ + S('b') = ULLONG, S('B') = ULLONG, S('d') = LLONG, S('i') = LLONG, S('o') = ULLONG, S('u') = ULLONG, S('x') = ULLONG, S('X') = ULLONG, S('n') = PTR, }, { /* 3: h-prefixed */ + S('b') = USHORT, S('B') = USHORT, S('d') = SHORT, S('i') = SHORT, S('o') = USHORT, S('u') = USHORT, S('x') = USHORT, S('X') = USHORT, S('n') = PTR, S('h') = HHPRE, }, { /* 4: hh-prefixed */ + S('b') = UCHAR, S('B') = UCHAR, S('d') = CHAR, S('i') = CHAR, S('o') = UCHAR, S('u') = UCHAR, S('x') = UCHAR, S('X') = UCHAR, @@ -78,11 +83,13 @@ static const unsigned char states[]['z'-'A'+1] = { S('E') = LDBL, S('F') = LDBL, S('G') = LDBL, S('A') = LDBL, S('n') = PTR, }, { /* 6: z- or t-prefixed (assumed to be same size) */ + S('b') = SIZET, S('B') = SIZET, S('d') = PDIFF, S('i') = PDIFF, S('o') = SIZET, S('u') = SIZET, S('x') = SIZET, S('X') = SIZET, S('n') = PTR, }, { /* 7: j-prefixed */ + S('b') = UMAX, S('B') = UMAX, S('d') = IMAX, S('i') = IMAX, S('o') = UMAX, S('u') = UMAX, S('x') = UMAX, S('X') = UMAX, @@ -145,7 +152,7 @@ static int getint(wchar_t **s) { static const char sizeprefix['y'-'a'] = { ['a'-'a']='L', ['e'-'a']='L', ['f'-'a']='L', ['g'-'a']='L', -['d'-'a']='j', ['i'-'a']='j', ['o'-'a']='j', ['u'-'a']='j', ['x'-'a']='j', +['b'-'a']='j', ['d'-'a']='j', ['i'-'a']='j', ['o'-'a']='j', ['u'-'a']='j', ['x'-'a']='j', ['p'-'a']='j' }; @@ -321,7 +328,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_ case 'a': case 'e': case 'f': case 'g': l = fprintf(f, charfmt, w, p, arg.f); break; - case 'd': case 'i': case 'o': case 'u': case 'x': case 'p': + case 'b': case 'd': case 'i': case 'o': case 'u': case 'x': case 'p': l = fprintf(f, charfmt, w, p, arg.i); break; } -- 2.34.1 ^ permalink raw reply [flat|nested] 23+ messages in thread
* [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf [not found] <cover.1685429467.git.Jens.Gustedt@inria.fr> 2023-05-30 6:55 ` [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers Jens Gustedt @ 2023-05-30 6:55 ` Jens Gustedt 2023-06-02 14:38 ` Rich Felker 2023-05-30 6:55 ` [musl] [C23 printf 3/3] C23: implement the wfN length modifiers " Jens Gustedt 2 siblings, 1 reply; 23+ messages in thread From: Jens Gustedt @ 2023-05-30 6:55 UTC (permalink / raw) To: musl, jens.gustedt These are mandatory for C23 and concern all types for which the platform has `int_leastN_t` and `uint_leastN_t`. For musl these types always coincide with `intN_t` and `uintN_t` and are always present for N equal 8, 16, 32 and 64. They can be added for general use since all lowercase letters were previously reserved. Nevertheless, users that use these modifiers will see a lot of warnings from compilers in the beginning. This is because the compilers have not yet integrated this form of a specifier into their correponding extensions (gcc attributes). So unfortunately also testing this feature may be a bit noisy for the moment. The only architecture dependend choice is the type for N == 64, which may be `long` or `long long`. We just mimick the test that is done in other places to compare `UINTPTR_MAX` and `UINT64_MAX` to determine that. --- src/stdio/vfprintf.c | 28 +++++++++++++++++++++++++--- src/stdio/vfwprintf.c | 28 +++++++++++++++++++++++++--- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c index cbc79783..265fb7ad 100644 --- a/src/stdio/vfprintf.c +++ b/src/stdio/vfprintf.c @@ -33,7 +33,7 @@ enum { BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, - ZTPRE, JPRE, + ZTPRE, JPRE, WPRE, STOP, PTR, INT, UINT, ULLONG, LONG, ULONG, @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = { S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR, S('m') = NOARG, S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE, - S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, + S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE, }, { /* 1: l-prefixed */ S('b') = ULONG, S('B') = ULONG, S('d') = LONG, S('i') = LONG, @@ -101,6 +101,12 @@ static const unsigned char states[]['z'-'A'+1] = { S('o') = UMAX, S('u') = UMAX, S('x') = UMAX, S('X') = UMAX, S('n') = PTR, + }, { /* 8: w-prefixed */ + S('b') = UINT, S('B') = UINT, + S('d') = INT, S('i') = INT, + S('o') = UINT, S('u') = UINT, + S('x') = UINT, S('X') = UINT, + S('n') = PTR, } }; @@ -447,7 +453,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, int w, p, xp; union arg arg; int argpos; - unsigned st, ps; + unsigned st, ps, width=0; int cnt=0, l=0; size_t i; char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4]; @@ -527,9 +533,25 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, if (OOB(*s)) goto inval; ps=st; st=states[st]S(*s++); + if (st == WPRE) { + if (*s == '0') goto inval; + width = getint(&s); + } } while (st-1<STOP); if (!st) goto inval; + if (ps == WPRE) switch (width) { + case 8: ps = HHPRE; st = (st == UINT) ? UCHAR : ((st == INT) ? CHAR : PTR); break; + case 16: ps = HPRE; st = (st == UINT) ? USHORT : ((st == INT) ? SHORT : PTR); break; + case 32: ps = BARE; break; +#if UINTPTR_MAX >= UINT64_MAX + case 64: ps = LPRE; st = (st == UINT) ? ULONG : ((st == INT) ? LONG : PTR); break; +#else + case 64: ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break; +#endif + default: goto inval; + } + /* Check validity of argument type (nl/normal) */ if (st==NOARG) { if (argpos>=0) goto inval; diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c index dbc93f74..c3e81d2a 100644 --- a/src/stdio/vfwprintf.c +++ b/src/stdio/vfwprintf.c @@ -26,7 +26,7 @@ enum { BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, - ZTPRE, JPRE, + ZTPRE, JPRE, WPRE, STOP, PTR, INT, UINT, ULLONG, LONG, ULONG, @@ -50,7 +50,7 @@ static const unsigned char states[]['z'-'A'+1] = { S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR, S('m') = NOARG, S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE, - S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, + S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE, }, { /* 1: l-prefixed */ S('b') = ULONG, S('B') = ULONG, S('d') = LONG, S('i') = LONG, @@ -94,6 +94,12 @@ static const unsigned char states[]['z'-'A'+1] = { S('o') = UMAX, S('u') = UMAX, S('x') = UMAX, S('X') = UMAX, S('n') = PTR, + }, { /* 8: w-prefixed */ + S('b') = UINT, S('B') = UINT, + S('d') = INT, S('i') = INT, + S('o') = UINT, S('u') = UINT, + S('x') = UINT, S('X') = UINT, + S('n') = PTR, } }; @@ -163,7 +169,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_ int w, p, xp; union arg arg; int argpos; - unsigned st, ps; + unsigned st, ps, width=0; int cnt=0, l=0; int i; int t; @@ -242,9 +248,25 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_ if (OOB(*s)) goto inval; ps=st; st=states[st]S(*s++); + if (st == WPRE) { + if (*s == L'0') goto inval; + width = getint(&s); + } } while (st-1<STOP); if (!st) goto inval; + if (ps == WPRE) switch (width) { + case 8: ps = HHPRE; st = (st == UINT) ? UCHAR : ((st == INT) ? CHAR : PTR); break; + case 16: ps = HPRE; st = (st == UINT) ? USHORT : ((st == INT) ? SHORT : PTR); break; + case 32: ps = BARE; break; +#if UINTPTR_MAX >= UINT64_MAX + case 64: ps = LPRE; st = (st == UINT) ? ULONG : ((st == INT) ? LONG : PTR); break; +#else + case 64: ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break; +#endif + default: goto inval; + } + /* Check validity of argument type (nl/normal) */ if (st==NOARG) { if (argpos>=0) goto inval; -- 2.34.1 ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-30 6:55 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt @ 2023-06-02 14:38 ` Rich Felker 2023-06-02 15:09 ` Jₑₙₛ Gustedt 0 siblings, 1 reply; 23+ messages in thread From: Rich Felker @ 2023-06-02 14:38 UTC (permalink / raw) To: Jens Gustedt; +Cc: musl, jens.gustedt On Tue, May 30, 2023 at 08:55:27AM +0200, Jens Gustedt wrote: > These are mandatory for C23 and concern all types for which the > platform has `int_leastN_t` and `uint_leastN_t`. For musl these types > always coincide with `intN_t` and `uintN_t` and are always present for > N equal 8, 16, 32 and 64. > > They can be added for general use since all lowercase letters were > previously reserved. > > Nevertheless, users that use these modifiers will see a lot of > warnings from compilers in the beginning. This is because the > compilers have not yet integrated this form of a specifier into their > correponding extensions (gcc attributes). So unfortunately also > testing this feature may be a bit noisy for the moment. > > The only architecture dependend choice is the type for N == 64, which > may be `long` or `long long`. We just mimick the test that is done in > other places to compare `UINTPTR_MAX` and `UINT64_MAX` to determine > that. > --- > src/stdio/vfprintf.c | 28 +++++++++++++++++++++++++--- > src/stdio/vfwprintf.c | 28 +++++++++++++++++++++++++--- > 2 files changed, 50 insertions(+), 6 deletions(-) > > diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c > index cbc79783..265fb7ad 100644 > --- a/src/stdio/vfprintf.c > +++ b/src/stdio/vfprintf.c > @@ -33,7 +33,7 @@ > > enum { > BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, > - ZTPRE, JPRE, > + ZTPRE, JPRE, WPRE, > STOP, > PTR, INT, UINT, ULLONG, > LONG, ULONG, > @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = { > S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR, > S('m') = NOARG, > S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE, > - S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, > + S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE, > }, { /* 1: l-prefixed */ > S('b') = ULONG, S('B') = ULONG, > S('d') = LONG, S('i') = LONG, > @@ -101,6 +101,12 @@ static const unsigned char states[]['z'-'A'+1] = { > S('o') = UMAX, S('u') = UMAX, > S('x') = UMAX, S('X') = UMAX, > S('n') = PTR, > + }, { /* 8: w-prefixed */ > + S('b') = UINT, S('B') = UINT, > + S('d') = INT, S('i') = INT, > + S('o') = UINT, S('u') = UINT, > + S('x') = UINT, S('X') = UINT, > + S('n') = PTR, > } > }; > > @@ -447,7 +453,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, > int w, p, xp; > union arg arg; > int argpos; > - unsigned st, ps; > + unsigned st, ps, width=0; > int cnt=0, l=0; > size_t i; > char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4]; > @@ -527,9 +533,25 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, > if (OOB(*s)) goto inval; > ps=st; > st=states[st]S(*s++); > + if (st == WPRE) { > + if (*s == '0') goto inval; > + width = getint(&s); > + } > } while (st-1<STOP); > if (!st) goto inval; > > + if (ps == WPRE) switch (width) { > + case 8: ps = HHPRE; st = (st == UINT) ? UCHAR : ((st == INT) ? CHAR : PTR); break; > + case 16: ps = HPRE; st = (st == UINT) ? USHORT : ((st == INT) ? SHORT : PTR); break; > + case 32: ps = BARE; break; > +#if UINTPTR_MAX >= UINT64_MAX > + case 64: ps = LPRE; st = (st == UINT) ? ULONG : ((st == INT) ? LONG : PTR); break; > +#else > + case 64: ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break; > +#endif The logic here for whether w64 is LPRE or LLPRE is wrong. The condition for whether [u]int64_t is long or long long is not whether uintptr_t or long long would be too wide (they never are, in our ABIs) but whether long is long enough. All our [u]intN_t are the lowest-rank type of the desired size, and this seems to be consistent with what other implementations like glibc do. Short of a compelling reason not to, though, I plan to just go with the fix-up of my v2 patch for this functionality. Its ordering avoids duplication of logic (the ?: branches in the cases above) and the need for maintaining extra side state (the width variable) thru the state machine. Rich ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-06-02 14:38 ` Rich Felker @ 2023-06-02 15:09 ` Jₑₙₛ Gustedt 2023-06-02 15:16 ` Rich Felker 0 siblings, 1 reply; 23+ messages in thread From: Jₑₙₛ Gustedt @ 2023-06-02 15:09 UTC (permalink / raw) To: Rich Felker; +Cc: musl, jens.gustedt [-- Attachment #1: Type: text/plain, Size: 1732 bytes --] Rich, on Fri, 2 Jun 2023 10:38:00 -0400 you (Rich Felker <dalias@libc.org>) wrote: > The logic here for whether w64 is LPRE or LLPRE is wrong. The > condition for whether [u]int64_t is long or long long is not whether > uintptr_t or long long would be too wide (they never are, in our ABIs) > but whether long is long enough. All our [u]intN_t are the lowest-rank > type of the desired size, and this seems to be consistent with what > other implementations like glibc do. I probably copied that logic wrong from somewhere. Yes, right thing is probably just to compare to the new `ULONG_WIDTH` macro or so. One thing to think about could be to always map 64 to `long long`, just for this function here. This would always have the right size and representation. And since this is an implementation of the C library, we make the rules ;-) > Short of a compelling reason not to, though, I plan to just go with > the fix-up of my v2 patch for this functionality. Its ordering avoids > duplication of logic (the ?: branches in the cases above) and the need > for maintaining extra side state (the width variable) thru the state > machine. I thought a bit of having the state only in the width variable, but since we also need to track signedness and the extra cases for FP, this becomes quickly quite messy. So yes, looks good (with some minor remarks in up-thread). Thanks Jₑₙₛ -- :: ICube :::::::::::::::::::::::::::::: deputy director :: :: Université de Strasbourg :::::::::::::::::::::: ICPS :: :: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus :: :: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 :: :: https://icube-icps.unistra.fr/index.php/Jens_Gustedt :: [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 195 bytes --] ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-06-02 15:09 ` Jₑₙₛ Gustedt @ 2023-06-02 15:16 ` Rich Felker 2023-06-02 15:37 ` Jₑₙₛ Gustedt 0 siblings, 1 reply; 23+ messages in thread From: Rich Felker @ 2023-06-02 15:16 UTC (permalink / raw) To: Jₑₙₛ Gustedt; +Cc: musl, jens.gustedt On Fri, Jun 02, 2023 at 05:09:26PM +0200, Jₑₙₛ Gustedt wrote: > Rich, > > on Fri, 2 Jun 2023 10:38:00 -0400 you (Rich Felker <dalias@libc.org>) > wrote: > > > The logic here for whether w64 is LPRE or LLPRE is wrong. The > > condition for whether [u]int64_t is long or long long is not whether > > uintptr_t or long long would be too wide (they never are, in our ABIs) > > but whether long is long enough. All our [u]intN_t are the lowest-rank > > type of the desired size, and this seems to be consistent with what > > other implementations like glibc do. > > I probably copied that logic wrong from somewhere. Yes, right thing is > probably just to compare to the new `ULONG_WIDTH` macro or so. > > One thing to think about could be to always map 64 to `long long`, > just for this function here. This would always have the right size and > representation. And since this is an implementation of the C library, > we make the rules ;-) Well, in musl we generally want the code to have well-defined behavior even if it's inlined via LTO, where the types would mismatch. Note that this is kinda a problem for scanf where va_arg is used with void*, except that POSIX, conveniently, explicitly requires the C (compiler) implementation accept mismatched va_arg types when both types are pointers. This is a big hammer but it seems to be a necessary assumption to admit implementing the %n$ form of scanf, which POSIX requires. Rich ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-06-02 15:16 ` Rich Felker @ 2023-06-02 15:37 ` Jₑₙₛ Gustedt 0 siblings, 0 replies; 23+ messages in thread From: Jₑₙₛ Gustedt @ 2023-06-02 15:37 UTC (permalink / raw) To: Rich Felker; +Cc: musl, jens.gustedt [-- Attachment #1: Type: text/plain, Size: 1570 bytes --] Rich, on Fri, 2 Jun 2023 11:16:40 -0400 you (Rich Felker <dalias@libc.org>) wrote: > Note that this is kinda a problem for scanf where va_arg is used > with void*, except that POSIX, conveniently, explicitly requires the > C (compiler) implementation accept mismatched va_arg types when both > types are pointers. I think that was a change for C23, we here have something similar in 7.16.1.1 — one type is pointer to qualified or unqualified `void` and the other is a pointer to a qualified or unqualified character type; Unfortunately, C can not go easily further than that, because other than in POSIX there might be data pointers that have different representations. (We could have said something like pointers with the same width and representation, though.) Also for the new `nullptr_t` type we now have — or, the type of the next argument is `nullptr_t` and type is a pointer type that has the same representation and alignment requirements as a pointer to a character type. Which then makes it defined behavior to pass in a `nullptr` sentinel, where it previously was UB when `NULL` was for example `OL` or so, or when the application passed in an explicit zero instead of `NULL`. Jₑₙₛ -- :: ICube :::::::::::::::::::::::::::::: deputy director :: :: Université de Strasbourg :::::::::::::::::::::: ICPS :: :: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus :: :: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 :: :: https://icube-icps.unistra.fr/index.php/Jens_Gustedt :: [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 195 bytes --] ^ permalink raw reply [flat|nested] 23+ messages in thread
* [musl] [C23 printf 3/3] C23: implement the wfN length modifiers for printf [not found] <cover.1685429467.git.Jens.Gustedt@inria.fr> 2023-05-30 6:55 ` [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers Jens Gustedt 2023-05-30 6:55 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt @ 2023-05-30 6:55 ` Jens Gustedt 2 siblings, 0 replies; 23+ messages in thread From: Jens Gustedt @ 2023-05-30 6:55 UTC (permalink / raw) To: musl, jens.gustedt Musl only has a difference between fixed-width and fastest-width integer types for N == 16. And even here all architectures have made the same choice, namely mapping to 32 bit types. --- src/stdio/vfprintf.c | 3 +++ src/stdio/vfwprintf.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c index 265fb7ad..a531a513 100644 --- a/src/stdio/vfprintf.c +++ b/src/stdio/vfprintf.c @@ -534,8 +534,11 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, ps=st; st=states[st]S(*s++); if (st == WPRE) { + _Bool fast = (*s == 'f'); + if (fast) ++s; if (*s == '0') goto inval; width = getint(&s); + if (fast && width == 16) width = 32; } } while (st-1<STOP); if (!st) goto inval; diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c index c3e81d2a..3689c2d5 100644 --- a/src/stdio/vfwprintf.c +++ b/src/stdio/vfwprintf.c @@ -249,8 +249,11 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_ ps=st; st=states[st]S(*s++); if (st == WPRE) { + _Bool fast = (*s == L'f'); + if (fast) ++s; if (*s == L'0') goto inval; width = getint(&s); + if (fast && width == 16) width = 32; } } while (st-1<STOP); if (!st) goto inval; -- 2.34.1 ^ permalink raw reply [flat|nested] 23+ messages in thread
* [musl] [C23 printf 0/3] to be replaced @ 2023-05-31 14:04 Jens Gustedt 2023-05-31 14:04 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt 0 siblings, 1 reply; 23+ messages in thread From: Jens Gustedt @ 2023-05-31 14:04 UTC (permalink / raw) To: musl There have been other approaches proposed for these patches that are considered superior to these here, although functionally equivalent. Take these ones here to ensure a functioning C23 system with 128 bit support, now. The 128 bit patches will be rebased on the new approaches once their implementation is completed. Jens Gustedt (3): C23: implement the b and B printf specifiers C23: implement the wN length specifiers for printf C23: implement the wfN length modifiers for printf include/inttypes.h | 34 +++++++++++++++++++++++++++++ src/stdio/vfprintf.c | 50 +++++++++++++++++++++++++++++++++++++++---- src/stdio/vfwprintf.c | 42 +++++++++++++++++++++++++++++++----- 3 files changed, 117 insertions(+), 9 deletions(-) -- 2.34.1 ^ permalink raw reply [flat|nested] 23+ messages in thread
* [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-31 14:04 [musl] [C23 printf 0/3] to be replaced Jens Gustedt @ 2023-05-31 14:04 ` Jens Gustedt 0 siblings, 0 replies; 23+ messages in thread From: Jens Gustedt @ 2023-05-31 14:04 UTC (permalink / raw) To: musl These are mandatory for C23 and concern all types for which the platform has `int_leastN_t` and `uint_leastN_t`. For musl these types always coincide with `intN_t` and `uintN_t` and are always present for N equal 8, 16, 32 and 64. They can be added for general use since all lowercase letters were previously reserved. Nevertheless, users that use these modifiers will see a lot of warnings from compilers in the beginning. This is because the compilers have not yet integrated this form of a specifier into their correponding extensions (gcc attributes). So unfortunately also testing this feature may be a bit noisy for the moment. The only architecture dependend choice is the type for N == 64, which may be `long` or `long long`. We just mimick the test that is done in other places to compare `UINTPTR_MAX` and `UINT64_MAX` to determine that. --- src/stdio/vfprintf.c | 28 +++++++++++++++++++++++++--- src/stdio/vfwprintf.c | 28 +++++++++++++++++++++++++--- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c index cbc79783..265fb7ad 100644 --- a/src/stdio/vfprintf.c +++ b/src/stdio/vfprintf.c @@ -33,7 +33,7 @@ enum { BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, - ZTPRE, JPRE, + ZTPRE, JPRE, WPRE, STOP, PTR, INT, UINT, ULLONG, LONG, ULONG, @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = { S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR, S('m') = NOARG, S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE, - S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, + S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE, }, { /* 1: l-prefixed */ S('b') = ULONG, S('B') = ULONG, S('d') = LONG, S('i') = LONG, @@ -101,6 +101,12 @@ static const unsigned char states[]['z'-'A'+1] = { S('o') = UMAX, S('u') = UMAX, S('x') = UMAX, S('X') = UMAX, S('n') = PTR, + }, { /* 8: w-prefixed */ + S('b') = UINT, S('B') = UINT, + S('d') = INT, S('i') = INT, + S('o') = UINT, S('u') = UINT, + S('x') = UINT, S('X') = UINT, + S('n') = PTR, } }; @@ -447,7 +453,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, int w, p, xp; union arg arg; int argpos; - unsigned st, ps; + unsigned st, ps, width=0; int cnt=0, l=0; size_t i; char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4]; @@ -527,9 +533,25 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, if (OOB(*s)) goto inval; ps=st; st=states[st]S(*s++); + if (st == WPRE) { + if (*s == '0') goto inval; + width = getint(&s); + } } while (st-1<STOP); if (!st) goto inval; + if (ps == WPRE) switch (width) { + case 8: ps = HHPRE; st = (st == UINT) ? UCHAR : ((st == INT) ? CHAR : PTR); break; + case 16: ps = HPRE; st = (st == UINT) ? USHORT : ((st == INT) ? SHORT : PTR); break; + case 32: ps = BARE; break; +#if UINTPTR_MAX >= UINT64_MAX + case 64: ps = LPRE; st = (st == UINT) ? ULONG : ((st == INT) ? LONG : PTR); break; +#else + case 64: ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break; +#endif + default: goto inval; + } + /* Check validity of argument type (nl/normal) */ if (st==NOARG) { if (argpos>=0) goto inval; diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c index dbc93f74..c3e81d2a 100644 --- a/src/stdio/vfwprintf.c +++ b/src/stdio/vfwprintf.c @@ -26,7 +26,7 @@ enum { BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, - ZTPRE, JPRE, + ZTPRE, JPRE, WPRE, STOP, PTR, INT, UINT, ULLONG, LONG, ULONG, @@ -50,7 +50,7 @@ static const unsigned char states[]['z'-'A'+1] = { S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR, S('m') = NOARG, S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE, - S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, + S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE, }, { /* 1: l-prefixed */ S('b') = ULONG, S('B') = ULONG, S('d') = LONG, S('i') = LONG, @@ -94,6 +94,12 @@ static const unsigned char states[]['z'-'A'+1] = { S('o') = UMAX, S('u') = UMAX, S('x') = UMAX, S('X') = UMAX, S('n') = PTR, + }, { /* 8: w-prefixed */ + S('b') = UINT, S('B') = UINT, + S('d') = INT, S('i') = INT, + S('o') = UINT, S('u') = UINT, + S('x') = UINT, S('X') = UINT, + S('n') = PTR, } }; @@ -163,7 +169,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_ int w, p, xp; union arg arg; int argpos; - unsigned st, ps; + unsigned st, ps, width=0; int cnt=0, l=0; int i; int t; @@ -242,9 +248,25 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_ if (OOB(*s)) goto inval; ps=st; st=states[st]S(*s++); + if (st == WPRE) { + if (*s == L'0') goto inval; + width = getint(&s); + } } while (st-1<STOP); if (!st) goto inval; + if (ps == WPRE) switch (width) { + case 8: ps = HHPRE; st = (st == UINT) ? UCHAR : ((st == INT) ? CHAR : PTR); break; + case 16: ps = HPRE; st = (st == UINT) ? USHORT : ((st == INT) ? SHORT : PTR); break; + case 32: ps = BARE; break; +#if UINTPTR_MAX >= UINT64_MAX + case 64: ps = LPRE; st = (st == UINT) ? ULONG : ((st == INT) ? LONG : PTR); break; +#else + case 64: ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break; +#endif + default: goto inval; + } + /* Check validity of argument type (nl/normal) */ if (st==NOARG) { if (argpos>=0) goto inval; -- 2.34.1 ^ permalink raw reply [flat|nested] 23+ messages in thread
* [musl] [C23 printf 0/3] @ 2023-05-26 19:41 Jens Gustedt 2023-05-26 19:41 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt 0 siblings, 1 reply; 23+ messages in thread From: Jens Gustedt @ 2023-05-26 19:41 UTC (permalink / raw) To: musl The changes that are mandatory for printf, excluding the 128 bit parts for the moment. Caution: this is not yet merged with the patch for the "b" specifier that was already circulating. Jens Gustedt (3): C23: implement the b and B printf specifiers C23: implement the wN length specifiers for printf C23: implement the wfN length modifiers for printf include/inttypes.h | 34 +++++++++++++++++++++++++++++++++ src/stdio/vfprintf.c | 44 ++++++++++++++++++++++++++++++++++++++++--- src/stdio/vfwprintf.c | 36 +++++++++++++++++++++++++++++++---- 3 files changed, 107 insertions(+), 7 deletions(-) -- 2.34.1 ^ permalink raw reply [flat|nested] 23+ messages in thread
* [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-26 19:41 [musl] [C23 printf 0/3] Jens Gustedt @ 2023-05-26 19:41 ` Jens Gustedt 2023-05-26 20:31 ` Rich Felker 0 siblings, 1 reply; 23+ messages in thread From: Jens Gustedt @ 2023-05-26 19:41 UTC (permalink / raw) To: musl These are mandatory for C23 and concern all types for which the platform has `int_leastN_t` and `uint_leastN_t`. For musl these types always coincide with `intN_t` and `uintN_t` and are always present for N equal 8, 16, 32 and 64. They can be added for general use since all lowercase letters were previously reserved. Nevertheless, users that use these modifiers will see a lot of warnings from compilers in the beginning. This is because the compilers have not yet integrated this form of a specifier into their correponding extensions (gcc attributes). So unfortunately also testing this feature may be a bit noisy for the moment. The only architecture dependend choice is the type for N == 64, which may be `long` or `long long`. We just mimick the test that is done in other places to compare `UINTPTR_MAX` and `UINT64_MAX` to determine that. --- src/stdio/vfprintf.c | 18 ++++++++++++++++-- src/stdio/vfwprintf.c | 18 ++++++++++++++++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c index cbc79783..1a516663 100644 --- a/src/stdio/vfprintf.c +++ b/src/stdio/vfprintf.c @@ -33,7 +33,7 @@ enum { BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, - ZTPRE, JPRE, + ZTPRE, JPRE, WPRE, STOP, PTR, INT, UINT, ULLONG, LONG, ULONG, @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = { S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR, S('m') = NOARG, S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE, - S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, + S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE, }, { /* 1: l-prefixed */ S('b') = ULONG, S('B') = ULONG, S('d') = LONG, S('i') = LONG, @@ -525,8 +525,22 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, st=0; do { if (OOB(*s)) goto inval; + wpre: ps=st; st=states[st]S(*s++); + if (st == WPRE) { + switch (getint(&s)) { + case 8: st = HHPRE; goto wpre; + case 16: st = HPRE; goto wpre; + case 32: st = BARE; goto wpre; +#if UINTPTR_MAX >= UINT64_MAX + case 64: st = LPRE; goto wpre; +#else + case 64: st = LLPRE; goto wpre; +#endif + default: goto inval; + } + } } while (st-1<STOP); if (!st) goto inval; diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c index dbc93f74..4320761a 100644 --- a/src/stdio/vfwprintf.c +++ b/src/stdio/vfwprintf.c @@ -26,7 +26,7 @@ enum { BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, - ZTPRE, JPRE, + ZTPRE, JPRE, WPRE, STOP, PTR, INT, UINT, ULLONG, LONG, ULONG, @@ -50,7 +50,7 @@ static const unsigned char states[]['z'-'A'+1] = { S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR, S('m') = NOARG, S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE, - S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, + S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE, }, { /* 1: l-prefixed */ S('b') = ULONG, S('B') = ULONG, S('d') = LONG, S('i') = LONG, @@ -240,8 +240,22 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_ st=0; do { if (OOB(*s)) goto inval; + wpre: ps=st; st=states[st]S(*s++); + if (st == WPRE) { + switch (getint(&s)) { + case 8: st = HHPRE; goto wpre; + case 16: st = HPRE; goto wpre; + case 32: st = BARE; goto wpre; +#if UINTPTR_MAX >= UINT64_MAX + case 64: st = LPRE; goto wpre; +#else + case 64: st = LLPRE; goto wpre; +#endif + default: goto inval; + } + } } while (st-1<STOP); if (!st) goto inval; -- 2.34.1 ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-26 19:41 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt @ 2023-05-26 20:31 ` Rich Felker 2023-05-26 20:51 ` Jₑₙₛ Gustedt 0 siblings, 1 reply; 23+ messages in thread From: Rich Felker @ 2023-05-26 20:31 UTC (permalink / raw) To: Jens Gustedt; +Cc: musl On Fri, May 26, 2023 at 09:41:03PM +0200, Jens Gustedt wrote: > These are mandatory for C23 and concern all types for which the > platform has `int_leastN_t` and `uint_leastN_t`. For musl these types > always coincide with `intN_t` and `uintN_t` and are always present for > N equal 8, 16, 32 and 64. > > They can be added for general use since all lowercase letters were > previously reserved. > > Nevertheless, users that use these modifiers will see a lot of > warnings from compilers in the beginning. This is because the > compilers have not yet integrated this form of a specifier into their > correponding extensions (gcc attributes). So unfortunately also > testing this feature may be a bit noisy for the moment. > > The only architecture dependend choice is the type for N == 64, which > may be `long` or `long long`. We just mimick the test that is done in > other places to compare `UINTPTR_MAX` and `UINT64_MAX` to determine > that. > --- > src/stdio/vfprintf.c | 18 ++++++++++++++++-- > src/stdio/vfwprintf.c | 18 ++++++++++++++++-- > 2 files changed, 32 insertions(+), 4 deletions(-) > > diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c > index cbc79783..1a516663 100644 > --- a/src/stdio/vfprintf.c > +++ b/src/stdio/vfprintf.c > @@ -33,7 +33,7 @@ > > enum { > BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, > - ZTPRE, JPRE, > + ZTPRE, JPRE, WPRE, > STOP, > PTR, INT, UINT, ULLONG, > LONG, ULONG, > @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = { > S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR, > S('m') = NOARG, > S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE, > - S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, > + S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, S('w') = WPRE, > }, { /* 1: l-prefixed */ > S('b') = ULONG, S('B') = ULONG, > S('d') = LONG, S('i') = LONG, > @@ -525,8 +525,22 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, > st=0; > do { > if (OOB(*s)) goto inval; > + wpre: > ps=st; > st=states[st]S(*s++); > + if (st == WPRE) { > + switch (getint(&s)) { > + case 8: st = HHPRE; goto wpre; > + case 16: st = HPRE; goto wpre; > + case 32: st = BARE; goto wpre; > +#if UINTPTR_MAX >= UINT64_MAX > + case 64: st = LPRE; goto wpre; > +#else > + case 64: st = LLPRE; goto wpre; > +#endif > + default: goto inval; > + } > + } > } while (st-1<STOP); > if (!st) goto inval; I don't see how this works. While you're in this new WPRE state, you're accesing an element of the states[] array with a potentially out-of-bounds index, because you skipped over the bounds check to ensure that the index is valid. I'm not clear why you're doing that instead of just continuing the loop. My preference would be not adding any code at all here and using the existing state machine, adding state transitions for the new prefixes to it, but that would require expanding the states stable to start at '1' instead of 'A', and to have a couple more intermediate states. I'm not sure how large that would get. There's a good chance it's comparable to the size of any added code, though. One minor thing with the implementation using getint(): it accepts leading zeros, which are not valid here. Rich ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-26 20:31 ` Rich Felker @ 2023-05-26 20:51 ` Jₑₙₛ Gustedt 2023-05-26 21:03 ` Rich Felker 0 siblings, 1 reply; 23+ messages in thread From: Jₑₙₛ Gustedt @ 2023-05-26 20:51 UTC (permalink / raw) To: Rich Felker; +Cc: musl [-- Attachment #1: Type: text/plain, Size: 4331 bytes --] Rich, on Fri, 26 May 2023 16:31:07 -0400 you (Rich Felker <dalias@libc.org>) wrote: > On Fri, May 26, 2023 at 09:41:03PM +0200, Jens Gustedt wrote: > > These are mandatory for C23 and concern all types for which the > > platform has `int_leastN_t` and `uint_leastN_t`. For musl these > > types always coincide with `intN_t` and `uintN_t` and are always > > present for N equal 8, 16, 32 and 64. > > > > They can be added for general use since all lowercase letters were > > previously reserved. > > > > Nevertheless, users that use these modifiers will see a lot of > > warnings from compilers in the beginning. This is because the > > compilers have not yet integrated this form of a specifier into > > their correponding extensions (gcc attributes). So unfortunately > > also testing this feature may be a bit noisy for the moment. > > > > The only architecture dependend choice is the type for N == 64, > > which may be `long` or `long long`. We just mimick the test that is > > done in other places to compare `UINTPTR_MAX` and `UINT64_MAX` to > > determine that. > > --- > > src/stdio/vfprintf.c | 18 ++++++++++++++++-- > > src/stdio/vfwprintf.c | 18 ++++++++++++++++-- > > 2 files changed, 32 insertions(+), 4 deletions(-) > > > > diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c > > index cbc79783..1a516663 100644 > > --- a/src/stdio/vfprintf.c > > +++ b/src/stdio/vfprintf.c > > @@ -33,7 +33,7 @@ > > > > enum { > > BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, > > - ZTPRE, JPRE, > > + ZTPRE, JPRE, WPRE, > > STOP, > > PTR, INT, UINT, ULLONG, > > LONG, ULONG, > > @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = { > > S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') > > = PTR, S('m') = NOARG, > > S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE, > > - S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, > > + S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, > > S('w') = WPRE, }, { /* 1: l-prefixed */ > > S('b') = ULONG, S('B') = ULONG, > > S('d') = LONG, S('i') = LONG, > > @@ -525,8 +525,22 @@ static int printf_core(FILE *f, const char > > *fmt, va_list *ap, union arg *nl_arg, st=0; > > do { > > if (OOB(*s)) goto inval; > > + wpre: > > ps=st; > > st=states[st]S(*s++); > > + if (st == WPRE) { > > + switch (getint(&s)) { > > + case 8: st = HHPRE; goto wpre; > > + case 16: st = HPRE; goto wpre; > > + case 32: st = BARE; goto wpre; > > +#if UINTPTR_MAX >= UINT64_MAX > > + case 64: st = LPRE; goto wpre; > > +#else > > + case 64: st = LLPRE; goto wpre; > > +#endif > > + default: goto inval; > > + } > > + } > > } while (st-1<STOP); > > if (!st) goto inval; > > I don't see how this works. While you're in this new WPRE state, > you're accesing an element of the states[] array with a potentially > out-of-bounds index, because you skipped over the bounds check to > ensure that the index is valid. ah, ok, the `wpre` should probably move up a line. > I'm not clear why you're doing that > instead of just continuing the loop. > > My preference would be not adding any code at all here and using the > existing state machine, adding state transitions for the new prefixes > to it, but that would require expanding the states stable to start at > '1' instead of 'A', and to have a couple more intermediate states. I'm > not sure how large that would get. There's a good chance it's > comparable to the size of any added code, though. I am not sure that I understand the alternative that you are proposing. The difficulty that lead to this, is the "BARE" state which now becomes accessible with a "w32" prefix. Then later, the "BARE" state assumes (for the stop condition of the loop) that there had not been a prefix, I think. > One minor thing with the implementation using getint(): it accepts > leading zeros, which are not valid here. right, we should catch that. Thanks Jₑₙₛ -- :: ICube :::::::::::::::::::::::::::::: deputy director :: :: Université de Strasbourg :::::::::::::::::::::: ICPS :: :: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus :: :: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 :: :: https://icube-icps.unistra.fr/index.php/Jens_Gustedt :: [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 195 bytes --] ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-26 20:51 ` Jₑₙₛ Gustedt @ 2023-05-26 21:03 ` Rich Felker 2023-05-29 7:14 ` Jₑₙₛ Gustedt 0 siblings, 1 reply; 23+ messages in thread From: Rich Felker @ 2023-05-26 21:03 UTC (permalink / raw) To: Jₑₙₛ Gustedt; +Cc: musl On Fri, May 26, 2023 at 10:51:19PM +0200, Jₑₙₛ Gustedt wrote: > Rich, > > on Fri, 26 May 2023 16:31:07 -0400 you (Rich Felker <dalias@libc.org>) > wrote: > > > On Fri, May 26, 2023 at 09:41:03PM +0200, Jens Gustedt wrote: > > > These are mandatory for C23 and concern all types for which the > > > platform has `int_leastN_t` and `uint_leastN_t`. For musl these > > > types always coincide with `intN_t` and `uintN_t` and are always > > > present for N equal 8, 16, 32 and 64. > > > > > > They can be added for general use since all lowercase letters were > > > previously reserved. > > > > > > Nevertheless, users that use these modifiers will see a lot of > > > warnings from compilers in the beginning. This is because the > > > compilers have not yet integrated this form of a specifier into > > > their correponding extensions (gcc attributes). So unfortunately > > > also testing this feature may be a bit noisy for the moment. > > > > > > The only architecture dependend choice is the type for N == 64, > > > which may be `long` or `long long`. We just mimick the test that is > > > done in other places to compare `UINTPTR_MAX` and `UINT64_MAX` to > > > determine that. > > > --- > > > src/stdio/vfprintf.c | 18 ++++++++++++++++-- > > > src/stdio/vfwprintf.c | 18 ++++++++++++++++-- > > > 2 files changed, 32 insertions(+), 4 deletions(-) > > > > > > diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c > > > index cbc79783..1a516663 100644 > > > --- a/src/stdio/vfprintf.c > > > +++ b/src/stdio/vfprintf.c > > > @@ -33,7 +33,7 @@ > > > > > > enum { > > > BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, > > > - ZTPRE, JPRE, > > > + ZTPRE, JPRE, WPRE, > > > STOP, > > > PTR, INT, UINT, ULLONG, > > > LONG, ULONG, > > > @@ -57,7 +57,7 @@ static const unsigned char states[]['z'-'A'+1] = { > > > S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') > > > = PTR, S('m') = NOARG, > > > S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE, > > > - S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, > > > + S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE, > > > S('w') = WPRE, }, { /* 1: l-prefixed */ > > > S('b') = ULONG, S('B') = ULONG, > > > S('d') = LONG, S('i') = LONG, > > > @@ -525,8 +525,22 @@ static int printf_core(FILE *f, const char > > > *fmt, va_list *ap, union arg *nl_arg, st=0; > > > do { > > > if (OOB(*s)) goto inval; > > > + wpre: > > > ps=st; > > > st=states[st]S(*s++); > > > + if (st == WPRE) { > > > + switch (getint(&s)) { > > > + case 8: st = HHPRE; goto wpre; > > > + case 16: st = HPRE; goto wpre; > > > + case 32: st = BARE; goto wpre; > > > +#if UINTPTR_MAX >= UINT64_MAX > > > + case 64: st = LPRE; goto wpre; > > > +#else > > > + case 64: st = LLPRE; goto wpre; > > > +#endif > > > + default: goto inval; > > > + } > > > + } > > > } while (st-1<STOP); > > > if (!st) goto inval; > > > > I don't see how this works. While you're in this new WPRE state, > > you're accesing an element of the states[] array with a potentially > > out-of-bounds index, because you skipped over the bounds check to > > ensure that the index is valid. > > ah, ok, the `wpre` should probably move up a line. OK, I was thinking you just want a break, but you're trying to avoid a transition to BARE state getting treated as invalid by the loop condition. However, the underlying problem here is that you can't reuse the BARE state for something that's not BARE. If you do, formats like %w32w32w32w32w32w32d or %w32lld or even %w32f will be allowed. I think you need an extra state that's "plain but not bare" that duplicates only the integer transitions out of it, like the l, ll, etc. prefix states do. > > I'm not clear why you're doing that > > instead of just continuing the loop. > > > > My preference would be not adding any code at all here and using the > > existing state machine, adding state transitions for the new prefixes > > to it, but that would require expanding the states stable to start at > > '1' instead of 'A', and to have a couple more intermediate states. I'm > > not sure how large that would get. There's a good chance it's > > comparable to the size of any added code, though. > > I am not sure that I understand the alternative that you are proposing. > > The difficulty that lead to this, is the "BARE" state which now > becomes accessible with a "w32" prefix. Then later, the "BARE" state > assumes (for the stop condition of the loop) that there had not been a > prefix, I think. Yes, but I don't think you've solved that. It should be solvable as above. Rich ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-26 21:03 ` Rich Felker @ 2023-05-29 7:14 ` Jₑₙₛ Gustedt 2023-05-29 15:46 ` Rich Felker 0 siblings, 1 reply; 23+ messages in thread From: Jₑₙₛ Gustedt @ 2023-05-29 7:14 UTC (permalink / raw) To: Rich Felker; +Cc: musl [-- Attachment #1: Type: text/plain, Size: 962 bytes --] Rich, on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker <dalias@libc.org>) wrote: > I think you need an extra state that's "plain but not bare" that > duplicates only the integer transitions out of it, like the l, ll, > etc. prefix states do. Hm, the problem is that for the other prefixes the table entries then encode the concrete type that is to be expected. We could not do this here because the type depends on the requested width. So we would then need to "repair" that type after the loop. A `switch` to do that would look substantially similar to what is there, now. Do you think that would be better? Thanks Jₑₙₛ -- :: ICube :::::::::::::::::::::::::::::: deputy director :: :: Université de Strasbourg :::::::::::::::::::::: ICPS :: :: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus :: :: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 :: :: https://icube-icps.unistra.fr/index.php/Jens_Gustedt :: [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 195 bytes --] ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-29 7:14 ` Jₑₙₛ Gustedt @ 2023-05-29 15:46 ` Rich Felker 2023-05-29 19:21 ` Jₑₙₛ Gustedt 0 siblings, 1 reply; 23+ messages in thread From: Rich Felker @ 2023-05-29 15:46 UTC (permalink / raw) To: Jₑₙₛ Gustedt; +Cc: musl [-- Attachment #1: Type: text/plain, Size: 856 bytes --] On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote: > Rich, > > on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker <dalias@libc.org>) > wrote: > > > I think you need an extra state that's "plain but not bare" that > > duplicates only the integer transitions out of it, like the l, ll, > > etc. prefix states do. > > Hm, the problem is that for the other prefixes the table entries then > encode the concrete type that is to be expected. We could not do this > here because the type depends on the requested width. So we would then > need to "repair" that type after the loop. A `switch` to do that would > look substantially similar to what is there, now. Do you think that > would be better? OK I think I can communicate better with code than natural language text, so here's a diff, completely untested, of what I had in mind. Rich [-- Attachment #2: printf-wprefix.diff --] [-- Type: text/plain, Size: 1690 bytes --] diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c index a712d80f..a751fbdf 100644 --- a/src/stdio/vfprintf.c +++ b/src/stdio/vfprintf.c @@ -33,7 +33,7 @@ enum { BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, - ZTPRE, JPRE, + ZTPRE, JPRE, PLAIN, WPRE, W1PRE, W3PRE, W6PRE, STOP, PTR, INT, UINT, ULLONG, LONG, ULONG, @@ -44,9 +44,10 @@ enum { MAXSTATE }; -#define S(x) [(x)-'A'] +#define ST_BASE '1' +#define S(x) [(x)-ST_BASE] -static const unsigned char states[]['z'-'A'+1] = { +static const unsigned char states[]['z'-ST_BASE+1] = { { /* 0: bare types */ S('d') = INT, S('i') = INT, S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT, @@ -94,10 +95,24 @@ static const unsigned char states[]['z'-'A'+1] = { S('o') = UMAX, S('u') = UMAX, S('x') = UMAX, S('X') = UMAX, S('n') = PTR, + }, { /* 8: explicit-width-prefixed bare equivalent */ + S('d') = INT, S('i') = INT, + S('o') = UINT, S('u') = UINT, + S('x') = UINT, S('X') = UINT, + S('n') = PTR, + }, { /* 9: w-prefixed */ + S('1') = W1PRE, S('3') = W3PRE, + S('6') = W6PRE, S('8') = HHPRE, + }, { /* 10: w1-prefixed */ + S('6') = HPRE, + }, { /* 11: w3-prefixed */ + S('2') = PLAIN, + }, { /* 12: w6-prefixed */ + S('4') = LLONG_MAX > LONG_MAX ? LLPRE : LPRE, } }; -#define OOB(x) ((unsigned)(x)-'A' > 'z'-'A') +#define OOB(x) ((unsigned)(x)-ST_BASE > 'z'-ST_BASE) union arg { @@ -547,6 +562,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, switch(t) { case 'n': switch(ps) { + case PLAIN: case BARE: *(int *)arg.p = cnt; break; case LPRE: *(long *)arg.p = cnt; break; case LLPRE: *(long long *)arg.p = cnt; break; ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-29 15:46 ` Rich Felker @ 2023-05-29 19:21 ` Jₑₙₛ Gustedt 2023-05-30 1:48 ` Rich Felker 0 siblings, 1 reply; 23+ messages in thread From: Jₑₙₛ Gustedt @ 2023-05-29 19:21 UTC (permalink / raw) To: Rich Felker; +Cc: musl [-- Attachment #1: Type: text/plain, Size: 1692 bytes --] Rich, on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker <dalias@libc.org>) wrote: > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote: > > Rich, > > > > on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker > > <dalias@libc.org>) wrote: > > > > > I think you need an extra state that's "plain but not bare" that > > > duplicates only the integer transitions out of it, like the l, ll, > > > etc. prefix states do. > > > > Hm, the problem is that for the other prefixes the table entries > > then encode the concrete type that is to be expected. We could not > > do this here because the type depends on the requested width. So we > > would then need to "repair" that type after the loop. A `switch` to > > do that would look substantially similar to what is there, now. Do > > you think that would be better? > > OK I think I can communicate better with code than natural language > text, so here's a diff, completely untested, of what I had in mind. that's ... ugh ... not so prety, I think In my current version I track the desired width, if there is w specifier, and then do the adjustments after the loop. That takes indeed care of undefined character sequences. I find that much better readable, and also easier to extend (later there comes the `wf` case and the `128`, and perhaps some day `256`) Jₑₙₛ -- :: ICube :::::::::::::::::::::::::::::: deputy director :: :: Université de Strasbourg :::::::::::::::::::::: ICPS :: :: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus :: :: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 :: :: https://icube-icps.unistra.fr/index.php/Jens_Gustedt :: [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 195 bytes --] ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-29 19:21 ` Jₑₙₛ Gustedt @ 2023-05-30 1:48 ` Rich Felker 2023-05-30 6:46 ` Jₑₙₛ Gustedt 2023-06-02 14:29 ` Rich Felker 0 siblings, 2 replies; 23+ messages in thread From: Rich Felker @ 2023-05-30 1:48 UTC (permalink / raw) To: Jₑₙₛ Gustedt; +Cc: musl [-- Attachment #1: Type: text/plain, Size: 2279 bytes --] On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote: > Rich, > > on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker <dalias@libc.org>) > wrote: > > > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote: > > > Rich, > > > > > > on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker > > > <dalias@libc.org>) wrote: > > > > > > > I think you need an extra state that's "plain but not bare" that > > > > duplicates only the integer transitions out of it, like the l, ll, > > > > etc. prefix states do. > > > > > > Hm, the problem is that for the other prefixes the table entries > > > then encode the concrete type that is to be expected. We could not > > > do this here because the type depends on the requested width. So we > > > would then need to "repair" that type after the loop. A `switch` to > > > do that would look substantially similar to what is there, now. Do > > > you think that would be better? > > > > OK I think I can communicate better with code than natural language > > text, so here's a diff, completely untested, of what I had in mind. > > that's ... ugh ... not so prety, I think > > In my current version I track the desired width, if there is w > specifier, and then do the adjustments after the loop. That takes > indeed care of undefined character sequences. > > I find that much better readable, and also easier to extend (later > there comes the `wf` case and the `128`, and perhaps some day `256`) It sounds like the core issue is that you don't like the state machine approach to how musl's printf processes format specifiers. Personally, I like it because there's an obvious structured way to validate that it's accepting exactly the right things and nothing else, vs an approach like what you tried where you ended up accepting a lot of bogus specifiers. One alternative I would consider is doing something like what you did, but moving it outside of/before the state machine loop, so it's not mixing the w* processing with the state machine. This avoids accepting bogus repeated w32 prefixes and similar (because there is no loop) and lets you get by with just adding one PLAIN state to have it start in (rather than BARE) after w32. I expect the overall size would be similar. Concept attached. Rich [-- Attachment #2: printf-wprefix-alt.diff --] [-- Type: text/plain, Size: 1829 bytes --] diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c index a712d80f..a9e4d638 100644 --- a/src/stdio/vfprintf.c +++ b/src/stdio/vfprintf.c @@ -33,7 +33,7 @@ enum { BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, - ZTPRE, JPRE, + ZTPRE, JPRE, PLAIN, STOP, PTR, INT, UINT, ULLONG, LONG, ULONG, @@ -44,9 +44,10 @@ enum { MAXSTATE }; -#define S(x) [(x)-'A'] +#define ST_BASE 'A' +#define S(x) [(x)-ST_BASE] -static const unsigned char states[]['z'-'A'+1] = { +static const unsigned char states[]['z'-ST_BASE+1] = { { /* 0: bare types */ S('d') = INT, S('i') = INT, S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT, @@ -94,10 +95,15 @@ static const unsigned char states[]['z'-'A'+1] = { S('o') = UMAX, S('u') = UMAX, S('x') = UMAX, S('X') = UMAX, S('n') = PTR, + }, { /* 8: explicit-width-prefixed bare equivalent */ + S('d') = INT, S('i') = INT, + S('o') = UINT, S('u') = UINT, + S('x') = UINT, S('X') = UINT, + S('n') = PTR, } }; -#define OOB(x) ((unsigned)(x)-'A' > 'z'-'A') +#define OOB(x) ((unsigned)(x)-ST_BASE > 'z'-ST_BASE) union arg { @@ -510,6 +516,13 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, /* Format specifier state machine */ st=0; + if (*s=='w' && s[1]-'1'<9u) switch (getint(&s)) { + case 8: st = HHPRE; break; + case 16: st = HPRE; break; + case 32: st = PLAIN; break; + case 64: st = LLONG_MAX > LONG_MAX ? LLPRE : LPRE; break; + default: goto inval; + } do { if (OOB(*s)) goto inval; ps=st; @@ -547,6 +560,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, switch(t) { case 'n': switch(ps) { + case PLAIN: case BARE: *(int *)arg.p = cnt; break; case LPRE: *(long *)arg.p = cnt; break; case LLPRE: *(long long *)arg.p = cnt; break; ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-30 1:48 ` Rich Felker @ 2023-05-30 6:46 ` Jₑₙₛ Gustedt 2023-05-30 17:28 ` Rich Felker 2023-06-02 14:29 ` Rich Felker 1 sibling, 1 reply; 23+ messages in thread From: Jₑₙₛ Gustedt @ 2023-05-30 6:46 UTC (permalink / raw) To: Rich Felker; +Cc: musl [-- Attachment #1: Type: text/plain, Size: 2567 bytes --] Rich, on Mon, 29 May 2023 21:48:22 -0400 you (Rich Felker <dalias@libc.org>) wrote: > On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote: > > Rich, > > > > on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker > > <dalias@libc.org>) wrote: > > > > > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote: > [...] > [...] > [...] > > > > > > OK I think I can communicate better with code than natural > > > language text, so here's a diff, completely untested, of what I > > > had in mind. > > > > that's ... ugh ... not so prety, I think > > > > In my current version I track the desired width, if there is w > > specifier, and then do the adjustments after the loop. That takes > > indeed care of undefined character sequences. > > > > I find that much better readable, and also easier to extend (later > > there comes the `wf` case and the `128`, and perhaps some day > > `256`) > > It sounds like the core issue is that you don't like the state machine > approach to how musl's printf processes format specifiers. It is well suited for simple grammars, I agree with that, but here the grammar is becomming more complex. Be it just for the fact that you'd have to enlargen the set of possible values to match decimal digits. > Personally, > I like it because there's an obvious structured way to validate that > it's accepting exactly the right things and nothing else, vs an > approach like what you tried where you ended up accepting a lot of > bogus specifiers. > > One alternative I would consider is doing something like what you did, > but moving it outside of/before the state machine loop, so it's not > mixing the w* processing with the state machine. This avoids accepting > bogus repeated w32 prefixes and similar (because there is no loop) and > lets you get by with just adding one PLAIN state to have it start in > (rather than BARE) after w32. I expect the overall size would be > similar. Concept attached. I'll post what I have in a minute. It has the advantage over yours that it doesn't do the switch on the width inside the automaton and also that it doesn't have to increase the rows of the matrix. Thanks Jₑₙₛ -- :: ICube :::::::::::::::::::::::::::::: deputy director :: :: Université de Strasbourg :::::::::::::::::::::: ICPS :: :: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus :: :: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 :: :: https://icube-icps.unistra.fr/index.php/Jens_Gustedt :: [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 195 bytes --] ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-30 6:46 ` Jₑₙₛ Gustedt @ 2023-05-30 17:28 ` Rich Felker 2023-05-30 18:00 ` Rich Felker 2023-05-31 7:59 ` Jₑₙₛ Gustedt 0 siblings, 2 replies; 23+ messages in thread From: Rich Felker @ 2023-05-30 17:28 UTC (permalink / raw) To: Jₑₙₛ Gustedt; +Cc: musl On Tue, May 30, 2023 at 08:46:36AM +0200, Jₑₙₛ Gustedt wrote: > Rich, > > on Mon, 29 May 2023 21:48:22 -0400 you (Rich Felker <dalias@libc.org>) > wrote: > > > On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote: > > > Rich, > > > > > > on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker > > > <dalias@libc.org>) wrote: > > > > > > > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote: > > [...] > > [...] > > [...] > > > > > > > > OK I think I can communicate better with code than natural > > > > language text, so here's a diff, completely untested, of what I > > > > had in mind. > > > > > > that's ... ugh ... not so prety, I think > > > > > > In my current version I track the desired width, if there is w > > > specifier, and then do the adjustments after the loop. That takes > > > indeed care of undefined character sequences. > > > > > > I find that much better readable, and also easier to extend (later > > > there comes the `wf` case and the `128`, and perhaps some day > > > `256`) > > > > It sounds like the core issue is that you don't like the state machine > > approach to how musl's printf processes format specifiers. > > It is well suited for simple grammars, I agree with that, but here the > grammar is becomming more complex. Be it just for the fact that you'd > have to enlargen the set of possible values to match decimal digits. I don't think it's really any more complex. It's just a few gratuitous aliases that have a very small number of edge paths. The wf ones almost entirely collapse with the w ones, and if we wanted to get rid of the gratuitous separate hh/h loading, they'd entirely collapse. But the version I posted as code is probably enough smaller to be perferable. I guess I should take a look at that and see... > > Personally, > > I like it because there's an obvious structured way to validate that > > it's accepting exactly the right things and nothing else, vs an > > approach like what you tried where you ended up accepting a lot of > > bogus specifiers. > > > > One alternative I would consider is doing something like what you did, > > but moving it outside of/before the state machine loop, so it's not > > mixing the w* processing with the state machine. This avoids accepting > > bogus repeated w32 prefixes and similar (because there is no loop) and > > lets you get by with just adding one PLAIN state to have it start in > > (rather than BARE) after w32. I expect the overall size would be > > similar. Concept attached. > > I'll post what I have in a minute. It has the advantage over yours > that it doesn't do the switch on the width inside the automaton and > also that it doesn't have to increase the rows of the matrix. I don't understand how that's supposed to be an improvement. It duplicates the state machine logic for the final types with a bunch of conditionals in code (mainly to handle %n vs integer specifiers) rather than letting the state machine spit out the right type as its final state naturally. Mine didn't have any switch inside the automaton. It did it before the automaton processing (since the 'w' can only appear at the beginning, it can be thought of as just setting the initial state). Rich ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-30 17:28 ` Rich Felker @ 2023-05-30 18:00 ` Rich Felker 2023-05-31 7:59 ` Jₑₙₛ Gustedt 1 sibling, 0 replies; 23+ messages in thread From: Rich Felker @ 2023-05-30 18:00 UTC (permalink / raw) To: Jₑₙₛ Gustedt; +Cc: musl On Tue, May 30, 2023 at 01:28:33PM -0400, Rich Felker wrote: > On Tue, May 30, 2023 at 08:46:36AM +0200, Jₑₙₛ Gustedt wrote: > > Rich, > > > > on Mon, 29 May 2023 21:48:22 -0400 you (Rich Felker <dalias@libc.org>) > > wrote: > > > > > On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote: > > > > Rich, > > > > > > > > on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker > > > > <dalias@libc.org>) wrote: > > > > > > > > > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote: > > > [...] > > > [...] > > > [...] > > > > > > > > > > OK I think I can communicate better with code than natural > > > > > language text, so here's a diff, completely untested, of what I > > > > > had in mind. > > > > > > > > that's ... ugh ... not so prety, I think > > > > > > > > In my current version I track the desired width, if there is w > > > > specifier, and then do the adjustments after the loop. That takes > > > > indeed care of undefined character sequences. > > > > > > > > I find that much better readable, and also easier to extend (later > > > > there comes the `wf` case and the `128`, and perhaps some day > > > > `256`) > > > > > > It sounds like the core issue is that you don't like the state machine > > > approach to how musl's printf processes format specifiers. > > > > It is well suited for simple grammars, I agree with that, but here the > > grammar is becomming more complex. Be it just for the fact that you'd > > have to enlargen the set of possible values to match decimal digits. > > I don't think it's really any more complex. It's just a few gratuitous > aliases that have a very small number of edge paths. The wf ones > almost entirely collapse with the w ones, and if we wanted to get rid > of the gratuitous separate hh/h loading, they'd entirely collapse. But > the version I posted as code is probably enough smaller to be > perferable. I guess I should take a look at that and see... Ah, now I remember why we handle h/hh despite them seeing useless. They're needed for %n, and once you distinguish them for that, there's hardly any point in trying to treat them the same as bare elsewhere. Rich ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-30 17:28 ` Rich Felker 2023-05-30 18:00 ` Rich Felker @ 2023-05-31 7:59 ` Jₑₙₛ Gustedt 1 sibling, 0 replies; 23+ messages in thread From: Jₑₙₛ Gustedt @ 2023-05-31 7:59 UTC (permalink / raw) To: Rich Felker; +Cc: musl [-- Attachment #1: Type: text/plain, Size: 1736 bytes --] Rich, on Tue, 30 May 2023 13:28:33 -0400 you (Rich Felker <dalias@libc.org>) wrote: > On Tue, May 30, 2023 at 08:46:36AM +0200, Jₑₙₛ Gustedt wrote: > > Rich, > > > > on Mon, 29 May 2023 21:48:22 -0400 you (Rich Felker > > <dalias@libc.org>) wrote: > > > > > On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote: > [...] > [...] > > > [...] > > > [...] > > > [...] > [...] > [...] > > > > > > It sounds like the core issue is that you don't like the state > > > machine approach to how musl's printf processes format > > > specifiers. > > > > It is well suited for simple grammars, I agree with that, but here > > the grammar is becomming more complex. Be it just for the fact that > > you'd have to enlargen the set of possible values to match decimal > > digits. > > I don't think it's really any more complex. … It seems that we don't converge for this one. So it would probably better if you do a complete patch for this that satisfies you personally. I need the complete functionality to build on my 128 bit patches and to have a complete C23 system for my tests. So I will keep my variant in my series for the moment, and rebase on top of yours once that is ready and tested. (Same for `memset` by the way or any other patch that you guys think can be implemented better than I have foreseen.) Thanks Jₑₙₛ -- :: ICube :::::::::::::::::::::::::::::: deputy director :: :: Université de Strasbourg :::::::::::::::::::::: ICPS :: :: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus :: :: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 :: :: https://icube-icps.unistra.fr/index.php/Jens_Gustedt :: [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 195 bytes --] ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-05-30 1:48 ` Rich Felker 2023-05-30 6:46 ` Jₑₙₛ Gustedt @ 2023-06-02 14:29 ` Rich Felker 2023-06-02 14:55 ` Jₑₙₛ Gustedt 1 sibling, 1 reply; 23+ messages in thread From: Rich Felker @ 2023-06-02 14:29 UTC (permalink / raw) To: Jₑₙₛ Gustedt; +Cc: musl On Mon, May 29, 2023 at 09:48:22PM -0400, Rich Felker wrote: > On Mon, May 29, 2023 at 09:21:55PM +0200, Jₑₙₛ Gustedt wrote: > > Rich, > > > > on Mon, 29 May 2023 11:46:40 -0400 you (Rich Felker <dalias@libc.org>) > > wrote: > > > > > On Mon, May 29, 2023 at 09:14:13AM +0200, Jₑₙₛ Gustedt wrote: > > > > Rich, > > > > > > > > on Fri, 26 May 2023 17:03:58 -0400 you (Rich Felker > > > > <dalias@libc.org>) wrote: > > > > > > > > > I think you need an extra state that's "plain but not bare" that > > > > > duplicates only the integer transitions out of it, like the l, ll, > > > > > etc. prefix states do. > > > > > > > > Hm, the problem is that for the other prefixes the table entries > > > > then encode the concrete type that is to be expected. We could not > > > > do this here because the type depends on the requested width. So we > > > > would then need to "repair" that type after the loop. A `switch` to > > > > do that would look substantially similar to what is there, now. Do > > > > you think that would be better? > > > > > > OK I think I can communicate better with code than natural language > > > text, so here's a diff, completely untested, of what I had in mind. > > > > that's ... ugh ... not so prety, I think > > > > In my current version I track the desired width, if there is w > > specifier, and then do the adjustments after the loop. That takes > > indeed care of undefined character sequences. > > > > I find that much better readable, and also easier to extend (later > > there comes the `wf` case and the `128`, and perhaps some day `256`) > > It sounds like the core issue is that you don't like the state machine > approach to how musl's printf processes format specifiers. Personally, > I like it because there's an obvious structured way to validate that > it's accepting exactly the right things and nothing else, vs an > approach like what you tried where you ended up accepting a lot of > bogus specifiers. > > One alternative I would consider is doing something like what you did, > but moving it outside of/before the state machine loop, so it's not > mixing the w* processing with the state machine. This avoids accepting > bogus repeated w32 prefixes and similar (because there is no loop) and > lets you get by with just adding one PLAIN state to have it start in > (rather than BARE) after w32. I expect the overall size would be > similar. Concept attached. > > Rich > diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c > index a712d80f..a9e4d638 100644 > --- a/src/stdio/vfprintf.c > +++ b/src/stdio/vfprintf.c > @@ -33,7 +33,7 @@ > > enum { > BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, > - ZTPRE, JPRE, > + ZTPRE, JPRE, PLAIN, > STOP, > PTR, INT, UINT, ULLONG, > LONG, ULONG, > @@ -44,9 +44,10 @@ enum { > MAXSTATE > }; > > -#define S(x) [(x)-'A'] > +#define ST_BASE 'A' > +#define S(x) [(x)-ST_BASE] > > -static const unsigned char states[]['z'-'A'+1] = { > +static const unsigned char states[]['z'-ST_BASE+1] = { > { /* 0: bare types */ > S('d') = INT, S('i') = INT, > S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT, > @@ -94,10 +95,15 @@ static const unsigned char states[]['z'-'A'+1] = { > S('o') = UMAX, S('u') = UMAX, > S('x') = UMAX, S('X') = UMAX, > S('n') = PTR, > + }, { /* 8: explicit-width-prefixed bare equivalent */ > + S('d') = INT, S('i') = INT, > + S('o') = UINT, S('u') = UINT, > + S('x') = UINT, S('X') = UINT, > + S('n') = PTR, > } > }; > > -#define OOB(x) ((unsigned)(x)-'A' > 'z'-'A') > +#define OOB(x) ((unsigned)(x)-ST_BASE > 'z'-ST_BASE) > > union arg > { > @@ -510,6 +516,13 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, > > /* Format specifier state machine */ > st=0; > + if (*s=='w' && s[1]-'1'<9u) switch (getint(&s)) { > + case 8: st = HHPRE; break; > + case 16: st = HPRE; break; > + case 32: st = PLAIN; break; > + case 64: st = LLONG_MAX > LONG_MAX ? LLPRE : LPRE; break; > + default: goto inval; > + } > do { > if (OOB(*s)) goto inval; > ps=st; > @@ -547,6 +560,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, > switch(t) { > case 'n': > switch(ps) { > + case PLAIN: > case BARE: *(int *)arg.p = cnt; break; > case LPRE: *(long *)arg.p = cnt; break; > case LLPRE: *(long long *)arg.p = cnt; break; OK, some numbers. This alt version (with a couple bug fixes and support for wf added), vs the full state machine version. On i386, using this instead of the full state machine (which is still missing 'wf') adds 246 bytes of .text but saves 440 bytes of .rodata. With wf added, it will be even more of a win. So I think it makes more sense to go with this version. As an aside, since think the state table is the same for wide printf as for normal, and since wide printf already depends on normal printf, we could make the states table external (but hidden, of course) and let wide printf reuse it, for some decent size savings and elimination of source-level redundancy (DRY). Rich ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-06-02 14:29 ` Rich Felker @ 2023-06-02 14:55 ` Jₑₙₛ Gustedt 2023-06-02 15:07 ` Rich Felker 0 siblings, 1 reply; 23+ messages in thread From: Jₑₙₛ Gustedt @ 2023-06-02 14:55 UTC (permalink / raw) To: Rich Felker; +Cc: musl [-- Attachment #1: Type: text/plain, Size: 1500 bytes --] Rich, on Fri, 2 Jun 2023 10:29:37 -0400 you (Rich Felker <dalias@libc.org>) wrote: > ... > OK, some numbers. This alt version (with a couple bug fixes and > support for wf added), vs the full state machine version. On i386, > using this instead of the full state machine (which is still missing > 'wf') adds 246 bytes of .text but saves 440 bytes of .rodata. With wf > added, it will be even more of a win. So I think it makes more sense > to go with this version. Ok, sounds promissing. Some remarks - I'd still prefer to name the constant `WPRE` instead of `PLAIN` for documentation reasons. - It seems that your version doesn't capture the leading 0 case. That would still need an `if` condition that leads to `invalid`, I guess. > As an aside, since think the state table is the same for wide printf > as for normal, and since wide printf already depends on normal printf, > we could make the states table external (but hidden, of course) and > let wide printf reuse it, for some decent size savings and elimination > of source-level redundancy (DRY). Same holds for the `pop_arg` function, which is also repeated. Thanks Jₑₙₛ -- :: ICube :::::::::::::::::::::::::::::: deputy director :: :: Université de Strasbourg :::::::::::::::::::::: ICPS :: :: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus :: :: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 :: :: https://icube-icps.unistra.fr/index.php/Jens_Gustedt :: [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 195 bytes --] ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf 2023-06-02 14:55 ` Jₑₙₛ Gustedt @ 2023-06-02 15:07 ` Rich Felker 0 siblings, 0 replies; 23+ messages in thread From: Rich Felker @ 2023-06-02 15:07 UTC (permalink / raw) To: Jₑₙₛ Gustedt; +Cc: musl On Fri, Jun 02, 2023 at 04:55:47PM +0200, Jₑₙₛ Gustedt wrote: > Rich, > > on Fri, 2 Jun 2023 10:29:37 -0400 you (Rich Felker <dalias@libc.org>) > wrote: > > > ... > > > OK, some numbers. This alt version (with a couple bug fixes and > > support for wf added), vs the full state machine version. On i386, > > using this instead of the full state machine (which is still missing > > 'wf') adds 246 bytes of .text but saves 440 bytes of .rodata. With wf > > added, it will be even more of a win. So I think it makes more sense > > to go with this version. > > Ok, sounds promissing. Some remarks > > - I'd still prefer to name the constant `WPRE` instead of `PLAIN` for > documentation reasons. Well it's not used for all w prefixes, only for w32/wf32/wf16. It's really a state for "we already encountered some explicit prefix that means plain int". I could call it W32PRE or something but that seems misleading or at least incomplete too...? > - It seems that your version doesn't capture the leading 0 case. That > would still need an `if` condition that leads to `invalid`, I > guess. It only enters the processing if (*s=='w' && s[1]-'1'<9u) which is false for w0... The subsequent automaton then rejects the unknown 'w'. In the fixed up version with wf support, I separated the 2 conditions (and fixed the failure to advance s before passing it to getint), so it now just has inside the 'w' processing block: if (*++s-'1'>=9u) goto inval; I'll send the full v2 shortly. > > As an aside, since think the state table is the same for wide printf > > as for normal, and since wide printf already depends on normal printf, > > we could make the states table external (but hidden, of course) and > > let wide printf reuse it, for some decent size savings and elimination > > of source-level redundancy (DRY). > > Same holds for the `pop_arg` function, which is also repeated. Nice find, thanks! Deduplicating this requires also moving the union type to a place it can be shared, but this is reasonable I think and gives a place to declare the functions too. Rich ^ permalink raw reply [flat|nested] 23+ messages in thread
end of thread, other threads:[~2023-06-02 15:37 UTC | newest] Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- [not found] <cover.1685429467.git.Jens.Gustedt@inria.fr> 2023-05-30 6:55 ` [musl] [C23 printf 1/3] C23: implement the b and B printf specifiers Jens Gustedt 2023-05-30 6:55 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt 2023-06-02 14:38 ` Rich Felker 2023-06-02 15:09 ` Jₑₙₛ Gustedt 2023-06-02 15:16 ` Rich Felker 2023-06-02 15:37 ` Jₑₙₛ Gustedt 2023-05-30 6:55 ` [musl] [C23 printf 3/3] C23: implement the wfN length modifiers " Jens Gustedt 2023-05-31 14:04 [musl] [C23 printf 0/3] to be replaced Jens Gustedt 2023-05-31 14:04 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt -- strict thread matches above, loose matches on Subject: below -- 2023-05-26 19:41 [musl] [C23 printf 0/3] Jens Gustedt 2023-05-26 19:41 ` [musl] [C23 printf 2/3] C23: implement the wN length specifiers for printf Jens Gustedt 2023-05-26 20:31 ` Rich Felker 2023-05-26 20:51 ` Jₑₙₛ Gustedt 2023-05-26 21:03 ` Rich Felker 2023-05-29 7:14 ` Jₑₙₛ Gustedt 2023-05-29 15:46 ` Rich Felker 2023-05-29 19:21 ` Jₑₙₛ Gustedt 2023-05-30 1:48 ` Rich Felker 2023-05-30 6:46 ` Jₑₙₛ Gustedt 2023-05-30 17:28 ` Rich Felker 2023-05-30 18:00 ` Rich Felker 2023-05-31 7:59 ` Jₑₙₛ Gustedt 2023-06-02 14:29 ` Rich Felker 2023-06-02 14:55 ` Jₑₙₛ Gustedt 2023-06-02 15:07 ` Rich Felker
Code repositories for project(s) associated with this public inbox https://git.vuxu.org/mirror/musl/ This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).