[musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers

mailing list of musl libc
 help / color / mirror / code / Atom feed

* [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
@ 2022-09-08 16:36 Gabriel Ravier
  2022-09-12 13:59 ` Rich Felker
                   ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Gabriel Ravier @ 2022-09-08 16:36 UTC (permalink / raw)
  To: gravier, musl; +Cc: Gabriel Ravier

These specifiers allow for formatted input/output of binary integers,
and have been added to C2x with N2630. The uppercase B specifier is
not formally required by C2x, as only lowercase specifiers were
reserved by C, and thus an implementation could have been using
uppercase B for an extension of their own, but C2x still has a note
saying that it is recommended practice to implement it as the logical
counterpart to b.

I have tested this on:
- glibc's tests for %b and %B
- The libc testsuite I'm developing over at https://github.com/GabrielRavier/yalibct
- musl's libc-test
- musl's libc-testsuite

and observed no regressions.
---
 src/stdio/vfprintf.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index 9b961e7f..89426b72 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -49,7 +49,7 @@ enum {
 static const unsigned char states[]['z'-'A'+1] = {
 	{ /* 0: bare types */
 		S('d') = INT, S('i') = INT,
-		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
+		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT, S('b') = UINT, S('B') = UINT,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
 		S('E') = DBL, S('F') = DBL, S('G') = DBL, S('A') = DBL,
 		S('c') = CHAR, S('C') = INT,
@@ -59,7 +59,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
 	}, { /* 1: l-prefixed */
 		S('d') = LONG, S('i') = LONG,
-		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
+		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG, S('b') = ULONG, S('B') = ULONG,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
 		S('E') = DBL, S('F') = DBL, S('G') = DBL, S('A') = DBL,
 		S('c') = INT, S('s') = PTR, S('n') = PTR,
@@ -68,17 +68,20 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('d') = LLONG, S('i') = LLONG,
 		S('o') = ULLONG, S('u') = ULLONG,
 		S('x') = ULLONG, S('X') = ULLONG,
+		S('b') = ULLONG, S('B') = ULLONG,
 		S('n') = PTR,
 	}, { /* 3: h-prefixed */
 		S('d') = SHORT, S('i') = SHORT,
 		S('o') = USHORT, S('u') = USHORT,
 		S('x') = USHORT, S('X') = USHORT,
+		S('b') = USHORT, S('B') = USHORT,
 		S('n') = PTR,
 		S('h') = HHPRE,
 	}, { /* 4: hh-prefixed */
 		S('d') = CHAR, S('i') = CHAR,
 		S('o') = UCHAR, S('u') = UCHAR,
 		S('x') = UCHAR, S('X') = UCHAR,
+		S('b') = UCHAR, S('B') = UCHAR,
 		S('n') = PTR,
 	}, { /* 5: L-prefixed */
 		S('e') = LDBL, S('f') = LDBL, S('g') = LDBL, S('a') = LDBL,
@@ -88,11 +91,13 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('d') = PDIFF, S('i') = PDIFF,
 		S('o') = SIZET, S('u') = SIZET,
 		S('x') = SIZET, S('X') = SIZET,
+		S('b') = SIZET, S('B') = SIZET,
 		S('n') = PTR,
 	}, { /* 7: j-prefixed */
 		S('d') = IMAX, S('i') = IMAX,
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
+		S('b') = UMAX, S('B') = UMAX,
 		S('n') = PTR,
 	}
 };
@@ -162,6 +167,12 @@ static char *fmt_o(uintmax_t x, char *s)
 	return s;
 }
 
+static char *fmt_b(uintmax_t x, char *s)
+{
+	for (; x; x>>=1) *--s = '0' + (x&1);
+	return s;
+}
+
 static char *fmt_u(uintmax_t x, char *s)
 {
 	unsigned long y;
@@ -529,7 +540,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		if (!f) continue;
 
 		z = buf + sizeof(buf);
-		prefix = "-+   0X0x";
+		prefix = "-+   0X0x0B0b";
 		pl = 0;
 		t = s[-1];
 
@@ -559,6 +570,10 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			a = fmt_x(arg.i, z, t&32);
 			if (arg.i && (fl & ALT_FORM)) prefix+=(t>>4), pl=2;
 			if (0) {
+		case 'b': case 'B':
+			a = fmt_b(arg.i, z);
+			if (arg.i && (fl & ALT_FORM)) prefix += 9 + ((t == 'b') << 1), pl=2;
+			} if (0) {
 		case 'o':
 			a = fmt_o(arg.i, z);
 			if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1;
-- 
2.37.3


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-08 16:36 [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers Gabriel Ravier
@ 2022-09-12 13:59 ` Rich Felker
  2022-09-12 14:42   ` Jₑₙₛ Gustedt
  2023-04-15 12:28 ` [musl] [PATCH v2 0/1] " Gabriel Ravier
  2024-08-27 23:12 ` [musl] [PATCH v3 0/1] vfprintf: support C23 b and B " Gabriel Ravier
  2 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2022-09-12 13:59 UTC (permalink / raw)
  To: Gabriel Ravier; +Cc: gravier, musl

On Thu, Sep 08, 2022 at 06:36:49PM +0200, Gabriel Ravier wrote:
> These specifiers allow for formatted input/output of binary integers,
> and have been added to C2x with N2630. The uppercase B specifier is
> not formally required by C2x, as only lowercase specifiers were
> reserved by C, and thus an implementation could have been using
> uppercase B for an extension of their own, but C2x still has a note
> saying that it is recommended practice to implement it as the logical
> counterpart to b.
> 
> I have tested this on:
> - glibc's tests for %b and %B
> - The libc testsuite I'm developing over at https://github.com/GabrielRavier/yalibct
> - musl's libc-test
> - musl's libc-testsuite
> 
> and observed no regressions.
> ---
>  src/stdio/vfprintf.c | 21 ++++++++++++++++++---
>  1 file changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
> index 9b961e7f..89426b72 100644
> --- a/src/stdio/vfprintf.c
> +++ b/src/stdio/vfprintf.c
> @@ -49,7 +49,7 @@ enum {
>  static const unsigned char states[]['z'-'A'+1] = {
>  	{ /* 0: bare types */
>  		S('d') = INT, S('i') = INT,
> -		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
> +		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT, S('b') = UINT, S('B') = UINT,
>  		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
>  		S('E') = DBL, S('F') = DBL, S('G') = DBL, S('A') = DBL,
>  		S('c') = CHAR, S('C') = INT,
> @@ -59,7 +59,7 @@ static const unsigned char states[]['z'-'A'+1] = {
>  		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
>  	}, { /* 1: l-prefixed */
>  		S('d') = LONG, S('i') = LONG,
> -		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
> +		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG, S('b') = ULONG, S('B') = ULONG,
>  		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
>  		S('E') = DBL, S('F') = DBL, S('G') = DBL, S('A') = DBL,
>  		S('c') = INT, S('s') = PTR, S('n') = PTR,
> @@ -68,17 +68,20 @@ static const unsigned char states[]['z'-'A'+1] = {
>  		S('d') = LLONG, S('i') = LLONG,
>  		S('o') = ULLONG, S('u') = ULLONG,
>  		S('x') = ULLONG, S('X') = ULLONG,
> +		S('b') = ULLONG, S('B') = ULLONG,
>  		S('n') = PTR,
>  	}, { /* 3: h-prefixed */
>  		S('d') = SHORT, S('i') = SHORT,
>  		S('o') = USHORT, S('u') = USHORT,
>  		S('x') = USHORT, S('X') = USHORT,
> +		S('b') = USHORT, S('B') = USHORT,
>  		S('n') = PTR,
>  		S('h') = HHPRE,
>  	}, { /* 4: hh-prefixed */
>  		S('d') = CHAR, S('i') = CHAR,
>  		S('o') = UCHAR, S('u') = UCHAR,
>  		S('x') = UCHAR, S('X') = UCHAR,
> +		S('b') = UCHAR, S('B') = UCHAR,
>  		S('n') = PTR,
>  	}, { /* 5: L-prefixed */
>  		S('e') = LDBL, S('f') = LDBL, S('g') = LDBL, S('a') = LDBL,
> @@ -88,11 +91,13 @@ static const unsigned char states[]['z'-'A'+1] = {
>  		S('d') = PDIFF, S('i') = PDIFF,
>  		S('o') = SIZET, S('u') = SIZET,
>  		S('x') = SIZET, S('X') = SIZET,
> +		S('b') = SIZET, S('B') = SIZET,
>  		S('n') = PTR,
>  	}, { /* 7: j-prefixed */
>  		S('d') = IMAX, S('i') = IMAX,
>  		S('o') = UMAX, S('u') = UMAX,
>  		S('x') = UMAX, S('X') = UMAX,
> +		S('b') = UMAX, S('B') = UMAX,
>  		S('n') = PTR,
>  	}
>  };
> @@ -162,6 +167,12 @@ static char *fmt_o(uintmax_t x, char *s)
>  	return s;
>  }
>  
> +static char *fmt_b(uintmax_t x, char *s)
> +{
> +	for (; x; x>>=1) *--s = '0' + (x&1);
> +	return s;
> +}
> +
>  static char *fmt_u(uintmax_t x, char *s)
>  {
>  	unsigned long y;
> @@ -529,7 +540,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
>  		if (!f) continue;
>  
>  		z = buf + sizeof(buf);
> -		prefix = "-+   0X0x";
> +		prefix = "-+   0X0x0B0b";
>  		pl = 0;
>  		t = s[-1];
>  
> @@ -559,6 +570,10 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
>  			a = fmt_x(arg.i, z, t&32);
>  			if (arg.i && (fl & ALT_FORM)) prefix+=(t>>4), pl=2;
>  			if (0) {
> +		case 'b': case 'B':
> +			a = fmt_b(arg.i, z);
> +			if (arg.i && (fl & ALT_FORM)) prefix += 9 + ((t == 'b') << 1), pl=2;
> +			} if (0) {
>  		case 'o':
>  			a = fmt_o(arg.i, z);
>  			if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1;
> -- 
> 2.37.3

I'm not sure what the schedule on taking this or other C2x changes
relative to the standard becoming official should be, so we should
probably discuss that at some point. The only real hard concern is
that we need to be careful not to take anything where the standard
mandated behavior might change before it's final.

For the above patch specifically:

1. There's a buffer overflow because you did not adjust the size
   formula for buf[]. Presently it's only 40-55 bytes (due to the
   inclusion of LDBL_MANT_DIG/4 in there, which is almost surely cruft
   predating the initial release) while a binary-form string requires
   at least 64 bytes.

2. Presumably the wide printf forms need to accept %b too. Does scanf
   need to accept them too? I think those would all be easy changes,
   since scanf already has the strto* core with arbitrary base
   available to it, and wide printf just calls back to the narrow one.

I don't see anything else immediately wrong. No rush to submit an
updated patch. Let's first figure out the timeline for C2x features,
and I'll try to figure out what's going on with the weird buf[] size
expression and clean it up separately so that your patch is just
adding the feature and not also fighting with historical cruft.

Rich

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-12 13:59 ` Rich Felker
@ 2022-09-12 14:42   ` Jₑₙₛ Gustedt
  2022-09-19 15:09     ` Rich Felker
  0 siblings, 1 reply; 26+ messages in thread
From: Jₑₙₛ Gustedt @ 2022-09-12 14:42 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 2201 bytes --]

Rich,

on Mon, 12 Sep 2022 09:59:04 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> I don't see anything else immediately wrong. No rush to submit an
> updated patch. Let's first figure out the timeline for C2x features,
> and I'll try to figure out what's going on with the weird buf[] size
> expression and clean it up separately so that your patch is just
> adding the feature and not also fighting with historical cruft.

The specification of C23 was finished in July; what follows now
in WG14 is merely editorial on the standard text itself and should not
have much impact on how things should be implemented. The current
draft is here

      https://open-std.org/jtc1/sc22/wg14/www/docs/n3047.pdf

and any changes from there should not have impact on implementations,
unless a majority of the national bodies imposes withdrawal of
specific new features. The timeline is publication at the end of 2023, so
this leaves basically one year to have things ready when the new
standard is published.

Already for `printf` and friends there are several additions, not only
the `b` formats but, e.g., also formats for the fixed-width types and
for the new bit-precise types.

Otherwise, there are changes to the C library all over the place, but
in general these are small and should be doable with limited work
load, in particular for implementations that also implement
POSIX. Those headers that have changes also are now supposed to have a
feature macro that describes the version for which a header works,
such that support for C23 can be added stepwise to C library
implementations.

There is one big addition, though, for the decimal floating point
types, but support here is optional. I don't know if there is already
an open stand-alone implementation of that stuff, but I think that
that should be the goal, instead of repeating the effort for every C
library implementation that is out there.

Jₑₙₛ

-- 
:: INRIA Nancy Grand Est ::: Camus ::::::: ICube/ICPS :::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-12 14:42   ` Jₑₙₛ Gustedt
@ 2022-09-19 15:09     ` Rich Felker
  2022-09-19 17:59       ` Szabolcs Nagy
  0 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2022-09-19 15:09 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Mon, Sep 12, 2022 at 04:42:51PM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Mon, 12 Sep 2022 09:59:04 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > I don't see anything else immediately wrong. No rush to submit an
> > updated patch. Let's first figure out the timeline for C2x features,
> > and I'll try to figure out what's going on with the weird buf[] size
> > expression and clean it up separately so that your patch is just
> > adding the feature and not also fighting with historical cruft.
> 
> The specification of C23 has been finished in July, what follows now
> in WG14 is merely editorial on the standard text itself and should not
> have much impact on how things should be implemented. The current
> draft is here
> 
>       https://open-std.org/jtc1/sc22/wg14/www/docs/n3047.pdf
> 
> and any changes from there should not have impact on implementations,
> unless a majority of the national bodies imposes withdrawal of
> specific new features. The timeline is publication in end of 2023, so
> this leaves basically one year to have things ready when the new
> standard is published
> 
> Already for `printf` and friends there are several additions, not only
> the `b` formats but, e.g, also formats for the fixed-width types and
> for the new bit-precise types.
> 
> Otherwise, there are changes to the C library all over the place, but
> in general these are small and should be doable with limited work
> load, in particular for implementations that also implement
> POSIX. Those headers that have changes also are now supposed to have a
> feature macro that describes the version for which a header works,
> such that support for C23 can be added stepwise to C library
> implementations.
> 
> There is one big addition, though, for the decimal floating point
> types, but support here is optional. I don't know if there is already
> an open stand-alone implementation of that stuff, but I think that
> that should be the goal, instead of repeating the effort for every C
> library implementation that is out there.

What do these entail? It looks like there's a requirement for printf
to support them, so I don't see how you'd do that as a separate
library. It looks like __STDC_IEC_60559_DFP__ is optional though, so
maybe we could just decline to define it and leave the support
sporadic at the level the compiler supports, as an extension rather
than part of the standard-specified functionality?

Implementing any of this seems rather blocking since it depends on new
types at the compiler level for the compiler compiling libc. The only
way out from that is using asm thunks. For just the printf case, we
could do it with an arch-provided asm fragment that grabs the decimal
float from the va_list, which would be fairly low-cost to add/maintain
and allow the rest of the decimal float library code to be relegated
to a separate standalone library like you suggested. The only part of
printf that looks hard to implement for decimal floats is %a, and from
a quick glance it looks like the reverse direction in scanf/strto*
doesn't support hex for decimal floats?

Rich

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-19 15:09     ` Rich Felker
@ 2022-09-19 17:59       ` Szabolcs Nagy
  2022-09-19 18:10         ` Rich Felker
  0 siblings, 1 reply; 26+ messages in thread
From: Szabolcs Nagy @ 2022-09-19 17:59 UTC (permalink / raw)
  To: Rich Felker; +Cc: Jₑₙₛ Gustedt, musl

* Rich Felker <dalias@libc.org> [2022-09-19 11:09:17 -0400]:
> On Mon, Sep 12, 2022 at 04:42:51PM +0200, Jₑₙₛ Gustedt wrote:
> > There is one big addition, though, for the decimal floating point
> > types, but support here is optional. I don't know if there is already
> > an open stand-alone implementation of that stuff, but I think that
> > that should be the goal, instead of repeating the effort for every C
> > library implementation that is out there.
> 
> What do these entail? It looks like there's a requirement for printf
> to support them, so I don't see how you'd do that as a separate
> library. It looks like __STDC_IEC_60559_DFP__ is optional though, so
> maybe we could just decline to define it and leave the support
> sporadic at the level the compiler supports, as an extension rather
> than part of the standard-specified functionality?

it seems there is
https://github.com/libdfp/libdfp/tree/master/printf-hooks
using glibc specific apis (register_printf_specifier)

i'm not sure how musl can handle this internally since
we don't know in advance if the user links against libdfp.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-19 17:59       ` Szabolcs Nagy
@ 2022-09-19 18:10         ` Rich Felker
  2022-09-20  9:19           ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2022-09-19 18:10 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt, musl

On Mon, Sep 19, 2022 at 07:59:52PM +0200, Szabolcs Nagy wrote:
> * Rich Felker <dalias@libc.org> [2022-09-19 11:09:17 -0400]:
> > On Mon, Sep 12, 2022 at 04:42:51PM +0200, Jₑₙₛ Gustedt wrote:
> > > There is one big addition, though, for the decimal floating point
> > > types, but support here is optional. I don't know if there is already
> > > an open stand-alone implementation of that stuff, but I think that
> > > that should be the goal, instead of repeating the effort for every C
> > > library implementation that is out there.
> > 
> > What do these entail? It looks like there's a requirement for printf
> > to support them, so I don't see how you'd do that as a separate
> > library. It looks like __STDC_IEC_60559_DFP__ is optional though, so
> > maybe we could just decline to define it and leave the support
> > sporadic at the level the compiler supports, as an extension rather
> > than part of the standard-specified functionality?
> 
> it seems there is
> https://github.com/libdfp/libdfp/tree/master/printf-hooks
> using glibc specific apis (register_printf_specifier)
> 
> i'm not sure how musl can handle this internally since
> we dont know in advance if the user links against libdfp.

Yeah, I don't see that as being a usable approach. It's closely tied
to the glibc printf model that's not usable in bounded memory with
arbitrary width and precision, and not compatible with linking
semantics as you mention. The amount of code needed for decimal float
printing in decimal is minuscule anyway and something we can easily do
with no actual decimal floating point code. I thought the hard case
was hex, but looking at the spec again, %a doesn't actually do hex for
decimal floats, so it should be easy too.

Rich

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-19 18:10         ` Rich Felker
@ 2022-09-20  9:19           ` Jₑₙₛ Gustedt
  2022-09-20 12:28             ` Rich Felker
  0 siblings, 1 reply; 26+ messages in thread
From: Jₑₙₛ Gustedt @ 2022-09-20  9:19 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 4121 bytes --]

Rich,

on Mon, 19 Sep 2022 14:10:39 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> On Mon, Sep 19, 2022 at 07:59:52PM +0200, Szabolcs Nagy wrote:
> > * Rich Felker <dalias@libc.org> [2022-09-19 11:09:17 -0400]:  
> > > On Mon, Sep 12, 2022 at 04:42:51PM +0200, Jₑₙₛ Gustedt wrote:  
>  [...]  
> > > 
> > > What do these entail? It looks like there's a requirement for
> > > printf to support them, so I don't see how you'd do that as a
> > > separate library. It looks like __STDC_IEC_60559_DFP__ is
> > > optional though, so maybe we could just decline to define it and
> > > leave the support sporadic at the level the compiler supports, as
> > > an extension rather than part of the standard-specified
> > > functionality?  
> > 
> > it seems there is
> > https://github.com/libdfp/libdfp/tree/master/printf-hooks
> > using glibc specific apis (register_printf_specifier)
> > 
> > i'm not sure how musl can handle this internally since
> > we dont know in advance if the user links against libdfp.  
> 
> Yeah, I don't see that as being a usable approach. It's closely tied
> to the glibc printf model that's not usable in bounded memory with
> arbitrary width and precision, and not compatible with linking
> semantics as you mention. The amount of code needed for decimal float
> printing in decimal is miniscule anyway and something we can easily do
> with no actual decimal floating point code. I thought the hard case
> was hex, but looking at the spec again, %a doesn't actually do hex for
> decimal floats, so it should be easy too.

Yes exactly. There is nothing conceptually difficult here and nothing
that should not be in some form or another already in every C library.

So yes, sorry, for the separate library part I forgot formatted IO and
string functions. But the huge amount of functions that are added for
these types are math functions (I guess something like 600 or so)
stepping on user's identifier space all over.

Unfortunately, again as for complex types, the standard doesn't
properly distinguish language support for the new optional types and
library support. I really would have preferred to have the whole thing
in a separate header, but my voice echoed in the void. There are the
`__STDC_VERSION_…_H__` macros now, so this gives at least some sort of
feature test.

But for implementing the parts that are outside of math, things should
indeed not be so difficult. gcc has support for the types since long,
I think, and should also provide predefined macros that could be used
to check for language support. Then, the types themselves have clear
definition and prescribed representation, the ABI is de-facto sorted
out, so there would be not much other implementation dependency to
worry about.

Other types that come with C23, and these are mandatory, are
bit-precise integers. There the support by compilers is probably not
yet completely established. I know of an integration into llvm, but I
am not sure about the state of affairs for gcc, nor if there is a
de-facto agreement on ABI issues. In any case, these types need
support in formatted IO, too.

Also, C23, provides the possibility for extended integer types that
are wider than `[u]intmax_t` under some conditions. This is intended
in particular to allow for implementations such as gcc on x86_64 to
interface the existing 128 bit integer types properly as
`[u]int128_t`. From a C library POV, these then also would need
integration into formatted IO, but here again support in the compiler
with usable feature test macros is there for ages and the ABI should
already be sorted out.

So in summary that means that there is some work to do to make
formatted IO of C libraries become compliant with C23. Let me know if
and where I could help to make that happen for musl.

Thanks
Jₑₙₛ

-- 
:: INRIA Nancy Grand Est ::: Camus ::::::: ICube/ICPS :::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-20  9:19           ` Jₑₙₛ Gustedt
@ 2022-09-20 12:28             ` Rich Felker
  2022-09-20 13:29               ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2022-09-20 12:28 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Tue, Sep 20, 2022 at 11:19:34AM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Mon, 19 Sep 2022 14:10:39 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > On Mon, Sep 19, 2022 at 07:59:52PM +0200, Szabolcs Nagy wrote:
> > > * Rich Felker <dalias@libc.org> [2022-09-19 11:09:17 -0400]:  
> > > > On Mon, Sep 12, 2022 at 04:42:51PM +0200, Jₑₙₛ Gustedt wrote:  
> >  [...]  
> > > > 
> > > > What do these entail? It looks like there's a requirement for
> > > > printf to support them, so I don't see how you'd do that as a
> > > > separate library. It looks like __STDC_IEC_60559_DFP__ is
> > > > optional though, so maybe we could just decline to define it and
> > > > leave the support sporadic at the level the compiler supports, as
> > > > an extension rather than part of the standard-specified
> > > > functionality?  
> > > 
> > > it seems there is
> > > https://github.com/libdfp/libdfp/tree/master/printf-hooks
> > > using glibc specific apis (register_printf_specifier)
> > > 
> > > i'm not sure how musl can handle this internally since
> > > we dont know in advance if the user links against libdfp.  
> > 
> > Yeah, I don't see that as being a usable approach. It's closely tied
> > to the glibc printf model that's not usable in bounded memory with
> > arbitrary width and precision, and not compatible with linking
> > semantics as you mention. The amount of code needed for decimal float
> > printing in decimal is miniscule anyway and something we can easily do
> > with no actual decimal floating point code. I thought the hard case
> > was hex, but looking at the spec again, %a doesn't actually do hex for
> > decimal floats, so it should be easy too.
> 
> Yes exactly. There is nothing conceptually difficult here and nothing
> that should not be in some form or another already in every C library.
> 
> So yes, sorry, for the separate library part I forgot formated IO and
> string functions. But the huge amount of functions that are added for
> these types are math functions (I guess something like 600 or so)
> stepping on user's identifier space all over.

Yes, I think it's fine for now to have a separate math library for the
math functions. Otherwise the work of adding these interfaces becomes
rather prohibitive. I would assume they're all pure functions where
correct implementations are basically interchangeable, so I don't see a
lot of value in insisting these "go with" libc.

> Unfortunately, again as for complex types, the standard doesn't
> properly distinguish language support for the new optional types and
> library support. I really would have preferred to have the whole thing
> in a separate header, but my voice echoed in the void. There are the
> `__STDC_VERSION_…_H__` macros now, so this gives at least some sort of
> feature test.

I can see both viewpoints as having good motivation, but yes it's
frustrating.

> But for implementing the parts that are outside of math, things should
> indeed not be so difficult. gcc has support for the types since long,
> I think, and should also provide predefined macros that could be used
> to check for language support. Then, the types themselves have clear
> definition and prescribed representation, the ABI is de-facto sorted
> out, so there would be not much other implementation dependency to
> worry about.

The thing is we don't have the option to "check for language support".
Doing that would mean you get a deficient musl build if your compiler
doesn't have the language features, so essentially we'd be requiring
bleeding-edge gcc or clang (dropping all other-compiler support at the
same time) to get a properly featured libc.so that's capable of
supporting arbitrary musl-linked binaries.

This is why we're going to need asm thunks for performing va_arg with
the new types and (programmatically generated, I assume) asm entry
thunks for accepting arguments to any non-variadic functions, which
can convert (ideally as a no-op) the decimal float type arguments to
integer-type or struct arguments the underlying implementation files
would then receive.

> Other types that come with C23, and these are mandatory, are
> bit-precise integers. There the support by compilers is probably not
> yet completely established. I know of an integration into llvm, but I
> am not sure about the state of affairs for gcc, nor if there is a
> de-facto agreement on ABI issues. In any case, these types need
> support in formatted IO, too.

As far as I can tell, the draft standard makes printf support for all
but the ones defined as [u]intNN_t a choice for the implementation, so
the obvious choice is not to support any additional ones.

> Also, C23, provides the possibility for extended integer types that
> are wider than `[u]intmax_t` under some conditions. This is intended
> in particular to allow for implementations such as gcc on x86_64 to
> interface the existing 128 bit integer types properly as
> `[u]int128_t`. From a C library POV, these then also would need
> integration into formatted IO, but here again support in the compiler
> with usable feature test macros is there for ages and the ABI should
> already be sorted out.

Yes. I haven't followed the latest on this but my leaning was to leave
them as "compiler extensions" that don't count as "extended integer
types". However presumably they could be handled the same way as
decimal floats if needed.

> So in summary that means that there is some work to do to make
> formatted IO of C libraries become compliant with C23. Let me know if
> and where I could help to make that happen for musl.

The big issue is probably collating the list of what's actually needed
to meet requirements, and what the ABIs for them are. If there's
cross-arch agreement on a general pattern ABIs follow for them, that
would be wonderful, and even if not entirely so, a general pattern
would advise how we structure the underlying functions (to make thunks
as minimal as possible on the largest number of archs).

Rich

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-20 12:28             ` Rich Felker
@ 2022-09-20 13:29               ` Jₑₙₛ Gustedt
  2022-09-20 13:55                 ` Rich Felker
  0 siblings, 1 reply; 26+ messages in thread
From: Jₑₙₛ Gustedt @ 2022-09-20 13:29 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 8030 bytes --]

Rich,

on Tue, 20 Sep 2022 08:28:29 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> On Tue, Sep 20, 2022 at 11:19:34AM +0200, Jₑₙₛ Gustedt wrote:
> > Rich,
> > 
> > on Mon, 19 Sep 2022 14:10:39 -0400 you (Rich Felker
> > <dalias@libc.org>) wrote:
> >   
> > > On Mon, Sep 19, 2022 at 07:59:52PM +0200, Szabolcs Nagy wrote:  
>  [...]  
>  [...]  
> > >  [...]    
>  [...]  
>  [...]  
> > > 
> > > Yeah, I don't see that as being a usable approach. It's closely
> > > tied to the glibc printf model that's not usable in bounded
> > > memory with arbitrary width and precision, and not compatible
> > > with linking semantics as you mention. The amount of code needed
> > > for decimal float printing in decimal is minuscule anyway and
> > > something we can easily do with no actual decimal floating point
> > > code. I thought the hard case was hex, but looking at the spec
> > > again, %a doesn't actually do hex for decimal floats, so it
> > > should be easy too.  
> > 
> > Yes exactly. There is nothing conceptually difficult here and
> > nothing that should not be in some form or another already in every
> > C library.
> > 
> > So yes, sorry, for the separate library part I forgot formatted IO
> > and string functions. But the huge amount of functions that are
> > added for these types are math functions (I guess something like
> > 600 or so) stepping on user's identifier space all over.  
> 
> Yes, I think it's fine for now to have a separate math library for the
> math functions. Otherwise the work of adding these interfaces becomes
> rather prohibitive. I would assume they're all pure functions where
> correct implementations are basically interchangeable, so I don't see a
> lot of value in insisting these "go with" libc.

Depends on your instantiation of "pure", but yes, these should be
mostly interchangeable. The only thing to worry about here is that there
are two possible representations for these types, one where the
mantissa is basically represented as an integer, and the other where
decimal digits are packed into groups of bytes in a clever way.

> …

> > But for implementing the parts that are outside of math, things
> > should indeed not be so difficult. gcc has support for the types
> > since long, I think, and should also provide predefined macros that
> > could be used to check for language support. Then, the types
> > themselves have clear definition and prescribed representation, the
> > ABI is de-facto sorted out, so there would be not much other
> > implementation dependency to worry about.  
> 
> The thing is we don't have the option to "check for language support".
> Doing that would mean you get a deficient musl build if your compiler
> doesn't have the language features, so essentially we'd be requiring
> bleeding-edge gcc or clang (dropping all other-compiler support at the
> same time) to get a properly featured libc.so that's capable of
> supporting arbitrary musl-linked binaries.

I don't think that this needs to be. If you add e.g support for
decimal floating point to `printf`, the compiler support for that only
has to be there on the platform where you compile musl. If a user
platform that uses such a library does not support it, that part will
simply never be called because users can't define variables of that
type. This increases the size of `printf` a bit, though, but my guess
is that this would be marginal compared to the size that `printf` has,
anyhow.

> This is why we're going to need asm thunks for performing va_arg with
> the new types and (programmatically generated, I assume) asm entry
> thunks for accepting arguments to any non-variadic functions, which
> can convert (ideally as a no-op) the decimal float type arguments to
> integer-type or struct arguments the underlying implementation files
> would then receive.

There are no C library functions other than in math.h, I think, that
accept decimal floating types as prototyped arguments. So if we don't
do math.h, only `printf` and similar remain with `va_arg` calling
conventions.

The only functions that have decimal floating return types are the
`strtodN` functions in 7.24.1.6, AFAICS.

So, yes, we'd have to extend `va_arg` with the necessary knowledge to
obtain a decimal floating point, but hopefully that is just the same
as obtaining access to other 32, 64 or 128 bit types.

> > Other types that come with C23, and these are mandatory, are
> > bit-precise integers. There the support by compilers is probably not
> > yet completely established. I know of an integration into llvm, but
> > I am not sure about the state of affairs for gcc, nor if there is a
> > de-facto agreement on ABI issues. In any case, these types need
> > support in formatted IO, too.  
> 
> As far as I can tell, the draft standard makes printf support for all
> but the ones defined as [u]intNN_t a choice for the implementation, so
> the obvious choice is not to support any additional ones.

(There are also the "fast" versions that have a different format
specifier, but which hopefully are basically the same as for the exact
width.)

I think for QoI it would be really good to support the bit-precise
types. These are a quite good design that avoids a lot of the
complications of the classical integer types. In particular we will
see them pop up for bit-fields and stuff like that, where they have
clearer semantics than the traditional ones and extend the
possibilities beyond the width of `int` to at least 64 bit.

> > Also, C23, provides the possibility for extended integer types that
> > are wider than `[u]intmax_t` under some conditions. This is intended
> > in particular to allow for implementations such as gcc on x86_64 to
> > interface the existing 128 bit integer types properly as
> > `[u]int128_t`. From a C library POV, these then also would need
> > integration into formatted IO, but here again support in the
> > compiler with usable feature test macros is there for ages and the
> > ABI should already be sorted out.  
> 
> Yes. I haven't followed the latest on this but my leaning was to leave
> them as "compiler extensions" that don't count as "extended integer
> types". However presumably they could be handled the same way as
> decimal floats if needed.

For one, this allows defining extended integer types in the sense of
the standard and to provide full support for them. But you are right
the approach could be the same as for decimal floating point: compile
them in if the compilation platform of the C library supports them.

> > So in summary that means that there is some work to do to make
> > formatted IO of C libraries become compliant with C23. Let me know
> > if and where I could help to make that happen for musl.  
> 
> The big issue is probably collating the list of what's actually needed
> to meet requirements,

that I could do

> and what the ABIs for them are.

that's where I am not an expert :-((

> If there's cross-arch agreement on a general pattern ABIs follow for
> them, that would be wonderful, and even if not entirely so, a
> general pattern would advise how we structure the underlying
> functions (to make thunks as minimal as possible on the largest
> number of archs).

My guess for that is that the decimal floating point types are just
handled by their respective width, and that the bit-precise integer
types of width N will be rounded up to the next power of two M and use
representation and calling convention for `uintM_t`. But that would of
course have to be verified. I can ask Aaron (who wrote this stuff and
has provided the implementation in llvm) how that is actually done
there.

Jₑₙₛ

-- 
:: INRIA Nancy Grand Est ::: Camus ::::::: ICube/ICPS :::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-20 13:29               ` Jₑₙₛ Gustedt
@ 2022-09-20 13:55                 ` Rich Felker
  2022-09-20 14:08                   ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2022-09-20 13:55 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Tue, Sep 20, 2022 at 03:29:29PM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Tue, 20 Sep 2022 08:28:29 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > On Tue, Sep 20, 2022 at 11:19:34AM +0200, Jₑₙₛ Gustedt wrote:
> > > Rich,
> > > 
> > > on Mon, 19 Sep 2022 14:10:39 -0400 you (Rich Felker
> > > <dalias@libc.org>) wrote:
> > >   
> > > > On Mon, Sep 19, 2022 at 07:59:52PM +0200, Szabolcs Nagy wrote:  
> > > So yes, sorry, for the separate library part I forgot formatted IO
> > > and string functions. But the huge amount of functions that are
> > > added for these types are math functions (I guess something like
> > > 600 or so) stepping on user's identifier space all over.  
> > 
> > Yes, I think it's fine for now to have a separate math library for the
> > math functions. Otherwise the work of adding these interfaces becomes
> > rather prohibitive. I would assume they're all pure functions where
> > correct implementations are basically interchangeable, so I don't see a
> > lot of value in insisting these "go with" libc.
> 
> Depends on your instantiation of "pure", but yes, these should be
> mostly interchangeable. The only thing to worry about here is that there
> are two possible representations for these types, one where the
> mantissa is basically represented as an integer, and the other where
> decimal digits are packed into groups of bytes in a clever way.

Well the one in use is presumably defined by the psABI. Does it
actually vary by arch, or is there a common form that everyone
reasonable has agreed upon? Note that we're not considering the full
generality the C standard allows, just a single family of platforms.

> > > But for implementing the parts that are outside of math, things
> > > should indeed not be so difficult. gcc has support for the types
> > > since long, I think, and should also provide predefined macros that
> > > could be used to check for language support. Then, the types
> > > themselves have clear definition and prescribed representation, the
> > > ABI is de-facto sorted out, so there would be not much other
> > > implementation dependency to worry about.  
> > 
> > The thing is we don't have the option to "check for language support".
> > Doing that would mean you get a deficient musl build if your compiler
> > doesn't have the language features, so essentially we'd be requiring
> > bleeding-edge gcc or clang (dropping all other-compiler support at the
> > same time) to get a properly featured libc.so that's capable of
> > supporting arbitrary musl-linked binaries.
> 
> I don't think that this needs to be. If you add e.g support for
> decimal floating point to `printf`, the compiler support for that only
> has to be there on the platform where you compile musl. If a user
> platform that uses such a library does not support it, that part will
> simply never be called because users can't define variables of that
> type. This increases the size of `printf` a bit, though, but my guess
> is that this would be marginal compared to the size that `printf` has,
> anyhow.

You're looking at it from a perspective of producing your own software
linked to musl. I'm looking at it from a perspective of building your
own build of musl to execute existing dynamic-linked binaries outside
your control. Users building musl need to be able to build a
feature-complete version regardless of their compiler. We don't have
different interface/feature profiles based on which compiler version
you built with or any build-time options, and not having that is very
intentional.

> > This is why we're going to need asm thunks for performing va_arg with
> > the new types and (programmatically generated, I assume) asm entry
> > thunks for accepting arguments to any non-variadic functions, which
> > can convert (ideally as a no-op) the decimal float type arguments to
> > integer-type or struct arguments the underlying implementation files
> > would then receive.
> 
> There are no C library functions other than in math.h, I think, that
> accept decimal floating types as prototyped arguments. So if we don't
> do math.h, only `printf` and similar remain with `va_arg` calling
> conventions.
> 
> The only functions that have decimal floating return types are the
> `strtodN` functions in 7.24.1.6, AFAICS.
> 
> So, yes, we'd have to extend `va_arg` with the necessary knowledge to
> obtain a decimal floating point, but hopefully that is just the same
> as obtaining access to other 32, 64 or 128 bit types.

Oh, nice -- I didn't realize they only appear as return values not
arguments. Unfortunately, return values are worse. In order to do the
thunk, you need to setup a call frame and possibly munge the return
value into the storage it belongs in to meet the ABI requirement. For
arguments, on the other hand, it's likely just a tail-call.

> > > Other types that come with C23, and these are mandatory, are
> > > bit-precise integers. There the support by compilers is probably not
> > > yet completely established. I know of an integration into llvm, but
> > > I am not sure about the state of affairs for gcc, nor if there is a
> > > de-facto agreement on ABI issues. In any case, these types need
> > > support in formatted IO, too.  
> > 
> > As far as I can tell, the draft standard makes printf support for all
> > but the ones defined as [u]intNN_t a choice for the implementation, so
> > the obvious choice is not to support any additional ones.
> 
> (There are also the "fast" versions that have a different format
> specifier, but which hopefully are basically the same as for the exact
> width.)
> 
> I think for QoI it would be really good to support the bit-precise
> types. These are a quite good design that avoids a lot of the
> complications of the classical integer types. In particular we will
> see them pop up for bit-fields and stuff like that, where they have
> clearer semantics than the traditional ones and extend the
> possibilities beyond the width of `int` to at least 64 bit.

Surely there are going to be all sorts of ABI issues with passing them
as arguments. The right thing to do, as an application programmer
writing a portable program, is the same as when printing types without
a format specifier like time_t: just cast them up to a maximal-size
type that you know is supported and can represent the value. There is
no sense in optimizing the type you pass these things as when the
operation you're passing them for is as "big" as printf.

> > > Also, C23, provides the possibility for extended integer types that
> > > are wider than `[u]intmax_t` under some conditions. This is intended
> > > in particular to allow for implementations such as gcc on x86_64 to
> > > interface the existing 128 bit integer types properly as
> > > `[u]int128_t`. From a C library POV, these then also would need
> > > integration into formatted IO, but here again support in the
> > > compiler with usable feature test macros is there for ages and the
> > > ABI should already be sorted out.  
> > 
> > Yes. I haven't followed the latest on this but my leaning was to leave
> > them as "compiler extensions" that don't count as "extended integer
> > types". However presumably they could be handled the same way as
> > decimal floats if needed.
> 
> For one, this allows defining extended integer types in the sense of
> the standard and to provide full support for them. But you are right
> the approach could be the same as for decimal floating point: compile
> them in if the compilation platform of the C library supports them.
> 
> > > So in summary that means that there is some work to do to make
> > > formatted IO of C libraries become compliant with C23. Let me know
> > > if and where I could help to make that happen for musl.  
> > 
> > The big issue is probably collating the list of what's actually needed
> > to meet requirements,
> 
> that I could do

Thanks!

> > and what the ABIs for them are.
> 
> that's where I am not an expert :-((

OK, hopefully someone else is.

> > If there's cross-arch agreement on a general pattern ABIs follow for
> > them, that would be wonderful, and even if not entirely so, a
> > general pattern would advise how we structure the underlying
> > functions (to make thunks as minimal as possible on the largest
> > number of archs).
> 
> My guess for that is that the decimal floating point types are just
> handled by their respective width, and that the bit-precise integer

Unfortunately in most ABIs the convention for passing a type is not
just a function of its size. Floating point types are usually passed
in different registers, and aggregate types often have complex
conventions for whether they're passed in registers or on the stack
depending on their member types and sizes. So, for decimal float, it's
probably the same as *some* N-bit type, but which one? :)

> types of width N will be rounded up to the next power of two M and use
> representation and calling convention for `uintM_t`. But that would of
> course have to be verified. I can ask Aaron (who wrote this stuff and
> has provided the implementation in llvm) how that is actually done
> there.

If the width-N stuff is uniform like that, it might be possible to
support arbitrary in-range N, but again I'm skeptical of the value
especially when applications cannot portably use any Ns except the
ones with macros defined in stdint.h.

In general, offering non-portable functionality that applications
can't already generally expect to have on popular systems, with no way
to probe for availability, does not seem useful, and it's even less
useful when there's a trivial portable way to do the same thing.

Rich

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-20 13:55                 ` Rich Felker
@ 2022-09-20 14:08                   ` Jₑₙₛ Gustedt
  2022-09-20 14:15                     ` Rich Felker
  0 siblings, 1 reply; 26+ messages in thread
From: Jₑₙₛ Gustedt @ 2022-09-20 14:08 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 1023 bytes --]

Rich,

on Tue, 20 Sep 2022 09:55:54 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> In general, offering non-portable functionality that applications
> can't already generally expect to have on popular systems, with no way
> to probe for availability, does not seem useful, and it's even less
> useful when there's a trivial portable way to do the same thing.

Unfortunately, for the bit-precise types there isn't. The
supported types may be wider than `long long` (128 and even 256 will
be common values that will probably be widely supported) and then the
task of printing them gets as nasty as for today's `__int128`. My hope
was really to get all of these done for once, such that our users may
use their creativity to do more useful stuff.

Jₑₙₛ

-- 
:: INRIA Nancy Grand Est ::: Camus ::::::: ICube/ICPS :::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-20 14:08                   ` Jₑₙₛ Gustedt
@ 2022-09-20 14:15                     ` Rich Felker
  2022-09-20 14:22                       ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2022-09-20 14:15 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Tue, Sep 20, 2022 at 04:08:03PM +0200, Jₑₙₛ Gustedt wrote:
> Rich,
> 
> on Tue, 20 Sep 2022 09:55:54 -0400 you (Rich Felker <dalias@libc.org>)
> wrote:
> 
> > In general, offering non-portable functionality that applications
> > can't already generally expect to have on popular systems, with no way
> > to probe for availability, does not seem useful, and it's even less
> > useful when there's a trivial portable way to do the same thing.
> 
> Unfortunately, for the bit-precise types there isn't. The
> supported types may be wider than `long long` (128 and even 256 will
> be common values that will probably be widely supported) and then the
> task of printing them gets as nasty as for today's `__int128`. My hope
> was really to get all of these done for once, such that our users may
> use their creativity to do more useful stuff.

Well how are programmers supposed to probe what's available, and what
are they supposed to do as fallback when support is not available? And
what is the upper limit? Without answering those questions I don't see
how this functionality can be made useful, because it's not even safe
to use (you get UB if you guess wrong). The proposal on libc-coord for
how to advertise feature availability (including at runtime via
sysconf keys) addresses this kind of problem in general with
extensions you can't just probe for with a configure-time link test,
and might be the answer here...

Rich

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-20 14:15                     ` Rich Felker
@ 2022-09-20 14:22                       ` Jₑₙₛ Gustedt
  2022-09-20 14:27                         ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 26+ messages in thread
From: Jₑₙₛ Gustedt @ 2022-09-20 14:22 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 1637 bytes --]

Rich,

on Tue, 20 Sep 2022 10:15:18 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> On Tue, Sep 20, 2022 at 04:08:03PM +0200, Jₑₙₛ Gustedt wrote:
> > Rich,
> > 
> > on Tue, 20 Sep 2022 09:55:54 -0400 you (Rich Felker
> > <dalias@libc.org>) wrote:
> >   
> > > In general, offering non-portable functionality that applications
> > > can't already generally expect to have on popular systems, with
> > > no way to probe for availability, does not seem useful, and it's
> > > even less useful when there's a trivial portable way to do the
> > > same thing.  
> > 
> > Unfortunately, for the bit-precise types there isn't. The
> > supported types may be wider than `long long` (128 and even 256 will
> > be common values that will probably be widely supported) and then the
> > task of printing them gets as nasty as for today's `__int128`. My
> > hope was really to get all of these done for once, such that our
> > users may use their creativity to do more useful stuff.  
> 
> Well how are programmers supposed to probe what's available, and what
> are they supposed to do as fallback when support is not available? And
> what is the upper limit?

There is a feature test macro for the maximum width of bit-precise
integers, `BITINT_MAXWIDTH`. It is guaranteed to be at least
`ULLONG_WIDTH` but can (and will) be larger on many platforms.

Jₑₙₛ

-- 
:: INRIA Nancy Grand Est ::: Camus ::::::: ICube/ICPS :::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-20 14:22                       ` Jₑₙₛ Gustedt
@ 2022-09-20 14:27                         ` Jₑₙₛ Gustedt
  2022-09-20 14:46                           ` Rich Felker
  0 siblings, 1 reply; 26+ messages in thread
From: Jₑₙₛ Gustedt @ 2022-09-20 14:27 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 682 bytes --]

on Tue, 20 Sep 2022 16:22:36 +0200 you (Jₑₙₛ Gustedt
<jens.gustedt@inria.fr>) wrote:

> There is a feature test macro for the maximum width of bit-precise
> integers, `BITINT_MAXWIDTH`. It is guaranteed to be at least
> `ULLONG_WIDTH` but can (and will) be larger on many platforms.

e.g. on my ubuntu-nothing-fancy machine I get

 clang -E -dM -xc /dev/null | grep -i bitint
 #define __BITINT_MAXWIDTH__ 128

Jₑₙₛ

-- 
:: INRIA Nancy Grand Est ::: Camus ::::::: ICube/ICPS :::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-20 14:27                         ` Jₑₙₛ Gustedt
@ 2022-09-20 14:46                           ` Rich Felker
  2022-09-20 15:20                             ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2022-09-20 14:46 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On Tue, Sep 20, 2022 at 04:27:28PM +0200, Jₑₙₛ Gustedt wrote:
> on Tue, 20 Sep 2022 16:22:36 +0200 you (Jₑₙₛ Gustedt
> <jens.gustedt@inria.fr>) wrote:
> 
> > There is a feature test macro for the maximum width of bit-precise
> > integers, `BITINT_MAXWIDTH`. It is guaranteed to be at least
> > `ULLONG_WIDTH` but can (and will) be larger on many platforms.
> 
> e.g. on my ubuntu-nothing-fancy machine I get
> 
>  clang -E -dM -xc /dev/null | grep -i bitint
>  #define __BITINT_MAXWIDTH__ 128

As I understand it, that gives the application knowledge of what
bit-precise types the compiler can provide to it, but doesn't do
anything to tell the application what wN modifiers printf can be
expected to support. If it were required to support wN for
N==BITINT_MAXWIDTH that would at least be something to go from, but I
see no such requirement and I'm not sure it's desirable (it means you
can't let the compiler offer larger BITINT_MAXWIDTH, but would have to
define it as what libc supports).

I think a separate macro indicating what printf supports is needed to
solve this problem well.

Rich

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-20 14:46                           ` Rich Felker
@ 2022-09-20 15:20                             ` Jₑₙₛ Gustedt
  0 siblings, 0 replies; 26+ messages in thread
From: Jₑₙₛ Gustedt @ 2022-09-20 15:20 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 1656 bytes --]

Rich,

on Tue, 20 Sep 2022 10:46:18 -0400 you (Rich Felker <dalias@libc.org>)
wrote:

> On Tue, Sep 20, 2022 at 04:27:28PM +0200, Jₑₙₛ Gustedt wrote:
> > on Tue, 20 Sep 2022 16:22:36 +0200 you (Jₑₙₛ Gustedt
> > <jens.gustedt@inria.fr>) wrote:
> >   
> > > There is a feature test macro for the maximum width of bit-precise
> > > integers, `BITINT_MAXWIDTH`. It is guaranteed to be at least
> > > `ULLONG_WIDTH` but can (and will) be larger on many platforms.  
> > 
> > e.g. on my ubuntu-nothing-fancy machine I get
> > 
> >  clang -E -dM -xc /dev/null | grep -i bitint
> >  #define __BITINT_MAXWIDTH__ 128  
> 
> As I understand it, that gives the application knowledge of what
> bit-precise types the compiler can provide to it, but doesn't do
> anything to tell the application what wN modifiers printf can be
> expected to support. If it were required to support wN for
> N==BITINT_MAXWIDTH that would at least be something to go from, but I
> see no such requirement and I'm not sure it's desirable (it means you
> can't let the compiler offer larger BITINT_MAXWIDTH, but would have to
> define it as what libc supports).
> 
> I think a separate macro indicating what printf supports is needed to
> solve this problem well.

That's an excellent point. It would be good if we filed a national
body comment for the ballot period to get such a thing in.

Jₑₙₛ

-- 
:: INRIA Nancy Grand Est ::: Camus ::::::: ICube/ICPS :::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [musl] [PATCH v2 0/1] vfprintf: support C2x %b and %B conversion specifiers
  2022-09-08 16:36 [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers Gabriel Ravier
  2022-09-12 13:59 ` Rich Felker
@ 2023-04-15 12:28 ` Gabriel Ravier
  2023-04-15 12:28   ` [musl] [PATCH v2 1/1] " Gabriel Ravier
  2024-08-27 23:12 ` [musl] [PATCH v3 0/1] vfprintf: support C23 b and B " Gabriel Ravier
  2 siblings, 1 reply; 26+ messages in thread
From: Gabriel Ravier @ 2023-04-15 12:28 UTC (permalink / raw)
  To: musl; +Cc: Gabriel Ravier

This updated version of the patch fixes the buffer overflow pointed out by Rich,
and rebases it upon latest musl (the old version of the patch fails to apply on
more recent versions).

Gabriel Ravier (1):
  vfprintf: support C2x %b and %B conversion specifiers

 src/stdio/vfprintf.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

-- 
2.39.2


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [musl] [PATCH v2 1/1] vfprintf: support C2x %b and %B conversion specifiers
  2023-04-15 12:28 ` [musl] [PATCH v2 0/1] " Gabriel Ravier
@ 2023-04-15 12:28   ` Gabriel Ravier
  2023-04-15 12:52     ` Jₑₙₛ Gustedt
  2023-04-16  6:51     ` Jₑₙₛ Gustedt
  0 siblings, 2 replies; 26+ messages in thread
From: Gabriel Ravier @ 2023-04-15 12:28 UTC (permalink / raw)
  To: musl; +Cc: Gabriel Ravier

These specifiers allow for formatted input/output of binary integers,
and have been added to C2x with N2630. The uppercase B specifier is
not formally required by C2x, as only lowercase specifiers were
reserved by C, and thus an implementation could have been using
uppercase B for an extension of their own, but C2x still has a note
saying that it is recommended practice to implement it as the logical
counterpart to b.
---
 src/stdio/vfprintf.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index a712d80f..3d7e0aeb 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -49,7 +49,7 @@ enum {
 static const unsigned char states[]['z'-'A'+1] = {
 	{ /* 0: bare types */
 		S('d') = INT, S('i') = INT,
-		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
+		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT, S('b') = UINT, S('B') = UINT,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
 		S('E') = DBL, S('F') = DBL, S('G') = DBL, S('A') = DBL,
 		S('c') = CHAR, S('C') = INT,
@@ -59,7 +59,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
 	}, { /* 1: l-prefixed */
 		S('d') = LONG, S('i') = LONG,
-		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
+		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG, S('b') = ULONG, S('B') = ULONG,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
 		S('E') = DBL, S('F') = DBL, S('G') = DBL, S('A') = DBL,
 		S('c') = INT, S('s') = PTR, S('n') = PTR,
@@ -68,17 +68,20 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('d') = LLONG, S('i') = LLONG,
 		S('o') = ULLONG, S('u') = ULLONG,
 		S('x') = ULLONG, S('X') = ULLONG,
+		S('b') = ULLONG, S('B') = ULLONG,
 		S('n') = PTR,
 	}, { /* 3: h-prefixed */
 		S('d') = SHORT, S('i') = SHORT,
 		S('o') = USHORT, S('u') = USHORT,
 		S('x') = USHORT, S('X') = USHORT,
+		S('b') = USHORT, S('B') = USHORT,
 		S('n') = PTR,
 		S('h') = HHPRE,
 	}, { /* 4: hh-prefixed */
 		S('d') = CHAR, S('i') = CHAR,
 		S('o') = UCHAR, S('u') = UCHAR,
 		S('x') = UCHAR, S('X') = UCHAR,
+		S('b') = UCHAR, S('B') = UCHAR,
 		S('n') = PTR,
 	}, { /* 5: L-prefixed */
 		S('e') = LDBL, S('f') = LDBL, S('g') = LDBL, S('a') = LDBL,
@@ -88,11 +91,13 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('d') = PDIFF, S('i') = PDIFF,
 		S('o') = SIZET, S('u') = SIZET,
 		S('x') = SIZET, S('X') = SIZET,
+		S('b') = SIZET, S('B') = SIZET,
 		S('n') = PTR,
 	}, { /* 7: j-prefixed */
 		S('d') = IMAX, S('i') = IMAX,
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
+		S('b') = UMAX, S('B') = UMAX,
 		S('n') = PTR,
 	}
 };
@@ -162,6 +167,12 @@ static char *fmt_o(uintmax_t x, char *s)
 	return s;
 }
 
+static char *fmt_b(uintmax_t x, char *s)
+{
+	for (; x; x>>=1) *--s = '0' + (x&1);
+	return s;
+}
+
 static char *fmt_u(uintmax_t x, char *s)
 {
 	unsigned long y;
@@ -437,7 +448,12 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 	unsigned st, ps;
 	int cnt=0, l=0;
 	size_t i;
-	char buf[sizeof(uintmax_t)*3+3+LDBL_MANT_DIG/4];
+	/* This buffer is used for integer conversions. As such, it needs
+	 * to be able to contain the full representation of a number in base 2,
+	 * 8, 10 or 16, with base 2 having the largest possible requirement of
+	 * as many characters as the amount of bits in the largest possible
+	 * integer type */
+	char buf[sizeof(uintmax_t)*CHAR_BIT];
 	const char *prefix;
 	int t, pl;
 	wchar_t wc[2], *ws;
@@ -534,7 +550,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		if (ferror(f)) return -1;
 
 		z = buf + sizeof(buf);
-		prefix = "-+   0X0x";
+		prefix = "-+   0X0x0B0b";
 		pl = 0;
 		t = s[-1];
 
@@ -564,6 +580,10 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			a = fmt_x(arg.i, z, t&32);
 			if (arg.i && (fl & ALT_FORM)) prefix+=(t>>4), pl=2;
 			if (0) {
+		case 'b': case 'B':
+			a = fmt_b(arg.i, z);
+			if (arg.i && (fl & ALT_FORM)) prefix+=9+((t=='b')<<1), pl=2;
+			} if (0) {
 		case 'o':
 			a = fmt_o(arg.i, z);
 			if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1;
-- 
2.39.2


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH v2 1/1] vfprintf: support C2x %b and %B conversion specifiers
  2023-04-15 12:28   ` [musl] [PATCH v2 1/1] " Gabriel Ravier
@ 2023-04-15 12:52     ` Jₑₙₛ Gustedt
  2023-04-15 13:15       ` Gabriel Ravier
  2023-04-16  6:51     ` Jₑₙₛ Gustedt
  1 sibling, 1 reply; 26+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-04-15 12:52 UTC (permalink / raw)
  To: Gabriel Ravier; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 1354 bytes --]

Hi,
I was just working on these things, too. So great, one thing less on
my list.

Patch looks fine, but for the parts with the prefix, which I find
suspicious, anyhow, too much black magic for me.

What is still missing then, is to add PRIxYYY macros. With the latest
draft

        https://open-std.org/JTC1/SC22/WG14/www/docs/n3096.pdf

these become mandatory feature tests for this feature. I already have
a patch for that.

I also have already patches for

- the wN and wfN length modifiers

- an extension towards `[u]int128_t`

- adding the `*_WIDTH` macros

And I will soon be starting on the similar features for `scanf` and
friends. There is one problem zone for that, namely that `0b` prefixes
are not only extensions but change semantics of existing executables
that would be linked against different versions of the C library.

My plan is to make the distinction deep down in `__intscan` and have
that internal function in two versions `__intscan_c17` and
`__intscan_c23`, say.

Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH v2 1/1] vfprintf: support C2x %b and %B conversion specifiers
  2023-04-15 12:52     ` Jₑₙₛ Gustedt
@ 2023-04-15 13:15       ` Gabriel Ravier
  2023-04-15 14:15         ` Jₑₙₛ Gustedt
  0 siblings, 1 reply; 26+ messages in thread
From: Gabriel Ravier @ 2023-04-15 13:15 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On 4/15/23 14:52, Jₑₙₛ Gustedt wrote:
> Hi,
> I was just working on these things, too. So great, one thing less on
> my list.
>
> Patch looks fine, but for the parts with the prefix, which I find
> suspicious, anyhow, too much black magic for me.
The parts with the prefix could instead be implemented as something like 
`"0B0b"+((t=='b')<<1)` (or something even cleaner, though it'd be a bit 
more verbose too) but I figured it was better to keep the same code 
style as for the hexadecimal conversion for now - changes to make it be 
less like black magic seem more like code style cleanup that probably 
aren't appropriate to put in the same patch (I suppose it also shaves 
off a single byte to do it the way it's done right now, since a separate 
string would require an extra null terminator, and I highly doubt most 
compilers right now are able to figure out it's unneeded - to avoid 
taking an extra byte of space would require making a special array just 
for `0B0b` that specifically has a size of 4).
>
> What is still missing then, is to add PRIxYYY macros. With the latest
> draft
>
>          https://open-std.org/JTC1/SC22/WG14/www/docs/n3096.pdf
>
> these become mandatory feature tests for this feature. I already have
> a patch for that.
Nice to have a patch for that too, thanks. I wasn't planning on adding 
them myself, mostly because I don't really have any tests for them (as 
opposed to the b/B conversion specifiers which I have plenty of tests 
for) and don't plan on adding any anytime soon.
>
> I also have already patches for
>
> - the wN and wfN length modifiers
>
> - an extension towards `[u]int128_t`
>
> - adding the `*_WIDTH` macros
>
> And I will soon be starting on the similar features for `scanf` and
> friends. There is one problem zone for that, namely that `0b` prefixes
> are not only extensions but change semantics of existing executables
> that would be linked against different versions of the C library.
>
> My plan is to make the distinction deep down in `__intscan` and have
> that internal function in two versions `__intscan_c17` and
> `__intscan_c23`, say.
Doesn't the same apply to strto{,u}l{,l} given the behavior change is 
caused by the corresponding change in those functions ?
>
> Thanks
> Jₑₙₛ
>


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH v2 1/1] vfprintf: support C2x %b and %B conversion specifiers
  2023-04-15 13:15       ` Gabriel Ravier
@ 2023-04-15 14:15         ` Jₑₙₛ Gustedt
  0 siblings, 0 replies; 26+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-04-15 14:15 UTC (permalink / raw)
  To: Gabriel Ravier; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 2105 bytes --]

Gabriel,

on Sat, 15 Apr 2023 15:15:48 +0200 you (Gabriel Ravier
<gabravier@gmail.com>) wrote:

> > What is still missing then, is to add PRIxYYY macros. With the
> > latest draft
> >
> >          https://open-std.org/JTC1/SC22/WG14/www/docs/n3096.pdf
> >
> > these become mandatory feature tests for this feature. I already
> > have a patch for that.  
> Nice to have a patch for that too, thanks. I wasn't planning on
> adding them myself, mostly because I don't really have any tests for
> them (as opposed to the b/B conversion specifiers which I have plenty
> of tests for) and don't plan on adding any anytime soon.

As said they are now mandatory, because the `B` also changes state
from just "recommended" to "recommended optional". The git message
could also reflect that.

What I noticed after sending my mail: the same change is also still
missing for vfwprintf, which unfortunately duplicates the logic here.
Refactoring these two functions could probably save a kilobyte or
so.

> > And I will soon be starting on the similar features for `scanf` and
> > friends. There is one problem zone for that, namely that `0b`
> > prefixes are not only extensions but change semantics of existing
> > executables that would be linked against different versions of the
> > C library.
> >
> > My plan is to make the distinction deep down in `__intscan` and have
> > that internal function in two versions `__intscan_c17` and
> > `__intscan_c23`, say.  
> Doesn't the same apply to strto{,u}l{,l} given the behavior change is 
> caused by the corresponding change in those functions ?

Yes. I have a very generous notion of "friend" ;-)

They all use that same function __intscan underneath, so this factors
out cleanly, it seems.

Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH v2 1/1] vfprintf: support C2x %b and %B conversion specifiers
  2023-04-15 12:28   ` [musl] [PATCH v2 1/1] " Gabriel Ravier
  2023-04-15 12:52     ` Jₑₙₛ Gustedt
@ 2023-04-16  6:51     ` Jₑₙₛ Gustedt
  2023-04-16 13:20       ` Gabriel Ravier
  1 sibling, 1 reply; 26+ messages in thread
From: Jₑₙₛ Gustedt @ 2023-04-16  6:51 UTC (permalink / raw)
  To: Gabriel Ravier; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 935 bytes --]

Gabriel,
it also seems to me that ...

on Sat, 15 Apr 2023 14:28:28 +0200 you (Gabriel Ravier
<gabravier@gmail.com>) wrote:

> +	/* This buffer is used for integer conversions. As such, it needs
> +	 * to be able to contain the full representation of a number in base 2,
> +	 * 8, 10 or 16, with base 2 having the largest possible requirement of
> +	 * as many characters as the amount of bits in the largest
> possible
> +	 * integer type */
> +	char buf[sizeof(uintmax_t)*CHAR_BIT];

... here a `+3` seems to be in order to take care of the `0[bx]`
prefix and a terminating null byte.

Thanks
Jₑₙₛ

-- 
:: ICube :::::::::::::::::::::::::::::: deputy director ::
:: Université de Strasbourg :::::::::::::::::::::: ICPS ::
:: INRIA Nancy Grand Est :::::::::::::::::::::::: Camus ::
:: :::::::::::::::::::::::::::::::::::: ☎ +33 368854536 ::
:: https://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH v2 1/1] vfprintf: support C2x %b and %B conversion specifiers
  2023-04-16  6:51     ` Jₑₙₛ Gustedt
@ 2023-04-16 13:20       ` Gabriel Ravier
  2023-04-16 14:39         ` Rich Felker
  0 siblings, 1 reply; 26+ messages in thread
From: Gabriel Ravier @ 2023-04-16 13:20 UTC (permalink / raw)
  To: Jₑₙₛ Gustedt; +Cc: musl

On 4/16/23 08:51, Jₑₙₛ Gustedt wrote:
> Gabriel,
> it also seems to me that ...
>
> on Sat, 15 Apr 2023 14:28:28 +0200 you (Gabriel Ravier
> <gabravier@gmail.com>) wrote:
>
>> +	/* This buffer is used for integer conversions. As such, it needs
>> +	 * to be able to contain the full representation of a number in base 2,
>> +	 * 8, 10 or 16, with base 2 having the largest possible requirement of
>> +	 * as many characters as the amount of bits in the largest
>> possible
>> +	 * integer type */
>> +	char buf[sizeof(uintmax_t)*CHAR_BIT];
> ... here a `+3` seems to be in order to take care of the `0[bx]`
> prefix and a terminating null byte.

This buffer is only used specifically for storing converted digits, and 
is never used to store the alternative form, and never contains a null 
terminator either as the code knows the used length and never passes the 
buffer to a function that doesn't do so, so from what I can see these 
objections are wrong (in fact it wouldn't make much sense to store the 
prefix in that buffer given that the code also has to handle the 
possibility of extremely large 0-padding that goes between the prefix 
and the converted digits).

Though perhaps the comment could be improved, I suppose it could be 
confusing...

>
> Thanks
> Jₑₙₛ
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [musl] [PATCH v2 1/1] vfprintf: support C2x %b and %B conversion specifiers
  2023-04-16 13:20       ` Gabriel Ravier
@ 2023-04-16 14:39         ` Rich Felker
  0 siblings, 0 replies; 26+ messages in thread
From: Rich Felker @ 2023-04-16 14:39 UTC (permalink / raw)
  To: Gabriel Ravier; +Cc: Jₑₙₛ Gustedt, musl

On Sun, Apr 16, 2023 at 03:20:59PM +0200, Gabriel Ravier wrote:
> On 4/16/23 08:51, Jₑₙₛ Gustedt wrote:
> >Gabriel,
> >it also seems to me that ...
> >
> >on Sat, 15 Apr 2023 14:28:28 +0200 you (Gabriel Ravier
> ><gabravier@gmail.com>) wrote:
> >
> >>+	/* This buffer is used for integer conversions. As such, it needs
> >>+	 * to be able to contain the full representation of a number in base 2,
> >>+	 * 8, 10 or 16, with base 2 having the largest possible requirement of
> >>+	 * as many characters as the amount of bits in the largest
> >>possible
> >>+	 * integer type */
> >>+	char buf[sizeof(uintmax_t)*CHAR_BIT];
> >... here a `+3` seems to be in order to take care of the `0[bx]`
> >prefix and a terminating null byte.
> 
> This buffer is only used specifically for storing converted digits,
> and is never used to store the alternative form, and never contains
> a null terminator either as the code knows the used length and never
> passes the buffer to a function that doesn't do so, so from what I
> can see these objections are wrong (in fact it wouldn't make much
> sense to store the prefix in that buffer given that the code also
> has to handle the possibility of extremely large 0-padding that goes
> between the prefix and the converted digits).
> 
> Though perhaps the comment could be improved, I suppose it could be
> confusing...

Yes, without looking I'm pretty sure you're right about what the
buffer is and isn't used for. I think both the +3 and the
+LDBL_MANT_DIG/4 are artifacts of proto-musl code long ago where
things were done differently and floating point code was also in the
main printf_core function and did naive floating point math. Maybe
these should be fixed with a separate patch first (which I could make
and explain what I recall of the history) so this unrelated change
isn't a distraction from the %b patches.

Rich

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [musl] [PATCH v3 0/1] vfprintf: support C23 b and B conversion specifiers
  2022-09-08 16:36 [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers Gabriel Ravier
  2022-09-12 13:59 ` Rich Felker
  2023-04-15 12:28 ` [musl] [PATCH v2 0/1] " Gabriel Ravier
@ 2024-08-27 23:12 ` Gabriel Ravier
  2024-08-27 23:12   ` [musl] [PATCH v3 1/1] " Gabriel Ravier
  2 siblings, 1 reply; 26+ messages in thread
From: Gabriel Ravier @ 2024-08-27 23:12 UTC (permalink / raw)
  To: musl; +Cc: Gabriel Ravier

This updated version of the patch:
 - rebases it on latest musl (the old version of the patch fails to
   apply on current git)
 - clarifies the comment before the declaration of buf slightly to
   make it less potentially confusing
 - adjusts the commit message since C2x is now officially C23

Gabriel Ravier (1):
  vfprintf: support C23 b and B conversion specifiers

 src/stdio/vfprintf.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

-- 
2.46.0


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [musl] [PATCH v3 1/1] vfprintf: support C23 b and B conversion specifiers
  2024-08-27 23:12 ` [musl] [PATCH v3 0/1] vfprintf: support C23 b and B " Gabriel Ravier
@ 2024-08-27 23:12   ` Gabriel Ravier
  0 siblings, 0 replies; 26+ messages in thread
From: Gabriel Ravier @ 2024-08-27 23:12 UTC (permalink / raw)
  To: musl; +Cc: Gabriel Ravier

These specifiers allow for formatted output of binary integers, and
have been added to C23 through N2630. The uppercase B specifier is
not made entirely mandatory by C23, as only lowercase specifiers are
reserved for the standard, and thus an implementation could have been
using uppercase B for an unrelated extension, but C23 still has a note
stating it is recommended practice to implement it as the uppercase
counterpart of the b specifier.

I have tested this on:
 - glibc's tests for %b and %B
 - The libc testsuite I'm developing over at https://github.com/GabrielRavier/yalibct
 - musl's libc-test
 - musl's libc-testsuite

And observed no regressions.
---
 src/stdio/vfprintf.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c
index 360d723a..ec51aa3c 100644
--- a/src/stdio/vfprintf.c
+++ b/src/stdio/vfprintf.c
@@ -49,7 +49,7 @@ enum {
 static const unsigned char states[]['z'-'A'+1] = {
 	{ /* 0: bare types */
 		S('d') = INT, S('i') = INT,
-		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT,
+		S('o') = UINT, S('u') = UINT, S('x') = UINT, S('X') = UINT, S('b') = UINT, S('B') = UINT,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
 		S('E') = DBL, S('F') = DBL, S('G') = DBL, S('A') = DBL,
 		S('c') = INT, S('C') = UINT,
@@ -59,7 +59,7 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('z') = ZTPRE, S('j') = JPRE, S('t') = ZTPRE,
 	}, { /* 1: l-prefixed */
 		S('d') = LONG, S('i') = LONG,
-		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG,
+		S('o') = ULONG, S('u') = ULONG, S('x') = ULONG, S('X') = ULONG, S('b') = ULONG, S('B') = ULONG,
 		S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
 		S('E') = DBL, S('F') = DBL, S('G') = DBL, S('A') = DBL,
 		S('c') = UINT, S('s') = PTR, S('n') = PTR,
@@ -68,17 +68,20 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('d') = LLONG, S('i') = LLONG,
 		S('o') = ULLONG, S('u') = ULLONG,
 		S('x') = ULLONG, S('X') = ULLONG,
+		S('b') = ULLONG, S('B') = ULLONG,
 		S('n') = PTR,
 	}, { /* 3: h-prefixed */
 		S('d') = SHORT, S('i') = SHORT,
 		S('o') = USHORT, S('u') = USHORT,
 		S('x') = USHORT, S('X') = USHORT,
+		S('b') = USHORT, S('B') = USHORT,
 		S('n') = PTR,
 		S('h') = HHPRE,
 	}, { /* 4: hh-prefixed */
 		S('d') = CHAR, S('i') = CHAR,
 		S('o') = UCHAR, S('u') = UCHAR,
 		S('x') = UCHAR, S('X') = UCHAR,
+		S('b') = UCHAR, S('B') = UCHAR,
 		S('n') = PTR,
 	}, { /* 5: L-prefixed */
 		S('e') = LDBL, S('f') = LDBL, S('g') = LDBL, S('a') = LDBL,
@@ -88,11 +91,13 @@ static const unsigned char states[]['z'-'A'+1] = {
 		S('d') = PDIFF, S('i') = PDIFF,
 		S('o') = SIZET, S('u') = SIZET,
 		S('x') = SIZET, S('X') = SIZET,
+		S('b') = SIZET, S('B') = SIZET,
 		S('n') = PTR,
 	}, { /* 7: j-prefixed */
 		S('d') = IMAX, S('i') = IMAX,
 		S('o') = UMAX, S('u') = UMAX,
 		S('x') = UMAX, S('X') = UMAX,
+		S('b') = UMAX, S('B') = UMAX,
 		S('n') = PTR,
 	}
 };
@@ -150,6 +155,12 @@ static const char xdigits[16] = {
 	"0123456789ABCDEF"
 };
 
+static char *fmt_b(uintmax_t x, char *s)
+{
+	for (; x; x>>=1) *--s = '0' + (x&1);
+	return s;
+}
+
 static char *fmt_x(uintmax_t x, char *s, int lower)
 {
 	for (; x; x>>=4) *--s = xdigits[(x&15)]|lower;
@@ -431,7 +442,12 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 	unsigned st, ps;
 	int cnt=0, l=0;
 	size_t i;
-	char buf[sizeof(uintmax_t)*3];
+	/* This buffer is used for integer conversions. As such, it needs
+	 * to be able to contain the full representation of a number (without a
+	 * prefix/padding or null terminator) in base 2, 8, 10 or 16, with base
+	 * 2 having the largest possible requirement of as many characters as
+	 * the amount of bits in the largest possible integer type */
+	char buf[sizeof(uintmax_t)*CHAR_BIT];
 	const char *prefix;
 	int t, pl;
 	wchar_t wc[2], *ws;
@@ -528,7 +544,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 		if (ferror(f)) return -1;
 
 		z = buf + sizeof(buf);
-		prefix = "-+   0X0x";
+		prefix = "-+   0X0x0B0b";
 		pl = 0;
 		t = s[-1];
 
@@ -558,6 +574,10 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg,
 			a = fmt_x(arg.i, z, t&32);
 			if (arg.i && (fl & ALT_FORM)) prefix+=(t>>4), pl=2;
 			if (0) {
+		case 'b': case 'B':
+			a = fmt_b(arg.i, z);
+			if (arg.i && (fl & ALT_FORM)) prefix+=9+((t=='b')<<1), pl=2;
+			} if (0) {
 		case 'o':
 			a = fmt_o(arg.i, z);
 			if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1;
-- 
2.46.0


^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2024-08-27 23:12 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-08 16:36 [musl] [PATCH] vfprintf: support C2x %b and %B conversion specifiers Gabriel Ravier
2022-09-12 13:59 ` Rich Felker
2022-09-12 14:42   ` Jₑₙₛ Gustedt
2022-09-19 15:09     ` Rich Felker
2022-09-19 17:59       ` Szabolcs Nagy
2022-09-19 18:10         ` Rich Felker
2022-09-20  9:19           ` Jₑₙₛ Gustedt
2022-09-20 12:28             ` Rich Felker
2022-09-20 13:29               ` Jₑₙₛ Gustedt
2022-09-20 13:55                 ` Rich Felker
2022-09-20 14:08                   ` Jₑₙₛ Gustedt
2022-09-20 14:15                     ` Rich Felker
2022-09-20 14:22                       ` Jₑₙₛ Gustedt
2022-09-20 14:27                         ` Jₑₙₛ Gustedt
2022-09-20 14:46                           ` Rich Felker
2022-09-20 15:20                             ` Jₑₙₛ Gustedt
2023-04-15 12:28 ` [musl] [PATCH v2 0/1] " Gabriel Ravier
2023-04-15 12:28   ` [musl] [PATCH v2 1/1] " Gabriel Ravier
2023-04-15 12:52     ` Jₑₙₛ Gustedt
2023-04-15 13:15       ` Gabriel Ravier
2023-04-15 14:15         ` Jₑₙₛ Gustedt
2023-04-16  6:51     ` Jₑₙₛ Gustedt
2023-04-16 13:20       ` Gabriel Ravier
2023-04-16 14:39         ` Rich Felker
2024-08-27 23:12 ` [musl] [PATCH v3 0/1] vfprintf: support C23 b and B " Gabriel Ravier
2024-08-27 23:12   ` [musl] [PATCH v3 1/1] " Gabriel Ravier

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).