[musl] [C23 128 bit 1/4] add an emulation for 128 bit arithmetic as needed for C library support

mailing list of musl libc
 help / color / mirror / code / Atom feed

From: Jens Gustedt <Jens.Gustedt@inria.fr>
To: musl@lists.openwall.com
Subject: [musl] [C23 128 bit 1/4] add an emulation for 128 bit arithmetic as needed for C library support
Date: Wed, 31 May 2023 16:15:47 +0200	[thread overview]
Message-ID: <04d1def74188d4783389363c4a3a0e1d942d7e36.1685536608.git.Jens.Gustedt@inria.fr> (raw)
In-Reply-To: <cover.1685536608.git.Jens.Gustedt@inria.fr>

With C23 it will be possible for compilers to add extended integer
types that are wider than `intmax_t`, in particular for the
`[u]int128_t` types. This patch enables the minimal support for these
types that is needed to implement the necessary bits in `printf` and
`scanf` like interfaces. This support is added unconditionally, but
only compilers that effectively implement the types will be able to
profit from this new setting.

The usage of these types is then even independent of whether or not a
given compiler has full support for `[u]int128_t`; just the types
indicated by gcc's `__int128` are needed for this to be useful.

We provide two implementations for most of the functions, both of
which have the same ABI: one that uses gcc's `__int128` types underneath
and another one that hand-codes minimal operations by sticking them
together from different sub-words of the 128 bit entity.

The hand-coded version is not complete; it lacks an important piece,
namely the function

      uwide128 __uwide128_pop(va_list *ap);

that is used by `vfprintf` and similar to pop a 128 bit value from the
variable argument list. (`uwide128` is an internal structure type that
is used for the emulation)

This version *must* be completed for each architecture that we support
by a .s file. For architectures that have a compiler that implements
`__int128`, it is relatively simple to generate such a file: just
compile the file uwide128.c with -S option and extract this one and
only function into a platform specific subdirectory.
---
 src/internal/uwide128.c            | 213 +++++++++++++++++++++++++++++
 src/internal/uwide128.h            |  57 ++++++++
 src/internal/x86_64/uwide128_pop.s |  27 ++++
 3 files changed, 297 insertions(+)
 create mode 100644 src/internal/uwide128.c
 create mode 100644 src/internal/uwide128.h
 create mode 100644 src/internal/x86_64/uwide128_pop.s

diff --git a/src/internal/uwide128.c b/src/internal/uwide128.c
new file mode 100644
index 00000000..6e4b08bf
--- /dev/null
+++ b/src/internal/uwide128.c
@@ -0,0 +1,213 @@
+#include <uwide128.h>
+#include <stdarg.h>
+
+#if __SIZEOF_INT128__
+
+union u { unsigned __int128 x; uwide128 s; };	/* overlays the native 128-bit integer x with its uwide128 representation s */
+
+__attribute__((__weak__)) /* negation of a, modulo 2^128 */
+uwide128 __uwide128_neg(uwide128 a) {
+	union u pun;	/* lets us do the arithmetic on the native type */
+	pun.s = a;
+	return (union u){ .x = -pun.x }.s;
+}
+
+__attribute__((__weak__)) /* a + b, modulo 2^128 */
+uwide128 __uwide128_add(uwide128 a, uint8_t b) {
+	union u pun;	/* lets us do the arithmetic on the native type */
+	pun.s = a;
+	return (union u){ .x = pun.x + b }.s;
+}
+
+__attribute__((__weak__)) /* a - b, modulo 2^128 */
+uwide128 __uwide128_sub(uwide128 a, uint8_t b) {
+	union u pun;	/* lets us do the arithmetic on the native type */
+	pun.s = a;
+	return (union u){ .x = pun.x - b }.s;
+}
+
+__attribute__((__weak__)) /* a * b, modulo 2^128 */
+uwide128 __uwide128_mul(uwide128 a, uint8_t b) {
+	union u pun;	/* lets us do the arithmetic on the native type */
+	pun.s = a;
+	return (union u){ .x = pun.x * b }.s;
+}
+
+__attribute__((__weak__)) /* divides *a by 10 in place and returns the remainder */
+uint8_t __uwide128_div10(uwide128* a) {
+	union u pun = { .s = *a, };
+	unsigned __int128 quot = pun.x / 10;
+	uint8_t digit = pun.x - quot * 10;	/* remainder, always below 10 */
+	*a = (union u){ .x = quot }.s;
+	return digit;
+}
+
+__attribute__((__weak__)) /* divides *a by 2 in place and returns the remainder */
+uint8_t __uwide128_div2(uwide128* a) {
+	union u pun = { .s = *a, };
+	uint8_t bit = pun.x & 1;	/* remainder of an unsigned division by 2 */
+	pun.x >>= 1;
+	*a = pun.s;
+	return bit;
+}
+
+__attribute__((__weak__)) /* divides *a by 8 in place and returns the remainder */
+uint8_t __uwide128_div8(uwide128* a) {
+	union u pun = { .s = *a, };
+	uint8_t digit = pun.x & 7;	/* remainder of an unsigned division by 8 */
+	pun.x >>= 3;
+	*a = pun.s;
+	return digit;
+}
+
+__attribute__((__weak__)) /* divides *a by 16 in place and returns the remainder */
+uint8_t __uwide128_div16(uwide128* a) {
+	union u pun = { .s = *a, };
+	uint8_t digit = pun.x & 15;	/* remainder of an unsigned division by 16 */
+	pun.x >>= 4;
+	*a = pun.s;
+	return digit;
+}
+
+__attribute__((__weak__)) /* unsigned comparison: true iff a <= b */
+_Bool  __uwide128_le(uwide128 a, uwide128 b) {
+	union u lhs = { .s = a, };
+	union u rhs = { .s = b, };
+	return !(rhs.x < lhs.x);
+}
+
+__attribute__((__weak__)) /* true iff a is zero */
+_Bool  __uwide128_iszero(uwide128 a) {
+	union u pun = { .s = a, };
+	return pun.x == 0;
+}
+
+uwide128 __uwide128_pop(va_list *ap)	/* fetches the next variadic argument from *ap as a 128-bit value */
+{
+	return (union u){ .x = va_arg(*ap, __int128) }.s;	/* read as native __int128, hand back its uwide128 view */
+}
+
+#else
+
+__attribute__((__weak__)) /* unsigned comparison: true iff a <= b */
+_Bool __uwide128_le(uwide128 a, uwide128 b) {
+	uint64_t ah = a.v64[hi64], bh = b.v64[hi64];
+	/* The high words decide unless they are equal. */
+	if (ah != bh) return ah < bh;
+	/* Equal high words: the low words decide. */
+	return a.v64[lo64] <= b.v64[lo64];
+}
+
+__attribute__((__weak__)) /* true iff both 64-bit words of a are zero */
+_Bool __uwide128_iszero(uwide128 a)
+{
+	return (a.v64[0] | a.v64[1]) == 0;
+}
+
+__attribute__((__weak__)) /* negation modulo 2^128, computed as ~a + 1 */
+uwide128 __uwide128_neg(uwide128 a)
+{
+	uwide128 ret = { .v64 = { [0] = ~a.v64[0], [1] = ~a.v64[1], }, };
+	if (!a.v64[lo64]) ret.v64[hi64]++;	/* low word is all ones: the +1 carries into the high word */
+	ret.v64[lo64]++;	/* the +1 itself; wraps to 0 exactly in the carry case above */
+	return ret;
+}
+
+uwide128 __uwide128_add(uwide128 a, uint8_t b)
+{
+	/* Add the small value b to a, modulo 2^128.
+	 *
+	 * The sum is formed limb by limb over the four 32-bit words,
+	 * least significant first. The 64-bit accumulator holds the
+	 * incoming carry plus the current limb; its low half is the
+	 * result limb and its high half the carry into the next one. */
+	uwide128 ret;
+	uint64_t carry = b;
+	for (int i = 0; i < 4; i++) {
+		carry += a.v32[word32(i)];
+		ret.v32[word32(i)] = carry;
+		carry >>= 32;
+	}
+	return ret;
+}
+
+uwide128 __uwide128_sub(uwide128 a, uint8_t b)
+{
+	/* Subtract the small value b from a, modulo 2^128, working on
+	 * the 32-bit limbs from least to most significant. The borrow
+	 * is taken from an unsigned 64-bit difference: if a limb is
+	 * smaller than the amount subtracted the difference wraps and
+	 * its top bit is set, otherwise it stays below 2^32. Dividing
+	 * a signed carry by 2^32 would lose or corrupt the borrow. */
+	uwide128 ret;
+	uint64_t borrow = b;
+	for (int i = 0; i < 4; i++) {
+		uint64_t diff = a.v32[word32(i)] - borrow;
+		ret.v32[word32(i)] = diff;
+		borrow = diff >> 63;
+	}
+	return ret;
+}
+
+uwide128 __uwide128_mul(uwide128 a, uint8_t b)
+{
+	/* Multiply a by the small factor b, modulo 2^128.
+	 *
+	 * Schoolbook multiplication over the four 32-bit limbs, least
+	 * significant first. Since b is a uint8_t, the product of a
+	 * 32-bit limb and b is below 2^40, so adding the carry from
+	 * the previous limb cannot overflow the 64-bit accumulator.
+	 * The carry out of the most significant limb is dropped,
+	 * which is what makes the result a value modulo 2^128. */
+	uwide128 ret;
+	uint64_t carry = 0;
+	for (int i = 0; i < 4; i++) {
+		/* widen first so that the multiplication is 64-bit */
+		uint64_t prod = a.v32[word32(i)];
+		prod *= b;
+		/* partial product plus carry from the lower limb */
+		carry += prod;
+		/* the low half is this limb of the result ... */
+		ret.v32[word32(i)] = carry;
+		/* ... and the high half moves on to the next limb */
+		carry >>= 32;
+	}
+	return ret;
+}
+
+static uint8_t __uwide128_div(uwide128* a, uint8_t b)
+{
+	/* Long division of *a by b in place, returning the remainder.
+	 * The upper 64 bits are divided directly; the lower 64 bits
+	 * follow as two 32-bit limbs, each prefixed by the remainder. */
+	uint64_t rem = a->v64[hi64] % b;
+	a->v64[hi64] /= b;
+	for (int i = 1; i >= 0; i--) {
+		uint64_t cur = (rem << 32) | a->v32[word32(i)];
+		a->v32[word32(i)] = cur / b;
+		rem = cur % b;
+	}
+	return rem;
+}
+
+uint8_t __uwide128_div10(uwide128* a)	/* divides *a by 10 in place */
+{
+	return __uwide128_div(a, 10);	/* the helper's return value is the remainder */
+}
+
+uint8_t __uwide128_div2(uwide128* a)	/* divides *a by 2 in place */
+{
+	return __uwide128_div(a, 2);	/* the helper's return value is the remainder */
+}
+
+uint8_t __uwide128_div8(uwide128* a)	/* divides *a by 8 in place */
+{
+	return __uwide128_div(a, 8);	/* the helper's return value is the remainder */
+}
+
+uint8_t __uwide128_div16(uwide128* a)	/* divides *a by 16 in place */
+{
+	return __uwide128_div(a, 16);	/* the helper's return value is the remainder */
+}
+
+#endif
diff --git a/src/internal/uwide128.h b/src/internal/uwide128.h
new file mode 100644
index 00000000..f6a02934
--- /dev/null
+++ b/src/internal/uwide128.h
@@ -0,0 +1,57 @@
+#include <features.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdarg.h>
+
+typedef union uwide128 uwide128; // container for a 128-bit unsigned value
+union uwide128 {
+	uint64_t v64[2];	// the value as two 64-bit words; index with lo64/hi64
+	uint32_t v32[4];	// the same storage as four 32-bit words; index with wo32_0..wo32_3
+};
+
+#define word64(X) (__BYTE_ORDER == __LITTLE_ENDIAN ? (X) : (1-(X))) // v64 index of the X-th least significant word; valid indices are 0..1
+#define lo64 word64(0) // index of the least significant 64-bit word
+#define hi64 word64(1) // index of the most significant 64-bit word
+
+#define word32(X) (__BYTE_ORDER == __LITTLE_ENDIAN ? (X) : (3-(X))) // v32 index of the X-th least significant word; valid indices are 0..3
+#define wo32_0 word32(0) // least significant 32-bit word
+#define wo32_1 word32(1)
+#define wo32_2 word32(2)
+#define wo32_3 word32(3) // most significant 32-bit word
+
+
+#define comp2(LO, HI) { { [lo64] = (LO), [hi64] = (HI), }, } // uwide128 initializer from its low and high 64-bit words
+
+#if __STDC_VERSION__ >= 202311L
+constexpr uwide128 __uwide128_max = { UINT64_MAX, UINT64_MAX, }; // constexpr initializers must be exactly representable, so -1 is not allowed here
+#define UWIDE128_MAX __uwide128_max
+#else
+#define UWIDE128_MAX ((void)0, (uwide128)comp2(-1, -1)) // -1 converts to the all-ones value of uint64_t
+#endif
+
+static __inline uwide128 __uwide128_i64(int64_t a)  __unsequenced;
+static __inline uwide128 __uwide128_u64(uint64_t a) __unsequenced;
+
+static __inline uwide128 __uwide128_i64(int64_t a)
+{	// widen a signed 64-bit value: the high word is all ones for negative a, else zero
+	return (uwide128)comp2(a, -(uint64_t)(a < 0));
+}
+
+static __inline uwide128 __uwide128_u64(uint64_t a)
+{	// widen an unsigned 64-bit value: the high word is zero
+	return (uwide128)comp2(a, UINT64_C(0));
+}
+
+_Bool __uwide128_le(uwide128, uwide128)    __unsequenced; // unsigned a <= b; intscan
+_Bool __uwide128_iszero(uwide128)          __unsequenced; // value == 0; vfprintf
+uwide128 __uwide128_neg(uwide128)          __unsequenced; // negation modulo 2^128; intscan
+uwide128 __uwide128_add(uwide128, uint8_t) __unsequenced; // add a small value; intscan
+uwide128 __uwide128_sub(uwide128, uint8_t) __unsequenced; // subtract a small value; intscan
+uwide128 __uwide128_mul(uwide128, uint8_t) __unsequenced; // multiply by a small value; intscan
+uint8_t __uwide128_div10(uwide128*);           // divide in place by 10, return remainder; vfprintf
+uint8_t __uwide128_div2(uwide128*);            // divide in place by 2, return remainder; vfprintf
+uint8_t __uwide128_div8(uwide128*);            // divide in place by 8, return remainder; vfprintf
+uint8_t __uwide128_div16(uwide128*);           // divide in place by 16, return remainder; vfprintf
+uwide128 __uwide128_pop(va_list *ap);          // next 128-bit variadic argument; vfprintf
+uwide128 __uwide128_i64(int64_t);              // redeclares the static inline above; vfprintf
+uwide128 __uwide128_u64(uint64_t);             // redeclares the static inline above; vfprintf
diff --git a/src/internal/x86_64/uwide128_pop.s b/src/internal/x86_64/uwide128_pop.s
new file mode 100644
index 00000000..f9f84348
--- /dev/null
+++ b/src/internal/x86_64/uwide128_pop.s
@@ -0,0 +1,27 @@
+.text	# va_list field names in the comments below follow the System V x86-64 ABI
+.global __uwide128_pop
+.weak	__uwide128_pop
+.type	__uwide128_pop, @function
+__uwide128_pop:	# uwide128 __uwide128_pop(va_list *ap): rdi = ap, result in rdx:rax
+	endbr64
+	movl	(%rdi), %edx	# edx = gp_offset
+	cmpl	$39, %edx	# less than 16 bytes of register save area left?
+	ja	1f	# yes: use the overflow area (forward local label needs the f suffix)
+	movl	%edx, %eax
+	addl	$16, %edx
+	addq	16(%rdi), %rax	# rax = reg_save_area + old gp_offset
+	movl	%edx, (%rdi)	# gp_offset += 16
+	movq	8(%rax), %rdx	# upper 8 bytes of the argument
+	movq	(%rax), %rax	# lower 8 bytes of the argument
+	ret
+	.p2align 4,,10
+	.p2align 3
+1:
+	movq	8(%rdi), %rax	# rax = overflow_arg_area
+	addq	$15, %rax
+	andq	$-16, %rax	# rounded up to a multiple of 16
+	leaq	16(%rax), %rdx
+	movq	%rdx, 8(%rdi)	# overflow_arg_area = aligned address + 16
+	movq	8(%rax), %rdx	# upper 8 bytes of the argument
+	movq	(%rax), %rax	# lower 8 bytes of the argument
+	ret
-- 
2.34.1

next prev parent reply	other threads:[~2023-05-31 14:16 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-31 14:15 [musl] [C23 128 bit 0/4] implement the library part of 128 bit support Jens Gustedt
2023-05-31 14:15 ` Jens Gustedt [this message]
2023-05-31 14:15 ` [musl] [C23 128 bit 2/4] C23: implement w128 and wf128 support for printf Jens Gustedt
2023-05-31 14:15 ` [musl] [C23 128 bit 3/4] C23: implement w128 and wf128 for scanf and similar Jens Gustedt
2023-05-31 14:15 ` [musl] [C23 128 bit 4/4] C23: implement proper support for int128_t and uint128_t Jens Gustedt
2023-05-31 14:27   ` Rich Felker
2023-05-31 14:29     ` Rich Felker
2023-05-31 14:36     ` Jₑₙₛ Gustedt
2023-05-31 14:41       ` Rich Felker
2023-05-31 14:55         ` Jₑₙₛ Gustedt
2023-05-31 14:57           ` Rich Felker
2023-05-31 15:07             ` Jₑₙₛ Gustedt
2023-05-31 15:14               ` Rich Felker
2023-05-31 15:37                 ` Jₑₙₛ Gustedt
2023-05-31 15:40                   ` Rich Felker
2023-05-31 15:56                     ` Jₑₙₛ Gustedt
2023-05-31 16:30                       ` Alexander Monakov
2023-05-31 16:58                         ` Jens Gustedt
2023-05-31 17:03                           ` Rich Felker
2023-05-31 17:09                           ` Alexander Monakov
2023-06-01  7:24                             ` Jₑₙₛ Gustedt
2023-05-31 14:42     ` Jₑₙₛ Gustedt
2023-05-31 14:47       ` Rich Felker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=04d1def74188d4783389363c4a3a0e1d942d7e36.1685536608.git.Jens.Gustedt@inria.fr \
    --to=jens.gustedt@inria.fr \
    --cc=musl@lists.openwall.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).