mailing list of musl libc
 help / color / mirror / code / Atom feed
* [musl] [PATCH] aarch64: rewrite fenv routines in C using inline asm
@ 2026-01-27  6:04 Bill Roberts
  2026-01-28 22:13 ` Szabolcs Nagy
  0 siblings, 1 reply; 4+ messages in thread
From: Bill Roberts @ 2026-01-27  6:04 UTC (permalink / raw)
  To: musl; +Cc: dalias, dalias, Bill Roberts

Rewrite the AArch64 floating-point environment routines (fegetround,
__fesetround, fetestexcept, feclearexcept, feraiseexcept, fegetenv,
fesetenv) from assembly into C implementations using inline assembly.

This change eliminates the need for handwritten function prologues and
epilogues in fenv.s, which simplifies maintenance and allows the compiler
to automatically insert architecture features such as BTI landing pads and
pointer authentication (PAC) sequences where applicable.

The new implementations mirror the original assembly semantics exactly:
- access FPCR/FPSR using `mrs`/`msr` inline asm
- preserve the same FE_* masks and return conventions
- retain `__fesetround` as a hidden internal symbol
- support the special FE_DFL_ENV case in fesetenv

Moving to C also enables the compiler to manage register allocation,
stack usage, and ABI compliance automatically while keeping the low-level
behavior (bitmasks and register accesses) explicit and verifiable.

No functional changes intended.

Signed-off-by: Bill Roberts <bill.roberts@arm.com>
---
 arch/aarch64/crt_arch.h | 29 +++++++------
 src/fenv/aarch64/fenv.c | 96 +++++++++++++++++++++++++++++++++++++++++
 src/fenv/aarch64/fenv.s | 68 -----------------------------
 3 files changed, 112 insertions(+), 81 deletions(-)
 create mode 100644 src/fenv/aarch64/fenv.c
 delete mode 100644 src/fenv/aarch64/fenv.s

diff --git a/arch/aarch64/crt_arch.h b/arch/aarch64/crt_arch.h
index b64fb3dd..cff8edb3 100644
--- a/arch/aarch64/crt_arch.h
+++ b/arch/aarch64/crt_arch.h
@@ -1,15 +1,18 @@
 __asm__(
-".text \n"
-".global " START "\n"
-".type " START ",%function\n"
-START ":\n"
-"	mov x29, #0\n"
-"	mov x30, #0\n"
-"	mov x0, sp\n"
-".weak _DYNAMIC\n"
-".hidden _DYNAMIC\n"
-"	adrp x1, _DYNAMIC\n"
-"	add x1, x1, #:lo12:_DYNAMIC\n"
-"	and sp, x0, #-16\n"
-"	b " START "_c\n"
+".text \n\t"
+".global " START "\n\t"
+".type " START ",%function\n\t"
+START ":\n\t"
+#if defined(__ARM_FEATURE_BTI_DEFAULT)
+"	hint 34\n\t"
+#endif
+"	mov x29, #0\n\t"
+"	mov x30, #0\n\t"
+"	mov x0, sp\n\t"
+".weak _DYNAMIC\n\t"
+".hidden _DYNAMIC\n\t"
+"	adrp x1, _DYNAMIC\n\t"
+"	add x1, x1, #:lo12:_DYNAMIC\n\t"
+"	and sp, x0, #-16\n\t"
+"	b " START "_c\n\t"
 );
diff --git a/src/fenv/aarch64/fenv.c b/src/fenv/aarch64/fenv.c
new file mode 100644
index 00000000..6d84feac
--- /dev/null
+++ b/src/fenv/aarch64/fenv.c
@@ -0,0 +1,96 @@
+#include <fenv.h>
+#include <stdint.h>
+
+#define FE_RMODE_MASK 0x00C00000u  // FPCR RMode bits [23:22]
+#define FE_EXC_MASK   0x0000001Fu  // FPSR exception flags [4:0]
+
+static inline uint32_t read_fpcr_u32(void)
+{
+	uint64_t x;
+	__asm__ volatile ("mrs %0, fpcr" : "=r"(x));
+	return (uint32_t)x;
+}
+
+static inline void write_fpcr_u32(uint32_t v)
+{
+	uint64_t x = (uint64_t)v;
+	__asm__ volatile ("msr fpcr, %0" :: "r"(x) : "memory");
+}
+
+static inline uint32_t read_fpsr_u32(void)
+{
+	uint64_t x;
+	__asm__ volatile ("mrs %0, fpsr" : "=r"(x));
+	return (uint32_t)x;
+}
+
+static inline void write_fpsr_u32(uint32_t v)
+{
+	uint64_t x = (uint64_t)v;
+	__asm__ volatile ("msr fpsr, %0" :: "r"(x) : "memory");
+}
+
+int fegetround(void)
+{
+	uint32_t fpcr = read_fpcr_u32();
+	return (int)(fpcr & FE_RMODE_MASK);
+}
+
+__attribute__((__visibility__("hidden")))
+int __fesetround(int rm)
+{
+	uint32_t fpcr = read_fpcr_u32();
+	fpcr &= ~FE_RMODE_MASK;
+	fpcr |= (uint32_t)rm;
+	write_fpcr_u32(fpcr);
+	return 0;
+}
+
+int fetestexcept(int mask)
+{
+	uint32_t m   = (uint32_t)mask & FE_EXC_MASK;
+	uint32_t fpsr = read_fpsr_u32();
+	return (int)(m & fpsr);
+}
+
+int feclearexcept(int mask)
+{
+	uint32_t m    = (uint32_t)mask & FE_EXC_MASK;
+	uint32_t fpsr = read_fpsr_u32();
+	fpsr &= ~m;
+	write_fpsr_u32(fpsr);
+	return 0;
+}
+
+int feraiseexcept(int mask)
+{
+	uint32_t m    = (uint32_t)mask & FE_EXC_MASK;
+	uint32_t fpsr = read_fpsr_u32();
+	fpsr |= m;
+	write_fpsr_u32(fpsr);
+	return 0;
+}
+
+int fegetenv(fenv_t *env)
+{
+	uint32_t fpcr = read_fpcr_u32();
+	uint32_t fpsr = read_fpsr_u32();
+	env->__fpcr = fpcr;
+	env->__fpsr = fpsr;
+	return 0;
+}
+
+int fesetenv(const fenv_t *env)
+{
+	uint32_t fpcr = 0;
+	uint32_t fpsr = 0;
+
+	if (env != FE_DFL_ENV) {
+		fpcr = env->__fpcr;
+		fpsr = env->__fpsr;
+	}
+
+	write_fpcr_u32(fpcr);
+	write_fpsr_u32(fpsr);
+	return 0;
+}
diff --git a/src/fenv/aarch64/fenv.s b/src/fenv/aarch64/fenv.s
deleted file mode 100644
index 8f3ec965..00000000
--- a/src/fenv/aarch64/fenv.s
+++ /dev/null
@@ -1,68 +0,0 @@
-.global fegetround
-.type fegetround,%function
-fegetround:
-	mrs x0, fpcr
-	and w0, w0, #0xc00000
-	ret
-
-.global __fesetround
-.hidden __fesetround
-.type __fesetround,%function
-__fesetround:
-	mrs x1, fpcr
-	bic w1, w1, #0xc00000
-	orr w1, w1, w0
-	msr fpcr, x1
-	mov w0, #0
-	ret
-
-.global fetestexcept
-.type fetestexcept,%function
-fetestexcept:
-	and w0, w0, #0x1f
-	mrs x1, fpsr
-	and w0, w0, w1
-	ret
-
-.global feclearexcept
-.type feclearexcept,%function
-feclearexcept:
-	and w0, w0, #0x1f
-	mrs x1, fpsr
-	bic w1, w1, w0
-	msr fpsr, x1
-	mov w0, #0
-	ret
-
-.global feraiseexcept
-.type feraiseexcept,%function
-feraiseexcept:
-	and w0, w0, #0x1f
-	mrs x1, fpsr
-	orr w1, w1, w0
-	msr fpsr, x1
-	mov w0, #0
-	ret
-
-.global fegetenv
-.type fegetenv,%function
-fegetenv:
-	mrs x1, fpcr
-	mrs x2, fpsr
-	stp w1, w2, [x0]
-	mov w0, #0
-	ret
-
-// TODO preserve some bits
-.global fesetenv
-.type fesetenv,%function
-fesetenv:
-	mov x1, #0
-	mov x2, #0
-	cmn x0, #1
-	b.eq 1f
-	ldp w1, w2, [x0]
-1:	msr fpcr, x1
-	msr fpsr, x2
-	mov w0, #0
-	ret
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [musl] [PATCH] aarch64: rewrite fenv routines in C using inline asm
  2026-01-28 22:13 ` Szabolcs Nagy
@ 2026-01-28  1:45   ` Bill Roberts
  2026-01-29 18:14   ` Markus Wichmann
  1 sibling, 0 replies; 4+ messages in thread
From: Bill Roberts @ 2026-01-28  1:45 UTC (permalink / raw)
  To: Bill Roberts, musl, dalias, dalias



On 1/28/26 4:13 PM, Szabolcs Nagy wrote:
> * Bill Roberts <bill.roberts@arm.com> [2026-01-27 00:04:27 -0600]:
>> Rewrite the AArch64 floating-point environment routines (fegetround,
>> __fesetround, fetestexcept, feclearexcept, feraiseexcept, fegetenv,
>> fesetenv) from assembly into C implementations using inline assembly.
>>
>> This change eliminates the need for handwritten function prologues and
>> epilogues in fenv.s, which simplifies maintenance and allows the compiler
>> to automatically insert architecture features such as BTI landing pads and
>> pointer authentication (PAC) sequences where applicable.
>>
>> The new implementations mirror the original assembly semantics exactly:
>> - access FPCR/FPSR using `mrs`/`msr` inline asm
>> - preserve the same FE_* masks and return conventions
>> - retain `__fesetround` as a hidden internal symbol
>> - support the special FE_DFL_ENV case in fesetenv
>>
>> Moving to C also enables the compiler to manage register allocation,
>> stack usage, and ABI compliance automatically while keeping the low-level
>> behavior (bitmasks and register accesses) explicit and verifiable.
>>
>> No functional changes intended.
>>
>> Signed-off-by: Bill Roberts <bill.roberts@arm.com>
>> ---
>>   arch/aarch64/crt_arch.h | 29 +++++++------
>>   src/fenv/aarch64/fenv.c | 96 +++++++++++++++++++++++++++++++++++++++++
>>   src/fenv/aarch64/fenv.s | 68 -----------------------------
>>   3 files changed, 112 insertions(+), 81 deletions(-)
>>   create mode 100644 src/fenv/aarch64/fenv.c
>>   delete mode 100644 src/fenv/aarch64/fenv.s
>>
>> diff --git a/arch/aarch64/crt_arch.h b/arch/aarch64/crt_arch.h
>> index b64fb3dd..cff8edb3 100644
>> --- a/arch/aarch64/crt_arch.h
>> +++ b/arch/aarch64/crt_arch.h
>> @@ -1,15 +1,18 @@
>>   __asm__(
>> -".text \n"
>> -".global " START "\n"
>> -".type " START ",%function\n"
>> -START ":\n"
>> -"	mov x29, #0\n"
>> -"	mov x30, #0\n"
>> -"	mov x0, sp\n"
>> -".weak _DYNAMIC\n"
>> -".hidden _DYNAMIC\n"
>> -"	adrp x1, _DYNAMIC\n"
>> -"	add x1, x1, #:lo12:_DYNAMIC\n"
>> -"	and sp, x0, #-16\n"
>> -"	b " START "_c\n"
>> +".text \n\t"
>> +".global " START "\n\t"
>> +".type " START ",%function\n\t"
>> +START ":\n\t"
>> +#if defined(__ARM_FEATURE_BTI_DEFAULT)
>> +"	hint 34\n\t"
>> +#endif
>> +"	mov x29, #0\n\t"
>> +"	mov x30, #0\n\t"
>> +"	mov x0, sp\n\t"
>> +".weak _DYNAMIC\n\t"
>> +".hidden _DYNAMIC\n\t"
>> +"	adrp x1, _DYNAMIC\n\t"
>> +"	add x1, x1, #:lo12:_DYNAMIC\n\t"
>> +"	and sp, x0, #-16\n\t"
>> +"	b " START "_c\n\t"
> 
> given the commit msg, this bit should be a separate patch.
> 
> the \t changes are a distraction here

Yep I meant to drop those too, thanks.

> 
>>   );
>> diff --git a/src/fenv/aarch64/fenv.c b/src/fenv/aarch64/fenv.c
>> new file mode 100644
>> index 00000000..6d84feac
>> --- /dev/null
>> +++ b/src/fenv/aarch64/fenv.c
>> @@ -0,0 +1,96 @@
>> +#include <fenv.h>
>> +#include <stdint.h>
>> +
>> +#define FE_RMODE_MASK 0x00C00000u  // FPCR RMode bits [23:22]
>> +#define FE_EXC_MASK   0x0000001Fu  // FPSR exception flags [4:0]
>> +
>> +static inline uint32_t read_fpcr_u32(void)
>> +{
>> +	uint64_t x;
>> +	__asm__ volatile ("mrs %0, fpcr" : "=r"(x));
>> +	return (uint32_t)x;
>> +}
>> +
>> +static inline void write_fpcr_u32(uint32_t v)
>> +{
>> +	uint64_t x = (uint64_t)v;
>> +	__asm__ volatile ("msr fpcr, %0" :: "r"(x) : "memory");
>> +}
>> +
>> +static inline uint32_t read_fpsr_u32(void)
>> +{
>> +	uint64_t x;
>> +	__asm__ volatile ("mrs %0, fpsr" : "=r"(x));
>> +	return (uint32_t)x;
>> +}
>> +

> i dont think the casts are useful but the logic looks right

Yeah the cast to uint64_t doesn't need to be there and can be dropped.
I just usually add a cast by forced habit, to make it explicit
what I was intending, but I will drop.

> 
> im not sure if the memory clobber is needed.

I think we need this, as the write needs to be in the
correct order with respect to other floating point operations
and the memory clobber seems to get us that. gcc doesn't come
right out and say it, but talking with compiler folks who
know a lot more than me, they say, "that's the canonical way to
do it".

V2 coming shortly.

> 
>> +
>> +int fegetround(void)
>> +{
>> +	uint32_t fpcr = read_fpcr_u32();
>> +	return (int)(fpcr & FE_RMODE_MASK);
>> +}
>> +
>> +__attribute__((__visibility__("hidden")))
> 
> use "hidden" from features.h
> 
> the rest looks ok.
> 
>> +int __fesetround(int rm)
>> +{
>> +	uint32_t fpcr = read_fpcr_u32();
>> +	fpcr &= ~FE_RMODE_MASK;
>> +	fpcr |= (uint32_t)rm;
>> +	write_fpcr_u32(fpcr);
>> +	return 0;
>> +}
>> +
>> +int fetestexcept(int mask)
>> +{
>> +	uint32_t m   = (uint32_t)mask & FE_EXC_MASK;
>> +	uint32_t fpsr = read_fpsr_u32();
>> +	return (int)(m & fpsr);
>> +}
>> +
>> +int feclearexcept(int mask)
>> +{
>> +	uint32_t m    = (uint32_t)mask & FE_EXC_MASK;
>> +	uint32_t fpsr = read_fpsr_u32();
>> +	fpsr &= ~m;
>> +	write_fpsr_u32(fpsr);
>> +	return 0;
>> +}
>> +
>> +int feraiseexcept(int mask)
>> +{
>> +	uint32_t m    = (uint32_t)mask & FE_EXC_MASK;
>> +	uint32_t fpsr = read_fpsr_u32();
>> +	fpsr |= m;
>> +	write_fpsr_u32(fpsr);
>> +	return 0;
>> +}
>> +
>> +int fegetenv(fenv_t *env)
>> +{
>> +	uint32_t fpcr = read_fpcr_u32();
>> +	uint32_t fpsr = read_fpsr_u32();
>> +	env->__fpcr = fpcr;
>> +	env->__fpsr = fpsr;
>> +	return 0;
>> +}
>> +
>> +int fesetenv(const fenv_t *env)
>> +{
>> +	uint32_t fpcr = 0;
>> +	uint32_t fpsr = 0;
>> +
>> +	if (env != FE_DFL_ENV) {
>> +		fpcr = env->__fpcr;
>> +		fpsr = env->__fpsr;
>> +	}
>> +
>> +	write_fpcr_u32(fpcr);
>> +	write_fpsr_u32(fpsr);
>> +	return 0;
>> +}
>> diff --git a/src/fenv/aarch64/fenv.s b/src/fenv/aarch64/fenv.s
>> deleted file mode 100644
>> index 8f3ec965..00000000
>> --- a/src/fenv/aarch64/fenv.s
>> +++ /dev/null
>> @@ -1,68 +0,0 @@
>> -.global fegetround
>> -.type fegetround,%function
>> -fegetround:
>> -	mrs x0, fpcr
>> -	and w0, w0, #0xc00000
>> -	ret
>> -
>> -.global __fesetround
>> -.hidden __fesetround
>> -.type __fesetround,%function
>> -__fesetround:
>> -	mrs x1, fpcr
>> -	bic w1, w1, #0xc00000
>> -	orr w1, w1, w0
>> -	msr fpcr, x1
>> -	mov w0, #0
>> -	ret
>> -
>> -.global fetestexcept
>> -.type fetestexcept,%function
>> -fetestexcept:
>> -	and w0, w0, #0x1f
>> -	mrs x1, fpsr
>> -	and w0, w0, w1
>> -	ret
>> -
>> -.global feclearexcept
>> -.type feclearexcept,%function
>> -feclearexcept:
>> -	and w0, w0, #0x1f
>> -	mrs x1, fpsr
>> -	bic w1, w1, w0
>> -	msr fpsr, x1
>> -	mov w0, #0
>> -	ret
>> -
>> -.global feraiseexcept
>> -.type feraiseexcept,%function
>> -feraiseexcept:
>> -	and w0, w0, #0x1f
>> -	mrs x1, fpsr
>> -	orr w1, w1, w0
>> -	msr fpsr, x1
>> -	mov w0, #0
>> -	ret
>> -
>> -.global fegetenv
>> -.type fegetenv,%function
>> -fegetenv:
>> -	mrs x1, fpcr
>> -	mrs x2, fpsr
>> -	stp w1, w2, [x0]
>> -	mov w0, #0
>> -	ret
>> -
>> -// TODO preserve some bits
>> -.global fesetenv
>> -.type fesetenv,%function
>> -fesetenv:
>> -	mov x1, #0
>> -	mov x2, #0
>> -	cmn x0, #1
>> -	b.eq 1f
>> -	ldp w1, w2, [x0]
>> -1:	msr fpcr, x1
>> -	msr fpsr, x2
>> -	mov w0, #0
>> -	ret
>> -- 
>> 2.51.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [musl] [PATCH] aarch64: rewrite fenv routines in C using inline asm
  2026-01-27  6:04 [musl] [PATCH] aarch64: rewrite fenv routines in C using inline asm Bill Roberts
@ 2026-01-28 22:13 ` Szabolcs Nagy
  2026-01-28  1:45   ` Bill Roberts
  2026-01-29 18:14   ` Markus Wichmann
  0 siblings, 2 replies; 4+ messages in thread
From: Szabolcs Nagy @ 2026-01-28 22:13 UTC (permalink / raw)
  To: Bill Roberts; +Cc: musl, dalias, dalias

* Bill Roberts <bill.roberts@arm.com> [2026-01-27 00:04:27 -0600]:
> Rewrite the AArch64 floating-point environment routines (fegetround,
> __fesetround, fetestexcept, feclearexcept, feraiseexcept, fegetenv,
> fesetenv) from assembly into C implementations using inline assembly.
> 
> This change eliminates the need for handwritten function prologues and
> epilogues in fenv.s, which simplifies maintenance and allows the compiler
> to automatically insert architecture features such as BTI landing pads and
> pointer authentication (PAC) sequences where applicable.
> 
> The new implementations mirror the original assembly semantics exactly:
> - access FPCR/FPSR using `mrs`/`msr` inline asm
> - preserve the same FE_* masks and return conventions
> - retain `__fesetround` as a hidden internal symbol
> - support the special FE_DFL_ENV case in fesetenv
> 
> Moving to C also enables the compiler to manage register allocation,
> stack usage, and ABI compliance automatically while keeping the low-level
> behavior (bitmasks and register accesses) explicit and verifiable.
> 
> No functional changes intended.
> 
> Signed-off-by: Bill Roberts <bill.roberts@arm.com>
> ---
>  arch/aarch64/crt_arch.h | 29 +++++++------
>  src/fenv/aarch64/fenv.c | 96 +++++++++++++++++++++++++++++++++++++++++
>  src/fenv/aarch64/fenv.s | 68 -----------------------------
>  3 files changed, 112 insertions(+), 81 deletions(-)
>  create mode 100644 src/fenv/aarch64/fenv.c
>  delete mode 100644 src/fenv/aarch64/fenv.s
> 
> diff --git a/arch/aarch64/crt_arch.h b/arch/aarch64/crt_arch.h
> index b64fb3dd..cff8edb3 100644
> --- a/arch/aarch64/crt_arch.h
> +++ b/arch/aarch64/crt_arch.h
> @@ -1,15 +1,18 @@
>  __asm__(
> -".text \n"
> -".global " START "\n"
> -".type " START ",%function\n"
> -START ":\n"
> -"	mov x29, #0\n"
> -"	mov x30, #0\n"
> -"	mov x0, sp\n"
> -".weak _DYNAMIC\n"
> -".hidden _DYNAMIC\n"
> -"	adrp x1, _DYNAMIC\n"
> -"	add x1, x1, #:lo12:_DYNAMIC\n"
> -"	and sp, x0, #-16\n"
> -"	b " START "_c\n"
> +".text \n\t"
> +".global " START "\n\t"
> +".type " START ",%function\n\t"
> +START ":\n\t"
> +#if defined(__ARM_FEATURE_BTI_DEFAULT)
> +"	hint 34\n\t"
> +#endif
> +"	mov x29, #0\n\t"
> +"	mov x30, #0\n\t"
> +"	mov x0, sp\n\t"
> +".weak _DYNAMIC\n\t"
> +".hidden _DYNAMIC\n\t"
> +"	adrp x1, _DYNAMIC\n\t"
> +"	add x1, x1, #:lo12:_DYNAMIC\n\t"
> +"	and sp, x0, #-16\n\t"
> +"	b " START "_c\n\t"

given the commit msg, this bit should be a separate patch.

the \t changes are a distraction here

>  );
> diff --git a/src/fenv/aarch64/fenv.c b/src/fenv/aarch64/fenv.c
> new file mode 100644
> index 00000000..6d84feac
> --- /dev/null
> +++ b/src/fenv/aarch64/fenv.c
> @@ -0,0 +1,96 @@
> +#include <fenv.h>
> +#include <stdint.h>
> +
> +#define FE_RMODE_MASK 0x00C00000u  // FPCR RMode bits [23:22]
> +#define FE_EXC_MASK   0x0000001Fu  // FPSR exception flags [4:0]
> +
> +static inline uint32_t read_fpcr_u32(void)
> +{
> +	uint64_t x;
> +	__asm__ volatile ("mrs %0, fpcr" : "=r"(x));
> +	return (uint32_t)x;
> +}
> +
> +static inline void write_fpcr_u32(uint32_t v)
> +{
> +	uint64_t x = (uint64_t)v;
> +	__asm__ volatile ("msr fpcr, %0" :: "r"(x) : "memory");
> +}
> +
> +static inline uint32_t read_fpsr_u32(void)
> +{
> +	uint64_t x;
> +	__asm__ volatile ("mrs %0, fpsr" : "=r"(x));
> +	return (uint32_t)x;
> +}
> +
> +static inline void write_fpsr_u32(uint32_t v)
> +{
> +	uint64_t x = (uint64_t)v;
> +	__asm__ volatile ("msr fpsr, %0" :: "r"(x) : "memory");
> +}

i dont think the casts are useful but the logic looks right

im not sure if the memory clobber is needed.

> +
> +int fegetround(void)
> +{
> +	uint32_t fpcr = read_fpcr_u32();
> +	return (int)(fpcr & FE_RMODE_MASK);
> +}
> +
> +__attribute__((__visibility__("hidden")))

use "hidden" from features.h

the rest looks ok.

> +int __fesetround(int rm)
> +{
> +	uint32_t fpcr = read_fpcr_u32();
> +	fpcr &= ~FE_RMODE_MASK;
> +	fpcr |= (uint32_t)rm;
> +	write_fpcr_u32(fpcr);
> +	return 0;
> +}
> +
> +int fetestexcept(int mask)
> +{
> +	uint32_t m   = (uint32_t)mask & FE_EXC_MASK;
> +	uint32_t fpsr = read_fpsr_u32();
> +	return (int)(m & fpsr);
> +}
> +
> +int feclearexcept(int mask)
> +{
> +	uint32_t m    = (uint32_t)mask & FE_EXC_MASK;
> +	uint32_t fpsr = read_fpsr_u32();
> +	fpsr &= ~m;
> +	write_fpsr_u32(fpsr);
> +	return 0;
> +}
> +
> +int feraiseexcept(int mask)
> +{
> +	uint32_t m    = (uint32_t)mask & FE_EXC_MASK;
> +	uint32_t fpsr = read_fpsr_u32();
> +	fpsr |= m;
> +	write_fpsr_u32(fpsr);
> +	return 0;
> +}
> +
> +int fegetenv(fenv_t *env)
> +{
> +	uint32_t fpcr = read_fpcr_u32();
> +	uint32_t fpsr = read_fpsr_u32();
> +	env->__fpcr = fpcr;
> +	env->__fpsr = fpsr;
> +	return 0;
> +}
> +
> +int fesetenv(const fenv_t *env)
> +{
> +	uint32_t fpcr = 0;
> +	uint32_t fpsr = 0;
> +
> +	if (env != FE_DFL_ENV) {
> +		fpcr = env->__fpcr;
> +		fpsr = env->__fpsr;
> +	}
> +
> +	write_fpcr_u32(fpcr);
> +	write_fpsr_u32(fpsr);
> +	return 0;
> +}
> diff --git a/src/fenv/aarch64/fenv.s b/src/fenv/aarch64/fenv.s
> deleted file mode 100644
> index 8f3ec965..00000000
> --- a/src/fenv/aarch64/fenv.s
> +++ /dev/null
> @@ -1,68 +0,0 @@
> -.global fegetround
> -.type fegetround,%function
> -fegetround:
> -	mrs x0, fpcr
> -	and w0, w0, #0xc00000
> -	ret
> -
> -.global __fesetround
> -.hidden __fesetround
> -.type __fesetround,%function
> -__fesetround:
> -	mrs x1, fpcr
> -	bic w1, w1, #0xc00000
> -	orr w1, w1, w0
> -	msr fpcr, x1
> -	mov w0, #0
> -	ret
> -
> -.global fetestexcept
> -.type fetestexcept,%function
> -fetestexcept:
> -	and w0, w0, #0x1f
> -	mrs x1, fpsr
> -	and w0, w0, w1
> -	ret
> -
> -.global feclearexcept
> -.type feclearexcept,%function
> -feclearexcept:
> -	and w0, w0, #0x1f
> -	mrs x1, fpsr
> -	bic w1, w1, w0
> -	msr fpsr, x1
> -	mov w0, #0
> -	ret
> -
> -.global feraiseexcept
> -.type feraiseexcept,%function
> -feraiseexcept:
> -	and w0, w0, #0x1f
> -	mrs x1, fpsr
> -	orr w1, w1, w0
> -	msr fpsr, x1
> -	mov w0, #0
> -	ret
> -
> -.global fegetenv
> -.type fegetenv,%function
> -fegetenv:
> -	mrs x1, fpcr
> -	mrs x2, fpsr
> -	stp w1, w2, [x0]
> -	mov w0, #0
> -	ret
> -
> -// TODO preserve some bits
> -.global fesetenv
> -.type fesetenv,%function
> -fesetenv:
> -	mov x1, #0
> -	mov x2, #0
> -	cmn x0, #1
> -	b.eq 1f
> -	ldp w1, w2, [x0]
> -1:	msr fpcr, x1
> -	msr fpsr, x2
> -	mov w0, #0
> -	ret
> -- 
> 2.51.0

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [musl] [PATCH] aarch64: rewrite fenv routines in C using inline asm
  2026-01-28 22:13 ` Szabolcs Nagy
  2026-01-28  1:45   ` Bill Roberts
@ 2026-01-29 18:14   ` Markus Wichmann
  1 sibling, 0 replies; 4+ messages in thread
From: Markus Wichmann @ 2026-01-29 18:14 UTC (permalink / raw)
  To: Bill Roberts, musl

Am Wed, Jan 28, 2026 at 11:13:23PM +0100 schrieb Szabolcs Nagy:
> * Bill Roberts <bill.roberts@arm.com> [2026-01-27 00:04:27 -0600]:
> > diff --git a/src/fenv/aarch64/fenv.c b/src/fenv/aarch64/fenv.c
> > new file mode 100644
> > index 00000000..6d84feac
> > --- /dev/null
> > +++ b/src/fenv/aarch64/fenv.c
> > @@ -0,0 +1,96 @@
> > +#include <fenv.h>
> > +#include <stdint.h>
> > +
> > +#define FE_RMODE_MASK 0x00C00000u  // FPCR RMode bits [23:22]
> > +#define FE_EXC_MASK   0x0000001Fu  // FPSR exception flags [4:0]
> > +
> > +static inline uint32_t read_fpcr_u32(void)
> > +{
> > +	uint64_t x;
> > +	__asm__ volatile ("mrs %0, fpcr" : "=r"(x));
> > +	return (uint32_t)x;
> > +}
> > +
> > +static inline void write_fpcr_u32(uint32_t v)
> > +{
> > +	uint64_t x = (uint64_t)v;
> > +	__asm__ volatile ("msr fpcr, %0" :: "r"(x) : "memory");
> > +}
> > +
> > +static inline uint32_t read_fpsr_u32(void)
> > +{
> > +	uint64_t x;
> > +	__asm__ volatile ("mrs %0, fpsr" : "=r"(x));
> > +	return (uint32_t)x;
> > +}
> > +
> > +static inline void write_fpsr_u32(uint32_t v)
> > +{
> > +	uint64_t x = (uint64_t)v;
> > +	__asm__ volatile ("msr fpsr, %0" :: "r"(x) : "memory");
> > +}
> 
> i dont think the casts are useful but the logic looks right
> 
> im not sure if the memory clobber is needed.
> 

Per the GCC docs, setting the FPCR should come with a dependency to the
floating-point calculations that come later in the program, so that GCC
doesn't reorder the two for some reason (particularly with LTO). Since
it is impossible for libc to carry a dependency to all future FP
calculations, the memory clobber is the next best thing.

Similar for the FPSR, since it is an implicit output of all FP
calculations.

Ciao,
Markus

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-01-29 19:48 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-27  6:04 [musl] [PATCH] aarch64: rewrite fenv routines in C using inline asm Bill Roberts
2026-01-28 22:13 ` Szabolcs Nagy
2026-01-28  1:45   ` Bill Roberts
2026-01-29 18:14   ` Markus Wichmann

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).