[musl] [PATCH] math: add LoongArch support for common APIs with inline assembly.

mailing list of musl libc
 help / color / mirror / code / Atom feed

* [musl] [PATCH] math: add LoongArch support for common APIs with inline assembly.
@ 2024-04-23  2:26 ticat_fp
  2024-04-23 15:56 ` Rich Felker
  0 siblings, 1 reply; 2+ messages in thread
From: ticat_fp @ 2024-04-23  2:26 UTC (permalink / raw)
  To: musl; +Cc: lixing, huajingyun, wanghongliang

Including: ceil, copysign, fabs, floor, fma, fmax, fmin, llrint,
lrint, rint, sqrt and their f versions.

---
 src/math/loongarch64/ceil.c      | 25 +++++++++++++++++++++++++
 src/math/loongarch64/ceilf.c     | 25 +++++++++++++++++++++++++
 src/math/loongarch64/copysign.c  |  7 +++++++
 src/math/loongarch64/copysignf.c |  7 +++++++
 src/math/loongarch64/fabs.c      |  7 +++++++
 src/math/loongarch64/fabsf.c     |  7 +++++++
 src/math/loongarch64/floor.c     | 22 ++++++++++++++++++++++
 src/math/loongarch64/floorf.c    | 22 ++++++++++++++++++++++
 src/math/loongarch64/fma.c       |  7 +++++++
 src/math/loongarch64/fmaf.c      |  7 +++++++
 src/math/loongarch64/fmax.c      |  7 +++++++
 src/math/loongarch64/fmaxf.c     |  7 +++++++
 src/math/loongarch64/fmin.c      |  7 +++++++
 src/math/loongarch64/fminf.c     |  7 +++++++
 src/math/loongarch64/llrint.c    | 17 +++++++++++++++++
 src/math/loongarch64/llrintf.c   | 17 +++++++++++++++++
 src/math/loongarch64/lrint.c     | 17 +++++++++++++++++
 src/math/loongarch64/lrintf.c    | 17 +++++++++++++++++
 src/math/loongarch64/rint.c      |  7 +++++++
 src/math/loongarch64/rintf.c     |  7 +++++++
 src/math/loongarch64/sqrt.c      |  7 +++++++
 src/math/loongarch64/sqrtf.c     |  7 +++++++
 22 files changed, 260 insertions(+)
 create mode 100644 src/math/loongarch64/ceil.c
 create mode 100644 src/math/loongarch64/ceilf.c
 create mode 100644 src/math/loongarch64/copysign.c
 create mode 100644 src/math/loongarch64/copysignf.c
 create mode 100644 src/math/loongarch64/fabs.c
 create mode 100644 src/math/loongarch64/fabsf.c
 create mode 100644 src/math/loongarch64/floor.c
 create mode 100644 src/math/loongarch64/floorf.c
 create mode 100644 src/math/loongarch64/fma.c
 create mode 100644 src/math/loongarch64/fmaf.c
 create mode 100644 src/math/loongarch64/fmax.c
 create mode 100644 src/math/loongarch64/fmaxf.c
 create mode 100644 src/math/loongarch64/fmin.c
 create mode 100644 src/math/loongarch64/fminf.c
 create mode 100644 src/math/loongarch64/llrint.c
 create mode 100644 src/math/loongarch64/llrintf.c
 create mode 100644 src/math/loongarch64/lrint.c
 create mode 100644 src/math/loongarch64/lrintf.c
 create mode 100644 src/math/loongarch64/rint.c
 create mode 100644 src/math/loongarch64/rintf.c
 create mode 100644 src/math/loongarch64/sqrt.c
 create mode 100644 src/math/loongarch64/sqrtf.c

diff --git a/src/math/loongarch64/ceil.c b/src/math/loongarch64/ceil.c
new file mode 100644
index 00000000..95781f4b
--- /dev/null
+++ b/src/math/loongarch64/ceil.c
@@ -0,0 +1,31 @@
+#include <math.h>
+#include <stdint.h>
+
+/*
+ * Round x to the smallest integral value that is not less than x.
+ *
+ * The rounding-mode field of FCSR (mask 0x300) is set to 0x200 for one
+ * frint.d, then the FCSR value read on entry is written back.
+ */
+double ceil(double x)
+{
+	uint32_t saved, upward;
+
+	/* volatile: the FCSR read must not be moved or dropped. */
+	__asm__ __volatile__ ("movfcsr2gr %0, $r0" : "=r"(saved));
+
+	/* Replace only the two rounding-mode bits. */
+	upward = (saved & ~UINT32_C(0x300)) | UINT32_C(0x200);
+
+	/* Switch mode, round and restore in one asm so the compiler cannot
+	 * schedule other floating-point code inside the window.
+	 * NOTE(review): writing back the saved value presumably also drops
+	 * any status bits frint.d raised in FCSR -- confirm intended. */
+	__asm__ __volatile__ (
+		"movgr2fcsr $r0, %1\n\t"
+		"frint.d %0, %2\n\t"
+		"movgr2fcsr $r0, %3"
+		: "=f"(x) : "r"(upward), "f"(x), "r"(saved));
+
+	return x;
+}
diff --git a/src/math/loongarch64/ceilf.c b/src/math/loongarch64/ceilf.c
new file mode 100644
index 00000000..03a2d933
--- /dev/null
+++ b/src/math/loongarch64/ceilf.c
@@ -0,0 +1,31 @@
+#include <math.h>
+#include <stdint.h>
+
+/*
+ * Round x to the smallest integral value that is not less than x.
+ *
+ * The rounding-mode field of FCSR (mask 0x300) is set to 0x200 for one
+ * frint.s, then the FCSR value read on entry is written back.
+ */
+float ceilf(float x)
+{
+	uint32_t saved, upward;
+
+	/* volatile: the FCSR read must not be moved or dropped. */
+	__asm__ __volatile__ ("movfcsr2gr %0, $r0" : "=r"(saved));
+
+	/* Replace only the two rounding-mode bits. */
+	upward = (saved & ~UINT32_C(0x300)) | UINT32_C(0x200);
+
+	/* Switch mode, round and restore in one asm so the compiler cannot
+	 * schedule other floating-point code inside the window.
+	 * NOTE(review): writing back the saved value presumably also drops
+	 * any status bits frint.s raised in FCSR -- confirm intended. */
+	__asm__ __volatile__ (
+		"movgr2fcsr $r0, %1\n\t"
+		"frint.s %0, %2\n\t"
+		"movgr2fcsr $r0, %3"
+		: "=f"(x) : "r"(upward), "f"(x), "r"(saved));
+
+	return x;
+}
diff --git a/src/math/loongarch64/copysign.c b/src/math/loongarch64/copysign.c
new file mode 100644
index 00000000..9e3b8de3
--- /dev/null
+++ b/src/math/loongarch64/copysign.c
@@ -0,0 +1,11 @@
+#include <math.h>
+
+/* Return a value with the magnitude of x and the sign of y, produced
+ * by one fcopysign.d instruction. */
+double copysign(double x, double y)
+{
+	double r;
+
+	__asm__ __volatile__("fcopysign.d %0, %1, %2" : "=f"(r) : "f"(x), "f"(y));
+	return r;
+}
diff --git a/src/math/loongarch64/copysignf.c b/src/math/loongarch64/copysignf.c
new file mode 100644
index 00000000..98df4254
--- /dev/null
+++ b/src/math/loongarch64/copysignf.c
@@ -0,0 +1,11 @@
+#include <math.h>
+
+/* Return a value with the magnitude of x and the sign of y, produced
+ * by one fcopysign.s instruction. */
+float copysignf(float x, float y)
+{
+	float r;
+
+	__asm__ __volatile__("fcopysign.s %0, %1, %2" : "=f"(r) : "f"(x), "f"(y));
+	return r;
+}
diff --git a/src/math/loongarch64/fabs.c b/src/math/loongarch64/fabs.c
new file mode 100644
index 00000000..3db57fb5
--- /dev/null
+++ b/src/math/loongarch64/fabs.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+/* Return the absolute value of x using one fabs.d instruction. */
+double fabs(double x)
+{
+	double r;
+
+	__asm__ __volatile__("fabs.d   %0, %1" : "=f"(r) : "f"(x));
+	return r;
+}
diff --git a/src/math/loongarch64/fabsf.c b/src/math/loongarch64/fabsf.c
new file mode 100644
index 00000000..e24201c5
--- /dev/null
+++ b/src/math/loongarch64/fabsf.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+/* Return the absolute value of x using one fabs.s instruction. */
+float fabsf(float x)
+{
+	float r;
+
+	__asm__ __volatile__("fabs.s   %0, %1" : "=f"(r) : "f"(x));
+	return r;
+}
diff --git a/src/math/loongarch64/floor.c b/src/math/loongarch64/floor.c
new file mode 100644
index 00000000..7aead2a3
--- /dev/null
+++ b/src/math/loongarch64/floor.c
@@ -0,0 +1,31 @@
+#include <math.h>
+#include <stdint.h>
+
+/*
+ * Round x to the largest integral value that is not greater than x.
+ *
+ * Both rounding-mode bits of FCSR (mask 0x300) are set for one frint.d,
+ * then the FCSR value read on entry is written back.
+ */
+double floor(double x)
+{
+	uint32_t saved, downward;
+
+	/* volatile: the FCSR read must not be moved or dropped. */
+	__asm__ __volatile__ ("movfcsr2gr %0, $r0" : "=r"(saved));
+
+	/* 0x300 sets the whole field, so no bits need clearing first. */
+	downward = saved | UINT32_C(0x300);
+
+	/* Switch mode, round and restore in one asm so the compiler cannot
+	 * schedule other floating-point code inside the window.
+	 * NOTE(review): writing back the saved value presumably also drops
+	 * any status bits frint.d raised in FCSR -- confirm intended. */
+	__asm__ __volatile__ (
+		"movgr2fcsr $r0, %1\n\t"
+		"frint.d %0, %2\n\t"
+		"movgr2fcsr $r0, %3"
+		: "=f"(x) : "r"(downward), "f"(x), "r"(saved));
+
+	return x;
+}
diff --git a/src/math/loongarch64/floorf.c b/src/math/loongarch64/floorf.c
new file mode 100644
index 00000000..772d15eb
--- /dev/null
+++ b/src/math/loongarch64/floorf.c
@@ -0,0 +1,31 @@
+#include <math.h>
+#include <stdint.h>
+
+/*
+ * Round x to the largest integral value that is not greater than x.
+ *
+ * Both rounding-mode bits of FCSR (mask 0x300) are set for one frint.s,
+ * then the FCSR value read on entry is written back.
+ */
+float floorf(float x)
+{
+	uint32_t saved, downward;
+
+	/* volatile: the FCSR read must not be moved or dropped. */
+	__asm__ __volatile__ ("movfcsr2gr %0, $r0" : "=r"(saved));
+
+	/* 0x300 sets the whole field, so no bits need clearing first. */
+	downward = saved | UINT32_C(0x300);
+
+	/* Switch mode, round and restore in one asm so the compiler cannot
+	 * schedule other floating-point code inside the window.
+	 * NOTE(review): writing back the saved value presumably also drops
+	 * any status bits frint.s raised in FCSR -- confirm intended. */
+	__asm__ __volatile__ (
+		"movgr2fcsr $r0, %1\n\t"
+		"frint.s %0, %2\n\t"
+		"movgr2fcsr $r0, %3"
+		: "=f"(x) : "r"(downward), "f"(x), "r"(saved));
+
+	return x;
+}
diff --git a/src/math/loongarch64/fma.c b/src/math/loongarch64/fma.c
new file mode 100644
index 00000000..0b6a3f23
--- /dev/null
+++ b/src/math/loongarch64/fma.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+/* Compute x*y + z with one fmadd.d instruction. */
+double fma(double x, double y, double z)
+{
+	double r;
+
+	__asm__ __volatile__("fmadd.d %0, %1, %2, %3" : "=f" (r) : "f"(x) , "f" (y), "f" (z));
+	return r;
+}
diff --git a/src/math/loongarch64/fmaf.c b/src/math/loongarch64/fmaf.c
new file mode 100644
index 00000000..77a8363b
--- /dev/null
+++ b/src/math/loongarch64/fmaf.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+/* Compute x*y + z with one fmadd.s instruction. */
+float fmaf(float x, float y, float z)
+{
+	float r;
+
+	__asm__ __volatile__("fmadd.s %0, %1, %2, %3" : "=f" (r) : "f"(x) , "f" (y), "f" (z));
+	return r;
+}
diff --git a/src/math/loongarch64/fmax.c b/src/math/loongarch64/fmax.c
new file mode 100644
index 00000000..2d091877
--- /dev/null
+++ b/src/math/loongarch64/fmax.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+/* Return the larger of x and y using one fmax.d instruction. */
+double fmax(double x, double y)
+{
+	double r;
+
+	__asm__ __volatile__("fmax.d %0, %1, %2" : "=f"(r) : "f"(x), "f"(y));
+	return r;
+}
diff --git a/src/math/loongarch64/fmaxf.c b/src/math/loongarch64/fmaxf.c
new file mode 100644
index 00000000..1106d47c
--- /dev/null
+++ b/src/math/loongarch64/fmaxf.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+/* Return the larger of x and y using one fmax.s instruction. */
+float fmaxf(float x, float y)
+{
+	float r;
+
+	__asm__ __volatile__("fmax.s %0, %1, %2" : "=f"(r) : "f"(x), "f"(y));
+	return r;
+}
diff --git a/src/math/loongarch64/fmin.c b/src/math/loongarch64/fmin.c
new file mode 100644
index 00000000..9c44ce87
--- /dev/null
+++ b/src/math/loongarch64/fmin.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+/* Return the smaller of x and y using one fmin.d instruction. */
+double fmin(double x, double y)
+{
+	double r;
+
+	__asm__ __volatile__("fmin.d %0, %1, %2" : "=f"(r) : "f"(x), "f"(y));
+	return r;
+}
diff --git a/src/math/loongarch64/fminf.c b/src/math/loongarch64/fminf.c
new file mode 100644
index 00000000..94a0fa45
--- /dev/null
+++ b/src/math/loongarch64/fminf.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+/* Return the smaller of x and y using one fmin.s instruction. */
+float fminf(float x, float y)
+{
+	float r;
+
+	__asm__ __volatile__("fmin.s %0, %1, %2" : "=f"(r) : "f"(x), "f"(y));
+	return r;
+}
diff --git a/src/math/loongarch64/llrint.c b/src/math/loongarch64/llrint.c
new file mode 100644
index 00000000..766222d3
--- /dev/null
+++ b/src/math/loongarch64/llrint.c
@@ -0,0 +1,21 @@
+#include <math.h>
+#include <stdint.h>
+
+/* Round x to an integral value with frint.d and return it as a
+ * long long. */
+long long llrint(double x)
+{
+	long long r;
+	double t; /* FPR scratch; holds the converted integer bits */
+
+	/* r and t are write-only outputs, so nothing uninitialized is read.
+	 * frint.d has already produced an integral value, so the truncating
+	 * conversion that follows does not round an in-range value again. */
+	__asm__ __volatile__ (
+		"frint.d %1, %2\n\t"
+		"ftintrz.l.d %1, %1\n\t"
+		"movfr2gr.d %0, %1"
+		: "=r"(r), "=f"(t) : "f"(x));
+
+	return r;
+}
diff --git a/src/math/loongarch64/llrintf.c b/src/math/loongarch64/llrintf.c
new file mode 100644
index 00000000..f5b9dd9f
--- /dev/null
+++ b/src/math/loongarch64/llrintf.c
@@ -0,0 +1,21 @@
+#include <math.h>
+#include <stdint.h>
+
+/* Round x to an integral value with frint.s and return it as a
+ * long long. */
+long long llrintf(float x)
+{
+	long long r;
+	double t; /* 64-bit FPR scratch for the converted integer */
+
+	/* The result is 64 bits wide, so the conversion must be the .l form
+	 * and the move to the GPR the .d form; ftintrz.w.s/movfr2gr.s only
+	 * deliver a 32-bit value. r and t are write-only outputs. */
+	__asm__ __volatile__ (
+		"frint.s %1, %2\n\t"
+		"ftintrz.l.s %1, %1\n\t"
+		"movfr2gr.d %0, %1"
+		: "=r"(r), "=f"(t) : "f"(x));
+
+	return r;
+}
diff --git a/src/math/loongarch64/lrint.c b/src/math/loongarch64/lrint.c
new file mode 100644
index 00000000..d82239d1
--- /dev/null
+++ b/src/math/loongarch64/lrint.c
@@ -0,0 +1,20 @@
+#include <math.h>
+#include <stdint.h>
+
+/* Round x to an integral value with frint.d and return it as a long. */
+long lrint(double x)
+{
+	long r;
+	double t; /* FPR scratch; holds the converted integer bits */
+
+	/* r and t are write-only outputs, so nothing uninitialized is read.
+	 * frint.d has already produced an integral value, so the truncating
+	 * conversion that follows does not round an in-range value again. */
+	__asm__ __volatile__ (
+		"frint.d %1, %2\n\t"
+		"ftintrz.l.d %1, %1\n\t"
+		"movfr2gr.d %0, %1"
+		: "=r"(r), "=f"(t) : "f"(x));
+
+	return r;
+}
diff --git a/src/math/loongarch64/lrintf.c b/src/math/loongarch64/lrintf.c
new file mode 100644
index 00000000..b30872e9
--- /dev/null
+++ b/src/math/loongarch64/lrintf.c
@@ -0,0 +1,20 @@
+#include <math.h>
+#include <stdint.h>
+
+/* Round x to an integral value with frint.s and return it as a long. */
+long lrintf(float x)
+{
+	long r;
+	double t; /* 64-bit FPR scratch for the converted integer */
+
+	/* ftintrz.l.s leaves a 64-bit integer in the FPR, so it has to be
+	 * moved with movfr2gr.d; movfr2gr.s would transfer only 32 bits of
+	 * it. r and t are write-only outputs. */
+	__asm__ __volatile__ (
+		"frint.s %1, %2\n\t"
+		"ftintrz.l.s %1, %1\n\t"
+		"movfr2gr.d %0, %1"
+		: "=r"(r), "=f"(t) : "f"(x));
+
+	return r;
+}
diff --git a/src/math/loongarch64/rint.c b/src/math/loongarch64/rint.c
new file mode 100644
index 00000000..862cea8c
--- /dev/null
+++ b/src/math/loongarch64/rint.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+/* Round x to an integral value using one frint.d instruction. */
+double rint(double x)
+{
+	double r;
+
+	__asm__ __volatile__("frint.d %0, %1" : "=f"(r) : "f"(x));
+	return r;
+}
diff --git a/src/math/loongarch64/rintf.c b/src/math/loongarch64/rintf.c
new file mode 100644
index 00000000..79ac216b
--- /dev/null
+++ b/src/math/loongarch64/rintf.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+/* Round x to an integral value using one frint.s instruction. */
+float rintf(float x)
+{
+	float r;
+
+	__asm__ __volatile__("frint.s %0, %1" : "=f"(r) : "f"(x));
+	return r;
+}
diff --git a/src/math/loongarch64/sqrt.c b/src/math/loongarch64/sqrt.c
new file mode 100644
index 00000000..a70e20e9
--- /dev/null
+++ b/src/math/loongarch64/sqrt.c
@@ -0,0 +1,9 @@
+#include <math.h>
+
+/* Return the square root of x using one fsqrt.d instruction. */
+double sqrt(double x)
+{
+	/* Destination comes first: %0 is the output, %1 the input. */
+	__asm__ __volatile__("fsqrt.d %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/loongarch64/sqrtf.c b/src/math/loongarch64/sqrtf.c
new file mode 100644
index 00000000..796609b0
--- /dev/null
+++ b/src/math/loongarch64/sqrtf.c
@@ -0,0 +1,9 @@
+#include <math.h>
+
+/* Return the square root of x using one fsqrt.s instruction. */
+float sqrtf(float x)
+{
+	/* Destination comes first: %0 is the output, %1 the input. */
+	__asm__ __volatile__("fsqrt.s %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
-- 
2.33.0


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [musl] [PATCH] math: add LoongArch support for common APIs with inline assembly.
  2024-04-23  2:26 [musl] [PATCH] math: add LoongArch support for common APIs with inline assembly ticat_fp
@ 2024-04-23 15:56 ` Rich Felker
  0 siblings, 0 replies; 2+ messages in thread
From: Rich Felker @ 2024-04-23 15:56 UTC (permalink / raw)
  To: ticat_fp; +Cc: musl, lixing, huajingyun, wanghongliang

On Tue, Apr 23, 2024 at 10:26:19AM +0800, ticat_fp wrote:
> Including: ceil, copysign, fabs, floor, fma, fmax, fmin, llrint,
> lrint, rint, sqrt and their f versions.
> 
> ---
>  src/math/loongarch64/ceil.c      | 25 +++++++++++++++++++++++++
>  src/math/loongarch64/ceilf.c     | 25 +++++++++++++++++++++++++
>  src/math/loongarch64/copysign.c  |  7 +++++++
>  src/math/loongarch64/copysignf.c |  7 +++++++
>  src/math/loongarch64/fabs.c      |  7 +++++++
>  src/math/loongarch64/fabsf.c     |  7 +++++++
>  src/math/loongarch64/floor.c     | 22 ++++++++++++++++++++++
>  src/math/loongarch64/floorf.c    | 22 ++++++++++++++++++++++
>  src/math/loongarch64/fma.c       |  7 +++++++
>  src/math/loongarch64/fmaf.c      |  7 +++++++
>  src/math/loongarch64/fmax.c      |  7 +++++++
>  src/math/loongarch64/fmaxf.c     |  7 +++++++
>  src/math/loongarch64/fmin.c      |  7 +++++++
>  src/math/loongarch64/fminf.c     |  7 +++++++
>  src/math/loongarch64/llrint.c    | 17 +++++++++++++++++
>  src/math/loongarch64/llrintf.c   | 17 +++++++++++++++++
>  src/math/loongarch64/lrint.c     | 17 +++++++++++++++++
>  src/math/loongarch64/lrintf.c    | 17 +++++++++++++++++
>  src/math/loongarch64/rint.c      |  7 +++++++
>  src/math/loongarch64/rintf.c     |  7 +++++++
>  src/math/loongarch64/sqrt.c      |  7 +++++++
>  src/math/loongarch64/sqrtf.c     |  7 +++++++
>  22 files changed, 260 insertions(+)
>  create mode 100644 src/math/loongarch64/ceil.c
>  create mode 100644 src/math/loongarch64/ceilf.c
>  create mode 100644 src/math/loongarch64/copysign.c
>  create mode 100644 src/math/loongarch64/copysignf.c
>  create mode 100644 src/math/loongarch64/fabs.c
>  create mode 100644 src/math/loongarch64/fabsf.c
>  create mode 100644 src/math/loongarch64/floor.c
>  create mode 100644 src/math/loongarch64/floorf.c
>  create mode 100644 src/math/loongarch64/fma.c
>  create mode 100644 src/math/loongarch64/fmaf.c
>  create mode 100644 src/math/loongarch64/fmax.c
>  create mode 100644 src/math/loongarch64/fmaxf.c
>  create mode 100644 src/math/loongarch64/fmin.c
>  create mode 100644 src/math/loongarch64/fminf.c
>  create mode 100644 src/math/loongarch64/llrint.c
>  create mode 100644 src/math/loongarch64/llrintf.c
>  create mode 100644 src/math/loongarch64/lrint.c
>  create mode 100644 src/math/loongarch64/lrintf.c
>  create mode 100644 src/math/loongarch64/rint.c
>  create mode 100644 src/math/loongarch64/rintf.c
>  create mode 100644 src/math/loongarch64/sqrt.c
>  create mode 100644 src/math/loongarch64/sqrtf.c
> 
> diff --git a/src/math/loongarch64/ceil.c b/src/math/loongarch64/ceil.c
> new file mode 100644
> index 00000000..95781f4b
> --- /dev/null
> +++ b/src/math/loongarch64/ceil.c
> @@ -0,0 +1,25 @@
> +#include <math.h>
> +#include <stdint.h>
> +
> +double ceil(double x)
> +{
> +    int32_t old;                                                  
> +    int32_t new;                                                  
> +    int32_t tmp1;
> +    int32_t tmp2;
> +
> +    __asm__ __volatile__(                    
> +    "movfcsr2gr %[orig_old],  $r0               \n\t"
> +    "li.d       %[tmp1], 0x200                  \n\t"
> +    "or         %[new],  %[orig_old], %[tmp1]   \n\t"
> +    "li.d       %[tmp2], 0xfffffeff             \n\t"
> +    "and        %[new],  %[new], %[tmp2]        \n\t"
> +    "movgr2fcsr $r0,     %[new]                 \n\t"
> +    "frint.d    %[result],       %[orig_x]      \n\t"
> +    "movgr2fcsr $r0,     %[orig_old]            \n\t"                                                                                                                                     
> +    : [result] "+f"(x), [old]"+r"(old), [new]"+r"(new), [tmp1] "+r"(tmp1), [tmp2] "+r"(tmp2)
> +    : [orig_x] "f"(x), [orig_old]"r"(old), [orig_new]"r"(new), [orig_tmp1] "r"(tmp1), [orig_tmp2] "r"(tmp2)
> +    :);
> +
> +    return x;
> +}

Is it possible to write these with the control register logic in C
rather than a big block of asm?

Also, while probably all versions of gcc and clang with loongarch64
support the named-argument inline asm, we generally don't depend on
this extension in musl. I see how it makes the code more readable with
the big asm block, but if we could get rid of the big asm block so
that it's just a single asm statement to read the old control register
value, C to modify it, and a pair of instructions (round and restore
control register) taking the argument value and old control register
value to restore as inputs, there wouldn't be any need for them to
make it readable.

Rich

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-04-23 15:56 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-23  2:26 [musl] [PATCH] math: add LoongArch support for common APIs with inline assembly ticat_fp
2024-04-23 15:56 ` Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).