From: ticat_fp <fanpeng@loongson.cn>
To: musl@lists.openwall.com
Cc: lixing@loongson.cn, huajingyun@loongson.cn, wanghongliang@loongson.cn
Subject: [musl] [PATCH] math: add LoongArch support for common APIs with inline assembly.
Date: Tue, 23 Apr 2024 10:26:19 +0800 [thread overview]
Message-ID: <20240423022619.1253464-1-fanpeng@loongson.cn> (raw)
Add inline-assembly implementations of ceil, copysign, fabs, floor, fma, fmax,
fmin, llrint, lrint, rint, sqrt, and their float (f-suffixed) variants.
---
src/math/loongarch64/ceil.c | 25 +++++++++++++++++++++++++
src/math/loongarch64/ceilf.c | 25 +++++++++++++++++++++++++
src/math/loongarch64/copysign.c | 7 +++++++
src/math/loongarch64/copysignf.c | 7 +++++++
src/math/loongarch64/fabs.c | 7 +++++++
src/math/loongarch64/fabsf.c | 7 +++++++
src/math/loongarch64/floor.c | 22 ++++++++++++++++++++++
src/math/loongarch64/floorf.c | 22 ++++++++++++++++++++++
src/math/loongarch64/fma.c | 7 +++++++
src/math/loongarch64/fmaf.c | 7 +++++++
src/math/loongarch64/fmax.c | 7 +++++++
src/math/loongarch64/fmaxf.c | 7 +++++++
src/math/loongarch64/fmin.c | 7 +++++++
src/math/loongarch64/fminf.c | 7 +++++++
src/math/loongarch64/llrint.c | 17 +++++++++++++++++
src/math/loongarch64/llrintf.c | 17 +++++++++++++++++
src/math/loongarch64/lrint.c | 17 +++++++++++++++++
src/math/loongarch64/lrintf.c | 17 +++++++++++++++++
src/math/loongarch64/rint.c | 7 +++++++
src/math/loongarch64/rintf.c | 7 +++++++
src/math/loongarch64/sqrt.c | 7 +++++++
src/math/loongarch64/sqrtf.c | 7 +++++++
22 files changed, 260 insertions(+)
create mode 100644 src/math/loongarch64/ceil.c
create mode 100644 src/math/loongarch64/ceilf.c
create mode 100644 src/math/loongarch64/copysign.c
create mode 100644 src/math/loongarch64/copysignf.c
create mode 100644 src/math/loongarch64/fabs.c
create mode 100644 src/math/loongarch64/fabsf.c
create mode 100644 src/math/loongarch64/floor.c
create mode 100644 src/math/loongarch64/floorf.c
create mode 100644 src/math/loongarch64/fma.c
create mode 100644 src/math/loongarch64/fmaf.c
create mode 100644 src/math/loongarch64/fmax.c
create mode 100644 src/math/loongarch64/fmaxf.c
create mode 100644 src/math/loongarch64/fmin.c
create mode 100644 src/math/loongarch64/fminf.c
create mode 100644 src/math/loongarch64/llrint.c
create mode 100644 src/math/loongarch64/llrintf.c
create mode 100644 src/math/loongarch64/lrint.c
create mode 100644 src/math/loongarch64/lrintf.c
create mode 100644 src/math/loongarch64/rint.c
create mode 100644 src/math/loongarch64/rintf.c
create mode 100644 src/math/loongarch64/sqrt.c
create mode 100644 src/math/loongarch64/sqrtf.c
diff --git a/src/math/loongarch64/ceil.c b/src/math/loongarch64/ceil.c
new file mode 100644
index 00000000..95781f4b
--- /dev/null
+++ b/src/math/loongarch64/ceil.c
@@ -0,0 +1,25 @@
+#include <math.h>
+#include <stdint.h>
+
+/* ceil(x): round x toward +infinity to an integral value, using frint.d. */
+double ceil(double x)
+{
+	int32_t old, new, tmp;
+
+	/* Save FCSR0, set its rounding-mode field (bits 9:8) to 0b10 by setting
+	 * bit 9 and clearing bit 8, round, then write the saved FCSR0 back.
+	 * The GPR outputs are write-only early-clobber ("=&r") so the compiler
+	 * never reads the uninitialized locals and never overlaps the registers. */
+	__asm__ __volatile__(
+		"movfcsr2gr %[old], $r0 \n\t"
+		"li.d %[tmp], 0x200 \n\t"
+		"or %[new], %[old], %[tmp] \n\t"
+		"li.d %[tmp], 0xfffffeff \n\t"
+		"and %[new], %[new], %[tmp] \n\t"
+		"movgr2fcsr $r0, %[new] \n\t"
+		"frint.d %[result], %[result] \n\t"
+		"movgr2fcsr $r0, %[old] \n\t"
+		: [result] "+f"(x), [old] "=&r"(old), [new] "=&r"(new), [tmp] "=&r"(tmp));
+
+	return x;
+}
diff --git a/src/math/loongarch64/ceilf.c b/src/math/loongarch64/ceilf.c
new file mode 100644
index 00000000..03a2d933
--- /dev/null
+++ b/src/math/loongarch64/ceilf.c
@@ -0,0 +1,25 @@
+#include <math.h>
+#include <stdint.h>
+
+/* ceilf(x): round x toward +infinity to an integral value, using frint.s. */
+float ceilf(float x)
+{
+	int32_t old, new, tmp;
+
+	/* Save FCSR0, set its rounding-mode field (bits 9:8) to 0b10 by setting
+	 * bit 9 and clearing bit 8, round, then write the saved FCSR0 back.
+	 * The GPR outputs are write-only early-clobber ("=&r") so the compiler
+	 * never reads the uninitialized locals and never overlaps the registers. */
+	__asm__ __volatile__(
+		"movfcsr2gr %[old], $r0 \n\t"
+		"li.d %[tmp], 0x200 \n\t"
+		"or %[new], %[old], %[tmp] \n\t"
+		"li.d %[tmp], 0xfffffeff \n\t"
+		"and %[new], %[new], %[tmp] \n\t"
+		"movgr2fcsr $r0, %[new] \n\t"
+		"frint.s %[result], %[result] \n\t"
+		"movgr2fcsr $r0, %[old] \n\t"
+		: [result] "+f"(x), [old] "=&r"(old), [new] "=&r"(new), [tmp] "=&r"(tmp));
+
+	return x;
+}
diff --git a/src/math/loongarch64/copysign.c b/src/math/loongarch64/copysign.c
new file mode 100644
index 00000000..9e3b8de3
--- /dev/null
+++ b/src/math/loongarch64/copysign.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double copysign(double x, double y) /* magnitude of x with the sign of y */
+{
+	__asm__ __volatile__("fcopysign.d %[x], %[x], %[y]" : [x] "+f"(x) : [y] "f"(y));
+	return x;
+}
diff --git a/src/math/loongarch64/copysignf.c b/src/math/loongarch64/copysignf.c
new file mode 100644
index 00000000..98df4254
--- /dev/null
+++ b/src/math/loongarch64/copysignf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float copysignf(float x, float y) /* magnitude of x with the sign of y */
+{
+	__asm__ __volatile__("fcopysign.s %[x], %[x], %[y]" : [x] "+f"(x) : [y] "f"(y));
+	return x;
+}
diff --git a/src/math/loongarch64/fabs.c b/src/math/loongarch64/fabs.c
new file mode 100644
index 00000000..3db57fb5
--- /dev/null
+++ b/src/math/loongarch64/fabs.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fabs(double x) /* |x|, computed by the fabs.d instruction */
+{
+	__asm__ __volatile__("fabs.d %[v], %[v]" : [v] "+f"(x));
+	return x;
+}
diff --git a/src/math/loongarch64/fabsf.c b/src/math/loongarch64/fabsf.c
new file mode 100644
index 00000000..e24201c5
--- /dev/null
+++ b/src/math/loongarch64/fabsf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fabsf(float x) /* |x|, computed by the fabs.s instruction */
+{
+	__asm__ __volatile__("fabs.s %[v], %[v]" : [v] "+f"(x));
+	return x;
+}
diff --git a/src/math/loongarch64/floor.c b/src/math/loongarch64/floor.c
new file mode 100644
index 00000000..7aead2a3
--- /dev/null
+++ b/src/math/loongarch64/floor.c
@@ -0,0 +1,22 @@
+#include <math.h>
+#include <stdint.h>
+
+/* floor(x): round x toward -infinity to an integral value, using frint.d. */
+double floor(double x)
+{
+	int32_t old, new, tmp;
+
+	/* Save FCSR0, set both rounding-mode bits (9:8 = 0b11), round, then write
+	 * the saved FCSR0 back. The GPR outputs are write-only early-clobber
+	 * ("=&r") so the uninitialized locals are never read as asm inputs. */
+	__asm__ __volatile__(
+		"movfcsr2gr %[old], $r0 \n\t"
+		"li.d %[tmp], 0x300 \n\t"
+		"or %[new], %[old], %[tmp] \n\t"
+		"movgr2fcsr $r0, %[new] \n\t"
+		"frint.d %[result], %[result] \n\t"
+		"movgr2fcsr $r0, %[old] \n\t"
+		: [result] "+f"(x), [old] "=&r"(old), [new] "=&r"(new), [tmp] "=&r"(tmp));
+
+	return x;
+}
diff --git a/src/math/loongarch64/floorf.c b/src/math/loongarch64/floorf.c
new file mode 100644
index 00000000..772d15eb
--- /dev/null
+++ b/src/math/loongarch64/floorf.c
@@ -0,0 +1,22 @@
+#include <math.h>
+#include <stdint.h>
+
+/* floorf(x): round x toward -infinity to an integral value, using frint.s. */
+float floorf(float x)
+{
+	int32_t old, new, tmp;
+
+	/* Save FCSR0, set both rounding-mode bits (9:8 = 0b11), round, then write
+	 * the saved FCSR0 back. The GPR outputs are write-only early-clobber
+	 * ("=&r") so the uninitialized locals are never read as asm inputs. */
+	__asm__ __volatile__(
+		"movfcsr2gr %[old], $r0 \n\t"
+		"li.d %[tmp], 0x300 \n\t"
+		"or %[new], %[old], %[tmp] \n\t"
+		"movgr2fcsr $r0, %[new] \n\t"
+		"frint.s %[result], %[result] \n\t"
+		"movgr2fcsr $r0, %[old] \n\t"
+		: [result] "+f"(x), [old] "=&r"(old), [new] "=&r"(new), [tmp] "=&r"(tmp));
+
+	return x;
+}
diff --git a/src/math/loongarch64/fma.c b/src/math/loongarch64/fma.c
new file mode 100644
index 00000000..0b6a3f23
--- /dev/null
+++ b/src/math/loongarch64/fma.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fma(double x, double y, double z) /* x*y + z via one fmadd.d instruction */
+{
+	__asm__ __volatile__("fmadd.d %[x], %[x], %[y], %[z]" : [x] "+f"(x) : [y] "f"(y), [z] "f"(z));
+	return x;
+}
diff --git a/src/math/loongarch64/fmaf.c b/src/math/loongarch64/fmaf.c
new file mode 100644
index 00000000..77a8363b
--- /dev/null
+++ b/src/math/loongarch64/fmaf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fmaf(float x, float y, float z) /* x*y + z via one fmadd.s instruction */
+{
+	__asm__ __volatile__("fmadd.s %[x], %[x], %[y], %[z]" : [x] "+f"(x) : [y] "f"(y), [z] "f"(z));
+	return x;
+}
diff --git a/src/math/loongarch64/fmax.c b/src/math/loongarch64/fmax.c
new file mode 100644
index 00000000..2d091877
--- /dev/null
+++ b/src/math/loongarch64/fmax.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fmax(double x, double y) /* larger of x and y, via fmax.d */
+{
+	__asm__ __volatile__("fmax.d %[x], %[x], %[y]" : [x] "+f"(x) : [y] "f"(y));
+	return x;
+}
diff --git a/src/math/loongarch64/fmaxf.c b/src/math/loongarch64/fmaxf.c
new file mode 100644
index 00000000..1106d47c
--- /dev/null
+++ b/src/math/loongarch64/fmaxf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fmaxf(float x, float y) /* larger of x and y, via fmax.s */
+{
+	__asm__ __volatile__("fmax.s %[x], %[x], %[y]" : [x] "+f"(x) : [y] "f"(y));
+	return x;
+}
diff --git a/src/math/loongarch64/fmin.c b/src/math/loongarch64/fmin.c
new file mode 100644
index 00000000..9c44ce87
--- /dev/null
+++ b/src/math/loongarch64/fmin.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fmin(double x, double y) /* smaller of x and y, via fmin.d */
+{
+	__asm__ __volatile__("fmin.d %[x], %[x], %[y]" : [x] "+f"(x) : [y] "f"(y));
+	return x;
+}
diff --git a/src/math/loongarch64/fminf.c b/src/math/loongarch64/fminf.c
new file mode 100644
index 00000000..94a0fa45
--- /dev/null
+++ b/src/math/loongarch64/fminf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fminf(float x, float y) /* smaller of x and y, via fmin.s */
+{
+	__asm__ __volatile__("fmin.s %[x], %[x], %[y]" : [x] "+f"(x) : [y] "f"(y));
+	return x;
+}
diff --git a/src/math/loongarch64/llrint.c b/src/math/loongarch64/llrint.c
new file mode 100644
index 00000000..766222d3
--- /dev/null
+++ b/src/math/loongarch64/llrint.c
@@ -0,0 +1,17 @@
+#include <math.h>
+#include <stdint.h>
+
+/* llrint(x): round x to an integer in the current rounding mode, as long long. */
+long long llrint(double x)
+{
+	long long r;
+
+	/* r is a write-only output ("=r"); x is rounded and converted in place. */
+	__asm__ __volatile__(
+		"frint.d %[x], %[x] \n\t"
+		"ftintrz.l.d %[x], %[x] \n\t"
+		"movfr2gr.d %[result], %[x] \n\t"
+		: [result] "=r"(r), [x] "+f"(x));
+
+	return r;
+}
diff --git a/src/math/loongarch64/llrintf.c b/src/math/loongarch64/llrintf.c
new file mode 100644
index 00000000..f5b9dd9f
--- /dev/null
+++ b/src/math/loongarch64/llrintf.c
@@ -0,0 +1,17 @@
+#include <math.h>
+#include <stdint.h>
+
+/* llrintf(x): round x to an integer in the current rounding mode, as long long. */
+long long llrintf(float x)
+{
+	long long r;
+
+	/* Convert to a 64-bit integer (.l) and move all 64 bits (movfr2gr.d). */
+	__asm__ __volatile__(
+		"frint.s %[x], %[x] \n\t"
+		"ftintrz.l.s %[x], %[x] \n\t"
+		"movfr2gr.d %[result], %[x] \n\t"
+		: [result] "=r"(r), [x] "+f"(x));
+
+	return r;
+}
diff --git a/src/math/loongarch64/lrint.c b/src/math/loongarch64/lrint.c
new file mode 100644
index 00000000..d82239d1
--- /dev/null
+++ b/src/math/loongarch64/lrint.c
@@ -0,0 +1,17 @@
+#include <math.h>
+#include <stdint.h>
+
+/* lrint(x): round x to an integer in the current rounding mode, as long. */
+long lrint(double x)
+{
+	long r;
+
+	/* r is a write-only output ("=r"); x is rounded and converted in place. */
+	__asm__ __volatile__(
+		"frint.d %[x], %[x] \n\t"
+		"ftintrz.l.d %[x], %[x] \n\t"
+		"movfr2gr.d %[result], %[x] \n\t"
+		: [result] "=r"(r), [x] "+f"(x));
+
+	return r;
+}
diff --git a/src/math/loongarch64/lrintf.c b/src/math/loongarch64/lrintf.c
new file mode 100644
index 00000000..b30872e9
--- /dev/null
+++ b/src/math/loongarch64/lrintf.c
@@ -0,0 +1,17 @@
+#include <math.h>
+#include <stdint.h>
+
+/* lrintf(x): round x to an integer in the current rounding mode, as long. */
+long lrintf(float x)
+{
+	long r;
+
+	/* ftintrz.l.s yields a 64-bit integer, so move all 64 bits (movfr2gr.d). */
+	__asm__ __volatile__(
+		"frint.s %[x], %[x] \n\t"
+		"ftintrz.l.s %[x], %[x] \n\t"
+		"movfr2gr.d %[result], %[x] \n\t"
+		: [result] "=r"(r), [x] "+f"(x));
+
+	return r;
+}
diff --git a/src/math/loongarch64/rint.c b/src/math/loongarch64/rint.c
new file mode 100644
index 00000000..862cea8c
--- /dev/null
+++ b/src/math/loongarch64/rint.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double rint(double x) /* round to an integral value with frint.d, FCSR untouched */
+{
+	__asm__ __volatile__("frint.d %[v], %[v]" : [v] "+f"(x));
+	return x;
+}
diff --git a/src/math/loongarch64/rintf.c b/src/math/loongarch64/rintf.c
new file mode 100644
index 00000000..79ac216b
--- /dev/null
+++ b/src/math/loongarch64/rintf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float rintf(float x) /* round to an integral value with frint.s, FCSR untouched */
+{
+	__asm__ __volatile__("frint.s %[v], %[v]" : [v] "+f"(x));
+	return x;
+}
diff --git a/src/math/loongarch64/sqrt.c b/src/math/loongarch64/sqrt.c
new file mode 100644
index 00000000..a70e20e9
--- /dev/null
+++ b/src/math/loongarch64/sqrt.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double sqrt(double x) /* square root of x, computed by the fsqrt.d instruction */
+{
+	__asm__ __volatile__("fsqrt.d %0, %1" : "=f"(x) : "f"(x)); /* %0 = destination, %1 = source */
+	return x;
+}
diff --git a/src/math/loongarch64/sqrtf.c b/src/math/loongarch64/sqrtf.c
new file mode 100644
index 00000000..796609b0
--- /dev/null
+++ b/src/math/loongarch64/sqrtf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float sqrtf(float x) /* square root of x, computed by the fsqrt.s instruction */
+{
+	__asm__ __volatile__("fsqrt.s %0, %1" : "=f"(x) : "f"(x)); /* %0 = destination, %1 = source */
+	return x;
+}
--
2.33.0
next reply other threads:[~2024-04-23 2:27 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-23 2:26 ticat_fp [this message]
2024-04-23 15:56 ` Rich Felker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240423022619.1253464-1-fanpeng@loongson.cn \
--to=fanpeng@loongson.cn \
--cc=huajingyun@loongson.cn \
--cc=lixing@loongson.cn \
--cc=musl@lists.openwall.com \
--cc=wanghongliang@loongson.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/musl/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).