[PATCH] aarch64: add single instruction math functions

mailing list of musl libc
 help / color / mirror / code / Atom feed

From: Szabolcs Nagy <nsz@port70.net>
To: musl@lists.openwall.com
Subject: [PATCH] aarch64: add single instruction math functions
Date: Sun, 19 Mar 2017 05:26:45 +0100	[thread overview]
Message-ID: <20170319042644.GP2082@port70.net> (raw)

this should increase performance and reduce code size on aarch64.

the compiled code was checked with gcc-6.2.0 against the code
generated when using __builtin_* instead of inline asm.

lrint, lrintf, llrint and llrintf are two instructions each
(frintx followed by fcvtzs).

c with inline asm is used because it is safer than a pure asm
implementation: this prevents ll{rint,round} from being an alias
of l{rint,round} (because the types don't match), but it depends
on gcc style inline asm support.

ceil, floor, round, trunc can either raise inexact on finite
non-integer inputs or not raise any exceptions. the new
implementation does not raise exceptions while the generic
c code does.

on aarch64, the underflow exception is signaled before rounding
(ieee 754 allows both before and after rounding, but it must be
consistent). the generic fma c code signals it after rounding,
so using the single instruction fixes a slight conformance issue too.
---
 src/math/aarch64/ceil.c       |  7 +++++++
 src/math/aarch64/ceilf.c      |  7 +++++++
 src/math/aarch64/fabs.c       |  7 +++++++
 src/math/aarch64/fabs.s       |  6 ------
 src/math/aarch64/fabsf.c      |  7 +++++++
 src/math/aarch64/fabsf.s      |  6 ------
 src/math/aarch64/floor.c      |  7 +++++++
 src/math/aarch64/floorf.c     |  7 +++++++
 src/math/aarch64/fma.c        |  7 +++++++
 src/math/aarch64/fmaf.c       |  7 +++++++
 src/math/aarch64/fmax.c       |  7 +++++++
 src/math/aarch64/fmaxf.c      |  7 +++++++
 src/math/aarch64/fmin.c       |  7 +++++++
 src/math/aarch64/fminf.c      |  7 +++++++
 src/math/aarch64/llrint.c     | 10 ++++++++++
 src/math/aarch64/llrintf.c    | 10 ++++++++++
 src/math/aarch64/llround.c    |  8 ++++++++
 src/math/aarch64/llroundf.c   |  8 ++++++++
 src/math/aarch64/lrint.c      | 10 ++++++++++
 src/math/aarch64/lrintf.c     | 10 ++++++++++
 src/math/aarch64/lround.c     |  8 ++++++++
 src/math/aarch64/lroundf.c    |  8 ++++++++
 src/math/aarch64/nearbyint.c  |  7 +++++++
 src/math/aarch64/nearbyintf.c |  7 +++++++
 src/math/aarch64/rint.c       |  7 +++++++
 src/math/aarch64/rintf.c      |  7 +++++++
 src/math/aarch64/round.c      |  7 +++++++
 src/math/aarch64/roundf.c     |  7 +++++++
 src/math/aarch64/sqrt.c       |  7 +++++++
 src/math/aarch64/sqrt.s       |  6 ------
 src/math/aarch64/sqrtf.c      |  7 +++++++
 src/math/aarch64/sqrtf.s      |  6 ------
 src/math/aarch64/trunc.c      |  7 +++++++
 src/math/aarch64/truncf.c     |  7 +++++++
 34 files changed, 226 insertions(+), 24 deletions(-)
 create mode 100644 src/math/aarch64/ceil.c
 create mode 100644 src/math/aarch64/ceilf.c
 create mode 100644 src/math/aarch64/fabs.c
 delete mode 100644 src/math/aarch64/fabs.s
 create mode 100644 src/math/aarch64/fabsf.c
 delete mode 100644 src/math/aarch64/fabsf.s
 create mode 100644 src/math/aarch64/floor.c
 create mode 100644 src/math/aarch64/floorf.c
 create mode 100644 src/math/aarch64/fma.c
 create mode 100644 src/math/aarch64/fmaf.c
 create mode 100644 src/math/aarch64/fmax.c
 create mode 100644 src/math/aarch64/fmaxf.c
 create mode 100644 src/math/aarch64/fmin.c
 create mode 100644 src/math/aarch64/fminf.c
 create mode 100644 src/math/aarch64/llrint.c
 create mode 100644 src/math/aarch64/llrintf.c
 create mode 100644 src/math/aarch64/llround.c
 create mode 100644 src/math/aarch64/llroundf.c
 create mode 100644 src/math/aarch64/lrint.c
 create mode 100644 src/math/aarch64/lrintf.c
 create mode 100644 src/math/aarch64/lround.c
 create mode 100644 src/math/aarch64/lroundf.c
 create mode 100644 src/math/aarch64/nearbyint.c
 create mode 100644 src/math/aarch64/nearbyintf.c
 create mode 100644 src/math/aarch64/rint.c
 create mode 100644 src/math/aarch64/rintf.c
 create mode 100644 src/math/aarch64/round.c
 create mode 100644 src/math/aarch64/roundf.c
 create mode 100644 src/math/aarch64/sqrt.c
 delete mode 100644 src/math/aarch64/sqrt.s
 create mode 100644 src/math/aarch64/sqrtf.c
 delete mode 100644 src/math/aarch64/sqrtf.s
 create mode 100644 src/math/aarch64/trunc.c
 create mode 100644 src/math/aarch64/truncf.c

diff --git a/src/math/aarch64/ceil.c b/src/math/aarch64/ceil.c
new file mode 100644
index 00000000..ac80c1dc
--- /dev/null
+++ b/src/math/aarch64/ceil.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double ceil(double x)
+{
+	__asm__ ("frintp %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/ceilf.c b/src/math/aarch64/ceilf.c
new file mode 100644
index 00000000..1ef1e9c8
--- /dev/null
+++ b/src/math/aarch64/ceilf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float ceilf(float x)
+{
+	__asm__ ("frintp %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/fabs.c b/src/math/aarch64/fabs.c
new file mode 100644
index 00000000..5c3ecaf4
--- /dev/null
+++ b/src/math/aarch64/fabs.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fabs(double x)
+{
+	__asm__ ("fabs %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/fabs.s b/src/math/aarch64/fabs.s
deleted file mode 100644
index 8c04d091..00000000
--- a/src/math/aarch64/fabs.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.text
-.global fabs
-.type   fabs,%function
-fabs:
-	fabs d0, d0
-	ret
diff --git a/src/math/aarch64/fabsf.c b/src/math/aarch64/fabsf.c
new file mode 100644
index 00000000..7fde9817
--- /dev/null
+++ b/src/math/aarch64/fabsf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fabsf(float x)
+{
+	__asm__ ("fabs %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/fabsf.s b/src/math/aarch64/fabsf.s
deleted file mode 100644
index 6e96dd43..00000000
--- a/src/math/aarch64/fabsf.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.text
-.global fabsf
-.type   fabsf,%function
-fabsf:
-	fabs s0, s0
-	ret
diff --git a/src/math/aarch64/floor.c b/src/math/aarch64/floor.c
new file mode 100644
index 00000000..50ffdb28
--- /dev/null
+++ b/src/math/aarch64/floor.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double floor(double x)
+{
+	__asm__ ("frintm %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/floorf.c b/src/math/aarch64/floorf.c
new file mode 100644
index 00000000..8d007e9f
--- /dev/null
+++ b/src/math/aarch64/floorf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float floorf(float x)
+{
+	__asm__ ("frintm %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/fma.c b/src/math/aarch64/fma.c
new file mode 100644
index 00000000..2450ea7e
--- /dev/null
+++ b/src/math/aarch64/fma.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fma(double x, double y, double z)
+{
+	__asm__ ("fmadd %d0, %d1, %d2, %d3" : "=w"(x) : "w"(x), "w"(y), "w"(z));
+	return x;
+}
diff --git a/src/math/aarch64/fmaf.c b/src/math/aarch64/fmaf.c
new file mode 100644
index 00000000..9a147213
--- /dev/null
+++ b/src/math/aarch64/fmaf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fmaf(float x, float y, float z)
+{
+	__asm__ ("fmadd %s0, %s1, %s2, %s3" : "=w"(x) : "w"(x), "w"(y), "w"(z));
+	return x;
+}
diff --git a/src/math/aarch64/fmax.c b/src/math/aarch64/fmax.c
new file mode 100644
index 00000000..86dcb3b4
--- /dev/null
+++ b/src/math/aarch64/fmax.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fmax(double x, double y)
+{
+	__asm__ ("fmaxnm %d0, %d1, %d2" : "=w"(x) : "w"(x), "w"(y));
+	return x;
+}
diff --git a/src/math/aarch64/fmaxf.c b/src/math/aarch64/fmaxf.c
new file mode 100644
index 00000000..ee5eac2d
--- /dev/null
+++ b/src/math/aarch64/fmaxf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fmaxf(float x, float y)
+{
+	__asm__ ("fmaxnm %s0, %s1, %s2" : "=w"(x) : "w"(x), "w"(y));
+	return x;
+}
diff --git a/src/math/aarch64/fmin.c b/src/math/aarch64/fmin.c
new file mode 100644
index 00000000..f1e99808
--- /dev/null
+++ b/src/math/aarch64/fmin.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fmin(double x, double y)
+{
+	__asm__ ("fminnm %d0, %d1, %d2" : "=w"(x) : "w"(x), "w"(y));
+	return x;
+}
diff --git a/src/math/aarch64/fminf.c b/src/math/aarch64/fminf.c
new file mode 100644
index 00000000..80468f67
--- /dev/null
+++ b/src/math/aarch64/fminf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fminf(float x, float y)
+{
+	__asm__ ("fminnm %s0, %s1, %s2" : "=w"(x) : "w"(x), "w"(y));
+	return x;
+}
diff --git a/src/math/aarch64/llrint.c b/src/math/aarch64/llrint.c
new file mode 100644
index 00000000..a9e07a93
--- /dev/null
+++ b/src/math/aarch64/llrint.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+long long llrint(double x)
+{
+	long long n;
+	__asm__ (
+		"frintx %d1, %d1\n"
+		"fcvtzs %x0, %d1\n" : "=r"(n), "+w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/llrintf.c b/src/math/aarch64/llrintf.c
new file mode 100644
index 00000000..12b6804f
--- /dev/null
+++ b/src/math/aarch64/llrintf.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+long long llrintf(float x)
+{
+	long long n;
+	__asm__ (
+		"frintx %s1, %s1\n"
+		"fcvtzs %x0, %s1\n" : "=r"(n), "+w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/llround.c b/src/math/aarch64/llround.c
new file mode 100644
index 00000000..e09ddd48
--- /dev/null
+++ b/src/math/aarch64/llround.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long long llround(double x)
+{
+	long long n;
+	__asm__ ("fcvtas %x0, %d1" : "=r"(n) : "w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/llroundf.c b/src/math/aarch64/llroundf.c
new file mode 100644
index 00000000..16699598
--- /dev/null
+++ b/src/math/aarch64/llroundf.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long long llroundf(float x)
+{
+	long long n;
+	__asm__ ("fcvtas %x0, %s1" : "=r"(n) : "w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/lrint.c b/src/math/aarch64/lrint.c
new file mode 100644
index 00000000..cb7785ad
--- /dev/null
+++ b/src/math/aarch64/lrint.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+long lrint(double x)
+{
+	long n;
+	__asm__ (
+		"frintx %d1, %d1\n"
+		"fcvtzs %x0, %d1\n" : "=r"(n), "+w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/lrintf.c b/src/math/aarch64/lrintf.c
new file mode 100644
index 00000000..4d750d69
--- /dev/null
+++ b/src/math/aarch64/lrintf.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+long lrintf(float x)
+{
+	long n;
+	__asm__ (
+		"frintx %s1, %s1\n"
+		"fcvtzs %x0, %s1\n" : "=r"(n), "+w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/lround.c b/src/math/aarch64/lround.c
new file mode 100644
index 00000000..85656c78
--- /dev/null
+++ b/src/math/aarch64/lround.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long lround(double x)
+{
+	long n;
+	__asm__ ("fcvtas %x0, %d1" : "=r"(n) : "w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/lroundf.c b/src/math/aarch64/lroundf.c
new file mode 100644
index 00000000..32e51f3c
--- /dev/null
+++ b/src/math/aarch64/lroundf.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long lroundf(float x)
+{
+	long n;
+	__asm__ ("fcvtas %x0, %s1" : "=r"(n) : "w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/nearbyint.c b/src/math/aarch64/nearbyint.c
new file mode 100644
index 00000000..9c3fdb44
--- /dev/null
+++ b/src/math/aarch64/nearbyint.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double nearbyint(double x)
+{
+	__asm__ ("frinti %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/nearbyintf.c b/src/math/aarch64/nearbyintf.c
new file mode 100644
index 00000000..8e7f61df
--- /dev/null
+++ b/src/math/aarch64/nearbyintf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float nearbyintf(float x)
+{
+	__asm__ ("frinti %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/rint.c b/src/math/aarch64/rint.c
new file mode 100644
index 00000000..45b194b5
--- /dev/null
+++ b/src/math/aarch64/rint.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double rint(double x)
+{
+	__asm__ ("frintx %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/rintf.c b/src/math/aarch64/rintf.c
new file mode 100644
index 00000000..1ae7dd25
--- /dev/null
+++ b/src/math/aarch64/rintf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float rintf(float x)
+{
+	__asm__ ("frintx %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/round.c b/src/math/aarch64/round.c
new file mode 100644
index 00000000..897a84cc
--- /dev/null
+++ b/src/math/aarch64/round.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double round(double x)
+{
+	__asm__ ("frinta %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/roundf.c b/src/math/aarch64/roundf.c
new file mode 100644
index 00000000..91637eaa
--- /dev/null
+++ b/src/math/aarch64/roundf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float roundf(float x)
+{
+	__asm__ ("frinta %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/sqrt.c b/src/math/aarch64/sqrt.c
new file mode 100644
index 00000000..fe93c3e6
--- /dev/null
+++ b/src/math/aarch64/sqrt.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double sqrt(double x)
+{
+	__asm__ ("fsqrt %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/sqrt.s b/src/math/aarch64/sqrt.s
deleted file mode 100644
index 1917e18d..00000000
--- a/src/math/aarch64/sqrt.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.text
-.global sqrt
-.type   sqrt,%function
-sqrt:
-	fsqrt d0, d0
-	ret
diff --git a/src/math/aarch64/sqrtf.c b/src/math/aarch64/sqrtf.c
new file mode 100644
index 00000000..275c7f39
--- /dev/null
+++ b/src/math/aarch64/sqrtf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float sqrtf(float x)
+{
+	__asm__ ("fsqrt %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/sqrtf.s b/src/math/aarch64/sqrtf.s
deleted file mode 100644
index 1639497b..00000000
--- a/src/math/aarch64/sqrtf.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.text
-.global sqrtf
-.type   sqrtf,%function
-sqrtf:
-	fsqrt s0, s0
-	ret
diff --git a/src/math/aarch64/trunc.c b/src/math/aarch64/trunc.c
new file mode 100644
index 00000000..e592147a
--- /dev/null
+++ b/src/math/aarch64/trunc.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double trunc(double x)
+{
+	__asm__ ("frintz %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/truncf.c b/src/math/aarch64/truncf.c
new file mode 100644
index 00000000..20ef30f1
--- /dev/null
+++ b/src/math/aarch64/truncf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float truncf(float x)
+{
+	__asm__ ("frintz %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
-- 
2.11.0

next             reply	other threads:[~2017-03-19  4:26 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-19  4:26 Szabolcs Nagy [this message]
2017-03-19 12:55 ` David Edelsohn
2017-03-19 14:51   ` Szabolcs Nagy
2017-03-19 15:05   ` Rich Felker
2017-03-21 16:50     ` David Edelsohn
2017-03-21 16:58       ` Rich Felker
2017-03-21 16:50 ` Rich Felker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170319042644.GP2082@port70.net \
    --to=nsz@port70.net \
    --cc=musl@lists.openwall.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).