mailing list of musl libc
* [PATCH] aarch64: add single instruction math functions
@ 2017-03-19  4:26 Szabolcs Nagy
  2017-03-19 12:55 ` David Edelsohn
  2017-03-21 16:50 ` Rich Felker
  0 siblings, 2 replies; 7+ messages in thread
From: Szabolcs Nagy @ 2017-03-19  4:26 UTC (permalink / raw)
  To: musl

this should increase performance and reduce code size on aarch64.

the compiled code was checked against a version using __builtin_*
instead of inline asm, with gcc-6.2.0.

lrint is two instructions.

c with inline asm is used because it is safer than a pure asm
implementation; this prevents ll{rint,round} from being an alias
of l{rint,round} (because the types don't match) and depends
on gcc style inline asm support.

ceil, floor, round, trunc can either raise inexact on finite
non-integer inputs or not raise any exceptions. the new
implementation does not raise exceptions while the generic
c code does.

on aarch64, the underflow exception is signaled before rounding
(ieee 754 allows both before and after rounding, but it must be
consistent), the generic fma c code signals it after rounding
so using a single instruction fixes a slight conformance issue too.
---
 src/math/aarch64/ceil.c       |  7 +++++++
 src/math/aarch64/ceilf.c      |  7 +++++++
 src/math/aarch64/fabs.c       |  7 +++++++
 src/math/aarch64/fabs.s       |  6 ------
 src/math/aarch64/fabsf.c      |  7 +++++++
 src/math/aarch64/fabsf.s      |  6 ------
 src/math/aarch64/floor.c      |  7 +++++++
 src/math/aarch64/floorf.c     |  7 +++++++
 src/math/aarch64/fma.c        |  7 +++++++
 src/math/aarch64/fmaf.c       |  7 +++++++
 src/math/aarch64/fmax.c       |  7 +++++++
 src/math/aarch64/fmaxf.c      |  7 +++++++
 src/math/aarch64/fmin.c       |  7 +++++++
 src/math/aarch64/fminf.c      |  7 +++++++
 src/math/aarch64/llrint.c     | 10 ++++++++++
 src/math/aarch64/llrintf.c    | 10 ++++++++++
 src/math/aarch64/llround.c    |  8 ++++++++
 src/math/aarch64/llroundf.c   |  8 ++++++++
 src/math/aarch64/lrint.c      | 10 ++++++++++
 src/math/aarch64/lrintf.c     | 10 ++++++++++
 src/math/aarch64/lround.c     |  8 ++++++++
 src/math/aarch64/lroundf.c    |  8 ++++++++
 src/math/aarch64/nearbyint.c  |  7 +++++++
 src/math/aarch64/nearbyintf.c |  7 +++++++
 src/math/aarch64/rint.c       |  7 +++++++
 src/math/aarch64/rintf.c      |  7 +++++++
 src/math/aarch64/round.c      |  7 +++++++
 src/math/aarch64/roundf.c     |  7 +++++++
 src/math/aarch64/sqrt.c       |  7 +++++++
 src/math/aarch64/sqrt.s       |  6 ------
 src/math/aarch64/sqrtf.c      |  7 +++++++
 src/math/aarch64/sqrtf.s      |  6 ------
 src/math/aarch64/trunc.c      |  7 +++++++
 src/math/aarch64/truncf.c     |  7 +++++++
 34 files changed, 226 insertions(+), 24 deletions(-)
 create mode 100644 src/math/aarch64/ceil.c
 create mode 100644 src/math/aarch64/ceilf.c
 create mode 100644 src/math/aarch64/fabs.c
 delete mode 100644 src/math/aarch64/fabs.s
 create mode 100644 src/math/aarch64/fabsf.c
 delete mode 100644 src/math/aarch64/fabsf.s
 create mode 100644 src/math/aarch64/floor.c
 create mode 100644 src/math/aarch64/floorf.c
 create mode 100644 src/math/aarch64/fma.c
 create mode 100644 src/math/aarch64/fmaf.c
 create mode 100644 src/math/aarch64/fmax.c
 create mode 100644 src/math/aarch64/fmaxf.c
 create mode 100644 src/math/aarch64/fmin.c
 create mode 100644 src/math/aarch64/fminf.c
 create mode 100644 src/math/aarch64/llrint.c
 create mode 100644 src/math/aarch64/llrintf.c
 create mode 100644 src/math/aarch64/llround.c
 create mode 100644 src/math/aarch64/llroundf.c
 create mode 100644 src/math/aarch64/lrint.c
 create mode 100644 src/math/aarch64/lrintf.c
 create mode 100644 src/math/aarch64/lround.c
 create mode 100644 src/math/aarch64/lroundf.c
 create mode 100644 src/math/aarch64/nearbyint.c
 create mode 100644 src/math/aarch64/nearbyintf.c
 create mode 100644 src/math/aarch64/rint.c
 create mode 100644 src/math/aarch64/rintf.c
 create mode 100644 src/math/aarch64/round.c
 create mode 100644 src/math/aarch64/roundf.c
 create mode 100644 src/math/aarch64/sqrt.c
 delete mode 100644 src/math/aarch64/sqrt.s
 create mode 100644 src/math/aarch64/sqrtf.c
 delete mode 100644 src/math/aarch64/sqrtf.s
 create mode 100644 src/math/aarch64/trunc.c
 create mode 100644 src/math/aarch64/truncf.c

diff --git a/src/math/aarch64/ceil.c b/src/math/aarch64/ceil.c
new file mode 100644
index 00000000..ac80c1dc
--- /dev/null
+++ b/src/math/aarch64/ceil.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double ceil(double x)
+{
+	__asm__ ("frintp %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/ceilf.c b/src/math/aarch64/ceilf.c
new file mode 100644
index 00000000..1ef1e9c8
--- /dev/null
+++ b/src/math/aarch64/ceilf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float ceilf(float x)
+{
+	__asm__ ("frintp %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/fabs.c b/src/math/aarch64/fabs.c
new file mode 100644
index 00000000..5c3ecaf4
--- /dev/null
+++ b/src/math/aarch64/fabs.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fabs(double x)
+{
+	__asm__ ("fabs %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/fabs.s b/src/math/aarch64/fabs.s
deleted file mode 100644
index 8c04d091..00000000
--- a/src/math/aarch64/fabs.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.text
-.global fabs
-.type   fabs,%function
-fabs:
-	fabs d0, d0
-	ret
diff --git a/src/math/aarch64/fabsf.c b/src/math/aarch64/fabsf.c
new file mode 100644
index 00000000..7fde9817
--- /dev/null
+++ b/src/math/aarch64/fabsf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fabsf(float x)
+{
+	__asm__ ("fabs %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/fabsf.s b/src/math/aarch64/fabsf.s
deleted file mode 100644
index 6e96dd43..00000000
--- a/src/math/aarch64/fabsf.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.text
-.global fabsf
-.type   fabsf,%function
-fabsf:
-	fabs s0, s0
-	ret
diff --git a/src/math/aarch64/floor.c b/src/math/aarch64/floor.c
new file mode 100644
index 00000000..50ffdb28
--- /dev/null
+++ b/src/math/aarch64/floor.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double floor(double x)
+{
+	__asm__ ("frintm %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/floorf.c b/src/math/aarch64/floorf.c
new file mode 100644
index 00000000..8d007e9f
--- /dev/null
+++ b/src/math/aarch64/floorf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float floorf(float x)
+{
+	__asm__ ("frintm %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/fma.c b/src/math/aarch64/fma.c
new file mode 100644
index 00000000..2450ea7e
--- /dev/null
+++ b/src/math/aarch64/fma.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fma(double x, double y, double z)
+{
+	__asm__ ("fmadd %d0, %d1, %d2, %d3" : "=w"(x) : "w"(x), "w"(y), "w"(z));
+	return x;
+}
diff --git a/src/math/aarch64/fmaf.c b/src/math/aarch64/fmaf.c
new file mode 100644
index 00000000..9a147213
--- /dev/null
+++ b/src/math/aarch64/fmaf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fmaf(float x, float y, float z)
+{
+	__asm__ ("fmadd %s0, %s1, %s2, %s3" : "=w"(x) : "w"(x), "w"(y), "w"(z));
+	return x;
+}
diff --git a/src/math/aarch64/fmax.c b/src/math/aarch64/fmax.c
new file mode 100644
index 00000000..86dcb3b4
--- /dev/null
+++ b/src/math/aarch64/fmax.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fmax(double x, double y)
+{
+	__asm__ ("fmaxnm %d0, %d1, %d2" : "=w"(x) : "w"(x), "w"(y));
+	return x;
+}
diff --git a/src/math/aarch64/fmaxf.c b/src/math/aarch64/fmaxf.c
new file mode 100644
index 00000000..ee5eac2d
--- /dev/null
+++ b/src/math/aarch64/fmaxf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fmaxf(float x, float y)
+{
+	__asm__ ("fmaxnm %s0, %s1, %s2" : "=w"(x) : "w"(x), "w"(y));
+	return x;
+}
diff --git a/src/math/aarch64/fmin.c b/src/math/aarch64/fmin.c
new file mode 100644
index 00000000..f1e99808
--- /dev/null
+++ b/src/math/aarch64/fmin.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fmin(double x, double y)
+{
+	__asm__ ("fminnm %d0, %d1, %d2" : "=w"(x) : "w"(x), "w"(y));
+	return x;
+}
diff --git a/src/math/aarch64/fminf.c b/src/math/aarch64/fminf.c
new file mode 100644
index 00000000..80468f67
--- /dev/null
+++ b/src/math/aarch64/fminf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fminf(float x, float y)
+{
+	__asm__ ("fminnm %s0, %s1, %s2" : "=w"(x) : "w"(x), "w"(y));
+	return x;
+}
diff --git a/src/math/aarch64/llrint.c b/src/math/aarch64/llrint.c
new file mode 100644
index 00000000..a9e07a93
--- /dev/null
+++ b/src/math/aarch64/llrint.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+long long llrint(double x)
+{
+	long long n;
+	__asm__ (
+		"frintx %d1, %d1\n"
+		"fcvtzs %x0, %d1\n" : "=r"(n), "+w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/llrintf.c b/src/math/aarch64/llrintf.c
new file mode 100644
index 00000000..12b6804f
--- /dev/null
+++ b/src/math/aarch64/llrintf.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+long long llrintf(float x)
+{
+	long long n;
+	__asm__ (
+		"frintx %s1, %s1\n"
+		"fcvtzs %x0, %s1\n" : "=r"(n), "+w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/llround.c b/src/math/aarch64/llround.c
new file mode 100644
index 00000000..e09ddd48
--- /dev/null
+++ b/src/math/aarch64/llround.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long long llround(double x)
+{
+	long long n;
+	__asm__ ("fcvtas %x0, %d1" : "=r"(n) : "w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/llroundf.c b/src/math/aarch64/llroundf.c
new file mode 100644
index 00000000..16699598
--- /dev/null
+++ b/src/math/aarch64/llroundf.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long long llroundf(float x)
+{
+	long long n;
+	__asm__ ("fcvtas %x0, %s1" : "=r"(n) : "w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/lrint.c b/src/math/aarch64/lrint.c
new file mode 100644
index 00000000..cb7785ad
--- /dev/null
+++ b/src/math/aarch64/lrint.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+long lrint(double x)
+{
+	long n;
+	__asm__ (
+		"frintx %d1, %d1\n"
+		"fcvtzs %x0, %d1\n" : "=r"(n), "+w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/lrintf.c b/src/math/aarch64/lrintf.c
new file mode 100644
index 00000000..4d750d69
--- /dev/null
+++ b/src/math/aarch64/lrintf.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+long lrintf(float x)
+{
+	long n;
+	__asm__ (
+		"frintx %s1, %s1\n"
+		"fcvtzs %x0, %s1\n" : "=r"(n), "+w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/lround.c b/src/math/aarch64/lround.c
new file mode 100644
index 00000000..85656c78
--- /dev/null
+++ b/src/math/aarch64/lround.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long lround(double x)
+{
+	long n;
+	__asm__ ("fcvtas %x0, %d1" : "=r"(n) : "w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/lroundf.c b/src/math/aarch64/lroundf.c
new file mode 100644
index 00000000..32e51f3c
--- /dev/null
+++ b/src/math/aarch64/lroundf.c
@@ -0,0 +1,8 @@
+#include <math.h>
+
+long lroundf(float x)
+{
+	long n;
+	__asm__ ("fcvtas %x0, %s1" : "=r"(n) : "w"(x));
+	return n;
+}
diff --git a/src/math/aarch64/nearbyint.c b/src/math/aarch64/nearbyint.c
new file mode 100644
index 00000000..9c3fdb44
--- /dev/null
+++ b/src/math/aarch64/nearbyint.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double nearbyint(double x)
+{
+	__asm__ ("frinti %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/nearbyintf.c b/src/math/aarch64/nearbyintf.c
new file mode 100644
index 00000000..8e7f61df
--- /dev/null
+++ b/src/math/aarch64/nearbyintf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float nearbyintf(float x)
+{
+	__asm__ ("frinti %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/rint.c b/src/math/aarch64/rint.c
new file mode 100644
index 00000000..45b194b5
--- /dev/null
+++ b/src/math/aarch64/rint.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double rint(double x)
+{
+	__asm__ ("frintx %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/rintf.c b/src/math/aarch64/rintf.c
new file mode 100644
index 00000000..1ae7dd25
--- /dev/null
+++ b/src/math/aarch64/rintf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float rintf(float x)
+{
+	__asm__ ("frintx %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/round.c b/src/math/aarch64/round.c
new file mode 100644
index 00000000..897a84cc
--- /dev/null
+++ b/src/math/aarch64/round.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double round(double x)
+{
+	__asm__ ("frinta %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/roundf.c b/src/math/aarch64/roundf.c
new file mode 100644
index 00000000..91637eaa
--- /dev/null
+++ b/src/math/aarch64/roundf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float roundf(float x)
+{
+	__asm__ ("frinta %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/sqrt.c b/src/math/aarch64/sqrt.c
new file mode 100644
index 00000000..fe93c3e6
--- /dev/null
+++ b/src/math/aarch64/sqrt.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double sqrt(double x)
+{
+	__asm__ ("fsqrt %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/sqrt.s b/src/math/aarch64/sqrt.s
deleted file mode 100644
index 1917e18d..00000000
--- a/src/math/aarch64/sqrt.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.text
-.global sqrt
-.type   sqrt,%function
-sqrt:
-	fsqrt d0, d0
-	ret
diff --git a/src/math/aarch64/sqrtf.c b/src/math/aarch64/sqrtf.c
new file mode 100644
index 00000000..275c7f39
--- /dev/null
+++ b/src/math/aarch64/sqrtf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float sqrtf(float x)
+{
+	__asm__ ("fsqrt %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/sqrtf.s b/src/math/aarch64/sqrtf.s
deleted file mode 100644
index 1639497b..00000000
--- a/src/math/aarch64/sqrtf.s
+++ /dev/null
@@ -1,6 +0,0 @@
-.text
-.global sqrtf
-.type   sqrtf,%function
-sqrtf:
-	fsqrt s0, s0
-	ret
diff --git a/src/math/aarch64/trunc.c b/src/math/aarch64/trunc.c
new file mode 100644
index 00000000..e592147a
--- /dev/null
+++ b/src/math/aarch64/trunc.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double trunc(double x)
+{
+	__asm__ ("frintz %d0, %d1" : "=w"(x) : "w"(x));
+	return x;
+}
diff --git a/src/math/aarch64/truncf.c b/src/math/aarch64/truncf.c
new file mode 100644
index 00000000..20ef30f1
--- /dev/null
+++ b/src/math/aarch64/truncf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float truncf(float x)
+{
+	__asm__ ("frintz %s0, %s1" : "=w"(x) : "w"(x));
+	return x;
+}
-- 
2.11.0




* Re: [PATCH] aarch64: add single instruction math functions
  2017-03-19  4:26 [PATCH] aarch64: add single instruction math functions Szabolcs Nagy
@ 2017-03-19 12:55 ` David Edelsohn
  2017-03-19 14:51   ` Szabolcs Nagy
  2017-03-19 15:05   ` Rich Felker
  2017-03-21 16:50 ` Rich Felker
  1 sibling, 2 replies; 7+ messages in thread
From: David Edelsohn @ 2017-03-19 12:55 UTC (permalink / raw)
  To: musl

I thought that the goal of musl was "Minimal machine-specific code".
Does musl want to start a new arms race?

- David



* Re: [PATCH] aarch64: add single instruction math functions
  2017-03-19 12:55 ` David Edelsohn
@ 2017-03-19 14:51   ` Szabolcs Nagy
  2017-03-19 15:05   ` Rich Felker
  1 sibling, 0 replies; 7+ messages in thread
From: Szabolcs Nagy @ 2017-03-19 14:51 UTC (permalink / raw)
  To: musl

* David Edelsohn <dje.gcc@gmail.com> [2017-03-19 08:55:58 -0400]:
> I thought that the goal of musl was "Minimal machine-specific code".
> Does musl want to start a new arms race?

this is not ideal, but the generic code is not ideal either.
gcc will inline most calls, and those will behave differently
from the generic code as explained in the patch (at least for
some of the rounding functions and fma).
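
for example, a small test program (not part of the patch; the
constants are chosen so the exact product of x and y is just below
DBL_MIN but rounds up to it, i.e. the before/after-rounding underflow
case from the commit message; compile with -lm, without -ffast-math):

#include <fenv.h>
#include <math.h>
#include <stdio.h>

int main(void)
{
	/* x*y == 0x1p-1022 - 0x1p-1076 exactly: tiny before rounding,
	   but it rounds to 0x1p-1022 (DBL_MIN), so it is not tiny
	   after rounding */
	volatile double x = 0x1p-511 - 0x1p-538;
	volatile double y = 0x1p-511 + 0x1p-538;
	feclearexcept(FE_ALL_EXCEPT);
	double r = fma(x, y, 0.0);
	/* a single fmadd (before-rounding tininess) raises underflow
	   here; an after-rounding implementation does not */
	printf("r=%a underflow=%d\n", r, !!fetestexcept(FE_UNDERFLOW));
	return 0;
}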

i plan to add FP_FAST_FMA support in math.h, but for that,
targets with fma should have a single-instruction implementation.
(it turns out gcc does not set __FP_FAST_FMA correctly, so
the libc has to work harder)
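
just to illustrate, a rough sketch of how math.h could define it,
keyed off the arch rather than the unreliable gcc macro (the arch
list and the condition here are only placeholders, nothing is
decided):

#if defined(__aarch64__)
#define FP_FAST_FMA 1
#define FP_FAST_FMAF 1
#endif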

another approach i thought of but did not implement was:

math/builtin/foo.c has '.. return __builtin_foo(); ..' and
math/arch/foo.c has '#include "../builtin/foo.c"', ifdefed on
whether the compiler supports the builtin and it is known not to
compile to a libc call.  (then the maintenance is minimized on the
musl side, it's on the compiler to get it right, but we need much
more configure check machinery)
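
a rough sketch of that layout (the file contents and the configure
macro name here are only illustrative):

/* src/math/builtin/ceil.c */
#include <math.h>
double ceil(double x)
{
	return __builtin_ceil(x);
}

/* src/math/aarch64/ceil.c */
#ifdef HAVE_INLINE_CEIL  /* hypothetical configure-detected macro */
#include "../builtin/ceil.c"
#else
#include "../ceil.c"  /* fall back to the generic c code */
#endif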



* Re: [PATCH] aarch64: add single instruction math functions
  2017-03-19 12:55 ` David Edelsohn
  2017-03-19 14:51   ` Szabolcs Nagy
@ 2017-03-19 15:05   ` Rich Felker
  2017-03-21 16:50     ` David Edelsohn
  1 sibling, 1 reply; 7+ messages in thread
From: Rich Felker @ 2017-03-19 15:05 UTC (permalink / raw)
  To: musl

On Sun, Mar 19, 2017 at 08:55:58AM -0400, David Edelsohn wrote:
> I thought that the goal of musl was "Minimal machine-specific code".

My interpretation of minimal is two-fold:

- minimal amount of arch-specific coding required to bring up a new
  arch.

- when arch-specific code is present by necessity or for optimization
  (speed or size), keeping complexity, maintenance cost, and room for
  arch-specific bugs minimal.

This is not intended to preclude use of single-instruction primitives
(see existing code for x86, etc.) for math functions or even critical
things that may be somewhat more complex like memcpy.

> Does musl want to start a new arms race?

What I call an arms race is perpetually adding new, often
runtime-selected, vectorized memcpy variants for every new vector isa
level, often with costly tradeoffs (like greatly increasing context
switch time) and no empirical evidence to justify them on a
system-wide scale, just benchmarks tailored to get the results the
author of the asm wants. That kind of stuff is not appropriate for
musl.

Ideally we'll eventually be able to rely on compilers just generating
the optimal code for memcpy from a trivial one-line C version. We're
not there yet. But I don't even have such hope for the future with
math. It's highly nontrivial for a compiler to establish that complex
code with multiple levels of branches is equivalent to a single ieee
operation that the isa implements.

Rich



* Re: [PATCH] aarch64: add single instruction math functions
  2017-03-19  4:26 [PATCH] aarch64: add single instruction math functions Szabolcs Nagy
  2017-03-19 12:55 ` David Edelsohn
@ 2017-03-21 16:50 ` Rich Felker
  1 sibling, 0 replies; 7+ messages in thread
From: Rich Felker @ 2017-03-21 16:50 UTC (permalink / raw)
  To: musl

On Sun, Mar 19, 2017 at 05:26:45AM +0100, Szabolcs Nagy wrote:
> this should increase performance and reduce code size on aarch64.
> 
> the compiled code was checked against a version using __builtin_*
> instead of inline asm, with gcc-6.2.0.
> 
> lrint is two instructions.
> 
> c with inline asm is used because it is safer than a pure asm
> implementation; this prevents ll{rint,round} from being an alias
> of l{rint,round} (because the types don't match) and depends
> on gcc style inline asm support.
> 
> ceil, floor, round, trunc can either raise inexact on finite
> non-integer inputs or not raise any exceptions. the new
> implementation does not raise exceptions while the generic
> c code does.
> 
> on aarch64, the underflow exception is signaled before rounding
> (ieee 754 allows both before and after rounding, but it must be
> consistent), the generic fma c code signals it after rounding
> so using a single instruction fixes a slight conformance issue too.
> ---

Thanks, committing.

Rich



* Re: [PATCH] aarch64: add single instruction math functions
  2017-03-19 15:05   ` Rich Felker
@ 2017-03-21 16:50     ` David Edelsohn
  2017-03-21 16:58       ` Rich Felker
  0 siblings, 1 reply; 7+ messages in thread
From: David Edelsohn @ 2017-03-21 16:50 UTC (permalink / raw)
  To: musl

On Sun, Mar 19, 2017 at 11:05 AM, Rich Felker <dalias@libc.org> wrote:
> On Sun, Mar 19, 2017 at 08:55:58AM -0400, David Edelsohn wrote:
>> I thought that the goal of musl was "Minimal machine-specific code".
>
> My interpretation of minimal is two-fold:
>
> - minimal amount of arch-specific coding required to bring up a new
>   arch.
>
> - when arch-specific code is present by necessity or for optimization
>   (speed or size), keeping complexity, maintenance cost, and room for
>   arch-specific bugs minimal.
>
> This is not intended to preclude use of single-instruction primitives
> (see existing code for x86, etc.) for math functions or even critical
> things that may be somewhat more complex like memcpy.

This policy makes maintenance more difficult and bugs more difficult
to analyze, because different ports of musl libc may share less
common code.

Single-instruction primitives occur more often in CISC architectures
by definition, so this policy favors CISC.

This policy makes the decision process for architecture-specific
changes much more arbitrary.

- David



* Re: [PATCH] aarch64: add single instruction math functions
  2017-03-21 16:50     ` David Edelsohn
@ 2017-03-21 16:58       ` Rich Felker
  0 siblings, 0 replies; 7+ messages in thread
From: Rich Felker @ 2017-03-21 16:58 UTC (permalink / raw)
  To: musl

On Tue, Mar 21, 2017 at 12:50:46PM -0400, David Edelsohn wrote:
> On Sun, Mar 19, 2017 at 11:05 AM, Rich Felker <dalias@libc.org> wrote:
> > On Sun, Mar 19, 2017 at 08:55:58AM -0400, David Edelsohn wrote:
> >> I thought that the goal of musl was "Minimal machine-specific code".
> >
> > My interpretation of minimal is two-fold:
> >
> > - minimal amount of arch-specific coding required to bring up a new
> >   arch.
> >
> > - when arch-specific code is present by necessity or for optimization
> >   (speed or size), keeping complexity, maintenance cost, and room for
> >   arch-specific bugs minimal.
> >
> > This is not intended to preclude use of single-instruction primitives
> > (see existing code for x86, etc.) for math functions or even critical
> > things that may be somewhat more complex like memcpy.
> 
> This policy makes maintenance more difficult and bugs more difficult
> to analyze because different ports of musl libc may use less common
> code.

This is a good point and actually a reason why I've considered looking
for a good way to structure "mandatory" arch files vs "optimization"
ones, so that you could opt to build without the latter. In the case
of math, there may even be a few cases left where the C code does not
work correctly on archs with excess precision (x86, future m68k), and
it would be nice to be able to check that easily and fix any bugs
that remain.

> Single instruction primitives occur more often in CISC architectures
> by definition, so this preferences CISC.

I don't think this is a meaningful distinction. Any modern arch has
floating point instructions for more than just +-*/. Stuff like trig
is definitely CISCy (and likely useless; on x86 it gives wrong results
and it's slower than doing the trig in C anyway) but you most
certainly want the fpu to have sqrt and rounding instructions because
they're very costly to emulate.

> This policy makes the decision process for architecture-specific
> changes much more arbitrary.

If by "arbitrary" you mean "not fitting a minimally-expressable
absolute rule, but very well justified"...

Rich


