[PATCH 01-10/18] math updates

mailing list of musl libc
 help / color / mirror / code / Atom feed

From: Szabolcs Nagy <nsz@port70.net>
To: musl@lists.openwall.com
Subject: [PATCH 01-10/18]  math updates
Date: Sat, 8 Dec 2018 13:54:23 +0100	[thread overview]
Message-ID: <20181208125422.GZ21289@port70.net> (raw)
In-Reply-To: <20181208125009.GY21289@port70.net>

[-- Attachment #1: Type: text/plain, Size: 62 bytes --]

math clean up patches in preparation for new implementations.

[-- Attachment #2: 0001-define-FP_FAST_FMA-when-fma-can-be-inlined.patch --]
[-- Type: text/x-diff, Size: 1644 bytes --]

From b9853935d1bb1d0518705c320cb3c47ddc9642ca Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Tue, 16 Oct 2018 22:20:39 +0000
Subject: [PATCH 01/18] define FP_FAST_FMA* when fma* can be inlined

FP_FAST_FMA can be defined if "the fma function generally executes about
as fast as, or faster than, a multiply and an add of double operands",
which can only be true if the fma call is inlined as an instruction.

gcc sets __FP_FAST_FMA if __builtin_fma is inlined as an instruction,
but that does not mean an fma call will be inlined (e.g. it is defined
with -fno-builtin-fma), other compilers (clang) don't even have such
macro, but this is the closest we can get.

(even if the libc fma implementation is a single instruction, the extern
call overhead is already too big when the macro is used to decide between
x*y+z and fma(x,y,z) so it cannot be based on libc only, defining the
macro unconditionally on targets which have fma in the base isa is also
incorrect: the compiler might not inline fma anyway.)

this solution works with gcc unless fma inlining is explicitly turned off.
---
 include/math.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/math.h b/include/math.h
index fea34686..14f28ec8 100644
--- a/include/math.h
+++ b/include/math.h
@@ -36,6 +36,18 @@ extern "C" {
 #define FP_SUBNORMAL 3
 #define FP_NORMAL    4
 
+#ifdef __FP_FAST_FMA
+#define FP_FAST_FMA 1
+#endif
+
+#ifdef __FP_FAST_FMAF
+#define FP_FAST_FMAF 1
+#endif
+
+#ifdef __FP_FAST_FMAL
+#define FP_FAST_FMAL 1
+#endif
+
 int __fpclassify(double);
 int __fpclassifyf(float);
 int __fpclassifyl(long double);
-- 
2.19.1


[-- Attachment #3: 0002-math-move-complex-math-out-of-libm.h.patch --]
[-- Type: text/x-diff, Size: 22958 bytes --]

From 594b279f202f7c20952864d893a1baf6a9fe1396 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Thu, 29 Nov 2018 22:09:53 +0000
Subject: [PATCH 02/18] math: move complex math out of libm.h

This makes it easier to build musl math code with a compiler that
does not support complex types (tcc) and in general more sensible
factorization of the internal headers.
---
 src/complex/__cexp.c        |  2 +-
 src/complex/__cexpf.c       |  2 +-
 src/complex/cabs.c          |  2 +-
 src/complex/cabsf.c         |  2 +-
 src/complex/cabsl.c         |  2 +-
 src/complex/cacos.c         |  2 +-
 src/complex/cacosf.c        |  2 +-
 src/complex/cacosh.c        |  2 +-
 src/complex/cacoshf.c       |  2 +-
 src/complex/cacoshl.c       |  2 +-
 src/complex/cacosl.c        |  2 +-
 src/complex/carg.c          |  2 +-
 src/complex/cargf.c         |  2 +-
 src/complex/cargl.c         |  2 +-
 src/complex/casin.c         |  2 +-
 src/complex/casinf.c        |  2 +-
 src/complex/casinh.c        |  2 +-
 src/complex/casinhf.c       |  2 +-
 src/complex/casinhl.c       |  2 +-
 src/complex/casinl.c        |  2 +-
 src/complex/catan.c         |  2 +-
 src/complex/catanf.c        |  2 +-
 src/complex/catanh.c        |  2 +-
 src/complex/catanhf.c       |  2 +-
 src/complex/catanhl.c       |  2 +-
 src/complex/catanl.c        |  2 +-
 src/complex/ccos.c          |  2 +-
 src/complex/ccosf.c         |  2 +-
 src/complex/ccosh.c         |  2 +-
 src/complex/ccoshf.c        |  2 +-
 src/complex/ccoshl.c        |  2 +-
 src/complex/ccosl.c         |  2 +-
 src/complex/cexp.c          |  2 +-
 src/complex/cexpf.c         |  2 +-
 src/complex/cexpl.c         |  2 +-
 src/complex/cimag.c         |  2 +-
 src/complex/cimagf.c        |  2 +-
 src/complex/cimagl.c        |  2 +-
 src/complex/clog.c          |  2 +-
 src/complex/clogf.c         |  2 +-
 src/complex/clogl.c         |  2 +-
 src/complex/conj.c          |  2 +-
 src/complex/conjf.c         |  2 +-
 src/complex/conjl.c         |  2 +-
 src/complex/cpow.c          |  2 +-
 src/complex/cpowf.c         |  2 +-
 src/complex/cpowl.c         |  2 +-
 src/complex/cproj.c         |  2 +-
 src/complex/cprojf.c        |  2 +-
 src/complex/cprojl.c        |  2 +-
 src/complex/csin.c          |  2 +-
 src/complex/csinf.c         |  2 +-
 src/complex/csinh.c         |  2 +-
 src/complex/csinhf.c        |  2 +-
 src/complex/csinhl.c        |  2 +-
 src/complex/csinl.c         |  2 +-
 src/complex/csqrt.c         |  2 +-
 src/complex/csqrtf.c        |  2 +-
 src/complex/csqrtl.c        |  2 +-
 src/complex/ctan.c          |  2 +-
 src/complex/ctanf.c         |  2 +-
 src/complex/ctanh.c         |  2 +-
 src/complex/ctanhf.c        |  2 +-
 src/complex/ctanhl.c        |  2 +-
 src/complex/ctanl.c         |  2 +-
 src/internal/complex_impl.h | 22 ++++++++++++++++++++++
 src/internal/libm.h         | 15 ---------------
 67 files changed, 87 insertions(+), 80 deletions(-)
 create mode 100644 src/internal/complex_impl.h

diff --git a/src/complex/__cexp.c b/src/complex/__cexp.c
index 05ac28c7..003d20af 100644
--- a/src/complex/__cexp.c
+++ b/src/complex/__cexp.c
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const uint32_t k = 1799; /* constant for reduction */
 static const double kln2 = 1246.97177782734161156; /* k * ln2 */
diff --git a/src/complex/__cexpf.c b/src/complex/__cexpf.c
index 69b54045..ee5ff2bc 100644
--- a/src/complex/__cexpf.c
+++ b/src/complex/__cexpf.c
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const uint32_t k = 235; /* constant for reduction */
 static const float kln2 = 162.88958740F; /* k * ln2 */
diff --git a/src/complex/cabs.c b/src/complex/cabs.c
index f61d364e..c5ad58ab 100644
--- a/src/complex/cabs.c
+++ b/src/complex/cabs.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 double cabs(double complex z)
 {
diff --git a/src/complex/cabsf.c b/src/complex/cabsf.c
index 30b25c70..619f28d3 100644
--- a/src/complex/cabsf.c
+++ b/src/complex/cabsf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float cabsf(float complex z)
 {
diff --git a/src/complex/cabsl.c b/src/complex/cabsl.c
index 40a067c1..d37e3f2e 100644
--- a/src/complex/cabsl.c
+++ b/src/complex/cabsl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double cabsl(long double complex z)
diff --git a/src/complex/cacos.c b/src/complex/cacos.c
index 27c35636..c39d257b 100644
--- a/src/complex/cacos.c
+++ b/src/complex/cacos.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 // FIXME: Hull et al. "Implementing the complex arcsine and arccosine functions using exception handling" 1997
 
diff --git a/src/complex/cacosf.c b/src/complex/cacosf.c
index 11852659..2e048540 100644
--- a/src/complex/cacosf.c
+++ b/src/complex/cacosf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 // FIXME
 
diff --git a/src/complex/cacosh.c b/src/complex/cacosh.c
index 8c68cb01..8e42f1ae 100644
--- a/src/complex/cacosh.c
+++ b/src/complex/cacosh.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 /* acosh(z) = i acos(z) */
 
diff --git a/src/complex/cacoshf.c b/src/complex/cacoshf.c
index ade01c09..d7e6b545 100644
--- a/src/complex/cacoshf.c
+++ b/src/complex/cacoshf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex cacoshf(float complex z)
 {
diff --git a/src/complex/cacoshl.c b/src/complex/cacoshl.c
index 65342557..d3eaee20 100644
--- a/src/complex/cacoshl.c
+++ b/src/complex/cacoshl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex cacoshl(long double complex z)
diff --git a/src/complex/cacosl.c b/src/complex/cacosl.c
index 7fd4a2f6..cc20dcd7 100644
--- a/src/complex/cacosl.c
+++ b/src/complex/cacosl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex cacosl(long double complex z)
diff --git a/src/complex/carg.c b/src/complex/carg.c
index d2d1b462..dfe9b97a 100644
--- a/src/complex/carg.c
+++ b/src/complex/carg.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 double carg(double complex z)
 {
diff --git a/src/complex/cargf.c b/src/complex/cargf.c
index ce183c4b..9a6c19b6 100644
--- a/src/complex/cargf.c
+++ b/src/complex/cargf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float cargf(float complex z)
 {
diff --git a/src/complex/cargl.c b/src/complex/cargl.c
index e0d50478..88f95f96 100644
--- a/src/complex/cargl.c
+++ b/src/complex/cargl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double cargl(long double complex z)
diff --git a/src/complex/casin.c b/src/complex/casin.c
index 01ed6184..3244bebb 100644
--- a/src/complex/casin.c
+++ b/src/complex/casin.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 // FIXME
 
diff --git a/src/complex/casinf.c b/src/complex/casinf.c
index 4fcb76fc..2cda2f08 100644
--- a/src/complex/casinf.c
+++ b/src/complex/casinf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 // FIXME
 
diff --git a/src/complex/casinh.c b/src/complex/casinh.c
index b57fe8c4..50bf27ce 100644
--- a/src/complex/casinh.c
+++ b/src/complex/casinh.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 /* asinh(z) = -i asin(i z) */
 
diff --git a/src/complex/casinhf.c b/src/complex/casinhf.c
index a11bf902..93d82e5f 100644
--- a/src/complex/casinhf.c
+++ b/src/complex/casinhf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex casinhf(float complex z)
 {
diff --git a/src/complex/casinhl.c b/src/complex/casinhl.c
index 714f1893..68ba3ddf 100644
--- a/src/complex/casinhl.c
+++ b/src/complex/casinhl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex casinhl(long double complex z)
diff --git a/src/complex/casinl.c b/src/complex/casinl.c
index 3b7ceba7..072adc45 100644
--- a/src/complex/casinl.c
+++ b/src/complex/casinl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex casinl(long double complex z)
diff --git a/src/complex/catan.c b/src/complex/catan.c
index 7dc2afeb..ccc2fb53 100644
--- a/src/complex/catan.c
+++ b/src/complex/catan.c
@@ -58,7 +58,7 @@
  * 2.9e-17.  See also clog().
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 #define MAXNUM 1.0e308
 
diff --git a/src/complex/catanf.c b/src/complex/catanf.c
index 8533bde3..e10d9c09 100644
--- a/src/complex/catanf.c
+++ b/src/complex/catanf.c
@@ -53,7 +53,7 @@
  *    IEEE      -10,+10     30000        2.3e-6      5.2e-8
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 #define MAXNUMF 1.0e38F
 
diff --git a/src/complex/catanh.c b/src/complex/catanh.c
index e248d9b9..c324c7f2 100644
--- a/src/complex/catanh.c
+++ b/src/complex/catanh.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 /* atanh = -i atan(i z) */
 
diff --git a/src/complex/catanhf.c b/src/complex/catanhf.c
index 4a5eb040..b0505f60 100644
--- a/src/complex/catanhf.c
+++ b/src/complex/catanhf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex catanhf(float complex z)
 {
diff --git a/src/complex/catanhl.c b/src/complex/catanhl.c
index a5dd538e..6025c414 100644
--- a/src/complex/catanhl.c
+++ b/src/complex/catanhl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex catanhl(long double complex z)
diff --git a/src/complex/catanl.c b/src/complex/catanl.c
index 5ace7704..a9fc02db 100644
--- a/src/complex/catanl.c
+++ b/src/complex/catanl.c
@@ -59,7 +59,7 @@
 
 #include <complex.h>
 #include <float.h>
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex catanl(long double complex z)
diff --git a/src/complex/ccos.c b/src/complex/ccos.c
index 645aec29..f32e1fad 100644
--- a/src/complex/ccos.c
+++ b/src/complex/ccos.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 /* cos(z) = cosh(i z) */
 
diff --git a/src/complex/ccosf.c b/src/complex/ccosf.c
index 9a67241f..490be9b3 100644
--- a/src/complex/ccosf.c
+++ b/src/complex/ccosf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex ccosf(float complex z)
 {
diff --git a/src/complex/ccosh.c b/src/complex/ccosh.c
index 401f3c60..c995da7b 100644
--- a/src/complex/ccosh.c
+++ b/src/complex/ccosh.c
@@ -34,7 +34,7 @@
  * These values and the return value were taken from n1124.pdf.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const double huge = 0x1p1023;
 
diff --git a/src/complex/ccoshf.c b/src/complex/ccoshf.c
index 90acfe05..189ce946 100644
--- a/src/complex/ccoshf.c
+++ b/src/complex/ccoshf.c
@@ -28,7 +28,7 @@
  * Hyperbolic cosine of a complex argument.  See s_ccosh.c for details.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const float huge = 0x1p127;
 
diff --git a/src/complex/ccoshl.c b/src/complex/ccoshl.c
index 9b2aed9e..ffb4d8a1 100644
--- a/src/complex/ccoshl.c
+++ b/src/complex/ccoshl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 //FIXME
 long double complex ccoshl(long double complex z)
diff --git a/src/complex/ccosl.c b/src/complex/ccosl.c
index d787047f..2530006b 100644
--- a/src/complex/ccosl.c
+++ b/src/complex/ccosl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex ccosl(long double complex z)
diff --git a/src/complex/cexp.c b/src/complex/cexp.c
index 5118e00e..7fb489bb 100644
--- a/src/complex/cexp.c
+++ b/src/complex/cexp.c
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const uint32_t
 exp_ovfl  = 0x40862e42,  /* high bits of MAX_EXP * ln2 ~= 710 */
diff --git a/src/complex/cexpf.c b/src/complex/cexpf.c
index 1a09964c..00d258f3 100644
--- a/src/complex/cexpf.c
+++ b/src/complex/cexpf.c
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const uint32_t
 exp_ovfl  = 0x42b17218,  /* MAX_EXP * ln2 ~= 88.722839355 */
diff --git a/src/complex/cexpl.c b/src/complex/cexpl.c
index a27f85c0..d4df950e 100644
--- a/src/complex/cexpl.c
+++ b/src/complex/cexpl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 //FIXME
 long double complex cexpl(long double complex z)
diff --git a/src/complex/cimag.c b/src/complex/cimag.c
index 00955641..d6b0e683 100644
--- a/src/complex/cimag.c
+++ b/src/complex/cimag.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 double (cimag)(double complex z)
 {
diff --git a/src/complex/cimagf.c b/src/complex/cimagf.c
index f7bcd76e..b7166dcf 100644
--- a/src/complex/cimagf.c
+++ b/src/complex/cimagf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float (cimagf)(float complex z)
 {
diff --git a/src/complex/cimagl.c b/src/complex/cimagl.c
index 9ec24eee..4db77f20 100644
--- a/src/complex/cimagl.c
+++ b/src/complex/cimagl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 long double (cimagl)(long double complex z)
 {
diff --git a/src/complex/clog.c b/src/complex/clog.c
index 12aae9c7..b587c291 100644
--- a/src/complex/clog.c
+++ b/src/complex/clog.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 // FIXME
 
diff --git a/src/complex/clogf.c b/src/complex/clogf.c
index e9b32e60..0389d472 100644
--- a/src/complex/clogf.c
+++ b/src/complex/clogf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 // FIXME
 
diff --git a/src/complex/clogl.c b/src/complex/clogl.c
index 18f16088..88e83e87 100644
--- a/src/complex/clogl.c
+++ b/src/complex/clogl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex clogl(long double complex z)
diff --git a/src/complex/conj.c b/src/complex/conj.c
index 0b3f5f46..a3b19a4a 100644
--- a/src/complex/conj.c
+++ b/src/complex/conj.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 double complex conj(double complex z)
 {
diff --git a/src/complex/conjf.c b/src/complex/conjf.c
index 9af6b2c3..b2195c84 100644
--- a/src/complex/conjf.c
+++ b/src/complex/conjf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex conjf(float complex z)
 {
diff --git a/src/complex/conjl.c b/src/complex/conjl.c
index 67f11b9d..87a4ebec 100644
--- a/src/complex/conjl.c
+++ b/src/complex/conjl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 long double complex conjl(long double complex z)
 {
diff --git a/src/complex/cpow.c b/src/complex/cpow.c
index f863588f..1137d391 100644
--- a/src/complex/cpow.c
+++ b/src/complex/cpow.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 /* pow(z, c) = exp(c log(z)), See C99 G.6.4.1 */
 
diff --git a/src/complex/cpowf.c b/src/complex/cpowf.c
index 53c65dcb..f3fd4b7b 100644
--- a/src/complex/cpowf.c
+++ b/src/complex/cpowf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex cpowf(float complex z, float complex c)
 {
diff --git a/src/complex/cpowl.c b/src/complex/cpowl.c
index c1a80a7b..be36f046 100644
--- a/src/complex/cpowl.c
+++ b/src/complex/cpowl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex cpowl(long double complex z, long double complex c)
diff --git a/src/complex/cproj.c b/src/complex/cproj.c
index 15f358a1..9ae1e17c 100644
--- a/src/complex/cproj.c
+++ b/src/complex/cproj.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 double complex cproj(double complex z)
 {
diff --git a/src/complex/cprojf.c b/src/complex/cprojf.c
index 653be5e8..03fab339 100644
--- a/src/complex/cprojf.c
+++ b/src/complex/cprojf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex cprojf(float complex z)
 {
diff --git a/src/complex/cprojl.c b/src/complex/cprojl.c
index 6731aaa2..38a494c5 100644
--- a/src/complex/cprojl.c
+++ b/src/complex/cprojl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex cprojl(long double complex z)
diff --git a/src/complex/csin.c b/src/complex/csin.c
index ad8ae67a..535c4bf8 100644
--- a/src/complex/csin.c
+++ b/src/complex/csin.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 /* sin(z) = -i sinh(i z) */
 
diff --git a/src/complex/csinf.c b/src/complex/csinf.c
index 60b3cbaa..69f5164e 100644
--- a/src/complex/csinf.c
+++ b/src/complex/csinf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex csinf(float complex z)
 {
diff --git a/src/complex/csinh.c b/src/complex/csinh.c
index 0f8035d1..eda0ab59 100644
--- a/src/complex/csinh.c
+++ b/src/complex/csinh.c
@@ -34,7 +34,7 @@
  * These values and the return value were taken from n1124.pdf.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const double huge = 0x1p1023;
 
diff --git a/src/complex/csinhf.c b/src/complex/csinhf.c
index 49697f02..eb1d98c5 100644
--- a/src/complex/csinhf.c
+++ b/src/complex/csinhf.c
@@ -28,7 +28,7 @@
  * Hyperbolic sine of a complex argument z.  See s_csinh.c for details.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 static const float huge = 0x1p127;
 
diff --git a/src/complex/csinhl.c b/src/complex/csinhl.c
index c566653b..09fd18f9 100644
--- a/src/complex/csinhl.c
+++ b/src/complex/csinhl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 //FIXME
 long double complex csinhl(long double complex z)
diff --git a/src/complex/csinl.c b/src/complex/csinl.c
index 4e9f86c3..90a4eb37 100644
--- a/src/complex/csinl.c
+++ b/src/complex/csinl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex csinl(long double complex z)
diff --git a/src/complex/csqrt.c b/src/complex/csqrt.c
index 8a2ba608..c36de001 100644
--- a/src/complex/csqrt.c
+++ b/src/complex/csqrt.c
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 /*
  * gcc doesn't implement complex multiplication or division correctly,
diff --git a/src/complex/csqrtf.c b/src/complex/csqrtf.c
index ab5102f0..a6163974 100644
--- a/src/complex/csqrtf.c
+++ b/src/complex/csqrtf.c
@@ -25,7 +25,7 @@
  * SUCH DAMAGE.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 /*
  * gcc doesn't implement complex multiplication or division correctly,
diff --git a/src/complex/csqrtl.c b/src/complex/csqrtl.c
index 0600ef3b..22539379 100644
--- a/src/complex/csqrtl.c
+++ b/src/complex/csqrtl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 //FIXME
 long double complex csqrtl(long double complex z)
diff --git a/src/complex/ctan.c b/src/complex/ctan.c
index c0926374..918717bf 100644
--- a/src/complex/ctan.c
+++ b/src/complex/ctan.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 /* tan(z) = -i tanh(i z) */
 
diff --git a/src/complex/ctanf.c b/src/complex/ctanf.c
index 009b1921..04c3ff19 100644
--- a/src/complex/ctanf.c
+++ b/src/complex/ctanf.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex ctanf(float complex z)
 {
diff --git a/src/complex/ctanh.c b/src/complex/ctanh.c
index 3ba3a899..54004cd7 100644
--- a/src/complex/ctanh.c
+++ b/src/complex/ctanh.c
@@ -63,7 +63,7 @@
  *   precision.  I also handle large x differently.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 double complex ctanh(double complex z)
 {
diff --git a/src/complex/ctanhf.c b/src/complex/ctanhf.c
index 72b76da0..7f422ba7 100644
--- a/src/complex/ctanhf.c
+++ b/src/complex/ctanhf.c
@@ -28,7 +28,7 @@
  * Hyperbolic tangent of a complex argument z.  See s_ctanh.c for details.
  */
 
-#include "libm.h"
+#include "complex_impl.h"
 
 float complex ctanhf(float complex z)
 {
diff --git a/src/complex/ctanhl.c b/src/complex/ctanhl.c
index 89a75d13..45d5862c 100644
--- a/src/complex/ctanhl.c
+++ b/src/complex/ctanhl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 //FIXME
 long double complex ctanhl(long double complex z)
diff --git a/src/complex/ctanl.c b/src/complex/ctanl.c
index ac1c3e0a..4b87420d 100644
--- a/src/complex/ctanl.c
+++ b/src/complex/ctanl.c
@@ -1,4 +1,4 @@
-#include "libm.h"
+#include "complex_impl.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double complex ctanl(long double complex z)
diff --git a/src/internal/complex_impl.h b/src/internal/complex_impl.h
new file mode 100644
index 00000000..51fb298a
--- /dev/null
+++ b/src/internal/complex_impl.h
@@ -0,0 +1,22 @@
+#ifndef _COMPLEX_IMPL_H
+#define _COMPLEX_IMPL_H
+
+#include <complex.h>
+#include "libm.h"
+
+#undef __CMPLX
+#undef CMPLX
+#undef CMPLXF
+#undef CMPLXL
+
+#define __CMPLX(x, y, t) \
+	((union { _Complex t __z; t __xy[2]; }){.__xy = {(x),(y)}}.__z)
+
+#define CMPLX(x, y) __CMPLX(x, y, double)
+#define CMPLXF(x, y) __CMPLX(x, y, float)
+#define CMPLXL(x, y) __CMPLX(x, y, long double)
+
+hidden double complex __ldexp_cexp(double complex,int);
+hidden float complex __ldexp_cexpf(float complex,int);
+
+#endif
diff --git a/src/internal/libm.h b/src/internal/libm.h
index fd916277..6e2d1900 100644
--- a/src/internal/libm.h
+++ b/src/internal/libm.h
@@ -16,7 +16,6 @@
 #include <stdint.h>
 #include <float.h>
 #include <math.h>
-#include <complex.h>
 #include <endian.h>
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
@@ -153,18 +152,6 @@ do {                                              \
   (d) = __u.f;                                    \
 } while (0)
 
-#undef __CMPLX
-#undef CMPLX
-#undef CMPLXF
-#undef CMPLXL
-
-#define __CMPLX(x, y, t) \
-	((union { _Complex t __z; t __xy[2]; }){.__xy = {(x),(y)}}.__z)
-
-#define CMPLX(x, y) __CMPLX(x, y, double)
-#define CMPLXF(x, y) __CMPLX(x, y, float)
-#define CMPLXL(x, y) __CMPLX(x, y, long double)
-
 /* fdlibm kernel functions */
 
 hidden int    __rem_pio2_large(double*,double*,int,int,int);
@@ -174,14 +161,12 @@ hidden double __sin(double,double,int);
 hidden double __cos(double,double);
 hidden double __tan(double,double,int);
 hidden double __expo2(double);
-hidden double complex __ldexp_cexp(double complex,int);
 
 hidden int    __rem_pio2f(float,double*);
 hidden float  __sindf(double);
 hidden float  __cosdf(double);
 hidden float  __tandf(double,int);
 hidden float  __expo2f(float);
-hidden float complex __ldexp_cexpf(float complex,int);
 
 hidden int __rem_pio2l(long double, long double *);
 hidden long double __sinl(long double, long double, int);
-- 
2.19.1


[-- Attachment #4: 0003-math-add-asuint-asuint64-asfloat-and-asdouble.patch --]
[-- Type: text/x-diff, Size: 4426 bytes --]

From e47748c402121f3a49c9fdce661481978258ee7b Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sat, 21 Oct 2017 21:09:02 +0000
Subject: [PATCH 03/18] math: add asuint, asuint64, asfloat and asdouble

Code generation for SET_HIGH_WORD slightly changes, but it only affects
pow, otherwise the generated code is unchanged.
---
 src/internal/libm.h | 48 ++++++++++++++-------------------------------
 1 file changed, 15 insertions(+), 33 deletions(-)

diff --git a/src/internal/libm.h b/src/internal/libm.h
index 6e2d1900..098c7058 100644
--- a/src/internal/libm.h
+++ b/src/internal/libm.h
@@ -83,73 +83,55 @@ union ldshape {
 	}                                         \
 } while(0)
 
+#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
+#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
+#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
+#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
+
 /* Get two 32 bit ints from a double.  */
 #define EXTRACT_WORDS(hi,lo,d)                    \
 do {                                              \
-  union {double f; uint64_t i;} __u;              \
-  __u.f = (d);                                    \
-  (hi) = __u.i >> 32;                             \
-  (lo) = (uint32_t)__u.i;                         \
+  uint64_t __u = asuint64(d);                     \
+  (hi) = __u >> 32;                               \
+  (lo) = (uint32_t)__u;                           \
 } while (0)
 
 /* Get the more significant 32 bit int from a double.  */
 #define GET_HIGH_WORD(hi,d)                       \
 do {                                              \
-  union {double f; uint64_t i;} __u;              \
-  __u.f = (d);                                    \
-  (hi) = __u.i >> 32;                             \
+  (hi) = asuint64(d) >> 32;                       \
 } while (0)
 
 /* Get the less significant 32 bit int from a double.  */
 #define GET_LOW_WORD(lo,d)                        \
 do {                                              \
-  union {double f; uint64_t i;} __u;              \
-  __u.f = (d);                                    \
-  (lo) = (uint32_t)__u.i;                         \
+  (lo) = (uint32_t)asuint64(d);                   \
 } while (0)
 
 /* Set a double from two 32 bit ints.  */
 #define INSERT_WORDS(d,hi,lo)                     \
 do {                                              \
-  union {double f; uint64_t i;} __u;              \
-  __u.i = ((uint64_t)(hi)<<32) | (uint32_t)(lo);  \
-  (d) = __u.f;                                    \
+  (d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
 } while (0)
 
 /* Set the more significant 32 bits of a double from an int.  */
 #define SET_HIGH_WORD(d,hi)                       \
-do {                                              \
-  union {double f; uint64_t i;} __u;              \
-  __u.f = (d);                                    \
-  __u.i &= 0xffffffff;                            \
-  __u.i |= (uint64_t)(hi) << 32;                  \
-  (d) = __u.f;                                    \
-} while (0)
+  INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
 
 /* Set the less significant 32 bits of a double from an int.  */
 #define SET_LOW_WORD(d,lo)                        \
-do {                                              \
-  union {double f; uint64_t i;} __u;              \
-  __u.f = (d);                                    \
-  __u.i &= 0xffffffff00000000ull;                 \
-  __u.i |= (uint32_t)(lo);                        \
-  (d) = __u.f;                                    \
-} while (0)
+  INSERT_WORDS(d, asuint64(d)>>32, lo)
 
 /* Get a 32 bit int from a float.  */
 #define GET_FLOAT_WORD(w,d)                       \
 do {                                              \
-  union {float f; uint32_t i;} __u;               \
-  __u.f = (d);                                    \
-  (w) = __u.i;                                    \
+  (w) = asuint(d);                                \
 } while (0)
 
 /* Set a float from a 32 bit int.  */
 #define SET_FLOAT_WORD(d,w)                       \
 do {                                              \
-  union {float f; uint32_t i;} __u;               \
-  __u.i = (w);                                    \
-  (d) = __u.f;                                    \
+  (d) = asfloat(w);                               \
 } while (0)
 
 /* fdlibm kernel functions */
-- 
2.19.1


[-- Attachment #5: 0004-math-remove-sun-copyright-from-libm.h.patch --]
[-- Type: text/x-diff, Size: 3587 bytes --]

From 0dcb9cab3ac862fac03d5a5bd4c9835f01dcf095 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Thu, 29 Nov 2018 22:25:38 +0000
Subject: [PATCH 04/18] math: remove sun copyright from libm.h

Nothing is left from the original fdlibm header nor from the bsd
modifications to it other than some internal api declarations.

Comments are dropped that may be copyrightable content.
---
 src/internal/libm.h | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/src/internal/libm.h b/src/internal/libm.h
index 098c7058..f7dd9678 100644
--- a/src/internal/libm.h
+++ b/src/internal/libm.h
@@ -1,15 +1,3 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/math_private.h */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
 #ifndef _LIBM_H
 #define _LIBM_H
 
@@ -88,7 +76,6 @@ union ldshape {
 #define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
 #define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
 
-/* Get two 32 bit ints from a double.  */
 #define EXTRACT_WORDS(hi,lo,d)                    \
 do {                                              \
   uint64_t __u = asuint64(d);                     \
@@ -96,46 +83,37 @@ do {                                              \
   (lo) = (uint32_t)__u;                           \
 } while (0)
 
-/* Get the more significant 32 bit int from a double.  */
 #define GET_HIGH_WORD(hi,d)                       \
 do {                                              \
   (hi) = asuint64(d) >> 32;                       \
 } while (0)
 
-/* Get the less significant 32 bit int from a double.  */
 #define GET_LOW_WORD(lo,d)                        \
 do {                                              \
   (lo) = (uint32_t)asuint64(d);                   \
 } while (0)
 
-/* Set a double from two 32 bit ints.  */
 #define INSERT_WORDS(d,hi,lo)                     \
 do {                                              \
   (d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
 } while (0)
 
-/* Set the more significant 32 bits of a double from an int.  */
 #define SET_HIGH_WORD(d,hi)                       \
   INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
 
-/* Set the less significant 32 bits of a double from an int.  */
 #define SET_LOW_WORD(d,lo)                        \
   INSERT_WORDS(d, asuint64(d)>>32, lo)
 
-/* Get a 32 bit int from a float.  */
 #define GET_FLOAT_WORD(w,d)                       \
 do {                                              \
   (w) = asuint(d);                                \
 } while (0)
 
-/* Set a float from a 32 bit int.  */
 #define SET_FLOAT_WORD(d,w)                       \
 do {                                              \
   (d) = asfloat(w);                               \
 } while (0)
 
-/* fdlibm kernel functions */
-
 hidden int    __rem_pio2_large(double*,double*,int,int,int);
 
 hidden int    __rem_pio2(double,double*);
@@ -155,7 +133,6 @@ hidden long double __sinl(long double, long double, int);
 hidden long double __cosl(long double, long double);
 hidden long double __tanl(long double, long double, int);
 
-/* polynomial evaluation */
 hidden long double __polevll(long double, const long double *, int);
 hidden long double __p1evll(long double, const long double *, int);
 
-- 
2.19.1


[-- Attachment #6: 0005-math-add-fp_arch.h-with-fp_barrier-and-fp_force_eval.patch --]
[-- Type: text/x-diff, Size: 5943 bytes --]

From 303b82453d6658546567dae0fe5773fce737e228 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Mon, 26 Nov 2018 23:30:00 +0000
Subject: [PATCH 05/18] math: add fp_arch.h with fp_barrier and fp_force_eval

C99 has ways to support fenv access, but compilers don't implement it
and assume nearest rounding mode and no fp status flag access. (gcc has
-frounding-math and then it does not assume nearest rounding mode, but
it still assumes the compiled code itself does not change the mode.
Even if the C99 mechanism was implemented it is not ideal: it requires
all code in the library to be compiled with FENV_ACCESS "on" to make it
usable in non-nearest rounding mode, but that limits optimizations more
than necessary.)

The math functions should give reasonable results in all rounding modes
(but the quality may be degraded in non-nearest rounding modes) and the
fp status flag settings should follow the spec, so fenv side-effects are
important and code transformations that break them should be prevented.

Unfortunately compilers don't give any help with this, the best we can
do is to add fp barriers to the code using volatile local variables
(they create a stack frame and undesirable memory accesses to it) or
inline asm (gcc specific, requires target specific fp reg constraints,
often creates unnecessary reg moves and multiple barriers are needed to
express that an operation has side-effects) or extern call (only useful
in tail-call position to avoid stack-frame creation and does not work
with lto).

We assume that in a math function if an operation depends on the input
and the output depends on it, then the operation will be evaluated at
runtime when the function is called, producing all the expected fenv
side-effects (this is not true in case of lto and in case the operation
is evaluated with excess precision that is not rounded away). So fp
barriers are needed (1) to prevent the move of an operation within a
function (in case it may be moved from an unevaluated code path into an
evaluated one or if it may be moved across a fenv access), (2) force the
evaluation of an operation for its side-effect when it has no input
dependency (may be constant folded) or (3) when its output is unused. I
belive that fp_barrier and fp_force_eval can take care of these and they
should not be needed in hot code paths.
---
 arch/aarch64/fp_arch.h | 25 +++++++++++++++
 arch/generic/fp_arch.h |  0
 src/internal/libm.h    | 71 ++++++++++++++++++++++++++++++++++++++----
 3 files changed, 90 insertions(+), 6 deletions(-)
 create mode 100644 arch/aarch64/fp_arch.h
 create mode 100644 arch/generic/fp_arch.h

diff --git a/arch/aarch64/fp_arch.h b/arch/aarch64/fp_arch.h
new file mode 100644
index 00000000..f3d445b9
--- /dev/null
+++ b/arch/aarch64/fp_arch.h
@@ -0,0 +1,25 @@
+#define fp_barrierf fp_barrierf
+static inline float fp_barrierf(float x)
+{
+	__asm__ __volatile__ ("" : "+w"(x));
+	return x;
+}
+
+#define fp_barrier fp_barrier
+static inline double fp_barrier(double x)
+{
+	__asm__ __volatile__ ("" : "+w"(x));
+	return x;
+}
+
+#define fp_force_evalf fp_force_evalf
+static inline void fp_force_evalf(float x)
+{
+	__asm__ __volatile__ ("" : "+w"(x));
+}
+
+#define fp_force_eval fp_force_eval
+static inline void fp_force_eval(double x)
+{
+	__asm__ __volatile__ ("" : "+w"(x));
+}
diff --git a/arch/generic/fp_arch.h b/arch/generic/fp_arch.h
new file mode 100644
index 00000000..e69de29b
diff --git a/src/internal/libm.h b/src/internal/libm.h
index f7dd9678..5669c046 100644
--- a/src/internal/libm.h
+++ b/src/internal/libm.h
@@ -5,6 +5,7 @@
 #include <float.h>
 #include <math.h>
 #include <endian.h>
+#include "fp_arch.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
@@ -58,16 +59,74 @@ union ldshape {
 #error Unsupported long double representation
 #endif
 
+/* fp_barrier returns its input, but limits code transformations
+   as if it had a side-effect (e.g. observable io) and returned
+   an arbitrary value.  */
+
+#ifndef fp_barrierf
+#define fp_barrierf fp_barrierf
+static inline float fp_barrierf(float x)
+{
+	volatile float y = x;
+	return y;
+}
+#endif
+
+#ifndef fp_barrier
+#define fp_barrier fp_barrier
+static inline double fp_barrier(double x)
+{
+	volatile double y = x;
+	return y;
+}
+#endif
+
+#ifndef fp_barrierl
+#define fp_barrierl fp_barrierl
+static inline long double fp_barrierl(long double x)
+{
+	volatile long double y = x;
+	return y;
+}
+#endif
+
+/* fp_force_eval ensures that the input value is computed when that's
+   otherwise unused.  To prevent the constant folding of the input
+   expression, an additional fp_barrier may be needed or a compilation
+   mode that does so (e.g. -frounding-math in gcc). Then it can be
+   used to evaluate an expression for its fenv side-effects only.   */
+
+#ifndef fp_force_evalf
+#define fp_force_evalf fp_force_evalf
+static inline void fp_force_evalf(float x)
+{
+	volatile float y = x;
+}
+#endif
+
+#ifndef fp_force_eval
+#define fp_force_eval fp_force_eval
+static inline void fp_force_eval(double x)
+{
+	volatile double y = x;
+}
+#endif
+
+#ifndef fp_force_evall
+#define fp_force_evall fp_force_evall
+static inline void fp_force_evall(long double x)
+{
+	volatile long double y = x;
+}
+#endif
+
 #define FORCE_EVAL(x) do {                        \
 	if (sizeof(x) == sizeof(float)) {         \
-		volatile float __x;               \
-		__x = (x);                        \
+		fp_force_evalf(x);                \
 	} else if (sizeof(x) == sizeof(double)) { \
-		volatile double __x;              \
-		__x = (x);                        \
+		fp_force_eval(x);                 \
 	} else {                                  \
-		volatile long double __x;         \
-		__x = (x);                        \
+		fp_force_evall(x);                \
 	}                                         \
 } while(0)
 
-- 
2.19.1


[-- Attachment #7: 0006-math-add-eval_as_float-and-eval_as_double.patch --]
[-- Type: text/x-diff, Size: 1945 bytes --]

From b2d0b90f284e8cd90ad033e1cb621ec1426ffed5 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sat, 1 Dec 2018 23:52:34 +0000
Subject: [PATCH 06/18] math: add eval_as_float and eval_as_double

Previously type casts or assignments were used for handling excess
precision, which assumed standard C99 semantics, but since it's a
rarely needed obscure detail, it's better to use explicit helper
functions to document where we rely on this.  It also helps if the
code is used outside of the libc in non-C99 compilation mode: with the
default excess precision handling of gcc, explicit inline asm barriers
are needed for narrowing on FLT_EVAL_METHOD!=0 targets.

I plan to use this in new code with the existing style that uses
double_t and float_t as much as possible.

One ugliness is that it is required for almost every return statement
since that does not drop excess precision (the standard changed this
in C11 annex F, but that does not help in non-standard compilation
modes or with old compilers).
---
 src/internal/libm.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/internal/libm.h b/src/internal/libm.h
index 5669c046..09fcfde3 100644
--- a/src/internal/libm.h
+++ b/src/internal/libm.h
@@ -59,6 +59,23 @@ union ldshape {
 #error Unsupported long double representation
 #endif
 
+/* Evaluate an expression as the specified type. With standard excess
+   precision handling a type cast or assignment is enough (with
+   -ffloat-store an assignment is required, in old compilers argument
+   passing and return statement may not drop excess precision).  */
+
+static inline float eval_as_float(float x)
+{
+	float y = x;
+	return y;
+}
+
+static inline double eval_as_double(double x)
+{
+	double y = x;
+	return y;
+}
+
 /* fp_barrier returns its input, but limits code transformations
    as if it had a side-effect (e.g. observable io) and returned
    an arbitrary value.  */
-- 
2.19.1


[-- Attachment #8: 0007-math-add-single-precision-error-handling-functions.patch --]
[-- Type: text/x-diff, Size: 2974 bytes --]

From 15a4361c8b981cfde63445511302238e2283626a Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sun, 22 Oct 2017 13:51:35 +0000
Subject: [PATCH 07/18] math: add single precision error handling functions

These are supposed to be used in tail call positions when handling
special cases in new code. (fp exceptions may be raised "naturally"
by the common code path if special casing is more effort.)

This implements the error handling apis used in
https://github.com/ARM-software/optimized-routines
without errno setting.
---
 src/internal/libm.h        | 7 +++++++
 src/math/__math_divzerof.c | 6 ++++++
 src/math/__math_invalidf.c | 6 ++++++
 src/math/__math_oflowf.c   | 6 ++++++
 src/math/__math_uflowf.c   | 6 ++++++
 src/math/__math_xflowf.c   | 6 ++++++
 6 files changed, 37 insertions(+)
 create mode 100644 src/math/__math_divzerof.c
 create mode 100644 src/math/__math_invalidf.c
 create mode 100644 src/math/__math_oflowf.c
 create mode 100644 src/math/__math_uflowf.c
 create mode 100644 src/math/__math_xflowf.c

diff --git a/src/internal/libm.h b/src/internal/libm.h
index 09fcfde3..a52a0b83 100644
--- a/src/internal/libm.h
+++ b/src/internal/libm.h
@@ -216,4 +216,11 @@ extern int __signgam;
 hidden double __lgamma_r(double, int *);
 hidden float __lgammaf_r(float, int *);
 
+/* error handling functions */
+hidden float __math_xflowf(uint32_t, float);
+hidden float __math_uflowf(uint32_t);
+hidden float __math_oflowf(uint32_t);
+hidden float __math_divzerof(uint32_t);
+hidden float __math_invalidf(float);
+
 #endif
diff --git a/src/math/__math_divzerof.c b/src/math/__math_divzerof.c
new file mode 100644
index 00000000..ce046f3e
--- /dev/null
+++ b/src/math/__math_divzerof.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_divzerof(uint32_t sign)
+{
+	return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
+}
diff --git a/src/math/__math_invalidf.c b/src/math/__math_invalidf.c
new file mode 100644
index 00000000..357d4b12
--- /dev/null
+++ b/src/math/__math_invalidf.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_invalidf(float x)
+{
+	return (x - x) / (x - x);
+}
diff --git a/src/math/__math_oflowf.c b/src/math/__math_oflowf.c
new file mode 100644
index 00000000..fa7d0620
--- /dev/null
+++ b/src/math/__math_oflowf.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_oflowf(uint32_t sign)
+{
+	return __math_xflowf(sign, 0x1p97f);
+}
diff --git a/src/math/__math_uflowf.c b/src/math/__math_uflowf.c
new file mode 100644
index 00000000..94d50f2b
--- /dev/null
+++ b/src/math/__math_uflowf.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_uflowf(uint32_t sign)
+{
+	return __math_xflowf(sign, 0x1p-95f);
+}
diff --git a/src/math/__math_xflowf.c b/src/math/__math_xflowf.c
new file mode 100644
index 00000000..f2c84784
--- /dev/null
+++ b/src/math/__math_xflowf.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+float __math_xflowf(uint32_t sign, float y)
+{
+	return eval_as_float(fp_barrierf(sign ? -y : y) * y);
+}
-- 
2.19.1


[-- Attachment #9: 0008-math-add-double-precision-error-handling-functions.patch --]
[-- Type: text/x-diff, Size: 2644 bytes --]

From bb149b430c83e697732749f75965d83edb82d43d Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Fri, 30 Nov 2018 21:15:23 +0000
Subject: [PATCH 08/18] math: add double precision error handling functions

---
 src/internal/libm.h       | 5 +++++
 src/math/__math_divzero.c | 6 ++++++
 src/math/__math_invalid.c | 6 ++++++
 src/math/__math_oflow.c   | 6 ++++++
 src/math/__math_uflow.c   | 6 ++++++
 src/math/__math_xflow.c   | 6 ++++++
 6 files changed, 35 insertions(+)
 create mode 100644 src/math/__math_divzero.c
 create mode 100644 src/math/__math_invalid.c
 create mode 100644 src/math/__math_oflow.c
 create mode 100644 src/math/__math_uflow.c
 create mode 100644 src/math/__math_xflow.c

diff --git a/src/internal/libm.h b/src/internal/libm.h
index a52a0b83..62da4bdb 100644
--- a/src/internal/libm.h
+++ b/src/internal/libm.h
@@ -222,5 +222,10 @@ hidden float __math_uflowf(uint32_t);
 hidden float __math_oflowf(uint32_t);
 hidden float __math_divzerof(uint32_t);
 hidden float __math_invalidf(float);
+hidden double __math_xflow(uint32_t, double);
+hidden double __math_uflow(uint32_t);
+hidden double __math_oflow(uint32_t);
+hidden double __math_divzero(uint32_t);
+hidden double __math_invalid(double);
 
 #endif
diff --git a/src/math/__math_divzero.c b/src/math/__math_divzero.c
new file mode 100644
index 00000000..59d21350
--- /dev/null
+++ b/src/math/__math_divzero.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_divzero(uint32_t sign)
+{
+	return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
+}
diff --git a/src/math/__math_invalid.c b/src/math/__math_invalid.c
new file mode 100644
index 00000000..17740490
--- /dev/null
+++ b/src/math/__math_invalid.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_invalid(double x)
+{
+	return (x - x) / (x - x);
+}
diff --git a/src/math/__math_oflow.c b/src/math/__math_oflow.c
new file mode 100644
index 00000000..c85dbf98
--- /dev/null
+++ b/src/math/__math_oflow.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_oflow(uint32_t sign)
+{
+	return __math_xflow(sign, 0x1p769);
+}
diff --git a/src/math/__math_uflow.c b/src/math/__math_uflow.c
new file mode 100644
index 00000000..b90594ae
--- /dev/null
+++ b/src/math/__math_uflow.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_uflow(uint32_t sign)
+{
+	return __math_xflow(sign, 0x1p-767);
+}
diff --git a/src/math/__math_xflow.c b/src/math/__math_xflow.c
new file mode 100644
index 00000000..744203c4
--- /dev/null
+++ b/src/math/__math_xflow.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_xflow(uint32_t sign, double y)
+{
+	return eval_as_double(fp_barrier(sign ? -y : y) * y);
+}
-- 
2.19.1


[-- Attachment #10: 0008-math-add-double-precision-error-handling-functions.patch --]
[-- Type: text/x-diff, Size: 2644 bytes --]

From bb149b430c83e697732749f75965d83edb82d43d Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Fri, 30 Nov 2018 21:15:23 +0000
Subject: [PATCH 08/18] math: add double precision error handling functions

---
 src/internal/libm.h       | 5 +++++
 src/math/__math_divzero.c | 6 ++++++
 src/math/__math_invalid.c | 6 ++++++
 src/math/__math_oflow.c   | 6 ++++++
 src/math/__math_uflow.c   | 6 ++++++
 src/math/__math_xflow.c   | 6 ++++++
 6 files changed, 35 insertions(+)
 create mode 100644 src/math/__math_divzero.c
 create mode 100644 src/math/__math_invalid.c
 create mode 100644 src/math/__math_oflow.c
 create mode 100644 src/math/__math_uflow.c
 create mode 100644 src/math/__math_xflow.c

diff --git a/src/internal/libm.h b/src/internal/libm.h
index a52a0b83..62da4bdb 100644
--- a/src/internal/libm.h
+++ b/src/internal/libm.h
@@ -222,5 +222,10 @@ hidden float __math_uflowf(uint32_t);
 hidden float __math_oflowf(uint32_t);
 hidden float __math_divzerof(uint32_t);
 hidden float __math_invalidf(float);
+hidden double __math_xflow(uint32_t, double);
+hidden double __math_uflow(uint32_t);
+hidden double __math_oflow(uint32_t);
+hidden double __math_divzero(uint32_t);
+hidden double __math_invalid(double);
 
 #endif
diff --git a/src/math/__math_divzero.c b/src/math/__math_divzero.c
new file mode 100644
index 00000000..59d21350
--- /dev/null
+++ b/src/math/__math_divzero.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_divzero(uint32_t sign)
+{
+	return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
+}
diff --git a/src/math/__math_invalid.c b/src/math/__math_invalid.c
new file mode 100644
index 00000000..17740490
--- /dev/null
+++ b/src/math/__math_invalid.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_invalid(double x)
+{
+	return (x - x) / (x - x);
+}
diff --git a/src/math/__math_oflow.c b/src/math/__math_oflow.c
new file mode 100644
index 00000000..c85dbf98
--- /dev/null
+++ b/src/math/__math_oflow.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_oflow(uint32_t sign)
+{
+	return __math_xflow(sign, 0x1p769);
+}
diff --git a/src/math/__math_uflow.c b/src/math/__math_uflow.c
new file mode 100644
index 00000000..b90594ae
--- /dev/null
+++ b/src/math/__math_uflow.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_uflow(uint32_t sign)
+{
+	return __math_xflow(sign, 0x1p-767);
+}
diff --git a/src/math/__math_xflow.c b/src/math/__math_xflow.c
new file mode 100644
index 00000000..744203c4
--- /dev/null
+++ b/src/math/__math_xflow.c
@@ -0,0 +1,6 @@
+#include "libm.h"
+
+double __math_xflow(uint32_t sign, double y)
+{
+	return eval_as_double(fp_barrier(sign ? -y : y) * y);
+}
-- 
2.19.1


[-- Attachment #11: 0009-math-add-macros-for-static-branch-prediction-hints.patch --]
[-- Type: text/x-diff, Size: 1268 bytes --]

From ae939d0b35714a3934f27a6be9897e07af8c22e6 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Thu, 29 Nov 2018 23:33:40 +0000
Subject: [PATCH 09/18] math: add macros for static branch prediction hints

These don't have an effectw with -Os so not useful with default settings
other than documenting the expectation.

With --enable-optimize=internal,malloc,string,math the libc.so code size
increases by 18K on x86_64 and performance varies in -2% .. +10%.
---
 src/internal/libm.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/internal/libm.h b/src/internal/libm.h
index 62da4bdb..28537603 100644
--- a/src/internal/libm.h
+++ b/src/internal/libm.h
@@ -59,6 +59,15 @@ union ldshape {
 #error Unsupported long double representation
 #endif
 
+/* Helps static branch prediction so hot path can be better optimized.  */
+#ifdef __GNUC__
+#define predict_true(x) __builtin_expect(!!(x), 1)
+#define predict_false(x) __builtin_expect(x, 0)
+#else
+#define predict_true(x) (x)
+#define predict_false(x) (x)
+#endif
+
 /* Evaluate an expression as the specified type. With standard excess
    precision handling a type cast or assignment is enough (with
    -ffloat-store an assignment is required, in old compilers argument
-- 
2.19.1


[-- Attachment #12: 0010-math-add-configuration-macros.patch --]
[-- Type: text/x-diff, Size: 1010 bytes --]

From 32e1cd071774472e454f334aee333188e4f8f9b9 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sun, 2 Dec 2018 18:53:37 +0000
Subject: [PATCH 10/18] math: add configuration macros

Musl currently aims to support non-nearest rounding mode and does not
support SNaNs. These macros allow marking relevant code paths in case
these decisions are changed later (they also help documenting the
corner cases involved).
---
 src/internal/libm.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/internal/libm.h b/src/internal/libm.h
index 28537603..5212bab1 100644
--- a/src/internal/libm.h
+++ b/src/internal/libm.h
@@ -59,6 +59,11 @@ union ldshape {
 #error Unsupported long double representation
 #endif
 
+/* Support non-nearest rounding mode.  */
+#define WANT_ROUNDING 1
+/* Support signaling NaNs.  */
+#define WANT_SNAN 0
+
 /* Helps static branch prediction so hot path can be better optimized.  */
 #ifdef __GNUC__
 #define predict_true(x) __builtin_expect(!!(x), 1)
-- 
2.19.1

next prev parent reply	other threads:[~2018-12-08 12:54 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-08 12:50 [PATCH 00/18] " Szabolcs Nagy
2018-12-08 12:54 ` Szabolcs Nagy [this message]
2018-12-08 12:56 ` [PATCH 11-18/18] " Szabolcs Nagy
2019-02-23 23:36 ` [PATCH 00/18] " Rich Felker
2019-02-24 21:00   ` Szabolcs Nagy
2019-02-24 20:58 ` Joe Duarte
2019-02-24 21:25   ` Szabolcs Nagy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181208125422.GZ21289@port70.net \
    --to=nsz@port70.net \
    --cc=musl@lists.openwall.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).