From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on inbox.vuxu.org X-Spam-Level: X-Spam-Status: No, score=-3.0 required=5.0 tests=HEADER_FROM_DIFFERENT_DOMAINS, MAILING_LIST_MULTI,RCVD_IN_DNSWL_MED,RCVD_IN_MSPIKE_H3, RCVD_IN_MSPIKE_WL autolearn=ham autolearn_force=no version=3.4.2 Received: from mother.openwall.net (mother.openwall.net [195.42.179.200]) by inbox.vuxu.org (OpenSMTPD) with SMTP id 52dc16cf for ; Mon, 20 Jan 2020 22:37:40 +0000 (UTC) Received: (qmail 26013 invoked by uid 550); 20 Jan 2020 22:37:38 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-ID: Reply-To: musl@lists.openwall.com Received: (qmail 25979 invoked from network); 20 Jan 2020 22:37:38 -0000 Date: Mon, 20 Jan 2020 23:37:26 +0100 From: Szabolcs Nagy To: musl@lists.openwall.com Message-ID: <20200120223726.GQ23985@port70.net> Mail-Followup-To: musl@lists.openwall.com MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="b5gNqxB1S1yM7hjW" Content-Disposition: inline User-Agent: Mutt/1.10.1 (2018-07-13) Subject: [musl] [PATCH 0/2] math: fix known directed rounding problems --b5gNqxB1S1yM7hjW Content-Type: text/plain; charset=us-ascii Content-Disposition: inline fix the two known directed rounding bugs in current math code. 
(there are still large ulp errors in j0, y0, jn, yn functions, but all other functions should have small worst case ulp error now) Szabolcs Nagy (2): math: fix __rem_pio2 in non-nearest rounding modes math: fix sinh overflows in non-nearest rounding src/internal/libm.h | 4 ++-- src/math/__expo2.c | 5 +++-- src/math/__expo2f.c | 5 +++-- src/math/__rem_pio2.c | 15 ++++++++++++++- src/math/__rem_pio2f.c | 13 ++++++++++++- src/math/__rem_pio2l.c | 16 +++++++++++++++- src/math/cosh.c | 2 +- src/math/coshf.c | 2 +- src/math/sinh.c | 2 +- src/math/sinhf.c | 2 +- 10 files changed, 53 insertions(+), 13 deletions(-) -- 2.24.1 --b5gNqxB1S1yM7hjW Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="0001-math-fix-__rem_pio2-in-non-nearest-rounding-modes.patch" >From 7d2aed5f8f699b7e644be030cc826d0152009ef8 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Sat, 18 Jan 2020 17:55:25 +0000 Subject: [PATCH 1/2] math: fix __rem_pio2 in non-nearest rounding modes Handle the case where |y| > pi/4+tiny after reduction. This happens in directed rounding modes because the fast round to int code does not give the nearest integer. In such cases the reduction may not be symmetric between x and -x so e.g. cos(x)==cos(-x) may not hold (but polynomial evaluation is not symmetric either with directed rounding so fixing that would require more changes with bigger performance impact). The fix only adds two predictable branches in nearest rounding mode; a simple microbenchmark does not show a relevant performance regression in nearest rounding mode. The code could be improved: e.g. reducing the medium size threshold such that two step reduction is enough instead of three, and the single precision case can avoid the issue by doing the round to int differently, but this fix was kept minimal. 
--- src/math/__rem_pio2.c | 15 ++++++++++++++- src/math/__rem_pio2f.c | 13 ++++++++++++- src/math/__rem_pio2l.c | 16 +++++++++++++++- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/src/math/__rem_pio2.c b/src/math/__rem_pio2.c index d403f81c..dcf672fb 100644 --- a/src/math/__rem_pio2.c +++ b/src/math/__rem_pio2.c @@ -36,6 +36,7 @@ */ static const double toint = 1.5/EPS, +pio4 = 0x1.921fb54442d18p-1, invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ @@ -117,11 +118,23 @@ int __rem_pio2(double x, double *y) } if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ medium: - /* rint(x/(pi/2)), Assume round-to-nearest. */ + /* rint(x/(pi/2)) */ fn = (double_t)x*invpio2 + toint - toint; n = (int32_t)fn; r = x - fn*pio2_1; w = fn*pio2_1t; /* 1st round, good to 85 bits */ + /* Matters with directed rounding. */ + if (predict_false(r - w < -pio4)) { + n--; + fn--; + r = x - fn*pio2_1; + w = fn*pio2_1t; + } else if (predict_false(r - w > pio4)) { + n++; + fn++; + r = x - fn*pio2_1; + w = fn*pio2_1t; + } y[0] = r - w; u.f = y[0]; ey = u.i>>52 & 0x7ff; diff --git a/src/math/__rem_pio2f.c b/src/math/__rem_pio2f.c index 4473c1c4..e6765643 100644 --- a/src/math/__rem_pio2f.c +++ b/src/math/__rem_pio2f.c @@ -35,6 +35,7 @@ */ static const double toint = 1.5/EPS, +pio4 = 0x1.921fb6p-1, invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ pio2_1 = 1.57079631090164184570e+00, /* 0x3FF921FB, 0x50000000 */ pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ @@ -50,10 +51,20 @@ int __rem_pio2f(float x, double *y) ix = u.i & 0x7fffffff; /* 25+53 bit pi is good enough for medium size */ if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */ - /* Use a specialized rint() to get fn. Assume round-to-nearest. */ + /* Use a specialized rint() to get fn. 
*/ fn = (double_t)x*invpio2 + toint - toint; n = (int32_t)fn; *y = x - fn*pio2_1 - fn*pio2_1t; + /* Matters with directed rounding. */ + if (predict_false(*y < -pio4)) { + n--; + fn--; + *y = x - fn*pio2_1 - fn*pio2_1t; + } else if (predict_false(*y > pio4)) { + n++; + fn++; + *y = x - fn*pio2_1 - fn*pio2_1t; + } return n; } if(ix>=0x7f800000) { /* x is inf or NaN */ diff --git a/src/math/__rem_pio2l.c b/src/math/__rem_pio2l.c index 77255bd8..236b2def 100644 --- a/src/math/__rem_pio2l.c +++ b/src/math/__rem_pio2l.c @@ -44,6 +44,7 @@ pio2_1 = 1.57079632679597125389e+00, /* 0x3FF921FB, 0x54444000 */ pio2_2 = -1.07463465549783099519e-12, /* -0x12e7b967674000.0p-92 */ pio2_3 = 6.36831716351370313614e-25; /* 0x18a2e037074000.0p-133 */ static const long double +pio4 = 0x1.921fb54442d1846ap-1L, invpio2 = 6.36619772367581343076e-01L, /* 0xa2f9836e4e44152a.0p-64 */ pio2_1t = -1.07463465549719416346e-12L, /* -0x973dcb3b399d747f.0p-103 */ pio2_2t = 6.36831716351095013979e-25L, /* 0xc51701b839a25205.0p-144 */ @@ -57,6 +58,7 @@ pio2_3t = -2.75299651904407171810e-37L; /* -0xbb5bf6c7ddd660ce.0p-185 */ #define NX 5 #define NY 3 static const long double +pio4 = 0x1.921fb54442d18469898cc51701b8p-1L, invpio2 = 6.3661977236758134307553505349005747e-01L, /* 0x145f306dc9c882a53f84eafa3ea6a.0p-113 */ pio2_1 = 1.5707963267948966192292994253909555e+00L, /* 0x1921fb54442d18469800000000000.0p-112 */ pio2_1t = 2.0222662487959507323996846200947577e-21L, /* 0x13198a2e03707344a4093822299f3.0p-181 */ @@ -76,11 +78,23 @@ int __rem_pio2l(long double x, long double *y) u.f = x; ex = u.i.se & 0x7fff; if (SMALL(u)) { - /* rint(x/(pi/2)), Assume round-to-nearest. */ + /* rint(x/(pi/2)) */ fn = x*invpio2 + toint - toint; n = QUOBITS(fn); r = x-fn*pio2_1; w = fn*pio2_1t; /* 1st round good to 102/180 bits (ld80/ld128) */ + /* Matters with directed rounding. 
*/ + if (predict_false(r - w < -pio4)) { + n--; + fn--; + r = x - fn*pio2_1; + w = fn*pio2_1t; + } else if (predict_false(r - w > pio4)) { + n++; + fn++; + r = x - fn*pio2_1; + w = fn*pio2_1t; + } y[0] = r-w; u.f = y[0]; ey = u.i.se & 0x7fff; -- 2.24.1 --b5gNqxB1S1yM7hjW Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="0002-math-fix-sinh-overflows-in-non-nearest-rounding.patch" >From f8c5704213b09821d37dd95872b7e0f89906375c Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Mon, 20 Jan 2020 20:38:45 +0000 Subject: [PATCH 2/2] math: fix sinh overflows in non-nearest rounding The final rounding operation should be done with the correct sign, otherwise huge results may incorrectly get rounded to or away from infinity in upward or downward rounding modes. This affected sinh and sinhf which set the sign on the result after a potentially overflowing mul. There may be other non-nearest rounding issues, but this was a known long-standing issue with large ulp error (depending on how ulp is defined near infinity). The fix should have no effect on sinh and sinhf performance but may have a tiny effect on cosh and coshf. 
--- src/internal/libm.h | 4 ++-- src/math/__expo2.c | 5 +++-- src/math/__expo2f.c | 5 +++-- src/math/cosh.c | 2 +- src/math/coshf.c | 2 +- src/math/sinh.c | 2 +- src/math/sinhf.c | 2 +- 7 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/internal/libm.h b/src/internal/libm.h index b5bd26b8..7533f6ba 100644 --- a/src/internal/libm.h +++ b/src/internal/libm.h @@ -236,13 +236,13 @@ hidden int __rem_pio2(double,double*); hidden double __sin(double,double,int); hidden double __cos(double,double); hidden double __tan(double,double,int); -hidden double __expo2(double); +hidden double __expo2(double,double); hidden int __rem_pio2f(float,double*); hidden float __sindf(double); hidden float __cosdf(double); hidden float __tandf(double,int); -hidden float __expo2f(float); +hidden float __expo2f(float,float); hidden int __rem_pio2l(long double, long double *); hidden long double __sinl(long double, long double, int); diff --git a/src/math/__expo2.c b/src/math/__expo2.c index 740ac680..248f052b 100644 --- a/src/math/__expo2.c +++ b/src/math/__expo2.c @@ -5,12 +5,13 @@ static const int k = 2043; static const double kln2 = 0x1.62066151add8bp+10; /* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */ -double __expo2(double x) +double __expo2(double x, double sign) { double scale; /* note that k is odd and scale*scale overflows */ INSERT_WORDS(scale, (uint32_t)(0x3ff + k/2) << 20, 0); /* exp(x - k ln2) * 2**(k-1) */ - return exp(x - kln2) * scale * scale; + /* in directed rounding correct sign before rounding or overflow is important */ + return exp(x - kln2) * (sign * scale) * scale; } diff --git a/src/math/__expo2f.c b/src/math/__expo2f.c index 5163e418..538eb09c 100644 --- a/src/math/__expo2f.c +++ b/src/math/__expo2f.c @@ -5,12 +5,13 @@ static const int k = 235; static const float kln2 = 0x1.45c778p+7f; /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ -float __expo2f(float x) +float __expo2f(float x, 
float sign) { float scale; /* note that k is odd and scale*scale overflows */ SET_FLOAT_WORD(scale, (uint32_t)(0x7f + k/2) << 23); /* exp(x - k ln2) * 2**(k-1) */ - return expf(x - kln2) * scale * scale; + /* in directed rounding correct sign before rounding or overflow is important */ + return expf(x - kln2) * (sign * scale) * scale; } diff --git a/src/math/cosh.c b/src/math/cosh.c index 100f8231..490c15fb 100644 --- a/src/math/cosh.c +++ b/src/math/cosh.c @@ -35,6 +35,6 @@ double cosh(double x) /* |x| > log(DBL_MAX) or nan */ /* note: the result is stored to handle overflow */ - t = __expo2(x); + t = __expo2(x, 1.0); return t; } diff --git a/src/math/coshf.c b/src/math/coshf.c index b09f2ee5..e739cff9 100644 --- a/src/math/coshf.c +++ b/src/math/coshf.c @@ -28,6 +28,6 @@ float coshf(float x) } /* |x| > log(FLT_MAX) or nan */ - t = __expo2f(x); + t = __expo2f(x, 1.0f); return t; } diff --git a/src/math/sinh.c b/src/math/sinh.c index 00022c4e..a01951ae 100644 --- a/src/math/sinh.c +++ b/src/math/sinh.c @@ -34,6 +34,6 @@ double sinh(double x) /* |x| > log(DBL_MAX) or nan */ /* note: the result is stored to handle overflow */ - t = 2*h*__expo2(absx); + t = __expo2(absx, 2*h); return t; } diff --git a/src/math/sinhf.c b/src/math/sinhf.c index 6ad19ea2..b9caa793 100644 --- a/src/math/sinhf.c +++ b/src/math/sinhf.c @@ -26,6 +26,6 @@ float sinhf(float x) } /* |x| > logf(FLT_MAX) or nan */ - t = 2*h*__expo2f(absx); + t = __expo2f(absx, 2*h); return t; } -- 2.24.1 --b5gNqxB1S1yM7hjW--