From mboxrd@z Thu Jan 1 00:00:00 1970 X-Msuck: nntp://news.gmane.org/gmane.linux.lib.musl.general/14710 Path: news.gmane.org!.POSTED.blaine.gmane.org!not-for-mail From: Rich Felker Newsgroups: gmane.linux.lib.musl.general Subject: Re: [PATCH] math: optimize lrint on 32bit targets Date: Mon, 23 Sep 2019 13:40:29 -0400 Message-ID: <20190923174029.GN9017@brightrain.aerifal.cx> References: <20190921155234.GA22009@port70.net> <20190922204335.GC22009@port70.net> Reply-To: musl@lists.openwall.com Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Injection-Info: blaine.gmane.org; posting-host="blaine.gmane.org:195.159.176.226"; logging-data="265430"; mail-complaints-to="usenet@blaine.gmane.org" User-Agent: Mutt/1.5.21 (2010-09-15) To: musl@lists.openwall.com Original-X-From: musl-return-14726-gllmg-musl=m.gmane.org@lists.openwall.com Mon Sep 23 19:40:47 2019 Return-path: Envelope-to: gllmg-musl@m.gmane.org Original-Received: from mother.openwall.net ([195.42.179.200]) by blaine.gmane.org with smtp (Exim 4.89) (envelope-from ) id 1iCSKL-0016qb-29 for gllmg-musl@m.gmane.org; Mon, 23 Sep 2019 19:40:45 +0200 Original-Received: (qmail 25886 invoked by uid 550); 23 Sep 2019 17:40:42 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-ID: Original-Received: (qmail 25865 invoked from network); 23 Sep 2019 17:40:41 -0000 Content-Disposition: inline In-Reply-To: <20190922204335.GC22009@port70.net> Original-Sender: Rich Felker Xref: news.gmane.org gmane.linux.lib.musl.general:14710 Archived-At: On Sun, Sep 22, 2019 at 10:43:35PM +0200, Szabolcs Nagy wrote: > * Szabolcs Nagy [2019-09-21 17:52:35 +0200]: > > this was discussed on irc. > > did more benchmarks, on i486 branches seem better > than setting the sign bit but on arm branch is > worse so i keep the original code, just changed > the code style (asuint macro instead of union). 
> > >From 67990a5c85fc5db55831f9ddddc58317e5b344b6 Mon Sep 17 00:00:00 2001 > From: Szabolcs Nagy > Date: Mon, 16 Sep 2019 20:33:11 +0000 > Subject: [PATCH] math: optimize lrint on 32bit targets > > lrint in (LONG_MAX, 1/DBL_EPSILON) and in (-1/DBL_EPSILON, LONG_MIN) > is not trivial: rounding to int may be inexact, but the conversion to > int may overflow and then the inexact flag must not be raised. (the > overflow threshold is rounding mode dependent). > > this matters on 32bit targets (without single instruction lrint or > rint), so the common case (when there is no overflow) is optimized by > inlining the lrint logic, otherwise the old code is kept as a fallback. > > on my laptop an i486 lrint call is asm:10ns, old c:30ns, new c:21ns > on a smaller arm core: old c:71ns, new c:34ns > on a bigger arm core: old c:27ns, new c:19ns > --- > src/math/lrint.c | 28 +++++++++++++++++++++++++++- > 1 file changed, 27 insertions(+), 1 deletion(-) > > diff --git a/src/math/lrint.c b/src/math/lrint.c > index bdca8b7c..ddee7a0d 100644 > --- a/src/math/lrint.c > +++ b/src/math/lrint.c > @@ -1,5 +1,6 @@ > #include <limits.h> > #include <fenv.h> > +#include <math.h> > #include "libm.h" > > /* > @@ -26,7 +27,18 @@ as a double. 
> */ > > #if LONG_MAX < 1U<<53 && defined(FE_INEXACT) > -long lrint(double x) > +#include <float.h> > +#include <stdint.h> > +#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 > +#define EPS DBL_EPSILON > +#elif FLT_EVAL_METHOD==2 > +#define EPS LDBL_EPSILON > +#endif > +#ifdef __GNUC__ > +/* avoid stack frame in lrint */ > +__attribute__((noinline)) > +#endif > +static long lrint_slow(double x) > { > #pragma STDC FENV_ACCESS ON > int e; > @@ -38,6 +50,20 @@ long lrint(double x) > /* conversion */ > return x; > } > + > +long lrint(double x) > +{ > + uint32_t abstop = asuint64(x)>>32 & 0x7fffffff; > + uint64_t sign = asuint64(x) & (1ULL << 63); > + > + if (abstop < 0x41dfffff) { > + /* |x| < 0x7ffffc00, no overflow */ > + double_t toint = asdouble(asuint64(1/EPS) | sign); > + double_t y = x + toint - toint; > + return (long)y; > + } > + return lrint_slow(x); > +} > #else > long lrint(double x) > { This code should be considerably faster than calling rint on 64-bit archs too, no? I wonder if it should be something like (untested, written inline here): long lrint(double x) { uint32_t abstop = asuint64(x)>>32 & 0x7fffffff; uint64_t sign = asuint64(x) & (1ULL << 63); #if LONG_MAX < 1U<<53 && defined(FE_INEXACT) if (abstop >= 0x41dfffff) return lrint_slow(x); #endif /* |x| < 0x7ffffc00, no overflow */ double_t toint = asdouble(asuint64(1/EPS) | sign); double_t y = x + toint - toint; return (long)y; } Rich