mailing list of musl libc
 help / color / mirror / code / Atom feed
* Patches for math subtree
@ 2019-12-07 20:15 Stefan Kanthak
  2019-12-07 20:38 ` Rich Felker
  0 siblings, 1 reply; 3+ messages in thread
From: Stefan Kanthak @ 2019-12-07 20:15 UTC (permalink / raw)
  To: musl

Just some optimisations.

--- -/src/math/i386/remquo.s
+++ +/src/math/i386/remquo.s
@@ -23,23 +23,17 @@
 remquo:
         mov 20(%esp),%ecx
         fldl 12(%esp)
         fldl 4(%esp)
         mov 19(%esp),%dh
         xor 11(%esp),%dh
 1:      fprem1
         fnstsw %ax
         sahf
         jp 1b
         fstp %st(1)
-        mov %ah,%dl
-        shr %dl
-        and $1,%dl
-        mov %ah,%al
-        shr $5,%al
-        and $2,%al
-        or %al,%dl
-        mov %ah,%al
-        shl $2,%al
-        and $4,%al
-        or %al,%dl
+        setc %dl
+        shl $2,%ah
+        adc %dl,%dl
+        shl $5,%ah
+        adc %dl,%dl
         test %dh,%dh

--- -/src/math/ceil.c
+++ +/src/math/ceil.c
@@ -18,10 +18,10 @@
+        /* special case because of non-nearest rounding modes */
+        if (e < 0x3ff) {
+                FORCE_EVAL(y);
+                return u.i >> 63 ? -0.0 : 1.0;
+        }
         /* y = int(x) - x, where int(x) is an integer neighbor of x */
         if (u.i >> 63)
                 y = x - toint + toint - x;
         else
                 y = x + toint - toint - x;
-        /* special case because of non-nearest rounding modes */
-        if (e <= 0x3ff-1) {
-                FORCE_EVAL(y);
-                return u.i >> 63 ? -0.0 : 1;
-        }

--- -/src/math/floor.c
+++ +/src/math/floor.c
@@ -18,10 +18,10 @@
+        /* special case because of non-nearest rounding modes */
+        if (e < 0x3ff) {
+                 FORCE_EVAL(y);
+                 return u.i >> 63 ? -1.0 : 0.0;
+        }
         /* y = int(x) - x, where int(x) is an integer neighbor of x */
         if (u.i >> 63)
                 y = x - toint + toint - x;
         else
                 y = x + toint - toint - x;
-        /* special case because of non-nearest rounding modes */
-        if (e <= 0x3ff-1) {
-                 FORCE_EVAL(y);
-                 return u.i >> 63 ? -1 : 0;
-        }



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: Patches for math subtree
  2019-12-07 20:15 Patches for math subtree Stefan Kanthak
@ 2019-12-07 20:38 ` Rich Felker
  2019-12-08 14:54   ` Rich Felker
  0 siblings, 1 reply; 3+ messages in thread
From: Rich Felker @ 2019-12-07 20:38 UTC (permalink / raw)
  To: musl

On Sat, Dec 07, 2019 at 09:15:34PM +0100, Stefan Kanthak wrote:
> Just some optimisations.
> 
> --- -/src/math/i386/remquo.s
> +++ +/src/math/i386/remquo.s
> @@ -23,23 +23,17 @@
>  remquo:
>          mov 20(%esp),%ecx
>          fldl 12(%esp)
>          fldl 4(%esp)
>          mov 19(%esp),%dh
>          xor 11(%esp),%dh
>  1:      fprem1
>          fnstsw %ax
>          sahf
>          jp 1b
>          fstp %st(1)
> -        mov %ah,%dl
> -        shr %dl
> -        and $1,%dl
> -        mov %ah,%al
> -        shr $5,%al
> -        and $2,%al
> -        or %al,%dl
> -        mov %ah,%al
> -        shl $2,%al
> -        and $4,%al
> -        or %al,%dl
> +        setc %dl
> +        shl $2,%ah
> +        adc %dl,%dl
> +        shl $5,%ah
> +        adc %dl,%dl
>          test %dh,%dh
> 
> --- -/src/math/ceil.c
> +++ +/src/math/ceil.c
> @@ -18,10 +18,10 @@
> +        /* special case because of non-nearest rounding modes */
> +        if (e < 0x3ff) {
> +                FORCE_EVAL(y);
> +                return u.i >> 63 ? -0.0 : 1.0;
> +        }
>          /* y = int(x) - x, where int(x) is an integer neighbor of x */
>          if (u.i >> 63)
>                  y = x - toint + toint - x;
>          else
>                  y = x + toint - toint - x;
> -        /* special case because of non-nearest rounding modes */
> -        if (e <= 0x3ff-1) {
> -                FORCE_EVAL(y);
> -                return u.i >> 63 ? -0.0 : 1;
> -        }
> 
> --- -/src/math/floor.c
> +++ +/src/math/floor.c
> @@ -18,10 +18,10 @@
> +        /* special case because of non-nearest rounding modes */
> +        if (e < 0x3ff) {
> +                 FORCE_EVAL(y);
> +                 return u.i >> 63 ? -1.0 : 0.0;
> +        }
>          /* y = int(x) - x, where int(x) is an integer neighbor of x */
>          if (u.i >> 63)
>                  y = x - toint + toint - x;
>          else
>                  y = x + toint - toint - x;
> -        /* special case because of non-nearest rounding modes */
> -        if (e <= 0x3ff-1) {
> -                 FORCE_EVAL(y);
> -                 return u.i >> 63 ? -1 : 0;
> -        }

Do you have any explanation of why these are optimizations?

Rich


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: Patches for math subtree
  2019-12-07 20:38 ` Rich Felker
@ 2019-12-08 14:54   ` Rich Felker
  0 siblings, 0 replies; 3+ messages in thread
From: Rich Felker @ 2019-12-08 14:54 UTC (permalink / raw)
  To: musl

On Sat, Dec 07, 2019 at 03:38:04PM -0500, Rich Felker wrote:
> On Sat, Dec 07, 2019 at 09:15:34PM +0100, Stefan Kanthak wrote:
> > Just some optimisations.
> > 
> > --- -/src/math/i386/remquo.s
> > +++ +/src/math/i386/remquo.s
> > @@ -23,23 +23,17 @@
> >  remquo:
> >          mov 20(%esp),%ecx
> >          fldl 12(%esp)
> >          fldl 4(%esp)
> >          mov 19(%esp),%dh
> >          xor 11(%esp),%dh
> >  1:      fprem1
> >          fnstsw %ax
> >          sahf
> >          jp 1b
> >          fstp %st(1)
> > -        mov %ah,%dl
> > -        shr %dl
> > -        and $1,%dl
> > -        mov %ah,%al
> > -        shr $5,%al
> > -        and $2,%al
> > -        or %al,%dl
> > -        mov %ah,%al
> > -        shl $2,%al
> > -        and $4,%al
> > -        or %al,%dl
> > +        setc %dl
> > +        shl $2,%ah
> > +        adc %dl,%dl
> > +        shl $5,%ah
> > +        adc %dl,%dl
> >          test %dh,%dh
> > 
> > --- -/src/math/ceil.c
> > +++ +/src/math/ceil.c
> > @@ -18,10 +18,10 @@
> > +        /* special case because of non-nearest rounding modes */
> > +        if (e < 0x3ff) {
> > +                FORCE_EVAL(y);
> > +                return u.i >> 63 ? -0.0 : 1.0;
> > +        }
> >          /* y = int(x) - x, where int(x) is an integer neighbor of x */
> >          if (u.i >> 63)
> >                  y = x - toint + toint - x;
> >          else
> >                  y = x + toint - toint - x;
> > -        /* special case because of non-nearest rounding modes */
> > -        if (e <= 0x3ff-1) {
> > -                FORCE_EVAL(y);
> > -                return u.i >> 63 ? -0.0 : 1;
> > -        }
> > 
> > --- -/src/math/floor.c
> > +++ +/src/math/floor.c
> > @@ -18,10 +18,10 @@
> > +        /* special case because of non-nearest rounding modes */
> > +        if (e < 0x3ff) {
> > +                 FORCE_EVAL(y);
> > +                 return u.i >> 63 ? -1.0 : 0.0;
> > +        }
> >          /* y = int(x) - x, where int(x) is an integer neighbor of x */
> >          if (u.i >> 63)
> >                  y = x - toint + toint - x;
> >          else
> >                  y = x + toint - toint - x;
> > -        /* special case because of non-nearest rounding modes */
> > -        if (e <= 0x3ff-1) {
> > -                 FORCE_EVAL(y);
> > -                 return u.i >> 63 ? -1 : 0;
> > -        }
> 
> Do you have any explanation of why these are optimizations?

Specifically, the x86 asm one looks like it probably is an optimization,
but I haven't read it closely enough to verify. If it's a measurable
improvement I'll try to take a look at it soon. However, all of the .s
files here are slated for eventual removal and replacement with inline
asm in .c files that avoids all of the delicate flow/logic in asm and
just uses the x87 instructions needed, so I don't want to spend a lot of
effort on improving and validating improvements to them.

For the latter two, the patches as written are wrong. They move
FORCE_EVAL(y) ahead of the assignments to y, so they evaluate an
uninitialized variable y. And I think these functions are required to
set the status flags, so you can't just remove that computation. Maybe
there's some alternate way to do it that would be faster, like just
evaluating x±toint rather than the whole expression, but I'm not sure it
helps.

Rich


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-12-08 14:54 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-07 20:15 Patches for math subtree Stefan Kanthak
2019-12-07 20:38 ` Rich Felker
2019-12-08 14:54   ` Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).