mailing list of musl libc
 help / color / mirror / code / Atom feed
* More patches for math subtree
@ 2019-12-10 16:57 Stefan Kanthak
  2019-12-10 19:35 ` Rich Felker
  0 siblings, 1 reply; 7+ messages in thread
From: Stefan Kanthak @ 2019-12-10 16:57 UTC (permalink / raw)
  To: musl

Some more optimisations: the current implementations of ceil(), floor()
and trunc() for i386 change the rounding control using fldcw instructions,
which are SLOW; these patches provide faster and smaller branch-free (!)
implementations.

JFTR: I'm NOT subscribed to your mailing list, so CC: me in replies!

--- -/src/math/i386/floor.s
+++ +/src/math/i386/floor.s
@@ -1,67 +1,32 @@
 .global floorf
 .type floorf,@function
 floorf:
         flds 4(%esp)
         jmp 1f
 
 .global floorl
 .type floorl,@function
 floorl:
         fldt 4(%esp)
         jmp 1f
 
 .global floor
 .type floor,@function
 floor:
         fldl 4(%esp)
+# Shared tail: st(0) = x on entry.  floor(x) = r if r <= x, else r - 1,
+# where r = rint(x) in the current rounding mode (no fldcw needed).
+# The adjustment is written as (-1) + r: fadd is commutative, so it is
+# immune to the AT&T fsub/fsubr operand swap that GAS applies when the
+# destination is %st(i).  Selecting between r and r - 1 with fcmov
+# (instead of subtracting 0 or 1) keeps the sign of a zero result.
+1:      fld %st(0)              # x, x
+        frndint                 # r, x
+        fucomi %st(1),%st(0)    # EFLAGS <- compare r with x
+        fld1                    # 1, r, x
+        fchs                    # -1, r, x
+        fadd %st(1),%st(0)      # r-1, r, x
+        fcmovbe %st(1),%st(0)   # r <= x or NaN: take r -> result, r, x
+        fstp %st(2)             # r, result
+        fstp %st(0)             # result
+        ret
-1:      mov $0x7,%al
-1:      fstcw 4(%esp)
-        mov 5(%esp),%ah
-        mov %al,5(%esp)
-        fldcw 4(%esp)
-        frndint
-        mov %ah,5(%esp)
-        fldcw 4(%esp)
-        ret
-
-.global ceil
-.type ceil,@function
-ceil:
-        fldl 4(%esp)
-        mov $0xb,%al
-        jmp 1b
-
-.global ceilf
-.type ceilf,@function
-ceilf:
-        flds 4(%esp)
-        mov $0xb,%al
-        jmp 1b
-
-.global ceill
-.type ceill,@function
-ceill:
-        fldt 4(%esp)
-        mov $0xb,%al
-        jmp 1b
-
-.global trunc
-.type trunc,@function
-trunc:
-        fldl 4(%esp)
-        mov $0xf,%al
-        jmp 1b
-
-.global truncf
-.type truncf,@function
-truncf:
-        flds 4(%esp)
-        mov $0xf,%al
-        jmp 1b
-
-.global truncl
-.type truncl,@function
-truncl:
-        fldt 4(%esp)
-        mov $0xf,%al
-        jmp 1b

--- -/src/math/i386/ceilf.s
+++ +/src/math/i386/ceilf.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see ceil.s

--- -/src/math/i386/ceill.s
+++ +/src/math/i386/ceill.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see ceil.s

--- -/src/math/i386/ceil.s
+++ +/src/math/i386/ceil.s
@@ -1,1 +1,35 @@
-# see floor.s
+# ceilf/ceill/ceil: load the argument from 4(%esp) and round it up to an
+# integral value, returned in st(0), without changing the x87 control word.
+.global ceilf
+.type ceilf,@function
+ceilf:
+        flds 4(%esp)
+        jmp 1f
+
+.global ceill
+.type ceill,@function
+ceill:
+        fldt 4(%esp)
+        jmp 1f
+
+.global ceil
+.type ceil,@function
+ceil:
+        fldl 4(%esp)
+# Shared tail: st(0) = x on entry.  ceil(x) = -floor(-x), with
+# floor(y) = r if r <= y, else (-1) + r, where r = rint(y) in the current
+# rounding mode.  Going through floor(-x) gives -0.0 for -1 < x <= -0.0;
+# the adjustment uses fadd (commutative), so the AT&T fsub/fsubr operand
+# swap in GAS cannot affect it.
+1:      fchs                    # y = -x
+        fld %st(0)              # y, y
+        frndint                 # r, y
+        fucomi %st(1),%st(0)    # EFLAGS <- compare r with y
+        fld1                    # 1, r, y
+        fchs                    # -1, r, y
+        fadd %st(1),%st(0)      # r-1, r, y
+        fcmovbe %st(1),%st(0)   # r <= y or NaN: take r -> floor(y), r, y
+        fstp %st(2)             # r, floor(y)
+        fstp %st(0)             # floor(y)
+        fchs                    # -floor(-x) = ceil(x)
+        ret

--- -/src/math/i386/truncf.s
+++ +/src/math/i386/truncf.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see trunc.s

--- -/src/math/i386/truncl.s
+++ +/src/math/i386/truncl.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see trunc.s

--- -/src/math/i386/trunc.s
+++ +/src/math/i386/trunc.s
@@ -1,1 +1,45 @@
-# see floor.s
+# truncf/truncl/trunc: load the argument from 4(%esp) and round it toward
+# zero to an integral value, returned in st(0), without using fldcw.
+.global truncf
+.type truncf,@function
+truncf:
+        flds 4(%esp)
+        jmp 1f
+
+.global truncl
+.type truncl,@function
+truncl:
+        fldt 4(%esp)
+        jmp 1f
+
+.global trunc
+.type trunc,@function
+trunc:
+        fldl 4(%esp)
+# Shared tail: st(0) = x on entry.
+# y = x (if 0 <= x or x is NaN) or -x (if x < 0); t = floor(y), computed
+# from r = rint(y) as t = r if r <= y, else (-1) + r; the result is t or
+# -t by the same test on x.  Because -0.0 compares equal to 0 it is
+# never negated and passes through frndint unchanged, so trunc(-0.0) is
+# -0.0; for -1 < x < 0 the +0 produced for y is negated to -0.
+1:      fldz                    # 0, x
+        fucomip %st(1),%st(0)   # EFLAGS <- compare 0 with x; pop -> x
+        fld %st(0)              # x, x
+        fchs                    # -x, x
+        fcmovbe %st(1),%st(0)   # 0 <= x or NaN: take x -> y, x
+        fld %st(0)              # y, y, x
+        frndint                 # r, y, x
+        fucomi %st(1),%st(0)    # EFLAGS <- compare r with y
+        fld1                    # 1, r, y, x
+        fchs                    # -1, r, y, x
+        fadd %st(1),%st(0)      # r-1, r, y, x  (commutative: no fsub/fsubr swap)
+        fcmovbe %st(1),%st(0)   # r <= y or NaN: take r -> t, r, y, x
+        fstp %st(2)             # r, t, x
+        fstp %st(0)             # t, x
+        fldz                    # 0, t, x
+        fucomip %st(2),%st(0)   # EFLAGS <- compare 0 with x; pop -> t, x
+        fst %st(1)              # t, t
+        fchs                    # -t, t
+        fcmovbe %st(1),%st(0)   # 0 <= x or NaN: take t -> result, t
+        fstp %st(1)             # result
+        ret



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-12-11 10:28 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-10 16:57 More patches for math subtree Stefan Kanthak
2019-12-10 19:35 ` Rich Felker
2019-12-10 21:32   ` Stefan Kanthak
2019-12-10 22:17     ` Rich Felker
2019-12-11  1:13       ` Rosen Penev
2019-12-11  9:53       ` Stefan Kanthak
2019-12-11 10:28         ` Szabolcs Nagy

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).