mailing list of musl libc
 help / color / mirror / code / Atom feed
From: "Stefan Kanthak" <stefan.kanthak@nexgo.de>
To: <musl@lists.openwall.com>
Subject: More patches for math subtree
Date: Tue, 10 Dec 2019 17:57:55 +0100	[thread overview]
Message-ID: <2C3325A208DA4260A1A0F7B4517D6DFA@H270> (raw)

Some more optimisations: the current implementations of ceil(), floor()
and trunc() for i386 change the rounding control using fldcw instructions,
which are SLOW; these patches provide faster and smaller branch-free (!)
implementations.

JFTR: I'm NOT subscribed to your mailing list, so CC: me in replies!

--- -/src/math/i386/floor.s
+++ +/src/math/i386/floor.s
@@ -1,67 +1,34 @@
 .global floorf
 .type floorf,@function
 floorf:
         flds 4(%esp)
         jmp 1f
 
 .global floorl
 .type floorl,@function
 floorl:
         fldt 4(%esp)
         jmp 1f
 
 .global floor
 .type floor,@function
 floor:
         fldl 4(%esp)
+# Shared tail of floorf/floorl/floor. On entry st0 = x; returns floor(x) in st0.
+# r = x rounded to an integer by frndint (current rounding mode); the result
+# is r - 1 when x < r, otherwise r - 0. NaN compares unordered (CF=1), takes
+# the r - 1 path and stays NaN.
+# fsubrp, not fsubp: in GNU as AT&T syntax the fsub/fsubr mnemonics are swapped
+# when the destination is %st(i), so fsubp here would yield st0 - r.
+# NOTE(review): fucomip/fcmovcc need a P6-class (Pentium Pro or later) CPU;
+# confirm that fits the minimum CPU this i386 port must run on.
+1:      fld %st(0)              # st0 = x, st1 = x
+        frndint                 # st0 = r, st1 = x
+        fxch %st(1)             # st0 = x, st1 = r
+        fucomip %st(1),%st(0)   # EFLAGS = compare(x, r); pop -> st0 = r
+        fld1                    # st0 = 1, st1 = r
+        fldz                    # st0 = 0, st1 = 1, st2 = r
+        fcmovb %st(1),%st(0)    # x < r: st0 = 1 (otherwise stays 0)
+        fsubrp %st(0),%st(2)    # st2 = r - st0; pop -> st0 = 1, st1 = result
+        fstp %st(0)             # drop the spare 1 -> st0 = result
+        ret
-1:      mov $0x7,%al
-1:      fstcw 4(%esp)
-        mov 5(%esp),%ah
-        mov %al,5(%esp)
-        fldcw 4(%esp)
-        frndint
-        mov %ah,5(%esp)
-        fldcw 4(%esp)
-        ret
-
-.global ceil
-.type ceil,@function
-ceil:
-        fldl 4(%esp)
-        mov $0xb,%al
-        jmp 1b
-
-.global ceilf
-.type ceilf,@function
-ceilf:
-        flds 4(%esp)
-        mov $0xb,%al
-        jmp 1b
-
-.global ceill
-.type ceill,@function
-ceill:
-        fldt 4(%esp)
-        mov $0xb,%al
-        jmp 1b
-
-.global trunc
-.type trunc,@function
-trunc:
-        fldl 4(%esp)
-        mov $0xf,%al
-        jmp 1b
-
-.global truncf
-.type truncf,@function
-truncf:
-        flds 4(%esp)
-        mov $0xf,%al
-        jmp 1b
-
-.global truncl
-.type truncl,@function
-truncl:
-        fldt 4(%esp)
-        mov $0xf,%al
-        jmp 1b

--- -/src/math/i386/ceilf.s
+++ +/src/math/i386/ceilf.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see ceil.s

--- -/src/math/i386/ceill.s
+++ +/src/math/i386/ceill.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see ceil.s

--- -/src/math/i386/ceil.s
+++ +/src/math/i386/ceil.s
@@ -1,1 +1,38 @@
-# see floor.s
+.global ceilf
+.type ceilf,@function
+ceilf:
+        flds 4(%esp)
+        jmp 1f
+
+.global ceill
+.type ceill,@function
+ceill:
+        fldt 4(%esp)
+        jmp 1f
+
+.global ceil
+.type ceil,@function
+ceil:
+        fldl 4(%esp)
+# Shared tail of ceilf/ceill/ceil. On entry st0 = x; returns ceil(x) in st0.
+# Computed as -floor(-x): y = -x, r = y rounded to an integer by frndint
+# (current rounding mode), f = r - 1 when y < r, otherwise r - 0, result = -f.
+# Subtracting and negating (instead of adding 0 or 1 to the rounded x) keeps
+# the sign of zero under round-to-nearest: -1 < x <= -0.0 gives -0.0.
+# NaN compares unordered (CF=1), takes the r - 1 path and stays NaN.
+# fsubrp, not fsubp: in GNU as AT&T syntax the fsub/fsubr mnemonics are swapped
+# when the destination is %st(i), so fsubp here would yield st0 - r.
+# NOTE(review): fucomip/fcmovcc need a P6-class (Pentium Pro or later) CPU;
+# confirm that fits the minimum CPU this i386 port must run on.
+1:      fchs                    # st0 = y = -x
+        fld %st(0)              # st0 = y, st1 = y
+        frndint                 # st0 = r, st1 = y
+        fxch %st(1)             # st0 = y, st1 = r
+        fucomip %st(1),%st(0)   # EFLAGS = compare(y, r); pop -> st0 = r
+        fld1                    # st0 = 1, st1 = r
+        fldz                    # st0 = 0, st1 = 1, st2 = r
+        fcmovb %st(1),%st(0)    # y < r: st0 = 1 (otherwise stays 0)
+        fsubrp %st(0),%st(2)    # st2 = r - st0; pop -> st0 = 1, st1 = f
+        fstp %st(0)             # drop the spare 1 -> st0 = f = floor(-x)
+        fchs                    # st0 = -f = ceil(x)
+        ret

--- -/src/math/i386/truncf.s
+++ +/src/math/i386/truncf.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see trunc.s

--- -/src/math/i386/truncl.s
+++ +/src/math/i386/truncl.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see trunc.s

--- -/src/math/i386/trunc.s
+++ +/src/math/i386/trunc.s
@@ -1,1 +1,44 @@
-# see floor.s
+.global truncf
+.type truncf,@function
+truncf:
+        flds 4(%esp)
+        jmp 1f
+
+.global truncl
+.type truncl,@function
+truncl:
+        fldt 4(%esp)
+        jmp 1f
+
+.global trunc
+.type trunc,@function
+trunc:
+        fldl 4(%esp)
+# Shared tail of truncf/truncl/trunc. On entry st0 = x; returns trunc(x) in st0.
+# m = floor(|x|): r = |x| rounded to an integer by frndint (current rounding
+# mode), minus 1 when |x| < r. The result is m for x > 0, -m for x < 0, and x
+# itself for x == 0 so that -0.0 is returned unchanged (0 and -0.0 compare
+# equal, so a compare alone cannot pick the sign). NaN is unordered in both
+# compares (CF=ZF=1) and ends up selecting m, which is NaN.
+# fsubrp, not fsubp: in GNU as AT&T syntax the fsub/fsubr mnemonics are swapped
+# when the destination is %st(i), so fsubp here would yield st0 - r.
+# NOTE(review): fucomip/fcmovcc need a P6-class (Pentium Pro or later) CPU;
+# confirm that fits the minimum CPU this i386 port must run on.
+1:      fld %st(0)              # st0 = x, st1 = x
+        fabs                    # st0 = |x|, st1 = x
+        fld %st(0)              # st0 = |x|, st1 = |x|, st2 = x
+        frndint                 # st0 = r, st1 = |x|, st2 = x
+        fxch %st(1)             # st0 = |x|, st1 = r, st2 = x
+        fucomip %st(1),%st(0)   # EFLAGS = compare(|x|, r); pop -> r, x
+        fldz                    # st0 = 0, st1 = r, st2 = x
+        fld1                    # st0 = 1, st1 = 0, st2 = r, st3 = x
+        fcmovnb %st(1),%st(0)   # |x| >= r: st0 = 0 (otherwise stays 1)
+        fsubrp %st(0),%st(2)    # st2 = r - st0; pop -> 0, m, x
+        fucomip %st(2),%st(0)   # EFLAGS = compare(0, x); pop -> m, x
+        fld %st(0)              # st0 = m, st1 = m, st2 = x
+        fchs                    # st0 = -m (default: x < 0)
+        fcmove %st(2),%st(0)    # x == 0: st0 = x, keeps the sign of zero
+        fcmovb %st(1),%st(0)    # 0 < x (or NaN): st0 = m
+        fstp %st(2)             # store result over x; pop -> m, result
+        fstp %st(0)             # drop m -> st0 = result
+        ret



             reply	other threads:[~2019-12-10 16:57 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-10 16:57 Stefan Kanthak [this message]
2019-12-10 19:35 ` Rich Felker
2019-12-10 21:32   ` Stefan Kanthak
2019-12-10 22:17     ` Rich Felker
2019-12-11  1:13       ` Rosen Penev
2019-12-11  9:53       ` Stefan Kanthak
2019-12-11 10:28         ` Szabolcs Nagy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2C3325A208DA4260A1A0F7B4517D6DFA@H270 \
    --to=stefan.kanthak@nexgo.de \
    --cc=musl@lists.openwall.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).