mailing list of musl libc
 help / color / mirror / code / Atom feed
* [musl] [Patch] src/math/i386/remquo.s: remove conditional branch, shorter bit twiddling
@ 2021-08-01 15:59 Stefan Kanthak
  2021-08-03 20:27 ` Szabolcs Nagy
  0 siblings, 1 reply; 9+ messages in thread
From: Stefan Kanthak @ 2021-08-01 15:59 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 1624 bytes --]

<https://git.musl-libc.org/cgit/musl/plain/src/math/i386/remquo.s>

Nearly halve the number of instructions (from 11 to 6) needed to
fetch the (3-bit partial) quotient from the FPU condition flags
C0:C3:C1, and perform its negation without a conditional branch.

--- -/math/i386/remquo.s
+++ +/math/i386/remquo.s
@@ -2,49 +2,44 @@
 .type remquof,@function
 remquof:
        mov 12(%esp),%ecx
+       mov 8(%esp),%eax
+       xor 4(%esp),%eax
        flds 8(%esp)
        flds 4(%esp)
-       mov 11(%esp),%dh
-       xor 7(%esp),%dh
-       jmp 1f
+       jmp 0f

 .global remquol
 .type remquol,@function
 remquol:
        mov 28(%esp),%ecx
+       mov 24(%esp),%eax
+       xor 12(%esp),%eax
+       cwtl
        fldt 16(%esp)
        fldt 4(%esp)
-       mov 25(%esp),%dh
-       xor 13(%esp),%dh
-       jmp 1f
+       jmp 0f

 .global remquo
 .type remquo,@function
 remquo:
        mov 20(%esp),%ecx
+       mov 16(%esp),%eax
+       xor 8(%esp),%eax
        fldl 12(%esp)
        fldl 4(%esp)
-       mov 19(%esp),%dh
-       xor 11(%esp),%dh
+0:     cltd
 1:     fprem1
        fnstsw %ax
        sahf
        jp 1b
        fstp %st(1)
-       mov %ah,%dl
-       shr %dl
-       and $1,%dl
-       mov %ah,%al
-       shr $5,%al
-       and $2,%al
-       or %al,%dl
-       mov %ah,%al
-       shl $2,%al
-       and $4,%al
-       or %al,%dl
-       test %dh,%dh
-       jns 1f
-       neg %dl
-1:     movsbl %dl,%edx
-       mov %edx,(%ecx)
+       adc %al,%al
+       shl $2,%ah
+       adc %al,%al
+       shl $5,%ah
+       adc %al,%al
+       and $7,%eax
+       xor %edx,%eax
+       sub %edx,%eax
+       mov %eax,(%ecx)
        ret

[-- Attachment #2: remquo.patch --]
[-- Type: application/octet-stream, Size: 1097 bytes --]

--- -remquo.s
+++ +remquo.s
@@ -2,49 +2,44 @@
 .type remquof,@function
 remquof:
 	mov 12(%esp),%ecx
+	mov 8(%esp),%eax
+	xor 4(%esp),%eax
 	flds 8(%esp)
 	flds 4(%esp)
-	mov 11(%esp),%dh
-	xor 7(%esp),%dh
-	jmp 1f
+	jmp 0f
 
 .global remquol
 .type remquol,@function
 remquol:
 	mov 28(%esp),%ecx
+	mov 24(%esp),%eax
+	xor 12(%esp),%eax
+	cwtl
 	fldt 16(%esp)
 	fldt 4(%esp)
-	mov 25(%esp),%dh
-	xor 13(%esp),%dh
-	jmp 1f
+	jmp 0f
 
 .global remquo
 .type remquo,@function
 remquo:
 	mov 20(%esp),%ecx
+	mov 16(%esp),%eax
+	xor 8(%esp),%eax
 	fldl 12(%esp)
 	fldl 4(%esp)
-	mov 19(%esp),%dh
-	xor 11(%esp),%dh
+0:	cltd
 1:	fprem1
 	fnstsw %ax
 	sahf
 	jp 1b
 	fstp %st(1)
-	mov %ah,%dl
-	shr %dl
-	and $1,%dl
-	mov %ah,%al
-	shr $5,%al
-	and $2,%al
-	or %al,%dl
-	mov %ah,%al
-	shl $2,%al
-	and $4,%al
-	or %al,%dl
-	test %dh,%dh
-	jns 1f
-	neg %dl
-1:	movsbl %dl,%edx
-	mov %edx,(%ecx)
+	adc %al,%al
+	shl $2,%ah
+	adc %al,%al
+	shl $5,%ah
+	adc %al,%al
+	and $7,%eax
+	xor %edx,%eax
+	sub %edx,%eax
+	mov %eax,(%ecx)
 	ret

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-08-07 13:26 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-01 15:59 [musl] [Patch] src/math/i386/remquo.s: remove conditional branch, shorter bit twiddling Stefan Kanthak
2021-08-03 20:27 ` Szabolcs Nagy
2021-08-04 10:02   ` Stefan Kanthak
2021-08-05 13:40     ` Alexander Monakov
2021-08-06 10:17       ` Stefan Kanthak
2021-08-06 14:27         ` Rich Felker
2021-08-06 17:23           ` Stefan Kanthak
2021-08-07  0:55             ` Rich Felker
2021-08-07 13:12               ` Stefan Kanthak

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).