From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on inbox.vuxu.org X-Spam-Level: X-Spam-Status: No, score=-3.3 required=5.0 tests=MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED,RCVD_IN_MSPIKE_H3,RCVD_IN_MSPIKE_WL autolearn=ham autolearn_force=no version=3.4.4 Received: (qmail 13878 invoked from network); 12 Aug 2020 11:34:47 -0000 Received: from mother.openwall.net (195.42.179.200) by inbox.vuxu.org with ESMTPUTF8; 12 Aug 2020 11:34:47 -0000 Received: (qmail 7981 invoked by uid 550); 12 Aug 2020 11:34:42 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-ID: Reply-To: musl@lists.openwall.com Received: (qmail 7950 invoked from network); 12 Aug 2020 11:34:42 -0000 From: Alexander Monakov To: musl@lists.openwall.com Date: Wed, 12 Aug 2020 14:34:30 +0300 Message-Id: <20200812113430.9254-1-amonakov@ispras.ru> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20200811231008.8896-1-amonakov@ispras.ru> References: <20200811231008.8896-1-amonakov@ispras.ru> Subject: [musl] [PATCH 4/3] setjmp: optimize longjmp prologues Use a branchless sequence that is one byte shorter on 64-bit and the same size on 32-bit. Thanks to Pete Cawley for suggesting this variant. --- I'm sending this revised patch after Pete Cawley (@corsix) suggested a preferable sequence on Twitter. A similar cmp-adc combo can be used to replace the branchy sequence in i386 longjmp code. 
src/setjmp/i386/longjmp.s | 6 ++---- src/setjmp/x32/longjmp.s | 8 +++----- src/setjmp/x86_64/longjmp.s | 8 +++----- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/src/setjmp/i386/longjmp.s b/src/setjmp/i386/longjmp.s index b429f135..8188f06b 100644 --- a/src/setjmp/i386/longjmp.s +++ b/src/setjmp/i386/longjmp.s @@ -6,10 +6,8 @@ _longjmp: longjmp: mov 4(%esp),%edx mov 8(%esp),%eax - test %eax,%eax - jnz 1f - inc %eax -1: + cmp $1,%eax + adc $0, %al mov (%edx),%ebx mov 4(%edx),%esi mov 8(%edx),%edi diff --git a/src/setjmp/x32/longjmp.s b/src/setjmp/x32/longjmp.s index bb88afa1..1b2661c3 100644 --- a/src/setjmp/x32/longjmp.s +++ b/src/setjmp/x32/longjmp.s @@ -5,11 +5,9 @@ .type longjmp,@function _longjmp: longjmp: - mov %esi,%eax /* val will be longjmp return */ - test %esi,%esi - jnz 1f - inc %eax /* if val==0, val=1 per longjmp semantics */ -1: + xor %eax,%eax + cmp $1,%esi /* CF = val ? 0 : 1 */ + adc %esi,%eax /* eax = val + !val */ mov (%rdi),%rbx /* rdi is the jmp_buf, restore regs from it */ mov 8(%rdi),%rbp mov 16(%rdi),%r12 diff --git a/src/setjmp/x86_64/longjmp.s b/src/setjmp/x86_64/longjmp.s index bb88afa1..1b2661c3 100644 --- a/src/setjmp/x86_64/longjmp.s +++ b/src/setjmp/x86_64/longjmp.s @@ -5,11 +5,9 @@ .type longjmp,@function _longjmp: longjmp: - mov %esi,%eax /* val will be longjmp return */ - test %esi,%esi - jnz 1f - inc %eax /* if val==0, val=1 per longjmp semantics */ -1: + xor %eax,%eax + cmp $1,%esi /* CF = val ? 0 : 1 */ + adc %esi,%eax /* eax = val + !val */ mov (%rdi),%rbx /* rdi is the jmp_buf, restore regs from it */ mov 8(%rdi),%rbp mov 16(%rdi),%r12 -- 2.11.0