From mboxrd@z Thu Jan 1 00:00:00 1970 X-Msuck: nntp://news.gmane.org/gmane.linux.lib.musl.general/14671 Path: news.gmane.org!.POSTED.blaine.gmane.org!not-for-mail From: Andre McCurdy Newsgroups: gmane.linux.lib.musl.general Subject: [PATCH 2/2] Add big-endian support to ARM assembler memcpy Date: Fri, 13 Sep 2019 11:44:32 -0700 Message-ID: <20190913184432.29753-2-armccurdy@gmail.com> References: <20190913184432.29753-1-armccurdy@gmail.com> Reply-To: musl@lists.openwall.com Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Injection-Info: blaine.gmane.org; posting-host="blaine.gmane.org:195.159.176.226"; logging-data="245278"; mail-complaints-to="usenet@blaine.gmane.org" Cc: Andre McCurdy To: musl@lists.openwall.com Original-X-From: musl-return-14687-gllmg-musl=m.gmane.org@lists.openwall.com Fri Sep 13 20:45:00 2019 Return-path: Envelope-to: gllmg-musl@m.gmane.org Original-Received: from mother.openwall.net ([195.42.179.200]) by blaine.gmane.org with smtp (Exim 4.89) (envelope-from ) id 1i8qZ2-0011hR-8n for gllmg-musl@m.gmane.org; Fri, 13 Sep 2019 20:45:00 +0200 Original-Received: (qmail 31825 invoked by uid 550); 13 Sep 2019 18:44:56 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-ID: Original-Received: (qmail 30656 invoked from network); 13 Sep 2019 18:44:55 -0000 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=n2MGQS48F/TnN2WsCCvGlHmsFNhSW8kGT8AgCkp9940=; b=OVHCMUVffBWVUSiz8+pBfGNZiTe+kPaKZMVDkF4LMJLulK1zE6g4Y6wSKZPMcz8Suv C/597oq1hc8vWdKH4MzuXrx1S5T2GrTdla8kfbMGCpz/nHh3UlgH3vg3mVDdOWqXsP6S O0+pFZ0CVLYyu/5kcBcHNsnON0WW9LQPh8HkJ6wXE9Xp7BC52B1jP9TtEBKI4wp5ekLS 7va62pK8PGItQtopl/ugieGYWR7QivIj88I51Qt9l5AZWyEn8sdrvvcxdzEfYe6vwdis XYOSKuhxTa1qNnKvF9p/Aqsip4/t/0O/Khvv6zRnkcfcHF+0FhYXT+wjdjHt0TCRNDye e7iw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=n2MGQS48F/TnN2WsCCvGlHmsFNhSW8kGT8AgCkp9940=; b=N/aXwa6m+/K4aStqnere4SSfSMRz7WBJ/vXqcKXQv9pRoXdYzGf6RjALzbDZol4zIA sX9N3BrSbsTWk4lPJt2/GLfwvLumoBAjWaIdGFqu77oAbHkyY5Cg01ykevODmlbA2Pwa bgFQDLvXRNYvfypxdIFHUJkDqTfdJ1uJtERRCeZcn0w9j9PWbkAfiGMoiGOR6bLaG71B POYE4haFVAWkURNKU0ny7zUv8FhxUhIC/VLQxBrr2PZynUZ4aOgzdtLqPvSw3lacePBa BPBoA7j42pwIgGGdXnY9mlC4DwFYU/8rfckm10CfLDDv/cSBftkOHp0DloKly5jLgWPJ 9EvA== X-Gm-Message-State: APjAAAVQHjeMJYUFF8bsFhKfe1QjDeqTKp8G+bD9wzt4EFWV68FGT5RZ nTGR/h81Id0/V0CQyYm0S3TLt5o5 X-Google-Smtp-Source: APXvYqx4W6mlpa4S6oEKE2rjp/CTGlfKthndId7jeid94sp8lS4RAGR33vXVDvMbEq+innx5d4Mckg== X-Received: by 2002:a17:90a:28c5:: with SMTP id f63mr6840641pjd.67.1568400283650; Fri, 13 Sep 2019 11:44:43 -0700 (PDT) X-Mailer: git-send-email 2.23.0 In-Reply-To: <20190913184432.29753-1-armccurdy@gmail.com> Xref: news.gmane.org gmane.linux.lib.musl.general:14671 Archived-At: Allow the existing ARM assembler memcpy implementation to be used for both big and little endian targets. --- COPYRIGHT | 2 +- src/string/arm/{memcpy_le.S => memcpy.S} | 101 ++++++++++++++++++++++- src/string/arm/memcpy.c | 3 - 3 files changed, 98 insertions(+), 8 deletions(-) rename src/string/arm/{memcpy_le.S => memcpy.S} (83%) delete mode 100644 src/string/arm/memcpy.c diff --git a/COPYRIGHT b/COPYRIGHT index 2525ffb5..96c2b070 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -126,7 +126,7 @@ Copyright © 2008 Stephen L. Moshier and labelled as such in comments in the individual source files. All have been licensed under extremely permissive terms. -The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008 +The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008 The Android Open Source Project and is licensed under a two-clause BSD license. It was taken from Bionic libc, used on Android. diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy.S similarity index 83% rename from src/string/arm/memcpy_le.S rename to src/string/arm/memcpy.S index 64bc5f9e..766a88a5 100644 --- a/src/string/arm/memcpy_le.S +++ b/src/string/arm/memcpy.S @@ -1,5 +1,3 @@ -#if !__ARMEB__ - /* * Copyright (C) 2008 The Android Open Source Project * All rights reserved. @@ -42,7 +40,7 @@ * code safely callable from thumb mode, adjusting the return * instructions to be compatible with pre-thumb ARM cpus, removal of * prefetch code that is not compatible with older cpus and support for - * building as thumb 2. + * building as thumb 2 and big-endian. */ .syntax unified @@ -227,24 +225,45 @@ non_congruent: * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) */ movs r5, r5, lsl #31 + +#if __ARMEB__ + movmi r3, r3, ror #24 + strbmi r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 +#else strbmi r3, [r0], #1 movmi r3, r3, lsr #8 strbcs r3, [r0], #1 movcs r3, r3, lsr #8 strbcs r3, [r0], #1 movcs r3, r3, lsr #8 +#endif cmp r2, #4 blo partial_word_tail +#if __ARMEB__ + mov r3, r3, lsr r12 + mov r3, r3, lsl r12 +#endif + /* Align destination to 32 bytes (cache line boundary) */ 1: tst r0, #0x1c beq 2f ldr r5, [r1], #4 sub r2, r2, #4 +#if __ARMEB__ + mov r4, r5, lsr lr + orr r4, r4, r3 + mov r3, r5, lsl r12 +#else mov r4, r5, lsl lr orr r4, r4, r3 mov r3, r5, lsr r12 +#endif str r4, [r0], #4 cmp r2, #4 bhs 1b @@ -270,6 +289,25 @@ loop16: ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} subs r2, r2, #32 ldrhs r12, [r1], #4 +#if __ARMEB__ + orr r3, r3, r4, lsr #16 + mov r4, r4, lsl #16 + orr r4, r4, r5, lsr #16 + mov r5, r5, lsl #16 + orr r5, r5, r6, lsr #16 + mov r6, r6, lsl #16 + orr r6, r6, r7, lsr #16 + mov r7, r7, lsl #16 + orr r7, r7, r8, lsr #16 + mov r8, r8, lsl #16 + orr r8, r8, r9, lsr #16 + mov r9, r9, lsl #16 + orr r9, r9, r10, lsr #16 + mov r10, r10, lsl #16 + orr r10, r10, r11, lsr #16 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsl #16 +#else orr r3, r3, r4, lsl #16 mov r4, r4, lsr #16 orr r4, r4, r5, lsl #16 @@ -287,6 +325,7 @@ loop16: orr r10, r10, r11, lsl #16 stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #16 +#endif bhs 1b b less_than_thirtytwo @@ -296,6 +335,25 @@ loop8: ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} subs r2, r2, #32 ldrhs r12, [r1], #4 +#if __ARMEB__ + orr r3, r3, r4, lsr #24 + mov r4, r4, lsl #8 + orr r4, r4, r5, lsr #24 + mov r5, r5, lsl #8 + orr r5, r5, r6, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r7, lsr #24 + mov r7, r7, lsl #8 + orr r7, r7, r8, lsr #24 + mov r8, r8, lsl #8 + orr r8, r8, r9, lsr #24 + mov r9, r9, lsl #8 + orr r9, r9, r10, lsr #24 + mov r10, r10, lsl #8 + orr r10, r10, r11, lsr #24 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsl #8 +#else orr r3, r3, r4, lsl #24 mov r4, r4, lsr #8 orr r4, r4, r5, lsl #24 @@ -313,6 +371,7 @@ loop8: orr r10, r10, r11, lsl #24 stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #8 +#endif bhs 1b b less_than_thirtytwo @@ -322,6 +381,25 @@ loop24: ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} subs r2, r2, #32 ldrhs r12, [r1], #4 +#if __ARMEB__ + orr r3, r3, r4, lsr #8 + mov r4, r4, lsl #24 + orr r4, r4, r5, lsr #8 + mov r5, r5, lsl #24 + orr r5, r5, r6, lsr #8 + mov r6, r6, lsl #24 + orr r6, r6, r7, lsr #8 + mov r7, r7, lsl #24 + orr r7, r7, r8, lsr #8 + mov r8, r8, lsl #24 + orr r8, r8, r9, lsr #8 + mov r9, r9, lsl #24 + orr r9, r9, r10, lsr #8 + mov r10, r10, lsl #24 + orr r10, r10, r11, lsr #8 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsl #24 +#else orr r3, r3, r4, lsl #8 mov r4, r4, lsr #24 orr r4, r4, r5, lsl #8 @@ -339,6 +417,7 @@ loop24: orr r10, r10, r11, lsl #8 stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #24 +#endif bhs 1b less_than_thirtytwo: @@ -350,9 +429,15 @@ less_than_thirtytwo: 1: ldr r5, [r1], #4 sub r2, r2, #4 +#if __ARMEB__ + mov r4, r5, lsr lr + orr r4, r4, r3 + mov r3, r5, lsl r12 +#else mov r4, r5, lsl lr orr r4, r4, r3 mov r3, r5, lsr r12 +#endif str r4, [r0], #4 cmp r2, #4 bhs 1b @@ -360,11 +445,20 @@ less_than_thirtytwo: partial_word_tail: /* we have a partial word in the input buffer */ movs r5, lr, lsl #(31-3) +#if __ARMEB__ + movmi r3, r3, ror #24 + strbmi r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 +#else strbmi r3, [r0], #1 movmi r3, r3, lsr #8 strbcs r3, [r0], #1 movcs r3, r3, lsr #8 strbcs r3, [r0], #1 +#endif /* Refill spilled registers from the stack. Don't update sp. */ ldmfd sp, {r5-r11} @@ -383,4 +477,3 @@ copy_last_3_and_return: ldmfd sp!, {r0, r4, lr} bx lr -#endif diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c deleted file mode 100644 index 041614f4..00000000 --- a/src/string/arm/memcpy.c +++ /dev/null @@ -1,3 +0,0 @@ -#if __ARMEB__ -#include "../memcpy.c" -#endif -- 2.23.0