From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on inbox.vuxu.org X-Spam-Level: X-Spam-Status: No, score=-2.6 required=5.0 tests=DKIM_ADSP_CUSTOM_MED, DKIM_INVALID,DKIM_SIGNED,FREEMAIL_FORGED_FROMDOMAIN,FREEMAIL_FROM, HEADER_FROM_DIFFERENT_DOMAINS,MAILING_LIST_MULTI,RCVD_IN_DNSWL_MED, RCVD_IN_MSPIKE_H3,RCVD_IN_MSPIKE_WL autolearn=ham autolearn_force=no version=3.4.2 Received: from mother.openwall.net (mother.openwall.net [195.42.179.200]) by inbox.vuxu.org (OpenSMTPD) with SMTP id 29b33dc3 for ; Tue, 21 Jan 2020 18:52:38 +0000 (UTC) Received: (qmail 27906 invoked by uid 550); 21 Jan 2020 18:52:37 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-ID: Reply-To: musl@lists.openwall.com Received: (qmail 27868 invoked from network); 21 Jan 2020 18:52:36 -0000 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:mime-version :content-transfer-encoding; bh=r+5YPqRXN3QpZnrmrEoIylDzCcmy+r52PztpenFTr9w=; b=kJFri4wu0xFpN3uZVodESGAOMHE7MZ92Hz6ToT9PBgy2pHBmWx5CWyTPZFNh9S0Lma C8zTQb/7NRjD6xjoO87DYXSx5TyXPsEUgNZ3WQgjFew0Ce+rHPjGMzay/FBBKvJh0K0D QYfzDjP7HBU0ETTy9Fm727PH1EMavNp4gTy2gPHIcdeikf45zg+OLTuhP+CrE7QKeCyo OXiQvn7DnF+KS6H7n6S/BZ8XUEVdDveVnoXjO+c8RWR3WFHhwLZx82/Tg0d8BPY5vcuY 8QUso+lxat2su2+rZQ2BiMTITdpCbHVVmZawoqyjsoo9MR68mVO321CaeKQ/MwHGLtES zmBg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:mime-version :content-transfer-encoding; bh=r+5YPqRXN3QpZnrmrEoIylDzCcmy+r52PztpenFTr9w=; b=HYUKkHUw6l2jKttJKMWs14to3mmT0SnGRyq+NVxnxESthBR0SVCIHPKYtw3nkWW0+r tIxQMzCp43ryN1rr+par95O1eKdt8g4kZ2ERtZbNpb/t3HwIgZu+BeJRd5lHByPp1z9S XL0EuaMrwAakmkFTPOGop/t3ZXvwtJpZypK5HpOsj4HAh2qOBqIncdBaskB/OFVfqfAE aDCA8rGb/wCdbPldQrjbBzQMZ+Xkkxv2cnaaOuoQ+BJvLaYqJMfUDJzYFM1C27zQUUse Ewaydh6Qc4El+xmoL/xc/dNunjeBwAtXzcT6Sln9U0DSuMPDBhJn/ltJzBmEUa+C903Q ayrg== X-Gm-Message-State: APjAAAWkzZB4VLBoMsMH6NwDGJhtqbC4wNUjn6RfgO1znjI3vyMzrKkK nylYsVXsS4F2FZX6E56wi5sjoqxX X-Google-Smtp-Source: APXvYqwjwxS02DVGEk3IheAtkFdvHOqKac6S9b/yPjTqx53lg+It1ofKDVP/3wEIASGyhkovZmRdjQ== X-Received: by 2002:a17:90b:142:: with SMTP id em2mr7073914pjb.4.1579632743798; Tue, 21 Jan 2020 10:52:23 -0800 (PST) From: Andre McCurdy To: musl@lists.openwall.com Cc: Andre McCurdy Date: Tue, 21 Jan 2020 10:52:15 -0800 Message-Id: <20200121185215.5958-1-armccurdy@gmail.com> X-Mailer: git-send-email 2.24.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Subject: [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy Allow the existing ARM assembler memcpy implementation to be used for both big and little endian targets. --- Exactly the same changes as before but rebased to account for whitespace changes in the preceding patch to add Thumb2 support. COPYRIGHT | 2 +- src/string/arm/{memcpy_le.S => memcpy.S} | 101 ++++++++++++++++++++++- src/string/arm/memcpy.c | 3 - 3 files changed, 98 insertions(+), 8 deletions(-) rename src/string/arm/{memcpy_le.S => memcpy.S} (82%) delete mode 100644 src/string/arm/memcpy.c diff --git a/COPYRIGHT b/COPYRIGHT index e6472371..d3edc2a2 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -127,7 +127,7 @@ Copyright © 2017-2018 Arm Limited and labelled as such in comments in the individual source files. All have been licensed under extremely permissive terms. -The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008 +The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008 The Android Open Source Project and is licensed under a two-clause BSD license. It was taken from Bionic libc, used on Android. diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy.S similarity index 82% rename from src/string/arm/memcpy_le.S rename to src/string/arm/memcpy.S index 7b35d305..869e3448 100644 --- a/src/string/arm/memcpy_le.S +++ b/src/string/arm/memcpy.S @@ -1,5 +1,3 @@ -#if !__ARMEB__ - /* * Copyright (C) 2008 The Android Open Source Project * All rights reserved. @@ -42,7 +40,7 @@ * code safely callable from thumb mode, adjusting the return * instructions to be compatible with pre-thumb ARM cpus, removal of * prefetch code that is not compatible with older cpus and support for - * building as thumb 2. + * building as thumb 2 and big-endian. */ .syntax unified @@ -227,24 +225,45 @@ non_congruent: * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) */ movs r5, r5, lsl #31 + +#if __ARMEB__ + movmi r3, r3, ror #24 + strbmi r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 +#else strbmi r3, [r0], #1 movmi r3, r3, lsr #8 strbcs r3, [r0], #1 movcs r3, r3, lsr #8 strbcs r3, [r0], #1 movcs r3, r3, lsr #8 +#endif cmp r2, #4 blo partial_word_tail +#if __ARMEB__ + mov r3, r3, lsr r12 + mov r3, r3, lsl r12 +#endif + /* Align destination to 32 bytes (cache line boundary) */ 1: tst r0, #0x1c beq 2f ldr r5, [r1], #4 sub r2, r2, #4 +#if __ARMEB__ + mov r4, r5, lsr lr + orr r4, r4, r3 + mov r3, r5, lsl r12 +#else mov r4, r5, lsl lr orr r4, r4, r3 mov r3, r5, lsr r12 +#endif str r4, [r0], #4 cmp r2, #4 bhs 1b @@ -270,6 +289,25 @@ loop16: ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} subs r2, r2, #32 ldrhs r12, [r1], #4 +#if __ARMEB__ + orr r3, r3, r4, lsr #16 + mov r4, r4, lsl #16 + orr r4, r4, r5, lsr #16 + mov r5, r5, lsl #16 + orr r5, r5, r6, lsr #16 + mov r6, r6, lsl #16 + orr r6, r6, r7, lsr #16 + mov r7, r7, lsl #16 + orr r7, r7, r8, lsr #16 + mov r8, r8, lsl #16 + orr r8, r8, r9, lsr #16 + mov r9, r9, lsl #16 + orr r9, r9, r10, lsr #16 + mov r10, r10, lsl #16 + orr r10, r10, r11, lsr #16 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsl #16 +#else orr r3, r3, r4, lsl #16 mov r4, r4, lsr #16 orr r4, r4, r5, lsl #16 @@ -287,6 +325,7 @@ loop16: orr r10, r10, r11, lsl #16 stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #16 +#endif bhs 1b b less_than_thirtytwo @@ -296,6 +335,25 @@ loop8: ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} subs r2, r2, #32 ldrhs r12, [r1], #4 +#if __ARMEB__ + orr r3, r3, r4, lsr #24 + mov r4, r4, lsl #8 + orr r4, r4, r5, lsr #24 + mov r5, r5, lsl #8 + orr r5, r5, r6, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r7, lsr #24 + mov r7, r7, lsl #8 + orr r7, r7, r8, lsr #24 + mov r8, r8, lsl #8 + orr r8, r8, r9, lsr #24 + mov r9, r9, lsl #8 + orr r9, r9, r10, lsr #24 + mov r10, r10, lsl #8 + orr r10, r10, r11, lsr #24 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsl #8 +#else orr r3, r3, r4, lsl #24 mov r4, r4, lsr #8 orr r4, r4, r5, lsl #24 @@ -313,6 +371,7 @@ loop8: orr r10, r10, r11, lsl #24 stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #8 +#endif bhs 1b b less_than_thirtytwo @@ -322,6 +381,25 @@ loop24: ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} subs r2, r2, #32 ldrhs r12, [r1], #4 +#if __ARMEB__ + orr r3, r3, r4, lsr #8 + mov r4, r4, lsl #24 + orr r4, r4, r5, lsr #8 + mov r5, r5, lsl #24 + orr r5, r5, r6, lsr #8 + mov r6, r6, lsl #24 + orr r6, r6, r7, lsr #8 + mov r7, r7, lsl #24 + orr r7, r7, r8, lsr #8 + mov r8, r8, lsl #24 + orr r8, r8, r9, lsr #8 + mov r9, r9, lsl #24 + orr r9, r9, r10, lsr #8 + mov r10, r10, lsl #24 + orr r10, r10, r11, lsr #8 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsl #24 +#else orr r3, r3, r4, lsl #8 mov r4, r4, lsr #24 orr r4, r4, r5, lsl #8 @@ -339,6 +417,7 @@ loop24: orr r10, r10, r11, lsl #8 stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #24 +#endif bhs 1b less_than_thirtytwo: @@ -350,9 +429,15 @@ less_than_thirtytwo: 1: ldr r5, [r1], #4 sub r2, r2, #4 +#if __ARMEB__ + mov r4, r5, lsr lr + orr r4, r4, r3 + mov r3, r5, lsl r12 +#else mov r4, r5, lsl lr orr r4, r4, r3 mov r3, r5, lsr r12 +#endif str r4, [r0], #4 cmp r2, #4 bhs 1b @@ -360,11 +445,20 @@ less_than_thirtytwo: partial_word_tail: /* we have a partial word in the input buffer */ movs r5, lr, lsl #(31-3) +#if __ARMEB__ + movmi r3, r3, ror #24 + strbmi r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 +#else strbmi r3, [r0], #1 movmi r3, r3, lsr #8 strbcs r3, [r0], #1 movcs r3, r3, lsr #8 strbcs r3, [r0], #1 +#endif /* Refill spilled registers from the stack. Don't update sp. */ ldmfd sp, {r5-r11} @@ -383,4 +477,3 @@ copy_last_3_and_return: ldmfd sp!, {r0, r4, lr} bx lr -#endif diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c deleted file mode 100644 index 041614f4..00000000 --- a/src/string/arm/memcpy.c +++ /dev/null @@ -1,3 +0,0 @@ -#if __ARMEB__ -#include "../memcpy.c" -#endif -- 2.24.0