From: Andre McCurdy <armccurdy@gmail.com>
To: musl@lists.openwall.com
Cc: Andre McCurdy <armccurdy@gmail.com>
Subject: [PATCH 2/2] Add big-endian support to ARM assembler memcpy
Date: Fri, 13 Sep 2019 11:44:32 -0700 [thread overview]
Message-ID: <20190913184432.29753-2-armccurdy@gmail.com> (raw)
In-Reply-To: <20190913184432.29753-1-armccurdy@gmail.com>
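Allow the existing ARM assembler memcpy implementation to be used for
both big-endian and little-endian targets. Big-endian builds no longer
fall back to the generic C memcpy, so src/string/arm/memcpy.c is
removed and memcpy_le.S is renamed to memcpy.S.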
---
COPYRIGHT | 2 +-
src/string/arm/{memcpy_le.S => memcpy.S} | 101 ++++++++++++++++++++++-
src/string/arm/memcpy.c | 3 -
3 files changed, 98 insertions(+), 8 deletions(-)
rename src/string/arm/{memcpy_le.S => memcpy.S} (83%)
delete mode 100644 src/string/arm/memcpy.c
diff --git a/COPYRIGHT b/COPYRIGHT
index 2525ffb5..96c2b070 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -126,7 +126,7 @@ Copyright © 2008 Stephen L. Moshier
and labelled as such in comments in the individual source files. All
have been licensed under extremely permissive terms.
-The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008
+The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
The Android Open Source Project and is licensed under a two-clause BSD
license. It was taken from Bionic libc, used on Android.
diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy.S
similarity index 83%
rename from src/string/arm/memcpy_le.S
rename to src/string/arm/memcpy.S
index 64bc5f9e..766a88a5 100644
--- a/src/string/arm/memcpy_le.S
+++ b/src/string/arm/memcpy.S
@@ -1,5 +1,3 @@
-#if !__ARMEB__
-
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
@@ -42,7 +40,7 @@
* code safely callable from thumb mode, adjusting the return
* instructions to be compatible with pre-thumb ARM cpus, removal of
* prefetch code that is not compatible with older cpus and support for
- * building as thumb 2.
+ * building as thumb 2 and big-endian.
*/
.syntax unified
@@ -227,24 +225,45 @@ non_congruent:
* becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
*/
movs r5, r5, lsl #31
+
+#if __ARMEB__
+ movmi r3, r3, ror #24
+ strbmi r3, [r0], #1
+ movcs r3, r3, ror #24
+ strbcs r3, [r0], #1
+ movcs r3, r3, ror #24
+ strbcs r3, [r0], #1
+#else
strbmi r3, [r0], #1
movmi r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
+#endif
cmp r2, #4
blo partial_word_tail
+#if __ARMEB__
+ mov r3, r3, lsr r12
+ mov r3, r3, lsl r12
+#endif
+
/* Align destination to 32 bytes (cache line boundary) */
1: tst r0, #0x1c
beq 2f
ldr r5, [r1], #4
sub r2, r2, #4
+#if __ARMEB__
+ mov r4, r5, lsr lr
+ orr r4, r4, r3
+ mov r3, r5, lsl r12
+#else
mov r4, r5, lsl lr
orr r4, r4, r3
mov r3, r5, lsr r12
+#endif
str r4, [r0], #4
cmp r2, #4
bhs 1b
@@ -270,6 +289,25 @@ loop16:
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
+#if __ARMEB__
+ orr r3, r3, r4, lsr #16
+ mov r4, r4, lsl #16
+ orr r4, r4, r5, lsr #16
+ mov r5, r5, lsl #16
+ orr r5, r5, r6, lsr #16
+ mov r6, r6, lsl #16
+ orr r6, r6, r7, lsr #16
+ mov r7, r7, lsl #16
+ orr r7, r7, r8, lsr #16
+ mov r8, r8, lsl #16
+ orr r8, r8, r9, lsr #16
+ mov r9, r9, lsl #16
+ orr r9, r9, r10, lsr #16
+ mov r10, r10, lsl #16
+ orr r10, r10, r11, lsr #16
+ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+ mov r3, r11, lsl #16
+#else
orr r3, r3, r4, lsl #16
mov r4, r4, lsr #16
orr r4, r4, r5, lsl #16
@@ -287,6 +325,7 @@ loop16:
orr r10, r10, r11, lsl #16
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #16
+#endif
bhs 1b
b less_than_thirtytwo
@@ -296,6 +335,25 @@ loop8:
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
+#if __ARMEB__
+ orr r3, r3, r4, lsr #24
+ mov r4, r4, lsl #8
+ orr r4, r4, r5, lsr #24
+ mov r5, r5, lsl #8
+ orr r5, r5, r6, lsr #24
+ mov r6, r6, lsl #8
+ orr r6, r6, r7, lsr #24
+ mov r7, r7, lsl #8
+ orr r7, r7, r8, lsr #24
+ mov r8, r8, lsl #8
+ orr r8, r8, r9, lsr #24
+ mov r9, r9, lsl #8
+ orr r9, r9, r10, lsr #24
+ mov r10, r10, lsl #8
+ orr r10, r10, r11, lsr #24
+ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+ mov r3, r11, lsl #8
+#else
orr r3, r3, r4, lsl #24
mov r4, r4, lsr #8
orr r4, r4, r5, lsl #24
@@ -313,6 +371,7 @@ loop8:
orr r10, r10, r11, lsl #24
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #8
+#endif
bhs 1b
b less_than_thirtytwo
@@ -322,6 +381,25 @@ loop24:
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
+#if __ARMEB__
+ orr r3, r3, r4, lsr #8
+ mov r4, r4, lsl #24
+ orr r4, r4, r5, lsr #8
+ mov r5, r5, lsl #24
+ orr r5, r5, r6, lsr #8
+ mov r6, r6, lsl #24
+ orr r6, r6, r7, lsr #8
+ mov r7, r7, lsl #24
+ orr r7, r7, r8, lsr #8
+ mov r8, r8, lsl #24
+ orr r8, r8, r9, lsr #8
+ mov r9, r9, lsl #24
+ orr r9, r9, r10, lsr #8
+ mov r10, r10, lsl #24
+ orr r10, r10, r11, lsr #8
+ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+ mov r3, r11, lsl #24
+#else
orr r3, r3, r4, lsl #8
mov r4, r4, lsr #24
orr r4, r4, r5, lsl #8
@@ -339,6 +417,7 @@ loop24:
orr r10, r10, r11, lsl #8
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #24
+#endif
bhs 1b
less_than_thirtytwo:
@@ -350,9 +429,15 @@ less_than_thirtytwo:
1: ldr r5, [r1], #4
sub r2, r2, #4
+#if __ARMEB__
+ mov r4, r5, lsr lr
+ orr r4, r4, r3
+ mov r3, r5, lsl r12
+#else
mov r4, r5, lsl lr
orr r4, r4, r3
mov r3, r5, lsr r12
+#endif
str r4, [r0], #4
cmp r2, #4
bhs 1b
@@ -360,11 +445,20 @@ less_than_thirtytwo:
partial_word_tail:
/* we have a partial word in the input buffer */
movs r5, lr, lsl #(31-3)
+#if __ARMEB__
+ movmi r3, r3, ror #24
+ strbmi r3, [r0], #1
+ movcs r3, r3, ror #24
+ strbcs r3, [r0], #1
+ movcs r3, r3, ror #24
+ strbcs r3, [r0], #1
+#else
strbmi r3, [r0], #1
movmi r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
strbcs r3, [r0], #1
+#endif
/* Refill spilled registers from the stack. Don't update sp. */
ldmfd sp, {r5-r11}
@@ -383,4 +477,3 @@ copy_last_3_and_return:
ldmfd sp!, {r0, r4, lr}
bx lr
-#endif
diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c
deleted file mode 100644
index 041614f4..00000000
--- a/src/string/arm/memcpy.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#if __ARMEB__
-#include "../memcpy.c"
-#endif
--
2.23.0
next prev parent reply other threads:[~2019-09-13 18:44 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-09-13 18:44 [PATCH 1/2] Add Thumb2 " Andre McCurdy
2019-09-13 18:44 ` Andre McCurdy [this message]
2019-09-13 18:59 ` [PATCH 2/2] Add big-endian " Rich Felker
2019-09-13 20:38 ` Andre McCurdy
2020-01-15 15:45 ` [musl] " Rich Felker
2020-01-15 18:41 ` Andre McCurdy
2020-01-15 19:22 ` Rich Felker
2020-01-15 20:54 ` Andre McCurdy
2020-01-16 15:21 ` Natanael Copa
2020-01-15 16:35 ` [musl] [PATCH 1/2] Add Thumb2 " Rich Felker
2020-01-15 18:49 ` Andre McCurdy
2020-01-15 19:24 ` Rich Felker
2020-01-15 20:20 ` Andre McCurdy
2020-01-15 20:38 ` Rich Felker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190913184432.29753-2-armccurdy@gmail.com \
--to=armccurdy@gmail.com \
--cc=musl@lists.openwall.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/musl/
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).