mailing list of musl libc
 help / color / Atom feed
* [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy
@ 2020-01-21 18:52 Andre McCurdy
  2020-01-22  0:36 ` Rich Felker
  0 siblings, 1 reply; 4+ messages in thread
From: Andre McCurdy @ 2020-01-21 18:52 UTC (permalink / raw)
  To: musl; +Cc: Andre McCurdy

Allow the existing ARM assembler memcpy implementation to be used for
both big and little endian targets.
---

Exactly the same changes as before but rebased to account for
whitespace changes in the preceding patch to add Thumb2 support.

 COPYRIGHT                                |   2 +-
 src/string/arm/{memcpy_le.S => memcpy.S} | 101 ++++++++++++++++++++++-
 src/string/arm/memcpy.c                  |   3 -
 3 files changed, 98 insertions(+), 8 deletions(-)
 rename src/string/arm/{memcpy_le.S => memcpy.S} (82%)
 delete mode 100644 src/string/arm/memcpy.c

diff --git a/COPYRIGHT b/COPYRIGHT
index e6472371..d3edc2a2 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -127,7 +127,7 @@ Copyright © 2017-2018 Arm Limited
 and labelled as such in comments in the individual source files. All
 have been licensed under extremely permissive terms.
 
-The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008
+The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
 The Android Open Source Project and is licensed under a two-clause BSD
 license. It was taken from Bionic libc, used on Android.
 
diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy.S
similarity index 82%
rename from src/string/arm/memcpy_le.S
rename to src/string/arm/memcpy.S
index 7b35d305..869e3448 100644
--- a/src/string/arm/memcpy_le.S
+++ b/src/string/arm/memcpy.S
@@ -1,5 +1,3 @@
-#if !__ARMEB__
-
 /*
  * Copyright (C) 2008 The Android Open Source Project
  * All rights reserved.
@@ -42,7 +40,7 @@
  * code safely callable from thumb mode, adjusting the return
  * instructions to be compatible with pre-thumb ARM cpus, removal of
  * prefetch code that is not compatible with older cpus and support for
- * building as thumb 2.
+ * building as thumb 2 and big-endian.
  */
 
 .syntax unified
@@ -227,24 +225,45 @@ non_congruent:
 	 * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
 	 */
 	movs    r5, r5, lsl #31
+
+#if __ARMEB__
+	movmi   r3, r3, ror #24
+	strbmi	r3, [r0], #1
+	movcs   r3, r3, ror #24
+	strbcs	r3, [r0], #1
+	movcs   r3, r3, ror #24
+	strbcs	r3, [r0], #1
+#else
 	strbmi r3, [r0], #1
 	movmi   r3, r3, lsr #8
 	strbcs r3, [r0], #1
 	movcs   r3, r3, lsr #8
 	strbcs r3, [r0], #1
 	movcs   r3, r3, lsr #8
+#endif
 
 	cmp     r2, #4
 	blo     partial_word_tail
 
+#if __ARMEB__
+	mov	r3, r3, lsr r12
+	mov	r3, r3, lsl r12
+#endif
+
 	/* Align destination to 32 bytes (cache line boundary) */
 1:      tst     r0, #0x1c
 	beq     2f
 	ldr     r5, [r1], #4
 	sub     r2, r2, #4
+#if __ARMEB__
+	mov     r4, r5,                 lsr lr
+	orr     r4, r4, r3
+	mov     r3, r5,                 lsl r12
+#else
 	mov     r4, r5,                 lsl lr
 	orr     r4, r4, r3
 	mov     r3, r5,                 lsr r12
+#endif
 	str     r4, [r0], #4
 	cmp     r2, #4
 	bhs     1b
@@ -270,6 +289,25 @@ loop16:
 	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
 	subs    r2, r2, #32
 	ldrhs   r12, [r1], #4
+#if __ARMEB__
+	orr     r3, r3, r4, lsr #16
+	mov     r4, r4, lsl #16
+	orr     r4, r4, r5, lsr #16
+	mov     r5, r5, lsl #16
+	orr     r5, r5, r6, lsr #16
+	mov     r6, r6, lsl #16
+	orr     r6, r6, r7, lsr #16
+	mov     r7, r7, lsl #16
+	orr     r7, r7, r8, lsr #16
+	mov     r8, r8, lsl #16
+	orr     r8, r8, r9, lsr #16
+	mov     r9, r9, lsl #16
+	orr     r9, r9, r10, lsr #16
+	mov     r10, r10,               lsl #16
+	orr     r10, r10, r11, lsr #16
+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+	mov     r3, r11, lsl #16
+#else
 	orr     r3, r3, r4, lsl #16
 	mov     r4, r4, lsr #16
 	orr     r4, r4, r5, lsl #16
@@ -287,6 +325,7 @@ loop16:
 	orr     r10, r10, r11, lsl #16
 	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
 	mov     r3, r11, lsr #16
+#endif
 	bhs     1b
 	b       less_than_thirtytwo
 
@@ -296,6 +335,25 @@ loop8:
 	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
 	subs    r2, r2, #32
 	ldrhs   r12, [r1], #4
+#if __ARMEB__
+	orr     r3, r3, r4, lsr #24
+	mov     r4, r4, lsl #8
+	orr     r4, r4, r5, lsr #24
+	mov     r5, r5, lsl #8
+	orr     r5, r5, r6, lsr #24
+	mov     r6, r6,  lsl #8
+	orr     r6, r6, r7, lsr #24
+	mov     r7, r7,  lsl #8
+	orr     r7, r7, r8,             lsr #24
+	mov     r8, r8,  lsl #8
+	orr     r8, r8, r9,             lsr #24
+	mov     r9, r9,  lsl #8
+	orr     r9, r9, r10,    lsr #24
+	mov     r10, r10, lsl #8
+	orr     r10, r10, r11,  lsr #24
+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+	mov     r3, r11, lsl #8
+#else
 	orr     r3, r3, r4, lsl #24
 	mov     r4, r4, lsr #8
 	orr     r4, r4, r5, lsl #24
@@ -313,6 +371,7 @@ loop8:
 	orr     r10, r10, r11,  lsl #24
 	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
 	mov     r3, r11, lsr #8
+#endif
 	bhs     1b
 	b       less_than_thirtytwo
 
@@ -322,6 +381,25 @@ loop24:
 	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
 	subs    r2, r2, #32
 	ldrhs   r12, [r1], #4
+#if __ARMEB__
+	orr     r3, r3, r4, lsr #8
+	mov     r4, r4, lsl #24
+	orr     r4, r4, r5, lsr #8
+	mov     r5, r5, lsl #24
+	orr     r5, r5, r6, lsr #8
+	mov     r6, r6, lsl #24
+	orr     r6, r6, r7, lsr #8
+	mov     r7, r7, lsl #24
+	orr     r7, r7, r8, lsr #8
+	mov     r8, r8, lsl #24
+	orr     r8, r8, r9, lsr #8
+	mov     r9, r9, lsl #24
+	orr     r9, r9, r10, lsr #8
+	mov     r10, r10, lsl #24
+	orr     r10, r10, r11, lsr #8
+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+	mov     r3, r11, lsl #24
+#else
 	orr     r3, r3, r4, lsl #8
 	mov     r4, r4, lsr #24
 	orr     r4, r4, r5, lsl #8
@@ -339,6 +417,7 @@ loop24:
 	orr     r10, r10, r11, lsl #8
 	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
 	mov     r3, r11, lsr #24
+#endif
 	bhs     1b
 
 less_than_thirtytwo:
@@ -350,9 +429,15 @@ less_than_thirtytwo:
 
 1:      ldr     r5, [r1], #4
 	sub     r2, r2, #4
+#if __ARMEB__
+	mov     r4, r5,                 lsr lr
+	orr     r4, r4, r3
+	mov     r3,     r5,                     lsl r12
+#else
 	mov     r4, r5,                 lsl lr
 	orr     r4, r4, r3
 	mov     r3,     r5,                     lsr r12
+#endif
 	str     r4, [r0], #4
 	cmp     r2, #4
 	bhs     1b
@@ -360,11 +445,20 @@ less_than_thirtytwo:
 partial_word_tail:
 	/* we have a partial word in the input buffer */
 	movs    r5, lr, lsl #(31-3)
+#if __ARMEB__
+	movmi   r3, r3, ror #24
+	strbmi r3, [r0], #1
+	movcs   r3, r3, ror #24
+	strbcs r3, [r0], #1
+	movcs   r3, r3, ror #24
+	strbcs r3, [r0], #1
+#else
 	strbmi r3, [r0], #1
 	movmi   r3, r3, lsr #8
 	strbcs r3, [r0], #1
 	movcs   r3, r3, lsr #8
 	strbcs r3, [r0], #1
+#endif
 
 	/* Refill spilled registers from the stack. Don't update sp. */
 	ldmfd   sp, {r5-r11}
@@ -383,4 +477,3 @@ copy_last_3_and_return:
 	ldmfd   sp!, {r0, r4, lr}
 	bx      lr
 
-#endif
diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c
deleted file mode 100644
index 041614f4..00000000
--- a/src/string/arm/memcpy.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#if __ARMEB__
-#include "../memcpy.c"
-#endif
-- 
2.24.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy
  2020-01-21 18:52 [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy Andre McCurdy
@ 2020-01-22  0:36 ` Rich Felker
  2020-01-22  0:47   ` Andre McCurdy
  0 siblings, 1 reply; 4+ messages in thread
From: Rich Felker @ 2020-01-22  0:36 UTC (permalink / raw)
  To: musl

On Tue, Jan 21, 2020 at 10:52:15AM -0800, Andre McCurdy wrote:
> Allow the existing ARM assembler memcpy implementation to be used for
> both big and little endian targets.
> ---
> 
> Exactly the same changes as before but rebased to account for
> whitespace changes in the preceding patch to add Thumb2 support.

Thanks. I'm not sure if I'll ever apply this, since I think there's a
better memcpy we may be adopting, but it's nice to have in case we
want to.

Rich

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy
  2020-01-22  0:36 ` Rich Felker
@ 2020-01-22  0:47   ` Andre McCurdy
  2020-01-22  1:31     ` Rich Felker
  0 siblings, 1 reply; 4+ messages in thread
From: Andre McCurdy @ 2020-01-22  0:47 UTC (permalink / raw)
  To: musl

On Tue, Jan 21, 2020 at 4:36 PM Rich Felker <dalias@libc.org> wrote:
> On Tue, Jan 21, 2020 at 10:52:15AM -0800, Andre McCurdy wrote:
> > Allow the existing ARM assembler memcpy implementation to be used for
> > both big and little endian targets.
> > ---
> >
> > Exactly the same changes as before but rebased to account for
> > whitespace changes in the preceding patch to add Thumb2 support.
>
> Thanks. I'm not sure if I'll ever apply this, since I think there's a
> better memcpy we may be adopting, but it's nice to have in case we
> want to.

OK. Slightly disappointed to hear that. I've already been carrying
this patch for many months in my own builds so hopefully the new
memcpy() which will allow it to be dropped is imminent!

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy
  2020-01-22  0:47   ` Andre McCurdy
@ 2020-01-22  1:31     ` Rich Felker
  0 siblings, 0 replies; 4+ messages in thread
From: Rich Felker @ 2020-01-22  1:31 UTC (permalink / raw)
  To: musl

On Tue, Jan 21, 2020 at 04:47:42PM -0800, Andre McCurdy wrote:
> On Tue, Jan 21, 2020 at 4:36 PM Rich Felker <dalias@libc.org> wrote:
> > On Tue, Jan 21, 2020 at 10:52:15AM -0800, Andre McCurdy wrote:
> > > Allow the existing ARM assembler memcpy implementation to be used for
> > > both big and little endian targets.
> > > ---
> > >
> > > Exactly the same changes as before but rebased to account for
> > > whitespace changes in the preceding patch to add Thumb2 support.
> >
> > Thanks. I'm not sure if I'll ever apply this, since I think there's a
> > better memcpy we may be adopting, but it's nice to have in case we
> > want to.
> 
> OK. Slightly disappointed to hear that. I've already been carrying
> this patch for many months in my own builds so hopefully the new
> memcpy() which will allow it to be dropped is imminent!

If it doesn't look immediately clear that we should go with the new
one, I'll go ahead and merge this after release. I wasn't aware anyone
was using or cared about BE, but if you do, I don't want to hold it
back just because it might no longer be relevant in the future.

Rich

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, back to index

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-21 18:52 [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy Andre McCurdy
2020-01-22  0:36 ` Rich Felker
2020-01-22  0:47   ` Andre McCurdy
2020-01-22  1:31     ` Rich Felker

mailing list of musl libc

Archives are clonable: git clone --mirror http://inbox.vuxu.org/musl

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://inbox.vuxu.org/vuxu.archive.musl


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git