* [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy
@ 2020-01-21 18:52 Andre McCurdy
2020-01-22 0:36 ` Rich Felker
2020-06-25 21:50 ` Rich Felker
0 siblings, 2 replies; 7+ messages in thread
From: Andre McCurdy @ 2020-01-21 18:52 UTC (permalink / raw)
To: musl; +Cc: Andre McCurdy
Allow the existing ARM assembler memcpy implementation to be used for
both big and little endian targets.
---
Exactly the same changes as before but rebased to account for
whitespace changes in the preceding patch to add Thumb2 support.
COPYRIGHT | 2 +-
src/string/arm/{memcpy_le.S => memcpy.S} | 101 ++++++++++++++++++++++-
src/string/arm/memcpy.c | 3 -
3 files changed, 98 insertions(+), 8 deletions(-)
rename src/string/arm/{memcpy_le.S => memcpy.S} (82%)
delete mode 100644 src/string/arm/memcpy.c
diff --git a/COPYRIGHT b/COPYRIGHT
index e6472371..d3edc2a2 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -127,7 +127,7 @@ Copyright © 2017-2018 Arm Limited
and labelled as such in comments in the individual source files. All
have been licensed under extremely permissive terms.
-The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008
+The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
The Android Open Source Project and is licensed under a two-clause BSD
license. It was taken from Bionic libc, used on Android.
diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy.S
similarity index 82%
rename from src/string/arm/memcpy_le.S
rename to src/string/arm/memcpy.S
index 7b35d305..869e3448 100644
--- a/src/string/arm/memcpy_le.S
+++ b/src/string/arm/memcpy.S
@@ -1,5 +1,3 @@
-#if !__ARMEB__
-
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
@@ -42,7 +40,7 @@
* code safely callable from thumb mode, adjusting the return
* instructions to be compatible with pre-thumb ARM cpus, removal of
* prefetch code that is not compatible with older cpus and support for
- * building as thumb 2.
+ * building as thumb 2 and big-endian.
*/
.syntax unified
@@ -227,24 +225,45 @@ non_congruent:
* becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
*/
movs r5, r5, lsl #31
+
+#if __ARMEB__
+ movmi r3, r3, ror #24
+ strbmi r3, [r0], #1
+ movcs r3, r3, ror #24
+ strbcs r3, [r0], #1
+ movcs r3, r3, ror #24
+ strbcs r3, [r0], #1
+#else
strbmi r3, [r0], #1
movmi r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
+#endif
cmp r2, #4
blo partial_word_tail
+#if __ARMEB__
+ mov r3, r3, lsr r12
+ mov r3, r3, lsl r12
+#endif
+
/* Align destination to 32 bytes (cache line boundary) */
1: tst r0, #0x1c
beq 2f
ldr r5, [r1], #4
sub r2, r2, #4
+#if __ARMEB__
+ mov r4, r5, lsr lr
+ orr r4, r4, r3
+ mov r3, r5, lsl r12
+#else
mov r4, r5, lsl lr
orr r4, r4, r3
mov r3, r5, lsr r12
+#endif
str r4, [r0], #4
cmp r2, #4
bhs 1b
@@ -270,6 +289,25 @@ loop16:
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
+#if __ARMEB__
+ orr r3, r3, r4, lsr #16
+ mov r4, r4, lsl #16
+ orr r4, r4, r5, lsr #16
+ mov r5, r5, lsl #16
+ orr r5, r5, r6, lsr #16
+ mov r6, r6, lsl #16
+ orr r6, r6, r7, lsr #16
+ mov r7, r7, lsl #16
+ orr r7, r7, r8, lsr #16
+ mov r8, r8, lsl #16
+ orr r8, r8, r9, lsr #16
+ mov r9, r9, lsl #16
+ orr r9, r9, r10, lsr #16
+ mov r10, r10, lsl #16
+ orr r10, r10, r11, lsr #16
+ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+ mov r3, r11, lsl #16
+#else
orr r3, r3, r4, lsl #16
mov r4, r4, lsr #16
orr r4, r4, r5, lsl #16
@@ -287,6 +325,7 @@ loop16:
orr r10, r10, r11, lsl #16
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #16
+#endif
bhs 1b
b less_than_thirtytwo
@@ -296,6 +335,25 @@ loop8:
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
+#if __ARMEB__
+ orr r3, r3, r4, lsr #24
+ mov r4, r4, lsl #8
+ orr r4, r4, r5, lsr #24
+ mov r5, r5, lsl #8
+ orr r5, r5, r6, lsr #24
+ mov r6, r6, lsl #8
+ orr r6, r6, r7, lsr #24
+ mov r7, r7, lsl #8
+ orr r7, r7, r8, lsr #24
+ mov r8, r8, lsl #8
+ orr r8, r8, r9, lsr #24
+ mov r9, r9, lsl #8
+ orr r9, r9, r10, lsr #24
+ mov r10, r10, lsl #8
+ orr r10, r10, r11, lsr #24
+ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+ mov r3, r11, lsl #8
+#else
orr r3, r3, r4, lsl #24
mov r4, r4, lsr #8
orr r4, r4, r5, lsl #24
@@ -313,6 +371,7 @@ loop8:
orr r10, r10, r11, lsl #24
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #8
+#endif
bhs 1b
b less_than_thirtytwo
@@ -322,6 +381,25 @@ loop24:
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
+#if __ARMEB__
+ orr r3, r3, r4, lsr #8
+ mov r4, r4, lsl #24
+ orr r4, r4, r5, lsr #8
+ mov r5, r5, lsl #24
+ orr r5, r5, r6, lsr #8
+ mov r6, r6, lsl #24
+ orr r6, r6, r7, lsr #8
+ mov r7, r7, lsl #24
+ orr r7, r7, r8, lsr #8
+ mov r8, r8, lsl #24
+ orr r8, r8, r9, lsr #8
+ mov r9, r9, lsl #24
+ orr r9, r9, r10, lsr #8
+ mov r10, r10, lsl #24
+ orr r10, r10, r11, lsr #8
+ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+ mov r3, r11, lsl #24
+#else
orr r3, r3, r4, lsl #8
mov r4, r4, lsr #24
orr r4, r4, r5, lsl #8
@@ -339,6 +417,7 @@ loop24:
orr r10, r10, r11, lsl #8
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #24
+#endif
bhs 1b
less_than_thirtytwo:
@@ -350,9 +429,15 @@ less_than_thirtytwo:
1: ldr r5, [r1], #4
sub r2, r2, #4
+#if __ARMEB__
+ mov r4, r5, lsr lr
+ orr r4, r4, r3
+ mov r3, r5, lsl r12
+#else
mov r4, r5, lsl lr
orr r4, r4, r3
mov r3, r5, lsr r12
+#endif
str r4, [r0], #4
cmp r2, #4
bhs 1b
@@ -360,11 +445,20 @@ less_than_thirtytwo:
partial_word_tail:
/* we have a partial word in the input buffer */
movs r5, lr, lsl #(31-3)
+#if __ARMEB__
+ movmi r3, r3, ror #24
+ strbmi r3, [r0], #1
+ movcs r3, r3, ror #24
+ strbcs r3, [r0], #1
+ movcs r3, r3, ror #24
+ strbcs r3, [r0], #1
+#else
strbmi r3, [r0], #1
movmi r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
strbcs r3, [r0], #1
+#endif
/* Refill spilled registers from the stack. Don't update sp. */
ldmfd sp, {r5-r11}
@@ -383,4 +477,3 @@ copy_last_3_and_return:
ldmfd sp!, {r0, r4, lr}
bx lr
-#endif
diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c
deleted file mode 100644
index 041614f4..00000000
--- a/src/string/arm/memcpy.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#if __ARMEB__
-#include "../memcpy.c"
-#endif
--
2.24.0
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy
2020-01-21 18:52 [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy Andre McCurdy
@ 2020-01-22 0:36 ` Rich Felker
2020-01-22 0:47 ` Andre McCurdy
2020-06-25 21:50 ` Rich Felker
1 sibling, 1 reply; 7+ messages in thread
From: Rich Felker @ 2020-01-22 0:36 UTC (permalink / raw)
To: musl
On Tue, Jan 21, 2020 at 10:52:15AM -0800, Andre McCurdy wrote:
> Allow the existing ARM assembler memcpy implementation to be used for
> both big and little endian targets.
> ---
>
> Exactly the same changes as before but rebased to account for
> whitespace changes in the preceding patch to add Thumb2 support.
Thanks. I'm not sure if I'll ever apply this, since I think there's a
better memcpy we may be adopting, but it's nice to have in case we
want to.
Rich
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy
2020-01-22 0:36 ` Rich Felker
@ 2020-01-22 0:47 ` Andre McCurdy
2020-01-22 1:31 ` Rich Felker
0 siblings, 1 reply; 7+ messages in thread
From: Andre McCurdy @ 2020-01-22 0:47 UTC (permalink / raw)
To: musl
On Tue, Jan 21, 2020 at 4:36 PM Rich Felker <dalias@libc.org> wrote:
> On Tue, Jan 21, 2020 at 10:52:15AM -0800, Andre McCurdy wrote:
> > Allow the existing ARM assembler memcpy implementation to be used for
> > both big and little endian targets.
> > ---
> >
> > Exactly the same changes as before but rebased to account for
> > whitespace changes in the preceding patch to add Thumb2 support.
>
> Thanks. I'm not sure if I'll ever apply this, since I think there's a
> better memcpy we may be adopting, but it's nice to have in case we
> want to.
OK. Slightly disappointed to hear that. I've already been carrying
this patch for many months in my own builds, so hopefully the new
memcpy(), which will allow it to be dropped, is imminent!
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy
2020-01-22 0:47 ` Andre McCurdy
@ 2020-01-22 1:31 ` Rich Felker
0 siblings, 0 replies; 7+ messages in thread
From: Rich Felker @ 2020-01-22 1:31 UTC (permalink / raw)
To: musl
On Tue, Jan 21, 2020 at 04:47:42PM -0800, Andre McCurdy wrote:
> On Tue, Jan 21, 2020 at 4:36 PM Rich Felker <dalias@libc.org> wrote:
> > On Tue, Jan 21, 2020 at 10:52:15AM -0800, Andre McCurdy wrote:
> > > Allow the existing ARM assembler memcpy implementation to be used for
> > > both big and little endian targets.
> > > ---
> > >
> > > Exactly the same changes as before but rebased to account for
> > > whitespace changes in the preceding patch to add Thumb2 support.
> >
> > Thanks. I'm not sure if I'll ever apply this, since I think there's a
> > better memcpy we may be adopting, but it's nice to have in case we
> > want to.
>
> OK. Slightly disappointed to hear that. I've already been carrying
> this patch for many months in my own builds so hopefully the new
> memcpy() which will allow it to be dropped is imminent!
If it doesn't look immediately clear that we should go with the new
one, I'll go ahead and merge this after release. I wasn't aware anyone
was using or cared about big-endian (BE), but if you do, I don't want
to hold it back just because it might no longer be relevant in the future.
Rich
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy
2020-01-21 18:52 [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy Andre McCurdy
2020-01-22 0:36 ` Rich Felker
@ 2020-06-25 21:50 ` Rich Felker
2020-06-25 22:11 ` Andre McCurdy
1 sibling, 1 reply; 7+ messages in thread
From: Rich Felker @ 2020-06-25 21:50 UTC (permalink / raw)
To: musl
On Tue, Jan 21, 2020 at 10:52:15AM -0800, Andre McCurdy wrote:
> Allow the existing ARM assembler memcpy implementation to be used for
> both big and little endian targets.
> ---
>
> Exactly the same changes as before but rebased to account for
> whitespace changes in the preceding patch to add Thumb2 support.
>
> COPYRIGHT | 2 +-
> src/string/arm/{memcpy_le.S => memcpy.S} | 101 ++++++++++++++++++++++-
> src/string/arm/memcpy.c | 3 -
> 3 files changed, 98 insertions(+), 8 deletions(-)
> rename src/string/arm/{memcpy_le.S => memcpy.S} (82%)
> delete mode 100644 src/string/arm/memcpy.c
>
> diff --git a/COPYRIGHT b/COPYRIGHT
> index e6472371..d3edc2a2 100644
> --- a/COPYRIGHT
> +++ b/COPYRIGHT
> @@ -127,7 +127,7 @@ Copyright © 2017-2018 Arm Limited
> and labelled as such in comments in the individual source files. All
> have been licensed under extremely permissive terms.
>
> -The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008
> +The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
> The Android Open Source Project and is licensed under a two-clause BSD
> license. It was taken from Bionic libc, used on Android.
>
> diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy.S
> similarity index 82%
> rename from src/string/arm/memcpy_le.S
> rename to src/string/arm/memcpy.S
> index 7b35d305..869e3448 100644
> --- a/src/string/arm/memcpy_le.S
> +++ b/src/string/arm/memcpy.S
> @@ -1,5 +1,3 @@
> -#if !__ARMEB__
> -
> /*
> * Copyright (C) 2008 The Android Open Source Project
> * All rights reserved.
> @@ -42,7 +40,7 @@
> * code safely callable from thumb mode, adjusting the return
> * instructions to be compatible with pre-thumb ARM cpus, removal of
> * prefetch code that is not compatible with older cpus and support for
> - * building as thumb 2.
> + * building as thumb 2 and big-endian.
> */
>
> .syntax unified
> @@ -227,24 +225,45 @@ non_congruent:
> * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
> */
> movs r5, r5, lsl #31
> +
> +#if __ARMEB__
> + movmi r3, r3, ror #24
> + strbmi r3, [r0], #1
> + movcs r3, r3, ror #24
> + strbcs r3, [r0], #1
> + movcs r3, r3, ror #24
> + strbcs r3, [r0], #1
> +#else
> strbmi r3, [r0], #1
> movmi r3, r3, lsr #8
> strbcs r3, [r0], #1
> movcs r3, r3, lsr #8
> strbcs r3, [r0], #1
> movcs r3, r3, lsr #8
> +#endif
>
> cmp r2, #4
> blo partial_word_tail
>
> +#if __ARMEB__
> + mov r3, r3, lsr r12
> + mov r3, r3, lsl r12
> +#endif
> +
> /* Align destination to 32 bytes (cache line boundary) */
> 1: tst r0, #0x1c
> beq 2f
> ldr r5, [r1], #4
> sub r2, r2, #4
> +#if __ARMEB__
> + mov r4, r5, lsr lr
> + orr r4, r4, r3
> + mov r3, r5, lsl r12
> +#else
> mov r4, r5, lsl lr
> orr r4, r4, r3
> mov r3, r5, lsr r12
> +#endif
Am I missing something or are both cases identical here? That would
either indicate this is gratuitous or there's a bug here and they were
intended not to be the same.
> [...]
> @@ -350,9 +429,15 @@ less_than_thirtytwo:
>
> 1: ldr r5, [r1], #4
> sub r2, r2, #4
> +#if __ARMEB__
> + mov r4, r5, lsr lr
> + orr r4, r4, r3
> + mov r3, r5, lsl r12
> +#else
> mov r4, r5, lsl lr
> orr r4, r4, r3
> mov r3, r5, lsr r12
> +#endif
And again here.
Rich
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy
2020-06-25 21:50 ` Rich Felker
@ 2020-06-25 22:11 ` Andre McCurdy
2020-06-25 22:24 ` Rich Felker
0 siblings, 1 reply; 7+ messages in thread
From: Andre McCurdy @ 2020-06-25 22:11 UTC (permalink / raw)
To: musl
On Thu, Jun 25, 2020 at 3:06 PM Rich Felker <dalias@libc.org> wrote:
>
> On Tue, Jan 21, 2020 at 10:52:15AM -0800, Andre McCurdy wrote:
> > Allow the existing ARM assembler memcpy implementation to be used for
> > both big and little endian targets.
> > ---
> >
> > Exactly the same changes as before but rebased to account for
> > whitespace changes in the preceding patch to add Thumb2 support.
> >
> > COPYRIGHT | 2 +-
> > src/string/arm/{memcpy_le.S => memcpy.S} | 101 ++++++++++++++++++++++-
> > src/string/arm/memcpy.c | 3 -
> > 3 files changed, 98 insertions(+), 8 deletions(-)
> > rename src/string/arm/{memcpy_le.S => memcpy.S} (82%)
> > delete mode 100644 src/string/arm/memcpy.c
> >
> > diff --git a/COPYRIGHT b/COPYRIGHT
> > index e6472371..d3edc2a2 100644
> > --- a/COPYRIGHT
> > +++ b/COPYRIGHT
> > @@ -127,7 +127,7 @@ Copyright © 2017-2018 Arm Limited
> > and labelled as such in comments in the individual source files. All
> > have been licensed under extremely permissive terms.
> >
> > -The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008
> > +The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
> > The Android Open Source Project and is licensed under a two-clause BSD
> > license. It was taken from Bionic libc, used on Android.
> >
> > diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy.S
> > similarity index 82%
> > rename from src/string/arm/memcpy_le.S
> > rename to src/string/arm/memcpy.S
> > index 7b35d305..869e3448 100644
> > --- a/src/string/arm/memcpy_le.S
> > +++ b/src/string/arm/memcpy.S
> > @@ -1,5 +1,3 @@
> > -#if !__ARMEB__
> > -
> > /*
> > * Copyright (C) 2008 The Android Open Source Project
> > * All rights reserved.
> > @@ -42,7 +40,7 @@
> > * code safely callable from thumb mode, adjusting the return
> > * instructions to be compatible with pre-thumb ARM cpus, removal of
> > * prefetch code that is not compatible with older cpus and support for
> > - * building as thumb 2.
> > + * building as thumb 2 and big-endian.
> > */
> >
> > .syntax unified
> > @@ -227,24 +225,45 @@ non_congruent:
> > * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
> > */
> > movs r5, r5, lsl #31
> > +
> > +#if __ARMEB__
> > + movmi r3, r3, ror #24
> > + strbmi r3, [r0], #1
> > + movcs r3, r3, ror #24
> > + strbcs r3, [r0], #1
> > + movcs r3, r3, ror #24
> > + strbcs r3, [r0], #1
> > +#else
> > strbmi r3, [r0], #1
> > movmi r3, r3, lsr #8
> > strbcs r3, [r0], #1
> > movcs r3, r3, lsr #8
> > strbcs r3, [r0], #1
> > movcs r3, r3, lsr #8
> > +#endif
> >
> > cmp r2, #4
> > blo partial_word_tail
> >
> > +#if __ARMEB__
> > + mov r3, r3, lsr r12
> > + mov r3, r3, lsl r12
> > +#endif
> > +
> > /* Align destination to 32 bytes (cache line boundary) */
> > 1: tst r0, #0x1c
> > beq 2f
> > ldr r5, [r1], #4
> > sub r2, r2, #4
> > +#if __ARMEB__
> > + mov r4, r5, lsr lr
> > + orr r4, r4, r3
> > + mov r3, r5, lsl r12
> > +#else
> > mov r4, r5, lsl lr
> > orr r4, r4, r3
> > mov r3, r5, lsr r12
> > +#endif
>
> Am I missing something or are both cases identical here? That would
> either indicate this is gratuitous or there's a bug here and they were
> intended not to be the same.
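The two cases are not identical: the difference here and below is the
shift direction. The big-endian case uses lsr (logical shift right)
where the little-endian case uses lsl (logical shift left), and vice versa.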
> > [...]
> > @@ -350,9 +429,15 @@ less_than_thirtytwo:
> >
> > 1: ldr r5, [r1], #4
> > sub r2, r2, #4
> > +#if __ARMEB__
> > + mov r4, r5, lsr lr
> > + orr r4, r4, r3
> > + mov r3, r5, lsl r12
> > +#else
> > mov r4, r5, lsl lr
> > orr r4, r4, r3
> > mov r3, r5, lsr r12
> > +#endif
>
> And again here.
>
> Rich
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy
2020-06-25 22:11 ` Andre McCurdy
@ 2020-06-25 22:24 ` Rich Felker
0 siblings, 0 replies; 7+ messages in thread
From: Rich Felker @ 2020-06-25 22:24 UTC (permalink / raw)
To: musl
On Thu, Jun 25, 2020 at 03:11:05PM -0700, Andre McCurdy wrote:
> On Thu, Jun 25, 2020 at 3:06 PM Rich Felker <dalias@libc.org> wrote:
> >
> > On Tue, Jan 21, 2020 at 10:52:15AM -0800, Andre McCurdy wrote:
> > > Allow the existing ARM assembler memcpy implementation to be used for
> > > both big and little endian targets.
> > > ---
> > >
> > > Exactly the same changes as before but rebased to account for
> > > whitespace changes in the preceding patch to add Thumb2 support.
> > >
> > > COPYRIGHT | 2 +-
> > > src/string/arm/{memcpy_le.S => memcpy.S} | 101 ++++++++++++++++++++++-
> > > src/string/arm/memcpy.c | 3 -
> > > 3 files changed, 98 insertions(+), 8 deletions(-)
> > > rename src/string/arm/{memcpy_le.S => memcpy.S} (82%)
> > > delete mode 100644 src/string/arm/memcpy.c
> > >
> > > diff --git a/COPYRIGHT b/COPYRIGHT
> > > index e6472371..d3edc2a2 100644
> > > --- a/COPYRIGHT
> > > +++ b/COPYRIGHT
> > > @@ -127,7 +127,7 @@ Copyright © 2017-2018 Arm Limited
> > > and labelled as such in comments in the individual source files. All
> > > have been licensed under extremely permissive terms.
> > >
> > > -The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008
> > > +The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
> > > The Android Open Source Project and is licensed under a two-clause BSD
> > > license. It was taken from Bionic libc, used on Android.
> > >
> > > diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy.S
> > > similarity index 82%
> > > rename from src/string/arm/memcpy_le.S
> > > rename to src/string/arm/memcpy.S
> > > index 7b35d305..869e3448 100644
> > > --- a/src/string/arm/memcpy_le.S
> > > +++ b/src/string/arm/memcpy.S
> > > @@ -1,5 +1,3 @@
> > > -#if !__ARMEB__
> > > -
> > > /*
> > > * Copyright (C) 2008 The Android Open Source Project
> > > * All rights reserved.
> > > @@ -42,7 +40,7 @@
> > > * code safely callable from thumb mode, adjusting the return
> > > * instructions to be compatible with pre-thumb ARM cpus, removal of
> > > * prefetch code that is not compatible with older cpus and support for
> > > - * building as thumb 2.
> > > + * building as thumb 2 and big-endian.
> > > */
> > >
> > > .syntax unified
> > > @@ -227,24 +225,45 @@ non_congruent:
> > > * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
> > > */
> > > movs r5, r5, lsl #31
> > > +
> > > +#if __ARMEB__
> > > + movmi r3, r3, ror #24
> > > + strbmi r3, [r0], #1
> > > + movcs r3, r3, ror #24
> > > + strbcs r3, [r0], #1
> > > + movcs r3, r3, ror #24
> > > + strbcs r3, [r0], #1
> > > +#else
> > > strbmi r3, [r0], #1
> > > movmi r3, r3, lsr #8
> > > strbcs r3, [r0], #1
> > > movcs r3, r3, lsr #8
> > > strbcs r3, [r0], #1
> > > movcs r3, r3, lsr #8
> > > +#endif
> > >
> > > cmp r2, #4
> > > blo partial_word_tail
> > >
> > > +#if __ARMEB__
> > > + mov r3, r3, lsr r12
> > > + mov r3, r3, lsl r12
> > > +#endif
> > > +
> > > /* Align destination to 32 bytes (cache line boundary) */
> > > 1: tst r0, #0x1c
> > > beq 2f
> > > ldr r5, [r1], #4
> > > sub r2, r2, #4
> > > +#if __ARMEB__
> > > + mov r4, r5, lsr lr
> > > + orr r4, r4, r3
> > > + mov r3, r5, lsl r12
> > > +#else
> > > mov r4, r5, lsl lr
> > > orr r4, r4, r3
> > > mov r3, r5, lsr r12
> > > +#endif
> >
> > Am I missing something or are both cases identical here? That would
> > either indicate this is gratuitous or there's a bug here and they were
> > intended not to be the same.
>
> Difference here and below is lsr (logical shift right) -vs- lsl
> (logical shift left).
Thanks!
Rich
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2020-06-25 22:24 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-21 18:52 [musl] [PATCH v2] Add big-endian support to ARM assembler memcpy Andre McCurdy
2020-01-22 0:36 ` Rich Felker
2020-01-22 0:47 ` Andre McCurdy
2020-01-22 1:31 ` Rich Felker
2020-06-25 21:50 ` Rich Felker
2020-06-25 22:11 ` Andre McCurdy
2020-06-25 22:24 ` Rich Felker
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/musl/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).