* [PATCH 1/2] Add Thumb2 support to ARM assembler memcpy
@ 2019-09-13 18:44 Andre McCurdy
2019-09-13 18:44 ` [PATCH 2/2] Add big-endian " Andre McCurdy
2020-01-15 16:35 ` [musl] [PATCH 1/2] Add Thumb2 " Rich Felker
0 siblings, 2 replies; 14+ messages in thread
From: Andre McCurdy @ 2019-09-13 18:44 UTC (permalink / raw)
To: musl; +Cc: Andre McCurdy
For Thumb2 compatibility, replace two instances of a single
instruction "orr with a variable shift" with the two instruction
equivalent. Neither of the replacements are in a performance critical
loop.
---
src/string/arm/memcpy.c | 2 +-
src/string/arm/memcpy_le.S | 17 ++++++++++-------
2 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c
index f703c9bd..041614f4 100644
--- a/src/string/arm/memcpy.c
+++ b/src/string/arm/memcpy.c
@@ -1,3 +1,3 @@
-#if __ARMEB__ || __thumb__
+#if __ARMEB__
#include "../memcpy.c"
#endif
diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
index 9cfbcb2a..64bc5f9e 100644
--- a/src/string/arm/memcpy_le.S
+++ b/src/string/arm/memcpy_le.S
@@ -1,4 +1,4 @@
-#if !__ARMEB__ && !__thumb__
+#if !__ARMEB__
/*
* Copyright (C) 2008 The Android Open Source Project
@@ -40,8 +40,9 @@
* This file has been modified from the original for use in musl libc.
* The main changes are: addition of .type memcpy,%function to make the
* code safely callable from thumb mode, adjusting the return
- * instructions to be compatible with pre-thumb ARM cpus, and removal
- * of prefetch code that is not compatible with older cpus.
+ * instructions to be compatible with pre-thumb ARM cpus, removal of
+ * prefetch code that is not compatible with older cpus and support for
+ * building as thumb 2.
*/
.syntax unified
@@ -241,8 +242,9 @@ non_congruent:
beq 2f
ldr r5, [r1], #4
sub r2, r2, #4
- orr r4, r3, r5, lsl lr
- mov r3, r5, lsr r12
+ mov r4, r5, lsl lr
+ orr r4, r4, r3
+ mov r3, r5, lsr r12
str r4, [r0], #4
cmp r2, #4
bhs 1b
@@ -348,8 +350,9 @@ less_than_thirtytwo:
1: ldr r5, [r1], #4
sub r2, r2, #4
- orr r4, r3, r5, lsl lr
- mov r3, r5, lsr r12
+ mov r4, r5, lsl lr
+ orr r4, r4, r3
+ mov r3, r5, lsr r12
str r4, [r0], #4
cmp r2, #4
bhs 1b
--
2.23.0
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/2] Add big-endian support to ARM assembler memcpy
2019-09-13 18:44 [PATCH 1/2] Add Thumb2 support to ARM assembler memcpy Andre McCurdy
@ 2019-09-13 18:44 ` Andre McCurdy
2019-09-13 18:59 ` Rich Felker
2020-01-15 16:35 ` [musl] [PATCH 1/2] Add Thumb2 " Rich Felker
1 sibling, 1 reply; 14+ messages in thread
From: Andre McCurdy @ 2019-09-13 18:44 UTC (permalink / raw)
To: musl; +Cc: Andre McCurdy
Allow the existing ARM assembler memcpy implementation to be used for
both big and little endian targets.
---
COPYRIGHT | 2 +-
src/string/arm/{memcpy_le.S => memcpy.S} | 101 ++++++++++++++++++++++-
src/string/arm/memcpy.c | 3 -
3 files changed, 98 insertions(+), 8 deletions(-)
rename src/string/arm/{memcpy_le.S => memcpy.S} (83%)
delete mode 100644 src/string/arm/memcpy.c
diff --git a/COPYRIGHT b/COPYRIGHT
index 2525ffb5..96c2b070 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -126,7 +126,7 @@ Copyright © 2008 Stephen L. Moshier
and labelled as such in comments in the individual source files. All
have been licensed under extremely permissive terms.
-The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008
+The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
The Android Open Source Project and is licensed under a two-clause BSD
license. It was taken from Bionic libc, used on Android.
diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy.S
similarity index 83%
rename from src/string/arm/memcpy_le.S
rename to src/string/arm/memcpy.S
index 64bc5f9e..766a88a5 100644
--- a/src/string/arm/memcpy_le.S
+++ b/src/string/arm/memcpy.S
@@ -1,5 +1,3 @@
-#if !__ARMEB__
-
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
@@ -42,7 +40,7 @@
* code safely callable from thumb mode, adjusting the return
* instructions to be compatible with pre-thumb ARM cpus, removal of
* prefetch code that is not compatible with older cpus and support for
- * building as thumb 2.
+ * building as thumb 2 and big-endian.
*/
.syntax unified
@@ -227,24 +225,45 @@ non_congruent:
* becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
*/
movs r5, r5, lsl #31
+
+#if __ARMEB__
+ movmi r3, r3, ror #24
+ strbmi r3, [r0], #1
+ movcs r3, r3, ror #24
+ strbcs r3, [r0], #1
+ movcs r3, r3, ror #24
+ strbcs r3, [r0], #1
+#else
strbmi r3, [r0], #1
movmi r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
+#endif
cmp r2, #4
blo partial_word_tail
+#if __ARMEB__
+ mov r3, r3, lsr r12
+ mov r3, r3, lsl r12
+#endif
+
/* Align destination to 32 bytes (cache line boundary) */
1: tst r0, #0x1c
beq 2f
ldr r5, [r1], #4
sub r2, r2, #4
+#if __ARMEB__
+ mov r4, r5, lsr lr
+ orr r4, r4, r3
+ mov r3, r5, lsl r12
+#else
mov r4, r5, lsl lr
orr r4, r4, r3
mov r3, r5, lsr r12
+#endif
str r4, [r0], #4
cmp r2, #4
bhs 1b
@@ -270,6 +289,25 @@ loop16:
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
+#if __ARMEB__
+ orr r3, r3, r4, lsr #16
+ mov r4, r4, lsl #16
+ orr r4, r4, r5, lsr #16
+ mov r5, r5, lsl #16
+ orr r5, r5, r6, lsr #16
+ mov r6, r6, lsl #16
+ orr r6, r6, r7, lsr #16
+ mov r7, r7, lsl #16
+ orr r7, r7, r8, lsr #16
+ mov r8, r8, lsl #16
+ orr r8, r8, r9, lsr #16
+ mov r9, r9, lsl #16
+ orr r9, r9, r10, lsr #16
+ mov r10, r10, lsl #16
+ orr r10, r10, r11, lsr #16
+ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+ mov r3, r11, lsl #16
+#else
orr r3, r3, r4, lsl #16
mov r4, r4, lsr #16
orr r4, r4, r5, lsl #16
@@ -287,6 +325,7 @@ loop16:
orr r10, r10, r11, lsl #16
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #16
+#endif
bhs 1b
b less_than_thirtytwo
@@ -296,6 +335,25 @@ loop8:
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
+#if __ARMEB__
+ orr r3, r3, r4, lsr #24
+ mov r4, r4, lsl #8
+ orr r4, r4, r5, lsr #24
+ mov r5, r5, lsl #8
+ orr r5, r5, r6, lsr #24
+ mov r6, r6, lsl #8
+ orr r6, r6, r7, lsr #24
+ mov r7, r7, lsl #8
+ orr r7, r7, r8, lsr #24
+ mov r8, r8, lsl #8
+ orr r8, r8, r9, lsr #24
+ mov r9, r9, lsl #8
+ orr r9, r9, r10, lsr #24
+ mov r10, r10, lsl #8
+ orr r10, r10, r11, lsr #24
+ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+ mov r3, r11, lsl #8
+#else
orr r3, r3, r4, lsl #24
mov r4, r4, lsr #8
orr r4, r4, r5, lsl #24
@@ -313,6 +371,7 @@ loop8:
orr r10, r10, r11, lsl #24
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #8
+#endif
bhs 1b
b less_than_thirtytwo
@@ -322,6 +381,25 @@ loop24:
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
+#if __ARMEB__
+ orr r3, r3, r4, lsr #8
+ mov r4, r4, lsl #24
+ orr r4, r4, r5, lsr #8
+ mov r5, r5, lsl #24
+ orr r5, r5, r6, lsr #8
+ mov r6, r6, lsl #24
+ orr r6, r6, r7, lsr #8
+ mov r7, r7, lsl #24
+ orr r7, r7, r8, lsr #8
+ mov r8, r8, lsl #24
+ orr r8, r8, r9, lsr #8
+ mov r9, r9, lsl #24
+ orr r9, r9, r10, lsr #8
+ mov r10, r10, lsl #24
+ orr r10, r10, r11, lsr #8
+ stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+ mov r3, r11, lsl #24
+#else
orr r3, r3, r4, lsl #8
mov r4, r4, lsr #24
orr r4, r4, r5, lsl #8
@@ -339,6 +417,7 @@ loop24:
orr r10, r10, r11, lsl #8
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #24
+#endif
bhs 1b
less_than_thirtytwo:
@@ -350,9 +429,15 @@ less_than_thirtytwo:
1: ldr r5, [r1], #4
sub r2, r2, #4
+#if __ARMEB__
+ mov r4, r5, lsr lr
+ orr r4, r4, r3
+ mov r3, r5, lsl r12
+#else
mov r4, r5, lsl lr
orr r4, r4, r3
mov r3, r5, lsr r12
+#endif
str r4, [r0], #4
cmp r2, #4
bhs 1b
@@ -360,11 +445,20 @@ less_than_thirtytwo:
partial_word_tail:
/* we have a partial word in the input buffer */
movs r5, lr, lsl #(31-3)
+#if __ARMEB__
+ movmi r3, r3, ror #24
+ strbmi r3, [r0], #1
+ movcs r3, r3, ror #24
+ strbcs r3, [r0], #1
+ movcs r3, r3, ror #24
+ strbcs r3, [r0], #1
+#else
strbmi r3, [r0], #1
movmi r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
strbcs r3, [r0], #1
+#endif
/* Refill spilled registers from the stack. Don't update sp. */
ldmfd sp, {r5-r11}
@@ -383,4 +477,3 @@ copy_last_3_and_return:
ldmfd sp!, {r0, r4, lr}
bx lr
-#endif
diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c
deleted file mode 100644
index 041614f4..00000000
--- a/src/string/arm/memcpy.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#if __ARMEB__
-#include "../memcpy.c"
-#endif
--
2.23.0
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/2] Add big-endian support to ARM assembler memcpy
2019-09-13 18:44 ` [PATCH 2/2] Add big-endian " Andre McCurdy
@ 2019-09-13 18:59 ` Rich Felker
2019-09-13 20:38 ` Andre McCurdy
0 siblings, 1 reply; 14+ messages in thread
From: Rich Felker @ 2019-09-13 18:59 UTC (permalink / raw)
To: musl
On Fri, Sep 13, 2019 at 11:44:32AM -0700, Andre McCurdy wrote:
> Allow the existing ARM assembler memcpy implementation to be used for
> both big and little endian targets.
Nice. I don't want to merge this just before release, but as long as
it looks ok I should be able to review and merge it afterward.
Note that I'd really like to replace this giant file with C using
inline asm just for the inner block copies and C for all the flow
control, but I don't mind merging this first as long as it's correct.
Rich
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/2] Add big-endian support to ARM assembler memcpy
2019-09-13 18:59 ` Rich Felker
@ 2019-09-13 20:38 ` Andre McCurdy
2020-01-15 15:45 ` [musl] " Rich Felker
0 siblings, 1 reply; 14+ messages in thread
From: Andre McCurdy @ 2019-09-13 20:38 UTC (permalink / raw)
To: musl
On Fri, Sep 13, 2019 at 11:59 AM Rich Felker <dalias@libc.org> wrote:
>
> On Fri, Sep 13, 2019 at 11:44:32AM -0700, Andre McCurdy wrote:
> > Allow the existing ARM assembler memcpy implementation to be used for
> > both big and little endian targets.
>
> Nice. I don't want to merge this just before release, but as long as
> it looks ok I should be able to review and merge it afterward.
>
> Note that I'd really like to replace this giant file with C using
> inline asm just for the inner block copies and C for all the flow
> control, but I don't mind merging this first as long as it's correct.
Sounds good. I'll wait for your feedback after the upcoming release.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [musl] [PATCH 2/2] Add big-endian support to ARM assembler memcpy
2019-09-13 20:38 ` Andre McCurdy
@ 2020-01-15 15:45 ` Rich Felker
2020-01-15 18:41 ` Andre McCurdy
0 siblings, 1 reply; 14+ messages in thread
From: Rich Felker @ 2020-01-15 15:45 UTC (permalink / raw)
To: musl
On Fri, Sep 13, 2019 at 01:38:34PM -0700, Andre McCurdy wrote:
> On Fri, Sep 13, 2019 at 11:59 AM Rich Felker <dalias@libc.org> wrote:
> >
> > On Fri, Sep 13, 2019 at 11:44:32AM -0700, Andre McCurdy wrote:
> > > Allow the existing ARM assembler memcpy implementation to be used for
> > > both big and little endian targets.
> >
> > Nice. I don't want to merge this just before release, but as long as
> > it looks ok I should be able to review and merge it afterward.
> >
> > Note that I'd really like to replace this giant file with C using
> > inline asm just for the inner block copies and C for all the flow
> > control, but I don't mind merging this first as long as it's correct.
>
> Sounds good. I'll wait for your feedback after the upcoming release.
Sorry this dropped off my radar. I'd like to merge at least the thumb
part since it's simple enough to review quickly and users have
actually complained about memcpy being slow on armv7 with -mthumb as
default.
Rich
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [musl] [PATCH 1/2] Add Thumb2 support to ARM assembler memcpy
2019-09-13 18:44 [PATCH 1/2] Add Thumb2 support to ARM assembler memcpy Andre McCurdy
2019-09-13 18:44 ` [PATCH 2/2] Add big-endian " Andre McCurdy
@ 2020-01-15 16:35 ` Rich Felker
2020-01-15 18:49 ` Andre McCurdy
1 sibling, 1 reply; 14+ messages in thread
From: Rich Felker @ 2020-01-15 16:35 UTC (permalink / raw)
To: musl
On Fri, Sep 13, 2019 at 11:44:31AM -0700, Andre McCurdy wrote:
> For Thumb2 compatibility, replace two instances of a single
> instruction "orr with a variable shift" with the two instruction
> equivalent. Neither of the replacements are in a performance critical
> loop.
> ---
> src/string/arm/memcpy.c | 2 +-
> src/string/arm/memcpy_le.S | 17 ++++++++++-------
> 2 files changed, 11 insertions(+), 8 deletions(-)
>
> diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c
> index f703c9bd..041614f4 100644
> --- a/src/string/arm/memcpy.c
> +++ b/src/string/arm/memcpy.c
> @@ -1,3 +1,3 @@
> -#if __ARMEB__ || __thumb__
> +#if __ARMEB__
> #include "../memcpy.c"
> #endif
> diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
> index 9cfbcb2a..64bc5f9e 100644
> --- a/src/string/arm/memcpy_le.S
> +++ b/src/string/arm/memcpy_le.S
> @@ -1,4 +1,4 @@
> -#if !__ARMEB__ && !__thumb__
> +#if !__ARMEB__
>
> /*
> * Copyright (C) 2008 The Android Open Source Project
> @@ -40,8 +40,9 @@
> * This file has been modified from the original for use in musl libc.
> * The main changes are: addition of .type memcpy,%function to make the
> * code safely callable from thumb mode, adjusting the return
> - * instructions to be compatible with pre-thumb ARM cpus, and removal
> - * of prefetch code that is not compatible with older cpus.
> + * instructions to be compatible with pre-thumb ARM cpus, removal of
> + * prefetch code that is not compatible with older cpus and support for
> + * building as thumb 2.
> */
>
> .syntax unified
> @@ -241,8 +242,9 @@ non_congruent:
> beq 2f
> ldr r5, [r1], #4
> sub r2, r2, #4
> - orr r4, r3, r5, lsl lr
> - mov r3, r5, lsr r12
> + mov r4, r5, lsl lr
> + orr r4, r4, r3
> + mov r3, r5, lsr r12
> str r4, [r0], #4
> cmp r2, #4
> bhs 1b
This is outside of loops and not a hot path,
> @@ -348,8 +350,9 @@ less_than_thirtytwo:
>
> 1: ldr r5, [r1], #4
> sub r2, r2, #4
> - orr r4, r3, r5, lsl lr
> - mov r3, r5, lsr r12
> + mov r4, r5, lsl lr
> + orr r4, r4, r3
> + mov r3, r5, lsr r12
> str r4, [r0], #4
> cmp r2, #4
> bhs 1b
This one is in a loop, but perhaps not terribly critical to
performance. We could keep old version with #if !__thumb__ but I doubt
it matters, and it looks like hardly anyone is using pre-thumb2 ARM
anymore anyway; a show-stopping bug went uncaught for over a year in
other things for v6.
One cosmetic fix I'd like to make when applying this is keeping the
old gratuitously-ugly formatting just so the actual change isn't
obscured by the formatting-only change on an adjacent line. I can
handle that though.
Rich
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [musl] [PATCH 2/2] Add big-endian support to ARM assembler memcpy
2020-01-15 15:45 ` [musl] " Rich Felker
@ 2020-01-15 18:41 ` Andre McCurdy
2020-01-15 19:22 ` Rich Felker
2020-01-16 15:21 ` Natanael Copa
0 siblings, 2 replies; 14+ messages in thread
From: Andre McCurdy @ 2020-01-15 18:41 UTC (permalink / raw)
To: musl
On Wed, Jan 15, 2020 at 7:46 AM Rich Felker <dalias@libc.org> wrote:
> On Fri, Sep 13, 2019 at 01:38:34PM -0700, Andre McCurdy wrote:
> > On Fri, Sep 13, 2019 at 11:59 AM Rich Felker <dalias@libc.org> wrote:
> > > On Fri, Sep 13, 2019 at 11:44:32AM -0700, Andre McCurdy wrote:
> > > > Allow the existing ARM assembler memcpy implementation to be used for
> > > > both big and little endian targets.
> > >
> > > Nice. I don't want to merge this just before release, but as long as
> > > it looks ok I should be able to review and merge it afterward.
> > >
> > > Note that I'd really like to replace this giant file with C using
> > > inline asm just for the inner block copies and C for all the flow
> > > control, but I don't mind merging this first as long as it's correct.
> >
> > Sounds good. I'll wait for your feedback after the upcoming release.
>
> Sorry this dropped off my radar. I'd like to merge at least the thumb
> part since it's simple enough to review quickly and users have
> actually complained about memcpy being slow on armv7 with -mthumb as
> default.
Interesting. I wonder what the reference was against which the musl C
code was compared? From my own benchmarking I didn't find the musl
assembler to be much faster than the C code. There are armv6 and maybe
early armv7 CPUs where explicit prefetch instructions make a huge
difference (much more so than C -vs- assembler). Did the users who
complained about musl memcpy() compare against a memcpy() which uses
prefetch? For armv7 using NEON may help, although the latest armv7
cores seem to perform very well with plain old C code too. There are
lots of trade offs so it's impossible for a single implementation to
be universally optimal. The "arm-mem" routines used on Raspberry Pi
seem to be very fast for many targets, but unfortunately the armv6
memcpy generates mis-aligned accesses so isn't suitable for armv5.
https://github.com/bavison/arm-mem/
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [musl] [PATCH 1/2] Add Thumb2 support to ARM assembler memcpy
2020-01-15 16:35 ` [musl] [PATCH 1/2] Add Thumb2 " Rich Felker
@ 2020-01-15 18:49 ` Andre McCurdy
2020-01-15 19:24 ` Rich Felker
0 siblings, 1 reply; 14+ messages in thread
From: Andre McCurdy @ 2020-01-15 18:49 UTC (permalink / raw)
To: musl
On Wed, Jan 15, 2020 at 8:36 AM Rich Felker <dalias@libc.org> wrote:
> On Fri, Sep 13, 2019 at 11:44:31AM -0700, Andre McCurdy wrote:
> > For Thumb2 compatibility, replace two instances of a single
> > instruction "orr with a variable shift" with the two instruction
> > equivalent. Neither of the replacements are in a performance critical
> > loop.
> > ---
> > src/string/arm/memcpy.c | 2 +-
> > src/string/arm/memcpy_le.S | 17 ++++++++++-------
> > 2 files changed, 11 insertions(+), 8 deletions(-)
> >
> > diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c
> > index f703c9bd..041614f4 100644
> > --- a/src/string/arm/memcpy.c
> > +++ b/src/string/arm/memcpy.c
> > @@ -1,3 +1,3 @@
> > -#if __ARMEB__ || __thumb__
> > +#if __ARMEB__
> > #include "../memcpy.c"
> > #endif
> > diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
> > index 9cfbcb2a..64bc5f9e 100644
> > --- a/src/string/arm/memcpy_le.S
> > +++ b/src/string/arm/memcpy_le.S
> > @@ -1,4 +1,4 @@
> > -#if !__ARMEB__ && !__thumb__
> > +#if !__ARMEB__
> >
> > /*
> > * Copyright (C) 2008 The Android Open Source Project
> > @@ -40,8 +40,9 @@
> > * This file has been modified from the original for use in musl libc.
> > * The main changes are: addition of .type memcpy,%function to make the
> > * code safely callable from thumb mode, adjusting the return
> > - * instructions to be compatible with pre-thumb ARM cpus, and removal
> > - * of prefetch code that is not compatible with older cpus.
> > + * instructions to be compatible with pre-thumb ARM cpus, removal of
> > + * prefetch code that is not compatible with older cpus and support for
> > + * building as thumb 2.
> > */
> >
> > .syntax unified
> > @@ -241,8 +242,9 @@ non_congruent:
> > beq 2f
> > ldr r5, [r1], #4
> > sub r2, r2, #4
> > - orr r4, r3, r5, lsl lr
> > - mov r3, r5, lsr r12
> > + mov r4, r5, lsl lr
> > + orr r4, r4, r3
> > + mov r3, r5, lsr r12
> > str r4, [r0], #4
> > cmp r2, #4
> > bhs 1b
>
> This is outside of loops and not a hot path,
>
> > @@ -348,8 +350,9 @@ less_than_thirtytwo:
> >
> > 1: ldr r5, [r1], #4
> > sub r2, r2, #4
> > - orr r4, r3, r5, lsl lr
> > - mov r3, r5, lsr r12
> > + mov r4, r5, lsl lr
> > + orr r4, r4, r3
> > + mov r3, r5, lsr r12
> > str r4, [r0], #4
> > cmp r2, #4
> > bhs 1b
>
> This one is in a loop, but perhaps not terribly critical to
> performance.
Yes, it's in a loop, but I can confirm it's not a performance critical one.
> We could keep old version with #if !__thumb__ but I doubt
> it matters, and it looks like hardly anyone is using pre-thumb2 ARM
> anymore anyway; a show-stopping bug went uncaught for over a year in
> other things for v6.
I was meaning to ask about that after seeing your recent commit in
master. My primary target is pre-thumb2 armv6 and I hadn't noticed any
problems...
> One cosmetic fix I'd like to make when applying this is keeping the
> old gratuitously-ugly formatting just so the actual change isn't
> obscured by the formatting-only change on an adjacent line. I can
> handle that though.
>
> Rich
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [musl] [PATCH 2/2] Add big-endian support to ARM assembler memcpy
2020-01-15 18:41 ` Andre McCurdy
@ 2020-01-15 19:22 ` Rich Felker
2020-01-15 20:54 ` Andre McCurdy
2020-01-16 15:21 ` Natanael Copa
1 sibling, 1 reply; 14+ messages in thread
From: Rich Felker @ 2020-01-15 19:22 UTC (permalink / raw)
To: musl
On Wed, Jan 15, 2020 at 10:41:08AM -0800, Andre McCurdy wrote:
> On Wed, Jan 15, 2020 at 7:46 AM Rich Felker <dalias@libc.org> wrote:
> > On Fri, Sep 13, 2019 at 01:38:34PM -0700, Andre McCurdy wrote:
> > > On Fri, Sep 13, 2019 at 11:59 AM Rich Felker <dalias@libc.org> wrote:
> > > > On Fri, Sep 13, 2019 at 11:44:32AM -0700, Andre McCurdy wrote:
> > > > > Allow the existing ARM assembler memcpy implementation to be used for
> > > > > both big and little endian targets.
> > > >
> > > > Nice. I don't want to merge this just before release, but as long as
> > > > it looks ok I should be able to review and merge it afterward.
> > > >
> > > > Note that I'd really like to replace this giant file with C using
> > > > inline asm just for the inner block copies and C for all the flow
> > > > control, but I don't mind merging this first as long as it's correct.
> > >
> > > Sounds good. I'll wait for your feedback after the upcoming release.
> >
> > Sorry this dropped off my radar. I'd like to merge at least the thumb
> > part since it's simple enough to review quickly and users have
> > actually complained about memcpy being slow on armv7 with -mthumb as
> > default.
>
> Interesting. I wonder what the reference was against which the musl C
> code was compared? From my own benchmarking I didn't find the musl
> assembler to be much faster than the C code. There are armv6 and maybe
> early armv7 CPUs where explicit prefetch instructions make a huge
> difference (much more so than C -vs- assembler). Did the users who
> complained about musl memcpy() compare against a memcpy() which uses
> prefetch? For armv7 using NEON may help, although the latest armv7
> cores seem to perform very well with plain old C code too. There are
> lots of trade offs so it's impossible for a single implementation to
> be universally optimal. The "arm-mem" routines used on Raspberry Pi
> seem to be very fast for many targets, but unfortunately the armv6
> memcpy generates mis-aligned accesses so isn't suitable for armv5.
>
> https://github.com/bavison/arm-mem/
I'm not sure of the details but the comparison was just between the
armv6 version of Alpine and the armv7 version (so using musl's
memcpy_le.S vs memcpy.c).
Rich
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [musl] [PATCH 1/2] Add Thumb2 support to ARM assembler memcpy
2020-01-15 18:49 ` Andre McCurdy
@ 2020-01-15 19:24 ` Rich Felker
2020-01-15 20:20 ` Andre McCurdy
0 siblings, 1 reply; 14+ messages in thread
From: Rich Felker @ 2020-01-15 19:24 UTC (permalink / raw)
To: musl
On Wed, Jan 15, 2020 at 10:49:03AM -0800, Andre McCurdy wrote:
> On Wed, Jan 15, 2020 at 8:36 AM Rich Felker <dalias@libc.org> wrote:
> > On Fri, Sep 13, 2019 at 11:44:31AM -0700, Andre McCurdy wrote:
> > > For Thumb2 compatibility, replace two instances of a single
> > > instruction "orr with a variable shift" with the two instruction
> > > equivalent. Neither of the replacements are in a performance critical
> > > loop.
> > > ---
> > > src/string/arm/memcpy.c | 2 +-
> > > src/string/arm/memcpy_le.S | 17 ++++++++++-------
> > > 2 files changed, 11 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c
> > > index f703c9bd..041614f4 100644
> > > --- a/src/string/arm/memcpy.c
> > > +++ b/src/string/arm/memcpy.c
> > > @@ -1,3 +1,3 @@
> > > -#if __ARMEB__ || __thumb__
> > > +#if __ARMEB__
> > > #include "../memcpy.c"
> > > #endif
> > > diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
> > > index 9cfbcb2a..64bc5f9e 100644
> > > --- a/src/string/arm/memcpy_le.S
> > > +++ b/src/string/arm/memcpy_le.S
> > > @@ -1,4 +1,4 @@
> > > -#if !__ARMEB__ && !__thumb__
> > > +#if !__ARMEB__
> > >
> > > /*
> > > * Copyright (C) 2008 The Android Open Source Project
> > > @@ -40,8 +40,9 @@
> > > * This file has been modified from the original for use in musl libc.
> > > * The main changes are: addition of .type memcpy,%function to make the
> > > * code safely callable from thumb mode, adjusting the return
> > > - * instructions to be compatible with pre-thumb ARM cpus, and removal
> > > - * of prefetch code that is not compatible with older cpus.
> > > + * instructions to be compatible with pre-thumb ARM cpus, removal of
> > > + * prefetch code that is not compatible with older cpus and support for
> > > + * building as thumb 2.
> > > */
> > >
> > > .syntax unified
> > > @@ -241,8 +242,9 @@ non_congruent:
> > > beq 2f
> > > ldr r5, [r1], #4
> > > sub r2, r2, #4
> > > - orr r4, r3, r5, lsl lr
> > > - mov r3, r5, lsr r12
> > > + mov r4, r5, lsl lr
> > > + orr r4, r4, r3
> > > + mov r3, r5, lsr r12
> > > str r4, [r0], #4
> > > cmp r2, #4
> > > bhs 1b
> >
> > This is outside of loops and not a hot path,
> >
> > > @@ -348,8 +350,9 @@ less_than_thirtytwo:
> > >
> > > 1: ldr r5, [r1], #4
> > > sub r2, r2, #4
> > > - orr r4, r3, r5, lsl lr
> > > - mov r3, r5, lsr r12
> > > + mov r4, r5, lsl lr
> > > + orr r4, r4, r3
> > > + mov r3, r5, lsr r12
> > > str r4, [r0], #4
> > > cmp r2, #4
> > > bhs 1b
> >
> > This one is in a loop, but perhaps not terribly critical to
> > performance.
>
> Yes, it's in a loop, but I can confirm it's not a performance critical one.
Thanks.
> > We could keep old version with #if !__thumb__ but I doubt
> > it matters, and it looks like hardly anyone is using pre-thumb2 ARM
> > anymore anyway; a show-stopping bug went uncaught for over a year in
> > other things for v6.
>
> I was meaning to ask about that after seeing your recent commit in
> master. My primary target is pre-thumb2 armv6 and I hadn't noticed any
> problems...
I wonder if there was some magical mechanism by which the anticipated
crash failed to trigger. It certainly triggered in the other affected
arch, sh2, though. If you happen to look at it and find out what was
going on, let us know on the list.
Rich
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [musl] [PATCH 1/2] Add Thumb2 support to ARM assembler memcpy
2020-01-15 19:24 ` Rich Felker
@ 2020-01-15 20:20 ` Andre McCurdy
2020-01-15 20:38 ` Rich Felker
0 siblings, 1 reply; 14+ messages in thread
From: Andre McCurdy @ 2020-01-15 20:20 UTC (permalink / raw)
To: musl
On Wed, Jan 15, 2020 at 11:24 AM Rich Felker <dalias@libc.org> wrote:
> On Wed, Jan 15, 2020 at 10:49:03AM -0800, Andre McCurdy wrote:
> > On Wed, Jan 15, 2020 at 8:36 AM Rich Felker <dalias@libc.org> wrote:
> > > On Fri, Sep 13, 2019 at 11:44:31AM -0700, Andre McCurdy wrote:
> > > > For Thumb2 compatibility, replace two instances of a single
> > > > instruction "orr with a variable shift" with the two instruction
> > > > equivalent. Neither of the replacements are in a performance critical
> > > > loop.
> > > > ---
> > > > src/string/arm/memcpy.c | 2 +-
> > > > src/string/arm/memcpy_le.S | 17 ++++++++++-------
> > > > 2 files changed, 11 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c
> > > > index f703c9bd..041614f4 100644
> > > > --- a/src/string/arm/memcpy.c
> > > > +++ b/src/string/arm/memcpy.c
> > > > @@ -1,3 +1,3 @@
> > > > -#if __ARMEB__ || __thumb__
> > > > +#if __ARMEB__
> > > > #include "../memcpy.c"
> > > > #endif
> > > > diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
> > > > index 9cfbcb2a..64bc5f9e 100644
> > > > --- a/src/string/arm/memcpy_le.S
> > > > +++ b/src/string/arm/memcpy_le.S
> > > > @@ -1,4 +1,4 @@
> > > > -#if !__ARMEB__ && !__thumb__
> > > > +#if !__ARMEB__
> > > >
> > > > /*
> > > > * Copyright (C) 2008 The Android Open Source Project
> > > > @@ -40,8 +40,9 @@
> > > > * This file has been modified from the original for use in musl libc.
> > > > * The main changes are: addition of .type memcpy,%function to make the
> > > > * code safely callable from thumb mode, adjusting the return
> > > > - * instructions to be compatible with pre-thumb ARM cpus, and removal
> > > > - * of prefetch code that is not compatible with older cpus.
> > > > + * instructions to be compatible with pre-thumb ARM cpus, removal of
> > > > + * prefetch code that is not compatible with older cpus and support for
> > > > + * building as thumb 2.
> > > > */
> > > >
> > > > .syntax unified
> > > > @@ -241,8 +242,9 @@ non_congruent:
> > > > beq 2f
> > > > ldr r5, [r1], #4
> > > > sub r2, r2, #4
> > > > - orr r4, r3, r5, lsl lr
> > > > - mov r3, r5, lsr r12
> > > > + mov r4, r5, lsl lr
> > > > + orr r4, r4, r3
> > > > + mov r3, r5, lsr r12
> > > > str r4, [r0], #4
> > > > cmp r2, #4
> > > > bhs 1b
> > >
> > > This is outside of loops and not a hot path,
> > >
> > > > @@ -348,8 +350,9 @@ less_than_thirtytwo:
> > > >
> > > > 1: ldr r5, [r1], #4
> > > > sub r2, r2, #4
> > > > - orr r4, r3, r5, lsl lr
> > > > - mov r3, r5, lsr r12
> > > > + mov r4, r5, lsl lr
> > > > + orr r4, r4, r3
> > > > + mov r3, r5, lsr r12
> > > > str r4, [r0], #4
> > > > cmp r2, #4
> > > > bhs 1b
> > >
> > > This one is in a loop, but perhaps not terribly critical to
> > > performance.
> >
> > Yes, it's in a loop, but I can confirm it's not a performance critical one.
>
> Thanks.
>
> > > We could keep old version with #if !__thumb__ but I doubt
> > > it matters, and it looks like hardly anyone is using pre-thumb2 ARM
> > > anymore anyway; a show-stopping bug went uncaught for over a year in
> > > other things for v6.
> >
> > I was meaning to ask about that after seeing your recent commit in
> > master. My primary target is pre-thumb2 armv6 and I hadn't noticed any
> > problems...
>
> I wonder if there was some magical mechanism by which the anticipated
> crash failed to trigger. It certainly triggered in the other affected
> arch, sh2, though. If you happen to look at it and find out what was
> going on, let us know on the list.
For ARM, testing libc.auxv is guarded by a test on __hwcap, so if
__hwcap is also being initialised after __set_thread_area(), then
__set_thread_area() will never access libc.auxv?
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [musl] [PATCH 1/2] Add Thumb2 support to ARM assembler memcpy
2020-01-15 20:20 ` Andre McCurdy
@ 2020-01-15 20:38 ` Rich Felker
0 siblings, 0 replies; 14+ messages in thread
From: Rich Felker @ 2020-01-15 20:38 UTC (permalink / raw)
To: musl
On Wed, Jan 15, 2020 at 12:20:08PM -0800, Andre McCurdy wrote:
> On Wed, Jan 15, 2020 at 11:24 AM Rich Felker <dalias@libc.org> wrote:
> > On Wed, Jan 15, 2020 at 10:49:03AM -0800, Andre McCurdy wrote:
> > > On Wed, Jan 15, 2020 at 8:36 AM Rich Felker <dalias@libc.org> wrote:
> > > > On Fri, Sep 13, 2019 at 11:44:31AM -0700, Andre McCurdy wrote:
> > > > > For Thumb2 compatibility, replace two instances of a single
> > > > > instruction "orr with a variable shift" with the two instruction
> > > > > equivalent. Neither of the replacements are in a performance critical
> > > > > loop.
> > > > > ---
> > > > > src/string/arm/memcpy.c | 2 +-
> > > > > src/string/arm/memcpy_le.S | 17 ++++++++++-------
> > > > > 2 files changed, 11 insertions(+), 8 deletions(-)
> > > > >
> > > > > diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c
> > > > > index f703c9bd..041614f4 100644
> > > > > --- a/src/string/arm/memcpy.c
> > > > > +++ b/src/string/arm/memcpy.c
> > > > > @@ -1,3 +1,3 @@
> > > > > -#if __ARMEB__ || __thumb__
> > > > > +#if __ARMEB__
> > > > > #include "../memcpy.c"
> > > > > #endif
> > > > > diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
> > > > > index 9cfbcb2a..64bc5f9e 100644
> > > > > --- a/src/string/arm/memcpy_le.S
> > > > > +++ b/src/string/arm/memcpy_le.S
> > > > > @@ -1,4 +1,4 @@
> > > > > -#if !__ARMEB__ && !__thumb__
> > > > > +#if !__ARMEB__
> > > > >
> > > > > /*
> > > > > * Copyright (C) 2008 The Android Open Source Project
> > > > > @@ -40,8 +40,9 @@
> > > > > * This file has been modified from the original for use in musl libc.
> > > > > * The main changes are: addition of .type memcpy,%function to make the
> > > > > * code safely callable from thumb mode, adjusting the return
> > > > > - * instructions to be compatible with pre-thumb ARM cpus, and removal
> > > > > - * of prefetch code that is not compatible with older cpus.
> > > > > + * instructions to be compatible with pre-thumb ARM cpus, removal of
> > > > > + * prefetch code that is not compatible with older cpus and support for
> > > > > + * building as thumb 2.
> > > > > */
> > > > >
> > > > > .syntax unified
> > > > > @@ -241,8 +242,9 @@ non_congruent:
> > > > > beq 2f
> > > > > ldr r5, [r1], #4
> > > > > sub r2, r2, #4
> > > > > - orr r4, r3, r5, lsl lr
> > > > > - mov r3, r5, lsr r12
> > > > > + mov r4, r5, lsl lr
> > > > > + orr r4, r4, r3
> > > > > + mov r3, r5, lsr r12
> > > > > str r4, [r0], #4
> > > > > cmp r2, #4
> > > > > bhs 1b
> > > >
> > > > This is outside of loops and not a hot path,
> > > >
> > > > > @@ -348,8 +350,9 @@ less_than_thirtytwo:
> > > > >
> > > > > 1: ldr r5, [r1], #4
> > > > > sub r2, r2, #4
> > > > > - orr r4, r3, r5, lsl lr
> > > > > - mov r3, r5, lsr r12
> > > > > + mov r4, r5, lsl lr
> > > > > + orr r4, r4, r3
> > > > > + mov r3, r5, lsr r12
> > > > > str r4, [r0], #4
> > > > > cmp r2, #4
> > > > > bhs 1b
> > > >
> > > > This one is in a loop, but perhaps not terribly critical to
> > > > performance.
> > >
> > > Yes, it's in a loop, but I can confirm it's not a performance critical one.
> >
> > Thanks.
> >
> > > > We could keep old version with #if !__thumb__ but I doubt
> > > > it matters, and it looks like hardly anyone is using pre-thumb2 ARM
> > > > anymore anyway; a show-stopping bug went uncaught for over a year in
> > > > other things for v6.
> > >
> > > I was meaning to ask about that after seeing your recent commit in
> > > master. My primary target is pre-thumb2 armv6 and I hadn't noticed any
> > > problems...
> >
> > I wonder if there was some magical mechanism by which the anticipated
> > crash failed to trigger. It certainly triggered in the other affected
> > arch, sh2, though. If you happen to look at it and find out what was
> > going on, let us know on the list.
>
> For ARM, testing libc.auxv is guarded by a test on __hwcap, so if
> __hwcap is also being initialised after __set_thread_area() then
> __set_thread_area() will never access libc.auxv ?
Ahh, that makes sense. And that exposes another bug: __hwcap needs to
be initialized here, but isn't. Without it, wrong backends will be
used and it will crash on kernels that rip out kuser_helper for
hardening purposes. I'll fix that too. Thanks for finding this!
Rich
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [musl] [PATCH 2/2] Add big-endian support to ARM assembler memcpy
2020-01-15 19:22 ` Rich Felker
@ 2020-01-15 20:54 ` Andre McCurdy
0 siblings, 0 replies; 14+ messages in thread
From: Andre McCurdy @ 2020-01-15 20:54 UTC (permalink / raw)
To: musl
On Wed, Jan 15, 2020 at 11:22 AM Rich Felker <dalias@libc.org> wrote:
> On Wed, Jan 15, 2020 at 10:41:08AM -0800, Andre McCurdy wrote:
> > On Wed, Jan 15, 2020 at 7:46 AM Rich Felker <dalias@libc.org> wrote:
> > > On Fri, Sep 13, 2019 at 01:38:34PM -0700, Andre McCurdy wrote:
> > > > On Fri, Sep 13, 2019 at 11:59 AM Rich Felker <dalias@libc.org> wrote:
> > > > > On Fri, Sep 13, 2019 at 11:44:32AM -0700, Andre McCurdy wrote:
> > > > > > Allow the existing ARM assembler memcpy implementation to be used for
> > > > > > both big and little endian targets.
> > > > >
> > > > > Nice. I don't want to merge this just before release, but as long as
> > > > > it looks ok I should be able to review and merge it afterward.
> > > > >
> > > > > Note that I'd really like to replace this giant file with C using
> > > > > inline asm just for the inner block copies and C for all the flow
> > > > > control, but I don't mind merging this first as long as it's correct.
> > > >
> > > > Sounds good. I'll wait for your feedback after the upcoming release.
> > >
> > > Sorry this dropped off my radar. I'd like to merge at least the thumb
> > > part since it's simple enough to review quickly and users have
> > > actually complained about memcpy being slow on armv7 with -mthumb as
> > > default.
> >
> > Interesting. I wonder what the reference was against which the musl C
> > code was compared? From my own benchmarking I didn't find the musl
> > assembler to be much faster than the C code. There are armv6 and maybe
> > early armv7 CPUs where explicit prefetch instructions make a huge
> > difference (much more so than C -vs- assembler). Did the users who
> > complained about musl memcpy() compare against a memcpy() which uses
> > prefetch? For armv7 using NEON may help, although the latest armv7
> > cores seem to perform very well with plain old C code too. There are
> > lots of trade offs so it's impossible for a single implementation to
> > be universally optimal. The "arm-mem" routines used on Raspberry Pi
> > seem to be very fast for many targets, but unfortunately the armv6
> > memcpy generates mis-aligned accesses so isn't suitable for armv5.
> >
> > https://github.com/bavison/arm-mem/
>
> I'm not sure of the details but the comparison was just between the
> armv6 version of Alpine and the armv7 version (so using musl's
> memcpy_le.S vs memcpy.c).
OK. A comparison with the arm-mem version would be interesting too.
The arm-mem library is designed to be preloaded (ahead of glibc for
Raspberry Pi distros) so it should be possible to preload ahead of
musl on Alpine, which makes testing slightly easier.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [musl] [PATCH 2/2] Add big-endian support to ARM assembler memcpy
2020-01-15 18:41 ` Andre McCurdy
2020-01-15 19:22 ` Rich Felker
@ 2020-01-16 15:21 ` Natanael Copa
1 sibling, 0 replies; 14+ messages in thread
From: Natanael Copa @ 2020-01-16 15:21 UTC (permalink / raw)
To: Andre McCurdy; +Cc: musl
On Wed, 15 Jan 2020 10:41:08 -0800
Andre McCurdy <armccurdy@gmail.com> wrote:
> On Wed, Jan 15, 2020 at 7:46 AM Rich Felker <dalias@libc.org> wrote:
> > On Fri, Sep 13, 2019 at 01:38:34PM -0700, Andre McCurdy wrote:
> > > On Fri, Sep 13, 2019 at 11:59 AM Rich Felker <dalias@libc.org> wrote:
> > > > On Fri, Sep 13, 2019 at 11:44:32AM -0700, Andre McCurdy wrote:
> > > > > Allow the existing ARM assembler memcpy implementation to be used for
> > > > > both big and little endian targets.
> > > >
> > > > Nice. I don't want to merge this just before release, but as long as
> > > > it looks ok I should be able to review and merge it afterward.
> > > >
> > > > Note that I'd really like to replace this giant file with C using
> > > > inline asm just for the inner block copies and C for all the flow
> > > > control, but I don't mind merging this first as long as it's correct.
> > >
> > > Sounds good. I'll wait for your feedback after the upcoming release.
> >
> > Sorry this dropped off my radar. I'd like to merge at least the thumb
> > part since it's simple enough to review quickly and users have
> > actually complained about memcpy being slow on armv7 with -mthumb as
> > default.
>
> Interesting. I wonder what the reference was against which the musl C
> code was compared? From my own benchmarking I didn't find the musl
> assembler to be much faster than the C code. There are armv6 and maybe
> early armv7 CPUs where explicit prefetch instructions make a huge
> difference (much more so than C -vs- assembler). Did the users who
> complained about musl memcpy() compare against a memcpy() which uses
> prefetch? For armv7 using NEON may help, although the latest armv7
> cores seem to perform very well with plain old C code too. There are
> lots of trade offs so it's impossible for a single implementation to
> be universally optimal. The "arm-mem" routines used on Raspberry Pi
> seem to be very fast for many targets, but unfortunately the armv6
> memcpy generates mis-aligned accesses so isn't suitable for armv5.
>
> https://github.com/bavison/arm-mem/
The Alpine user reported it here:
https://gitlab.alpinelinux.org/alpine/aports/issues/11128
I don't know if you got the __builtin_memcpy or the libc version. I do
know that qemu once got surprised that `memcpy` used libc's non-atomic
version instead of gcc's atomic __builtin_memcpy. This happened due to
alpine users fortify-headers as FORTIFY_SOURCE implementation.
Not sure if something similar happened here.
-nc
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2020-01-16 15:21 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-13 18:44 [PATCH 1/2] Add Thumb2 support to ARM assembler memcpy Andre McCurdy
2019-09-13 18:44 ` [PATCH 2/2] Add big-endian " Andre McCurdy
2019-09-13 18:59 ` Rich Felker
2019-09-13 20:38 ` Andre McCurdy
2020-01-15 15:45 ` [musl] " Rich Felker
2020-01-15 18:41 ` Andre McCurdy
2020-01-15 19:22 ` Rich Felker
2020-01-15 20:54 ` Andre McCurdy
2020-01-16 15:21 ` Natanael Copa
2020-01-15 16:35 ` [musl] [PATCH 1/2] Add Thumb2 " Rich Felker
2020-01-15 18:49 ` Andre McCurdy
2020-01-15 19:24 ` Rich Felker
2020-01-15 20:20 ` Andre McCurdy
2020-01-15 20:38 ` Rich Felker
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/musl/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).