* build musl for armv7m
@ 2016-06-14 8:49 weimingz
2016-06-14 13:00 ` Rich Felker
2016-06-14 14:38 ` Rich Felker
0 siblings, 2 replies; 21+ messages in thread
From: weimingz @ 2016-06-14 8:49 UTC (permalink / raw)
To: musl
[-- Attachment #1: Type: text/plain, Size: 385 bytes --]
Hi,
I'm building MUSL with -mcpu=cortex-m3. There are a few .s files that
cannot be assembled because: (1) use predicated instructions without IT
instr (2) use sp inside reg list in ldmia/stmia.
Please help to review the attached patch.
Also, is there any easy way of disabling string/arm/memcpy_le.S ? For
baremetal, unaligned access may be unavailable.
Thanks,
Weiming
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-fix-asm-for-cortex-m3.patch --]
[-- Type: text/x-diff; name=0001-fix-asm-for-cortex-m3.patch, Size: 2137 bytes --]
From c8fafd75832b0974be59429c6d59b5c19b535c45 Mon Sep 17 00:00:00 2001
From: Weiming Zhao <weimingz@codeaurora.org>
Date: Tue, 14 Jun 2016 01:36:16 -0700
Subject: [PATCH] fix asm for cortex-m3
---
src/setjmp/arm/longjmp.s | 5 ++++-
src/setjmp/arm/setjmp.s | 4 +++-
src/thread/arm/atomics.s | 3 +++
src/thread/arm/syscall_cp.s | 1 +
4 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/src/setjmp/arm/longjmp.s b/src/setjmp/arm/longjmp.s
index e28d8f3..5c85cba 100644
--- a/src/setjmp/arm/longjmp.s
+++ b/src/setjmp/arm/longjmp.s
@@ -7,8 +7,11 @@ _longjmp:
longjmp:
mov ip,r0
movs r0,r1
+ it eq
moveq r0,#1
- ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
+ ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp}
+ ldr sp, [ip]!
+ ldr lr, [ip]!
adr r1,1f
ldr r2,1f
diff --git a/src/setjmp/arm/setjmp.s b/src/setjmp/arm/setjmp.s
index 8779163..f13068a 100644
--- a/src/setjmp/arm/setjmp.s
+++ b/src/setjmp/arm/setjmp.s
@@ -9,7 +9,9 @@ __setjmp:
_setjmp:
setjmp:
mov ip,r0
- stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
+ stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp}
+ str sp, [ip]!
+ str lr, [ip]!
mov r0,#0
adr r1,1f
diff --git a/src/thread/arm/atomics.s b/src/thread/arm/atomics.s
index 673fc03..05fd9b9 100644
--- a/src/thread/arm/atomics.s
+++ b/src/thread/arm/atomics.s
@@ -49,6 +49,7 @@ __a_cas_dummy:
mov r3,r0
ldr r0,[r2]
subs r0,r3,r0
+ it eq
streq r1,[r2]
bx lr
.global __a_cas_v6
@@ -58,6 +59,7 @@ __a_cas_v6:
mcr p15,0,r0,c7,c10,5
1: .word 0xe1920f9f /* ldrex r0,[r2] */
subs r0,r3,r0
+ itt eq
.word 0x01820f91 /* strexeq r0,r1,[r2] */
teqeq r0,#1
beq 1b
@@ -70,6 +72,7 @@ __a_cas_v7:
.word 0xf57ff05b /* dmb ish */
1: .word 0xe1920f9f /* ldrex r0,[r2] */
subs r0,r3,r0
+ itt eq
.word 0x01820f91 /* strexeq r0,r1,[r2] */
teqeq r0,#1
beq 1b
diff --git a/src/thread/arm/syscall_cp.s b/src/thread/arm/syscall_cp.s
index a5730c0..399700b 100644
--- a/src/thread/arm/syscall_cp.s
+++ b/src/thread/arm/syscall_cp.s
@@ -15,6 +15,7 @@ __syscall_cp_asm:
__cp_begin:
ldr r0,[r0]
cmp r0,#0
+ it ne
blne __cp_cancel
mov r7,r1
mov r0,r2
2.8.4
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-14 8:49 build musl for armv7m weimingz
@ 2016-06-14 13:00 ` Rich Felker
2016-06-14 16:12 ` Zhao, Weiming
2016-06-14 14:38 ` Rich Felker
1 sibling, 1 reply; 21+ messages in thread
From: Rich Felker @ 2016-06-14 13:00 UTC (permalink / raw)
To: musl
On Tue, Jun 14, 2016 at 01:49:40AM -0700, weimingz@codeaurora.org wrote:
> Hi,
>
> I'm building MUSL with -mcpu=cortex-m3. There are a few .s files
> that cannot be assembled because: (1) use predicated instructions
> without IT instr (2) use sp inside reg list in ldmia/stmia.
>
> Please help to review the attached patch.
Did you test anything? These patches do not result in working code;
they just make it assemble without errors. There's already a gas
option to automatically add IT instructions where needed for
thumb-only targets, but that's not the only thing needed to support
thumb-only/cortex-m. I'd be interested in knowing more about the setup
you're trying to target. Is it Linux or bare-metal? If Linux, are you
going to use the ARM/FDPIC toolchain & kernel mods? I'm about to leave
at the moment but I'll follow up with a more detailed review of your
patch later.
> Also, is there any easy way of disabling string/arm/memcpy_le.S ?
> For baremetal, unaligned access may be unavailable.
That file does not perform any unaligned access. It should work on any
EABI-supported version of the arm instruction set.
Rich
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-14 8:49 build musl for armv7m weimingz
2016-06-14 13:00 ` Rich Felker
@ 2016-06-14 14:38 ` Rich Felker
2016-06-14 16:35 ` Zhao, Weiming
1 sibling, 1 reply; 21+ messages in thread
From: Rich Felker @ 2016-06-14 14:38 UTC (permalink / raw)
To: musl
On Tue, Jun 14, 2016 at 01:49:40AM -0700, weimingz@codeaurora.org wrote:
> diff --git a/src/setjmp/arm/longjmp.s b/src/setjmp/arm/longjmp.s
> index e28d8f3..5c85cba 100644
> --- a/src/setjmp/arm/longjmp.s
> +++ b/src/setjmp/arm/longjmp.s
> @@ -7,8 +7,11 @@ _longjmp:
> longjmp:
> mov ip,r0
> movs r0,r1
> + it eq
> moveq r0,#1
> - ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
> + ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp}
> + ldr sp, [ip]!
> + ldr lr, [ip]!
Small note: your indention is inconsistent -- using a single space
instead of a tab.
In my earlier work trying to get an all-thumb build working, I had
tried doing this as:
+ ldmia ip!, {r2,v1,v2,v3,v4,v5,v6,sl,fp,lr}
+ mov sp,r2
which might be marginally faster but of course it reorders the fields
in the jmp_buf. Nothing should be depending on them anyway but I like
your version better I think. Likewise for setjmp.
> diff --git a/src/thread/arm/atomics.s b/src/thread/arm/atomics.s
> index 673fc03..05fd9b9 100644
> --- a/src/thread/arm/atomics.s
> +++ b/src/thread/arm/atomics.s
> @@ -49,6 +49,7 @@ __a_cas_dummy:
> mov r3,r0
> ldr r0,[r2]
> subs r0,r3,r0
> + it eq
> streq r1,[r2]
> bx lr
> .global __a_cas_v6
> @@ -58,6 +59,7 @@ __a_cas_v6:
> mcr p15,0,r0,c7,c10,5
> 1: .word 0xe1920f9f /* ldrex r0,[r2] */
> subs r0,r3,r0
> + itt eq
> .word 0x01820f91 /* strexeq r0,r1,[r2] */
> teqeq r0,#1
> beq 1b
You're ignoring that the .word-encoded instruction is not valid thumb,
and that the two 'eq' conditions are independent. The second one
(teqeq) is using the result of the strexeq, not the result of the
subs. But there are much deeper reasons this file cannot work as thumb
without significant changes: the arithmetic on pc is not
thumb-compatible.
> @@ -70,6 +72,7 @@ __a_cas_v7:
> .word 0xf57ff05b /* dmb ish */
> 1: .word 0xe1920f9f /* ldrex r0,[r2] */
> subs r0,r3,r0
> + itt eq
> .word 0x01820f91 /* strexeq r0,r1,[r2] */
> teqeq r0,#1
> beq 1b
Same here.
There's also the matter of the code to read the thread pointer, which
requires a special CP15 register that cortex-m's lack, I think. I
don't see any alternate way to do this functionality (and it would be
ABI-incompatible anyway) so I think the kernel (or bare-metal
undefined instruction exception trap) needs to just trap and emulate
it.
Rich
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-14 13:00 ` Rich Felker
@ 2016-06-14 16:12 ` Zhao, Weiming
2016-06-14 16:32 ` Szabolcs Nagy
0 siblings, 1 reply; 21+ messages in thread
From: Zhao, Weiming @ 2016-06-14 16:12 UTC (permalink / raw)
To: musl
I haven't did a full test as the functions I modified are not actually
being used.
It is a bare-metal environment, using clang to compile.
Main flags are -mcpu=cortex-m3 -Os -fdata-sections -ffunction-sections
-mno-unaligned-access
Could you please let me know the gas option?
Thanks,
Weiing
On 6/14/2016 6:00 AM, Rich Felker wrote:
> On Tue, Jun 14, 2016 at 01:49:40AM -0700, weimingz@codeaurora.org wrote:
>> Hi,
>>
>> I'm building MUSL with -mcpu=cortex-m3. There are a few .s files
>> that cannot be assembled because: (1) use predicated instructions
>> without IT instr (2) use sp inside reg list in ldmia/stmia.
>>
>> Please help to review the attached patch.
> Did you test anything? These patches do not result in working code;
> they just make it assemble without errors. There's already a gas
> option to automatically add IT instructions where needed for
> thumb-only targets, but that's not the only thing needed to support
> thumb-only/cortex-m. I'd be interested in knowing more about the setup
> you're trying to target. Is it Linux or bare-metal? If Linux, are you
> going to use the ARM/FDPIC toolchain & kernel mods? I'm about to leave
> at the moment but I'll follow up with a more detailed review of your
> patch later.
>
>> Also, is there any easy way of disabling string/arm/memcpy_le.S ?
>> For baremetal, unaligned access may be unavailable.
> That file does not perform any unaligned access. It should work on any
> EABI-supported version of the arm instruction set.
>
> Rich
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-14 16:12 ` Zhao, Weiming
@ 2016-06-14 16:32 ` Szabolcs Nagy
2016-06-14 16:58 ` Zhao, Weiming
0 siblings, 1 reply; 21+ messages in thread
From: Szabolcs Nagy @ 2016-06-14 16:32 UTC (permalink / raw)
To: musl
* Zhao, Weiming <weimingz@codeaurora.org> [2016-06-14 09:12:17 -0700]:
> I haven't did a full test as the functions I modified are not actually being
> used.
>
> It is a bare-metal environment, using clang to compile.
>
> Main flags are -mcpu=cortex-m3 -Os -fdata-sections -ffunction-sections
> -mno-unaligned-access
>
> Could you please let me know the gas option?
>
-mimplicit-it=always
https://sourceware.org/binutils/docs/as/ARM-Options.html
> Thanks,
>
> Weiing
>
> On 6/14/2016 6:00 AM, Rich Felker wrote:
> >On Tue, Jun 14, 2016 at 01:49:40AM -0700, weimingz@codeaurora.org wrote:
> >>Hi,
> >>
> >>I'm building MUSL with -mcpu=cortex-m3. There are a few .s files
> >>that cannot be assembled because: (1) use predicated instructions
> >>without IT instr (2) use sp inside reg list in ldmia/stmia.
> >>
> >>Please help to review the attached patch.
> >Did you test anything? These patches do not result in working code;
> >they just make it assemble without errors. There's already a gas
> >option to automatically add IT instructions where needed for
> >thumb-only targets, but that's not the only thing needed to support
> >thumb-only/cortex-m. I'd be interested in knowing more about the setup
> >you're trying to target. Is it Linux or bare-metal? If Linux, are you
> >going to use the ARM/FDPIC toolchain & kernel mods? I'm about to leave
> >at the moment but I'll follow up with a more detailed review of your
> >patch later.
> >
> >>Also, is there any easy way of disabling string/arm/memcpy_le.S ?
> >>For baremetal, unaligned access may be unavailable.
> >That file does not perform any unaligned access. It should work on any
> >EABI-supported version of the arm instruction set.
> >
> >Rich
>
> --
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-14 14:38 ` Rich Felker
@ 2016-06-14 16:35 ` Zhao, Weiming
0 siblings, 0 replies; 21+ messages in thread
From: Zhao, Weiming @ 2016-06-14 16:35 UTC (permalink / raw)
To: musl
I'll fix the indention.
Why do we use .word encoded instruction? Clang's integrated-as can
encode the instruction correctly. Is that some workaround with some
compiler?
Regarding " The second one(teqeq) is using the result of the strexeq,
not the result of thesubs.":
strexeq won't change flag register:
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0552a/BABFFBJB.html
If it would, the execution of teqeq will depend on the conditionally
executed "strexeq", is that expected?
Thanks,
Weiming
On 6/14/2016 7:38 AM, Rich Felker wrote:
> On Tue, Jun 14, 2016 at 01:49:40AM -0700, weimingz@codeaurora.org wrote:
>> diff --git a/src/setjmp/arm/longjmp.s b/src/setjmp/arm/longjmp.s
>> index e28d8f3..5c85cba 100644
>> --- a/src/setjmp/arm/longjmp.s
>> +++ b/src/setjmp/arm/longjmp.s
>> @@ -7,8 +7,11 @@ _longjmp:
>> longjmp:
>> mov ip,r0
>> movs r0,r1
>> + it eq
>> moveq r0,#1
>> - ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
>> + ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp}
>> + ldr sp, [ip]!
>> + ldr lr, [ip]!
> Small note: your indention is inconsistent -- using a single space
> instead of a tab.
>
> In my earlier work trying to get an all-thumb build working, I had
> tried doing this as:
>
> + ldmia ip!, {r2,v1,v2,v3,v4,v5,v6,sl,fp,lr}
> + mov sp,r2
>
> which might be marginally faster but of course it reorders the fields
> in the jmp_buf. Nothing should be depending on them anyway but I like
> your version better I think. Likewise for setjmp.
>
>> diff --git a/src/thread/arm/atomics.s b/src/thread/arm/atomics.s
>> index 673fc03..05fd9b9 100644
>> --- a/src/thread/arm/atomics.s
>> +++ b/src/thread/arm/atomics.s
>> @@ -49,6 +49,7 @@ __a_cas_dummy:
>> mov r3,r0
>> ldr r0,[r2]
>> subs r0,r3,r0
>> + it eq
>> streq r1,[r2]
>> bx lr
>> .global __a_cas_v6
>> @@ -58,6 +59,7 @@ __a_cas_v6:
>> mcr p15,0,r0,c7,c10,5
>> 1: .word 0xe1920f9f /* ldrex r0,[r2] */
>> subs r0,r3,r0
>> + itt eq
>> .word 0x01820f91 /* strexeq r0,r1,[r2] */
>> teqeq r0,#1
>> beq 1b
> You're ignoring that the .word-encoded instruction is not valid thumb,
> and that the two 'eq' conditions are independent. The second one
> (teqeq) is using the result of the strexeq, not the result of the
> subs. But there are much deeper reasons this file cannot work as thumb
> without significant changes: the arithmetic on pc is not
> thumb-compatible.
>
>> @@ -70,6 +72,7 @@ __a_cas_v7:
>> .word 0xf57ff05b /* dmb ish */
>> 1: .word 0xe1920f9f /* ldrex r0,[r2] */
>> subs r0,r3,r0
>> + itt eq
>> .word 0x01820f91 /* strexeq r0,r1,[r2] */
>> teqeq r0,#1
>> beq 1b
> Same here.
>
> There's also the matter of the code to read the thread pointer, which
> requires a special CP15 register that cortex-m's lack, I think. I
> don't see any alternate way to do this functionality (and it would be
> ABI-incompatible anyway) so I think the kernel (or bare-metal
> undefined instruction exception trap) needs to just trap and emulate
> it.
>
> Rich
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-14 16:32 ` Szabolcs Nagy
@ 2016-06-14 16:58 ` Zhao, Weiming
2016-06-14 17:40 ` Zhao, Weiming
0 siblings, 1 reply; 21+ messages in thread
From: Zhao, Weiming @ 2016-06-14 16:58 UTC (permalink / raw)
To: musl
Thank you, Szabolcs.
It's GAS-only flag, clang's integrated-as doesn't support it. I can use
-no-integrated-as.
It also expose other errors:
src/thread/arm/atomics.s:9: Error: cannot use register index with
PC-relative addressing -- `ldr ip,[pc,ip]'
Clang's integrated-as accepts it but seems encoded incorrectly.
I can fix it.
On 6/14/2016 9:32 AM, Szabolcs Nagy wrote:
> * Zhao, Weiming <weimingz@codeaurora.org> [2016-06-14 09:12:17 -0700]:
>> I haven't did a full test as the functions I modified are not actually being
>> used.
>>
>> It is a bare-metal environment, using clang to compile.
>>
>> Main flags are -mcpu=cortex-m3 -Os -fdata-sections -ffunction-sections
>> -mno-unaligned-access
>>
>> Could you please let me know the gas option?
>>
> -mimplicit-it=always
>
> https://sourceware.org/binutils/docs/as/ARM-Options.html
>
>> Thanks,
>>
>> Weiing
>>
>> On 6/14/2016 6:00 AM, Rich Felker wrote:
>>> On Tue, Jun 14, 2016 at 01:49:40AM -0700, weimingz@codeaurora.org wrote:
>>>> Hi,
>>>>
>>>> I'm building MUSL with -mcpu=cortex-m3. There are a few .s files
>>>> that cannot be assembled because: (1) use predicated instructions
>>>> without IT instr (2) use sp inside reg list in ldmia/stmia.
>>>>
>>>> Please help to review the attached patch.
>>> Did you test anything? These patches do not result in working code;
>>> they just make it assemble without errors. There's already a gas
>>> option to automatically add IT instructions where needed for
>>> thumb-only targets, but that's not the only thing needed to support
>>> thumb-only/cortex-m. I'd be interested in knowing more about the setup
>>> you're trying to target. Is it Linux or bare-metal? If Linux, are you
>>> going to use the ARM/FDPIC toolchain & kernel mods? I'm about to leave
>>> at the moment but I'll follow up with a more detailed review of your
>>> patch later.
>>>
>>>> Also, is there any easy way of disabling string/arm/memcpy_le.S ?
>>>> For baremetal, unaligned access may be unavailable.
>>> That file does not perform any unaligned access. It should work on any
>>> EABI-supported version of the arm instruction set.
>>>
>>> Rich
>> --
>> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-14 16:58 ` Zhao, Weiming
@ 2016-06-14 17:40 ` Zhao, Weiming
2016-06-16 18:34 ` Zhao, Weiming
2016-06-20 17:17 ` Zhao, Weiming
0 siblings, 2 replies; 21+ messages in thread
From: Zhao, Weiming @ 2016-06-14 17:40 UTC (permalink / raw)
To: musl
[-- Attachment #1: Type: text/plain, Size: 2614 bytes --]
Please review the changes.
I'm able to build libc.a with -mcpu=cortex-m3 -Wa,-implicit-it=always
using clang + gas
Thanks,
weiming
On 6/14/2016 9:58 AM, Zhao, Weiming wrote:
> Thank you, Szabolcs.
>
> It's GAS-only flag, clang's integrated-as doesn't support it. I can
> use -no-integrated-as.
>
> It also expose other errors:
>
> src/thread/arm/atomics.s:9: Error: cannot use register index with
> PC-relative addressing -- `ldr ip,[pc,ip]'
>
> Clang's integrated-as accepts it but seems encoded incorrectly.
>
> I can fix it.
>
>
> On 6/14/2016 9:32 AM, Szabolcs Nagy wrote:
>> * Zhao, Weiming <weimingz@codeaurora.org> [2016-06-14 09:12:17 -0700]:
>>> I haven't did a full test as the functions I modified are not
>>> actually being
>>> used.
>>>
>>> It is a bare-metal environment, using clang to compile.
>>>
>>> Main flags are -mcpu=cortex-m3 -Os -fdata-sections -ffunction-sections
>>> -mno-unaligned-access
>>>
>>> Could you please let me know the gas option?
>>>
>> -mimplicit-it=always
>>
>> https://sourceware.org/binutils/docs/as/ARM-Options.html
>>
>>> Thanks,
>>>
>>> Weiing
>>>
>>> On 6/14/2016 6:00 AM, Rich Felker wrote:
>>>> On Tue, Jun 14, 2016 at 01:49:40AM -0700, weimingz@codeaurora.org
>>>> wrote:
>>>>> Hi,
>>>>>
>>>>> I'm building MUSL with -mcpu=cortex-m3. There are a few .s files
>>>>> that cannot be assembled because: (1) use predicated instructions
>>>>> without IT instr (2) use sp inside reg list in ldmia/stmia.
>>>>>
>>>>> Please help to review the attached patch.
>>>> Did you test anything? These patches do not result in working code;
>>>> they just make it assemble without errors. There's already a gas
>>>> option to automatically add IT instructions where needed for
>>>> thumb-only targets, but that's not the only thing needed to support
>>>> thumb-only/cortex-m. I'd be interested in knowing more about the setup
>>>> you're trying to target. Is it Linux or bare-metal? If Linux, are you
>>>> going to use the ARM/FDPIC toolchain & kernel mods? I'm about to leave
>>>> at the moment but I'll follow up with a more detailed review of your
>>>> patch later.
>>>>
>>>>> Also, is there any easy way of disabling string/arm/memcpy_le.S ?
>>>>> For baremetal, unaligned access may be unavailable.
>>>> That file does not perform any unaligned access. It should work on any
>>>> EABI-supported version of the arm instruction set.
>>>>
>>>> Rich
>>> --
>>> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
>>> hosted by The Linux Foundation
>
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation
[-- Attachment #2: cortex_m3.patch --]
[-- Type: text/plain, Size: 2136 bytes --]
diff --git a/src/setjmp/arm/longjmp.s b/src/setjmp/arm/longjmp.s
index e28d8f3..e9b9b32 100644
--- a/src/setjmp/arm/longjmp.s
+++ b/src/setjmp/arm/longjmp.s
@@ -8,7 +8,9 @@ longjmp:
mov ip,r0
movs r0,r1
moveq r0,#1
- ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
+ ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp}
+ ldr sp, [ip]!
+ ldr lr, [ip]!
adr r1,1f
ldr r2,1f
diff --git a/src/setjmp/arm/setjmp.s b/src/setjmp/arm/setjmp.s
index 8779163..fd380b0 100644
--- a/src/setjmp/arm/setjmp.s
+++ b/src/setjmp/arm/setjmp.s
@@ -9,7 +9,9 @@ __setjmp:
_setjmp:
setjmp:
mov ip,r0
- stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
+ stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp}
+ str sp, [ip]!
+ str lr, [ip]!
mov r0,#0
adr r1,1f
diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
index 4db4844..2517d15 100644
--- a/src/string/arm/memcpy_le.S
+++ b/src/string/arm/memcpy_le.S
@@ -241,7 +241,8 @@ non_congruent:
beq 2f
ldr r5, [r1], #4
sub r2, r2, #4
- orr r4, r3, r5, lsl lr
+ lsl r4, r5, lr
+ orr r4, r3, r4
mov r3, r5, lsr r12
str r4, [r0], #4
cmp r2, #4
@@ -348,7 +349,8 @@ less_than_thirtytwo:
1: ldr r5, [r1], #4
sub r2, r2, #4
- orr r4, r3, r5, lsl lr
+ lsl r4, r5, lr
+ orr r4, r3, r4
mov r3, r5, lsr r12
str r4, [r0], #4
cmp r2, #4
diff --git a/src/thread/arm/atomics.s b/src/thread/arm/atomics.s
index 673fc03..a4bd03a 100644
--- a/src/thread/arm/atomics.s
+++ b/src/thread/arm/atomics.s
@@ -6,7 +6,8 @@
.type __a_barrier,%function
__a_barrier:
ldr ip,1f
- ldr ip,[pc,ip]
+ add ip,pc,ip
+ ldr ip,[ip]
add pc,pc,ip
1: .word __a_barrier_ptr-1b
.global __a_barrier_dummy
@@ -40,7 +41,8 @@ __a_barrier_v7:
.type __a_cas,%function
__a_cas:
ldr ip,1f
- ldr ip,[pc,ip]
+ add ip,pc,ip
+ ldr ip,[ip]
add pc,pc,ip
1: .word __a_cas_ptr-1b
.global __a_cas_dummy
@@ -85,7 +87,8 @@ __aeabi_read_tp:
.type __a_gettp,%function
__a_gettp:
ldr r0,1f
- ldr r0,[pc,r0]
+ add r0,pc,r0
+ ldr r0,[r0]
add pc,pc,r0
1: .word __a_gettp_ptr-1b
.global __a_gettp_dummy
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-14 17:40 ` Zhao, Weiming
@ 2016-06-16 18:34 ` Zhao, Weiming
2016-06-20 19:58 ` Rich Felker
2016-06-20 17:17 ` Zhao, Weiming
1 sibling, 1 reply; 21+ messages in thread
From: Zhao, Weiming @ 2016-06-16 18:34 UTC (permalink / raw)
To: musl
I tried to build for armv6m (cortex-m0) and I got other build issues
with .S and inline asms.
Below are the changes for building armv7m:
diff --git a/src/setjmp/arm/longjmp.s b/src/setjmp/arm/longjmp.s
index e28d8f3..e9b9b32 100644
--- a/src/setjmp/arm/longjmp.s
+++ b/src/setjmp/arm/longjmp.s
@@ -8,7 +8,9 @@ longjmp:
mov ip,r0
movs r0,r1
moveq r0,#1
- ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
+ ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp}
+ ldr sp, [ip]!
+ ldr lr, [ip]!
adr r1,1f
ldr r2,1f
diff --git a/src/setjmp/arm/setjmp.s b/src/setjmp/arm/setjmp.s
index 8779163..fd380b0 100644
--- a/src/setjmp/arm/setjmp.s
+++ b/src/setjmp/arm/setjmp.s
@@ -9,7 +9,9 @@ __setjmp:
_setjmp:
setjmp:
mov ip,r0
- stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
+ stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp}
+ str sp, [ip]!
+ str lr, [ip]!
mov r0,#0
adr r1,1f
diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
index 4db4844..2517d15 100644
--- a/src/string/arm/memcpy_le.S
+++ b/src/string/arm/memcpy_le.S
@@ -241,7 +241,8 @@ non_congruent:
beq 2f
ldr r5, [r1], #4
sub r2, r2, #4
- orr r4, r3, r5, lsl lr
+ lsl r4, r5, lr
+ orr r4, r3, r4
mov r3, r5, lsr r12
str r4, [r0], #4
cmp r2, #4
@@ -348,7 +349,8 @@ less_than_thirtytwo:
1: ldr r5, [r1], #4
sub r2, r2, #4
- orr r4, r3, r5, lsl lr
+ lsl r4, r5, lr
+ orr r4, r3, r4
mov r3, r5, lsr r12
str r4, [r0], #4
cmp r2, #4
diff --git a/src/thread/arm/atomics.s b/src/thread/arm/atomics.s
index 673fc03..a4bd03a 100644
--- a/src/thread/arm/atomics.s
+++ b/src/thread/arm/atomics.s
@@ -6,7 +6,8 @@
.type __a_barrier,%function
__a_barrier:
ldr ip,1f
- ldr ip,[pc,ip]
+ add ip,pc,ip
+ ldr ip,[ip]
add pc,pc,ip
1: .word __a_barrier_ptr-1b
.global __a_barrier_dummy
@@ -40,7 +41,8 @@ __a_barrier_v7:
.type __a_cas,%function
__a_cas:
ldr ip,1f
- ldr ip,[pc,ip]
+ add ip,pc,ip
+ ldr ip,[ip]
add pc,pc,ip
1: .word __a_cas_ptr-1b
.global __a_cas_dummy
@@ -85,7 +87,8 @@ __aeabi_read_tp:
.type __a_gettp,%function
__a_gettp:
ldr r0,1f
- ldr r0,[pc,r0]
+ add r0,pc,r0
+ ldr r0,[r0]
add pc,pc,r0
1: .word __a_gettp_ptr-1b
.global __a_gettp_dummy
Thanks,
Weiming
On 6/14/2016 10:40 AM, Zhao, Weiming wrote:
> Please review the changes.
>
> I'm able to build libc.a with -mcpu=cortex-m3 -Wa,-implicit-it=always
> using clang + gas
>
> Thanks,
>
> weiming
>
>
> On 6/14/2016 9:58 AM, Zhao, Weiming wrote:
>> Thank you, Szabolcs.
>>
>> It's GAS-only flag, clang's integrated-as doesn't support it. I can
>> use -no-integrated-as.
>>
>> It also expose other errors:
>>
>> src/thread/arm/atomics.s:9: Error: cannot use register index with
>> PC-relative addressing -- `ldr ip,[pc,ip]'
>>
>> Clang's integrated-as accepts it but seems encoded incorrectly.
>>
>> I can fix it.
>>
>>
>> On 6/14/2016 9:32 AM, Szabolcs Nagy wrote:
>>> * Zhao, Weiming <weimingz@codeaurora.org> [2016-06-14 09:12:17 -0700]:
>>>> I haven't did a full test as the functions I modified are not
>>>> actually being
>>>> used.
>>>>
>>>> It is a bare-metal environment, using clang to compile.
>>>>
>>>> Main flags are -mcpu=cortex-m3 -Os -fdata-sections -ffunction-sections
>>>> -mno-unaligned-access
>>>>
>>>> Could you please let me know the gas option?
>>>>
>>> -mimplicit-it=always
>>>
>>> https://sourceware.org/binutils/docs/as/ARM-Options.html
>>>
>>>> Thanks,
>>>>
>>>> Weiing
>>>>
>>>> On 6/14/2016 6:00 AM, Rich Felker wrote:
>>>>> On Tue, Jun 14, 2016 at 01:49:40AM -0700, weimingz@codeaurora.org
>>>>> wrote:
>>>>>> Hi,
>>>>>>
>>>>>> I'm building MUSL with -mcpu=cortex-m3. There are a few .s files
>>>>>> that cannot be assembled because: (1) use predicated instructions
>>>>>> without IT instr (2) use sp inside reg list in ldmia/stmia.
>>>>>>
>>>>>> Please help to review the attached patch.
>>>>> Did you test anything? These patches do not result in working code;
>>>>> they just make it assemble without errors. There's already a gas
>>>>> option to automatically add IT instructions where needed for
>>>>> thumb-only targets, but that's not the only thing needed to support
>>>>> thumb-only/cortex-m. I'd be interested in knowing more about the
>>>>> setup
>>>>> you're trying to target. Is it Linux or bare-metal? If Linux, are you
>>>>> going to use the ARM/FDPIC toolchain & kernel mods? I'm about to
>>>>> leave
>>>>> at the moment but I'll follow up with a more detailed review of your
>>>>> patch later.
>>>>>
>>>>>> Also, is there any easy way of disabling string/arm/memcpy_le.S ?
>>>>>> For baremetal, unaligned access may be unavailable.
>>>>> That file does not perform any unaligned access. It should work on
>>>>> any
>>>>> EABI-supported version of the arm instruction set.
>>>>>
>>>>> Rich
>>>> --
>>>> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
>>>> hosted by The Linux Foundation
>>
>
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-14 17:40 ` Zhao, Weiming
2016-06-16 18:34 ` Zhao, Weiming
@ 2016-06-20 17:17 ` Zhao, Weiming
1 sibling, 0 replies; 21+ messages in thread
From: Zhao, Weiming @ 2016-06-20 17:17 UTC (permalink / raw)
To: musl
ping?
On 6/14/2016 10:40 AM, Zhao, Weiming wrote:
> Please review the changes.
>
> I'm able to build libc.a with -mcpu=cortex-m3 -Wa,-implicit-it=always
> using clang + gas
>
> Thanks,
>
> weiming
>
>
> On 6/14/2016 9:58 AM, Zhao, Weiming wrote:
>> Thank you, Szabolcs.
>>
>> It's GAS-only flag, clang's integrated-as doesn't support it. I can
>> use -no-integrated-as.
>>
>> It also expose other errors:
>>
>> src/thread/arm/atomics.s:9: Error: cannot use register index with
>> PC-relative addressing -- `ldr ip,[pc,ip]'
>>
>> Clang's integrated-as accepts it but seems encoded incorrectly.
>>
>> I can fix it.
>>
>>
>> On 6/14/2016 9:32 AM, Szabolcs Nagy wrote:
>>> * Zhao, Weiming <weimingz@codeaurora.org> [2016-06-14 09:12:17 -0700]:
>>>> I haven't did a full test as the functions I modified are not
>>>> actually being
>>>> used.
>>>>
>>>> It is a bare-metal environment, using clang to compile.
>>>>
>>>> Main flags are -mcpu=cortex-m3 -Os -fdata-sections -ffunction-sections
>>>> -mno-unaligned-access
>>>>
>>>> Could you please let me know the gas option?
>>>>
>>> -mimplicit-it=always
>>>
>>> https://sourceware.org/binutils/docs/as/ARM-Options.html
>>>
>>>> Thanks,
>>>>
>>>> Weiing
>>>>
>>>> On 6/14/2016 6:00 AM, Rich Felker wrote:
>>>>> On Tue, Jun 14, 2016 at 01:49:40AM -0700, weimingz@codeaurora.org
>>>>> wrote:
>>>>>> Hi,
>>>>>>
>>>>>> I'm building MUSL with -mcpu=cortex-m3. There are a few .s files
>>>>>> that cannot be assembled because: (1) use predicated instructions
>>>>>> without IT instr (2) use sp inside reg list in ldmia/stmia.
>>>>>>
>>>>>> Please help to review the attached patch.
>>>>> Did you test anything? These patches do not result in working code;
>>>>> they just make it assemble without errors. There's already a gas
>>>>> option to automatically add IT instructions where needed for
>>>>> thumb-only targets, but that's not the only thing needed to support
>>>>> thumb-only/cortex-m. I'd be interested in knowing more about the
>>>>> setup
>>>>> you're trying to target. Is it Linux or bare-metal? If Linux, are you
>>>>> going to use the ARM/FDPIC toolchain & kernel mods? I'm about to
>>>>> leave
>>>>> at the moment but I'll follow up with a more detailed review of your
>>>>> patch later.
>>>>>
>>>>>> Also, is there any easy way of disabling string/arm/memcpy_le.S ?
>>>>>> For baremetal, unaligned access may be unavailable.
>>>>> That file does not perform any unaligned access. It should work on
>>>>> any
>>>>> EABI-supported version of the arm instruction set.
>>>>>
>>>>> Rich
>>>> --
>>>> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
>>>> hosted by The Linux Foundation
>>
>
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-16 18:34 ` Zhao, Weiming
@ 2016-06-20 19:58 ` Rich Felker
2016-06-22 19:08 ` Zhao, Weiming
0 siblings, 1 reply; 21+ messages in thread
From: Rich Felker @ 2016-06-20 19:58 UTC (permalink / raw)
To: musl
On Thu, Jun 16, 2016 at 11:34:28AM -0700, Zhao, Weiming wrote:
> I tried to build for armv6m (cortex-m0) and I got other build issues
> with .S and inline asms.
>
> Below are the changes for building armv7m:
I thought I'd already replied to this but I don't see my reply so I'm
doing it [again?] now.
> diff --git a/src/setjmp/arm/longjmp.s b/src/setjmp/arm/longjmp.s
> index e28d8f3..e9b9b32 100644
> --- a/src/setjmp/arm/longjmp.s
> +++ b/src/setjmp/arm/longjmp.s
> @@ -8,7 +8,9 @@ longjmp:
> mov ip,r0
> movs r0,r1
> moveq r0,#1
> - ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
> + ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp}
> + ldr sp, [ip]!
> + ldr lr, [ip]!
I think changes like this are ok. They could be conditional on
__thumb__ if they hurt performance measurably on arm but I doubt it
matters.
> diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
> index 4db4844..2517d15 100644
> --- a/src/string/arm/memcpy_le.S
> +++ b/src/string/arm/memcpy_le.S
> @@ -241,7 +241,8 @@ non_congruent:
> beq 2f
> ldr r5, [r1], #4
> sub r2, r2, #4
> - orr r4, r3, r5, lsl lr
> + lsl r4, r5, lr
> + orr r4, r3, r4
> mov r3, r5, lsr r12
> str r4, [r0], #4
> cmp r2, #4
If this is in a hot path it may need to be conditional.
> diff --git a/src/thread/arm/atomics.s b/src/thread/arm/atomics.s
> index 673fc03..a4bd03a 100644
> --- a/src/thread/arm/atomics.s
> +++ b/src/thread/arm/atomics.s
> @@ -6,7 +6,8 @@
> .type __a_barrier,%function
> __a_barrier:
> ldr ip,1f
> - ldr ip,[pc,ip]
> + add ip,pc,ip
> + ldr ip,[ip]
> add pc,pc,ip
> 1: .word __a_barrier_ptr-1b
> .global __a_barrier_dummy
As far as I can tell, this does not work at all. The arithmetic on pc
is assuming the particular offset between the instruction using pc and
the following code as arm opcodes.
There's also the matter of the cp15 register load in this file that
doesn't exist on cortex-m. IMO the kernel (or bare-metal trap handler)
needs to trap and emulate it.
Rich
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-20 19:58 ` Rich Felker
@ 2016-06-22 19:08 ` Zhao, Weiming
2016-06-22 19:19 ` Rich Felker
0 siblings, 1 reply; 21+ messages in thread
From: Zhao, Weiming @ 2016-06-22 19:08 UTC (permalink / raw)
To: musl
[-- Attachment #1: Type: text/plain, Size: 2818 bytes --]
Thanks for reviewing.
I add tests for ARMv7m for memcpy.
For atomics.s, I think the below are equivalent:
ldr ip,1f ==> assembler will computes the offset from current inst to the label
- ldr ip,[pc,ip] ==> here the address to be loaded is current PC + ip
+ add ip,pc,ip ==> here, the PC is the same as above
+ ldr ip,[ip]
But I'm not familiar with the CP15 issue you mentioned.
So, anyway, I skip the change for atomics.s in this patch.
Thanks,
Weiming
On 6/20/2016 12:58 PM, Rich Felker wrote:
> On Thu, Jun 16, 2016 at 11:34:28AM -0700, Zhao, Weiming wrote:
>> I tried to build for armv6m (cortex-m0) and I got other build issues
>> with .S and inline asms.
>>
>> Below are the changes for building armv7m:
> I thought I'd already replied to this but I don't see my reply so I'm
> doing it [again?] now.
>
>> diff --git a/src/setjmp/arm/longjmp.s b/src/setjmp/arm/longjmp.s
>> index e28d8f3..e9b9b32 100644
>> --- a/src/setjmp/arm/longjmp.s
>> +++ b/src/setjmp/arm/longjmp.s
>> @@ -8,7 +8,9 @@ longjmp:
>> mov ip,r0
>> movs r0,r1
>> moveq r0,#1
>> - ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
>> + ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp}
>> + ldr sp, [ip]!
>> + ldr lr, [ip]!
> I think changes like this are ok. They could be conditional on
> __thumb__ if they hurt performance measurably on arm but I doubt it
> matters.
>
>> diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
>> index 4db4844..2517d15 100644
>> --- a/src/string/arm/memcpy_le.S
>> +++ b/src/string/arm/memcpy_le.S
>> @@ -241,7 +241,8 @@ non_congruent:
>> beq 2f
>> ldr r5, [r1], #4
>> sub r2, r2, #4
>> - orr r4, r3, r5, lsl lr
>> + lsl r4, r5, lr
>> + orr r4, r3, r4
>> mov r3, r5, lsr r12
>> str r4, [r0], #4
>> cmp r2, #4
> If this is in a hot path it may need to be conditional.
>
>> diff --git a/src/thread/arm/atomics.s b/src/thread/arm/atomics.s
>> index 673fc03..a4bd03a 100644
>> --- a/src/thread/arm/atomics.s
>> +++ b/src/thread/arm/atomics.s
>> @@ -6,7 +6,8 @@
>> .type __a_barrier,%function
>> __a_barrier:
>> ldr ip,1f
>> - ldr ip,[pc,ip]
>> + add ip,pc,ip
>> + ldr ip,[ip]
>> add pc,pc,ip
>> 1: .word __a_barrier_ptr-1b
>> .global __a_barrier_dummy
> As far as I can tell, this does not work at all. The arithmetic on pc
> is assuming the particular offset between the instruction using pc and
> the following code as arm opcodes.
>
> There's also the matter of the cp15 register load in this file that
> doesn't exist on cortex-m. IMO the kernel (or bare-metal trap handler)
> needs to trap and emulate it.
>
> Rich
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation
[-- Attachment #2: patch.diff --]
[-- Type: text/plain, Size: 1546 bytes --]
diff --git a/src/setjmp/arm/longjmp.s b/src/setjmp/arm/longjmp.s
index e28d8f3..e9b9b32 100644
--- a/src/setjmp/arm/longjmp.s
+++ b/src/setjmp/arm/longjmp.s
@@ -8,7 +8,9 @@ longjmp:
mov ip,r0
movs r0,r1
moveq r0,#1
- ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
+ ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp}
+ ldr sp, [ip]!
+ ldr lr, [ip]!
adr r1,1f
ldr r2,1f
diff --git a/src/setjmp/arm/setjmp.s b/src/setjmp/arm/setjmp.s
index 8779163..fd380b0 100644
--- a/src/setjmp/arm/setjmp.s
+++ b/src/setjmp/arm/setjmp.s
@@ -9,7 +9,9 @@ __setjmp:
_setjmp:
setjmp:
mov ip,r0
- stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
+ stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp}
+ str sp, [ip]!
+ str lr, [ip]!
mov r0,#0
adr r1,1f
diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
index 4db4844..1137f55 100644
--- a/src/string/arm/memcpy_le.S
+++ b/src/string/arm/memcpy_le.S
@@ -241,7 +241,12 @@ non_congruent:
beq 2f
ldr r5, [r1], #4
sub r2, r2, #4
+#if (__ARM_ARCH_7A || __ARM_ARCH_7R || __ARM_ARCH > 7)
orr r4, r3, r5, lsl lr
+#else
+ lsl r4, r5, lr
+ orr r4, r3, r4
+#endif
mov r3, r5, lsr r12
str r4, [r0], #4
cmp r2, #4
@@ -348,7 +353,12 @@ less_than_thirtytwo:
1: ldr r5, [r1], #4
sub r2, r2, #4
+#if (__ARM_ARCH_7A || __ARM_ARCH_7R || __ARM_ARCH > 7)
orr r4, r3, r5, lsl lr
+#else
+ lsl r4, r5, lr
+ orr r4, r3, r4
+#endif
mov r3, r5, lsr r12
str r4, [r0], #4
cmp r2, #4
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-22 19:08 ` Zhao, Weiming
@ 2016-06-22 19:19 ` Rich Felker
2016-06-22 20:37 ` Zhao, Weiming
0 siblings, 1 reply; 21+ messages in thread
From: Rich Felker @ 2016-06-22 19:19 UTC (permalink / raw)
To: musl
On Wed, Jun 22, 2016 at 12:08:13PM -0700, Zhao, Weiming wrote:
> Thanks for reviewing.
>
> I add tests for ARMv7m for memcpy.
>
> For atomics.s, I think the below are equivalent:
>
> ldr ip,1f ==> assembler will computes the offset from current inst to the label
>
> - ldr ip,[pc,ip] ==> here the address to be loaded is current PC + ip
> + add ip,pc,ip ==> here, the PC is the same as above
> + ldr ip,[ip]
In the original code, pc reads as the address of the instruction
following the ".word" mnemonic (2 ARM insns ahead of the current
instruction). In your version, I believe pc reads as the address of
the .word (2 thumb insns ahead of the current insn) which would be
wrong, but I may be wrong; one source I saw suggested that arithmetic
on pc like this was not even defined in thumb mode on some models.
> But I'm not familiar with the CP15 issue you mentioned.
> So, anyway, I skip the change for atomics.s in this patch.
OK. But just know that you can't expect it to work unless that's
implemented.
> diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
> index 4db4844..1137f55 100644
> --- a/src/string/arm/memcpy_le.S
> +++ b/src/string/arm/memcpy_le.S
> @@ -241,7 +241,12 @@ non_congruent:
> beq 2f
> ldr r5, [r1], #4
> sub r2, r2, #4
> +#if (__ARM_ARCH_7A || __ARM_ARCH_7R || __ARM_ARCH > 7)
> orr r4, r3, r5, lsl lr
> +#else
> + lsl r4, r5, lr
> + orr r4, r3, r4
> +#endif
These are not the right conditions. The selection should be made based
on whether the code is being assembled as thumb, not on the ISA
revision level. As written your patch uses the thumb code on all
pre-v7 targets.
Rich
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-22 19:19 ` Rich Felker
@ 2016-06-22 20:37 ` Zhao, Weiming
2016-06-22 23:26 ` Rich Felker
0 siblings, 1 reply; 21+ messages in thread
From: Zhao, Weiming @ 2016-06-22 20:37 UTC (permalink / raw)
To: musl
[-- Attachment #1: Type: text/plain, Size: 2252 bytes --]
How about using the following test?
!defined(__thumb__) || (__ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH
> 7)
Because I think those instructions are ISA related, not just arm/thumb.
For example, using -mcpu=cortex-a8 -mthumb, the original code still
works. So if only testing __thumb__, the armv7-a thumb mode will fall
into the slow path too.
On 6/22/2016 12:19 PM, Rich Felker wrote:
> On Wed, Jun 22, 2016 at 12:08:13PM -0700, Zhao, Weiming wrote:
>> Thanks for reviewing.
>>
>> I add tests for ARMv7m for memcpy.
>>
>> For atomics.s, I think the below are equivalent:
>>
>> ldr ip,1f ==> assembler will computes the offset from current inst to the label
>>
>> - ldr ip,[pc,ip] ==> here the address to be loaded is current PC + ip
>> + add ip,pc,ip ==> here, the PC is the same as above
>> + ldr ip,[ip]
> In the original code, pc reads as the address of the instruction
> following the ".word" mnemonic (2 ARM insns ahead of the current
> instruction). In your version, I believe pc reads as the address of
> the .word (2 thumb insns ahead of the current insn) which would be
> wrong, but I may be wrong; one source I saw suggested that arithmetic
> on pc like this was not even defined in thumb mode on some models.
>
>> But I'm not familiar with the CP15 issue you mentioned.
>> So, anyway, I skip the change for atomics.s in this patch.
> OK. But just know that you can't expect it to work unless that's
> implemented.
>> diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
>> index 4db4844..1137f55 100644
>> --- a/src/string/arm/memcpy_le.S
>> +++ b/src/string/arm/memcpy_le.S
>> @@ -241,7 +241,12 @@ non_congruent:
>> beq 2f
>> ldr r5, [r1], #4
>> sub r2, r2, #4
>> +#if (__ARM_ARCH_7A || __ARM_ARCH_7R || __ARM_ARCH > 7)
>> orr r4, r3, r5, lsl lr
>> +#else
>> + lsl r4, r5, lr
>> + orr r4, r3, r4
>> +#endif
> These are not the right conditions. The selection should be made based
> on whether the code is being assembled as thumb, not on the ISA
> revision level. As written your patch uses the thumb code on all
> pre-v7 targets.
> Rich
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation
[-- Attachment #2: patch.diff --]
[-- Type: text/plain, Size: 1600 bytes --]
diff --git a/src/setjmp/arm/longjmp.s b/src/setjmp/arm/longjmp.s
index e28d8f3..e9b9b32 100644
--- a/src/setjmp/arm/longjmp.s
+++ b/src/setjmp/arm/longjmp.s
@@ -8,7 +8,9 @@ longjmp:
mov ip,r0
movs r0,r1
moveq r0,#1
- ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
+ ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp}
+ ldr sp, [ip]!
+ ldr lr, [ip]!
adr r1,1f
ldr r2,1f
diff --git a/src/setjmp/arm/setjmp.s b/src/setjmp/arm/setjmp.s
index 8779163..fd380b0 100644
--- a/src/setjmp/arm/setjmp.s
+++ b/src/setjmp/arm/setjmp.s
@@ -9,7 +9,9 @@ __setjmp:
_setjmp:
setjmp:
mov ip,r0
- stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
+ stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp}
+ str sp, [ip]!
+ str lr, [ip]!
mov r0,#0
adr r1,1f
diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
index 4db4844..7c61979 100644
--- a/src/string/arm/memcpy_le.S
+++ b/src/string/arm/memcpy_le.S
@@ -241,7 +241,12 @@ non_congruent:
beq 2f
ldr r5, [r1], #4
sub r2, r2, #4
+#if !defined(__thumb__) || (__ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH > 7)
orr r4, r3, r5, lsl lr
+#else
+ lsl r4, r5, lr
+ orr r4, r3, r4
+#endif
mov r3, r5, lsr r12
str r4, [r0], #4
cmp r2, #4
@@ -348,7 +353,12 @@ less_than_thirtytwo:
1: ldr r5, [r1], #4
sub r2, r2, #4
+#if !defined(__thumb__) || (__ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH > 7)
orr r4, r3, r5, lsl lr
+#else
+ lsl r4, r5, lr
+ orr r4, r3, r4
+#endif
mov r3, r5, lsr r12
str r4, [r0], #4
cmp r2, #4
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-22 20:37 ` Zhao, Weiming
@ 2016-06-22 23:26 ` Rich Felker
2016-06-23 0:21 ` Zhao, Weiming
0 siblings, 1 reply; 21+ messages in thread
From: Rich Felker @ 2016-06-22 23:26 UTC (permalink / raw)
To: musl
On Wed, Jun 22, 2016 at 01:37:38PM -0700, Zhao, Weiming wrote:
> How about using the following test?
>
> !defined(__thumb__) || (__ARM_ARCH_7A__ || __ARM_ARCH_7R__ ||
> __ARM_ARCH > 7)
>
> Because I think those instructions are ISA related, not just
> arm/thumb. For example, using -mcpu=cortex-a8 -mthumb, the original
> code still works. So if only testing __thumb__, the armv7-a thumb
> mode will fall into the slow path too.
I think perhaps __thumb2__ is what you're looking for...?
Rich
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-22 23:26 ` Rich Felker
@ 2016-06-23 0:21 ` Zhao, Weiming
2016-06-23 4:22 ` Rich Felker
2016-07-05 20:08 ` Rich Felker
0 siblings, 2 replies; 21+ messages in thread
From: Zhao, Weiming @ 2016-06-23 0:21 UTC (permalink / raw)
To: musl
[-- Attachment #1: Type: text/plain, Size: 922 bytes --]
Fixed.
Btw, can we make exit() weak function? (patch attached)
This allows user code to redefine it because in baremetal environment,
sometime we want to customize it. For example, some code never exits, so
we can define a dummy exit() and thus save code size.
On 6/22/2016 4:26 PM, Rich Felker wrote:
> On Wed, Jun 22, 2016 at 01:37:38PM -0700, Zhao, Weiming wrote:
>> How about using the following test?
>>
>> !defined(__thumb__) || (__ARM_ARCH_7A__ || __ARM_ARCH_7R__ ||
>> __ARM_ARCH > 7)
>>
>> Because I think those instructions are ISA related, not just
>> arm/thumb. For example, using -mcpu=cortex-a8 -mthumb, the original
>> code still works. So if only testing __thumb__, the armv7-a thumb
>> mode will fall into the slow path too.
> I think perhaps __thumb2__ is what you're looking for...?
>
> Rich
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation
[-- Attachment #2: patch.diff --]
[-- Type: text/plain, Size: 1602 bytes --]
diff --git a/src/setjmp/arm/longjmp.s b/src/setjmp/arm/longjmp.s
index e28d8f3..e9b9b32 100644
--- a/src/setjmp/arm/longjmp.s
+++ b/src/setjmp/arm/longjmp.s
@@ -8,7 +8,9 @@ longjmp:
mov ip,r0
movs r0,r1
moveq r0,#1
- ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
+ ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp}
+ ldr sp, [ip]!
+ ldr lr, [ip]!
adr r1,1f
ldr r2,1f
diff --git a/src/setjmp/arm/setjmp.s b/src/setjmp/arm/setjmp.s
index 8779163..fd380b0 100644
--- a/src/setjmp/arm/setjmp.s
+++ b/src/setjmp/arm/setjmp.s
@@ -9,7 +9,9 @@ __setjmp:
_setjmp:
setjmp:
mov ip,r0
- stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp,sp,lr}
+ stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp}
+ str sp, [ip]!
+ str lr, [ip]!
mov r0,#0
adr r1,1f
diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
index 4db4844..df9f5c8 100644
--- a/src/string/arm/memcpy_le.S
+++ b/src/string/arm/memcpy_le.S
@@ -241,7 +241,12 @@ non_congruent:
beq 2f
ldr r5, [r1], #4
sub r2, r2, #4
+#if !defined(__thumb2__) || (__ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH > 7)
orr r4, r3, r5, lsl lr
+#else
+ lsl r4, r5, lr
+ orr r4, r3, r4
+#endif
mov r3, r5, lsr r12
str r4, [r0], #4
cmp r2, #4
@@ -348,7 +353,12 @@ less_than_thirtytwo:
1: ldr r5, [r1], #4
sub r2, r2, #4
+#if !defined(__thumb2__) || (__ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH > 7)
orr r4, r3, r5, lsl lr
+#else
+ lsl r4, r5, lr
+ orr r4, r3, r4
+#endif
mov r3, r5, lsr r12
str r4, [r0], #4
cmp r2, #4
[-- Attachment #3: patch_exit.diff --]
[-- Type: text/plain, Size: 305 bytes --]
diff --git a/src/exit/exit.c b/src/exit/exit.c
index bf7835a..694d277 100644
--- a/src/exit/exit.c
+++ b/src/exit/exit.c
@@ -25,6 +25,7 @@ static void libc_exit_fini(void)
weak_alias(libc_exit_fini, __libc_exit_fini);
+__attribute__((__weak__))
_Noreturn void exit(int code)
{
__funcs_on_exit();
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-23 0:21 ` Zhao, Weiming
@ 2016-06-23 4:22 ` Rich Felker
2016-06-23 6:04 ` weimingz
2016-07-05 20:08 ` Rich Felker
1 sibling, 1 reply; 21+ messages in thread
From: Rich Felker @ 2016-06-23 4:22 UTC (permalink / raw)
To: musl
On Wed, Jun 22, 2016 at 05:21:54PM -0700, Zhao, Weiming wrote:
> Fixed.
>
> Btw, can we make exit() weak function? (patch attached)
>
> This allows user code to redefine it because in baremetal
> environment, sometime we want to customize it. For example, some
> code never exits, so we can define a dummy exit() and thus save code
> size.
No, this is a hack and is not even needed to do what you're asking
for. The way linking an archive works, an object in the archive is
only pulled in to the link process when it satisfies an undefined
symbol reference, and if you defined your own exit, then exit.o would
never have reason to get linked.
But there are all sorts of things that can break unexpectedly from
redefining standard functions, which is why it's UB and this is not
supported usage.
Rich
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-23 4:22 ` Rich Felker
@ 2016-06-23 6:04 ` weimingz
2016-06-23 9:57 ` Szabolcs Nagy
0 siblings, 1 reply; 21+ messages in thread
From: weimingz @ 2016-06-23 6:04 UTC (permalink / raw)
To: musl; +Cc: Rich Felker, Rich Felker
If it is not a weak symbol, you may get "multi-def" error. It depends on
the obj/lib order on command line.
If it is a weak symbol, then a non-weak symbol can override it.
Anyway, your comment reminds me that we can use -wrap=exit to override
it. So no change is needed.
btw, will you commit the change of those .s files for armv7m ?
Thanks,
Weiming
On 2016-06-22 21:22, Rich Felker wrote:
> On Wed, Jun 22, 2016 at 05:21:54PM -0700, Zhao, Weiming wrote:
>> Fixed.
>>
>> Btw, can we make exit() weak function? (patch attached)
>>
>> This allows user code to redefine it because in baremetal
>> environment, sometime we want to customize it. For example, some
>> code never exits, so we can define a dummy exit() and thus save code
>> size.
>
> No, this is a hack and is not even needed to do what you're asking
> for. The way linking an archive works, an object in the archive is
> only pulled in to the link process when it satisfies an undefined
> symbol reference, and if you defined your own exit, then exit.o would
> never have reason to get linked.
>
> But there are all sorts of things that can break unexpectedly from
> redefining standard functions, which is why it's UB and this is not
> supported usage.
>
> Rich
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-23 6:04 ` weimingz
@ 2016-06-23 9:57 ` Szabolcs Nagy
2016-06-23 14:22 ` weimingz
0 siblings, 1 reply; 21+ messages in thread
From: Szabolcs Nagy @ 2016-06-23 9:57 UTC (permalink / raw)
To: weimingz; +Cc: musl, Rich Felker, Rich Felker
* weimingz@codeaurora.org <weimingz@codeaurora.org> [2016-06-22 23:04:18 -0700]:
> If it is not a weak symbol, you may get "multi-def" error. It depends on the
> obj/lib order on command line.
the only external symbol definition with non-weak binding
in the exit.o object is exit.
so the only way exit.o can get linked is an undefined reference
to exit.
you only need to make sure that -lc is at the end of the cmdline
when you want to interpose exit with your own definition
(then libc exit.o wont get linked in).
but to repeat rich: interposing libc symbols can break things.
> If it is a weak symbol, then a non-weak symbol can override it.
>
> Anyway, your comment reminds me that we can use -wrap=exit to override it.
> So no change is needed.
>
> btw, will you commit the change of those .s files for armv7m ?
>
> Thanks,
> Weiming
>
> On 2016-06-22 21:22, Rich Felker wrote:
> >On Wed, Jun 22, 2016 at 05:21:54PM -0700, Zhao, Weiming wrote:
> >>Fixed.
> >>
> >>Btw, can we make exit() weak function? (patch attached)
> >>
> >>This allows user code to redefine it because in baremetal
> >>environment, sometime we want to customize it. For example, some
> >>code never exits, so we can define a dummy exit() and thus save code
> >>size.
> >
> >No, this is a hack and is not even needed to do what you're asking
> >for. The way linking an archive works, an object in the archive is
> >only pulled in to the link process when it satisfies an undefined
> >symbol reference, and if you defined your own exit, then exit.o would
> >never have reason to get linked.
> >
> >But there are all sorts of things that can break unexpectedly from
> >redefining standard functions, which is why it's UB and this is not
> >supported usage.
> >
> >Rich
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-23 9:57 ` Szabolcs Nagy
@ 2016-06-23 14:22 ` weimingz
0 siblings, 0 replies; 21+ messages in thread
From: weimingz @ 2016-06-23 14:22 UTC (permalink / raw)
To: weimingz; +Cc: musl, Rich Felker, Rich Felker
Correct. Thank you Szabolcs.
Weiming
On 2016-06-23 02:57, Szabolcs Nagy wrote:
> * weimingz@codeaurora.org <weimingz@codeaurora.org> [2016-06-22
> 23:04:18 -0700]:
>> If it is not a weak symbol, you may get "multi-def" error. It depends
>> on the
>> obj/lib order on command line.
>
> the only external symbol definition with non-weak binding
> in the exit.o object is exit.
>
> so the only way exit.o can get linked is an undefined reference
> to exit.
>
> you only need to make sure that -lc is at the end of the cmdline
> when you want to interpose exit with your own definition
> (then libc exit.o wont get linked in).
>
> but to repeat rich: interposing libc symbols can break things.
>
>> If it is a weak symbol, then a non-weak symbol can override it.
>>
>> Anyway, your comment reminds me that we can use -wrap=exit to override
>> it.
>> So no change is needed.
>>
>> btw, will you commit the change of those .s files for armv7m ?
>>
>> Thanks,
>> Weiming
>>
>> On 2016-06-22 21:22, Rich Felker wrote:
>> >On Wed, Jun 22, 2016 at 05:21:54PM -0700, Zhao, Weiming wrote:
>> >>Fixed.
>> >>
>> >>Btw, can we make exit() weak function? (patch attached)
>> >>
>> >>This allows user code to redefine it because in baremetal
>> >>environment, sometime we want to customize it. For example, some
>> >>code never exits, so we can define a dummy exit() and thus save code
>> >>size.
>> >
>> >No, this is a hack and is not even needed to do what you're asking
>> >for. The way linking an archive works, an object in the archive is
>> >only pulled in to the link process when it satisfies an undefined
>> >symbol reference, and if you defined your own exit, then exit.o would
>> >never have reason to get linked.
>> >
>> >But there are all sorts of things that can break unexpectedly from
>> >redefining standard functions, which is why it's UB and this is not
>> >supported usage.
>> >
>> >Rich
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: build musl for armv7m
2016-06-23 0:21 ` Zhao, Weiming
2016-06-23 4:22 ` Rich Felker
@ 2016-07-05 20:08 ` Rich Felker
1 sibling, 0 replies; 21+ messages in thread
From: Rich Felker @ 2016-07-05 20:08 UTC (permalink / raw)
To: musl
On Wed, Jun 22, 2016 at 05:21:54PM -0700, Zhao, Weiming wrote:
> diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy_le.S
> index 4db4844..df9f5c8 100644
> --- a/src/string/arm/memcpy_le.S
> +++ b/src/string/arm/memcpy_le.S
> @@ -241,7 +241,12 @@ non_congruent:
> beq 2f
> ldr r5, [r1], #4
> sub r2, r2, #4
> +#if !defined(__thumb2__) || (__ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH > 7)
> orr r4, r3, r5, lsl lr
> +#else
> + lsl r4, r5, lr
> + orr r4, r3, r4
> +#endif
I don't see how this new logic makes sense at all. The goal is to use
the less-efficient code for the case where you're building as thumb
and thumb2 is not supported (i.e. limited to thumb1). That would be:
#if defined(__thumb2__) || !defined(__thumb__)
orr r4, r3, r5, lsl lr
#else
lsl r4, r5, lr
orr r4, r3, r4
#endif
Does that look right?
I'm trying to wrap up the 1.1.15 release right now so I don't think
these changes will make it in, but I want to finish addressing _all_
of the thumb/cortex-m compat issues soon, hopefully in the next
release cycle. Let's start a new thread figuring out what's still
needed.
Rich
^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2016-07-05 20:08 UTC | newest]
Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-14 8:49 build musl for armv7m weimingz
2016-06-14 13:00 ` Rich Felker
2016-06-14 16:12 ` Zhao, Weiming
2016-06-14 16:32 ` Szabolcs Nagy
2016-06-14 16:58 ` Zhao, Weiming
2016-06-14 17:40 ` Zhao, Weiming
2016-06-16 18:34 ` Zhao, Weiming
2016-06-20 19:58 ` Rich Felker
2016-06-22 19:08 ` Zhao, Weiming
2016-06-22 19:19 ` Rich Felker
2016-06-22 20:37 ` Zhao, Weiming
2016-06-22 23:26 ` Rich Felker
2016-06-23 0:21 ` Zhao, Weiming
2016-06-23 4:22 ` Rich Felker
2016-06-23 6:04 ` weimingz
2016-06-23 9:57 ` Szabolcs Nagy
2016-06-23 14:22 ` weimingz
2016-07-05 20:08 ` Rich Felker
2016-06-20 17:17 ` Zhao, Weiming
2016-06-14 14:38 ` Rich Felker
2016-06-14 16:35 ` Zhao, Weiming
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/musl/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).