From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on inbox.vuxu.org X-Spam-Level: X-Spam-Status: No, score=-4.1 required=5.0 tests=HTML_MESSAGE, MAILING_LIST_MULTI,NICE_REPLY_A,RCVD_IN_DNSWL_MED,RCVD_IN_MSPIKE_H3, RCVD_IN_MSPIKE_WL,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.4 Received: (qmail 28492 invoked from network); 21 May 2022 06:38:39 -0000 Received: from mother.openwall.net (195.42.179.200) by inbox.vuxu.org with ESMTPUTF8; 21 May 2022 06:38:39 -0000 Received: (qmail 9470 invoked by uid 550); 21 May 2022 06:38:35 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-ID: Reply-To: musl@lists.openwall.com Received: (qmail 9428 invoked from network); 21 May 2022 06:38:34 -0000 To: musl@lists.openwall.com References: <20220516142704.GR7074@brightrain.aerifal.cx> From: =?UTF-8?B?546L5rSq5Lqu?= Message-ID: <5fe467cd-4b68-2841-8aae-c485e7570267@loongson.cn> Date: Sat, 21 May 2022 14:38:20 +0800 User-Agent: Mozilla/5.0 (X11; Linux mips64; rv:68.0) Gecko/20100101 Thunderbird/68.7.0 MIME-Version: 1.0 In-Reply-To: <20220516142704.GR7074@brightrain.aerifal.cx> Content-Type: multipart/alternative; boundary="------------9A0E0331F80550981905707F" Content-Language: en-US X-CM-TRANSID:AQAAf9BxcNrciIhi2CMeAA--.37195S3 X-Coremail-Antispam: 1UD129KBjvJXoW3XFyDCry8ZF1DuFy5Cry8Zrb_yoW7Kryxpa 13Z3WkGrW8W34xuryxKa48Xry3CF4kZa43ZF9rWFy8Ar17Wr10qr40qFyq9FW5Xw4S9rW0 vFyjkw1a9FW8JaUanT9S1TB71UUUUUUqnTZGkaVYY2UrUUUUjbIjqfuFe4nvWSU5nxnvy2 9KBjDU0xBIdaVrnRJUUUv0b7Iv0xC_Kw4lb4IE77IF4wAFF20E14v26r1j6r4UM7CY07I2 0VC2zVCF04k26cxKx2IYs7xG6rWj6s0DM7CIcVAFz4kK6r1j6r18M28lY4IEw2IIxxk0rw A2F7IY1VAKz4vEj48ve4kI8wA2z4x0Y4vE2Ix0cI8IcVAFwI0_JFI_Gr1l84ACjcxK6xII jxv20xvEc7CjxVAFwI0_Jr0_Gr1l84ACjcxK6I8E87Iv67AKxVW8Jr0_Cr1UM28EF7xvwV C2z280aVCY1x0267AKxVW8Jr0_Cr1UM2AIxVAIcxkEcVAq07x20xvEncxIr21lYx0E2Ix0 cI8IcVAFwI0_Jr0_Jr4lYx0Ex4A2jsIE14v26r1j6r4UMcvjeVCFs4IE7xkEbVWUJVW8Jw ACjcxG0xvEwIxGrwCjr7xvwVCIw2I0I7xG6c02F41lc7I2V7IY0VAS07AlzVAYIcxG8wCY 02Avz4vE-syl42xK82IYc2Ij64vIr41l4I8I3I0E4IkC6x0Yz7v_Jr0_Gr1lx2IqxVAqx4 xG67AKxVWUGVWUWwC20s026x8GjcxK67AKxVWUGVWUWwC2zVAF1VAY17CE14v26r1j6r15 MIIYrxkI7VAKI48JMIIF0xvE2Ix0cI8IcVAFwI0_Jr0_JF4lIxAIcVC0I7IYx2IY6xkF7I 0E14v26r1j6r4UMIIF0xvE42xK8VAvwI8IcIk0rVWrZr1j6s0DMIIF0xvEx4A2jsIE14v2 6r1j6r4UMIIF0xvEx4A2jsIEc7CjxVAFwI0_Jr0_GrUvcSsGvfC2KfnxnUUI43ZEXa7IU5 fCztUUUUU== X-CM-SenderInfo: pzdqwxxrqjzxhdqjqz5rrqw2lrqou0/ Subject: Re: [musl] add loongarch64 port v3 This is a multi-part message in MIME format. --------------9A0E0331F80550981905707F Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 8bit 在 2022/5/16 下午10:27, Rich Felker 写道: > OK, review follows: >> diff --git a/arch/loongarch64/bits/signal.h b/arch/loongarch64/bits/signal.h >> new file mode 100644 >> index 00000000..e6613516 >> --- /dev/null >> +++ b/arch/loongarch64/bits/signal.h >> @@ -0,0 +1,79 @@ >> +#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \ >> + || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE) >> + >> +#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE) >> +#define MINSIGSTKSZ 4096 >> +#define SIGSTKSZ 16384 >> +#endif >> + >> +typedef unsigned long greg_t, gregset_t[32]; >> + >> +typedef struct sigcontext { >> + unsigned long pc; >> + gregset_t gregs; >> + unsigned int flags; >> + unsigned long extcontext[0] __attribute__((__aligned__(16))); >> +}mcontext_t; > Again, [0] is not valid C. If the extension field is going to be > declared at all it needs to be declared in a way it can be accessed > without invoking UB, e.g. as a FAM. [0] is allowed in GNU C, we can not use it in musl ? I don't understand  UB ?  e.g. as a FAM ? > I'm also not clear on how > specifying the alignment here helps since any object created in a way > that the alignment would affect cannot have the FAM present. > the __aligned__(16)  here used to save 128bit vector later. > >> diff --git a/arch/loongarch64/kstat.h b/arch/loongarch64/kstat.h >> new file mode 100644 >> index 00000000..900f119d >> --- /dev/null >> +++ b/arch/loongarch64/kstat.h >> @@ -0,0 +1,21 @@ >> +struct kstat { >> + dev_t st_dev; >> + ino_t st_ino; >> + mode_t st_mode; >> + nlink_t st_nlink; >> + uid_t st_uid; >> + gid_t st_gid; >> + dev_t st_rdev; >> + unsigned long __pad; >> + off_t st_size; >> + blksize_t st_blksize; >> + int __pad2; >> + blkcnt_t st_blocks; >> + long st_atime_sec; >> + unsigned long st_atime_nsec; >> + long st_mtime_sec; >> + unsigned long st_mtime_nsec; >> + long st_ctime_sec; >> + unsigned long st_ctime_nsec; >> + unsigned __unused[2]; >> +}; > I think this file should be removed, right? -- since there is no > legacy stat sycall with a kstat structure. yes, I removed this file. >> diff --git a/arch/loongarch64/reloc.h b/arch/loongarch64/reloc.h >> new file mode 100644 >> index 00000000..249cb40c >> --- /dev/null >> +++ b/arch/loongarch64/reloc.h >> @@ -0,0 +1,35 @@ >> +#ifndef __RELOC_H__ >> +#define __RELOC_H__ >> + >> +#define _GNU_SOURCE >> +#include > You cannot define FTMs in the middle of a header file that might be > included somewhere other than the very top of a source file. But there > does not seem to be any reason it's defined at all. I fixed this error: diff --git a/arch/loongarch64/reloc.h b/arch/loongarch64/reloc.h index 249cb40c..2f759a34 100644 --- a/arch/loongarch64/reloc.h +++ b/arch/loongarch64/reloc.h @@ -1,9 +1,3 @@ -#ifndef __RELOC_H__ -#define __RELOC_H__ - -#define _GNU_SOURCE -#include -  #ifdef __loongarch64_soft_float  #define FP_SUFFIX "-sf"  #else @@ -31,5 +25,3 @@      "    la.local $t1, "#sym" \n" \      "    or %0, $t1, $zero \n" \      : "=r"(*(fp)) : : "memory" ) - -#endif >> diff --git a/src/ldso/loongarch64/dlsym.s b/src/ldso/loongarch64/dlsym.s >> new file mode 100644 >> index 00000000..10ac5c7f >> --- /dev/null >> +++ b/src/ldso/loongarch64/dlsym.s >> @@ -0,0 +1,12 @@ >> +.global dlsym >> +.hidden __dlsym >> +.type dlsym,@function >> +dlsym: >> + move $a2, $ra >> + la.global $r16, __dlsym >> + addi.d $sp, $sp, -8 >> + st.d $ra, $sp, 0 >> + jirl $ra, $r16, 0 >> + ld.d $ra, $sp, 0 >> + addi.d $sp, $sp, 8 >> + jirl $r0, $ra, 0 > Is there a reason this can't be a tail call? Maybe the ABI requires > that the caller reserve space for the callee to spill arg registers > that are used? I optimized the code implementation: diff --git a/src/ldso/loongarch64/dlsym.s b/src/ldso/loongarch64/dlsym.s index 10ac5c7f..5f51ed85 100644 --- a/src/ldso/loongarch64/dlsym.s +++ b/src/ldso/loongarch64/dlsym.s @@ -4,9 +4,4 @@  dlsym:      move    $a2, $ra      la.global    $r16, __dlsym -    addi.d     $sp, $sp, -8 -    st.d    $ra, $sp, 0 -    jirl    $ra, $r16, 0 -    ld.d    $ra, $sp, 0 -    addi.d    $sp, $sp, 8 -    jirl    $r0, $ra, 0 +    jirl    $r0, $r16, 0 -- >> diff --git a/src/thread/loongarch64/clone.s b/src/thread/loongarch64/clone.s >> new file mode 100644 >> index 00000000..ec920c7b >> --- /dev/null >> +++ b/src/thread/loongarch64/clone.s >> @@ -0,0 +1,47 @@ >> +#__clone(func, stack, flags, arg, ptid, tls, ctid) >> +# a0, a1, a2, a3, a4, a5, a6 >> +# sys_clone3(struct clone_args *cl_args, size_t size) >> +# a0 a1 >> + >> +.global __clone >> +.hidden __clone >> +.type __clone,@function >> +__clone: >> + # Save function pointer and argument pointer on new thread stack >> + addi.d $a1, $a1, -16 >> + st.d $a0, $a1, 0 # save function pointer >> + st.d $a3, $a1, 8 # save argument pointer >> + >> + li.d $t0, ~0x004000ff # mask CSIGNAL and CLONE_DETACHED >> + and $t1, $a2, $t0 # cl_args.flags >> + li.d $t0, 0x000000ff # CSIGNAL >> + and $t2, $a2, $t0 # cl_args.exit_signal >> + >> + bstrins.d $sp, $zero, 2, 0 # align stack to 8 bytes >> + addi.d $sp, $sp, -88 # struct clone_args >> + st.d $t1, $sp, 0 # flags >> + st.d $a4, $sp, 8 # pidfd >> + st.d $a6, $sp, 16 # child_tid >> + st.d $a4, $sp, 24 # parent_tid >> + st.d $t2, $sp, 32 # exit_signal >> + st.d $a1, $sp, 40 # stack >> + st.d $zero, $sp, 48 # stack_size >> + st.d $a5, $sp, 56 # tls >> + st.d $zero, $sp, 64 # set_tid >> + st.d $zero, $sp, 72 # set_tid_size >> + st.d $zero, $sp, 80 # cgroup >> + >> + move $a0, $sp >> + li.d $a1, 88 >> + li.d $a7, 435 # __NR_clone3 >> + syscall 0 # call clone3 >> + >> + beqz $a0, 1f # whether child process >> + addi.d $sp, $sp, 88 >> + jirl $zero, $ra, 0 # parent process return >> +1: >> + ld.d $t8, $sp, 0 # function pointer >> + ld.d $a0, $sp, 8 # argument pointer >> + jirl $ra, $t8, 0 # call the user's function >> + li.d $a7, 93 >> + syscall 0 # child process exit > Based on the outcome of the kernel discussiong about __NR_clone3 vs > __NR_clone, this likely needs to be reverted to use __NR_clone again. > I'm waiting to see how that comes out. Sorry for the back and forth > while we try to get this right on the kernel side. OK,No problem. --------------9A0E0331F80550981905707F Content-Type: text/html; charset=utf-8 Content-Transfer-Encoding: 8bit


在 2022/5/16 下午10:27, Rich Felker 写道:
OK, review follows:
diff --git a/arch/loongarch64/bits/signal.h b/arch/loongarch64/bits/signal.h
new file mode 100644
index 00000000..e6613516
--- /dev/null
+++ b/arch/loongarch64/bits/signal.h
@@ -0,0 +1,79 @@
+#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
+ || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+
+#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+#define MINSIGSTKSZ 4096
+#define SIGSTKSZ 16384
+#endif
+
+typedef unsigned long greg_t, gregset_t[32];
+
+typedef struct sigcontext {
+	unsigned long pc;
+	gregset_t gregs;
+	unsigned int flags;
+	unsigned long extcontext[0] __attribute__((__aligned__(16)));
+}mcontext_t;
Again, [0] is not valid C. If the extension field is going to be
declared at all it needs to be declared in a way it can be accessed
without invoking UB, e.g. as a FAM. 

[0] is allowed in GNU C, we can not use it in musl ?

I don't understand  UB ?  e.g. as a FAM ?

I'm also not clear on how
specifying the alignment here helps since any object created in a way
that the alignment would affect cannot have the FAM present.

the __aligned__(16)  here used to save 128bit vector later.

      

      
diff --git a/arch/loongarch64/kstat.h b/arch/loongarch64/kstat.h
new file mode 100644
index 00000000..900f119d
--- /dev/null
+++ b/arch/loongarch64/kstat.h
@@ -0,0 +1,21 @@
+struct kstat {
+	dev_t st_dev;
+	ino_t st_ino;
+	mode_t st_mode;
+	nlink_t st_nlink;
+	uid_t st_uid;
+	gid_t st_gid;
+	dev_t st_rdev;
+	unsigned long __pad;
+	off_t st_size;
+	blksize_t st_blksize;
+	int __pad2;
+	blkcnt_t st_blocks;
+	long st_atime_sec;
+	unsigned long st_atime_nsec;
+	long st_mtime_sec;
+	unsigned long st_mtime_nsec;
+	long st_ctime_sec;
+	unsigned long st_ctime_nsec;
+	unsigned __unused[2];
+};
I think this file should be removed, right? -- since there is no
legacy stat sycall with a kstat structure.
yes, I removed this file.

      
diff --git a/arch/loongarch64/reloc.h b/arch/loongarch64/reloc.h
new file mode 100644
index 00000000..249cb40c
--- /dev/null
+++ b/arch/loongarch64/reloc.h
@@ -0,0 +1,35 @@
+#ifndef __RELOC_H__
+#define __RELOC_H__
+
+#define _GNU_SOURCE
+#include <endian.h>
You cannot define FTMs in the middle of a header file that might be
included somewhere other than the very top of a source file. But there
does not seem to be any reason it's defined at all.

I fixed this error:

diff --git a/arch/loongarch64/reloc.h b/arch/loongarch64/reloc.h

index 249cb40c..2f759a34 100644
--- a/arch/loongarch64/reloc.h
+++ b/arch/loongarch64/reloc.h
@@ -1,9 +1,3 @@
-#ifndef __RELOC_H__
-#define __RELOC_H__
-
-#define _GNU_SOURCE
-#include <endian.h>
-
 #ifdef __loongarch64_soft_float
 #define FP_SUFFIX "-sf"
 #else
@@ -31,5 +25,3 @@
     "    la.local $t1, "#sym" \n" \
     "    or %0, $t1, $zero \n" \
     : "=r"(*(fp)) : : "memory" )
-
-#endif


      
diff --git a/src/ldso/loongarch64/dlsym.s b/src/ldso/loongarch64/dlsym.s
new file mode 100644
index 00000000..10ac5c7f
--- /dev/null
+++ b/src/ldso/loongarch64/dlsym.s
@@ -0,0 +1,12 @@
+.global dlsym
+.hidden __dlsym
+.type   dlsym,@function
+dlsym:
+	move	$a2, $ra
+	la.global	$r16, __dlsym
+	addi.d 	$sp, $sp, -8
+	st.d	$ra, $sp, 0
+	jirl	$ra, $r16, 0
+	ld.d	$ra, $sp, 0
+	addi.d	$sp, $sp, 8
+	jirl	$r0, $ra, 0
Is there a reason this can't be a tail call? Maybe the ABI requires
that the caller reserve space for the callee to spill arg registers
that are used?

I optimized the code implementation:

diff --git a/src/ldso/loongarch64/dlsym.s b/src/ldso/loongarch64/dlsym.s
index 10ac5c7f..5f51ed85 100644
--- a/src/ldso/loongarch64/dlsym.s
+++ b/src/ldso/loongarch64/dlsym.s
@@ -4,9 +4,4 @@
 dlsym:
     move    $a2, $ra
     la.global    $r16, __dlsym
-    addi.d     $sp, $sp, -8
-    st.d    $ra, $sp, 0
-    jirl    $ra, $r16, 0
-    ld.d    $ra, $sp, 0
-    addi.d    $sp, $sp, 8
-    jirl    $r0, $ra, 0
+    jirl    $r0, $r16, 0
--


      
diff --git a/src/thread/loongarch64/clone.s b/src/thread/loongarch64/clone.s
new file mode 100644
index 00000000..ec920c7b
--- /dev/null
+++ b/src/thread/loongarch64/clone.s
@@ -0,0 +1,47 @@
+#__clone(func, stack, flags, arg, ptid, tls, ctid)
+#         a0,    a1,   a2,    a3,  a4,  a5,   a6
+# sys_clone3(struct clone_args *cl_args, size_t size)
+#                                 a0             a1
+
+.global	__clone
+.hidden __clone
+.type	__clone,@function
+__clone:
+	# Save function pointer and argument pointer on new thread stack
+	addi.d	$a1, $a1, -16
+	st.d	$a0, $a1, 0	# save function pointer
+	st.d	$a3, $a1, 8	# save argument pointer
+
+	li.d	$t0, ~0x004000ff  # mask CSIGNAL and CLONE_DETACHED
+	and	$t1, $a2, $t0     # cl_args.flags
+	li.d	$t0, 0x000000ff   # CSIGNAL
+	and	$t2, $a2, $t0     # cl_args.exit_signal
+
+	bstrins.d $sp, $zero, 2, 0  # align stack to 8 bytes
+	addi.d	$sp, $sp, -88   # struct clone_args
+	st.d	$t1, $sp, 0     # flags
+	st.d	$a4, $sp, 8     # pidfd
+	st.d	$a6, $sp, 16    # child_tid
+	st.d	$a4, $sp, 24    # parent_tid
+	st.d	$t2, $sp, 32    # exit_signal
+	st.d	$a1, $sp, 40    # stack
+	st.d	$zero, $sp, 48  # stack_size
+	st.d	$a5, $sp, 56    # tls
+	st.d	$zero, $sp, 64  # set_tid
+	st.d	$zero, $sp, 72  # set_tid_size
+	st.d	$zero, $sp, 80  # cgroup
+
+	move	$a0, $sp
+	li.d	$a1, 88
+	li.d	$a7, 435	# __NR_clone3
+	syscall 0		# call clone3
+
+	beqz	$a0, 1f		# whether child process
+	addi.d	$sp, $sp, 88
+	jirl	$zero, $ra, 0	# parent process return
+1:
+	ld.d	$t8, $sp, 0     # function pointer
+	ld.d	$a0, $sp, 8     # argument pointer
+	jirl	$ra, $t8, 0     # call the user's function
+	li.d	$a7, 93
+	syscall	0		# child process exit
Based on the outcome of the kernel discussiong about __NR_clone3 vs
__NR_clone, this likely needs to be reverted to use __NR_clone again.
I'm waiting to see how that comes out. Sorry for the back and forth
while we try to get this right on the kernel side.
OK,No problem.
--------------9A0E0331F80550981905707F--