* [musl] Initial xtensa/fdpic port review
@ 2024-02-27 23:24 Rich Felker
2024-02-28 0:13 ` Rich Felker
0 siblings, 1 reply; 17+ messages in thread
From: Rich Felker @ 2024-02-27 23:24 UTC (permalink / raw)
To: musl; +Cc: Max Filippov
[-- Attachment #1: Type: text/plain, Size: 133 bytes --]
Attached patches were pulled from
https://github.com/jcmvbkbc/musl-xtensa/commits/xtensa-1.2.4-fdpic/
I'll reply to comment inline.
[-- Attachment #2: 0001-xtensa-add-port.patch --]
[-- Type: text/plain, Size: 53991 bytes --]
From 868b34272f414323f10528d5b9988886541cb1c0 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Tue, 22 Mar 2016 02:35:58 +0300
Subject: [PATCH 1/2] xtensa: add port
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
arch/xtensa/arch.mak | 1 +
arch/xtensa/atomic_arch.h | 27 ++
arch/xtensa/bits/alltypes.h.in | 27 ++
arch/xtensa/bits/float.h | 16 ++
arch/xtensa/bits/ioctl.h | 219 ++++++++++++++
arch/xtensa/bits/limits.h | 1 +
arch/xtensa/bits/mman.h | 68 +++++
arch/xtensa/bits/posix.h | 2 +
arch/xtensa/bits/reg.h | 2 +
arch/xtensa/bits/setjmp.h | 5 +
arch/xtensa/bits/signal.h | 88 ++++++
arch/xtensa/bits/stat.h | 24 ++
arch/xtensa/bits/stdint.h | 20 ++
arch/xtensa/bits/syscall.h.in | 396 ++++++++++++++++++++++++++
arch/xtensa/bits/termios.h | 167 +++++++++++
arch/xtensa/bits/user.h | 4 +
arch/xtensa/bits/xtensa-config.h | 8 +
arch/xtensa/crt_arch.h | 48 ++++
arch/xtensa/kstat.h | 18 ++
arch/xtensa/pthread_arch.h | 11 +
arch/xtensa/reloc.h | 38 +++
arch/xtensa/syscall_arch.h | 104 +++++++
configure | 1 +
crt/xtensa/crti.S | 29 ++
crt/xtensa/crtn.S | 23 ++
include/elf.h | 70 +++++
src/internal/xtensa/syscall.S | 25 ++
src/ldso/xtensa/tlsdesc.S | 36 +++
src/process/xtensa/vfork.S | 21 ++
src/setjmp/xtensa/longjmp.S | 22 ++
src/setjmp/xtensa/setjmp.S | 25 ++
src/signal/xtensa/restore.s | 10 +
src/signal/xtensa/sigsetjmp.S | 24 ++
src/thread/xtensa/__set_thread_area.S | 16 ++
src/thread/xtensa/__unmapself.S | 13 +
src/thread/xtensa/clone.S | 46 +++
src/thread/xtensa/syscall_cp.S | 38 +++
37 files changed, 1693 insertions(+)
create mode 100644 arch/xtensa/arch.mak
create mode 100644 arch/xtensa/atomic_arch.h
create mode 100644 arch/xtensa/bits/alltypes.h.in
create mode 100644 arch/xtensa/bits/float.h
create mode 100644 arch/xtensa/bits/ioctl.h
create mode 100644 arch/xtensa/bits/limits.h
create mode 100644 arch/xtensa/bits/mman.h
create mode 100644 arch/xtensa/bits/posix.h
create mode 100644 arch/xtensa/bits/reg.h
create mode 100644 arch/xtensa/bits/setjmp.h
create mode 100644 arch/xtensa/bits/signal.h
create mode 100644 arch/xtensa/bits/stat.h
create mode 100644 arch/xtensa/bits/stdint.h
create mode 100644 arch/xtensa/bits/syscall.h.in
create mode 100644 arch/xtensa/bits/termios.h
create mode 100644 arch/xtensa/bits/user.h
create mode 100644 arch/xtensa/bits/xtensa-config.h
create mode 100644 arch/xtensa/crt_arch.h
create mode 100644 arch/xtensa/kstat.h
create mode 100644 arch/xtensa/pthread_arch.h
create mode 100644 arch/xtensa/reloc.h
create mode 100644 arch/xtensa/syscall_arch.h
create mode 100644 crt/xtensa/crti.S
create mode 100644 crt/xtensa/crtn.S
create mode 100644 src/internal/xtensa/syscall.S
create mode 100644 src/ldso/xtensa/tlsdesc.S
create mode 100644 src/process/xtensa/vfork.S
create mode 100644 src/setjmp/xtensa/longjmp.S
create mode 100644 src/setjmp/xtensa/setjmp.S
create mode 100644 src/signal/xtensa/restore.s
create mode 100644 src/signal/xtensa/sigsetjmp.S
create mode 100644 src/thread/xtensa/__set_thread_area.S
create mode 100644 src/thread/xtensa/__unmapself.S
create mode 100644 src/thread/xtensa/clone.S
create mode 100644 src/thread/xtensa/syscall_cp.S
diff --git a/arch/xtensa/arch.mak b/arch/xtensa/arch.mak
new file mode 100644
index 00000000..aa4d05ce
--- /dev/null
+++ b/arch/xtensa/arch.mak
@@ -0,0 +1 @@
+COMPAT_SRC_DIRS = compat/time32
diff --git a/arch/xtensa/atomic_arch.h b/arch/xtensa/atomic_arch.h
new file mode 100644
index 00000000..34bf0704
--- /dev/null
+++ b/arch/xtensa/atomic_arch.h
@@ -0,0 +1,27 @@
+#include "bits/xtensa-config.h"
+
+#if XCHAL_HAVE_S32C1I
+#define a_cas a_cas
+static inline int a_cas(volatile int *p, int t, int s)
+{
+ __asm__ __volatile__ (
+ " wsr %2, scompare1\n"
+ " s32c1i %0, %1\n"
+ : "+a"(s), "+m"(*p)
+ : "a"(t)
+ : "memory" );
+ return s;
+}
+#endif
+
+#define a_barrier a_barrier
+static inline void a_barrier()
+{
+ __asm__ __volatile__ ("memw" : : : "memory");
+}
+
+#define a_crash a_crash
+static inline void a_crash()
+{
+ __asm__ __volatile__ ("ill" : : : "memory");
+}
diff --git a/arch/xtensa/bits/alltypes.h.in b/arch/xtensa/bits/alltypes.h.in
new file mode 100644
index 00000000..21221ce5
--- /dev/null
+++ b/arch/xtensa/bits/alltypes.h.in
@@ -0,0 +1,27 @@
+#define _REDIR_TIME64 1
+#define _Addr int
+#define _Int64 long long
+#define _Reg int
+
+#if __XTENSA_EB__
+#define __BYTE_ORDER 4321
+#elif __XTENSA_EL__
+#define __BYTE_ORDER 1234
+#else
+#error Unknown endianness
+#endif
+
+#define __LONG_MAX 0x7fffffffL
+
+#ifndef __cplusplus
+#ifdef __WCHAR_TYPE__
+TYPEDEF __WCHAR_TYPE__ wchar_t;
+#else
+TYPEDEF unsigned wchar_t;
+#endif
+#endif
+
+TYPEDEF float float_t;
+TYPEDEF double double_t;
+
+TYPEDEF struct { long __l __attribute__((aligned(__BIGGEST_ALIGNMENT__))); } max_align_t;
diff --git a/arch/xtensa/bits/float.h b/arch/xtensa/bits/float.h
new file mode 100644
index 00000000..c4a655e7
--- /dev/null
+++ b/arch/xtensa/bits/float.h
@@ -0,0 +1,16 @@
+#define FLT_EVAL_METHOD 0
+
+#define LDBL_TRUE_MIN 4.94065645841246544177e-324L
+#define LDBL_MIN 2.22507385850720138309e-308L
+#define LDBL_MAX 1.79769313486231570815e+308L
+#define LDBL_EPSILON 2.22044604925031308085e-16L
+
+#define LDBL_MANT_DIG 53
+#define LDBL_MIN_EXP (-1021)
+#define LDBL_MAX_EXP 1024
+
+#define LDBL_DIG 15
+#define LDBL_MIN_10_EXP (-307)
+#define LDBL_MAX_10_EXP 308
+
+#define DECIMAL_DIG 17
diff --git a/arch/xtensa/bits/ioctl.h b/arch/xtensa/bits/ioctl.h
new file mode 100644
index 00000000..f30e3a69
--- /dev/null
+++ b/arch/xtensa/bits/ioctl.h
@@ -0,0 +1,219 @@
+#define _IOC(a,b,c,d) ( ((a)<<30) | ((b)<<8) | (c) | ((d)<<16) )
+#define _IOC_NONE 0U
+#define _IOC_READ 2U
+#define _IOC_WRITE 1U
+
+#define _IO(a,b) _IOC(_IOC_NONE,(a),(b),0)
+#define _IOW(a,b,c) _IOC(_IOC_WRITE,(a),(b),sizeof(c))
+#define _IOR(a,b,c) _IOC(_IOC_READ,(a),(b),sizeof(c))
+#define _IOWR(a,b,c) _IOC(_IOC_READ|_IOC_WRITE,(a),(b),sizeof(c))
+
+#define FIOCLEX _IO('f', 1)
+#define FIONCLEX _IO('f', 2)
+#define FIOASYNC _IOW('f', 125, int)
+#define FIONBIO _IOW('f', 126, int)
+#define FIONREAD _IOR('f', 127, int)
+#define TIOCINQ FIONREAD
+#define FIOQSIZE _IOR('f', 128, loff_t)
+
+#define TCGETS 0x5401
+#define TCSETS 0x5402
+#define TCSETSW 0x5403
+#define TCSETSF 0x5404
+
+#define TCGETA 0x80127417 /* _IOR('t', 23, struct termio) */
+#define TCSETA 0x40127418 /* _IOW('t', 24, struct termio) */
+#define TCSETAW 0x40127419 /* _IOW('t', 25, struct termio) */
+#define TCSETAF 0x4012741C /* _IOW('t', 28, struct termio) */
+
+#define TCSBRK _IO('t', 29)
+#define TCXONC _IO('t', 30)
+#define TCFLSH _IO('t', 31)
+
+#define TIOCSWINSZ 0x40087467 /* _IOW('t', 103, struct winsize) */
+#define TIOCGWINSZ 0x80087468 /* _IOR('t', 104, struct winsize) */
+#define TIOCSTART _IO('t', 110) /* start output, like ^Q */
+#define TIOCSTOP _IO('t', 111) /* stop output, like ^S */
+#define TIOCOUTQ _IOR('t', 115, int) /* output queue size */
+
+#define TIOCSPGRP _IOW('t', 118, int)
+#define TIOCGPGRP _IOR('t', 119, int)
+
+#define TIOCEXCL _IO('T', 12)
+#define TIOCNXCL _IO('T', 13)
+#define TIOCSCTTY _IO('T', 14)
+
+#define TIOCSTI _IOW('T', 18, char)
+#define TIOCMGET _IOR('T', 21, unsigned int)
+#define TIOCMBIS _IOW('T', 22, unsigned int)
+#define TIOCMBIC _IOW('T', 23, unsigned int)
+#define TIOCMSET _IOW('T', 24, unsigned int)
+# define TIOCM_LE 0x001
+# define TIOCM_DTR 0x002
+# define TIOCM_RTS 0x004
+# define TIOCM_ST 0x008
+# define TIOCM_SR 0x010
+# define TIOCM_CTS 0x020
+# define TIOCM_CAR 0x040
+# define TIOCM_RNG 0x080
+# define TIOCM_DSR 0x100
+# define TIOCM_CD TIOCM_CAR
+# define TIOCM_RI TIOCM_RNG
+
+#define TIOCGSOFTCAR _IOR('T', 25, unsigned int)
+#define TIOCSSOFTCAR _IOW('T', 26, unsigned int)
+#define TIOCLINUX _IOW('T', 28, char)
+#define TIOCCONS _IO('T', 29)
+#define TIOCGSERIAL 0x803C541E /*_IOR('T', 30, struct serial_struct)*/
+#define TIOCSSERIAL 0x403C541F /*_IOW('T', 31, struct serial_struct)*/
+#define TIOCPKT _IOW('T', 32, int)
+# define TIOCPKT_DATA 0
+# define TIOCPKT_FLUSHREAD 1
+# define TIOCPKT_FLUSHWRITE 2
+# define TIOCPKT_STOP 4
+# define TIOCPKT_START 8
+# define TIOCPKT_NOSTOP 16
+# define TIOCPKT_DOSTOP 32
+# define TIOCPKT_IOCTL 64
+
+
+#define TIOCNOTTY _IO('T', 34)
+#define TIOCSETD _IOW('T', 35, int)
+#define TIOCGETD _IOR('T', 36, int)
+#define TCSBRKP _IOW('T', 37, int) /* Needed for POSIX tcsendbreak()*/
+#define TIOCSBRK _IO('T', 39) /* BSD compatibility */
+#define TIOCCBRK _IO('T', 40) /* BSD compatibility */
+#define TIOCGSID _IOR('T', 41, pid_t) /* Return the session ID of FD*/
+#define TCGETS2 _IOR('T', 42, struct termios2)
+#define TCSETS2 _IOW('T', 43, struct termios2)
+#define TCSETSW2 _IOW('T', 44, struct termios2)
+#define TCSETSF2 _IOW('T', 45, struct termios2)
+#define TIOCGRS485 _IOR('T', 46, struct serial_rs485)
+#define TIOCSRS485 _IOWR('T', 47, struct serial_rs485)
+#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */
+#define TIOCGDEV _IOR('T',0x32, unsigned int) /* Get primary device node of /dev/console */
+#define TIOCSIG _IOW('T',0x36, int) /* Generate signal on Pty slave */
+#define TIOCVHANGUP _IO('T', 0x37)
+#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */
+#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */
+#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */
+
+#define TIOCSERCONFIG _IO('T', 83)
+#define TIOCSERGWILD _IOR('T', 84, int)
+#define TIOCSERSWILD _IOW('T', 85, int)
+#define TIOCGLCKTRMIOS 0x5456
+#define TIOCSLCKTRMIOS 0x5457
+#define TIOCSERGSTRUCT 0x5458 /* For debugging only */
+#define TIOCSERGETLSR _IOR('T', 89, unsigned int) /* Get line status reg. */
+ /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+#define TIOCSERGETMULTI 0x80a8545a /* Get multiport config */
+ /* _IOR('T', 90, struct serial_multiport_struct) */
+#define TIOCSERSETMULTI 0x40a8545b /* Set multiport config */
+ /* _IOW('T', 91, struct serial_multiport_struct) */
+
+#define TIOCMIWAIT _IO('T', 92) /* wait for a change on serial input line(s) */
+#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */
+
+#define TIOCM_LE 0x001
+#define TIOCM_DTR 0x002
+#define TIOCM_RTS 0x004
+#define TIOCM_ST 0x008
+#define TIOCM_SR 0x010
+#define TIOCM_CTS 0x020
+#define TIOCM_CAR 0x040
+#define TIOCM_RNG 0x080
+#define TIOCM_DSR 0x100
+#define TIOCM_CD TIOCM_CAR
+#define TIOCM_RI TIOCM_RNG
+#define TIOCM_OUT1 0x2000
+#define TIOCM_OUT2 0x4000
+#define TIOCM_LOOP 0x8000
+#define TIOCM_MODEM_BITS TIOCM_OUT2
+
+#define N_TTY 0
+#define N_SLIP 1
+#define N_MOUSE 2
+#define N_PPP 3
+#define N_STRIP 4
+#define N_AX25 5
+#define N_X25 6
+#define N_6PACK 7
+#define N_MASC 8
+#define N_R3964 9
+#define N_PROFIBUS_FDL 10
+#define N_IRDA 11
+#define N_SMSBLOCK 12
+#define N_HDLC 13
+#define N_SYNC_PPP 14
+#define N_HCI 15
+
+#define FIOGETOWN _IOR('f', 123, int)
+#define FIOSETOWN _IOW('f', 124, int)
+#define SIOCATMARK _IOR('s', 7, int)
+#define SIOCSPGRP _IOW('s', 8, pid_t)
+#define SIOCGPGRP _IOR('s', 9, pid_t)
+#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
+#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
+
+#define SIOCADDRT 0x890B
+#define SIOCDELRT 0x890C
+#define SIOCRTMSG 0x890D
+
+#define SIOCGIFNAME 0x8910
+#define SIOCSIFLINK 0x8911
+#define SIOCGIFCONF 0x8912
+#define SIOCGIFFLAGS 0x8913
+#define SIOCSIFFLAGS 0x8914
+#define SIOCGIFADDR 0x8915
+#define SIOCSIFADDR 0x8916
+#define SIOCGIFDSTADDR 0x8917
+#define SIOCSIFDSTADDR 0x8918
+#define SIOCGIFBRDADDR 0x8919
+#define SIOCSIFBRDADDR 0x891a
+#define SIOCGIFNETMASK 0x891b
+#define SIOCSIFNETMASK 0x891c
+#define SIOCGIFMETRIC 0x891d
+#define SIOCSIFMETRIC 0x891e
+#define SIOCGIFMEM 0x891f
+#define SIOCSIFMEM 0x8920
+#define SIOCGIFMTU 0x8921
+#define SIOCSIFMTU 0x8922
+#define SIOCSIFHWADDR 0x8924
+#define SIOCGIFENCAP 0x8925
+#define SIOCSIFENCAP 0x8926
+#define SIOCGIFHWADDR 0x8927
+#define SIOCGIFSLAVE 0x8929
+#define SIOCSIFSLAVE 0x8930
+#define SIOCADDMULTI 0x8931
+#define SIOCDELMULTI 0x8932
+#define SIOCGIFINDEX 0x8933
+#define SIOGIFINDEX SIOCGIFINDEX
+#define SIOCSIFPFLAGS 0x8934
+#define SIOCGIFPFLAGS 0x8935
+#define SIOCDIFADDR 0x8936
+#define SIOCSIFHWBROADCAST 0x8937
+#define SIOCGIFCOUNT 0x8938
+
+#define SIOCGIFBR 0x8940
+#define SIOCSIFBR 0x8941
+
+#define SIOCGIFTXQLEN 0x8942
+#define SIOCSIFTXQLEN 0x8943
+
+#define SIOCDARP 0x8953
+#define SIOCGARP 0x8954
+#define SIOCSARP 0x8955
+
+#define SIOCDRARP 0x8960
+#define SIOCGRARP 0x8961
+#define SIOCSRARP 0x8962
+
+#define SIOCGIFMAP 0x8970
+#define SIOCSIFMAP 0x8971
+
+#define SIOCADDDLCI 0x8980
+#define SIOCDELDLCI 0x8981
+
+#define SIOCDEVPRIVATE 0x89F0
+#define SIOCPROTOPRIVATE 0x89E0
diff --git a/arch/xtensa/bits/limits.h b/arch/xtensa/bits/limits.h
new file mode 100644
index 00000000..07743b6f
--- /dev/null
+++ b/arch/xtensa/bits/limits.h
@@ -0,0 +1 @@
+#define PAGESIZE 4096
diff --git a/arch/xtensa/bits/mman.h b/arch/xtensa/bits/mman.h
new file mode 100644
index 00000000..d2f58eb1
--- /dev/null
+++ b/arch/xtensa/bits/mman.h
@@ -0,0 +1,68 @@
+#define MAP_FAILED ((void *) -1)
+
+#define PROT_NONE 0
+#define PROT_READ 1
+#define PROT_WRITE 2
+#define PROT_EXEC 4
+#define PROT_GROWSDOWN 0x01000000
+#define PROT_GROWSUP 0x02000000
+
+#define MAP_SHARED 0x01
+#define MAP_PRIVATE 0x02
+#define MAP_FIXED 0x10
+
+#define MAP_TYPE 0x0f
+#undef MAP_FILE
+#define MAP_FILE 0x00
+#undef MAP_ANON
+#define MAP_ANON 0x800
+#define MAP_ANONYMOUS MAP_ANON
+#undef MAP_NORESERVE
+#define MAP_NORESERVE 0x0400
+#undef MAP_GROWSDOWN
+#define MAP_GROWSDOWN 0x1000
+#undef MAP_DENYWRITE
+#define MAP_DENYWRITE 0x2000
+#undef MAP_EXECUTABLE
+#define MAP_EXECUTABLE 0x4000
+#undef MAP_LOCKED
+#define MAP_LOCKED 0x8000
+#undef MAP_POPULATE
+#define MAP_POPULATE 0x10000
+#undef MAP_NONBLOCK
+#define MAP_NONBLOCK 0x20000
+#undef MAP_STACK
+#define MAP_STACK 0x40000
+#undef MAP_HUGETLB
+#define MAP_HUGETLB 0x80000
+
+#define POSIX_MADV_NORMAL 0
+#define POSIX_MADV_RANDOM 1
+#define POSIX_MADV_SEQUENTIAL 2
+#define POSIX_MADV_WILLNEED 3
+#define POSIX_MADV_DONTNEED 4
+
+#define MS_ASYNC 1
+#define MS_INVALIDATE 2
+#define MS_SYNC 4
+
+#define MCL_CURRENT 1
+#define MCL_FUTURE 2
+
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+#define MADV_NORMAL 0
+#define MADV_RANDOM 1
+#define MADV_SEQUENTIAL 2
+#define MADV_WILLNEED 3
+#define MADV_DONTNEED 4
+#define MADV_REMOVE 9
+#define MADV_DONTFORK 10
+#define MADV_DOFORK 11
+#define MADV_MERGEABLE 12
+#define MADV_UNMERGEABLE 13
+#define MADV_HUGEPAGE 14
+#define MADV_NOHUGEPAGE 15
+#define MADV_DONTDUMP 16
+#define MADV_DODUMP 17
+#define MADV_HWPOISON 100
+#endif
diff --git a/arch/xtensa/bits/posix.h b/arch/xtensa/bits/posix.h
new file mode 100644
index 00000000..30a38714
--- /dev/null
+++ b/arch/xtensa/bits/posix.h
@@ -0,0 +1,2 @@
+#define _POSIX_V6_ILP32_OFFBIG 1
+#define _POSIX_V7_ILP32_OFFBIG 1
diff --git a/arch/xtensa/bits/reg.h b/arch/xtensa/bits/reg.h
new file mode 100644
index 00000000..0192a293
--- /dev/null
+++ b/arch/xtensa/bits/reg.h
@@ -0,0 +1,2 @@
+#undef __WORDSIZE
+#define __WORDSIZE 32
diff --git a/arch/xtensa/bits/setjmp.h b/arch/xtensa/bits/setjmp.h
new file mode 100644
index 00000000..e1a6dd97
--- /dev/null
+++ b/arch/xtensa/bits/setjmp.h
@@ -0,0 +1,5 @@
+#if defined(__XTENSA_CALL0_ABI__)
+typedef unsigned long __jmp_buf[6];
+#else
+#error Unsupported Xtensa ABI
+#endif
diff --git a/arch/xtensa/bits/signal.h b/arch/xtensa/bits/signal.h
new file mode 100644
index 00000000..545ffd36
--- /dev/null
+++ b/arch/xtensa/bits/signal.h
@@ -0,0 +1,88 @@
+#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
+ || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+
+#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+#define MINSIGSTKSZ 2048
+#define SIGSTKSZ 8192
+#endif
+
+typedef struct sigcontext
+{
+ unsigned long sc_pc;
+ unsigned long sc_ps;
+ unsigned long sc_lbeg;
+ unsigned long sc_lend;
+ unsigned long sc_lcount;
+ unsigned long sc_sar;
+ unsigned long sc_acclo;
+ unsigned long sc_acchi;
+ unsigned long sc_a[16];
+ void *sc_xtregs;
+} mcontext_t;
+
+struct sigaltstack {
+ void *ss_sp;
+ int ss_flags;
+ size_t ss_size;
+};
+
+typedef struct __ucontext {
+ unsigned long uc_flags;
+ struct __ucontext *uc_link;
+ stack_t uc_stack;
+ mcontext_t uc_mcontext;
+ sigset_t uc_sigmask;
+} ucontext_t;
+
+#define SA_NOCLDSTOP 0x00000001
+#define SA_NOCLDWAIT 0x00000002 /* not supported yet */
+#define SA_SIGINFO 0x00000004
+#define SA_ONSTACK 0x08000000
+#define SA_RESTART 0x10000000
+#define SA_NODEFER 0x40000000
+#define SA_RESETHAND 0x80000000
+
+#define SA_NOMASK SA_NODEFER
+#define SA_ONESHOT SA_RESETHAND
+
+#define SA_RESTORER 0x04000000
+
+#endif
+
+#define SIGHUP 1
+#define SIGINT 2
+#define SIGQUIT 3
+#define SIGILL 4
+#define SIGTRAP 5
+#define SIGABRT 6
+#define SIGIOT 6
+#define SIGBUS 7
+#define SIGFPE 8
+#define SIGKILL 9
+#define SIGUSR1 10
+#define SIGSEGV 11
+#define SIGUSR2 12
+#define SIGPIPE 13
+#define SIGALRM 14
+#define SIGTERM 15
+#define SIGSTKFLT 16
+#define SIGCHLD 17
+#define SIGCONT 18
+#define SIGSTOP 19
+#define SIGTSTP 20
+#define SIGTTIN 21
+#define SIGTTOU 22
+#define SIGURG 23
+#define SIGXCPU 24
+#define SIGXFSZ 25
+#define SIGVTALRM 26
+#define SIGPROF 27
+#define SIGWINCH 28
+#define SIGIO 29
+#define SIGPOLL SIGIO
+/* #define SIGLOST 29 */
+#define SIGPWR 30
+#define SIGSYS 31
+#define SIGUNUSED 31
+
+#define _NSIG 64
diff --git a/arch/xtensa/bits/stat.h b/arch/xtensa/bits/stat.h
new file mode 100644
index 00000000..58881366
--- /dev/null
+++ b/arch/xtensa/bits/stat.h
@@ -0,0 +1,24 @@
+/* copied from kernel definition, but with padding replaced
+ * by the corresponding correctly-sized userspace types. */
+
+struct stat
+{
+ dev_t st_dev;
+ ino_t st_ino;
+ mode_t st_mode;
+ nlink_t st_nlink;
+ uid_t st_uid;
+ gid_t st_gid;
+ dev_t st_rdev;
+ off_t st_size;
+ blksize_t st_blksize;
+ long __st_padding1;
+ blkcnt_t st_blocks;
+ struct {
+ long tv_sec;
+ long tv_nsec;
+ } __st_atim32, __st_mtim32, __st_ctim32;
+ struct timespec st_atim;
+ struct timespec st_mtim;
+ struct timespec st_ctim;
+};
diff --git a/arch/xtensa/bits/stdint.h b/arch/xtensa/bits/stdint.h
new file mode 100644
index 00000000..d1b27121
--- /dev/null
+++ b/arch/xtensa/bits/stdint.h
@@ -0,0 +1,20 @@
+typedef int32_t int_fast16_t;
+typedef int32_t int_fast32_t;
+typedef uint32_t uint_fast16_t;
+typedef uint32_t uint_fast32_t;
+
+#define INT_FAST16_MIN INT32_MIN
+#define INT_FAST32_MIN INT32_MIN
+
+#define INT_FAST16_MAX INT32_MAX
+#define INT_FAST32_MAX INT32_MAX
+
+#define UINT_FAST16_MAX UINT32_MAX
+#define UINT_FAST32_MAX UINT32_MAX
+
+#define INTPTR_MIN INT32_MIN
+#define INTPTR_MAX INT32_MAX
+#define UINTPTR_MAX UINT32_MAX
+#define PTRDIFF_MIN INT32_MIN
+#define PTRDIFF_MAX INT32_MAX
+#define SIZE_MAX UINT32_MAX
diff --git a/arch/xtensa/bits/syscall.h.in b/arch/xtensa/bits/syscall.h.in
new file mode 100644
index 00000000..06f8ca2e
--- /dev/null
+++ b/arch/xtensa/bits/syscall.h.in
@@ -0,0 +1,396 @@
+#define __NR_spill 0
+#define __NR_xtensa 1
+#define __NR_available4 2
+#define __NR_available5 3
+#define __NR_available6 4
+#define __NR_available7 5
+#define __NR_available8 6
+#define __NR_available9 7
+#define __NR_open 8
+#define __NR_close 9
+#define __NR_dup 10
+#define __NR_dup2 11
+#define __NR_read 12
+#define __NR_write 13
+#define __NR_select 14
+#define __NR_lseek 15
+#define __NR_poll 16
+#define __NR__llseek 17
+#define __NR_epoll_wait 18
+#define __NR_epoll_ctl 19
+#define __NR_epoll_create 20
+#define __NR_creat 21
+#define __NR_truncate 22
+#define __NR_ftruncate 23
+#define __NR_readv 24
+#define __NR_writev 25
+#define __NR_fsync 26
+#define __NR_fdatasync 27
+#define __NR_truncate64 28
+#define __NR_ftruncate64 29
+#define __NR_pread64 30
+#define __NR_pwrite64 31
+#define __NR_link 32
+#define __NR_rename 33
+#define __NR_symlink 34
+#define __NR_readlink 35
+#define __NR_mknod 36
+#define __NR_pipe 37
+#define __NR_unlink 38
+#define __NR_rmdir 39
+#define __NR_mkdir 40
+#define __NR_chdir 41
+#define __NR_fchdir 42
+#define __NR_getcwd 43
+#define __NR_chmod 44
+#define __NR_chown 45
+#define __NR_stat 46
+#define __NR_stat64 47
+#define __NR_lchown 48
+#define __NR_lstat 49
+#define __NR_lstat64 50
+#define __NR_available51 51
+#define __NR_fchmod 52
+#define __NR_fchown 53
+#define __NR_fstat 54
+#define __NR_fstat64 55
+#define __NR_flock 56
+#define __NR_access 57
+#define __NR_umask 58
+#define __NR_getdents 59
+#define __NR_getdents64 60
+#define __NR_fcntl64 61
+#define __NR_fallocate 62
+#define __NR_fadvise64_64 63
+#define __NR_utime 64 /* glibc 2.3.3 ?? */
+#define __NR_utimes 65
+#define __NR_ioctl 66
+#define __NR_fcntl 67
+#define __NR_setxattr 68
+#define __NR_getxattr 69
+#define __NR_listxattr 70
+#define __NR_removexattr 71
+#define __NR_lsetxattr 72
+#define __NR_lgetxattr 73
+#define __NR_llistxattr 74
+#define __NR_lremovexattr 75
+#define __NR_fsetxattr 76
+#define __NR_fgetxattr 77
+#define __NR_flistxattr 78
+#define __NR_fremovexattr 79
+#define __NR_mmap2 80
+#define __NR_munmap 81
+#define __NR_mprotect 82
+#define __NR_brk 83
+#define __NR_mlock 84
+#define __NR_munlock 85
+#define __NR_mlockall 86
+#define __NR_munlockall 87
+#define __NR_mremap 88
+#define __NR_msync 89
+#define __NR_mincore 90
+#define __NR_madvise 91
+#define __NR_shmget 92
+#define __NR_shmat 93
+#define __NR_shmctl 94
+#define __NR_shmdt 95
+#define __NR_socket 96
+#define __NR_setsockopt 97
+#define __NR_getsockopt 98
+#define __NR_shutdown 99
+#define __NR_bind 100
+#define __NR_connect 101
+#define __NR_listen 102
+#define __NR_accept 103
+#define __NR_getsockname 104
+#define __NR_getpeername 105
+#define __NR_sendmsg 106
+#define __NR_recvmsg 107
+#define __NR_send 108
+#define __NR_recv 109
+#define __NR_sendto 110
+#define __NR_recvfrom 111
+#define __NR_socketpair 112
+#define __NR_sendfile 113
+#define __NR_sendfile64 114
+#define __NR_sendmmsg 115
+#define __NR_clone 116
+#define __NR_execve 117
+#define __NR_exit 118
+#define __NR_exit_group 119
+#define __NR_getpid 120
+#define __NR_wait4 121
+#define __NR_waitid 122
+#define __NR_kill 123
+#define __NR_tkill 124
+#define __NR_tgkill 125
+#define __NR_set_tid_address 126
+#define __NR_gettid 127
+#define __NR_setsid 128
+#define __NR_getsid 129
+#define __NR_prctl 130
+#define __NR_personality 131
+#define __NR_getpriority 132
+#define __NR_setpriority 133
+#define __NR_setitimer 134
+#define __NR_getitimer 135
+#define __NR_setuid 136
+#define __NR_getuid 137
+#define __NR_setgid 138
+#define __NR_getgid 139
+#define __NR_geteuid 140
+#define __NR_getegid 141
+#define __NR_setreuid 142
+#define __NR_setregid 143
+#define __NR_setresuid 144
+#define __NR_getresuid 145
+#define __NR_setresgid 146
+#define __NR_getresgid 147
+#define __NR_setpgid 148
+#define __NR_getpgid 149
+#define __NR_getppid 150
+#define __NR_getpgrp 151
+#define __NR_reserved152 152 /* set_thread_area */
+#define __NR_reserved153 153 /* get_thread_area */
+#define __NR_times 154
+#define __NR_acct 155
+#define __NR_sched_setaffinity 156
+#define __NR_sched_getaffinity 157
+#define __NR_capget 158
+#define __NR_capset 159
+#define __NR_ptrace 160
+#define __NR_semtimedop 161
+#define __NR_semget 162
+#define __NR_semop 163
+#define __NR_semctl 164
+#define __NR_available165 165
+#define __NR_msgget 166
+#define __NR_msgsnd 167
+#define __NR_msgrcv 168
+#define __NR_msgctl 169
+#define __NR_available170 170
+#define __NR_umount2 171
+#define __NR_mount 172
+#define __NR_swapon 173
+#define __NR_chroot 174
+#define __NR_pivot_root 175
+#define __NR_umount 176
+#define __NR_swapoff 177
+#define __NR_sync 178
+#define __NR_syncfs 179
+#define __NR_setfsuid 180
+#define __NR_setfsgid 181
+#define __NR_sysfs 182
+#define __NR_ustat 183
+#define __NR_statfs 184
+#define __NR_fstatfs 185
+#define __NR_statfs64 186
+#define __NR_fstatfs64 187
+#define __NR_setrlimit 188
+#define __NR_getrlimit 189
+#define __NR_getrusage 190
+#define __NR_futex 191
+#define __NR_gettimeofday 192
+#define __NR_settimeofday 193
+#define __NR_adjtimex 194
+#define __NR_nanosleep 195
+#define __NR_getgroups 196
+#define __NR_setgroups 197
+#define __NR_sethostname 198
+#define __NR_setdomainname 199
+#define __NR_syslog 200
+#define __NR_vhangup 201
+#define __NR_uselib 202
+#define __NR_reboot 203
+#define __NR_quotactl 204
+#define __NR_nfsservctl 205
+#define __NR__sysctl 206
+#define __NR_bdflush 207
+#define __NR_uname 208
+#define __NR_sysinfo 209
+#define __NR_init_module 210
+#define __NR_delete_module 211
+#define __NR_sched_setparam 212
+#define __NR_sched_getparam 213
+#define __NR_sched_setscheduler 214
+#define __NR_sched_getscheduler 215
+#define __NR_sched_get_priority_max 216
+#define __NR_sched_get_priority_min 217
+#define __NR_sched_rr_get_interval 218
+#define __NR_sched_yield 219
+#define __NR_available222 222
+#define __NR_restart_syscall 223
+#define __NR_sigaltstack 224
+#define __NR_rt_sigreturn 225
+#define __NR_rt_sigaction 226
+#define __NR_rt_sigprocmask 227
+#define __NR_rt_sigpending 228
+#define __NR_rt_sigtimedwait 229
+#define __NR_rt_sigqueueinfo 230
+#define __NR_rt_sigsuspend 231
+#define __NR_mq_open 232
+#define __NR_mq_unlink 233
+#define __NR_mq_timedsend 234
+#define __NR_mq_timedreceive 235
+#define __NR_mq_notify 236
+#define __NR_mq_getsetattr 237
+#define __NR_available238 238
+#define __NR_io_setup 239
+#define __NR_io_destroy 240
+#define __NR_io_submit 241
+#define __NR_io_getevents 242
+#define __NR_io_cancel 243
+#define __NR_clock_settime 244
+#define __NR_clock_gettime 245
+#define __NR_clock_getres 246
+#define __NR_clock_nanosleep 247
+#define __NR_timer_create 248
+#define __NR_timer_delete 249
+#define __NR_timer_settime 250
+#define __NR_timer_gettime 251
+#define __NR_timer_getoverrun 252
+#define __NR_reserved253 253
+#define __NR_lookup_dcookie 254
+#define __NR_available255 255
+#define __NR_add_key 256
+#define __NR_request_key 257
+#define __NR_keyctl 258
+#define __NR_available259 259
+#define __NR_readahead 260
+#define __NR_remap_file_pages 261
+#define __NR_migrate_pages 262
+#define __NR_mbind 263
+#define __NR_get_mempolicy 264
+#define __NR_set_mempolicy 265
+#define __NR_unshare 266
+#define __NR_move_pages 267
+#define __NR_splice 268
+#define __NR_tee 269
+#define __NR_vmsplice 270
+#define __NR_available271 271
+#define __NR_pselect6 272
+#define __NR_ppoll 273
+#define __NR_epoll_pwait 274
+#define __NR_epoll_create1 275
+#define __NR_inotify_init 276
+#define __NR_inotify_add_watch 277
+#define __NR_inotify_rm_watch 278
+#define __NR_inotify_init1 279
+#define __NR_getcpu 280
+#define __NR_kexec_load 281
+#define __NR_ioprio_set 282
+#define __NR_ioprio_get 283
+#define __NR_set_robust_list 284
+#define __NR_get_robust_list 285
+#define __NR_available286 286
+#define __NR_available287 287
+#define __NR_openat 288
+#define __NR_mkdirat 289
+#define __NR_mknodat 290
+#define __NR_unlinkat 291
+#define __NR_renameat 292
+#define __NR_linkat 293
+#define __NR_symlinkat 294
+#define __NR_readlinkat 295
+#define __NR_utimensat 296
+#define __NR_fchownat 297
+#define __NR_futimesat 298
+#define __NR_fstatat64 299
+#define __NR_fchmodat 300
+#define __NR_faccessat 301
+#define __NR_available302 302
+#define __NR_available303 303
+#define __NR_signalfd 304
+#define __NR_eventfd 306
+#define __NR_recvmmsg 307
+#define __NR_setns 308
+#define __NR_signalfd4 309
+#define __NR_dup3 310
+#define __NR_pipe2 311
+#define __NR_timerfd_create 312
+#define __NR_timerfd_settime 313
+#define __NR_timerfd_gettime 314
+#define __NR_available315 315
+#define __NR_eventfd2 316
+#define __NR_preadv 317
+#define __NR_pwritev 318
+#define __NR_available319 319
+#define __NR_fanotify_init 320
+#define __NR_fanotify_mark 321
+#define __NR_process_vm_readv 322
+#define __NR_process_vm_writev 323
+#define __NR_name_to_handle_at 324
+#define __NR_open_by_handle_at 325
+#define __NR_sync_file_range2 326
+#define __NR_perf_event_open 327
+#define __NR_rt_tgsigqueueinfo 328
+#define __NR_clock_adjtime 329
+#define __NR_prlimit64 330
+#define __NR_kcmp 331
+#define __NR_finit_module 332
+#define __NR_accept4 333
+#define __NR_sched_setattr 334
+#define __NR_sched_getattr 335
+#define __NR_renameat2 336
+#define __NR_seccomp 337
+#define __NR_getrandom 338
+#define __NR_memfd_create 339
+#define __NR_bpf 340
+#define __NR_execveat 341
+#define __NR_userfaultfd 342
+#define __NR_membarrier 343
+#define __NR_mlock2 344
+#define __NR_copy_file_range 345
+#define __NR_preadv2 346
+#define __NR_pwritev2 347
+#define __NR_pkey_mprotect 348
+#define __NR_pkey_alloc 349
+#define __NR_pkey_free 350
+#define __NR_statx 351
+#define __NR_rseq 352
+#define __NR_clock_gettime64 403
+#define __NR_clock_settime64 404
+#define __NR_clock_adjtime64 405
+#define __NR_clock_getres_time64 406
+#define __NR_clock_nanosleep_time64 407
+#define __NR_timer_gettime64 408
+#define __NR_timer_settime64 409
+#define __NR_timerfd_gettime64 410
+#define __NR_timerfd_settime64 411
+#define __NR_utimensat_time64 412
+#define __NR_pselect6_time64 413
+#define __NR_ppoll_time64 414
+#define __NR_io_pgetevents_time64 416
+#define __NR_recvmmsg_time64 417
+#define __NR_mq_timedsend_time64 418
+#define __NR_mq_timedreceive_time64 419
+#define __NR_semtimedop_time64 420
+#define __NR_rt_sigtimedwait_time64 421
+#define __NR_futex_time64 422
+#define __NR_sched_rr_get_interval_time64 423
+#define __NR_pidfd_send_signal 424
+#define __NR_io_uring_setup 425
+#define __NR_io_uring_enter 426
+#define __NR_io_uring_register 427
+#define __NR_open_tree 428
+#define __NR_move_mount 429
+#define __NR_fsopen 430
+#define __NR_fsconfig 431
+#define __NR_fsmount 432
+#define __NR_fspick 433
+#define __NR_pidfd_open 434
+#define __NR_clone3 435
+#define __NR_close_range 436
+#define __NR_openat2 437
+#define __NR_pidfd_getfd 438
+#define __NR_faccessat2 439
+#define __NR_process_madvise 440
+#define __NR_epoll_pwait2 441
+#define __NR_mount_setattr 442
+#define __NR_quotactl_fd 443
+#define __NR_landlock_create_ruleset 444
+#define __NR_landlock_add_rule 445
+#define __NR_landlock_restrict_self 446
+#define __NR_process_mrelease 448
+#define __NR_futex_waitv 449
+#define __NR_set_mempolicy_home_node 450
diff --git a/arch/xtensa/bits/termios.h b/arch/xtensa/bits/termios.h
new file mode 100644
index 00000000..ff1cbb25
--- /dev/null
+++ b/arch/xtensa/bits/termios.h
@@ -0,0 +1,167 @@
+struct termios
+{
+ tcflag_t c_iflag;
+ tcflag_t c_oflag;
+ tcflag_t c_cflag;
+ tcflag_t c_lflag;
+ cc_t c_line;
+ cc_t c_cc[NCCS];
+ speed_t __c_ispeed;
+ speed_t __c_ospeed;
+};
+
+#define VINTR 0
+#define VQUIT 1
+#define VERASE 2
+#define VKILL 3
+#define VEOF 4
+#define VTIME 5
+#define VMIN 6
+#define VSWTC 7
+#define VSTART 8
+#define VSTOP 9
+#define VSUSP 10
+#define VEOL 11
+#define VREPRINT 12
+#define VDISCARD 13
+#define VWERASE 14
+#define VLNEXT 15
+#define VEOL2 16
+
+#define IGNBRK 0000001
+#define BRKINT 0000002
+#define IGNPAR 0000004
+#define PARMRK 0000010
+#define INPCK 0000020
+#define ISTRIP 0000040
+#define INLCR 0000100
+#define IGNCR 0000200
+#define ICRNL 0000400
+#define IUCLC 0001000
+#define IXON 0002000
+#define IXANY 0004000
+#define IXOFF 0010000
+#define IMAXBEL 0020000
+#define IUTF8 0040000
+
+#define OPOST 0000001
+#define OLCUC 0000002
+#define ONLCR 0000004
+#define OCRNL 0000010
+#define ONOCR 0000020
+#define ONLRET 0000040
+#define OFILL 0000100
+#define OFDEL 0000200
+#define NLDLY 0000400
+#define NL0 0000000
+#define NL1 0000400
+#define CRDLY 0003000
+#define CR0 0000000
+#define CR1 0001000
+#define CR2 0002000
+#define CR3 0003000
+#define TABDLY 0014000
+#define TAB0 0000000
+#define TAB1 0004000
+#define TAB2 0010000
+#define TAB3 0014000
+#define XTABS 0014000
+#define BSDLY 0020000
+#define BS0 0000000
+#define BS1 0020000
+#define VTDLY 0040000
+#define VT0 0000000
+#define VT1 0040000
+#define FFDLY 0100000
+#define FF0 0000000
+#define FF1 0100000
+
+#define B0 0000000
+#define B50 0000001
+#define B75 0000002
+#define B110 0000003
+#define B134 0000004
+#define B150 0000005
+#define B200 0000006
+#define B300 0000007
+#define B600 0000010
+#define B1200 0000011
+#define B1800 0000012
+#define B2400 0000013
+#define B4800 0000014
+#define B9600 0000015
+#define B19200 0000016
+#define B38400 0000017
+#define EXTA 0000016
+#define EXTB 0000017
+
+#define BOTHER 0010000
+#define B57600 0010001
+#define B115200 0010002
+#define B230400 0010003
+#define B460800 0010004
+#define B500000 0010005
+#define B576000 0010006
+#define B921600 0010007
+#define B1000000 0010010
+#define B1152000 0010011
+#define B1500000 0010012
+#define B2000000 0010013
+#define B2500000 0010014
+#define B3000000 0010015
+#define B3500000 0010016
+#define B4000000 0010017
+
+#define CBAUD 0010017
+
+#define CSIZE 0000060
+#define CS5 0000000
+#define CS6 0000020
+#define CS7 0000040
+#define CS8 0000060
+#define CSTOPB 0000100
+#define CREAD 0000200
+#define PARENB 0000400
+#define PARODD 0001000
+#define HUPCL 0002000
+#define CLOCAL 0004000
+
+#define ISIG 0000001
+#define ICANON 0000002
+#define XCASE 0000004
+#define ECHO 0000010
+#define ECHOE 0000020
+#define ECHOK 0000040
+#define ECHONL 0000100
+#define NOFLSH 0000200
+#define TOSTOP 0000400
+#define ECHOCTL 0001000
+#define ECHOPRT 0002000
+#define ECHOKE 0004000
+#define FLUSHO 0010000
+#define PENDIN 0040000
+#define IEXTEN 0100000
+#define EXTPROC 0200000
+
+#define TCOOFF 0
+#define TCOON 1
+#define TCIOFF 2
+#define TCION 3
+
+#define TCIFLUSH 0
+#define TCOFLUSH 1
+#define TCIOFLUSH 2
+
+#define TCSANOW 0
+#define TCSADRAIN 1
+#define TCSAFLUSH 2
+
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+#define CBAUDEX 0010000
+#define CIBAUD 002003600000
+#define IBSHIFT 16
+#define CMSPAR 010000000000
+#define CRTSCTS 020000000000
+#define EXTPROC 0200000
+#define XTABS 0014000
+#endif
diff --git a/arch/xtensa/bits/user.h b/arch/xtensa/bits/user.h
new file mode 100644
index 00000000..8ac7526f
--- /dev/null
+++ b/arch/xtensa/bits/user.h
@@ -0,0 +1,4 @@
+#define ELF_NGREG 128
+#define ELF_NFPREG 18
+typedef unsigned long elf_greg_t, elf_gregset_t[ELF_NGREG];
+typedef unsigned int elf_fpreg_t, elf_fpregset_t[ELF_NFPREG];
diff --git a/arch/xtensa/bits/xtensa-config.h b/arch/xtensa/bits/xtensa-config.h
new file mode 100644
index 00000000..a2e95904
--- /dev/null
+++ b/arch/xtensa/bits/xtensa-config.h
@@ -0,0 +1,8 @@
+#undef XCHAL_NUM_AREGS
+#define XCHAL_NUM_AREGS __XCHAL_NUM_AREGS
+
+#undef XCHAL_HAVE_S32C1I
+#define XCHAL_HAVE_S32C1I __XCHAL_HAVE_S32C1I
+
+#undef XCHAL_HAVE_EXCLUSIVE
+#define XCHAL_HAVE_EXCLUSIVE __XCHAL_HAVE_EXCLUSIVE
diff --git a/arch/xtensa/crt_arch.h b/arch/xtensa/crt_arch.h
new file mode 100644
index 00000000..1019c186
--- /dev/null
+++ b/arch/xtensa/crt_arch.h
@@ -0,0 +1,48 @@
+#ifdef __FDPIC__
+
+__asm__(
+".text \n"
+".global " START " \n"
+START ": \n"
+" .begin no-transform\n"
+" call0 1f\n"
+"2:\n"
+" .end no-transform\n"
+" .align 4\n"
+" .literal_position\n"
+"1:\n"
+" movi a15, 2b\n"
+" sub a15, a0, a15\n"
+
+" mov a12, a4\n"
+" mov a13, a5\n"
+" mov a14, a6\n"
+#ifndef SHARED
+" mov a2, a4\n"
+" movi a3, __ROFIXUP_LIST__\n"
+" add a3, a3, a15\n"
+" movi a4, __ROFIXUP_END__\n"
+" add a4, a4, a15\n"
+" movi a0, __fdpic_fixup\n"
+" add a0, a0, a15\n"
+" callx0 a0\n"
+" mov a11, a2\n"
+" s32i a15, a11, 12\n"
+#endif
+" addi a7, a1, -8\n"
+" s32i a12, a7, 0\n"
+" s32i a13, a7, 4\n"
+" mov a2, a1\n"
+" mov a3, a14\n"
+" movi a4, -16\n"
+" and a1, a7, a4\n"
+" movi a0, "START"_c\n"
+" add a0, a0, a15\n"
+" callx0 a0\n"
+);
+
+#ifndef SHARED
+#include "fdpic_crt.h"
+#endif
+
+#endif
diff --git a/arch/xtensa/kstat.h b/arch/xtensa/kstat.h
new file mode 100644
index 00000000..9aeaf7e3
--- /dev/null
+++ b/arch/xtensa/kstat.h
@@ -0,0 +1,18 @@
+struct kstat {
+ dev_t st_dev;
+ ino_t st_ino;
+ mode_t st_mode;
+ nlink_t st_nlink;
+ uid_t st_uid;
+ gid_t st_gid;
+ dev_t st_rdev;
+ off_t st_size;
+ blksize_t st_blksize;
+ blkcnt_t st_blocks;
+ long st_atime_sec;
+ long st_atime_nsec;
+ long st_mtime_sec;
+ long st_mtime_nsec;
+ long st_ctime_sec;
+ long st_ctime_nsec;
+};
diff --git a/arch/xtensa/pthread_arch.h b/arch/xtensa/pthread_arch.h
new file mode 100644
index 00000000..79f53ddd
--- /dev/null
+++ b/arch/xtensa/pthread_arch.h
@@ -0,0 +1,11 @@
+static inline uintptr_t __get_tp()
+{
+ uintptr_t tp;
+ __asm__ __volatile__ ("rur %0, threadptr" : "=a" (tp));
+ return tp;
+}
+
+#define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 8
+
+#define MC_PC sc_pc
diff --git a/arch/xtensa/reloc.h b/arch/xtensa/reloc.h
new file mode 100644
index 00000000..03482f18
--- /dev/null
+++ b/arch/xtensa/reloc.h
@@ -0,0 +1,38 @@
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define ENDIAN_SUFFIX "eb"
+#else
+#define ENDIAN_SUFFIX ""
+#endif
+
+#if __FDPIC__
+#define ABI_SUFFIX "-fdpic"
+#else
+#define ABI_SUFFIX ""
+#endif
+
+#define LDSO_ARCH "xtensa" ENDIAN_SUFFIX ABI_SUFFIX
+
+#define TPOFF_K 8
+
+#define REL_PLT R_XTENSA_JMP_SLOT
+#define REL_RELATIVE R_XTENSA_RELATIVE
+#define REL_GOT R_XTENSA_GLOB_DAT
+#define REL_COPY R_XTENSA_32
+#define REL_TLSDESC R_XTENSA_TLSDESC_FN
+
+#if __FDPIC__
+#define REL_FUNCDESC R_XTENSA_FUNCDESC
+#define REL_FUNCDESC_VAL R_XTENSA_FUNCDESC_VALUE
+
+#define DL_FDPIC 1
+#define DL_NOMMU_SUPPORT 1
+
+#define CRTJMP(pc,sp) do { \
+ register size_t a4 __asm__("a4") = ((size_t *)(sp))[-2]; \
+ __asm__ __volatile__( "mov a1, %1 ; jx %0" \
+ : : "r"(pc), "r"(sp), "r"(a4) : "memory" ); } while(0)
+
+#define GETFUNCSYM(fp, sym, got) __asm__ ( \
+ "movi %0, " #sym "@GOTOFFFUNCDESC ; add %0, %0, %1" \
+ : "=&a"(*fp) : "a"(got) : "memory" )
+#endif
diff --git a/arch/xtensa/syscall_arch.h b/arch/xtensa/syscall_arch.h
new file mode 100644
index 00000000..63ee4e96
--- /dev/null
+++ b/arch/xtensa/syscall_arch.h
@@ -0,0 +1,104 @@
+#define __SYSCALL_LL_E(x) \
+((union { long long ll; long l[2]; }){ .ll = x }).l[0], \
+((union { long long ll; long l[2]; }){ .ll = x }).l[1]
+#define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
+
+static inline long __syscall0(long n)
+{
+ register long a2 __asm__("a2") = n;
+
+ __asm__ __volatile__ ("syscall"
+ : "+&a"(a2)
+ :
+ : "memory");
+ return a2;
+}
+
+static inline long __syscall1(long n, long a)
+{
+ register long a2 __asm__("a2") = n;
+ register long a6 __asm__("a6") = a;
+
+ __asm__ __volatile__ ("syscall"
+ : "+&a"(a2)
+ : "a"(a6)
+ : "memory");
+ return a2;
+}
+
+static inline long __syscall2(long n, long a, long b)
+{
+ register long a2 __asm__("a2") = n;
+ register long a6 __asm__("a6") = a;
+ register long a3 __asm__("a3") = b;
+
+ __asm__ __volatile__ ("syscall"
+ : "+&a"(a2)
+ : "a"(a6), "a"(a3)
+ : "memory");
+ return a2;
+}
+
+static inline long __syscall3(long n, long a, long b, long c)
+{
+ register long a2 __asm__("a2") = n;
+ register long a6 __asm__("a6") = a;
+ register long a3 __asm__("a3") = b;
+ register long a4 __asm__("a4") = c;
+
+ __asm__ __volatile__ ("syscall"
+ : "+&a"(a2)
+ : "a"(a6), "a"(a3), "a"(a4)
+ : "memory");
+ return a2;
+}
+
+static inline long __syscall4(long n, long a, long b, long c, long d)
+{
+ register long a2 __asm__("a2") = n;
+ register long a6 __asm__("a6") = a;
+ register long a3 __asm__("a3") = b;
+ register long a4 __asm__("a4") = c;
+ register long a5 __asm__("a5") = d;
+
+ __asm__ __volatile__ ("syscall"
+ : "+&a"(a2)
+ : "a"(a6), "a"(a3), "a"(a4), "a"(a5)
+ : "memory");
+ return a2;
+}
+
+static inline long __syscall5(long n, long a, long b, long c, long d, long e)
+{
+ register long a2 __asm__("a2") = n;
+ register long a6 __asm__("a6") = a;
+ register long a3 __asm__("a3") = b;
+ register long a4 __asm__("a4") = c;
+ register long a5 __asm__("a5") = d;
+ register long a8 __asm__("a8") = e;
+
+ __asm__ __volatile__ ("syscall"
+ : "+&a"(a2)
+ : "a"(a6), "a"(a3), "a"(a4), "a"(a5), "a"(a8)
+ : "memory");
+ return a2;
+}
+
+static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
+{
+ register long a2 __asm__("a2") = n;
+ register long a6 __asm__("a6") = a;
+ register long a3 __asm__("a3") = b;
+ register long a4 __asm__("a4") = c;
+ register long a5 __asm__("a5") = d;
+ register long a8 __asm__("a8") = e;
+ register long a9 __asm__("a9") = f;
+
+ __asm__ __volatile__ ("syscall"
+ : "+&a"(a2)
+ : "a"(a6), "a"(a3), "a"(a4), "a"(a5), "a"(a8), "a"(a9)
+ : "memory");
+ return a2;
+}
+
+#define SYSCALL_FADVISE_6_ARG
diff --git a/configure b/configure
index 853bf05e..8cfc3111 100755
--- a/configure
+++ b/configure
@@ -338,6 +338,7 @@ powerpc*|ppc*) ARCH=powerpc ;;
riscv64*) ARCH=riscv64 ;;
sh[1-9bel-]*|sh|superh*) ARCH=sh ;;
s390x*) ARCH=s390x ;;
+xtensa*) ARCH=xtensa ;;
unknown) fail "$0: unable to detect target arch; try $0 --target=..." ;;
*) fail "$0: unknown or unsupported target \"$target\"" ;;
esac
diff --git a/crt/xtensa/crti.S b/crt/xtensa/crti.S
new file mode 100644
index 00000000..82a837e7
--- /dev/null
+++ b/crt/xtensa/crti.S
@@ -0,0 +1,29 @@
+.section .init
+.global _init
+.type _init, @function
+_init:
+#if defined(__XTENSA_CALL0_ABI__)
+ addi sp, sp, -16
+ s32i a0, sp, 0
+#ifdef __FDPIC__
+ s32i a12, sp, 4
+ mov a12, a11
+#endif
+#else
+#error Unsupported Xtensa ABI
+#endif
+
+.section .fini
+.global _fini
+.type _fini, @function
+_fini:
+#if defined(__XTENSA_CALL0_ABI__)
+ addi sp, sp, -16
+ s32i a0, sp, 0
+#ifdef __FDPIC__
+ s32i a12, sp, 4
+ mov a12, a11
+#endif
+#else
+#error Unsupported Xtensa ABI
+#endif
diff --git a/crt/xtensa/crtn.S b/crt/xtensa/crtn.S
new file mode 100644
index 00000000..5f93b193
--- /dev/null
+++ b/crt/xtensa/crtn.S
@@ -0,0 +1,23 @@
+.section .init
+#if defined(__XTENSA_CALL0_ABI__)
+#ifdef __FDPIC__
+ l32i a12, sp, 4
+#endif
+ l32i a0, sp, 0
+ addi sp, sp, 16
+ ret
+#else
+#error Unsupported Xtensa ABI
+#endif
+
+.section .fini
+#if defined(__XTENSA_CALL0_ABI__)
+#ifdef __FDPIC__
+ l32i a12, sp, 4
+#endif
+ l32i a0, sp, 0
+ addi sp, sp, 16
+ ret
+#else
+#error Unsupported Xtensa ABI
+#endif
diff --git a/include/elf.h b/include/elf.h
index 23f2c4bc..f472c808 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -3293,6 +3293,76 @@ enum
#define R_RISCV_SET32 56
#define R_RISCV_32_PCREL 57
+#define R_XTENSA_NONE 0
+#define R_XTENSA_32 1
+#define R_XTENSA_RTLD 2
+#define R_XTENSA_GLOB_DAT 3
+#define R_XTENSA_JMP_SLOT 4
+#define R_XTENSA_RELATIVE 5
+#define R_XTENSA_PLT 6
+#define R_XTENSA_OP0 8
+#define R_XTENSA_OP1 9
+#define R_XTENSA_OP2 10
+#define R_XTENSA_ASM_EXPAND 11
+#define R_XTENSA_ASM_SIMPLIFY 12
+#define R_XTENSA_32_PCREL 14
+#define R_XTENSA_GNU_VTINHERIT 15
+#define R_XTENSA_GNU_VTENTRY 16
+#define R_XTENSA_DIFF8 17
+#define R_XTENSA_DIFF16 18
+#define R_XTENSA_DIFF32 19
+#define R_XTENSA_SLOT0_OP 20
+#define R_XTENSA_SLOT1_OP 21
+#define R_XTENSA_SLOT2_OP 22
+#define R_XTENSA_SLOT3_OP 23
+#define R_XTENSA_SLOT4_OP 24
+#define R_XTENSA_SLOT5_OP 25
+#define R_XTENSA_SLOT6_OP 26
+#define R_XTENSA_SLOT7_OP 27
+#define R_XTENSA_SLOT8_OP 28
+#define R_XTENSA_SLOT9_OP 29
+#define R_XTENSA_SLOT10_OP 30
+#define R_XTENSA_SLOT11_OP 31
+#define R_XTENSA_SLOT12_OP 32
+#define R_XTENSA_SLOT13_OP 33
+#define R_XTENSA_SLOT14_OP 34
+#define R_XTENSA_SLOT0_ALT 35
+#define R_XTENSA_SLOT1_ALT 36
+#define R_XTENSA_SLOT2_ALT 37
+#define R_XTENSA_SLOT3_ALT 38
+#define R_XTENSA_SLOT4_ALT 39
+#define R_XTENSA_SLOT5_ALT 40
+#define R_XTENSA_SLOT6_ALT 41
+#define R_XTENSA_SLOT7_ALT 42
+#define R_XTENSA_SLOT8_ALT 43
+#define R_XTENSA_SLOT9_ALT 44
+#define R_XTENSA_SLOT10_ALT 45
+#define R_XTENSA_SLOT11_ALT 46
+#define R_XTENSA_SLOT12_ALT 47
+#define R_XTENSA_SLOT13_ALT 48
+#define R_XTENSA_SLOT14_ALT 49
+#define R_XTENSA_TLSDESC_FN 50
+#define R_XTENSA_TLSDESC_ARG 51
+#define R_XTENSA_TLS_DTPOFF 52
+#define R_XTENSA_TLS_TPOFF 53
+#define R_XTENSA_TLS_FUNC 54
+#define R_XTENSA_TLS_ARG 55
+#define R_XTENSA_TLS_CALL 56
+#define R_XTENSA_PDIFF8 57
+#define R_XTENSA_PDIFF16 58
+#define R_XTENSA_PDIFF32 59
+#define R_XTENSA_NDIFF8 60
+#define R_XTENSA_NDIFF16 61
+#define R_XTENSA_NDIFF32 62
+#define R_XTENSA_GOT 63
+#define R_XTENSA_GOTOFF 64
+#define R_XTENSA_GOTFUNCDESC 65
+#define R_XTENSA_GOTOFFFUNCDESC 66
+#define R_XTENSA_FUNCDESC 67
+#define R_XTENSA_FUNCDESC_VALUE 68
+
+#define R_XTENSA_NUM 69
+
#ifdef __cplusplus
}
#endif
diff --git a/src/internal/xtensa/syscall.S b/src/internal/xtensa/syscall.S
new file mode 100644
index 00000000..f09a46e8
--- /dev/null
+++ b/src/internal/xtensa/syscall.S
@@ -0,0 +1,25 @@
+.global __syscall
+.hidden __syscall
+.type __syscall,@function
+.align 4
+__syscall:
+#ifdef __XTENSA_WINDOWED_ABI__
+ entry a1, 16
+#endif
+ mov a8, a3
+ mov a3, a4
+ mov a4, a5
+ mov a5, a6
+ mov a6, a8
+ mov a8, a7
+#if defined(__XTENSA_WINDOWED_ABI__)
+ l32i a9, a1, 16
+ syscall
+ retw
+#elif defined(__XTENSA_CALL0_ABI__)
+ l32i a9, a1, 0
+ syscall
+ ret
+#else
+#error Unsupported Xtensa ABI
+#endif
diff --git a/src/ldso/xtensa/tlsdesc.S b/src/ldso/xtensa/tlsdesc.S
new file mode 100644
index 00000000..084d5f3e
--- /dev/null
+++ b/src/ldso/xtensa/tlsdesc.S
@@ -0,0 +1,36 @@
+.global __tlsdesc_static
+.hidden __tlsdesc_static
+.type __tlsdesc_static,@function
+.align 4
+__tlsdesc_static:
+#ifdef __XTENSA_WINDOWED_ABI__
+ entry a1, 16
+#endif
+ rur a3, threadptr
+ add a2, a2, a3
+#if defined(__XTENSA_WINDOWED_ABI__)
+ retw
+#elif defined(__XTENSA_CALL0_ABI__)
+ ret
+#else
+#error Unsupported Xtensa ABI
+#endif
+
+.hidden __tls_get_new
+
+.global __tlsdesc_dynamic
+.hidden __tlsdesc_dynamic
+.type __tlsdesc_dynamic,@function
+.align 4
+__tlsdesc_dynamic:
+#if defined(__XTENSA_WINDOWED_ABI__)
+ entry a1, 16
+ mov a6, a2
+ call4 __tls_get_addr
+ mov a2, a6
+ retw
+#elif defined(__XTENSA_CALL0_ABI__)
+ j __tls_get_addr
+#else
+#error Unsupported Xtensa ABI
+#endif
diff --git a/src/process/xtensa/vfork.S b/src/process/xtensa/vfork.S
new file mode 100644
index 00000000..25478b87
--- /dev/null
+++ b/src/process/xtensa/vfork.S
@@ -0,0 +1,21 @@
+.global vfork
+.type vfork,@function
+vfork:
+#if defined(__XTENSA_CALL0_ABI__)
+ movi a2, 116 # __NR_clone
+ movi a3, 0
+ movi a6, 0x4111 # CLONE_VM | CLONE_VFORK | SIGCHLD
+ syscall
+
+#ifdef __FDPIC__
+.hidden __syscall_ret
+ movi a9, __syscall_ret@GOT
+ add a9, a9, a11
+ l32i a9, a9, 0
+ jx a9
+#else
+#error Unsupported Xtensa ABI
+#endif
+#else
+#error Unsupported Xtensa ABI
+#endif
diff --git a/src/setjmp/xtensa/longjmp.S b/src/setjmp/xtensa/longjmp.S
new file mode 100644
index 00000000..602093c9
--- /dev/null
+++ b/src/setjmp/xtensa/longjmp.S
@@ -0,0 +1,22 @@
+.global _longjmp
+.global longjmp
+.type _longjmp,@function
+.type longjmp,@function
+.align 4
+_longjmp:
+longjmp:
+#if defined(__XTENSA_CALL0_ABI__)
+ l32i a0, a2, 0
+ l32i a1, a2, 4
+ l32i a12, a2, 8
+ l32i a13, a2, 12
+ l32i a14, a2, 16
+ l32i a15, a2, 20
+
+ movi a2, 1
+ movnez a2, a3, a3
+
+ ret
+#else
+#error Unsupported Xtensa ABI
+#endif
diff --git a/src/setjmp/xtensa/setjmp.S b/src/setjmp/xtensa/setjmp.S
new file mode 100644
index 00000000..3a735a6a
--- /dev/null
+++ b/src/setjmp/xtensa/setjmp.S
@@ -0,0 +1,25 @@
+.global ___setjmp
+.hidden ___setjmp
+.global __setjmp
+.global _setjmp
+.global setjmp
+.type __setjmp,@function
+.type _setjmp,@function
+.type setjmp,@function
+.align 4
+___setjmp:
+__setjmp:
+_setjmp:
+setjmp:
+#if defined(__XTENSA_CALL0_ABI__)
+ s32i a0, a2, 0
+ s32i a1, a2, 4
+ s32i a12, a2, 8
+ s32i a13, a2, 12
+ s32i a14, a2, 16
+ s32i a15, a2, 20
+ movi a2, 0
+ ret
+#else
+#error Unsupported Xtensa ABI
+#endif
diff --git a/src/signal/xtensa/restore.s b/src/signal/xtensa/restore.s
new file mode 100644
index 00000000..e03e0fb7
--- /dev/null
+++ b/src/signal/xtensa/restore.s
@@ -0,0 +1,10 @@
+.global __restore
+.global __restore_rt
+.type __restore,@function
+.type __restore_rt,@function
+.space 1
+.align 4
+__restore:
+__restore_rt:
+ movi a2, 225 # SYS_rt_sigreturn
+ syscall
diff --git a/src/signal/xtensa/sigsetjmp.S b/src/signal/xtensa/sigsetjmp.S
new file mode 100644
index 00000000..e44bba88
--- /dev/null
+++ b/src/signal/xtensa/sigsetjmp.S
@@ -0,0 +1,24 @@
+.global sigsetjmp
+.global __sigsetjmp
+.type sigsetjmp,@function
+.type __sigsetjmp,@function
+.align 4
+sigsetjmp:
+__sigsetjmp:
+#if defined(__XTENSA_CALL0_ABI__)
+ bnez a3, 1f
+.hidden ___setjmp
+ j ___setjmp
+1:
+ mov a3, a0
+ mov a4, a2
+ call0 ___setjmp
+ mov a0, a3
+ mov a3, a2
+ mov a2, a4
+
+.hidden __sigsetjmp_tail
+ j __sigsetjmp_tail
+#else
+#error Unsupported Xtensa ABI
+#endif
diff --git a/src/thread/xtensa/__set_thread_area.S b/src/thread/xtensa/__set_thread_area.S
new file mode 100644
index 00000000..1f402125
--- /dev/null
+++ b/src/thread/xtensa/__set_thread_area.S
@@ -0,0 +1,16 @@
+.global __set_thread_area
+.type __set_thread_area,@function
+.align 4
+__set_thread_area:
+#ifdef __XTENSA_WINDOWED_ABI__
+ entry a1, 16
+#endif
+ wur a2, threadptr
+ movi a2, 0
+#if defined(__XTENSA_WINDOWED_ABI__)
+ retw
+#elif defined(__XTENSA_CALL0_ABI__)
+ ret
+#else
+#error Unsupported Xtensa ABI
+#endif
diff --git a/src/thread/xtensa/__unmapself.S b/src/thread/xtensa/__unmapself.S
new file mode 100644
index 00000000..15bf2bdf
--- /dev/null
+++ b/src/thread/xtensa/__unmapself.S
@@ -0,0 +1,13 @@
+.global __unmapself
+.type __unmapself,@function
+.align 4
+__unmapself:
+#if defined(__XTENSA_CALL0_ABI__)
+ mov a6, a2
+ movi a2, 81 # SYS_munmap
+ syscall
+ movi a2, 118 # SYS_exit
+ syscall
+#else
+#error Unsupported Xtensa ABI
+#endif
diff --git a/src/thread/xtensa/clone.S b/src/thread/xtensa/clone.S
new file mode 100644
index 00000000..8e8514d6
--- /dev/null
+++ b/src/thread/xtensa/clone.S
@@ -0,0 +1,46 @@
+# __clone(func, stack, flags, arg, ptid, tls, ctid)
+# a2, a3, a4, a5, a6, a7, [sp]
+
+# syscall(SYS_clone, flags, stack, ptid, tls, ctid)
+# a2, a6, a3, a4, a5, a8
+
+.global __clone
+.type __clone,@function
+.align 4
+__clone:
+#if defined(__XTENSA_CALL0_ABI__)
+ # align stack and save func,arg
+ srli a3, a3, 4
+ slli a3, a3, 4
+ addi a3, a3, -16
+ s32i a2, a3, 0
+ s32i a5, a3, 4
+
+ # syscall
+ mov a2, a4
+ mov a4, a6
+ mov a6, a2
+ mov a5, a7
+ l32i a8, a1, 16
+ movi a2, 116 # SYS_clone
+ syscall
+
+ beqz a2, 1f
+ # parent
+ ret
+
+ # child
+1:
+ l32i a0, a1, 0
+ l32i a2, a1, 4
+#ifdef __FDPIC__
+ l32i a11, a0, 4
+ l32i a0, a0, 0
+#endif
+ callx0 a0
+ mov a6, a2
+ movi a2, 118 # SYS_exit
+ syscall
+#else
+#error Unsupported Xtensa ABI
+#endif
diff --git a/src/thread/xtensa/syscall_cp.S b/src/thread/xtensa/syscall_cp.S
new file mode 100644
index 00000000..de7c3e66
--- /dev/null
+++ b/src/thread/xtensa/syscall_cp.S
@@ -0,0 +1,38 @@
+# __syscall_cp_asm(&self->cancel, nr, u, v, w, x, y, z)
+# a2 a3 a4 a5 a6 a7 [sp] [sp+4]
+
+# syscall(nr, u, v, w, x, y, z)
+# a2 a6 a3 a4 a5 a8 a9
+
+.global __cp_begin
+.hidden __cp_begin
+.global __cp_end
+.hidden __cp_end
+.global __cp_cancel
+.hidden __cp_cancel
+.hidden __cancel
+.global __syscall_cp_asm
+.hidden __syscall_cp_asm
+.type __syscall_cp_asm,@function
+.align 4
+#if defined(__XTENSA_CALL0_ABI__)
+__syscall_cp_asm:
+__cp_begin:
+ l32i a2, a2, 0
+ bnez a2, __cp_cancel
+ mov a2, a4
+ mov a4, a6
+ mov a6, a2
+ mov a2, a3
+ mov a3, a5
+ mov a5, a7
+ l32i a8, a1, 0
+ l32i a9, a1, 4
+ syscall
+__cp_end:
+ ret
+__cp_cancel:
+ j __cancel
+#else
+#error Unsupported Xtensa ABI
+#endif
--
2.21.0
[-- Attachment #3: 0002-WIP.patch --]
[-- Type: text/plain, Size: 3181 bytes --]
From f8c5953ecdb948282cc8e573b729c25db60a95a8 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Wed, 21 Feb 2024 08:27:37 -0800
Subject: [PATCH 2/2] WIP
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
ldso/dlstart.c | 7 +++++++
ldso/dynlink.c | 6 ++++--
src/internal/dynlink.h | 5 +++--
3 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/ldso/dlstart.c b/ldso/dlstart.c
index 259f5e18..beca953f 100644
--- a/ldso/dlstart.c
+++ b/ldso/dlstart.c
@@ -90,12 +90,19 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
- segs[rel_addr[1]].p_vaddr
+ syms[R_SYM(rel[1])].st_value;
rel_addr[1] = dyn[DT_PLTGOT];
+ } else if (R_TYPE(rel[1]) == REL_RELATIVE) {
+ size_t val = *rel_addr;
+ for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
+ *rel_addr += segs[j].addr - segs[j].p_vaddr;
} else {
size_t val = syms[R_SYM(rel[1])].st_value;
for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
*rel_addr = rel[2] + segs[j].addr - segs[j].p_vaddr + val;
}
}
+#ifdef __xtensa__
+ ((unsigned long *)dyn[DT_PLTGOT])[3] = segs[0].addr - segs[0].p_vaddr;
+#endif
#else
/* If the dynamic linker is invoked as a command, its load
* address is not available in the aux vector. Instead, compute
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index ceca3c98..25563af3 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -1420,6 +1420,7 @@ static void reloc_all(struct dso *p)
if (!DL_FDPIC)
do_relr_relocs(p, laddr(p, dyn[DT_RELR]), dyn[DT_RELRSZ]);
+#if 0
if (head != &ldso && p->relro_start != p->relro_end) {
long ret = __syscall(SYS_mprotect, laddr(p, p->relro_start),
p->relro_end-p->relro_start, PROT_READ);
@@ -1429,6 +1430,7 @@ static void reloc_all(struct dso *p)
if (runtime) longjmp(*rtld_fail, 1);
}
}
+#endif
p->relocated = 1;
}
@@ -1485,7 +1487,7 @@ void __libc_exit_fini()
if (dyn[0] & (1<<DT_FINI_ARRAY)) {
size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
size_t *fn = (size_t *)laddr(p, dyn[DT_FINI_ARRAY])+n;
- while (n--) ((void (*)(void))*--fn)();
+ while (n--) fpaddr(p, *--fn)();
}
#ifndef NO_LEGACY_INITFINI
if ((dyn[0] & (1<<DT_FINI)) && dyn[DT_FINI])
@@ -1603,7 +1605,7 @@ static void do_init_fini(struct dso **queue)
if (dyn[0] & (1<<DT_INIT_ARRAY)) {
size_t n = dyn[DT_INIT_ARRAYSZ]/sizeof(size_t);
size_t *fn = laddr(p, dyn[DT_INIT_ARRAY]);
- while (n--) ((void (*)(void))*fn++)();
+ while (n--) fpaddr(p, *fn++)();
}
pthread_mutex_lock(&init_fini_lock);
diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h
index 06f41d09..6485e883 100644
--- a/src/internal/dynlink.h
+++ b/src/internal/dynlink.h
@@ -78,10 +78,11 @@ struct fdpic_dummy_loadmap {
(R_TYPE(x) == REL_RELATIVE) || \
(R_TYPE(x) == REL_SYM_OR_REL && !R_SYM(x)) )
#else
-#define IS_RELATIVE(x,s) ( ( \
+#define IS_RELATIVE(x,s) ( \
+ (R_TYPE(x) == REL_RELATIVE) || ( ( \
(R_TYPE(x) == REL_FUNCDESC_VAL) || \
(R_TYPE(x) == REL_SYMBOLIC) ) \
- && (((s)[R_SYM(x)].st_info & 0xf) == STT_SECTION) )
+ && (((s)[R_SYM(x)].st_info & 0xf) == STT_SECTION) ) )
#endif
#ifndef NEED_MIPS_GOT_RELOCS
--
2.21.0
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-27 23:24 [musl] Initial xtensa/fdpic port review Rich Felker
@ 2024-02-28 0:13 ` Rich Felker
2024-02-28 17:20 ` Max Filippov
0 siblings, 1 reply; 17+ messages in thread
From: Rich Felker @ 2024-02-28 0:13 UTC (permalink / raw)
To: musl; +Cc: Max Filippov
First of all, I'm excited to see new work on this and especially the
FDPIC ABI. Not only is xtensa a very relevant arch I've been hoping we
could support for some time now, but having FDPIC will help getting
the parts of musl's FDPIC support that are currently making
sh-specific assumptions worked out so that we can get other new FDPIC
ports added too (particularly, arm and riscv).
I'm not up-to-date on what toolchain status/stability is or how close
this is to being intended as upstreamable, but since I was pointed to
it and took a look, I wanted to record my initial review thoughts here
so they don't get lost and to hopefully move this closer to ready for
upstream.
Review follows inline:
On Tue, Feb 27, 2024 at 06:24:31PM -0500, Rich Felker wrote:
> Attached patches were pulled from
> https://github.com/jcmvbkbc/musl-xtensa/commits/xtensa-1.2.4-fdpic/
>
> I'll reply to comment inline.
> >From 868b34272f414323f10528d5b9988886541cb1c0 Mon Sep 17 00:00:00 2001
> From: Max Filippov <jcmvbkbc@gmail.com>
> Date: Tue, 22 Mar 2016 02:35:58 +0300
> Subject: [PATCH 1/2] xtensa: add port
>
> Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
> ---
> arch/xtensa/arch.mak | 1 +
> arch/xtensa/atomic_arch.h | 27 ++
> arch/xtensa/bits/alltypes.h.in | 27 ++
> arch/xtensa/bits/float.h | 16 ++
> arch/xtensa/bits/ioctl.h | 219 ++++++++++++++
> arch/xtensa/bits/limits.h | 1 +
> arch/xtensa/bits/mman.h | 68 +++++
> arch/xtensa/bits/posix.h | 2 +
> arch/xtensa/bits/reg.h | 2 +
> arch/xtensa/bits/setjmp.h | 5 +
> arch/xtensa/bits/signal.h | 88 ++++++
> arch/xtensa/bits/stat.h | 24 ++
> arch/xtensa/bits/stdint.h | 20 ++
> arch/xtensa/bits/syscall.h.in | 396 ++++++++++++++++++++++++++
> arch/xtensa/bits/termios.h | 167 +++++++++++
> arch/xtensa/bits/user.h | 4 +
> arch/xtensa/bits/xtensa-config.h | 8 +
> arch/xtensa/crt_arch.h | 48 ++++
> arch/xtensa/kstat.h | 18 ++
> arch/xtensa/pthread_arch.h | 11 +
> arch/xtensa/reloc.h | 38 +++
> arch/xtensa/syscall_arch.h | 104 +++++++
> configure | 1 +
> crt/xtensa/crti.S | 29 ++
> crt/xtensa/crtn.S | 23 ++
> include/elf.h | 70 +++++
> src/internal/xtensa/syscall.S | 25 ++
> src/ldso/xtensa/tlsdesc.S | 36 +++
> src/process/xtensa/vfork.S | 21 ++
> src/setjmp/xtensa/longjmp.S | 22 ++
> src/setjmp/xtensa/setjmp.S | 25 ++
> src/signal/xtensa/restore.s | 10 +
> src/signal/xtensa/sigsetjmp.S | 24 ++
> src/thread/xtensa/__set_thread_area.S | 16 ++
> src/thread/xtensa/__unmapself.S | 13 +
> src/thread/xtensa/clone.S | 46 +++
> src/thread/xtensa/syscall_cp.S | 38 +++
> 37 files changed, 1693 insertions(+)
> create mode 100644 arch/xtensa/arch.mak
> create mode 100644 arch/xtensa/atomic_arch.h
> create mode 100644 arch/xtensa/bits/alltypes.h.in
> create mode 100644 arch/xtensa/bits/float.h
> create mode 100644 arch/xtensa/bits/ioctl.h
> create mode 100644 arch/xtensa/bits/limits.h
> create mode 100644 arch/xtensa/bits/mman.h
> create mode 100644 arch/xtensa/bits/posix.h
> create mode 100644 arch/xtensa/bits/reg.h
> create mode 100644 arch/xtensa/bits/setjmp.h
> create mode 100644 arch/xtensa/bits/signal.h
> create mode 100644 arch/xtensa/bits/stat.h
> create mode 100644 arch/xtensa/bits/stdint.h
> create mode 100644 arch/xtensa/bits/syscall.h.in
> create mode 100644 arch/xtensa/bits/termios.h
> create mode 100644 arch/xtensa/bits/user.h
> create mode 100644 arch/xtensa/bits/xtensa-config.h
> create mode 100644 arch/xtensa/crt_arch.h
> create mode 100644 arch/xtensa/kstat.h
> create mode 100644 arch/xtensa/pthread_arch.h
> create mode 100644 arch/xtensa/reloc.h
> create mode 100644 arch/xtensa/syscall_arch.h
> create mode 100644 crt/xtensa/crti.S
> create mode 100644 crt/xtensa/crtn.S
> create mode 100644 src/internal/xtensa/syscall.S
> create mode 100644 src/ldso/xtensa/tlsdesc.S
> create mode 100644 src/process/xtensa/vfork.S
> create mode 100644 src/setjmp/xtensa/longjmp.S
> create mode 100644 src/setjmp/xtensa/setjmp.S
> create mode 100644 src/signal/xtensa/restore.s
> create mode 100644 src/signal/xtensa/sigsetjmp.S
> create mode 100644 src/thread/xtensa/__set_thread_area.S
> create mode 100644 src/thread/xtensa/__unmapself.S
> create mode 100644 src/thread/xtensa/clone.S
> create mode 100644 src/thread/xtensa/syscall_cp.S
>
> diff --git a/arch/xtensa/arch.mak b/arch/xtensa/arch.mak
> new file mode 100644
> index 00000000..aa4d05ce
> --- /dev/null
> +++ b/arch/xtensa/arch.mak
> @@ -0,0 +1 @@
> +COMPAT_SRC_DIRS = compat/time32
> diff --git a/arch/xtensa/atomic_arch.h b/arch/xtensa/atomic_arch.h
> new file mode 100644
> index 00000000..34bf0704
> --- /dev/null
> +++ b/arch/xtensa/atomic_arch.h
> @@ -0,0 +1,27 @@
> +#include "bits/xtensa-config.h"
This is not what bits headers are for; they are public-installed
headers that provide parts of the standard public headers that vary by
arch, and are never directly #include'able.
If you really need arch-internal headers like this just put them in
the top-level dir of the arch. But in the case of these macros, either
just use the compiler-provided, __-prefixed ones directly, or if
they're not actually variable, don't inspect them at all.
> +
> +#if XCHAL_HAVE_S32C1I
> +#define a_cas a_cas
> +static inline int a_cas(volatile int *p, int t, int s)
> +{
> + __asm__ __volatile__ (
> + " wsr %2, scompare1\n"
> + " s32c1i %0, %1\n"
> + : "+a"(s), "+m"(*p)
> + : "a"(t)
> + : "memory" );
> + return s;
> +}
> +#endif
For example this is not a useful test, because there's nothing you can
do in the case where it's not defined; it's just not supportable
(unless there's some kernel fallback mechanism like ARM and SH have;
then it would make sense to hard-code the cas instruction as above if
the ISA level is known to have it, and otherwise do conditional
runtime selection (again, like ARM and SH).
> diff --git a/arch/xtensa/bits/alltypes.h.in b/arch/xtensa/bits/alltypes.h.in
> new file mode 100644
> index 00000000..21221ce5
> --- /dev/null
> +++ b/arch/xtensa/bits/alltypes.h.in
> @@ -0,0 +1,27 @@
> +#define _REDIR_TIME64 1
> +#define _Addr int
> +#define _Int64 long long
> +#define _Reg int
> +
> +#if __XTENSA_EB__
> +#define __BYTE_ORDER 4321
> +#elif __XTENSA_EL__
> +#define __BYTE_ORDER 1234
> +#else
> +#error Unknown endianness
> +#endif
> +
> +#define __LONG_MAX 0x7fffffffL
> +
> +#ifndef __cplusplus
> +#ifdef __WCHAR_TYPE__
> +TYPEDEF __WCHAR_TYPE__ wchar_t;
> +#else
> +TYPEDEF unsigned wchar_t;
> +#endif
> +#endif
> +
> +TYPEDEF float float_t;
> +TYPEDEF double double_t;
> +
> +TYPEDEF struct { long __l __attribute__((aligned(__BIGGEST_ALIGNMENT__))); } max_align_t;
It's best to avoid macros like __BIGGEST_ALIGNMENT__ here that could
change out from under us in the future, thereby quietly breaking the
ABI. Whatever the ABI is, that's what the alignment here should be,
and just using a type that naturally has the alignment if possible
rather than attributes.
> diff --git a/arch/xtensa/bits/mman.h b/arch/xtensa/bits/mman.h
> new file mode 100644
> index 00000000..d2f58eb1
> --- /dev/null
> +++ b/arch/xtensa/bits/mman.h
> @@ -0,0 +1,68 @@
> +#define MAP_FAILED ((void *) -1)
> +
> +#define PROT_NONE 0
> +#define PROT_READ 1
> +#define PROT_WRITE 2
> +#define PROT_EXEC 4
> +#define PROT_GROWSDOWN 0x01000000
> +#define PROT_GROWSUP 0x02000000
> +
> +#define MAP_SHARED 0x01
> +#define MAP_PRIVATE 0x02
> +#define MAP_FIXED 0x10
> +
> +#define MAP_TYPE 0x0f
> +#undef MAP_FILE
> +#define MAP_FILE 0x00
> +#undef MAP_ANON
> +#define MAP_ANON 0x800
> +#define MAP_ANONYMOUS MAP_ANON
> +#undef MAP_NORESERVE
> +#define MAP_NORESERVE 0x0400
> +#undef MAP_GROWSDOWN
> +#define MAP_GROWSDOWN 0x1000
> +#undef MAP_DENYWRITE
> +#define MAP_DENYWRITE 0x2000
> +#undef MAP_EXECUTABLE
> +#define MAP_EXECUTABLE 0x4000
> +#undef MAP_LOCKED
> +#define MAP_LOCKED 0x8000
> +#undef MAP_POPULATE
> +#define MAP_POPULATE 0x10000
> +#undef MAP_NONBLOCK
> +#define MAP_NONBLOCK 0x20000
> +#undef MAP_STACK
> +#define MAP_STACK 0x40000
> +#undef MAP_HUGETLB
> +#define MAP_HUGETLB 0x80000
> +
> +#define POSIX_MADV_NORMAL 0
> +#define POSIX_MADV_RANDOM 1
> +#define POSIX_MADV_SEQUENTIAL 2
> +#define POSIX_MADV_WILLNEED 3
> +#define POSIX_MADV_DONTNEED 4
> +
> +#define MS_ASYNC 1
> +#define MS_INVALIDATE 2
> +#define MS_SYNC 4
> +
> +#define MCL_CURRENT 1
> +#define MCL_FUTURE 2
> +
> +#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
> +#define MADV_NORMAL 0
> +#define MADV_RANDOM 1
> +#define MADV_SEQUENTIAL 2
> +#define MADV_WILLNEED 3
> +#define MADV_DONTNEED 4
> +#define MADV_REMOVE 9
> +#define MADV_DONTFORK 10
> +#define MADV_DOFORK 11
> +#define MADV_MERGEABLE 12
> +#define MADV_UNMERGEABLE 13
> +#define MADV_HUGEPAGE 14
> +#define MADV_NOHUGEPAGE 15
> +#define MADV_DONTDUMP 16
> +#define MADV_DODUMP 17
> +#define MADV_HWPOISON 100
> +#endif
A lot of these don't vary from the standard values, and including them
(especially since most will be duplicate/re- definitions) obscures
what actually is arch-specific here.
> diff --git a/arch/xtensa/bits/setjmp.h b/arch/xtensa/bits/setjmp.h
> new file mode 100644
> index 00000000..e1a6dd97
> --- /dev/null
> +++ b/arch/xtensa/bits/setjmp.h
> @@ -0,0 +1,5 @@
> +#if defined(__XTENSA_CALL0_ABI__)
> +typedef unsigned long __jmp_buf[6];
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
See notes later on reloc.h regarding ABIs.
> diff --git a/arch/xtensa/bits/signal.h b/arch/xtensa/bits/signal.h
> new file mode 100644
> index 00000000..545ffd36
> --- /dev/null
> +++ b/arch/xtensa/bits/signal.h
> @@ -0,0 +1,88 @@
> +#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
> + || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
> +
> +#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
> +#define MINSIGSTKSZ 2048
> +#define SIGSTKSZ 8192
> +#endif
> +
> +typedef struct sigcontext
> +{
> + unsigned long sc_pc;
> + unsigned long sc_ps;
> + unsigned long sc_lbeg;
> + unsigned long sc_lend;
> + unsigned long sc_lcount;
> + unsigned long sc_sar;
> + unsigned long sc_acclo;
> + unsigned long sc_acchi;
> + unsigned long sc_a[16];
> + void *sc_xtregs;
> +} mcontext_t;
This needs a namespace-safe alternate definition when in
standards-conforming profile. See how it's done on most other archs.
(It can just be a dummy struct of the right size if you like.)
> diff --git a/arch/xtensa/bits/xtensa-config.h b/arch/xtensa/bits/xtensa-config.h
> new file mode 100644
> index 00000000..a2e95904
> --- /dev/null
> +++ b/arch/xtensa/bits/xtensa-config.h
> @@ -0,0 +1,8 @@
> +#undef XCHAL_NUM_AREGS
> +#define XCHAL_NUM_AREGS __XCHAL_NUM_AREGS
> +
> +#undef XCHAL_HAVE_S32C1I
> +#define XCHAL_HAVE_S32C1I __XCHAL_HAVE_S32C1I
> +
> +#undef XCHAL_HAVE_EXCLUSIVE
> +#define XCHAL_HAVE_EXCLUSIVE __XCHAL_HAVE_EXCLUSIVE
See notes above on this header.
> diff --git a/arch/xtensa/reloc.h b/arch/xtensa/reloc.h
> new file mode 100644
> index 00000000..03482f18
> --- /dev/null
> +++ b/arch/xtensa/reloc.h
> @@ -0,0 +1,38 @@
> +#if __BYTE_ORDER == __BIG_ENDIAN
> +#define ENDIAN_SUFFIX "eb"
> +#else
> +#define ENDIAN_SUFFIX ""
> +#endif
> +
> +#if __FDPIC__
> +#define ABI_SUFFIX "-fdpic"
> +#else
> +#define ABI_SUFFIX ""
> +#endif
> +
> +#define LDSO_ARCH "xtensa" ENDIAN_SUFFIX ABI_SUFFIX
Any actual incompatible ABI (as opposed to ISA level) needs its own
LDSO name. So if there are alternate (call0/windowed?) ABIs you want
to support, different names need to be defined for them here and in
configure. Unless there's a good reason to support more than one ABI,
it probably just makes sense to pick the preferred one and have
configure error-out if the toolchain's ABI is incompatible.
> diff --git a/crt/xtensa/crti.S b/crt/xtensa/crti.S
> new file mode 100644
> index 00000000..82a837e7
> --- /dev/null
> +++ b/crt/xtensa/crti.S
> @@ -0,0 +1,29 @@
> +.section .init
> +.global _init
> +.type _init, @function
> +_init:
> +#if defined(__XTENSA_CALL0_ABI__)
> + addi sp, sp, -16
> + s32i a0, sp, 0
> +#ifdef __FDPIC__
> + s32i a12, sp, 4
> + mov a12, a11
> +#endif
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
Same thing regarding ABIs.
> diff --git a/src/internal/xtensa/syscall.S b/src/internal/xtensa/syscall.S
> new file mode 100644
> index 00000000..f09a46e8
> --- /dev/null
> +++ b/src/internal/xtensa/syscall.S
> @@ -0,0 +1,25 @@
> +.global __syscall
> +.hidden __syscall
> +.type __syscall,@function
> +.align 4
> +__syscall:
> +#ifdef __XTENSA_WINDOWED_ABI__
> + entry a1, 16
> +#endif
> + mov a8, a3
> + mov a3, a4
> + mov a4, a5
> + mov a5, a6
> + mov a6, a8
> + mov a8, a7
> +#if defined(__XTENSA_WINDOWED_ABI__)
> + l32i a9, a1, 16
> + syscall
> + retw
> +#elif defined(__XTENSA_CALL0_ABI__)
> + l32i a9, a1, 0
> + syscall
> + ret
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
And here too.
> diff --git a/src/ldso/xtensa/tlsdesc.S b/src/ldso/xtensa/tlsdesc.S
> new file mode 100644
> index 00000000..084d5f3e
> --- /dev/null
> +++ b/src/ldso/xtensa/tlsdesc.S
> @@ -0,0 +1,36 @@
> +.global __tlsdesc_static
> +.hidden __tlsdesc_static
> +.type __tlsdesc_static,@function
> +.align 4
> +__tlsdesc_static:
> +#ifdef __XTENSA_WINDOWED_ABI__
> + entry a1, 16
> +#endif
> + rur a3, threadptr
> + add a2, a2, a3
> +#if defined(__XTENSA_WINDOWED_ABI__)
> + retw
> +#elif defined(__XTENSA_CALL0_ABI__)
> + ret
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
Are you sure the entry step makes sense with tlsdesc? Is this honoring
the ABI for what the tlsdesc function is allowed to clobber?
> +.hidden __tls_get_new
> +
> +.global __tlsdesc_dynamic
> +.hidden __tlsdesc_dynamic
> +.type __tlsdesc_dynamic,@function
> +.align 4
> +__tlsdesc_dynamic:
> +#if defined(__XTENSA_WINDOWED_ABI__)
> + entry a1, 16
> + mov a6, a2
> + call4 __tls_get_addr
> + mov a2, a6
> + retw
> +#elif defined(__XTENSA_CALL0_ABI__)
> + j __tls_get_addr
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
This approach is no longer a valid implementation for
__tlsdesc_dynamic, because it calls C code which could clobber any
call-clobbered registers, not just the ones the tlsdesc function is
allowed to clobber.
Instead, it needs to actually do the dtv lookup in asm. musl does not
use any dynamic allocation here, so the lookup is guaranteed to just
work using the indices found with no conditional branches. See the
recently added riscv code for a clean example, in commit
407aea628af8c81d9e3f5a068568f2217db71bba.
> diff --git a/src/process/xtensa/vfork.S b/src/process/xtensa/vfork.S
> new file mode 100644
> index 00000000..25478b87
> --- /dev/null
> +++ b/src/process/xtensa/vfork.S
> @@ -0,0 +1,21 @@
> +.global vfork
> +.type vfork,@function
> +vfork:
> +#if defined(__XTENSA_CALL0_ABI__)
> + movi a2, 116 # __NR_clone
> + movi a3, 0
> + movi a6, 0x4111 # CLONE_VM | CLONE_VFORK | SIGCHLD
> + syscall
> +
> +#ifdef __FDPIC__
> +.hidden __syscall_ret
> + movi a9, __syscall_ret@GOT
> + add a9, a9, a11
> + l32i a9, a9, 0
> + jx a9
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
Same re: ABIs. But maybe there's intent to support a non-FDPIC ABI
too?
> diff --git a/src/setjmp/xtensa/longjmp.S b/src/setjmp/xtensa/longjmp.S
> new file mode 100644
> index 00000000..602093c9
> --- /dev/null
> +++ b/src/setjmp/xtensa/longjmp.S
> @@ -0,0 +1,22 @@
> +.global _longjmp
> +.global longjmp
> +.type _longjmp,@function
> +.type longjmp,@function
> +.align 4
> +_longjmp:
> +longjmp:
> +#if defined(__XTENSA_CALL0_ABI__)
> + l32i a0, a2, 0
> + l32i a1, a2, 4
> + l32i a12, a2, 8
> + l32i a13, a2, 12
> + l32i a14, a2, 16
> + l32i a15, a2, 20
> +
> + movi a2, 1
> + movnez a2, a3, a3
> +
> + ret
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
> diff --git a/src/setjmp/xtensa/setjmp.S b/src/setjmp/xtensa/setjmp.S
> new file mode 100644
> index 00000000..3a735a6a
> --- /dev/null
> +++ b/src/setjmp/xtensa/setjmp.S
> @@ -0,0 +1,25 @@
> +.global ___setjmp
> +.hidden ___setjmp
> +.global __setjmp
> +.global _setjmp
> +.global setjmp
> +.type __setjmp,@function
> +.type _setjmp,@function
> +.type setjmp,@function
> +.align 4
> +___setjmp:
> +__setjmp:
> +_setjmp:
> +setjmp:
> +#if defined(__XTENSA_CALL0_ABI__)
> + s32i a0, a2, 0
> + s32i a1, a2, 4
> + s32i a12, a2, 8
> + s32i a13, a2, 12
> + s32i a14, a2, 16
> + s32i a15, a2, 20
> + movi a2, 0
> + ret
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
Ditto.
> diff --git a/src/signal/xtensa/sigsetjmp.S b/src/signal/xtensa/sigsetjmp.S
> new file mode 100644
> index 00000000..e44bba88
> --- /dev/null
> +++ b/src/signal/xtensa/sigsetjmp.S
> @@ -0,0 +1,24 @@
> +.global sigsetjmp
> +.global __sigsetjmp
> +.type sigsetjmp,@function
> +.type __sigsetjmp,@function
> +.align 4
> +sigsetjmp:
> +__sigsetjmp:
> +#if defined(__XTENSA_CALL0_ABI__)
> + bnez a3, 1f
> +.hidden ___setjmp
> + j ___setjmp
> +1:
> + mov a3, a0
> + mov a4, a2
> + call0 ___setjmp
> + mov a0, a3
> + mov a3, a2
> + mov a2, a4
> +
> +.hidden __sigsetjmp_tail
> + j __sigsetjmp_tail
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
I don't think this code works. It does not seem to save the return
address and argument address before calling ___setjmp, so they will be
clobbered after it returns.
> diff --git a/src/thread/xtensa/__set_thread_area.S b/src/thread/xtensa/__set_thread_area.S
> new file mode 100644
> index 00000000..1f402125
> --- /dev/null
> +++ b/src/thread/xtensa/__set_thread_area.S
> @@ -0,0 +1,16 @@
> +.global __set_thread_area
> +.type __set_thread_area,@function
> +.align 4
> +__set_thread_area:
> +#ifdef __XTENSA_WINDOWED_ABI__
> + entry a1, 16
> +#endif
> + wur a2, threadptr
> + movi a2, 0
> +#if defined(__XTENSA_WINDOWED_ABI__)
> + retw
> +#elif defined(__XTENSA_CALL0_ABI__)
> + ret
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
FWIW, few archs do this yet, but this function can be written in C
with inline asm to set the register, and that would avoid any ABI
conditionals if they're ever needed.
Eventually I'd like to move most functions like this, that are
currently external asm but have no good reason to be (unlike setjmp,
vfork, etc. which *do* have a good reason to be), to minimal inline
asm in C. Some archs' versions of this file would even just become
__syscall, no further asm.
> diff --git a/src/thread/xtensa/__unmapself.S b/src/thread/xtensa/__unmapself.S
> new file mode 100644
> index 00000000..15bf2bdf
> --- /dev/null
> +++ b/src/thread/xtensa/__unmapself.S
> @@ -0,0 +1,13 @@
> +.global __unmapself
> +.type __unmapself,@function
> +.align 4
> +__unmapself:
> +#if defined(__XTENSA_CALL0_ABI__)
> + mov a6, a2
> + movi a2, 81 # SYS_munmap
> + syscall
> + movi a2, 118 # SYS_exit
> + syscall
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
This is unsafe on fdpic/nommu if the kernel requires the task to
always have a valid stack. You might want to check how it's done on SH
and whether something similar is needed here. The idea is that it
temporarily switches to an alternate stack protected by the global
thread list lock (so only one thread can be using it at a time) in
order to make it safe to unmap the thread's own stack. The thread list
is then unlocked by the kernel when the thread dies (via the exit
futex).
> diff --git a/src/thread/xtensa/clone.S b/src/thread/xtensa/clone.S
> new file mode 100644
> index 00000000..8e8514d6
> --- /dev/null
> +++ b/src/thread/xtensa/clone.S
> @@ -0,0 +1,46 @@
> +# __clone(func, stack, flags, arg, ptid, tls, ctid)
> +# a2, a3, a4, a5, a6, a7, [sp]
> +
> +# syscall(SYS_clone, flags, stack, ptid, tls, ctid)
> +# a2, a6, a3, a4, a5, a8
> +
> +.global __clone
> +.type __clone,@function
> +.align 4
> +__clone:
> +#if defined(__XTENSA_CALL0_ABI__)
> + # align stack and save func,arg
> + srli a3, a3, 4
> + slli a3, a3, 4
> + addi a3, a3, -16
> + s32i a2, a3, 0
> + s32i a5, a3, 4
> +
> + # syscall
> + mov a2, a4
> + mov a4, a6
> + mov a6, a2
> + mov a5, a7
> + l32i a8, a1, 16
> + movi a2, 116 # SYS_clone
> + syscall
> +
> + beqz a2, 1f
> + # parent
> + ret
> +
> + # child
> +1:
> + l32i a0, a1, 0
> + l32i a2, a1, 4
> +#ifdef __FDPIC__
> + l32i a11, a0, 4
> + l32i a0, a0, 0
> +#endif
> + callx0 a0
> + mov a6, a2
> + movi a2, 118 # SYS_exit
> + syscall
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
Same re: ABIs.
> diff --git a/src/thread/xtensa/syscall_cp.S b/src/thread/xtensa/syscall_cp.S
> new file mode 100644
> index 00000000..de7c3e66
> --- /dev/null
> +++ b/src/thread/xtensa/syscall_cp.S
> @@ -0,0 +1,38 @@
> +# __syscall_cp_asm(&self->cancel, nr, u, v, w, x, y, z)
> +# a2 a3 a4 a5 a6 a7 [sp] [sp+4]
> +
> +# syscall(nr, u, v, w, x, y, z)
> +# a2 a6 a3 a4 a5 a8 a9
> +
> +.global __cp_begin
> +.hidden __cp_begin
> +.global __cp_end
> +.hidden __cp_end
> +.global __cp_cancel
> +.hidden __cp_cancel
> +.hidden __cancel
> +.global __syscall_cp_asm
> +.hidden __syscall_cp_asm
> +.type __syscall_cp_asm,@function
> +.align 4
> +#if defined(__XTENSA_CALL0_ABI__)
> +__syscall_cp_asm:
> +__cp_begin:
> + l32i a2, a2, 0
> + bnez a2, __cp_cancel
> + mov a2, a4
> + mov a4, a6
> + mov a6, a2
> + mov a2, a3
> + mov a3, a5
> + mov a5, a7
> + l32i a8, a1, 0
> + l32i a9, a1, 4
> + syscall
> +__cp_end:
> + ret
> +__cp_cancel:
> + j __cancel
> +#else
> +#error Unsupported Xtensa ABI
> +#endif
> --
> 2.21.0
And this one.
> >From f8c5953ecdb948282cc8e573b729c25db60a95a8 Mon Sep 17 00:00:00 2001
> From: Max Filippov <jcmvbkbc@gmail.com>
> Date: Wed, 21 Feb 2024 08:27:37 -0800
> Subject: [PATCH 2/2] WIP
>
> Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
> ---
> ldso/dlstart.c | 7 +++++++
> ldso/dynlink.c | 6 ++++--
> src/internal/dynlink.h | 5 +++--
> 3 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/ldso/dlstart.c b/ldso/dlstart.c
> index 259f5e18..beca953f 100644
> --- a/ldso/dlstart.c
> +++ b/ldso/dlstart.c
> @@ -90,12 +90,19 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
> - segs[rel_addr[1]].p_vaddr
> + syms[R_SYM(rel[1])].st_value;
> rel_addr[1] = dyn[DT_PLTGOT];
> + } else if (R_TYPE(rel[1]) == REL_RELATIVE) {
> + size_t val = *rel_addr;
> + for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
> + *rel_addr += segs[j].addr - segs[j].p_vaddr;
So xtensa has a "relative" reloc type that's just adjusted by the load
offset of the segment the relocation lives in, rather than needing to
use a symbolic relocation referencing a section symbol like other
fdpic archs do?
If so, this looks right and looks ok.
> } else {
> size_t val = syms[R_SYM(rel[1])].st_value;
> for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
> *rel_addr = rel[2] + segs[j].addr - segs[j].p_vaddr + val;
> }
> }
> +#ifdef __xtensa__
> + ((unsigned long *)dyn[DT_PLTGOT])[3] = segs[0].addr - segs[0].p_vaddr;
> +#endif
Is this actually needed for anything? Generally musl doesn't use the
reserved GOT slots itself, and on all the other archs I'm aware of,
they're essentially reserved to the dynamic linker implementation so
the dynamic linker is just free not to use them and not to set them
up.
> #else
> /* If the dynamic linker is invoked as a command, its load
> * address is not available in the aux vector. Instead, compute
> diff --git a/ldso/dynlink.c b/ldso/dynlink.c
> index ceca3c98..25563af3 100644
> --- a/ldso/dynlink.c
> +++ b/ldso/dynlink.c
> @@ -1420,6 +1420,7 @@ static void reloc_all(struct dso *p)
> if (!DL_FDPIC)
> do_relr_relocs(p, laddr(p, dyn[DT_RELR]), dyn[DT_RELRSZ]);
>
> +#if 0
> if (head != &ldso && p->relro_start != p->relro_end) {
> long ret = __syscall(SYS_mprotect, laddr(p, p->relro_start),
> p->relro_end-p->relro_start, PROT_READ);
> @@ -1429,6 +1430,7 @@ static void reloc_all(struct dso *p)
> if (runtime) longjmp(*rtld_fail, 1);
> }
> }
> +#endif
Was this breaking something? Relro linking probably should have been
disabled on fdpic, and we ignore ENOSYS anyway for nommu.
> p->relocated = 1;
> }
> @@ -1485,7 +1487,7 @@ void __libc_exit_fini()
> if (dyn[0] & (1<<DT_FINI_ARRAY)) {
> size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
> size_t *fn = (size_t *)laddr(p, dyn[DT_FINI_ARRAY])+n;
> - while (n--) ((void (*)(void))*--fn)();
> + while (n--) fpaddr(p, *--fn)();
If this is fixable on the tooling side it really should be fixed
there. init/fini arrays should have actual language-level function
addresses (descriptor addresses on fdpic), not instruction addresses.
If it's not fixable at this point, I guess we need the arch's reloc.h
to define a macro signaling this difference in behavior.
I have no idea how this would be expected to work on static linked
programs...
> diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h
> index 06f41d09..6485e883 100644
> --- a/src/internal/dynlink.h
> +++ b/src/internal/dynlink.h
> @@ -78,10 +78,11 @@ struct fdpic_dummy_loadmap {
> (R_TYPE(x) == REL_RELATIVE) || \
> (R_TYPE(x) == REL_SYM_OR_REL && !R_SYM(x)) )
> #else
> -#define IS_RELATIVE(x,s) ( ( \
> +#define IS_RELATIVE(x,s) ( \
> + (R_TYPE(x) == REL_RELATIVE) || ( ( \
> (R_TYPE(x) == REL_FUNCDESC_VAL) || \
> (R_TYPE(x) == REL_SYMBOLIC) ) \
> - && (((s)[R_SYM(x)].st_info & 0xf) == STT_SECTION) )
> + && (((s)[R_SYM(x)].st_info & 0xf) == STT_SECTION) ) )
> #endif
>
> #ifndef NEED_MIPS_GOT_RELOCS
> --
> 2.21.0
>
This looks ok assuming the above about relative relocs.
Rich
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-28 0:13 ` Rich Felker
@ 2024-02-28 17:20 ` Max Filippov
2024-02-28 18:30 ` Rich Felker
0 siblings, 1 reply; 17+ messages in thread
From: Max Filippov @ 2024-02-28 17:20 UTC (permalink / raw)
To: Rich Felker; +Cc: musl
Hi Rich,
On Tue, Feb 27, 2024 at 4:12 PM Rich Felker <dalias@libc.org> wrote:
>
> First of all, I'm excited to see new work on this and especially the
> FDPIC ABI. Not only is xtensa a very relevant arch I've been hoping we
> could support for some time now, but having FDPIC will help getting
> the parts of musl's FDPIC support that are currently making
> sh-specific assumptions worked out so that we can get other new FDPIC
> ports added too (particularly, arm and riscv).
>
> I'm not up-to-date on what toolchain status/stability is or how close
> this is to being intended as upstreamable, but since I was pointed to
> it and took a look, I wanted to record my initial review thoughts here
> so they don't get lost and to hopefully move this closer to ready for
> upstream.
Thanks for your review.
I expect to get toolchain components into shape for upstreaming during
this year.
> Review follows inline:
>
> On Tue, Feb 27, 2024 at 06:24:31PM -0500, Rich Felker wrote:
> > Attached patches were pulled from
> > https://github.com/jcmvbkbc/musl-xtensa/commits/xtensa-1.2.4-fdpic/
> >
> > I'll reply to comment inline.
>
> > >From 868b34272f414323f10528d5b9988886541cb1c0 Mon Sep 17 00:00:00 2001
> > From: Max Filippov <jcmvbkbc@gmail.com>
> > Date: Tue, 22 Mar 2016 02:35:58 +0300
> > Subject: [PATCH 1/2] xtensa: add port
> >
> > Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
> > ---
> > arch/xtensa/arch.mak | 1 +
> > arch/xtensa/atomic_arch.h | 27 ++
> > arch/xtensa/bits/alltypes.h.in | 27 ++
> > arch/xtensa/bits/float.h | 16 ++
> > arch/xtensa/bits/ioctl.h | 219 ++++++++++++++
> > arch/xtensa/bits/limits.h | 1 +
> > arch/xtensa/bits/mman.h | 68 +++++
> > arch/xtensa/bits/posix.h | 2 +
> > arch/xtensa/bits/reg.h | 2 +
> > arch/xtensa/bits/setjmp.h | 5 +
> > arch/xtensa/bits/signal.h | 88 ++++++
> > arch/xtensa/bits/stat.h | 24 ++
> > arch/xtensa/bits/stdint.h | 20 ++
> > arch/xtensa/bits/syscall.h.in | 396 ++++++++++++++++++++++++++
> > arch/xtensa/bits/termios.h | 167 +++++++++++
> > arch/xtensa/bits/user.h | 4 +
> > arch/xtensa/bits/xtensa-config.h | 8 +
> > arch/xtensa/crt_arch.h | 48 ++++
> > arch/xtensa/kstat.h | 18 ++
> > arch/xtensa/pthread_arch.h | 11 +
> > arch/xtensa/reloc.h | 38 +++
> > arch/xtensa/syscall_arch.h | 104 +++++++
> > configure | 1 +
> > crt/xtensa/crti.S | 29 ++
> > crt/xtensa/crtn.S | 23 ++
> > include/elf.h | 70 +++++
> > src/internal/xtensa/syscall.S | 25 ++
> > src/ldso/xtensa/tlsdesc.S | 36 +++
> > src/process/xtensa/vfork.S | 21 ++
> > src/setjmp/xtensa/longjmp.S | 22 ++
> > src/setjmp/xtensa/setjmp.S | 25 ++
> > src/signal/xtensa/restore.s | 10 +
> > src/signal/xtensa/sigsetjmp.S | 24 ++
> > src/thread/xtensa/__set_thread_area.S | 16 ++
> > src/thread/xtensa/__unmapself.S | 13 +
> > src/thread/xtensa/clone.S | 46 +++
> > src/thread/xtensa/syscall_cp.S | 38 +++
> > 37 files changed, 1693 insertions(+)
> > create mode 100644 arch/xtensa/arch.mak
> > create mode 100644 arch/xtensa/atomic_arch.h
> > create mode 100644 arch/xtensa/bits/alltypes.h.in
> > create mode 100644 arch/xtensa/bits/float.h
> > create mode 100644 arch/xtensa/bits/ioctl.h
> > create mode 100644 arch/xtensa/bits/limits.h
> > create mode 100644 arch/xtensa/bits/mman.h
> > create mode 100644 arch/xtensa/bits/posix.h
> > create mode 100644 arch/xtensa/bits/reg.h
> > create mode 100644 arch/xtensa/bits/setjmp.h
> > create mode 100644 arch/xtensa/bits/signal.h
> > create mode 100644 arch/xtensa/bits/stat.h
> > create mode 100644 arch/xtensa/bits/stdint.h
> > create mode 100644 arch/xtensa/bits/syscall.h.in
> > create mode 100644 arch/xtensa/bits/termios.h
> > create mode 100644 arch/xtensa/bits/user.h
> > create mode 100644 arch/xtensa/bits/xtensa-config.h
> > create mode 100644 arch/xtensa/crt_arch.h
> > create mode 100644 arch/xtensa/kstat.h
> > create mode 100644 arch/xtensa/pthread_arch.h
> > create mode 100644 arch/xtensa/reloc.h
> > create mode 100644 arch/xtensa/syscall_arch.h
> > create mode 100644 crt/xtensa/crti.S
> > create mode 100644 crt/xtensa/crtn.S
> > create mode 100644 src/internal/xtensa/syscall.S
> > create mode 100644 src/ldso/xtensa/tlsdesc.S
> > create mode 100644 src/process/xtensa/vfork.S
> > create mode 100644 src/setjmp/xtensa/longjmp.S
> > create mode 100644 src/setjmp/xtensa/setjmp.S
> > create mode 100644 src/signal/xtensa/restore.s
> > create mode 100644 src/signal/xtensa/sigsetjmp.S
> > create mode 100644 src/thread/xtensa/__set_thread_area.S
> > create mode 100644 src/thread/xtensa/__unmapself.S
> > create mode 100644 src/thread/xtensa/clone.S
> > create mode 100644 src/thread/xtensa/syscall_cp.S
> >
> > diff --git a/arch/xtensa/arch.mak b/arch/xtensa/arch.mak
> > new file mode 100644
> > index 00000000..aa4d05ce
> > --- /dev/null
> > +++ b/arch/xtensa/arch.mak
> > @@ -0,0 +1 @@
> > +COMPAT_SRC_DIRS = compat/time32
> > diff --git a/arch/xtensa/atomic_arch.h b/arch/xtensa/atomic_arch.h
> > new file mode 100644
> > index 00000000..34bf0704
> > --- /dev/null
> > +++ b/arch/xtensa/atomic_arch.h
> > @@ -0,0 +1,27 @@
> > +#include "bits/xtensa-config.h"
>
> This is not what bits headers are for; they are public-installed
> headers that provide parts of the standard public headers that vary by
> arch, and are never directly #include'able.
>
> If you really need arch-internal headers like this just put them in
> the top-level dir of the arch. But in the case of these macros, either
> just use the compiler-provided, __-prefixed ones directly, or if
> they're not actually variable, don't inspect them at all.
I carry this header from the time when configuration-specific header
file was the only available configuration option. __XCHAL_* macros
only appeared in gcc-13, but since FDPIC support will only be
available on toolchains that have these macros I guess I'll drop the
header and use __XCHAL_* macros directly.
> > +
> > +#if XCHAL_HAVE_S32C1I
> > +#define a_cas a_cas
> > +static inline int a_cas(volatile int *p, int t, int s)
> > +{
> > + __asm__ __volatile__ (
> > + " wsr %2, scompare1\n"
> > + " s32c1i %0, %1\n"
> > + : "+a"(s), "+m"(*p)
> > + : "a"(t)
> > + : "memory" );
> > + return s;
> > +}
> > +#endif
>
> For example this is not a useful test, because there's nothing you can
> do in the case where it's not defined;
This test is in the spirit of xtensa: it covers one possible
configuration option.
There's at least one other hardware option (exclusive access instructions)
available for this functionality and a fallback to syscalls. They're not listed
here because I tried to keep this initial implementation to a bare minimum.
> it's just not supportable
> (unless there's some kernel fallback mechanism like ARM and SH have;
> then it would make sense to hard-code the cas instruction as above if
> the ISA level is known to have it, and otherwise do conditional
> runtime selection (again, like ARM and SH).
Xtensa is quite limited on the runtime selection, because opcodes belonging
to the options not enabled for a particular core just would not assemble.
> > diff --git a/arch/xtensa/bits/alltypes.h.in b/arch/xtensa/bits/alltypes.h.in
> > new file mode 100644
> > index 00000000..21221ce5
> > --- /dev/null
> > +++ b/arch/xtensa/bits/alltypes.h.in
> > @@ -0,0 +1,27 @@
> > +#define _REDIR_TIME64 1
> > +#define _Addr int
> > +#define _Int64 long long
> > +#define _Reg int
> > +
> > +#if __XTENSA_EB__
> > +#define __BYTE_ORDER 4321
> > +#elif __XTENSA_EL__
> > +#define __BYTE_ORDER 1234
> > +#else
> > +#error Unknown endianness
> > +#endif
> > +
> > +#define __LONG_MAX 0x7fffffffL
> > +
> > +#ifndef __cplusplus
> > +#ifdef __WCHAR_TYPE__
> > +TYPEDEF __WCHAR_TYPE__ wchar_t;
> > +#else
> > +TYPEDEF unsigned wchar_t;
> > +#endif
> > +#endif
> > +
> > +TYPEDEF float float_t;
> > +TYPEDEF double double_t;
> > +
> > +TYPEDEF struct { long __l __attribute__((aligned(__BIGGEST_ALIGNMENT__))); } max_align_t;
>
> It's best to avoid macros like __BIGGEST_ALIGNMENT__ here that could
> change out from under us in the future, thereby quietly breaking the
> ABI. Whatever the ABI is, that's what the alignment here should be,
> and just using a type that naturally has the alignment if possible
> rather than attributes.
The thing is: we don't know the name of the type with the widest
alignment, as indeed in the future someone may come up with
a configuration with an arbitrarily named type with an alignment
as big as allowed physically. But then its their responsibility to
keep the __BIGGEST_ALIGNMENT__ consistent across the
updates of that configuration that intend to stay compatible with
each other.
> > diff --git a/arch/xtensa/bits/mman.h b/arch/xtensa/bits/mman.h
> > new file mode 100644
> > index 00000000..d2f58eb1
> > --- /dev/null
> > +++ b/arch/xtensa/bits/mman.h
> > @@ -0,0 +1,68 @@
> > +#define MAP_FAILED ((void *) -1)
> > +
> > +#define PROT_NONE 0
> > +#define PROT_READ 1
> > +#define PROT_WRITE 2
> > +#define PROT_EXEC 4
> > +#define PROT_GROWSDOWN 0x01000000
> > +#define PROT_GROWSUP 0x02000000
> > +
> > +#define MAP_SHARED 0x01
> > +#define MAP_PRIVATE 0x02
> > +#define MAP_FIXED 0x10
> > +
> > +#define MAP_TYPE 0x0f
> > +#undef MAP_FILE
> > +#define MAP_FILE 0x00
> > +#undef MAP_ANON
> > +#define MAP_ANON 0x800
> > +#define MAP_ANONYMOUS MAP_ANON
> > +#undef MAP_NORESERVE
> > +#define MAP_NORESERVE 0x0400
> > +#undef MAP_GROWSDOWN
> > +#define MAP_GROWSDOWN 0x1000
> > +#undef MAP_DENYWRITE
> > +#define MAP_DENYWRITE 0x2000
> > +#undef MAP_EXECUTABLE
> > +#define MAP_EXECUTABLE 0x4000
> > +#undef MAP_LOCKED
> > +#define MAP_LOCKED 0x8000
> > +#undef MAP_POPULATE
> > +#define MAP_POPULATE 0x10000
> > +#undef MAP_NONBLOCK
> > +#define MAP_NONBLOCK 0x20000
> > +#undef MAP_STACK
> > +#define MAP_STACK 0x40000
> > +#undef MAP_HUGETLB
> > +#define MAP_HUGETLB 0x80000
> > +
> > +#define POSIX_MADV_NORMAL 0
> > +#define POSIX_MADV_RANDOM 1
> > +#define POSIX_MADV_SEQUENTIAL 2
> > +#define POSIX_MADV_WILLNEED 3
> > +#define POSIX_MADV_DONTNEED 4
> > +
> > +#define MS_ASYNC 1
> > +#define MS_INVALIDATE 2
> > +#define MS_SYNC 4
> > +
> > +#define MCL_CURRENT 1
> > +#define MCL_FUTURE 2
> > +
> > +#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
> > +#define MADV_NORMAL 0
> > +#define MADV_RANDOM 1
> > +#define MADV_SEQUENTIAL 2
> > +#define MADV_WILLNEED 3
> > +#define MADV_DONTNEED 4
> > +#define MADV_REMOVE 9
> > +#define MADV_DONTFORK 10
> > +#define MADV_DOFORK 11
> > +#define MADV_MERGEABLE 12
> > +#define MADV_UNMERGEABLE 13
> > +#define MADV_HUGEPAGE 14
> > +#define MADV_NOHUGEPAGE 15
> > +#define MADV_DONTDUMP 16
> > +#define MADV_DODUMP 17
> > +#define MADV_HWPOISON 100
> > +#endif
>
> A lot of these don't vary from the standard values, and including them
> (especially since most will be duplicate/re- definitions) obscures
> what actually is arch-specific here.
Ok, I'll drop the duplicates.
> > diff --git a/arch/xtensa/bits/setjmp.h b/arch/xtensa/bits/setjmp.h
> > new file mode 100644
> > index 00000000..e1a6dd97
> > --- /dev/null
> > +++ b/arch/xtensa/bits/setjmp.h
> > @@ -0,0 +1,5 @@
> > +#if defined(__XTENSA_CALL0_ABI__)
> > +typedef unsigned long __jmp_buf[6];
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
>
> See notes later on reloc.h regarding ABIs.
>
> > diff --git a/arch/xtensa/bits/signal.h b/arch/xtensa/bits/signal.h
> > new file mode 100644
> > index 00000000..545ffd36
> > --- /dev/null
> > +++ b/arch/xtensa/bits/signal.h
> > @@ -0,0 +1,88 @@
> > +#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
> > + || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
> > +
> > +#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
> > +#define MINSIGSTKSZ 2048
> > +#define SIGSTKSZ 8192
> > +#endif
> > +
> > +typedef struct sigcontext
> > +{
> > + unsigned long sc_pc;
> > + unsigned long sc_ps;
> > + unsigned long sc_lbeg;
> > + unsigned long sc_lend;
> > + unsigned long sc_lcount;
> > + unsigned long sc_sar;
> > + unsigned long sc_acclo;
> > + unsigned long sc_acchi;
> > + unsigned long sc_a[16];
> > + void *sc_xtregs;
> > +} mcontext_t;
>
> This needs a namespace-safe alternate definition when in
> standards-conforming profile. See how it's done on most other archs.
> (It can just be a dummy struct of the right size if you like.)
Ok.
> > diff --git a/arch/xtensa/bits/xtensa-config.h b/arch/xtensa/bits/xtensa-config.h
> > new file mode 100644
> > index 00000000..a2e95904
> > --- /dev/null
> > +++ b/arch/xtensa/bits/xtensa-config.h
> > @@ -0,0 +1,8 @@
> > +#undef XCHAL_NUM_AREGS
> > +#define XCHAL_NUM_AREGS __XCHAL_NUM_AREGS
> > +
> > +#undef XCHAL_HAVE_S32C1I
> > +#define XCHAL_HAVE_S32C1I __XCHAL_HAVE_S32C1I
> > +
> > +#undef XCHAL_HAVE_EXCLUSIVE
> > +#define XCHAL_HAVE_EXCLUSIVE __XCHAL_HAVE_EXCLUSIVE
>
> See notes above on this header.
>
> > diff --git a/arch/xtensa/reloc.h b/arch/xtensa/reloc.h
> > new file mode 100644
> > index 00000000..03482f18
> > --- /dev/null
> > +++ b/arch/xtensa/reloc.h
> > @@ -0,0 +1,38 @@
> > +#if __BYTE_ORDER == __BIG_ENDIAN
> > +#define ENDIAN_SUFFIX "eb"
> > +#else
> > +#define ENDIAN_SUFFIX ""
> > +#endif
> > +
> > +#if __FDPIC__
> > +#define ABI_SUFFIX "-fdpic"
> > +#else
> > +#define ABI_SUFFIX ""
> > +#endif
> > +
> > +#define LDSO_ARCH "xtensa" ENDIAN_SUFFIX ABI_SUFFIX
>
> Any actual incompatible ABI (as opposed to ISA level) needs its own
> LDSO name.
There's no real ISA levels in the xtensa. Every CPU core configuration
is its own unique ISA.
> So if there are alternate (call0/windowed?) ABIs you want
> to support, different names need to be defined for them here and in
> configure.
I understand that it means that I should drop the endianness suffix,
as the endianness of each core is fixed, but add windowed/call0
discriminator.
> Unless there's a good reason to support more than one ABI,
> it probably just makes sense to pick the preferred one and have
> configure error-out if the toolchain's ABI is incompatible.
I still have hope to support windowed FDPIC and then non-FDPIC
variants.
> > diff --git a/crt/xtensa/crti.S b/crt/xtensa/crti.S
> > new file mode 100644
> > index 00000000..82a837e7
> > --- /dev/null
> > +++ b/crt/xtensa/crti.S
> > @@ -0,0 +1,29 @@
> > +.section .init
> > +.global _init
> > +.type _init, @function
> > +_init:
> > +#if defined(__XTENSA_CALL0_ABI__)
> > + addi sp, sp, -16
> > + s32i a0, sp, 0
> > +#ifdef __FDPIC__
> > + s32i a12, sp, 4
> > + mov a12, a11
> > +#endif
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
>
> Same thing regarding ABIs.
>
> > diff --git a/src/internal/xtensa/syscall.S b/src/internal/xtensa/syscall.S
> > new file mode 100644
> > index 00000000..f09a46e8
> > --- /dev/null
> > +++ b/src/internal/xtensa/syscall.S
> > @@ -0,0 +1,25 @@
> > +.global __syscall
> > +.hidden __syscall
> > +.type __syscall,@function
> > +.align 4
> > +__syscall:
> > +#ifdef __XTENSA_WINDOWED_ABI__
> > + entry a1, 16
> > +#endif
> > + mov a8, a3
> > + mov a3, a4
> > + mov a4, a5
> > + mov a5, a6
> > + mov a6, a8
> > + mov a8, a7
> > +#if defined(__XTENSA_WINDOWED_ABI__)
> > + l32i a9, a1, 16
> > + syscall
> > + retw
> > +#elif defined(__XTENSA_CALL0_ABI__)
> > + l32i a9, a1, 0
> > + syscall
> > + ret
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
>
> And here too.
>
> > diff --git a/src/ldso/xtensa/tlsdesc.S b/src/ldso/xtensa/tlsdesc.S
> > new file mode 100644
> > index 00000000..084d5f3e
> > --- /dev/null
> > +++ b/src/ldso/xtensa/tlsdesc.S
> > @@ -0,0 +1,36 @@
> > +.global __tlsdesc_static
> > +.hidden __tlsdesc_static
> > +.type __tlsdesc_static,@function
> > +.align 4
> > +__tlsdesc_static:
> > +#ifdef __XTENSA_WINDOWED_ABI__
> > + entry a1, 16
> > +#endif
> > + rur a3, threadptr
> > + add a2, a2, a3
> > +#if defined(__XTENSA_WINDOWED_ABI__)
> > + retw
> > +#elif defined(__XTENSA_CALL0_ABI__)
> > + ret
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
>
> Are you sure the entry step makes sense with tlsdesc? Is this honoring
> the ABI for what the tlsdesc function is allowed to clobber?
But the tlsdesc function is just a normal function, it can do whatever
a regular function can do AFAIU. In addition there are no relaxations
that can replace non-call code with a variant with a call.
'entry' is the right way to begin a regular function in windowed xtensa ABI.
> > +.hidden __tls_get_new
> > +
> > +.global __tlsdesc_dynamic
> > +.hidden __tlsdesc_dynamic
> > +.type __tlsdesc_dynamic,@function
> > +.align 4
> > +__tlsdesc_dynamic:
> > +#if defined(__XTENSA_WINDOWED_ABI__)
> > + entry a1, 16
> > + mov a6, a2
> > + call4 __tls_get_addr
> > + mov a2, a6
> > + retw
> > +#elif defined(__XTENSA_CALL0_ABI__)
> > + j __tls_get_addr
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
>
> This approach is no longer a valid implementation for
> __tlsdesc_dynamic, because it calls C code which could clobber any
> call-clobbered registers, not just the ones the tlsdesc function is
> allowed to clobber.
>
> Instead, it needs to actually do the dtv lookup in asm. musl does not
> use any dynamic allocation here, so the lookup is guaranteed to just
> work using the indices found with no conditional branches. See the
> recently added riscv code for a clean example, in commit
> 407aea628af8c81d9e3f5a068568f2217db71bba.
Yeah, this whole xtensa TLS code should be in the WIP pile.
I'll rewrite it in assembly.
> > diff --git a/src/process/xtensa/vfork.S b/src/process/xtensa/vfork.S
> > new file mode 100644
> > index 00000000..25478b87
> > --- /dev/null
> > +++ b/src/process/xtensa/vfork.S
> > @@ -0,0 +1,21 @@
> > +.global vfork
> > +.type vfork,@function
> > +vfork:
> > +#if defined(__XTENSA_CALL0_ABI__)
> > + movi a2, 116 # __NR_clone
> > + movi a3, 0
> > + movi a6, 0x4111 # CLONE_VM | CLONE_VFORK | SIGCHLD
> > + syscall
> > +
> > +#ifdef __FDPIC__
> > +.hidden __syscall_ret
> > + movi a9, __syscall_ret@GOT
> > + add a9, a9, a11
> > + l32i a9, a9, 0
> > + jx a9
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
>
> Same re: ABIs. But maybe there's intent to support a non-FDPIC ABI
> too?
Right, since there's no intent to support non-FDPIC ABI at this time
the FDPIC condition can be dropped here.
> > diff --git a/src/setjmp/xtensa/longjmp.S b/src/setjmp/xtensa/longjmp.S
> > new file mode 100644
> > index 00000000..602093c9
> > --- /dev/null
> > +++ b/src/setjmp/xtensa/longjmp.S
> > @@ -0,0 +1,22 @@
> > +.global _longjmp
> > +.global longjmp
> > +.type _longjmp,@function
> > +.type longjmp,@function
> > +.align 4
> > +_longjmp:
> > +longjmp:
> > +#if defined(__XTENSA_CALL0_ABI__)
> > + l32i a0, a2, 0
> > + l32i a1, a2, 4
> > + l32i a12, a2, 8
> > + l32i a13, a2, 12
> > + l32i a14, a2, 16
> > + l32i a15, a2, 20
> > +
> > + movi a2, 1
> > + movnez a2, a3, a3
> > +
> > + ret
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
> > diff --git a/src/setjmp/xtensa/setjmp.S b/src/setjmp/xtensa/setjmp.S
> > new file mode 100644
> > index 00000000..3a735a6a
> > --- /dev/null
> > +++ b/src/setjmp/xtensa/setjmp.S
> > @@ -0,0 +1,25 @@
> > +.global ___setjmp
> > +.hidden ___setjmp
> > +.global __setjmp
> > +.global _setjmp
> > +.global setjmp
> > +.type __setjmp,@function
> > +.type _setjmp,@function
> > +.type setjmp,@function
> > +.align 4
> > +___setjmp:
> > +__setjmp:
> > +_setjmp:
> > +setjmp:
> > +#if defined(__XTENSA_CALL0_ABI__)
> > + s32i a0, a2, 0
> > + s32i a1, a2, 4
> > + s32i a12, a2, 8
> > + s32i a13, a2, 12
> > + s32i a14, a2, 16
> > + s32i a15, a2, 20
> > + movi a2, 0
> > + ret
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
>
> Ditto.
>
> > diff --git a/src/signal/xtensa/sigsetjmp.S b/src/signal/xtensa/sigsetjmp.S
> > new file mode 100644
> > index 00000000..e44bba88
> > --- /dev/null
> > +++ b/src/signal/xtensa/sigsetjmp.S
> > @@ -0,0 +1,24 @@
> > +.global sigsetjmp
> > +.global __sigsetjmp
> > +.type sigsetjmp,@function
> > +.type __sigsetjmp,@function
> > +.align 4
> > +sigsetjmp:
> > +__sigsetjmp:
> > +#if defined(__XTENSA_CALL0_ABI__)
> > + bnez a3, 1f
> > +.hidden ___setjmp
> > + j ___setjmp
> > +1:
> > + mov a3, a0
> > + mov a4, a2
> > + call0 ___setjmp
> > + mov a0, a3
> > + mov a3, a2
> > + mov a2, a4
> > +
> > +.hidden __sigsetjmp_tail
> > + j __sigsetjmp_tail
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
>
> I don't think this code works. It does not seem to save the return
> address and argument address before calling ___setjmp, so they will be
> clobbered after it returns.
It does:
+ mov a3, a0
+ mov a4, a2
do exactly this and ___setjmp is hidden, so it's always going to be
the implementation above that doesn't clobber a3 and a4.
> > diff --git a/src/thread/xtensa/__set_thread_area.S b/src/thread/xtensa/__set_thread_area.S
> > new file mode 100644
> > index 00000000..1f402125
> > --- /dev/null
> > +++ b/src/thread/xtensa/__set_thread_area.S
> > @@ -0,0 +1,16 @@
> > +.global __set_thread_area
> > +.type __set_thread_area,@function
> > +.align 4
> > +__set_thread_area:
> > +#ifdef __XTENSA_WINDOWED_ABI__
> > + entry a1, 16
> > +#endif
> > + wur a2, threadptr
> > + movi a2, 0
> > +#if defined(__XTENSA_WINDOWED_ABI__)
> > + retw
> > +#elif defined(__XTENSA_CALL0_ABI__)
> > + ret
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
>
> FWIW, few archs do this yet, but this function can be written in C
> with inline asm to set the register, and that would avoid any ABI
> conditionals if they're ever needed.
Ok.
> Eventually I'd like to move most functions like this, that are
> currently external asm but have no good reason to be (unlike setjmp,
> vfork, etc. which *do* have a good reason to be), to minimal inline
> asm in C. Some archs' versions of this file would even just become
> __syscall, no further asm.
>
> > diff --git a/src/thread/xtensa/__unmapself.S b/src/thread/xtensa/__unmapself.S
> > new file mode 100644
> > index 00000000..15bf2bdf
> > --- /dev/null
> > +++ b/src/thread/xtensa/__unmapself.S
> > @@ -0,0 +1,13 @@
> > +.global __unmapself
> > +.type __unmapself,@function
> > +.align 4
> > +__unmapself:
> > +#if defined(__XTENSA_CALL0_ABI__)
> > + mov a6, a2
> > + movi a2, 81 # SYS_munmap
> > + syscall
> > + movi a2, 118 # SYS_exit
> > + syscall
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
>
> This is unsafe on fdpic/nommu if the kernel requires the task to
> always have a valid stack.
How do I know if the kernel requires that?
> You might want to check how it's done on SH
> and whether something similar is needed here. The idea is that it
> temporarily switches to an alternate stack protected by the global
> thread list lock (so only one thread can be using it at a time) in
> order to make it safe to unmap the thread's own stack. The thread list
> is then unlocked by the kernel when the thread dies (via the exit
> futex).
>
> > diff --git a/src/thread/xtensa/clone.S b/src/thread/xtensa/clone.S
> > new file mode 100644
> > index 00000000..8e8514d6
> > --- /dev/null
> > +++ b/src/thread/xtensa/clone.S
> > @@ -0,0 +1,46 @@
> > +# __clone(func, stack, flags, arg, ptid, tls, ctid)
> > +# a2, a3, a4, a5, a6, a7, [sp]
> > +
> > +# syscall(SYS_clone, flags, stack, ptid, tls, ctid)
> > +# a2, a6, a3, a4, a5, a8
> > +
> > +.global __clone
> > +.type __clone,@function
> > +.align 4
> > +__clone:
> > +#if defined(__XTENSA_CALL0_ABI__)
> > + # align stack and save func,arg
> > + srli a3, a3, 4
> > + slli a3, a3, 4
> > + addi a3, a3, -16
> > + s32i a2, a3, 0
> > + s32i a5, a3, 4
> > +
> > + # syscall
> > + mov a2, a4
> > + mov a4, a6
> > + mov a6, a2
> > + mov a5, a7
> > + l32i a8, a1, 16
> > + movi a2, 116 # SYS_clone
> > + syscall
> > +
> > + beqz a2, 1f
> > + # parent
> > + ret
> > +
> > + # child
> > +1:
> > + l32i a0, a1, 0
> > + l32i a2, a1, 4
> > +#ifdef __FDPIC__
> > + l32i a11, a0, 4
> > + l32i a0, a0, 0
> > +#endif
> > + callx0 a0
> > + mov a6, a2
> > + movi a2, 118 # SYS_exit
> > + syscall
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
>
> Same re: ABIs.
>
> > diff --git a/src/thread/xtensa/syscall_cp.S b/src/thread/xtensa/syscall_cp.S
> > new file mode 100644
> > index 00000000..de7c3e66
> > --- /dev/null
> > +++ b/src/thread/xtensa/syscall_cp.S
> > @@ -0,0 +1,38 @@
> > +# __syscall_cp_asm(&self->cancel, nr, u, v, w, x, y, z)
> > +# a2 a3 a4 a5 a6 a7 [sp] [sp+4]
> > +
> > +# syscall(nr, u, v, w, x, y, z)
> > +# a2 a6 a3 a4 a5 a8 a9
> > +
> > +.global __cp_begin
> > +.hidden __cp_begin
> > +.global __cp_end
> > +.hidden __cp_end
> > +.global __cp_cancel
> > +.hidden __cp_cancel
> > +.hidden __cancel
> > +.global __syscall_cp_asm
> > +.hidden __syscall_cp_asm
> > +.type __syscall_cp_asm,@function
> > +.align 4
> > +#if defined(__XTENSA_CALL0_ABI__)
> > +__syscall_cp_asm:
> > +__cp_begin:
> > + l32i a2, a2, 0
> > + bnez a2, __cp_cancel
> > + mov a2, a4
> > + mov a4, a6
> > + mov a6, a2
> > + mov a2, a3
> > + mov a3, a5
> > + mov a5, a7
> > + l32i a8, a1, 0
> > + l32i a9, a1, 4
> > + syscall
> > +__cp_end:
> > + ret
> > +__cp_cancel:
> > + j __cancel
> > +#else
> > +#error Unsupported Xtensa ABI
> > +#endif
> > --
> > 2.21.0
>
> And this one.
>
>
> > >From f8c5953ecdb948282cc8e573b729c25db60a95a8 Mon Sep 17 00:00:00 2001
> > From: Max Filippov <jcmvbkbc@gmail.com>
> > Date: Wed, 21 Feb 2024 08:27:37 -0800
> > Subject: [PATCH 2/2] WIP
> >
> > Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
> > ---
> > ldso/dlstart.c | 7 +++++++
> > ldso/dynlink.c | 6 ++++--
> > src/internal/dynlink.h | 5 +++--
> > 3 files changed, 14 insertions(+), 4 deletions(-)
> >
> > diff --git a/ldso/dlstart.c b/ldso/dlstart.c
> > index 259f5e18..beca953f 100644
> > --- a/ldso/dlstart.c
> > +++ b/ldso/dlstart.c
> > @@ -90,12 +90,19 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
> > - segs[rel_addr[1]].p_vaddr
> > + syms[R_SYM(rel[1])].st_value;
> > rel_addr[1] = dyn[DT_PLTGOT];
> > + } else if (R_TYPE(rel[1]) == REL_RELATIVE) {
> > + size_t val = *rel_addr;
> > + for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
> > + *rel_addr += segs[j].addr - segs[j].p_vaddr;
>
> So xtensa has a "relative" reloc type that's just adjusted by the load
> offset of the segment the relocation lives in, rather than needing to
> use a symbolic relocation referencing a section symbol like other
> fdpic archs do?
I was looking at the ARM BFD code while doing that and
my impression was that they do the same.
Regardless, I wonder why either relocation form might be preferrable?
> If so, this looks right and looks ok.
>
> > } else {
> > size_t val = syms[R_SYM(rel[1])].st_value;
> > for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
> > *rel_addr = rel[2] + segs[j].addr - segs[j].p_vaddr + val;
> > }
> > }
> > +#ifdef __xtensa__
> > + ((unsigned long *)dyn[DT_PLTGOT])[3] = segs[0].addr - segs[0].p_vaddr;
> > +#endif
>
> Is this actually needed for anything? Generally musl doesn't use the
> reserved GOT slots itself, and on all the other archs I'm aware of,
> they're essentially reserved to the dynamic linker implementation so
> the dynamic linker is just free not to use them and not to set them
> up.
xtensa doesn't have relative register jumps and calls, so local jumps
and calls to a far off locations need to use absolute target addresses.
One possible solution is to have the address in the GOT entry, the
other is to calculate the target address using the text segment load
offset at runtime. Both have the same instruction count, see
http://wiki.osll.ru/doku.php/etc:users:jcmvbkbc:binutils-xtensa#local_call
for the details, but the latter doesn't waste GOT space and that saves
a noticeable amount of RAM.
> > #else
> > /* If the dynamic linker is invoked as a command, its load
> > * address is not available in the aux vector. Instead, compute
> > diff --git a/ldso/dynlink.c b/ldso/dynlink.c
> > index ceca3c98..25563af3 100644
> > --- a/ldso/dynlink.c
> > +++ b/ldso/dynlink.c
> > @@ -1420,6 +1420,7 @@ static void reloc_all(struct dso *p)
> > if (!DL_FDPIC)
> > do_relr_relocs(p, laddr(p, dyn[DT_RELR]), dyn[DT_RELRSZ]);
> >
> > +#if 0
> > if (head != &ldso && p->relro_start != p->relro_end) {
> > long ret = __syscall(SYS_mprotect, laddr(p, p->relro_start),
> > p->relro_end-p->relro_start, PROT_READ);
> > @@ -1429,6 +1430,7 @@ static void reloc_all(struct dso *p)
> > if (runtime) longjmp(*rtld_fail, 1);
> > }
> > }
> > +#endif
>
> Was this breaking something? Relro linking probably should have been
> disabled on fdpic, and we ignore ENOSYS anyway for nommu.
Yeah, I do some of the testing in linux-user QEMU, it has MMU
and mprotect calls actually succeed. Failures were coming from the
relocation code, but my impression was that relro should be applied
after the relocation completion and it should just work, hence the
WIP pile.
> > p->relocated = 1;
> > }
> > @@ -1485,7 +1487,7 @@ void __libc_exit_fini()
> > if (dyn[0] & (1<<DT_FINI_ARRAY)) {
> > size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
> > size_t *fn = (size_t *)laddr(p, dyn[DT_FINI_ARRAY])+n;
> > - while (n--) ((void (*)(void))*--fn)();
> > + while (n--) fpaddr(p, *--fn)();
>
> If this is fixable on the tooling side it really should be fixed
> there. init/fini arrays should have actual language-level function
> addresses (descriptor addresses on fdpic), not instruction addresses.
I read libgcc code at
https://github.com/jcmvbkbc/gcc-xtensa/blob/xtensa-14-8789-fdpic/libgcc/crtstuff.c#L498-L503
and the way it's written suggests that this was done on purpose.
I put it into the WIP pile to figure out later what the purpose was.
I thought that SH might not have this issue because it just didn't
use the .array_init/.array_fini.
> If it's not fixable at this point, I guess we need the arch's reloc.h
> to define a macro signaling this difference in behavior.
>
> I have no idea how this would be expected to work on static linked
> programs...
That's true it's not working in this code version.
> > diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h
> > index 06f41d09..6485e883 100644
> > --- a/src/internal/dynlink.h
> > +++ b/src/internal/dynlink.h
> > @@ -78,10 +78,11 @@ struct fdpic_dummy_loadmap {
> > (R_TYPE(x) == REL_RELATIVE) || \
> > (R_TYPE(x) == REL_SYM_OR_REL && !R_SYM(x)) )
> > #else
> > -#define IS_RELATIVE(x,s) ( ( \
> > +#define IS_RELATIVE(x,s) ( \
> > + (R_TYPE(x) == REL_RELATIVE) || ( ( \
> > (R_TYPE(x) == REL_FUNCDESC_VAL) || \
> > (R_TYPE(x) == REL_SYMBOLIC) ) \
> > - && (((s)[R_SYM(x)].st_info & 0xf) == STT_SECTION) )
> > + && (((s)[R_SYM(x)].st_info & 0xf) == STT_SECTION) ) )
> > #endif
> >
> > #ifndef NEED_MIPS_GOT_RELOCS
> > --
> > 2.21.0
> >
>
> This looks ok assuming the above about relative relocs.
--
Thanks.
-- Max
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-28 17:20 ` Max Filippov
@ 2024-02-28 18:30 ` Rich Felker
2024-02-28 18:37 ` Rich Felker
` (4 more replies)
0 siblings, 5 replies; 17+ messages in thread
From: Rich Felker @ 2024-02-28 18:30 UTC (permalink / raw)
To: Max Filippov; +Cc: musl
On Wed, Feb 28, 2024 at 09:20:33AM -0800, Max Filippov wrote:
> Hi Rich,
>
> On Tue, Feb 27, 2024 at 4:12 PM Rich Felker <dalias@libc.org> wrote:
> >
> > First of all, I'm excited to see new work on this and especially the
> > FDPIC ABI. Not only is xtensa a very relevant arch I've been hoping we
> > could support for some time now, but having FDPIC will help getting
> > the parts of musl's FDPIC support that are currently making
> > sh-specific assumptions worked out so that we can get other new FDPIC
> > ports added too (particularly, arm and riscv).
> >
> > I'm not up-to-date on what toolchain status/stability is or how close
> > this is to being intended as upstreamable, but since I was pointed to
> > it and took a look, I wanted to record my initial review thoughts here
> > so they don't get lost and to hopefully move this closer to ready for
> > upstream.
>
> Thanks for your review.
> I expect to get toolchain components into shape for upstreaming during
> this year.
>
> > Review follows inline:
> >
> > On Tue, Feb 27, 2024 at 06:24:31PM -0500, Rich Felker wrote:
> > > Attached patches were pulled from
> > > https://github.com/jcmvbkbc/musl-xtensa/commits/xtensa-1.2.4-fdpic/
> > >
> > > I'll reply to comment inline.
> >
> > > >From 868b34272f414323f10528d5b9988886541cb1c0 Mon Sep 17 00:00:00 2001
> > > From: Max Filippov <jcmvbkbc@gmail.com>
> > > Date: Tue, 22 Mar 2016 02:35:58 +0300
> > > Subject: [PATCH 1/2] xtensa: add port
> > >
> > > Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
> > > ---
> > > arch/xtensa/arch.mak | 1 +
> > > arch/xtensa/atomic_arch.h | 27 ++
> > > arch/xtensa/bits/alltypes.h.in | 27 ++
> > > arch/xtensa/bits/float.h | 16 ++
> > > arch/xtensa/bits/ioctl.h | 219 ++++++++++++++
> > > arch/xtensa/bits/limits.h | 1 +
> > > arch/xtensa/bits/mman.h | 68 +++++
> > > arch/xtensa/bits/posix.h | 2 +
> > > arch/xtensa/bits/reg.h | 2 +
> > > arch/xtensa/bits/setjmp.h | 5 +
> > > arch/xtensa/bits/signal.h | 88 ++++++
> > > arch/xtensa/bits/stat.h | 24 ++
> > > arch/xtensa/bits/stdint.h | 20 ++
> > > arch/xtensa/bits/syscall.h.in | 396 ++++++++++++++++++++++++++
> > > arch/xtensa/bits/termios.h | 167 +++++++++++
> > > arch/xtensa/bits/user.h | 4 +
> > > arch/xtensa/bits/xtensa-config.h | 8 +
> > > arch/xtensa/crt_arch.h | 48 ++++
> > > arch/xtensa/kstat.h | 18 ++
> > > arch/xtensa/pthread_arch.h | 11 +
> > > arch/xtensa/reloc.h | 38 +++
> > > arch/xtensa/syscall_arch.h | 104 +++++++
> > > configure | 1 +
> > > crt/xtensa/crti.S | 29 ++
> > > crt/xtensa/crtn.S | 23 ++
> > > include/elf.h | 70 +++++
> > > src/internal/xtensa/syscall.S | 25 ++
> > > src/ldso/xtensa/tlsdesc.S | 36 +++
> > > src/process/xtensa/vfork.S | 21 ++
> > > src/setjmp/xtensa/longjmp.S | 22 ++
> > > src/setjmp/xtensa/setjmp.S | 25 ++
> > > src/signal/xtensa/restore.s | 10 +
> > > src/signal/xtensa/sigsetjmp.S | 24 ++
> > > src/thread/xtensa/__set_thread_area.S | 16 ++
> > > src/thread/xtensa/__unmapself.S | 13 +
> > > src/thread/xtensa/clone.S | 46 +++
> > > src/thread/xtensa/syscall_cp.S | 38 +++
> > > 37 files changed, 1693 insertions(+)
> > > create mode 100644 arch/xtensa/arch.mak
> > > create mode 100644 arch/xtensa/atomic_arch.h
> > > create mode 100644 arch/xtensa/bits/alltypes.h.in
> > > create mode 100644 arch/xtensa/bits/float.h
> > > create mode 100644 arch/xtensa/bits/ioctl.h
> > > create mode 100644 arch/xtensa/bits/limits.h
> > > create mode 100644 arch/xtensa/bits/mman.h
> > > create mode 100644 arch/xtensa/bits/posix.h
> > > create mode 100644 arch/xtensa/bits/reg.h
> > > create mode 100644 arch/xtensa/bits/setjmp.h
> > > create mode 100644 arch/xtensa/bits/signal.h
> > > create mode 100644 arch/xtensa/bits/stat.h
> > > create mode 100644 arch/xtensa/bits/stdint.h
> > > create mode 100644 arch/xtensa/bits/syscall.h.in
> > > create mode 100644 arch/xtensa/bits/termios.h
> > > create mode 100644 arch/xtensa/bits/user.h
> > > create mode 100644 arch/xtensa/bits/xtensa-config.h
> > > create mode 100644 arch/xtensa/crt_arch.h
> > > create mode 100644 arch/xtensa/kstat.h
> > > create mode 100644 arch/xtensa/pthread_arch.h
> > > create mode 100644 arch/xtensa/reloc.h
> > > create mode 100644 arch/xtensa/syscall_arch.h
> > > create mode 100644 crt/xtensa/crti.S
> > > create mode 100644 crt/xtensa/crtn.S
> > > create mode 100644 src/internal/xtensa/syscall.S
> > > create mode 100644 src/ldso/xtensa/tlsdesc.S
> > > create mode 100644 src/process/xtensa/vfork.S
> > > create mode 100644 src/setjmp/xtensa/longjmp.S
> > > create mode 100644 src/setjmp/xtensa/setjmp.S
> > > create mode 100644 src/signal/xtensa/restore.s
> > > create mode 100644 src/signal/xtensa/sigsetjmp.S
> > > create mode 100644 src/thread/xtensa/__set_thread_area.S
> > > create mode 100644 src/thread/xtensa/__unmapself.S
> > > create mode 100644 src/thread/xtensa/clone.S
> > > create mode 100644 src/thread/xtensa/syscall_cp.S
> > >
> > > diff --git a/arch/xtensa/arch.mak b/arch/xtensa/arch.mak
> > > new file mode 100644
> > > index 00000000..aa4d05ce
> > > --- /dev/null
> > > +++ b/arch/xtensa/arch.mak
> > > @@ -0,0 +1 @@
> > > +COMPAT_SRC_DIRS = compat/time32
> > > diff --git a/arch/xtensa/atomic_arch.h b/arch/xtensa/atomic_arch.h
> > > new file mode 100644
> > > index 00000000..34bf0704
> > > --- /dev/null
> > > +++ b/arch/xtensa/atomic_arch.h
> > > @@ -0,0 +1,27 @@
> > > +#include "bits/xtensa-config.h"
> >
> > This is not what bits headers are for; they are public-installed
> > headers that provide parts of the standard public headers that vary by
> > arch, and are never directly #include'able.
> >
> > If you really need arch-internal headers like this just put them in
> > the top-level dir of the arch. But in the case of these macros, either
> > just use the compiler-provided, __-prefixed ones directly, or if
> > they're not actually variable, don't inspect them at all.
>
> I carry this header from the time when configuration-specific header
> file was the only available configuration option. __XCHAL_* macros
> only appeared in gcc-13, but since FDPIC support will only be
> available on toolchains that have these macros I guess I'll drop the
> header and use __XCHAL_* macros directly.
>
> > > +
> > > +#if XCHAL_HAVE_S32C1I
> > > +#define a_cas a_cas
> > > +static inline int a_cas(volatile int *p, int t, int s)
> > > +{
> > > + __asm__ __volatile__ (
> > > + " wsr %2, scompare1\n"
> > > + " s32c1i %0, %1\n"
> > > + : "+a"(s), "+m"(*p)
> > > + : "a"(t)
> > > + : "memory" );
> > > + return s;
> > > +}
> > > +#endif
> >
> > For example this is not a useful test, because there's nothing you can
> > do in the case where it's not defined;
>
> This test is in the spirit of xtensa: it covers one possible
> configuration option.
> There's at least one other hardware option (exclusive access instructions)
> available for this functionality and a fallback to syscalls. They're not listed
> here because I tried to keep this initial implementation to a bare minimum.
>
> > it's just not supportable
> > (unless there's some kernel fallback mechanism like ARM and SH have;
> > then it would make sense to hard-code the cas instruction as above if
> > the ISA level is known to have it, and otherwise do conditional
> > runtime selection (again, like ARM and SH).
>
> Xtensa is quite limited on the runtime selection, because opcodes belonging
> to the options not enabled for a particular core just would not assemble.
That's not really a problem. On sh, we use .word to encode the
instructions that might not be present at the ISA level targeted,
which will only be conditionally executed. On ARM, I believe we just
asm directives to suppress the ISA level tagging of the object file so
that it can use the mnemonics for the conditionally-available
instructions. Generally, we find a way to work around whatever the
tooling is doing to make things difficult here. :-)
> > > diff --git a/arch/xtensa/bits/alltypes.h.in b/arch/xtensa/bits/alltypes..h.in
> > > new file mode 100644
> > > index 00000000..21221ce5
> > > --- /dev/null
> > > +++ b/arch/xtensa/bits/alltypes.h.in
> > > @@ -0,0 +1,27 @@
> > > +#define _REDIR_TIME64 1
> > > +#define _Addr int
> > > +#define _Int64 long long
> > > +#define _Reg int
> > > +
> > > +#if __XTENSA_EB__
> > > +#define __BYTE_ORDER 4321
> > > +#elif __XTENSA_EL__
> > > +#define __BYTE_ORDER 1234
> > > +#else
> > > +#error Unknown endianness
> > > +#endif
> > > +
> > > +#define __LONG_MAX 0x7fffffffL
> > > +
> > > +#ifndef __cplusplus
> > > +#ifdef __WCHAR_TYPE__
> > > +TYPEDEF __WCHAR_TYPE__ wchar_t;
> > > +#else
> > > +TYPEDEF unsigned wchar_t;
> > > +#endif
> > > +#endif
> > > +
> > > +TYPEDEF float float_t;
> > > +TYPEDEF double double_t;
> > > +
> > > +TYPEDEF struct { long __l __attribute__((aligned(__BIGGEST_ALIGNMENT__))); } max_align_t;
> >
> > It's best to avoid macros like __BIGGEST_ALIGNMENT__ here that could
> > change out from under us in the future, thereby quietly breaking the
> > ABI. Whatever the ABI is, that's what the alignment here should be,
> > and just using a type that naturally has the alignment if possible
> > rather than attributes.
>
> The thing is: we don't know the name of the type with the widest
> alignment, as indeed in the future someone may come up with
> a configuration with an arbitrarily named type with an alignment
> as big as allowed physically. But then its their responsibility to
> keep the __BIGGEST_ALIGNMENT__ consistent across the
> updates of that configuration that intend to stay compatible with
> each other.
max_align_t is only a C-language contract for the standard C types.
There is no need for it to cover any sort of extension types, vectors,
etc. So it should be defined in terms of the C ABI types and needs to
be stable. If long long or double is already 8-byte aligned and long
double is the same as double, it should probably just be defined as
long long or a union of long long and long double or similar.
> > > diff --git a/arch/xtensa/reloc.h b/arch/xtensa/reloc.h
> > > new file mode 100644
> > > index 00000000..03482f18
> > > --- /dev/null
> > > +++ b/arch/xtensa/reloc.h
> > > @@ -0,0 +1,38 @@
> > > +#if __BYTE_ORDER == __BIG_ENDIAN
> > > +#define ENDIAN_SUFFIX "eb"
> > > +#else
> > > +#define ENDIAN_SUFFIX ""
> > > +#endif
> > > +
> > > +#if __FDPIC__
> > > +#define ABI_SUFFIX "-fdpic"
> > > +#else
> > > +#define ABI_SUFFIX ""
> > > +#endif
> > > +
> > > +#define LDSO_ARCH "xtensa" ENDIAN_SUFFIX ABI_SUFFIX
> >
> > Any actual incompatible ABI (as opposed to ISA level) needs its own
> > LDSO name.
>
> There's no real ISA levels in the xtensa. Every CPU core configuration
> is its own unique ISA.
What does that practically mean? I think we're using different
terminology which is making it hard to communicate.
By "ISA level" I mean a node of a tree of possible extensions to a
baseline ISA, where machine code targeted for one node in the tree can
be executed on all descendants, and can be linked with machine code
targeting a particular descendant to produce code that can be executed
on that descendant.
This applies both statically and dynamically. For example, an
application built for a higher ISA level can be executed with
libc/ldso built for the baseline ISA (note: this sometimes requires
conditional logic in a few places like setjmp; see arm for example),
and an application built for the baseline ISA can be executed using
libc/ldso built for a higher ISA level.
I'm guessing you mean which features are supported is combinatorically
selectable as part of the core configuration, not a linear progression
of levels like i386, i486, etc. That's no problem. If I'm mistaken,
plese help me try to understand what you mean.
> > So if there are alternate (call0/windowed?) ABIs you want
> > to support, different names need to be defined for them here and in
> > configure.
>
> I understand that it means that I should drop the endianness suffix,
> as the endianness of each core is fixed, but add windowed/call0
> discriminator.
If there are both le and be variants, then they need to have separate
names. Same for any other mutually incompatible ABIs.
> > Unless there's a good reason to support more than one ABI,
> > it probably just makes sense to pick the preferred one and have
> > configure error-out if the toolchain's ABI is incompatible.
>
> I still have hope to support windowed FDPIC and then non-FDPIC
> variants.
Is there a reason to support both windowed and non-windowed? Sorry if
this is a dumb question; I don't know the background on it. If it's
possible to just pick one as better, and it can be supported on all
cores, that's probably the better choice.
> > > diff --git a/src/ldso/xtensa/tlsdesc.S b/src/ldso/xtensa/tlsdesc.S
> > > new file mode 100644
> > > index 00000000..084d5f3e
> > > --- /dev/null
> > > +++ b/src/ldso/xtensa/tlsdesc.S
> > > @@ -0,0 +1,36 @@
> > > +.global __tlsdesc_static
> > > +.hidden __tlsdesc_static
> > > +.type __tlsdesc_static,@function
> > > +.align 4
> > > +__tlsdesc_static:
> > > +#ifdef __XTENSA_WINDOWED_ABI__
> > > + entry a1, 16
> > > +#endif
> > > + rur a3, threadptr
> > > + add a2, a2, a3
> > > +#if defined(__XTENSA_WINDOWED_ABI__)
> > > + retw
> > > +#elif defined(__XTENSA_CALL0_ABI__)
> > > + ret
> > > +#else
> > > +#error Unsupported Xtensa ABI
> > > +#endif
> >
> > Are you sure the entry step makes sense with tlsdesc? Is this honoring
> > the ABI for what the tlsdesc function is allowed to clobber?
>
> But the tlsdesc function is just a normal function, it can do whatever
> a regular function can do AFAIU. In addition there are no relaxations
> that can replace non-call code with a variant with a call.
>
> 'entry' is the right way to begin a regular function in windowed xtensa ABI..
OK but this is not a regular function. It's a code fragment with
special ABI constraints and is intended to execute as fast as
possible, clobbering as little as possible. I don't understand exactly
what "entry" does, but I suspect it only makes sense if you're
actually introducing a "call frame" of some sort, like if you'll be
spilling and restoring call-saved registers or something (or doing
whatever the windowed-register-file equivalent is to achieve the same
thing) which does not seem to be needed here.
> > > +.hidden __tls_get_new
> > > +
> > > +.global __tlsdesc_dynamic
> > > +.hidden __tlsdesc_dynamic
> > > +.type __tlsdesc_dynamic,@function
> > > +.align 4
> > > +__tlsdesc_dynamic:
> > > +#if defined(__XTENSA_WINDOWED_ABI__)
> > > + entry a1, 16
> > > + mov a6, a2
> > > + call4 __tls_get_addr
> > > + mov a2, a6
> > > + retw
> > > +#elif defined(__XTENSA_CALL0_ABI__)
> > > + j __tls_get_addr
> > > +#else
> > > +#error Unsupported Xtensa ABI
> > > +#endif
> >
> > This approach is no longer a valid implementation for
> > __tlsdesc_dynamic, because it calls C code which could clobber any
> > call-clobbered registers, not just the ones the tlsdesc function is
> > allowed to clobber.
> >
> > Instead, it needs to actually do the dtv lookup in asm. musl does not
> > use any dynamic allocation here, so the lookup is guaranteed to just
> > work using the indices found with no conditional branches. See the
> > recently added riscv code for a clean example, in commit
> > 407aea628af8c81d9e3f5a068568f2217db71bba.
>
> Yeah, this whole xtensa TLS code should be in the WIP pile.
> I'll rewrite it in assembly.
OK, just wanted you to be aware that this is something eventually
needing rewrite.
> > > diff --git a/src/signal/xtensa/sigsetjmp.S b/src/signal/xtensa/sigsetjmp.S
> > > new file mode 100644
> > > index 00000000..e44bba88
> > > --- /dev/null
> > > +++ b/src/signal/xtensa/sigsetjmp.S
> > > @@ -0,0 +1,24 @@
> > > +.global sigsetjmp
> > > +.global __sigsetjmp
> > > +.type sigsetjmp,@function
> > > +.type __sigsetjmp,@function
> > > +.align 4
> > > +sigsetjmp:
> > > +__sigsetjmp:
> > > +#if defined(__XTENSA_CALL0_ABI__)
> > > + bnez a3, 1f
> > > +.hidden ___setjmp
> > > + j ___setjmp
> > > +1:
> > > + mov a3, a0
> > > + mov a4, a2
> > > + call0 ___setjmp
> > > + mov a0, a3
> > > + mov a3, a2
> > > + mov a2, a4
> > > +
> > > +.hidden __sigsetjmp_tail
> > > + j __sigsetjmp_tail
> > > +#else
> > > +#error Unsupported Xtensa ABI
> > > +#endif
> >
> > I don't think this code works. It does not seem to save the return
> > address and argument address before calling ___setjmp, so they will be
> > clobbered after it returns.
>
> It does:
>
> + mov a3, a0
> + mov a4, a2
>
> do exactly this and ___setjmp is hidden, so it's always going to be
> the implementation above that doesn't clobber a3 and a4.
Nope, that's not how it works. setjmp returns twice, and the second
time it returns, it's *whatever code has executed since the first
return from setjmp, which most certainly will have clobbered
everything but the call-saved registers.
The return address needs to be saved in the space past the end of the
signal mask in the sigjmp_buf, and at least one call-saved register
also needs to be saved there, so that you can store the address of the
sigjmp_buf in a call-saved register and have it available after a
subsequent return from setjmp.
> > Eventually I'd like to move most functions like this, that are
> > currently external asm but have no good reason to be (unlike setjmp,
> > vfork, etc. which *do* have a good reason to be), to minimal inline
> > asm in C. Some archs' versions of this file would even just become
> > __syscall, no further asm.
> >
> > > diff --git a/src/thread/xtensa/__unmapself.S b/src/thread/xtensa/__unmapself.S
> > > new file mode 100644
> > > index 00000000..15bf2bdf
> > > --- /dev/null
> > > +++ b/src/thread/xtensa/__unmapself.S
> > > @@ -0,0 +1,13 @@
> > > +.global __unmapself
> > > +.type __unmapself,@function
> > > +.align 4
> > > +__unmapself:
> > > +#if defined(__XTENSA_CALL0_ABI__)
> > > + mov a6, a2
> > > + movi a2, 81 # SYS_munmap
> > > + syscall
> > > + movi a2, 118 # SYS_exit
> > > + syscall
> > > +#else
> > > +#error Unsupported Xtensa ABI
> > > +#endif
> >
> > This is unsafe on fdpic/nommu if the kernel requires the task to
> > always have a valid stack.
>
> How do I know if the kernel requires that?
I don't think it's explicitly documented. For sh I was vaguely
familiar at the ISA level that the sh1/sh2 cpu just pushes stuff to
the (existing, userspace) stack pointer on interrupts, which would
necessitate always having a valid stack. I also read the entry.S code
(or whatever file this part was split off into) for how kernelspace is
entered from traps/interrupts on nommu hardware, and saw how it
shuffled things around to make it look to the kernel more like a
conventional mmu-ful entry to kernelspace.
Maybe xtensa is better about this and always invokes the
trap/interrupt handler with the saved state in special registers
rather than pushing it to the stack. If so, this might not be a
consideration; normally, Linux switches the stack pointer to the
kernel stack before actually doing anything with it.
> > > >From f8c5953ecdb948282cc8e573b729c25db60a95a8 Mon Sep 17 00:00:00 2001
> > > From: Max Filippov <jcmvbkbc@gmail.com>
> > > Date: Wed, 21 Feb 2024 08:27:37 -0800
> > > Subject: [PATCH 2/2] WIP
> > >
> > > Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
> > > ---
> > > ldso/dlstart.c | 7 +++++++
> > > ldso/dynlink.c | 6 ++++--
> > > src/internal/dynlink.h | 5 +++--
> > > 3 files changed, 14 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/ldso/dlstart.c b/ldso/dlstart.c
> > > index 259f5e18..beca953f 100644
> > > --- a/ldso/dlstart.c
> > > +++ b/ldso/dlstart.c
> > > @@ -90,12 +90,19 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
> > > - segs[rel_addr[1]].p_vaddr
> > > + syms[R_SYM(rel[1])].st_value;
> > > rel_addr[1] = dyn[DT_PLTGOT];
> > > + } else if (R_TYPE(rel[1]) == REL_RELATIVE) {
> > > + size_t val = *rel_addr;
> > > + for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
> > > + *rel_addr += segs[j].addr - segs[j].p_vaddr;
> >
> > So xtensa has a "relative" reloc type that's just adjusted by the load
> > offset of the segment the relocation lives in, rather than needing to
> > use a symbolic relocation referencing a section symbol like other
> > fdpic archs do?
>
> I was looking at the ARM BFD code while doing that and
> my impression was that they do the same.
> Regardless, I wonder why either relocation form might be preferrable?
I think the relative type here is perfectly acceptable. At first I
thought it was weaker (less capable of representing addresses of
objects in different segments), but looking again, I don't think
that's the case.
> > If so, this looks right and looks ok.
> >
> > > } else {
> > > size_t val = syms[R_SYM(rel[1])].st_value;
> > > for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
> > > *rel_addr = rel[2] + segs[j].addr - segs[j].p_vaddr + val;
> > > }
> > > }
> > > +#ifdef __xtensa__
> > > + ((unsigned long *)dyn[DT_PLTGOT])[3] = segs[0].addr - segs[0].p_vaddr;
> > > +#endif
> >
> > Is this actually needed for anything? Generally musl doesn't use the
> > reserved GOT slots itself, and on all the other archs I'm aware of,
> > they're essentially reserved to the dynamic linker implementation so
> > the dynamic linker is just free not to use them and not to set them
> > up.
>
> xtensa doesn't have relative register jumps and calls, so local jumps
> and calls to a far off locations need to use absolute target addresses.
> One possible solution is to have the address in the GOT entry, the
> other is to calculate the target address using the text segment load
> offset at runtime. Both have the same instruction count, see
> http://wiki.osll.ru/doku.php/etc:users:jcmvbkbc:binutils-xtensa#local_call
> for the details, but the latter doesn't waste GOT space and that saves
> a noticeable amount of RAM.
I see. Doesn't this limit you to a single text segment, though? In
practice it might not matter, but it's more constraining than fdpic
was designed to be.
Is there a reason local, relative jumps/calls can't be done by
computing the address of a nearby label and using the offset relative
to that? That's how they (at least some types; I forget the details)
work on sh/fdpic.
> > > #else
> > > /* If the dynamic linker is invoked as a command, its load
> > > * address is not available in the aux vector. Instead, compute
> > > diff --git a/ldso/dynlink.c b/ldso/dynlink.c
> > > index ceca3c98..25563af3 100644
> > > --- a/ldso/dynlink.c
> > > +++ b/ldso/dynlink.c
> > > @@ -1420,6 +1420,7 @@ static void reloc_all(struct dso *p)
> > > if (!DL_FDPIC)
> > > do_relr_relocs(p, laddr(p, dyn[DT_RELR]), dyn[DT_RELRSZ]);
> > >
> > > +#if 0
> > > if (head != &ldso && p->relro_start != p->relro_end) {
> > > long ret = __syscall(SYS_mprotect, laddr(p, p->relro_start),
> > > p->relro_end-p->relro_start, PROT_READ);
> > > @@ -1429,6 +1430,7 @@ static void reloc_all(struct dso *p)
> > > if (runtime) longjmp(*rtld_fail, 1);
> > > }
> > > }
> > > +#endif
> >
> > Was this breaking something? Relro linking probably should have been
> > disabled on fdpic, and we ignore ENOSYS anyway for nommu.
>
> Yeah, I do some of the testing in linux-user QEMU, it has MMU
> and mprotect calls actually succeed. Failures were coming from the
> relocation code, but my impression was that relro should be applied
> after the relocation completion and it should just work, hence the
> WIP pile.
Yes, relro is only supposed to be applied after all relocations were
done.
>
> > > p->relocated = 1;
> > > }
> > > @@ -1485,7 +1487,7 @@ void __libc_exit_fini()
> > > if (dyn[0] & (1<<DT_FINI_ARRAY)) {
> > > size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
> > > size_t *fn = (size_t *)laddr(p, dyn[DT_FINI_ARRAY])+n;
> > > - while (n--) ((void (*)(void))*--fn)();
> > > + while (n--) fpaddr(p, *--fn)();
> >
> > If this is fixable on the tooling side it really should be fixed
> > there. init/fini arrays should have actual language-level function
> > addresses (descriptor addresses on fdpic), not instruction addresses.
>
> I read libgcc code at
> https://github.com/jcmvbkbc/gcc-xtensa/blob/xtensa-14-8789-fdpic/libgcc/crtstuff.c#L498-L503
> and the way it's written suggests that this was done on purpose.
> I put it into the WIP pile to figure out later what the purpose was.
> I thought that SH might not have this issue because it just didn't
> use the .array_init/.array_fini.
I'm pretty sure we're using it -- musl-cross-make always forces it on
via the gcc configure command line -- but it's possible there's some
override disabling it for sh. I'll try some test cases and confirm
whether sh is doing it right. Maybe the arm folks will have input on
this too..?
> > If it's not fixable at this point, I guess we need the arch's reloc.h
> > to define a macro signaling this difference in behavior.
> >
> > I have no idea how this would be expected to work on static linked
> > programs...
>
> That's true it's not working in this code version.
And I would really not like to add hackish code to the static init
code that's special-casing some archs that violate the gABI
requirement that these arrays contain function pointers. Let's see if
it can be fixed.
Rich
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-28 18:30 ` Rich Felker
@ 2024-02-28 18:37 ` Rich Felker
2024-02-28 19:34 ` Max Filippov
2024-02-28 21:28 ` Max Filippov
` (3 subsequent siblings)
4 siblings, 1 reply; 17+ messages in thread
From: Rich Felker @ 2024-02-28 18:37 UTC (permalink / raw)
To: Max Filippov; +Cc: musl
On Wed, Feb 28, 2024 at 01:30:32PM -0500, Rich Felker wrote:
> On Wed, Feb 28, 2024 at 09:20:33AM -0800, Max Filippov wrote:
> >
> > > > p->relocated = 1;
> > > > }
> > > > @@ -1485,7 +1487,7 @@ void __libc_exit_fini()
> > > > if (dyn[0] & (1<<DT_FINI_ARRAY)) {
> > > > size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
> > > > size_t *fn = (size_t *)laddr(p, dyn[DT_FINI_ARRAY])+n;
> > > > - while (n--) ((void (*)(void))*--fn)();
> > > > + while (n--) fpaddr(p, *--fn)();
> > >
> > > If this is fixable on the tooling side it really should be fixed
> > > there. init/fini arrays should have actual language-level function
> > > addresses (descriptor addresses on fdpic), not instruction addresses.
> >
> > I read libgcc code at
> > https://github.com/jcmvbkbc/gcc-xtensa/blob/xtensa-14-8789-fdpic/libgcc/crtstuff.c#L498-L503
> > and the way it's written suggests that this was done on purpose.
> > I put it into the WIP pile to figure out later what the purpose was.
> > I thought that SH might not have this issue because it just didn't
> > use the .array_init/.array_fini.
>
> I'm pretty sure we're using it -- musl-cross-make always forces it on
> via the gcc configure command line -- but it's possible there's some
> override disabling it for sh. I'll try some test cases and confirm
> whether sh is doing it right. Maybe the arm folks will have input on
> this too..?
Confirmed both that it works, and that it's working via init_array.
GCC emits:
.section .init_array,"aw"
.align 2
.long foo@FUNCDESC
for
__attribute__((__constructor__))
void foo() { ... }
Also, FWIW, I believe there's something of an application-facing
contract that you can declare function pointer arrays with
__attribute__((__section__(".init_array"))) and have them work, which
would not work if instruction addresses rather than function addresses
are expected to be there.
Rich
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-28 18:37 ` Rich Felker
@ 2024-02-28 19:34 ` Max Filippov
2024-02-28 19:41 ` Max Filippov
0 siblings, 1 reply; 17+ messages in thread
From: Max Filippov @ 2024-02-28 19:34 UTC (permalink / raw)
To: Rich Felker; +Cc: musl
On Wed, Feb 28, 2024 at 10:36 AM Rich Felker <dalias@libc.org> wrote:
> On Wed, Feb 28, 2024 at 01:30:32PM -0500, Rich Felker wrote:
> > On Wed, Feb 28, 2024 at 09:20:33AM -0800, Max Filippov wrote:
> > >
> > > > > p->relocated = 1;
> > > > > }
> > > > > @@ -1485,7 +1487,7 @@ void __libc_exit_fini()
> > > > > if (dyn[0] & (1<<DT_FINI_ARRAY)) {
> > > > > size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
> > > > > size_t *fn = (size_t *)laddr(p, dyn[DT_FINI_ARRAY])+n;
> > > > > - while (n--) ((void (*)(void))*--fn)();
> > > > > + while (n--) fpaddr(p, *--fn)();
> > > >
> > > > If this is fixable on the tooling side it really should be fixed
> > > > there. init/fini arrays should have actual language-level function
> > > > addresses (descriptor addresses on fdpic), not instruction addresses.
> > >
> > > I read libgcc code at
> > > https://github.com/jcmvbkbc/gcc-xtensa/blob/xtensa-14-8789-fdpic/libgcc/crtstuff.c#L498-L503
> > > and the way it's written suggests that this was done on purpose.
> > > I put it into the WIP pile to figure out later what the purpose was.
> > > I thought that SH might not have this issue because it just didn't
> > > use the .array_init/.array_fini.
> >
> > I'm pretty sure we're using it -- musl-cross-make always forces it on
> > via the gcc configure command line -- but it's possible there's some
> > override disabling it for sh. I'll try some test cases and confirm
> > whether sh is doing it right. Maybe the arm folks will have input on
> > this too..?
>
> Confirmed both that it works, and that it's working via init_array.
> GCC emits:
>
> .section .init_array,"aw"
> .align 2
> .long foo@FUNCDESC
>
> for
>
> __attribute__((__constructor__))
> void foo() { ... }
>
Oh, no doubt that that C code generates a function descriptor, it
works for xtensa too. But the piece of libgcc quoted above specifically
puts a pointer to an object, not to a function into the .init_array.
> Also, FWIW, I believe there's something of an application-facing
> contract that you can declare function pointer arrays with
> __attribute__((__section__(".init_array"))) and have them work, which
> would not work if instruction addresses rather than function addresses
> are expected to be there.
--
Thanks.
-- Max
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-28 19:34 ` Max Filippov
@ 2024-02-28 19:41 ` Max Filippov
2024-02-28 20:14 ` Rich Felker
0 siblings, 1 reply; 17+ messages in thread
From: Max Filippov @ 2024-02-28 19:41 UTC (permalink / raw)
To: Rich Felker; +Cc: musl
On Wed, Feb 28, 2024 at 11:34 AM Max Filippov <jcmvbkbc@gmail.com> wrote:
>
> On Wed, Feb 28, 2024 at 10:36 AM Rich Felker <dalias@libc.org> wrote:
> > On Wed, Feb 28, 2024 at 01:30:32PM -0500, Rich Felker wrote:
> > > On Wed, Feb 28, 2024 at 09:20:33AM -0800, Max Filippov wrote:
> > > >
> > > > > > p->relocated = 1;
> > > > > > }
> > > > > > @@ -1485,7 +1487,7 @@ void __libc_exit_fini()
> > > > > > if (dyn[0] & (1<<DT_FINI_ARRAY)) {
> > > > > > size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
> > > > > > size_t *fn = (size_t *)laddr(p, dyn[DT_FINI_ARRAY])+n;
> > > > > > - while (n--) ((void (*)(void))*--fn)();
> > > > > > + while (n--) fpaddr(p, *--fn)();
> > > > >
> > > > > If this is fixable on the tooling side it really should be fixed
> > > > > there. init/fini arrays should have actual language-level function
> > > > > addresses (descriptor addresses on fdpic), not instruction addresses.
> > > >
> > > > I read libgcc code at
> > > > https://github.com/jcmvbkbc/gcc-xtensa/blob/xtensa-14-8789-fdpic/libgcc/crtstuff.c#L498-L503
> > > > and the way it's written suggests that this was done on purpose.
> > > > I put it into the WIP pile to figure out later what the purpose was.
> > > > I thought that SH might not have this issue because it just didn't
> > > > use the .array_init/.array_fini.
> > >
> > > I'm pretty sure we're using it -- musl-cross-make always forces it on
> > > via the gcc configure command line -- but it's possible there's some
> > > override disabling it for sh. I'll try some test cases and confirm
> > > whether sh is doing it right. Maybe the arm folks will have input on
> > > this too..?
> >
> > Confirmed both that it works, and that it's working via init_array.
> > GCC emits:
> >
> > .section .init_array,"aw"
> > .align 2
> > .long foo@FUNCDESC
> >
> > for
> >
> > __attribute__((__constructor__))
> > void foo() { ... }
> >
>
> Oh, no doubt that that C code generates a function descriptor, it
> works for xtensa too. But the piece of libgcc quoted above specifically
> puts a pointer to an object, not to a function into the .init_array.
It was introduced to gcc by the ARM FDPIC series:
https://github.com/jcmvbkbc/gcc-xtensa/commit/11189793b6ef60645d5d1126d0bd9d0dd83e6583
This is the second change that I find made by the ARM FDPIC
series that appears to be not right for other FDPIC ports, first
being this change to the C++ unwinding code:
https://github.com/jcmvbkbc/gcc-xtensa/commit/67b0605494f32811364e25328d3522467aaf0638
--
Thanks.
-- Max
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-28 19:41 ` Max Filippov
@ 2024-02-28 20:14 ` Rich Felker
2024-02-28 20:26 ` Rich Felker
0 siblings, 1 reply; 17+ messages in thread
From: Rich Felker @ 2024-02-28 20:14 UTC (permalink / raw)
To: Max Filippov; +Cc: musl
On Wed, Feb 28, 2024 at 11:41:30AM -0800, Max Filippov wrote:
> On Wed, Feb 28, 2024 at 11:34 AM Max Filippov <jcmvbkbc@gmail.com> wrote:
> >
> > On Wed, Feb 28, 2024 at 10:36 AM Rich Felker <dalias@libc.org> wrote:
> > > On Wed, Feb 28, 2024 at 01:30:32PM -0500, Rich Felker wrote:
> > > > On Wed, Feb 28, 2024 at 09:20:33AM -0800, Max Filippov wrote:
> > > > >
> > > > > > > p->relocated = 1;
> > > > > > > }
> > > > > > > @@ -1485,7 +1487,7 @@ void __libc_exit_fini()
> > > > > > > if (dyn[0] & (1<<DT_FINI_ARRAY)) {
> > > > > > > size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
> > > > > > > size_t *fn = (size_t *)laddr(p, dyn[DT_FINI_ARRAY])+n;
> > > > > > > - while (n--) ((void (*)(void))*--fn)();
> > > > > > > + while (n--) fpaddr(p, *--fn)();
> > > > > >
> > > > > > If this is fixable on the tooling side it really should be fixed
> > > > > > there. init/fini arrays should have actual language-level function
> > > > > > addresses (descriptor addresses on fdpic), not instruction addresses.
> > > > >
> > > > > I read libgcc code at
> > > > > https://github.com/jcmvbkbc/gcc-xtensa/blob/xtensa-14-8789-fdpic/libgcc/crtstuff.c#L498-L503
> > > > > and the way it's written suggests that this was done on purpose.
> > > > > I put it into the WIP pile to figure out later what the purpose was..
> > > > > I thought that SH might not have this issue because it just didn't
> > > > > use the .array_init/.array_fini.
> > > >
> > > > I'm pretty sure we're using it -- musl-cross-make always forces it on
> > > > via the gcc configure command line -- but it's possible there's some
> > > > override disabling it for sh. I'll try some test cases and confirm
> > > > whether sh is doing it right. Maybe the arm folks will have input on
> > > > this too..?
> > >
> > > Confirmed both that it works, and that it's working via init_array.
> > > GCC emits:
> > >
> > > .section .init_array,"aw"
> > > .align 2
> > > .long foo@FUNCDESC
> > >
> > > for
> > >
> > > __attribute__((__constructor__))
> > > void foo() { ... }
> > >
> >
> > Oh, no doubt that that C code generates a function descriptor, it
> > works for xtensa too. But the piece of libgcc quoted above specifically
> > puts a pointer to an object, not to a function into the .init_array.
>
> It was introduced to gcc by the ARM FDPIC series:
> https://github.com/jcmvbkbc/gcc-xtensa/commit/11189793b6ef60645d5d1126d0bd9d0dd83e6583
>
> This is the second change that I find made by the ARM FDPIC
> series that appears to be not right for other FDPIC ports, first
> being this change to the C++ unwinding code:
> https://github.com/jcmvbkbc/gcc-xtensa/commit/67b0605494f32811364e25328d3522467aaf0638
OK, so the arm folks put explicitly wrong/broken code here. That needs
to be reverted, and they can work out the mess they created on glibc.
There is probably wrong arm target code too whereby gcc is generating
instruction addresses for __attribute__((__constructor__)) rather than
function addresses.
If they have compat to worry about with glibc binaries, that's going
to be a mess for them to fix, but we can just patch it out for musl
target regardless of what they do since we have no existing broken
binaries.
Rich
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-28 20:14 ` Rich Felker
@ 2024-02-28 20:26 ` Rich Felker
2024-02-28 20:37 ` Rich Felker
0 siblings, 1 reply; 17+ messages in thread
From: Rich Felker @ 2024-02-28 20:26 UTC (permalink / raw)
To: Max Filippov; +Cc: musl
On Wed, Feb 28, 2024 at 03:14:12PM -0500, Rich Felker wrote:
> On Wed, Feb 28, 2024 at 11:41:30AM -0800, Max Filippov wrote:
> > On Wed, Feb 28, 2024 at 11:34 AM Max Filippov <jcmvbkbc@gmail.com> wrote:
> > >
> > > On Wed, Feb 28, 2024 at 10:36 AM Rich Felker <dalias@libc.org> wrote:
> > > > On Wed, Feb 28, 2024 at 01:30:32PM -0500, Rich Felker wrote:
> > > > > On Wed, Feb 28, 2024 at 09:20:33AM -0800, Max Filippov wrote:
> > > > > >
> > > > > > > > p->relocated = 1;
> > > > > > > > }
> > > > > > > > @@ -1485,7 +1487,7 @@ void __libc_exit_fini()
> > > > > > > > if (dyn[0] & (1<<DT_FINI_ARRAY)) {
> > > > > > > > size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
> > > > > > > > size_t *fn = (size_t *)laddr(p, dyn[DT_FINI_ARRAY])+n;
> > > > > > > > - while (n--) ((void (*)(void))*--fn)();
> > > > > > > > + while (n--) fpaddr(p, *--fn)();
> > > > > > >
> > > > > > > If this is fixable on the tooling side it really should be fixed
> > > > > > > there. init/fini arrays should have actual language-level function
> > > > > > > addresses (descriptor addresses on fdpic), not instruction addresses.
> > > > > >
> > > > > > I read libgcc code at
> > > > > > https://github.com/jcmvbkbc/gcc-xtensa/blob/xtensa-14-8789-fdpic/libgcc/crtstuff.c#L498-L503
> > > > > > and the way it's written suggests that this was done on purpose.
> > > > > > I put it into the WIP pile to figure out later what the purpose was..
> > > > > > I thought that SH might not have this issue because it just didn't
> > > > > > use the .array_init/.array_fini.
> > > > >
> > > > > I'm pretty sure we're using it -- musl-cross-make always forces it on
> > > > > via the gcc configure command line -- but it's possible there's some
> > > > > override disabling it for sh. I'll try some test cases and confirm
> > > > > whether sh is doing it right. Maybe the arm folks will have input on
> > > > > this too..?
> > > >
> > > > Confirmed both that it works, and that it's working via init_array.
> > > > GCC emits:
> > > >
> > > > .section .init_array,"aw"
> > > > .align 2
> > > > .long foo@FUNCDESC
> > > >
> > > > for
> > > >
> > > > __attribute__((__constructor__))
> > > > void foo() { ... }
> > > >
> > >
> > > Oh, no doubt that that C code generates a function descriptor, it
> > > works for xtensa too. But the piece of libgcc quoted above specifically
> > > puts a pointer to an object, not to a function into the .init_array.
> >
> > It was introduced to gcc by the ARM FDPIC series:
> > https://github.com/jcmvbkbc/gcc-xtensa/commit/11189793b6ef60645d5d1126d0bd9d0dd83e6583
> >
> > This is the second change that I find made by the ARM FDPIC
> > series that appears to be not right for other FDPIC ports, first
> > being this change to the C++ unwinding code:
> > https://github.com/jcmvbkbc/gcc-xtensa/commit/67b0605494f32811364e25328d3522467aaf0638
>
> OK, so the arm folks put explicitly wrong/broken code here. That needs
> to be reverted, and they can work out the mess they created on glibc.
>
> There is probably wrong arm target code too whereby gcc is generating
> instruction addresses for __attribute__((__constructor__)) rather than
> function addresses.
>
> If they have compat to worry about with glibc binaries, that's going
> to be a mess for them to fix, but we can just patch it out for musl
> target regardless of what they do since we have no existing broken
> binaries.
OK, really good news! They didn't actually botch it. This test on
godbolt shows GCC is doing the right thing:
https://godbolt.org/z/b53ExYoPf
(GCC 13.1.0 ARM, -O2 -mfdpic, following code)
#include <stdio.h>
__attribute__((__constructor__))
void foo()
{
printf("hello ");
}
int main()
{
printf("world\n");
}
Rather, someone just made the crtstuff gratuitously do the wrong
thing, then later hard-coded disabling initfiniarray on fdpic because
it was doing the wrong thing:
https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=9c560cf23996271ee26dfc4a1d8484b85173cd12;hp=6bcbf80c6e2bd8a60d88bbcac3d70ffb67f4888f
So all that's needed is to revert the wrong patch to crtstuff and then
xtensa and arm initfiniarray will work as they should.
Anyone want to open a GCC bug tracker item for this?
Rich
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-28 20:26 ` Rich Felker
@ 2024-02-28 20:37 ` Rich Felker
0 siblings, 0 replies; 17+ messages in thread
From: Rich Felker @ 2024-02-28 20:37 UTC (permalink / raw)
To: Max Filippov; +Cc: musl
On Wed, Feb 28, 2024 at 03:26:45PM -0500, Rich Felker wrote:
> On Wed, Feb 28, 2024 at 03:14:12PM -0500, Rich Felker wrote:
> > On Wed, Feb 28, 2024 at 11:41:30AM -0800, Max Filippov wrote:
> > > On Wed, Feb 28, 2024 at 11:34 AM Max Filippov <jcmvbkbc@gmail.com> wrote:
> > > >
> > > > On Wed, Feb 28, 2024 at 10:36 AM Rich Felker <dalias@libc.org> wrote:
> > > > > On Wed, Feb 28, 2024 at 01:30:32PM -0500, Rich Felker wrote:
> > > > > > On Wed, Feb 28, 2024 at 09:20:33AM -0800, Max Filippov wrote:
> > > > > > >
> > > > > > > > > p->relocated = 1;
> > > > > > > > > }
> > > > > > > > > @@ -1485,7 +1487,7 @@ void __libc_exit_fini()
> > > > > > > > > if (dyn[0] & (1<<DT_FINI_ARRAY)) {
> > > > > > > > > size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
> > > > > > > > > size_t *fn = (size_t *)laddr(p, dyn[DT_FINI_ARRAY])+n;
> > > > > > > > > - while (n--) ((void (*)(void))*--fn)();
> > > > > > > > > + while (n--) fpaddr(p, *--fn)();
> > > > > > > >
> > > > > > > > If this is fixable on the tooling side it really should be fixed
> > > > > > > > there. init/fini arrays should have actual language-level function
> > > > > > > > addresses (descriptor addresses on fdpic), not instruction addresses.
> > > > > > >
> > > > > > > I read libgcc code at
> > > > > > > https://github.com/jcmvbkbc/gcc-xtensa/blob/xtensa-14-8789-fdpic/libgcc/crtstuff.c#L498-L503
> > > > > > > and the way it's written suggests that this was done on purpose.
> > > > > > > I put it into the WIP pile to figure out later what the purpose was..
> > > > > > > I thought that SH might not have this issue because it just didn't
> > > > > > > use the .array_init/.array_fini.
> > > > > >
> > > > > > I'm pretty sure we're using it -- musl-cross-make always forces it on
> > > > > > via the gcc configure command line -- but it's possible there's some
> > > > > > override disabling it for sh. I'll try some test cases and confirm
> > > > > > whether sh is doing it right. Maybe the arm folks will have input on
> > > > > > this too..?
> > > > >
> > > > > Confirmed both that it works, and that it's working via init_array.
> > > > > GCC emits:
> > > > >
> > > > > .section .init_array,"aw"
> > > > > .align 2
> > > > > .long foo@FUNCDESC
> > > > >
> > > > > for
> > > > >
> > > > > __attribute__((__constructor__))
> > > > > void foo() { ... }
> > > > >
> > > >
> > > > Oh, no doubt that that C code generates a function descriptor, it
> > > > works for xtensa too. But the piece of libgcc quoted above specifically
> > > > puts a pointer to an object, not to a function into the .init_array.
> > >
> > > It was introduced to gcc by the ARM FDPIC series:
> > > https://github.com/jcmvbkbc/gcc-xtensa/commit/11189793b6ef60645d5d1126d0bd9d0dd83e6583
> > >
> > > This is the second change that I find made by the ARM FDPIC
> > > series that appears to be not right for other FDPIC ports, first
> > > being this change to the C++ unwinding code:
> > > https://github.com/jcmvbkbc/gcc-xtensa/commit/67b0605494f32811364e25328d3522467aaf0638
> >
> > OK, so the arm folks put explicitly wrong/broken code here. That needs
> > to be reverted, and they can work out the mess they created on glibc.
> >
> > There is probably wrong arm target code too whereby gcc is generating
> > instruction addresses for __attribute__((__constructor__)) rather than
> > function addresses.
> >
> > If they have compat to worry about with glibc binaries, that's going
> > to be a mess for them to fix, but we can just patch it out for musl
> > target regardless of what they do since we have no existing broken
> > binaries.
>
> OK, really good news! They didn't actually botch it. This test on
> godbolt shows GCC is doing the right thing:
>
> https://godbolt.org/z/b53ExYoPf
>
> (GCC 13.1.0 ARM, -O2 -mfdpic, following code)
>
> #include <stdio.h>
> __attribute__((__constructor__))
> void foo()
> {
> printf("hello ");
> }
>
> int main()
> {
> printf("world\n");
> }
>
> Rather, someone just made the crtstuff gratuitously do the wrong
> thing, then later hard-coded disabling initfiniarray on fdpic because
> it was doing the wrong thing:
>
> https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=9c560cf23996271ee26dfc4a1d8484b85173cd12;hp=6bcbf80c6e2bd8a60d88bbcac3d70ffb67f4888f
>
> So all that's needed is to revert the wrong patch to crtstuff and then
> xtensa and arm initfiniarray will work as they should.
>
> Anyone want to open a GCC bug tracker item for this?
Filed as https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114158
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-28 18:30 ` Rich Felker
2024-02-28 18:37 ` Rich Felker
@ 2024-02-28 21:28 ` Max Filippov
2024-02-29 12:03 ` Max Filippov
2024-02-29 16:25 ` Max Filippov
` (2 subsequent siblings)
4 siblings, 1 reply; 17+ messages in thread
From: Max Filippov @ 2024-02-28 21:28 UTC (permalink / raw)
To: Rich Felker; +Cc: musl
On Wed, Feb 28, 2024 at 10:30 AM Rich Felker <dalias@libc.org> wrote:
>
> On Wed, Feb 28, 2024 at 09:20:33AM -0800, Max Filippov wrote:
> > Hi Rich,
> >
> > On Tue, Feb 27, 2024 at 4:12 PM Rich Felker <dalias@libc.org> wrote:
> > >
> > > First of all, I'm excited to see new work on this and especially the
> > > FDPIC ABI. Not only is xtensa a very relevant arch I've been hoping we
> > > could support for some time now, but having FDPIC will help getting
> > > the parts of musl's FDPIC support that are currently making
> > > sh-specific assumptions worked out so that we can get other new FDPIC
> > > ports added too (particularly, arm and riscv).
> > >
> > > I'm not up-to-date on what toolchain status/stability is or how close
> > > this is to being intended as upstreamable, but since I was pointed to
> > > it and took a look, I wanted to record my initial review thoughts here
> > > so they don't get lost and to hopefully move this closer to ready for
> > > upstream.
> >
> > Thanks for your review.
> > I expect to get toolchain components into shape for upstreaming during
> > this year.
> >
> > > Review follows inline:
> > >
> > > On Tue, Feb 27, 2024 at 06:24:31PM -0500, Rich Felker wrote:
> > > > Attached patches were pulled from
> > > > https://github.com/jcmvbkbc/musl-xtensa/commits/xtensa-1.2.4-fdpic/
> > > >
> > > > I'll reply to comment inline.
> > >
> > > > >From 868b34272f414323f10528d5b9988886541cb1c0 Mon Sep 17 00:00:00 2001
> > > > From: Max Filippov <jcmvbkbc@gmail.com>
> > > > Date: Tue, 22 Mar 2016 02:35:58 +0300
> > > > Subject: [PATCH 1/2] xtensa: add port
> > > >
> > > > Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
> > > > ---
> > > > arch/xtensa/arch.mak | 1 +
> > > > arch/xtensa/atomic_arch.h | 27 ++
> > > > arch/xtensa/bits/alltypes.h.in | 27 ++
> > > > arch/xtensa/bits/float.h | 16 ++
> > > > arch/xtensa/bits/ioctl.h | 219 ++++++++++++++
> > > > arch/xtensa/bits/limits.h | 1 +
> > > > arch/xtensa/bits/mman.h | 68 +++++
> > > > arch/xtensa/bits/posix.h | 2 +
> > > > arch/xtensa/bits/reg.h | 2 +
> > > > arch/xtensa/bits/setjmp.h | 5 +
> > > > arch/xtensa/bits/signal.h | 88 ++++++
> > > > arch/xtensa/bits/stat.h | 24 ++
> > > > arch/xtensa/bits/stdint.h | 20 ++
> > > > arch/xtensa/bits/syscall.h.in | 396 ++++++++++++++++++++++++++
> > > > arch/xtensa/bits/termios.h | 167 +++++++++++
> > > > arch/xtensa/bits/user.h | 4 +
> > > > arch/xtensa/bits/xtensa-config.h | 8 +
> > > > arch/xtensa/crt_arch.h | 48 ++++
> > > > arch/xtensa/kstat.h | 18 ++
> > > > arch/xtensa/pthread_arch.h | 11 +
> > > > arch/xtensa/reloc.h | 38 +++
> > > > arch/xtensa/syscall_arch.h | 104 +++++++
> > > > configure | 1 +
> > > > crt/xtensa/crti.S | 29 ++
> > > > crt/xtensa/crtn.S | 23 ++
> > > > include/elf.h | 70 +++++
> > > > src/internal/xtensa/syscall.S | 25 ++
> > > > src/ldso/xtensa/tlsdesc.S | 36 +++
> > > > src/process/xtensa/vfork.S | 21 ++
> > > > src/setjmp/xtensa/longjmp.S | 22 ++
> > > > src/setjmp/xtensa/setjmp.S | 25 ++
> > > > src/signal/xtensa/restore.s | 10 +
> > > > src/signal/xtensa/sigsetjmp.S | 24 ++
> > > > src/thread/xtensa/__set_thread_area.S | 16 ++
> > > > src/thread/xtensa/__unmapself.S | 13 +
> > > > src/thread/xtensa/clone.S | 46 +++
> > > > src/thread/xtensa/syscall_cp.S | 38 +++
> > > > 37 files changed, 1693 insertions(+)
> > > > create mode 100644 arch/xtensa/arch.mak
> > > > create mode 100644 arch/xtensa/atomic_arch.h
> > > > create mode 100644 arch/xtensa/bits/alltypes.h.in
> > > > create mode 100644 arch/xtensa/bits/float.h
> > > > create mode 100644 arch/xtensa/bits/ioctl.h
> > > > create mode 100644 arch/xtensa/bits/limits.h
> > > > create mode 100644 arch/xtensa/bits/mman.h
> > > > create mode 100644 arch/xtensa/bits/posix.h
> > > > create mode 100644 arch/xtensa/bits/reg.h
> > > > create mode 100644 arch/xtensa/bits/setjmp.h
> > > > create mode 100644 arch/xtensa/bits/signal.h
> > > > create mode 100644 arch/xtensa/bits/stat.h
> > > > create mode 100644 arch/xtensa/bits/stdint.h
> > > > create mode 100644 arch/xtensa/bits/syscall.h.in
> > > > create mode 100644 arch/xtensa/bits/termios.h
> > > > create mode 100644 arch/xtensa/bits/user.h
> > > > create mode 100644 arch/xtensa/bits/xtensa-config.h
> > > > create mode 100644 arch/xtensa/crt_arch.h
> > > > create mode 100644 arch/xtensa/kstat.h
> > > > create mode 100644 arch/xtensa/pthread_arch.h
> > > > create mode 100644 arch/xtensa/reloc.h
> > > > create mode 100644 arch/xtensa/syscall_arch.h
> > > > create mode 100644 crt/xtensa/crti.S
> > > > create mode 100644 crt/xtensa/crtn.S
> > > > create mode 100644 src/internal/xtensa/syscall.S
> > > > create mode 100644 src/ldso/xtensa/tlsdesc.S
> > > > create mode 100644 src/process/xtensa/vfork.S
> > > > create mode 100644 src/setjmp/xtensa/longjmp.S
> > > > create mode 100644 src/setjmp/xtensa/setjmp.S
> > > > create mode 100644 src/signal/xtensa/restore.s
> > > > create mode 100644 src/signal/xtensa/sigsetjmp.S
> > > > create mode 100644 src/thread/xtensa/__set_thread_area.S
> > > > create mode 100644 src/thread/xtensa/__unmapself.S
> > > > create mode 100644 src/thread/xtensa/clone.S
> > > > create mode 100644 src/thread/xtensa/syscall_cp.S
> > > >
> > > > diff --git a/arch/xtensa/arch.mak b/arch/xtensa/arch.mak
> > > > new file mode 100644
> > > > index 00000000..aa4d05ce
> > > > --- /dev/null
> > > > +++ b/arch/xtensa/arch.mak
> > > > @@ -0,0 +1 @@
> > > > +COMPAT_SRC_DIRS = compat/time32
> > > > diff --git a/arch/xtensa/atomic_arch.h b/arch/xtensa/atomic_arch.h
> > > > new file mode 100644
> > > > index 00000000..34bf0704
> > > > --- /dev/null
> > > > +++ b/arch/xtensa/atomic_arch.h
> > > > @@ -0,0 +1,27 @@
> > > > +#include "bits/xtensa-config.h"
> > >
> > > This is not what bits headers are for; they are public-installed
> > > headers that provide parts of the standard public headers that vary by
> > > arch, and are never directly #include'able.
> > >
> > > If you really need arch-internal headers like this just put them in
> > > the top-level dir of the arch. But in the case of these macros, either
> > > just use the compiler-provided, __-prefixed ones directly, or if
> > > they're not actually variable, don't inspect them at all.
> >
> > I carry this header from the time when configuration-specific header
> > file was the only available configuration option. __XCHAL_* macros
> > only appeared in gcc-13, but since FDPIC support will only be
> > available on toolchains that have these macros I guess I'll drop the
> > header and use __XCHAL_* macros directly.
> >
> > > > +
> > > > +#if XCHAL_HAVE_S32C1I
> > > > +#define a_cas a_cas
> > > > +static inline int a_cas(volatile int *p, int t, int s)
> > > > +{
> > > > + __asm__ __volatile__ (
> > > > + " wsr %2, scompare1\n"
> > > > + " s32c1i %0, %1\n"
> > > > + : "+a"(s), "+m"(*p)
> > > > + : "a"(t)
> > > > + : "memory" );
> > > > + return s;
> > > > +}
> > > > +#endif
> > >
> > > For example this is not a useful test, because there's nothing you can
> > > do in the case where it's not defined;
> >
> > This test is in the spirit of xtensa: it covers one possible
> > configuration option.
> > There's at least one other hardware option (exclusive access instructions)
> > available for this functionality and a fallback to syscalls. They're not listed
> > here because I tried to keep this initial implementation to a bare minimum.
> >
> > > it's just not supportable
> > > (unless there's some kernel fallback mechanism like ARM and SH have;
> > > then it would make sense to hard-code the cas instruction as above if
> > > the ISA level is known to have it, and otherwise do conditional
> > > runtime selection (again, like ARM and SH).
> >
> > Xtensa is quite limited on the runtime selection, because opcodes belonging
> > to the options not enabled for a particular core just would not assemble.
>
> That's not really a problem. On sh, we use .word to encode the
> instructions that might not be present at the ISA level targeted,
> which will only be conditionally executed. On ARM, I believe we just
> asm directives to suppress the ISA level tagging of the object file so
> that it can use the mnemonics for the conditionally-available
> instructions. Generally, we find a way to work around whatever the
> tooling is doing to make things difficult here. :-)
On xtensa there's no guarantee that a particular byte sequence represents
the same instruction in different configurations.
> > > > diff --git a/arch/xtensa/bits/alltypes.h.in b/arch/xtensa/bits/alltypes..h.in
> > > > new file mode 100644
> > > > index 00000000..21221ce5
> > > > --- /dev/null
> > > > +++ b/arch/xtensa/bits/alltypes.h.in
> > > > @@ -0,0 +1,27 @@
> > > > +#define _REDIR_TIME64 1
> > > > +#define _Addr int
> > > > +#define _Int64 long long
> > > > +#define _Reg int
> > > > +
> > > > +#if __XTENSA_EB__
> > > > +#define __BYTE_ORDER 4321
> > > > +#elif __XTENSA_EL__
> > > > +#define __BYTE_ORDER 1234
> > > > +#else
> > > > +#error Unknown endianness
> > > > +#endif
> > > > +
> > > > +#define __LONG_MAX 0x7fffffffL
> > > > +
> > > > +#ifndef __cplusplus
> > > > +#ifdef __WCHAR_TYPE__
> > > > +TYPEDEF __WCHAR_TYPE__ wchar_t;
> > > > +#else
> > > > +TYPEDEF unsigned wchar_t;
> > > > +#endif
> > > > +#endif
> > > > +
> > > > +TYPEDEF float float_t;
> > > > +TYPEDEF double double_t;
> > > > +
> > > > +TYPEDEF struct { long __l __attribute__((aligned(__BIGGEST_ALIGNMENT__))); } max_align_t;
> > >
> > > It's best to avoid macros like __BIGGEST_ALIGNMENT__ here that could
> > > change out from under us in the future, thereby quietly breaking the
> > > ABI. Whatever the ABI is, that's what the alignment here should be,
> > > and just using a type that naturally has the alignment if possible
> > > rather than attributes.
> >
> > The thing is: we don't know the name of the type with the widest
> > alignment, as indeed in the future someone may come up with
> > a configuration with an arbitrarily named type with an alignment
> > as big as allowed physically. But then its their responsibility to
> > keep the __BIGGEST_ALIGNMENT__ consistent across the
> > updates of that configuration that intend to stay compatible with
> > each other.
>
> max_align_t is only a C-language contract for the standard C types.
> There is no need for it to cover any sort of extension types, vectors,
> etc. So it should be defined in terms of the C ABI types and needs to
> be stable. If long long or double is already 8-byte aligned and long
> double is the same as double, it should probably just be defined as
> long long or a union of long long and long double or similar.
Ok, somehow I missed that this name comes from the standard.
> > > > diff --git a/arch/xtensa/reloc.h b/arch/xtensa/reloc.h
> > > > new file mode 100644
> > > > index 00000000..03482f18
> > > > --- /dev/null
> > > > +++ b/arch/xtensa/reloc.h
> > > > @@ -0,0 +1,38 @@
> > > > +#if __BYTE_ORDER == __BIG_ENDIAN
> > > > +#define ENDIAN_SUFFIX "eb"
> > > > +#else
> > > > +#define ENDIAN_SUFFIX ""
> > > > +#endif
> > > > +
> > > > +#if __FDPIC__
> > > > +#define ABI_SUFFIX "-fdpic"
> > > > +#else
> > > > +#define ABI_SUFFIX ""
> > > > +#endif
> > > > +
> > > > +#define LDSO_ARCH "xtensa" ENDIAN_SUFFIX ABI_SUFFIX
> > >
> > > Any actual incompatible ABI (as opposed to ISA level) needs its own
> > > LDSO name.
> >
> > There's no real ISA levels in the xtensa. Every CPU core configuration
> > is its own unique ISA.
>
> What does that practically mean? I think we're using different
> terminology which is making it hard to communicate.
>
> By "ISA level" I mean a node of a tree of possible extensions to a
> baseline ISA, where machine code targeted for one node in the tree can
> be executed on all descendants, and can be linked with machine code
> targeting a particular descendant to produce code that can be executed
> on that descendant.
>
> This applies both statically and dynamically. For example, an
> application built for a higher ISA level can be executed with
> libc/ldso built for the baseline ISA (note: this sometimes requires
> conditional logic in a few places like setjmp; see arm for example),
> and an application built for the baseline ISA can be executed using
> libc/ldso built for a higher ISA level.
>
> I'm guessing you mean which features are supported is combinatorically
> selectable as part of the core configuration, not a linear progression
> of levels like i386, i486, etc.
That's correct. There seems to be no intention that different xtensa core
configurations were binary compatible with each other. The development
model that I know only supports compatibility between a particular
configuration created with one version of the xtensa tools and rebuilds of
that configuration with later versions of the xtensa tools. I.e. each specific
configuration is its own ISA, binary software compatibility between the
different configurations is not guaranteed. Software tools for one
configuration need not generate or understand the correct binary code
for any other configuration.
> That's no problem. If I'm mistaken,
> plese help me try to understand what you mean.
>
> > > So if there are alternate (call0/windowed?) ABIs you want
> > > to support, different names need to be defined for them here and in
> > > configure.
> >
> > I understand that it means that I should drop the endianness suffix,
> > as the endianness of each core is fixed, but add windowed/call0
> > discriminator.
>
> If there are both le and be variants, then they need to have separate
> names.
They can never exist both at the same time for any particular core
configuration. Every core has fixed endianness, there's no standard
way to generate the "other endian" code for a given core.
> Same for any other mutually incompatible ABIs.
>
> > > Unless there's a good reason to support more than one ABI,
> > > it probably just makes sense to pick the preferred one and have
> > > configure error-out if the toolchain's ABI is incompatible.
> >
> > I still have hope to support windowed FDPIC and then non-FDPIC
> > variants.
>
> Is there a reason to support both windowed and non-windowed? Sorry if
> this is a dumb question; I don't know the background on it.
Windowed is a bit more performant, and it produces more compact
code, but it may not be available if the windowed registers option was
not enabled for the particular xtensa core and it's harder to deal with
from the software point of view.
> If it's
> possible to just pick one as better, and it can be supported on all
> cores, that's probably the better choice.
That might be call0, but then all transistors spent on windowed
registers were wasted.
> > > > diff --git a/src/ldso/xtensa/tlsdesc.S b/src/ldso/xtensa/tlsdesc.S
> > > > new file mode 100644
> > > > index 00000000..084d5f3e
> > > > --- /dev/null
> > > > +++ b/src/ldso/xtensa/tlsdesc.S
> > > > @@ -0,0 +1,36 @@
> > > > +.global __tlsdesc_static
> > > > +.hidden __tlsdesc_static
> > > > +.type __tlsdesc_static,@function
> > > > +.align 4
> > > > +__tlsdesc_static:
> > > > +#ifdef __XTENSA_WINDOWED_ABI__
> > > > + entry a1, 16
> > > > +#endif
> > > > + rur a3, threadptr
> > > > + add a2, a2, a3
> > > > +#if defined(__XTENSA_WINDOWED_ABI__)
> > > > + retw
> > > > +#elif defined(__XTENSA_CALL0_ABI__)
> > > > + ret
> > > > +#else
> > > > +#error Unsupported Xtensa ABI
> > > > +#endif
> > >
> > > Are you sure the entry step makes sense with tlsdesc? Is this honoring
> > > the ABI for what the tlsdesc function is allowed to clobber?
> >
> > But the tlsdesc function is just a normal function, it can do whatever
> > a regular function can do AFAIU. In addition there are no relaxations
> > that can replace non-call code with a variant with a call.
> >
> > 'entry' is the right way to begin a regular function in windowed xtensa ABI..
>
> OK but this is not a regular function. It's a code fragment with
> special ABI constraints and is intended to execute as fast as
> possible, clobbering as little as possible.
I understand that, but I'm looking at the way it is implemented
in the xtensa gcc port and see an ordinary function call:
https://github.com/jcmvbkbc/gcc-xtensa/blob/xtensa-14-8789-fdpic/gcc/config/xtensa/xtensa.md#L3026
> I don't understand exactly
> what "entry" does, but I suspect it only makes sense if you're
> actually introducing a "call frame" of some sort, like if you'll be
> spilling and restoring call-saved registers or something (or doing
> whatever the windowed-register-file equivalent is to achieve the same
> thing) which does not seem to be needed here.
Unfortunately windowed calls on xtensa LX can be made with three
different window sizes: 4, 8 or 12 registers, the first outgoing argument
goes in the register a6, a10 or a14 respectively. One thing that 'entry'
does is window rotation so that the first incoming argument appears
in the register a2. AFAIK there's no other way to access arguments in
the unprivileged code.
> > > > +.hidden __tls_get_new
> > > > +
> > > > +.global __tlsdesc_dynamic
> > > > +.hidden __tlsdesc_dynamic
> > > > +.type __tlsdesc_dynamic,@function
> > > > +.align 4
> > > > +__tlsdesc_dynamic:
> > > > +#if defined(__XTENSA_WINDOWED_ABI__)
> > > > + entry a1, 16
> > > > + mov a6, a2
> > > > + call4 __tls_get_addr
> > > > + mov a2, a6
> > > > + retw
> > > > +#elif defined(__XTENSA_CALL0_ABI__)
> > > > + j __tls_get_addr
> > > > +#else
> > > > +#error Unsupported Xtensa ABI
> > > > +#endif
> > >
> > > This approach is no longer a valid implementation for
> > > __tlsdesc_dynamic, because it calls C code which could clobber any
> > > call-clobbered registers, not just the ones the tlsdesc function is
> > > allowed to clobber.
> > >
> > > Instead, it needs to actually do the dtv lookup in asm. musl does not
> > > use any dynamic allocation here, so the lookup is guaranteed to just
> > > work using the indices found with no conditional branches. See the
> > > recently added riscv code for a clean example, in commit
> > > 407aea628af8c81d9e3f5a068568f2217db71bba.
> >
> > Yeah, this whole xtensa TLS code should be in the WIP pile.
> > I'll rewrite it in assembly.
>
> OK, just wanted you to be aware that this is something eventually
> needing rewrite.
Sure.
> > > > diff --git a/src/signal/xtensa/sigsetjmp.S b/src/signal/xtensa/sigsetjmp.S
> > > > new file mode 100644
> > > > index 00000000..e44bba88
> > > > --- /dev/null
> > > > +++ b/src/signal/xtensa/sigsetjmp.S
> > > > @@ -0,0 +1,24 @@
> > > > +.global sigsetjmp
> > > > +.global __sigsetjmp
> > > > +.type sigsetjmp,@function
> > > > +.type __sigsetjmp,@function
> > > > +.align 4
> > > > +sigsetjmp:
> > > > +__sigsetjmp:
> > > > +#if defined(__XTENSA_CALL0_ABI__)
> > > > + bnez a3, 1f
> > > > +.hidden ___setjmp
> > > > + j ___setjmp
> > > > +1:
> > > > + mov a3, a0
> > > > + mov a4, a2
> > > > + call0 ___setjmp
> > > > + mov a0, a3
> > > > + mov a3, a2
> > > > + mov a2, a4
> > > > +
> > > > +.hidden __sigsetjmp_tail
> > > > + j __sigsetjmp_tail
> > > > +#else
> > > > +#error Unsupported Xtensa ABI
> > > > +#endif
> > >
> > > I don't think this code works. It does not seem to save the return
> > > address and argument address before calling ___setjmp, so they will be
> > > clobbered after it returns.
> >
> > It does:
> >
> > + mov a3, a0
> > + mov a4, a2
> >
> > do exactly this and ___setjmp is hidden, so it's always going to be
> > the implementation above that doesn't clobber a3 and a4.
>
> Nope, that's not how it works. setjmp returns twice, and the second
> time it returns, it's *whatever code has executed since the first
> return from setjmp, which most certainly will have clobbered
> everything but the call-saved registers.
Of course.
> The return address needs to be saved in the space past the end of the
> signal mask in the sigjmp_buf, and at least one call-saved register
> also needs to be saved there, so that you can store the address of the
> sigjmp_buf in a call-saved register and have it available after a
> subsequent return from setjmp.
I will.
> > > Eventually I'd like to move most functions like this, that are
> > > currently external asm but have no good reason to be (unlike setjmp,
> > > vfork, etc. which *do* have a good reason to be), to minimal inline
> > > asm in C. Some archs' versions of this file would even just become
> > > __syscall, no further asm.
> > >
> > > > diff --git a/src/thread/xtensa/__unmapself.S b/src/thread/xtensa/__unmapself.S
> > > > new file mode 100644
> > > > index 00000000..15bf2bdf
> > > > --- /dev/null
> > > > +++ b/src/thread/xtensa/__unmapself.S
> > > > @@ -0,0 +1,13 @@
> > > > +.global __unmapself
> > > > +.type __unmapself,@function
> > > > +.align 4
> > > > +__unmapself:
> > > > +#if defined(__XTENSA_CALL0_ABI__)
> > > > + mov a6, a2
> > > > + movi a2, 81 # SYS_munmap
> > > > + syscall
> > > > + movi a2, 118 # SYS_exit
> > > > + syscall
> > > > +#else
> > > > +#error Unsupported Xtensa ABI
> > > > +#endif
> > >
> > > This is unsafe on fdpic/nommu if the kernel requires the task to
> > > always have a valid stack.
> >
> > How do I know if the kernel requires that?
>
> I don't think it's explicitly documented. For sh I was vaguely
> familiar at the ISA level that the sh1/sh2 cpu just pushes stuff to
> the (existing, userspace) stack pointer on interrupts, which would
> necessitate always having a valid stack.
Oh, I know that part well, xtensa doesn't have anything like that.
> I also read the entry.S code
> (or whatever file this part was split off into) for how kernelspace is
> entered from traps/interrupts on nommu hardware, and saw how it
> shuffled things around to make it look to the kernel more like a
> conventional mmu-ful entry to kernelspace.
>
> Maybe xtensa is better about this and always invokes the
> trap/interrupt handler with the saved state in special registers
> rather than pushing it to the stack. If so, this might not be a
> consideration; normally, Linux switches the stack pointer to the
> kernel stack before actually doing anything with it.
Yep, that's what we do.
> > > > >From f8c5953ecdb948282cc8e573b729c25db60a95a8 Mon Sep 17 00:00:00 2001
> > > > From: Max Filippov <jcmvbkbc@gmail.com>
> > > > Date: Wed, 21 Feb 2024 08:27:37 -0800
> > > > Subject: [PATCH 2/2] WIP
> > > >
> > > > Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
> > > > ---
> > > > ldso/dlstart.c | 7 +++++++
> > > > ldso/dynlink.c | 6 ++++--
> > > > src/internal/dynlink.h | 5 +++--
> > > > 3 files changed, 14 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/ldso/dlstart.c b/ldso/dlstart.c
> > > > index 259f5e18..beca953f 100644
> > > > --- a/ldso/dlstart.c
> > > > +++ b/ldso/dlstart.c
> > > > @@ -90,12 +90,19 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
> > > > - segs[rel_addr[1]].p_vaddr
> > > > + syms[R_SYM(rel[1])].st_value;
> > > > rel_addr[1] = dyn[DT_PLTGOT];
> > > > + } else if (R_TYPE(rel[1]) == REL_RELATIVE) {
> > > > + size_t val = *rel_addr;
> > > > + for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
> > > > + *rel_addr += segs[j].addr - segs[j].p_vaddr;
> > >
> > > So xtensa has a "relative" reloc type that's just adjusted by the load
> > > offset of the segment the relocation lives in, rather than needing to
> > > use a symbolic relocation referencing a section symbol like other
> > > fdpic archs do?
> >
> > I was looking at the ARM BFD code while doing that and
> > my impression was that they do the same.
> > Regardless, I wonder why either relocation form might be preferrable?
>
> I think the relative type here is perfectly acceptable. At first I
> thought it was weaker (less capable of representing addresses of
> objects in different segments), but looking again, I don't think
> that's the case.
Ok.
> > > If so, this looks right and looks ok.
> > >
> > > > } else {
> > > > size_t val = syms[R_SYM(rel[1])].st_value;
> > > > for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
> > > > *rel_addr = rel[2] + segs[j].addr - segs[j].p_vaddr + val;
> > > > }
> > > > }
> > > > +#ifdef __xtensa__
> > > > + ((unsigned long *)dyn[DT_PLTGOT])[3] = segs[0].addr - segs[0].p_vaddr;
> > > > +#endif
> > >
> > > Is this actually needed for anything? Generally musl doesn't use the
> > > reserved GOT slots itself, and on all the other archs I'm aware of,
> > > they're essentially reserved to the dynamic linker implementation so
> > > the dynamic linker is just free not to use them and not to set them
> > > up.
> >
> > xtensa doesn't have relative register jumps and calls, so local jumps
> > and calls to a far off locations need to use absolute target addresses.
> > One possible solution is to have the address in the GOT entry, the
> > other is to calculate the target address using the text segment load
> > offset at runtime. Both have the same instruction count, see
> > http://wiki.osll.ru/doku.php/etc:users:jcmvbkbc:binutils-xtensa#local_call
> > for the details, but the latter doesn't waste GOT space and that saves
> > a noticeable amount of RAM.
>
> I see. Doesn't this limit you to a single text segment, though?
It does, yes. Also there's a hardcoded 0 as a load map index for
the text segment.
> In practice it might not matter, but it's more constraining than fdpic
> was designed to be.
I guess I could add a compiler switch that would choose how it's done,
but it seems that for a noMMU linux application a single text segment
is a reasonable expectation.
> Is there a reason local, relative jumps/calls can't be done by
> computing the address of a nearby label and using the offset relative
> to that? That's how they (at least some types; I forget the details)
> work on sh/fdpic.
IIUC that would require the actual PC knowledge and there's no direct
access to the PC on xtensa.
> > > > #else
> > > > /* If the dynamic linker is invoked as a command, its load
> > > > * address is not available in the aux vector. Instead, compute
> > > > diff --git a/ldso/dynlink.c b/ldso/dynlink.c
> > > > index ceca3c98..25563af3 100644
> > > > --- a/ldso/dynlink.c
> > > > +++ b/ldso/dynlink.c
> > > > @@ -1420,6 +1420,7 @@ static void reloc_all(struct dso *p)
> > > > if (!DL_FDPIC)
> > > > do_relr_relocs(p, laddr(p, dyn[DT_RELR]), dyn[DT_RELRSZ]);
> > > >
> > > > +#if 0
> > > > if (head != &ldso && p->relro_start != p->relro_end) {
> > > > long ret = __syscall(SYS_mprotect, laddr(p, p->relro_start),
> > > > p->relro_end-p->relro_start, PROT_READ);
> > > > @@ -1429,6 +1430,7 @@ static void reloc_all(struct dso *p)
> > > > if (runtime) longjmp(*rtld_fail, 1);
> > > > }
> > > > }
> > > > +#endif
> > >
> > > Was this breaking something? Relro linking probably should have been
> > > disabled on fdpic, and we ignore ENOSYS anyway for nommu.
> >
> > Yeah, I do some of the testing in linux-user QEMU, it has MMU
> > and mprotect calls actually succeed. Failures were coming from the
> > relocation code, but my impression was that relro should be applied
> > after the relocation completion and it should just work, hence the
> > WIP pile.
>
> Yes, relro is only supposed to be applied after all relocations were
> done.
Yeah, I'll have a look.
--
Thanks.
-- Max
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-28 21:28 ` Max Filippov
@ 2024-02-29 12:03 ` Max Filippov
2024-02-29 15:35 ` Rich Felker
0 siblings, 1 reply; 17+ messages in thread
From: Max Filippov @ 2024-02-29 12:03 UTC (permalink / raw)
To: Rich Felker; +Cc: musl
On Wed, Feb 28, 2024 at 1:28 PM Max Filippov <jcmvbkbc@gmail.com> wrote:
> On Wed, Feb 28, 2024 at 10:30 AM Rich Felker <dalias@libc.org> wrote:
> > Is there a reason local, relative jumps/calls can't be done by
> > computing the address of a nearby label and using the offset relative
> > to that? That's how they (at least some types; I forget the details)
> > work on sh/fdpic.
>
> IIUC that would require the actual PC knowledge and there's no direct
> access to the PC on xtensa.
I thought about it for a bit and although the current PC can easily
be fetched I don't see how it can work for multiple text segments
without a GOT entry. OTOH a link-time conversion of a call without
GOT to a call with GOT in case the target and the call site are in
the different text segments seems feasible.
--
Thanks.
-- Max
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-29 12:03 ` Max Filippov
@ 2024-02-29 15:35 ` Rich Felker
0 siblings, 0 replies; 17+ messages in thread
From: Rich Felker @ 2024-02-29 15:35 UTC (permalink / raw)
To: Max Filippov; +Cc: musl
On Thu, Feb 29, 2024 at 04:03:02AM -0800, Max Filippov wrote:
> On Wed, Feb 28, 2024 at 1:28 PM Max Filippov <jcmvbkbc@gmail.com> wrote:
> > On Wed, Feb 28, 2024 at 10:30 AM Rich Felker <dalias@libc.org> wrote:
> > > Is there a reason local, relative jumps/calls can't be done by
> > > computing the address of a nearby label and using the offset relative
> > > to that? That's how they (at least some types; I forget the details)
> > > work on sh/fdpic.
> >
> > IIUC that would require the actual PC knowledge and there's no direct
> > access to the PC on xtensa.
>
> I thought about it for a bit and although the current PC can easily
> be fetched I don't see how it can work for multiple text segments
> without a GOT entry. OTOH a link-time conversion of a call without
> GOT to a call with GOT in case the target and the call site are in
> the different text segments seems feasible.
With multiple text segments, a cross-segment call cannot be relative
to begin with. Because fdpic lets them float independently, it would
have to go thru the GOT.
Rather than having the linker convert calls *to* using GOT, I think
the safe way to do things would be to have the compiler emit calls
thru the GOT but linker relax them to avoid the GOT when they're in
the same segment. But there's no reason this has to be done initially.
I'm just thinking in terms of making sure the design doesn't preclude
or make it difficult to do multiple segments in the future if desired.
Rich
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-28 18:30 ` Rich Felker
2024-02-28 18:37 ` Rich Felker
2024-02-28 21:28 ` Max Filippov
@ 2024-02-29 16:25 ` Max Filippov
2024-02-29 18:16 ` Rich Felker
2024-03-19 15:25 ` Max Filippov
2024-03-19 16:08 ` Max Filippov
4 siblings, 1 reply; 17+ messages in thread
From: Max Filippov @ 2024-02-29 16:25 UTC (permalink / raw)
To: Rich Felker; +Cc: musl
On Wed, Feb 28, 2024 at 10:30 AM Rich Felker <dalias@libc.org> wrote:
> On Wed, Feb 28, 2024 at 09:20:33AM -0800, Max Filippov wrote:
> > > > diff --git a/ldso/dynlink.c b/ldso/dynlink.c
> > > > index ceca3c98..25563af3 100644
> > > > --- a/ldso/dynlink.c
> > > > +++ b/ldso/dynlink.c
> > > > @@ -1420,6 +1420,7 @@ static void reloc_all(struct dso *p)
> > > > if (!DL_FDPIC)
> > > > do_relr_relocs(p, laddr(p, dyn[DT_RELR]), dyn[DT_RELRSZ]);
> > > >
> > > > +#if 0
> > > > if (head != &ldso && p->relro_start != p->relro_end) {
> > > > long ret = __syscall(SYS_mprotect, laddr(p, p->relro_start),
> > > > p->relro_end-p->relro_start, PROT_READ);
> > > > @@ -1429,6 +1430,7 @@ static void reloc_all(struct dso *p)
> > > > if (runtime) longjmp(*rtld_fail, 1);
> > > > }
> > > > }
> > > > +#endif
> > >
> > > Was this breaking something? Relro linking probably should have been
> > > disabled on fdpic, and we ignore ENOSYS anyway for nommu.
> >
> > Yeah, I do some of the testing in linux-user QEMU, it has MMU
> > and mprotect calls actually succeed. Failures were coming from the
> > relocation code, but my impression was that relro should be applied
> > after the relocation completion and it should just work, hence the
> > WIP pile.
>
> Yes, relro is only supposed to be applied after all relocations were
> done.
So I've found two issues with this. First is that loadmap entries generated
by the QEMU (and by the linux kernel AFAICS) are not page-aligned,
but the relro segment addresses fetched by the loader are. Since the
laddr() doesn't check that a translation for the address it was given
was actually found the results are not always correct, mprotect fails
and it all terminates early.
With a workaround for that part in place I see that relro protection is
applied to the executable image before its __fdpic_fixup had a chance
to run, there are rofixups for the .init_array and .fini_array, but they both
are a part of the relro segment.
--
Thanks.
-- Max
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-29 16:25 ` Max Filippov
@ 2024-02-29 18:16 ` Rich Felker
0 siblings, 0 replies; 17+ messages in thread
From: Rich Felker @ 2024-02-29 18:16 UTC (permalink / raw)
To: Max Filippov; +Cc: musl
On Thu, Feb 29, 2024 at 08:25:12AM -0800, Max Filippov wrote:
> On Wed, Feb 28, 2024 at 10:30 AM Rich Felker <dalias@libc.org> wrote:
> > On Wed, Feb 28, 2024 at 09:20:33AM -0800, Max Filippov wrote:
> > > > > diff --git a/ldso/dynlink.c b/ldso/dynlink.c
> > > > > index ceca3c98..25563af3 100644
> > > > > --- a/ldso/dynlink.c
> > > > > +++ b/ldso/dynlink.c
> > > > > @@ -1420,6 +1420,7 @@ static void reloc_all(struct dso *p)
> > > > > if (!DL_FDPIC)
> > > > > do_relr_relocs(p, laddr(p, dyn[DT_RELR]), dyn[DT_RELRSZ]);
> > > > >
> > > > > +#if 0
> > > > > if (head != &ldso && p->relro_start != p->relro_end) {
> > > > > long ret = __syscall(SYS_mprotect, laddr(p, p->relro_start),
> > > > > p->relro_end-p->relro_start, PROT_READ);
> > > > > @@ -1429,6 +1430,7 @@ static void reloc_all(struct dso *p)
> > > > > if (runtime) longjmp(*rtld_fail, 1);
> > > > > }
> > > > > }
> > > > > +#endif
> > > >
> > > > Was this breaking something? Relro linking probably should have been
> > > > disabled on fdpic, and we ignore ENOSYS anyway for nommu.
> > >
> > > Yeah, I do some of the testing in linux-user QEMU, it has MMU
> > > and mprotect calls actually succeed. Failures were coming from the
> > > relocation code, but my impression was that relro should be applied
> > > after the relocation completion and it should just work, hence the
> > > WIP pile.
> >
> > Yes, relro is only supposed to be applied after all relocations were
> > done.
>
> So I've found two issues with this. First is that loadmap entries generated
> by the QEMU (and by the linux kernel AFAICS) are not page-aligned,
> but the relro segment addresses fetched by the loader are. Since the
> laddr() doesn't check that a translation for the address it was given
> was actually found the results are not always correct, mprotect fails
> and it all terminates early.
>
> With a workaround for that part in place I see that relro protection is
> applied to the executable image before its __fdpic_fixup had a chance
> to run, there are rofixups for the .init_array and .fini_array, but they both
> are a part of the relro segment.
OK, in that case, we probably should disable relro with fdpic until
it's fixed to work properly.
I think relro processing for the main executable should probably be
moved to run as the first step of __libc_start_init, so that it
happens after __fdpic_fixup has run. We also need to either make
relro_start and relro_end maintain the exact addresses and only get
expanded to full-page just before calling mprotect, or use laddr_pg to
get the containing page.
Unrelated to fdpic, there may also be an issue that relro processing
has a broken dependency on runtime-variable pagesize. In practice it's
probaby okay since the binary must have been linked for maxpagesize >=
PAGE_SIZE, but we should probably double-check if the current
alignment logic is semantically correct.
Rich
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-28 18:30 ` Rich Felker
` (2 preceding siblings ...)
2024-02-29 16:25 ` Max Filippov
@ 2024-03-19 15:25 ` Max Filippov
2024-03-19 16:08 ` Max Filippov
4 siblings, 0 replies; 17+ messages in thread
From: Max Filippov @ 2024-03-19 15:25 UTC (permalink / raw)
To: Rich Felker; +Cc: musl
On Wed, Feb 28, 2024 at 10:30 AM Rich Felker <dalias@libc.org> wrote:
> On Wed, Feb 28, 2024 at 09:20:33AM -0800, Max Filippov wrote:
> > On Tue, Feb 27, 2024 at 4:12 PM Rich Felker <dalias@libc.org> wrote:
> > > > diff --git a/ldso/dlstart.c b/ldso/dlstart.c
> > > > index 259f5e18..beca953f 100644
> > > > --- a/ldso/dlstart.c
> > > > +++ b/ldso/dlstart.c
> > > > @@ -90,12 +90,19 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
> > > > - segs[rel_addr[1]].p_vaddr
> > > > + syms[R_SYM(rel[1])].st_value;
> > > > rel_addr[1] = dyn[DT_PLTGOT];
> > > > + } else if (R_TYPE(rel[1]) == REL_RELATIVE) {
> > > > + size_t val = *rel_addr;
> > > > + for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
> > > > + *rel_addr += segs[j].addr - segs[j].p_vaddr;
> > >
> > > So xtensa has a "relative" reloc type that's just adjusted by the load
> > > offset of the segment the relocation lives in, rather than needing to
> > > use a symbolic relocation referencing a section symbol like other
> > > fdpic archs do?
> >
> > I was looking at the ARM BFD code while doing that and
> > my impression was that they do the same.
> > Regardless, I wonder why either relocation form might be preferable?
>
> I think the relative type here is perfectly acceptable. At first I
> thought it was weaker (less capable of representing addresses of
> objects in different segments), but looking again, I don't think
> that's the case.
I found that this treatment of the REL_RELATIVE is not consistent
with the REL_RELATIVE treatment in do_relocs(), where RELA type
relocation is expected to have an addend, and only the addend
matters, not the initial value of the field being relocated.
I just realized that looking at what ARM does might not be a good
idea for the xtensa port, as ARM uses the REL type relocations and
xtensa uses RELA.
Also do_relocs() does not use the DSO load map, so the following
change is required for it to work in the FDPIC case:
case REL_RELATIVE:
- *reloc_addr = (size_t)base + addend;
+ *reloc_addr = (size_t)laddr(dso, addend);
break;
--
Thanks.
-- Max
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [musl] Initial xtensa/fdpic port review
2024-02-28 18:30 ` Rich Felker
` (3 preceding siblings ...)
2024-03-19 15:25 ` Max Filippov
@ 2024-03-19 16:08 ` Max Filippov
4 siblings, 0 replies; 17+ messages in thread
From: Max Filippov @ 2024-03-19 16:08 UTC (permalink / raw)
To: Rich Felker; +Cc: musl
On Wed, Feb 28, 2024 at 10:30 AM Rich Felker <dalias@libc.org> wrote:
> On Wed, Feb 28, 2024 at 09:20:33AM -0800, Max Filippov wrote:
> > On Tue, Feb 27, 2024 at 4:12 PM Rich Felker <dalias@libc.org> wrote:
> > > > } else {
> > > > size_t val = syms[R_SYM(rel[1])].st_value;
> > > > for (j=0; val-segs[j].p_vaddr >= segs[j].p_memsz; j++);
> > > > *rel_addr = rel[2] + segs[j].addr - segs[j].p_vaddr + val;
> > > > }
> > > > }
> > > > +#ifdef __xtensa__
> > > > + ((unsigned long *)dyn[DT_PLTGOT])[3] = segs[0].addr - segs[0].p_vaddr;
> > > > +#endif
> > >
> > > Is this actually needed for anything? Generally musl doesn't use the
> > > reserved GOT slots itself, and on all the other archs I'm aware of,
> > > they're essentially reserved to the dynamic linker implementation so
> > > the dynamic linker is just free not to use them and not to set them
> > > up.
> >
> > xtensa doesn't have relative register jumps and calls, so local jumps
> > and calls to a far off locations need to use absolute target addresses.
> > One possible solution is to have the address in the GOT entry, the
> > other is to calculate the target address using the text segment load
> > offset at runtime. Both have the same instruction count, see
> > http://wiki.osll.ru/doku.php/etc:users:jcmvbkbc:binutils-xtensa#local_call
> > for the details, but the latter doesn't waste GOT space and that saves
> > a noticeable amount of RAM.
>
> I see. Doesn't this limit you to a single text segment, though? In
> practice it might not matter, but it's more constraining than fdpic
> was designed to be.
Instead of a fixed dedicated GOT entry there can be multiple
entries, one per independent text segment, giving the maximum of
1024 / 4 - 3 = 253 text segments addressable with this technique.
Instead of generating a fixed load from GOT + 12 there would be a
relocation against that load instruction mentioning the target symbol
and the linker would have a chance to allocate GOT space, fix up
offsets in these instructions and add dynamic relocations against
these GOT entries that would produce runtime load offsets for the
corresponding text segments.
--
Thanks.
-- Max
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2024-03-19 16:08 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-27 23:24 [musl] Initial xtensa/fdpic port review Rich Felker
2024-02-28 0:13 ` Rich Felker
2024-02-28 17:20 ` Max Filippov
2024-02-28 18:30 ` Rich Felker
2024-02-28 18:37 ` Rich Felker
2024-02-28 19:34 ` Max Filippov
2024-02-28 19:41 ` Max Filippov
2024-02-28 20:14 ` Rich Felker
2024-02-28 20:26 ` Rich Felker
2024-02-28 20:37 ` Rich Felker
2024-02-28 21:28 ` Max Filippov
2024-02-29 12:03 ` Max Filippov
2024-02-29 15:35 ` Rich Felker
2024-02-29 16:25 ` Max Filippov
2024-02-29 18:16 ` Rich Felker
2024-03-19 15:25 ` Max Filippov
2024-03-19 16:08 ` Max Filippov
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/musl/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).