Hi Rich, Regarding the N64 relocations (changes in dynlink.h): MIPS64 N64 relocations are slightly different than that of MIPS32 O32. The 64-bit r_info member of the REL/RELA contains up to three relocation types (8bits each) and two symbol table indexes (8bits and 32bits). For example (extracted from libc.so): Offset Info Type Sym. Value Sym. Name 0000000c4000 000000001203 R_MIPS_REL32 Type2: R_MIPS_64 Type3: R_MIPS_NONE In case of big-endian systems, R_TYPE (x & 0x7fffffff) and R_SYM (x >> 32) macros can be used to extract type and symbol table index. However on little-endian system, the raw r_info looks like 0x0312000000000000 and thus the required relocation information cannot be extracted using R_TYPE etc. macros. We have tested the modified R_TYPE/R_SYM/R_INFO macros for both big and little-endian. However as mentioned in the review, the change doesn't look good. Please refer to the revised dynlink.h given below. diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h index 48890b2..05cbdcc 100644 --- a/src/internal/dynlink.h +++ b/src/internal/dynlink.h @@ -11,12 +11,56 @@ typedef Elf32_Phdr Phdr; typedef Elf32_Sym Sym; #define R_TYPE(x) ((x)&255) #define R_SYM(x) ((x)>>8) +#define R_INFO ELF32_R_INFO #else typedef Elf64_Ehdr Ehdr; typedef Elf64_Phdr Phdr; typedef Elf64_Sym Sym; #define R_TYPE(x) ((x)&0x7fffffff) #define R_SYM(x) ((x)>>32) +#define R_INFO ELF64_R_INFO +#endif + +#ifdef __mips64 +#undef R_TYPE +#undef R_SYM +#undef R_INFO + +typedef union { + uint64_t r_info; + struct { + uint32_t sym1; + unsigned char sym2; + unsigned char type3; + unsigned char type2; + unsigned char type1; + } r_info_fields; +} _ELF64_MIPS_R_INFO; + +#define R_TYPE(INFO) ({ \ + _ELF64_MIPS_R_INFO info; \ + uint32_t r_type; \ + info.r_info = (INFO); \ + r_type = (uint32_t) info.r_info_fields.type1 | \ + (uint32_t) info.r_info_fields.type2 << 8 | \ + (uint32_t) info.r_info_fields.type3 << 16; \ + r_type; \ +}) + +#define R_SYM(INFO) ({ \ + _ELF64_MIPS_R_INFO info; \ + uint32_t symidx; \ + info.r_info = (INFO); \ + symidx = info.r_info_fields.sym1; \ + symidx; \ +}) + +#define R_INFO(SYM,TYPE1) ({ \ + _ELF64_MIPS_R_INFO info; \ + info.r_info_fields.sym1 = (SYM); \ + info.r_info_fields.type1 = (TYPE1); \ + info.r_info; \ +}) #endif /* These enum constants provide unmatchable default values for Regards, Jaydeep From: Mahesh Bodapati Sent: 23 February 2016 PM 12:34 To: Jaydeep Patil Subject: FW: [musl] mips n64 porting review From: Mahesh Bodapati [mailto:maheshbodapati90@gmail.com] Sent: 23 February 2016 11:56 To: Mahesh Bodapati Subject: Fwd: [musl] mips n64 porting review ---------- Forwarded message ---------- From: Rich Felker > Date: Tue, Feb 23, 2016 at 9:02 AM Subject: Re: [musl] mips n64 porting review To: musl@lists.openwall.com On Mon, Feb 22, 2016 at 10:06:02AM +0000, Mahesh Bodapati wrote: > Hi Rich, > I have attached the patch which has all the MIPS n64 porting work. I > have created mipsn64port_review remote branch on GitHub and please > have a look at > https://github.com/MaheshBodapati/musl/commits/mipsn64port_review > which has the broken down patches and the base revision on which we > have prepared patch is d150764 > > We have tested on both little endian and big endian, here are the FAILS > > Musl mips n64 libc testing on EL : > > Fails: 17 > FAIL ./src/api/main.exe [status 1] > FAIL ./src/functional/wcstol-static.exe [status 1] > FAIL ./src/functional/wcstol.exe [status 1] > FAIL ./src/math/acosh.exe [status 1] > FAIL ./src/math/asinh.exe [status 1] > FAIL ./src/math/j0.exe [status 1] > FAIL ./src/math/jn.exe [status 1] > FAIL ./src/math/jnf.exe [status 1] > FAIL ./src/math/lgamma.exe [status 1] > FAIL ./src/math/lgamma_r.exe [status 1] > FAIL ./src/math/lgammaf.exe [status 1] > FAIL ./src/math/lgammaf_r.exe [status 1] > FAIL ./src/math/sinh.exe [status 1] > FAIL ./src/math/tgamma.exe [status 1] > FAIL ./src/math/y0.exe [status 1] > FAIL ./src/math/y0f.exe [status 1] > FAIL ./src/math/ynf.exe [status 1] > > Musl mips n64 libc testing on EB : > Fails:17 > FAIL ./src/api/main.exe [status 1] > FAIL ./src/functional/wcstol-static.exe [status 1] > FAIL ./src/functional/wcstol.exe [status 1] > FAIL ./src/math/acosh.exe [status 1] > FAIL ./src/math/asinh.exe [status 1] > FAIL ./src/math/j0.exe [status 1] > FAIL ./src/math/jn.exe [status 1] > FAIL ./src/math/jnf.exe [status 1] > FAIL ./src/math/lgamma.exe [status 1] > FAIL ./src/math/lgamma_r.exe [status 1] > FAIL ./src/math/lgammaf.exe [status 1] > FAIL ./src/math/lgammaf_r.exe [status 1] > FAIL ./src/math/sinh.exe [status 1] > FAIL ./src/math/tgamma.exe [status 1] > FAIL ./src/math/y0.exe [status 1] > FAIL ./src/math/y0f.exe [status 1] > FAIL ./src/math/ynf.exe [status 1] These look fairly expected. > diff --git a/arch/mips/syscall_arch.h b/arch/mips/syscall_arch.h ^^^^^^^^^ This is changing the existing mips port (not mips64)... > index 39c0ea3..8097180 100644 > --- a/arch/mips/syscall_arch.h > +++ b/arch/mips/syscall_arch.h > @@ -54,15 +54,16 @@ static inline long __syscall2(long n, long a, long b) > register long r5 __asm__("$5") = b; > register long r7 __asm__("$7"); > register long r2 __asm__("$2"); > + long ret; > __asm__ __volatile__ ( > "addu $2,$0,%2 ; syscall" > : "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7), > "r"(r4), "r"(r5) > : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", > "$14", "$15", "$24", "$25", "hi", "lo", "memory"); > - if (r7) return -r2; > - long ret = r2; > - if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b); > + ret = r7 ? -r2 : r2; > + if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) > + __stat_fix (b); > return ret; > } > > @@ -79,10 +80,7 @@ static inline long __syscall3(long n, long a, long b, long c) > "r"(r4), "r"(r5), "r"(r6) > : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", > "$14", "$15", "$24", "$25", "hi", "lo", "memory"); > - if (r7) return -r2; > - long ret = r2; > - if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b); > - return ret; > + return r7 ? -r2 : r2; > } ...and significantly breaking it. “ok,we can keep same syscall_arch.h” > diff --git a/arch/mips64/atomic_arch.h b/arch/mips64/atomic_arch.h > new file mode 100644 > index 0000000..d90442d > --- /dev/null > +++ b/arch/mips64/atomic_arch.h > @@ -0,0 +1,221 @@ > +#define a_cas a_cas > +static inline int a_cas(volatile int *p, int t, int s) > +{ > + int dummy; > + __asm__ __volatile__( > + ".set push\n" > + ".set noreorder\n" > + " sync\n" > + "1: ll %0, %2\n" > + " bne %0, %3, 1f\n" > + " addu %1, %4, $0\n" > + " sc %1, %2\n" > + " beq %1, $0, 1b\n" > + " nop\n" > + " sync\n" > + "1: \n" > + ".set pop\n" > + : "=&r"(t), "=&r"(dummy), "+m"(*p) : "r"(t), "r"(s) : "memory" ); > + return t; > +} > [...] As noted by Bobby Bingham, you can just define the new ll/sc fragments rather than big asm blocks like were needed in old musl versions. I'll try to get the right infrastructure for the 64-bit pointer variant upstream right away so you can use it without needing more duplication in the mips64 port; right now it's just in the aarch64 dir. > diff --git a/arch/mips64/bits/float.h b/arch/mips64/bits/float.h > new file mode 100644 > index 0000000..719c790 > --- /dev/null > +++ b/arch/mips64/bits/float.h > @@ -0,0 +1,16 @@ > +#define FLT_EVAL_METHOD 0 > + > +#define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L > +#define LDBL_MIN 3.36210314311209350626267781732175260e-4932L > +#define LDBL_MAX 1.18973149535723176508575932662800702e+4932L > +#define LDBL_EPSILON 1.92592994438723585305597794258492732e-34L > + > +#define LDBL_MANT_DIG 113 > +#define LDBL_MIN_EXP (-16381) > +#define LDBL_MAX_EXP 16384 > + > +#define LDBL_DIG 33 > +#define LDBL_MIN_10_EXP (-4931) > +#define LDBL_MAX_10_EXP 4932 > + > +#define DECIMAL_DIG 36 Is your mips64 port using IEEE quad? These values look like they're matched to IEEE quad, but I couldn't find clear answers on whether mips64 is using IEEE quad or double-double. The latter cannot be supported, so if that's what it's using, we'd need to either find a way to configure the compiler for quad or for 64-bit long double. “Szabolcs Nagy replied as below in prior mail discussions bits/float.h is wrong if mips64 uses ieee binary128 then copy aarch64 float.h otherwise use arm float.h long double is same as double on o32 and a 128-bit IEEE quad on mips64-linux > diff --git a/arch/mips64/bits/sem.h b/arch/mips64/bits/sem.h > new file mode 100644 > index 0000000..1c05a22 > --- /dev/null > +++ b/arch/mips64/bits/sem.h > @@ -0,0 +1,8 @@ > +struct semid_ds { > + struct ipc_perm sem_perm; > + time_t sem_otime; > + time_t sem_ctime; > + unsigned long sem_nsems; > + unsigned long __unused3; > + unsigned long __unused4; > +}; sem_nsems has to have type unsigned short (this is a POSIX requirement), with appropriate padding to match the kernel's wrong definition. See how it's done on other archs including 32-bit mips. The padding will need to be endian-dependent. “we can modify this similar to mips32” > diff --git a/arch/mips64/pthread_arch.h b/arch/mips64/pthread_arch.h > new file mode 100644 > index 0000000..b42edbe > --- /dev/null > +++ b/arch/mips64/pthread_arch.h > @@ -0,0 +1,18 @@ > +static inline struct pthread *__pthread_self() > +{ > +#ifdef __clang__ > + char *tp; > + __asm__ __volatile__ (".word 0x7c03e83b ; move %0, $3" : "=r" (tp) : : "$3" ); > +#else > + register char *tp __asm__("$3"); > + __asm__ __volatile__ (".word 0x7c03e83b" : "=r" (tp) ); > +#endif > + return (pthread_t)(tp - 0x7000 - sizeof(struct pthread)); > +} Not critical, but it would be nice to know if clang still needs this pessimization or if there's a way to make it use the right register directly. > diff --git a/arch/mips64/syscall_arch.h b/arch/mips64/syscall_arch.h > new file mode 100644 > index 0000000..380b3fb > --- /dev/null > +++ b/arch/mips64/syscall_arch.h > @@ -0,0 +1,205 @@ > +#define __SYSCALL_LL_E(x) \ > +((union { long long ll; long l[2]; }){ .ll = x }).l[0], \ > +((union { long long ll; long l[2]; }){ .ll = x }).l[1] > +#define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x)) As Bobby Bingham noted these should both just be defined as (x) for 64-bit archs. > +#define SYSCALL_RLIM_INFINITY (-1UL/2) Is this right for mips64? > +#include > +struct kernel_stat { > + unsigned int st_dev; > + unsigned int __pad1[3]; > + unsigned long long st_ino; > + unsigned int st_mode; > + unsigned int st_nlink; > + int st_uid; > + int st_gid; > + unsigned int st_rdev; > + unsigned int __pad2[3]; > + long long st_size; > + unsigned int st_atime_sec; > + unsigned int st_atime_nsec; > + unsigned int st_mtime_sec; > + unsigned int st_mtime_nsec; > + unsigned int st_ctime_sec; > + unsigned int st_ctime_nsec; > + unsigned int st_blksize; > + unsigned int __pad3; > + unsigned long long st_blocks; > +}; > + > +static void __stat_fix(struct kernel_stat *kst, struct stat *st) > +{ > + extern void *memset(void *s, int c, size_t n); > + > + st->st_dev = kst->st_dev; > + memset (&st->st_pad1, 0, sizeof (st->st_pad1)); > + st->st_ino = kst->st_ino; > + st->st_mode = kst->st_mode; > + st->st_nlink = kst->st_nlink; > + st->st_uid = kst->st_uid; > + st->st_gid = kst->st_gid; > + st->st_rdev = kst->st_rdev; > + memset (&st->st_pad2, 0, sizeof (st->st_pad2)); > + st->st_size = kst->st_size; > + st->st_pad3 = 0; > + st->st_atim.tv_sec = kst->st_atime_sec; > + st->st_atim.tv_nsec = kst->st_atime_nsec; > + st->st_mtim.tv_sec = kst->st_mtime_sec; > + st->st_mtim.tv_nsec = kst->st_mtime_nsec; > + st->st_ctim.tv_sec = kst->st_ctime_sec; > + st->st_ctim.tv_nsec = kst->st_ctime_nsec; > + st->st_blksize = kst->st_blksize; > + st->st_blocks = kst->st_blocks; > + memset (&st->st_pad5, 0, sizeof (st->st_pad5)); > + return; > +} You've made this a lot more complicated than it needs to be. Just copying the __stat_fix code from 32-bit mips should work fine. The only fixup that needs to be done is for st_dev and st_rdev and it can be done in-place. > + > +#ifndef __clang__ > + > +static inline long __syscall0(long n) > +{ > + register long r7 __asm__("$7"); > + register long r2 __asm__("$2"); > + __asm__ __volatile__ ( > + "daddu $2,$0,%2 ; syscall" > + : "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7) > + : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", > + "$14", "$15", "$24", "$25", "hi", "lo", "memory"); > + return r7 ? -r2 : r2; > +} > + > +static inline long __syscall1(long n, long a) > +{ > + register long r4 __asm__("$4") = a; > + register long r7 __asm__("$7"); > + register long r2 __asm__("$2"); > + __asm__ __volatile__ ( > + "daddu $2,$0,%2 ; syscall" > + : "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7), > + "r"(r4) > + : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", > + "$14", "$15", "$24", "$25", "hi", "lo", "memory"); > + return r7 ? -r2 : r2; > +} > + > +static inline long __syscall2(long n, long a, long b) > +{ > + struct kernel_stat kst = {0,}; > + long ret; > + register long r4 __asm__("$4"); > + register long r5 __asm__("$5"); > + register long r7 __asm__("$7"); > + register long r2 __asm__("$2"); > + > + r5 = b; > + if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat) > + r5 = (long) &kst; > + > + r4 = a; > + __asm__ __volatile__ ( > + "daddu $2,$0,%2 ; syscall" > + : "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7), > + "r"(r4), "r"(r5) > + : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", > + "$14", "$15", "$24", "$25", "hi", "lo", "memory"); > + > + ret = r7 ? -r2 : r2; > + if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat) > + __stat_fix(&kst, (struct stat *)b); > + > + return ret; > +} See the logic on 32-bit mips. There should be an early return without performing __stat_fix in the error case (if r7 is nonzero). Otherwise you're performing fixups on uninitialized data. > diff --git a/ldso/dynlink.c b/ldso/dynlink.c > index 87f3b7f..2355d74 100644 > --- a/ldso/dynlink.c > +++ b/ldso/dynlink.c > @@ -325,8 +325,8 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri > for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) { > if (skip_relative && IS_RELATIVE(rel[1], dso->syms)) continue; > type = R_TYPE(rel[1]); > - if (type == REL_NONE) continue; > sym_index = R_SYM(rel[1]); > + if (type == REL_NONE) continue; This looks gratuitous and probably just leftover from old things you tried. ok > @@ -1134,7 +1134,7 @@ static void do_mips_relocs(struct dso *p, size_t *got) > Sym *sym = p->syms + j; > rel[0] = (unsigned char *)got - base; > for (i-=j; i; i--, sym++, rel[0]+=sizeof(size_t)) { > - rel[1] = sym-p->syms << 8 | R_MIPS_JUMP_SLOT; > + rel[1] = R_INFO(sym-p->syms, R_MIPS_JUMP_SLOT); > do_relocs(p, rel, sizeof rel, 2); > } > } Looks ok. > @@ -1365,7 +1365,7 @@ void __dls2(unsigned char *base, size_t *sp) > size_t symbolic_rel_cnt = 0; > apply_addends_to = rel; > for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t)) > - if (!IS_RELATIVE(rel[1], ldso.syms)) symbolic_rel_cnt++; > + if (!IS_RELATIVE(rel[1], ldso.syms)) symbolic_rel_cnt++; This looks like a non-functional change, but wrong (misleading indention) and probably unintentional. ok > diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h > index 48890b2..dcc1bde 100644 > --- a/src/internal/dynlink.h > +++ b/src/internal/dynlink.h > @@ -11,14 +11,49 @@ typedef Elf32_Phdr Phdr; > typedef Elf32_Sym Sym; > #define R_TYPE(x) ((x)&255) > #define R_SYM(x) ((x)>>8) > +#define R_INFO ELF32_R_INFO > #else > typedef Elf64_Ehdr Ehdr; > typedef Elf64_Phdr Phdr; > typedef Elf64_Sym Sym; > #define R_TYPE(x) ((x)&0x7fffffff) > #define R_SYM(x) ((x)>>32) > +#define R_INFO ELF64_R_INFO > +#define ELF64 1 > #endif I don't think ELF64 is used anywhere so it can be removed. > +#ifdef __mips64 > +#undef R_TYPE > +#undef R_SYM > +#undef R_INFO > +#define R_TYPE(INFO) ({ \ > + uint64_t r_info = (INFO); \ > + uint32_t *type, r_type; \ > + type = (((uint32_t*) &r_info) + 1); \ > + r_type = (uint32_t) *(((unsigned char*) type) + 3) | \ > + (uint32_t) *(((unsigned char*) type) + 2) << 8 | \ > + (uint32_t) *(((unsigned char*) type) + 1) << 16; \ > + r_type; \ > +}) > + > +#define R_SYM(INFO) ({ \ > + uint64_t r_info = (INFO); \ > + uint32_t symidx; \ > + symidx = *(((uint32_t*) &r_info) + 0); \ > + symidx; \ > +}) > + > +#define R_INFO(SYM,TYPE1) ({ \ > + uint32_t *pinfo; \ > + uint64_t r_info = 0; \ > + pinfo = ((uint32_t*) &r_info) + 1; \ > + *((uint32_t*) &r_info + 0) = (SYM); \ > + *((unsigned char*) pinfo + 3) = (TYPE1); \ > + r_info; \ > +}) > +#endif At the very least this is buggy (aliasing violations) and gratuitously ugly, and might be wrong for little-endian. Have you tested it on little? See the discussion on top of this mail. This code above implies that the values are always stored in big-endian order as (sym<<32 | type). If they're actually stored native-endian (which I would expect) then the standard Elf64 definitions just work as-is and don't need replacement. If they're always-big, tweaked versions of the standard ones that swap the byte order of their inputs or outputs would be all that's needed. #if __mips64 + for (; rel_size; rel+=3, rel_size-=3*sizeof(size_t)) { + if (ELF64_MIPS_R_TYPE (rel[1]) != REL_SYM_OR_REL) + continue; + size_t *rel_addr = (void *)(base + rel[0]); + *rel_addr = base + rel[2]; + } +#else + for (; rel_size; rel+=3, rel_size-=3*sizeof(size_t)) { + if (!IS_RELATIVE(rel[1], 0)) continue; + size_t *rel_addr = (void *)(base + rel[0]); + *rel_addr = base + rel[2]; + } For big endian ,rel[1] is 0x0000 0000 00 00 12 03 and this is equal to 4611 But for little endian , rel[1] is 0x 12 03 00 00 0000 0000 and it's not 4611 so we have checked the rel_info in glibc. Can you help me on this issue whether there is any alternative for liitle endian. Rich