Hi Rich,

Regarding the N64 relocations (changes in dynlink.h):
MIPS64 N64 relocations are slightly different than that of MIPS32 O32. The 64-bit r_info member of the REL/RELA contains up to three relocation types (8bits each) and two symbol table indexes (8bits and 32bits).

For example (extracted from libc.so):

Offset                 Info                    Type                          Sym. Value    Sym. Name
0000000c4000  000000001203 R_MIPS_REL32
                                            Type2: R_MIPS_64
                                            Type3: R_MIPS_NONE

In case of big-endian systems, R_TYPE (x & 0x7fffffff) and R_SYM (x >> 32) macros can be used to extract type and symbol table index. However on little-endian system, the raw r_info looks like 0x0312000000000000 and thus the required relocation information cannot be extracted using R_TYPE etc. macros.

We have tested the modified R_TYPE/R_SYM/R_INFO macros for both big and little-endian. However as mentioned in the review, the change doesn't look good.
Please refer to the revised dynlink.h given below.

diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h
index 48890b2..05cbdcc 100644
--- a/src/internal/dynlink.h
+++ b/src/internal/dynlink.h
@@ -11,12 +11,56 @@ typedef Elf32_Phdr Phdr;
typedef Elf32_Sym Sym;
#define R_TYPE(x) ((x)&255)
#define R_SYM(x) ((x)>>8)
+#define R_INFO ELF32_R_INFO
#else
typedef Elf64_Ehdr Ehdr;
typedef Elf64_Phdr Phdr;
typedef Elf64_Sym Sym;
#define R_TYPE(x) ((x)&0x7fffffff)
#define R_SYM(x) ((x)>>32)
+#define R_INFO ELF64_R_INFO
+#endif
+
+#ifdef __mips64
+#undef R_TYPE
+#undef R_SYM
+#undef R_INFO
+
+typedef union {
+  uint64_t r_info;
+  struct {
+    uint32_t sym1;
+    unsigned char sym2;
+    unsigned char type3;
+    unsigned char type2;
+    unsigned char type1;
+  } r_info_fields;
+} _ELF64_MIPS_R_INFO;
+
+#define R_TYPE(INFO) ({                            \
+  _ELF64_MIPS_R_INFO info;                         \
+  uint32_t r_type;                                 \
+  info.r_info = (INFO);                            \
+  r_type = (uint32_t) info.r_info_fields.type1 |   \
+    (uint32_t) info.r_info_fields.type2 << 8 |     \
+    (uint32_t) info.r_info_fields.type3 << 16;     \
+  r_type;                                          \
+})
+
+#define R_SYM(INFO) ({              \
+  _ELF64_MIPS_R_INFO info;          \
+  uint32_t symidx;                  \
+  info.r_info = (INFO);             \
+  symidx = info.r_info_fields.sym1; \
+  symidx;                           \
+})
+
+#define R_INFO(SYM,TYPE1) ({          \
+  _ELF64_MIPS_R_INFO info;            \
+  info.r_info_fields.sym1 = (SYM);    \
+  info.r_info_fields.type1 = (TYPE1); \
+  info.r_info;                        \
+})
#endif
 /* These enum constants provide unmatchable default values for

Regards,
Jaydeep

From: Mahesh Bodapati
Sent: 23 February 2016 PM 12:34
To: Jaydeep Patil
Subject: FW: [musl] mips n64 porting review

From: Mahesh Bodapati [mailto:maheshbodapati90@gmail.com]
Sent: 23 February 2016 11:56
To: Mahesh Bodapati
Subject: Fwd: [musl] mips n64 porting review


---------- Forwarded message ----------
From: Rich Felker <dalias@libc.org<mailto:dalias@libc.org>>
Date: Tue, Feb 23, 2016 at 9:02 AM
Subject: Re: [musl] mips n64 porting review
To: musl@lists.openwall.com<mailto:musl@lists.openwall.com>


On Mon, Feb 22, 2016 at 10:06:02AM +0000, Mahesh Bodapati wrote:
> Hi Rich,
> I have attached the patch which has all the MIPS n64 porting work. I
> have created mipsn64port_review remote branch on GitHub and please
> have a look at
> https://github.com/MaheshBodapati/musl/commits/mipsn64port_review
> which has the broken down patches and the base revision on which we
> have prepared patch is d150764
>
> We have tested on both little endian and big endian, here are the FAILS
>
> Musl mips n64 libc testing on EL :
>
> Fails: 17
> FAIL ./src/api/main.exe [status 1]
> FAIL ./src/functional/wcstol-static.exe [status 1]
> FAIL ./src/functional/wcstol.exe [status 1]
> FAIL ./src/math/acosh.exe [status 1]
> FAIL ./src/math/asinh.exe [status 1]
> FAIL ./src/math/j0.exe [status 1]
> FAIL ./src/math/jn.exe [status 1]
> FAIL ./src/math/jnf.exe [status 1]
> FAIL ./src/math/lgamma.exe [status 1]
> FAIL ./src/math/lgamma_r.exe [status 1]
> FAIL ./src/math/lgammaf.exe [status 1]
> FAIL ./src/math/lgammaf_r.exe [status 1]
> FAIL ./src/math/sinh.exe [status 1]
> FAIL ./src/math/tgamma.exe [status 1]
> FAIL ./src/math/y0.exe [status 1]
> FAIL ./src/math/y0f.exe [status 1]
> FAIL ./src/math/ynf.exe [status 1]
>
> Musl mips n64 libc testing on EB :
> Fails:17
> FAIL ./src/api/main.exe [status 1]
> FAIL ./src/functional/wcstol-static.exe [status 1]
> FAIL ./src/functional/wcstol.exe [status 1]
> FAIL ./src/math/acosh.exe [status 1]
> FAIL ./src/math/asinh.exe [status 1]
> FAIL ./src/math/j0.exe [status 1]
> FAIL ./src/math/jn.exe [status 1]
> FAIL ./src/math/jnf.exe [status 1]
> FAIL ./src/math/lgamma.exe [status 1]
> FAIL ./src/math/lgamma_r.exe [status 1]
> FAIL ./src/math/lgammaf.exe [status 1]
> FAIL ./src/math/lgammaf_r.exe [status 1]
> FAIL ./src/math/sinh.exe [status 1]
> FAIL ./src/math/tgamma.exe [status 1]
> FAIL ./src/math/y0.exe [status 1]
> FAIL ./src/math/y0f.exe [status 1]
> FAIL ./src/math/ynf.exe [status 1]
These look fairly expected.

> diff --git a/arch/mips/syscall_arch.h b/arch/mips/syscall_arch.h
               ^^^^^^^^^

This is changing the existing mips port (not mips64)...

> index 39c0ea3..8097180 100644
> --- a/arch/mips/syscall_arch.h
> +++ b/arch/mips/syscall_arch.h
> @@ -54,15 +54,16 @@ static inline long __syscall2(long n, long a, long b)
>       register long r5 __asm__("$5") = b;
>       register long r7 __asm__("$7");
>       register long r2 __asm__("$2");
> +     long ret;
>       __asm__ __volatile__ (
>               "addu $2,$0,%2 ; syscall"
>               : "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
>                 "r"(r4), "r"(r5)
>               : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
>                 "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> -     if (r7) return -r2;
> -     long ret = r2;
> -     if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
> +     ret = r7 ? -r2 : r2;
> +     if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64)
> +             __stat_fix (b);
>       return ret;
>  }
>
> @@ -79,10 +80,7 @@ static inline long __syscall3(long n, long a, long b, long c)
>                 "r"(r4), "r"(r5), "r"(r6)
>               : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
>                 "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> -     if (r7) return -r2;
> -     long ret = r2;
> -     if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
> -     return ret;
> +     return r7 ? -r2 : r2;
>  }

...and significantly breaking it.


“ok,we can keep  same syscall_arch.h”


> diff --git a/arch/mips64/atomic_arch.h b/arch/mips64/atomic_arch.h
> new file mode 100644
> index 0000000..d90442d
> --- /dev/null
> +++ b/arch/mips64/atomic_arch.h
> @@ -0,0 +1,221 @@
> +#define a_cas a_cas
> +static inline int a_cas(volatile int *p, int t, int s)
> +{
> +     int dummy;
> +     __asm__ __volatile__(
> +             ".set push\n"
> +             ".set noreorder\n"
> +             "       sync\n"
> +             "1:     ll %0, %2\n"
> +             "       bne %0, %3, 1f\n"
> +             "       addu %1, %4, $0\n"
> +             "       sc %1, %2\n"
> +             "       beq %1, $0, 1b\n"
> +             "       nop\n"
> +             "       sync\n"
> +             "1:     \n"
> +             ".set pop\n"
> +             : "=&r"(t), "=&r"(dummy), "+m"(*p) : "r"(t), "r"(s) : "memory" );
> +        return t;
> +}
> [...]

As noted by Bobby Bingham, you can just define the new ll/sc fragments
rather than big asm blocks like were needed in old musl versions. I'll
try to get the right infrastructure for the 64-bit pointer variant
upstream right away so you can use it without needing more duplication
in the mips64 port; right now it's just in the aarch64 dir.

> diff --git a/arch/mips64/bits/float.h b/arch/mips64/bits/float.h
> new file mode 100644
> index 0000000..719c790
> --- /dev/null
> +++ b/arch/mips64/bits/float.h
> @@ -0,0 +1,16 @@
> +#define FLT_EVAL_METHOD 0
> +
> +#define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L
> +#define LDBL_MIN 3.36210314311209350626267781732175260e-4932L
> +#define LDBL_MAX 1.18973149535723176508575932662800702e+4932L
> +#define LDBL_EPSILON 1.92592994438723585305597794258492732e-34L
> +
> +#define LDBL_MANT_DIG 113
> +#define LDBL_MIN_EXP (-16381)
> +#define LDBL_MAX_EXP 16384
> +
> +#define LDBL_DIG 33
> +#define LDBL_MIN_10_EXP (-4931)
> +#define LDBL_MAX_10_EXP 4932
> +
> +#define DECIMAL_DIG 36

Is your mips64 port using IEEE quad? These values look like they're
matched to IEEE quad, but I couldn't find clear answers on whether
mips64 is using IEEE quad or double-double. The latter cannot be
supported, so if that's what it's using, we'd need to either find a
way to configure the compiler for quad or for 64-bit long double.


“Szabolcs Nagy replied as below in prior mail discussions

bits/float.h is wrong

if mips64 uses ieee binary128 then copy aarch64 float.h otherwise use arm float.h



long double is same as double on o32 and a 128-bit IEEE quad on mips64-linux



> diff --git a/arch/mips64/bits/sem.h b/arch/mips64/bits/sem.h
> new file mode 100644
> index 0000000..1c05a22
> --- /dev/null
> +++ b/arch/mips64/bits/sem.h
> @@ -0,0 +1,8 @@
> +struct semid_ds {
> +     struct ipc_perm sem_perm;
> +     time_t sem_otime;
> +     time_t sem_ctime;
> +     unsigned long sem_nsems;
> +     unsigned long  __unused3;
> +     unsigned long __unused4;
> +};

sem_nsems has to have type unsigned short (this is a POSIX
requirement), with appropriate padding to match the kernel's wrong
definition. See how it's done on other archs including 32-bit mips.
The padding will need to be endian-dependent.

“we can modify this similar to mips32”


> diff --git a/arch/mips64/pthread_arch.h b/arch/mips64/pthread_arch.h
> new file mode 100644
> index 0000000..b42edbe
> --- /dev/null
> +++ b/arch/mips64/pthread_arch.h
> @@ -0,0 +1,18 @@
> +static inline struct pthread *__pthread_self()
> +{
> +#ifdef __clang__
> +     char *tp;
> +     __asm__ __volatile__ (".word 0x7c03e83b ; move %0, $3" : "=r" (tp) : : "$3" );
> +#else
> +     register char *tp __asm__("$3");
> +     __asm__ __volatile__ (".word 0x7c03e83b" : "=r" (tp) );
> +#endif
> +     return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
> +}

Not critical, but it would be nice to know if clang still needs this
pessimization or if there's a way to make it use the right register
directly.

> diff --git a/arch/mips64/syscall_arch.h b/arch/mips64/syscall_arch.h
> new file mode 100644
> index 0000000..380b3fb
> --- /dev/null
> +++ b/arch/mips64/syscall_arch.h
> @@ -0,0 +1,205 @@
> +#define __SYSCALL_LL_E(x) \
> +((union { long long ll; long l[2]; }){ .ll = x }).l[0], \
> +((union { long long ll; long l[2]; }){ .ll = x }).l[1]
> +#define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))

As Bobby Bingham noted these should both just be defined as (x) for
64-bit archs.

> +#define SYSCALL_RLIM_INFINITY (-1UL/2)

Is this right for mips64?

> +#include <sys/stat.h>
> +struct kernel_stat {
> +    unsigned int st_dev;
> +    unsigned int __pad1[3];
> +    unsigned long long st_ino;
> +    unsigned int st_mode;
> +    unsigned int st_nlink;
> +    int st_uid;
> +    int st_gid;
> +    unsigned int st_rdev;
> +    unsigned int __pad2[3];
> +    long long st_size;
> +    unsigned int st_atime_sec;
> +    unsigned int st_atime_nsec;
> +    unsigned int st_mtime_sec;
> +    unsigned int st_mtime_nsec;
> +    unsigned int st_ctime_sec;
> +    unsigned int st_ctime_nsec;
> +    unsigned int st_blksize;
> +    unsigned int __pad3;
> +    unsigned long long st_blocks;
> +};
> +
> +static void __stat_fix(struct kernel_stat *kst, struct stat *st)
> +{
> +     extern void *memset(void *s, int c, size_t n);
> +
> +     st->st_dev = kst->st_dev;
> +     memset (&st->st_pad1, 0, sizeof (st->st_pad1));
> +     st->st_ino = kst->st_ino;
> +     st->st_mode = kst->st_mode;
> +     st->st_nlink = kst->st_nlink;
> +     st->st_uid = kst->st_uid;
> +     st->st_gid = kst->st_gid;
> +     st->st_rdev = kst->st_rdev;
> +     memset (&st->st_pad2, 0, sizeof (st->st_pad2));
> +     st->st_size = kst->st_size;
> +     st->st_pad3 = 0;
> +     st->st_atim.tv_sec = kst->st_atime_sec;
> +     st->st_atim.tv_nsec = kst->st_atime_nsec;
> +     st->st_mtim.tv_sec = kst->st_mtime_sec;
> +     st->st_mtim.tv_nsec = kst->st_mtime_nsec;
> +     st->st_ctim.tv_sec = kst->st_ctime_sec;
> +     st->st_ctim.tv_nsec = kst->st_ctime_nsec;
> +     st->st_blksize = kst->st_blksize;
> +     st->st_blocks = kst->st_blocks;
> +     memset (&st->st_pad5, 0, sizeof (st->st_pad5));
> +     return;
> +}

You've made this a lot more complicated than it needs to be. Just
copying the __stat_fix code from 32-bit mips should work fine. The
only fixup that needs to be done is for st_dev and st_rdev and it can
be done in-place.

> +
> +#ifndef __clang__
> +
> +static inline long __syscall0(long n)
> +{
> +     register long r7 __asm__("$7");
> +     register long r2 __asm__("$2");
> +     __asm__ __volatile__ (
> +             "daddu $2,$0,%2 ; syscall"
> +             : "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7)
> +             : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
> +               "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> +     return r7 ? -r2 : r2;
> +}
> +
> +static inline long __syscall1(long n, long a)
> +{
> +     register long r4 __asm__("$4") = a;
> +     register long r7 __asm__("$7");
> +     register long r2 __asm__("$2");
> +     __asm__ __volatile__ (
> +             "daddu $2,$0,%2 ; syscall"
> +             : "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
> +               "r"(r4)
> +             : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
> +               "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> +     return r7 ? -r2 : r2;
> +}
> +
> +static inline long __syscall2(long n, long a, long b)
> +{
> +     struct kernel_stat kst = {0,};
> +     long ret;
> +     register long r4 __asm__("$4");
> +     register long r5 __asm__("$5");
> +     register long r7 __asm__("$7");
> +     register long r2 __asm__("$2");
> +
> +     r5 = b;
> +     if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
> +             r5 = (long) &kst;
> +
> +     r4 = a;
> +     __asm__ __volatile__ (
> +             "daddu $2,$0,%2 ; syscall"
> +             : "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
> +               "r"(r4), "r"(r5)
> +             : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
> +               "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> +
> +     ret = r7 ? -r2 : r2;
> +     if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
> +             __stat_fix(&kst, (struct stat *)b);
> +
> +     return ret;
> +}

See the logic on 32-bit mips. There should be an early return without
performing __stat_fix in the error case (if r7 is nonzero). Otherwise
you're performing fixups on uninitialized data.

> diff --git a/ldso/dynlink.c b/ldso/dynlink.c
> index 87f3b7f..2355d74 100644
> --- a/ldso/dynlink.c
> +++ b/ldso/dynlink.c
> @@ -325,8 +325,8 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
>       for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
>               if (skip_relative && IS_RELATIVE(rel[1], dso->syms)) continue;
>               type = R_TYPE(rel[1]);
> -             if (type == REL_NONE) continue;
>               sym_index = R_SYM(rel[1]);
> +             if (type == REL_NONE) continue;

This looks gratuitous and probably just leftover from old things you tried.


ok


> @@ -1134,7 +1134,7 @@ static void do_mips_relocs(struct dso *p, size_t *got)
>       Sym *sym = p->syms + j;
>       rel[0] = (unsigned char *)got - base;
>       for (i-=j; i; i--, sym++, rel[0]+=sizeof(size_t)) {
> -             rel[1] = sym-p->syms << 8 | R_MIPS_JUMP_SLOT;
> +             rel[1] = R_INFO(sym-p->syms, R_MIPS_JUMP_SLOT);
>               do_relocs(p, rel, sizeof rel, 2);
>       }
>  }

Looks ok.

> @@ -1365,7 +1365,7 @@ void __dls2(unsigned char *base, size_t *sp)
>       size_t symbolic_rel_cnt = 0;
>       apply_addends_to = rel;
>       for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t))
> -             if (!IS_RELATIVE(rel[1], ldso.syms)) symbolic_rel_cnt++;
> +     if (!IS_RELATIVE(rel[1], ldso.syms)) symbolic_rel_cnt++;

This looks like a non-functional change, but wrong (misleading
indention) and probably unintentional.

ok


> diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h
> index 48890b2..dcc1bde 100644
> --- a/src/internal/dynlink.h
> +++ b/src/internal/dynlink.h
> @@ -11,14 +11,49 @@ typedef Elf32_Phdr Phdr;
>  typedef Elf32_Sym Sym;
>  #define R_TYPE(x) ((x)&255)
>  #define R_SYM(x) ((x)>>8)
> +#define R_INFO       ELF32_R_INFO
>  #else
>  typedef Elf64_Ehdr Ehdr;
>  typedef Elf64_Phdr Phdr;
>  typedef Elf64_Sym Sym;
>  #define R_TYPE(x) ((x)&0x7fffffff)
>  #define R_SYM(x) ((x)>>32)
> +#define R_INFO       ELF64_R_INFO
> +#define ELF64 1
>  #endif

I don't think ELF64 is used anywhere so it can be removed.

> +#ifdef __mips64
> +#undef R_TYPE
> +#undef R_SYM
> +#undef R_INFO
> +#define R_TYPE(INFO) ({                                                                              \
> +     uint64_t r_info = (INFO);                                                               \
> +     uint32_t *type, r_type;                                                                 \
> +     type = (((uint32_t*) &r_info) + 1);                                             \
> +     r_type = (uint32_t) *(((unsigned char*) type) + 3) |    \
> +     (uint32_t) *(((unsigned char*) type) + 2) << 8 |                \
> +     (uint32_t) *(((unsigned char*) type) + 1) << 16;                \
> +     r_type;                                                                                                 \
> +})
> +
> +#define R_SYM(INFO) ({                                               \
> +     uint64_t r_info = (INFO);                               \
> +     uint32_t symidx;                                                \
> +     symidx = *(((uint32_t*) &r_info) + 0);  \
> +     symidx;                                                                 \
> +})
> +
> +#define R_INFO(SYM,TYPE1) ({                                 \
> +     uint32_t *pinfo;                                                        \
> +     uint64_t r_info = 0;                                            \
> +     pinfo = ((uint32_t*) &r_info) + 1;                      \
> +     *((uint32_t*) &r_info + 0) = (SYM);                     \
> +     *((unsigned char*) pinfo + 3) = (TYPE1);        \
> +     r_info;                                                                         \
> +})
> +#endif

At the very least this is buggy (aliasing violations) and gratuitously
ugly, and might be wrong for little-endian. Have you tested it on
little?


See the discussion on top of this mail.


This code above implies that the values are always stored in
big-endian order as (sym<<32 | type). If they're actually stored
native-endian (which I would expect) then the standard Elf64
definitions just work as-is and don't need replacement. If they're
always-big, tweaked versions of the standard ones that swap the byte
order of their inputs or outputs would be all that's needed.




#if __mips64

+        for (; rel_size; rel+=3, rel_size-=3*sizeof(size_t)) {

+                if (ELF64_MIPS_R_TYPE (rel[1]) != REL_SYM_OR_REL)

+                continue;

+                size_t *rel_addr = (void *)(base + rel[0]);

+                *rel_addr = base + rel[2];

+        }

+#else

+        for (; rel_size; rel+=3, rel_size-=3*sizeof(size_t)) {

+                if (!IS_RELATIVE(rel[1], 0)) continue;

+                size_t *rel_addr = (void *)(base + rel[0]);

+                *rel_addr = base + rel[2];

+        }



For big endian ,rel[1] is 0x0000 0000 00 00 12 03   and this is equal to 4611

But for little endian , rel[1] is 0x 12 03 00 00 0000 0000 and it's not 4611 so we have checked the rel_info in glibc.

Can you help me on this issue whether there is any alternative for liitle endian.


Rich