mailing list of musl libc
 help / color / mirror / code / Atom feed
* [PATCH] Fix TLS layout of TLS variant I when there is a gap above TP
@ 2018-06-01 23:52 Szabolcs Nagy
  2018-06-02  2:59 ` Rich Felker
  2018-06-04  7:27 ` Phillip Berndt
  0 siblings, 2 replies; 7+ messages in thread
From: Szabolcs Nagy @ 2018-06-01 23:52 UTC (permalink / raw)
  To: musl

In TLS variant I the TLS is above TP (or above a fixed offset from TP)
but on some targets there is a reserved gap above TP before TLS starts.

This matters for the local-exec tls access model when the offsets of
TLS variables from the TP are hard coded by the linker into the
executable, so the libc must compute these offsets the same way as the
linker.  The tls offset of the main module has to be

	alignup(GAP_ABOVE_TP, main_tls_align).

If there is no TLS in the main module then the gap can be ignored
since musl does not use it and the tls access models of shared
libraries are not affected.

The previous setup only worked if (tls_align & -GAP_ABOVE_TP) == 0
(i.e. TLS did not require large alignment) because the gap was
treated as a fixed offset from TP.  Now the TP points at the end
of the pthread struct (which is aligned) and there is a gap above
it (which may also need alignment).

The fix required changing TP_ADJ and __pthread_self on affected
targets (aarch64, arm and sh) and in the tlsdesc asm the offset to
access the dtv changed too.
---
passed my simple local-exec tests.

 arch/aarch64/pthread_arch.h   |  5 +++--
 arch/aarch64/reloc.h          |  2 +-
 arch/arm/pthread_arch.h       |  7 ++++---
 arch/arm/reloc.h              |  2 +-
 arch/mips/pthread_arch.h      |  1 +
 arch/mips64/pthread_arch.h    |  1 +
 arch/mipsn32/pthread_arch.h   |  1 +
 arch/or1k/pthread_arch.h      |  1 +
 arch/powerpc/pthread_arch.h   |  1 +
 arch/powerpc64/pthread_arch.h |  1 +
 arch/sh/pthread_arch.h        |  5 +++--
 arch/sh/reloc.h               |  2 +-
 ldso/dynlink.c                |  5 +++--
 src/env/__init_tls.c          | 10 ++++++++--
 src/ldso/aarch64/tlsdesc.s    |  5 ++---
 15 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/arch/aarch64/pthread_arch.h b/arch/aarch64/pthread_arch.h
index b2e2d8f1..e8499d8e 100644
--- a/arch/aarch64/pthread_arch.h
+++ b/arch/aarch64/pthread_arch.h
@@ -2,10 +2,11 @@ static inline struct pthread *__pthread_self()
 {
 	char *self;
 	__asm__ __volatile__ ("mrs %0,tpidr_el0" : "=r"(self));
-	return (void*)(self + 16 - sizeof(struct pthread));
+	return (void*)(self - sizeof(struct pthread));
 }
 
 #define TLS_ABOVE_TP
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 16)
+#define GAP_ABOVE_TP 16
+#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC pc
diff --git a/arch/aarch64/reloc.h b/arch/aarch64/reloc.h
index 51b66e23..40cf0b28 100644
--- a/arch/aarch64/reloc.h
+++ b/arch/aarch64/reloc.h
@@ -10,7 +10,7 @@
 
 #define NO_LEGACY_INITFINI
 
-#define TPOFF_K 16
+#define TPOFF_K 0
 
 #define REL_SYMBOLIC    R_AARCH64_ABS64
 #define REL_GOT         R_AARCH64_GLOB_DAT
diff --git a/arch/arm/pthread_arch.h b/arch/arm/pthread_arch.h
index 6657e198..8f2ae8f8 100644
--- a/arch/arm/pthread_arch.h
+++ b/arch/arm/pthread_arch.h
@@ -5,7 +5,7 @@ static inline pthread_t __pthread_self()
 {
 	char *p;
 	__asm__ __volatile__ ( "mrc p15,0,%0,c13,c0,3" : "=r"(p) );
-	return (void *)(p+8-sizeof(struct pthread));
+	return (void *)(p-sizeof(struct pthread));
 }
 
 #else
@@ -21,12 +21,13 @@ static inline pthread_t __pthread_self()
 	extern uintptr_t __attribute__((__visibility__("hidden"))) __a_gettp_ptr;
 	register uintptr_t p __asm__("r0");
 	__asm__ __volatile__ ( BLX " %1" : "=r"(p) : "r"(__a_gettp_ptr) : "cc", "lr" );
-	return (void *)(p+8-sizeof(struct pthread));
+	return (void *)(p-sizeof(struct pthread));
 }
 
 #endif
 
 #define TLS_ABOVE_TP
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8)
+#define GAP_ABOVE_TP 8
+#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC arm_pc
diff --git a/arch/arm/reloc.h b/arch/arm/reloc.h
index b175711d..4b00bf64 100644
--- a/arch/arm/reloc.h
+++ b/arch/arm/reloc.h
@@ -16,7 +16,7 @@
 
 #define NO_LEGACY_INITFINI
 
-#define TPOFF_K 8
+#define TPOFF_K 0
 
 #define REL_SYMBOLIC    R_ARM_ABS32
 #define REL_GOT         R_ARM_GLOB_DAT
diff --git a/arch/mips/pthread_arch.h b/arch/mips/pthread_arch.h
index e5812655..5fea15ad 100644
--- a/arch/mips/pthread_arch.h
+++ b/arch/mips/pthread_arch.h
@@ -11,6 +11,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000
diff --git a/arch/mips64/pthread_arch.h b/arch/mips64/pthread_arch.h
index e5812655..5fea15ad 100644
--- a/arch/mips64/pthread_arch.h
+++ b/arch/mips64/pthread_arch.h
@@ -11,6 +11,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000
diff --git a/arch/mipsn32/pthread_arch.h b/arch/mipsn32/pthread_arch.h
index e5812655..5fea15ad 100644
--- a/arch/mipsn32/pthread_arch.h
+++ b/arch/mipsn32/pthread_arch.h
@@ -11,6 +11,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000
diff --git a/arch/or1k/pthread_arch.h b/arch/or1k/pthread_arch.h
index 7decd769..521b9c53 100644
--- a/arch/or1k/pthread_arch.h
+++ b/arch/or1k/pthread_arch.h
@@ -12,6 +12,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC regs.pc
diff --git a/arch/powerpc/pthread_arch.h b/arch/powerpc/pthread_arch.h
index 7c5c4fad..79e5a09f 100644
--- a/arch/powerpc/pthread_arch.h
+++ b/arch/powerpc/pthread_arch.h
@@ -11,6 +11,7 @@ static inline struct pthread *__pthread_self()
 }
                         
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000
diff --git a/arch/powerpc64/pthread_arch.h b/arch/powerpc64/pthread_arch.h
index 2f976fe2..37b75e29 100644
--- a/arch/powerpc64/pthread_arch.h
+++ b/arch/powerpc64/pthread_arch.h
@@ -6,6 +6,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000
diff --git a/arch/sh/pthread_arch.h b/arch/sh/pthread_arch.h
index 2756e7ec..41fefacf 100644
--- a/arch/sh/pthread_arch.h
+++ b/arch/sh/pthread_arch.h
@@ -2,10 +2,11 @@ static inline struct pthread *__pthread_self()
 {
 	char *self;
 	__asm__ __volatile__ ("stc gbr,%0" : "=r" (self) );
-	return (struct pthread *) (self + 8 - sizeof(struct pthread));
+	return (struct pthread *) (self - sizeof(struct pthread));
 }
 
 #define TLS_ABOVE_TP
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8)
+#define GAP_ABOVE_TP 8
+#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC sc_pc
diff --git a/arch/sh/reloc.h b/arch/sh/reloc.h
index 0238ce07..a1f16cb1 100644
--- a/arch/sh/reloc.h
+++ b/arch/sh/reloc.h
@@ -20,7 +20,7 @@
 
 #define LDSO_ARCH "sh" ENDIAN_SUFFIX FP_SUFFIX ABI_SUFFIX
 
-#define TPOFF_K 8
+#define TPOFF_K 0
 
 #define REL_SYMBOLIC    R_SH_DIR32
 #define REL_OFFSET      R_SH_REL32
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index cea5f452..c6216b7c 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -1594,8 +1594,9 @@ _Noreturn void __dls3(size_t *sp)
 		libc.tls_head = tls_tail = &app.tls;
 		app.tls_id = tls_cnt = 1;
 #ifdef TLS_ABOVE_TP
-		app.tls.offset = 0;
-		tls_offset = app.tls.size
+		app.tls.offset = GAP_ABOVE_TP;
+		app.tls.offset += -GAP_ABOVE_TP & (app.tls.align-1);
+		tls_offset = app.tls.offset + app.tls.size
 			+ ( -((uintptr_t)app.tls.image + app.tls.size)
 			& (app.tls.align-1) );
 #else
diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c
index 1c5d98a0..31d324a8 100644
--- a/src/env/__init_tls.c
+++ b/src/env/__init_tls.c
@@ -104,13 +104,19 @@ static void static_init_tls(size_t *aux)
 
 	main_tls.size += (-main_tls.size - (uintptr_t)main_tls.image)
 		& (main_tls.align-1);
-	if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN;
-#ifndef TLS_ABOVE_TP
+#ifdef TLS_ABOVE_TP
+	main_tls.offset = GAP_ABOVE_TP;
+	main_tls.offset += -GAP_ABOVE_TP & (main_tls.align-1);
+#else
 	main_tls.offset = main_tls.size;
 #endif
+	if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN;
 
 	libc.tls_align = main_tls.align;
 	libc.tls_size = 2*sizeof(void *) + sizeof(struct pthread)
+#ifdef TLS_ABOVE_TP
+		+ main_tls.offset
+#endif
 		+ main_tls.size + main_tls.align
 		+ MIN_TLS_ALIGN-1 & -MIN_TLS_ALIGN;
 
diff --git a/src/ldso/aarch64/tlsdesc.s b/src/ldso/aarch64/tlsdesc.s
index 8ed5c267..8e4004d7 100644
--- a/src/ldso/aarch64/tlsdesc.s
+++ b/src/ldso/aarch64/tlsdesc.s
@@ -14,7 +14,7 @@ __tlsdesc_static:
 // size_t __tlsdesc_dynamic(size_t *a)
 // {
 // 	struct {size_t modidx,off;} *p = (void*)a[1];
-// 	size_t *dtv = *(size_t**)(tp + 16 - 8);
+// 	size_t *dtv = *(size_t**)(tp - 8);
 // 	if (p->modidx <= dtv[0])
 // 		return dtv[p->modidx] + p->off - tp;
 // 	return __tls_get_new(p) - tp;
@@ -28,8 +28,7 @@ __tlsdesc_dynamic:
 	mrs x1,tpidr_el0      // tp
 	ldr x0,[x0,#8]        // p
 	ldr x2,[x0]           // p->modidx
-	add x3,x1,#8
-	ldr x3,[x3]           // dtv
+	ldr x3,[x1,#-8]       // dtv
 	ldr x4,[x3]           // dtv[0]
 	cmp x2,x4
 	b.hi 1f
-- 
2.16.3



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fix TLS layout of TLS variant I when there is a gap above TP
  2018-06-01 23:52 [PATCH] Fix TLS layout of TLS variant I when there is a gap above TP Szabolcs Nagy
@ 2018-06-02  2:59 ` Rich Felker
  2018-06-02  7:15   ` Szabolcs Nagy
  2018-06-04  7:27 ` Phillip Berndt
  1 sibling, 1 reply; 7+ messages in thread
From: Rich Felker @ 2018-06-02  2:59 UTC (permalink / raw)
  To: musl

On Sat, Jun 02, 2018 at 01:52:01AM +0200, Szabolcs Nagy wrote:
> In TLS variant I the TLS is above TP (or above a fixed offset from TP)
> but on some targets there is a reserved gap above TP before TLS starts.
> 
> This matters for the local-exec tls access model when the offsets of
> TLS variables from the TP are hard coded by the linker into the
> executable, so the libc must compute these offsets the same way as the
> linker.  The tls offset of the main module has to be
> 
> 	alignup(GAP_ABOVE_TP, main_tls_align).
> 
> If there is no TLS in the main module then the gap can be ignored
> since musl does not use it and the tls access models of shared
> libraries are not affected.
> 
> The previous setup only worked if (tls_align & -GAP_ABOVE_TP) == 0
> (i.e. TLS did not require large alignment) because the gap was
> treated as a fixed offset from TP.  Now the TP points at the end
> of the pthread struct (which is aligned) and there is a gap above
> it (which may also need alignment).
> 
> The fix required changing TP_ADJ and __pthread_self on affected
> targets (aarch64, arm and sh) and in the tlsdesc asm the offset to
> access the dtv changed too.
> ---

On first glance it all looks right. I'll read in more detail soon.
Thanks!

> passed my simple local-exec tests.

Did you test all archs or just some? I think we should at least run
libc-test (if it sufficiently tests TLS) on the affected archs to make
sure there are no regressions.

Rich


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fix TLS layout of TLS variant I when there is a gap above TP
  2018-06-02  2:59 ` Rich Felker
@ 2018-06-02  7:15   ` Szabolcs Nagy
  2018-06-03  1:11     ` Rich Felker
  0 siblings, 1 reply; 7+ messages in thread
From: Szabolcs Nagy @ 2018-06-02  7:15 UTC (permalink / raw)
  To: musl

* Rich Felker <dalias@libc.org> [2018-06-01 22:59:11 -0400]:
> On Sat, Jun 02, 2018 at 01:52:01AM +0200, Szabolcs Nagy wrote:
> > In TLS variant I the TLS is above TP (or above a fixed offset from TP)
> > but on some targets there is a reserved gap above TP before TLS starts.
> > 
> > This matters for the local-exec tls access model when the offsets of
> > TLS variables from the TP are hard coded by the linker into the
> > executable, so the libc must compute these offsets the same way as the
> > linker.  The tls offset of the main module has to be
> > 
> > 	alignup(GAP_ABOVE_TP, main_tls_align).
> > 
> > If there is no TLS in the main module then the gap can be ignored
> > since musl does not use it and the tls access models of shared
> > libraries are not affected.
> > 
> > The previous setup only worked if (tls_align & -GAP_ABOVE_TP) == 0
> > (i.e. TLS did not require large alignment) because the gap was
> > treated as a fixed offset from TP.  Now the TP points at the end
> > of the pthread struct (which is aligned) and there is a gap above
> > it (which may also need alignment).
> > 
> > The fix required changing TP_ADJ and __pthread_self on affected
> > targets (aarch64, arm and sh) and in the tlsdesc asm the offset to
> > access the dtv changed too.
> > ---
> 
> On first glance it all looks right. I'll read in more detail soon.
> Thanks!
> 
> > passed my simple local-exec tests.
> 
> Did you test all archs or just some? I think we should at least run
> libc-test (if it sufficiently tests TLS) on the affected archs to make
> sure there are no regressions.
> 

ran libc-test on various targets via qemu-user, i didnt see any regressions.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fix TLS layout of TLS variant I when there is a gap above TP
  2018-06-02  7:15   ` Szabolcs Nagy
@ 2018-06-03  1:11     ` Rich Felker
  0 siblings, 0 replies; 7+ messages in thread
From: Rich Felker @ 2018-06-03  1:11 UTC (permalink / raw)
  To: musl

On Sat, Jun 02, 2018 at 09:15:23AM +0200, Szabolcs Nagy wrote:
> * Rich Felker <dalias@libc.org> [2018-06-01 22:59:11 -0400]:
> > On Sat, Jun 02, 2018 at 01:52:01AM +0200, Szabolcs Nagy wrote:
> > > In TLS variant I the TLS is above TP (or above a fixed offset from TP)
> > > but on some targets there is a reserved gap above TP before TLS starts.
> > > 
> > > This matters for the local-exec tls access model when the offsets of
> > > TLS variables from the TP are hard coded by the linker into the
> > > executable, so the libc must compute these offsets the same way as the
> > > linker.  The tls offset of the main module has to be
> > > 
> > > 	alignup(GAP_ABOVE_TP, main_tls_align).
> > > 
> > > If there is no TLS in the main module then the gap can be ignored
> > > since musl does not use it and the tls access models of shared
> > > libraries are not affected.
> > > 
> > > The previous setup only worked if (tls_align & -GAP_ABOVE_TP) == 0
> > > (i.e. TLS did not require large alignment) because the gap was
> > > treated as a fixed offset from TP.  Now the TP points at the end
> > > of the pthread struct (which is aligned) and there is a gap above
> > > it (which may also need alignment).
> > > 
> > > The fix required changing TP_ADJ and __pthread_self on affected
> > > targets (aarch64, arm and sh) and in the tlsdesc asm the offset to
> > > access the dtv changed too.
> > > ---
> > 
> > On first glance it all looks right. I'll read in more detail soon.
> > Thanks!
> > 
> > > passed my simple local-exec tests.
> > 
> > Did you test all archs or just some? I think we should at least run
> > libc-test (if it sufficiently tests TLS) on the affected archs to make
> > sure there are no regressions.
> > 
> 
> ran libc-test on various targets via qemu-user, i didnt see any regressions.

I just setup libc-test to run under qemu-system-sh4, and confirmed
failure before the patch, success with the patch applied. I don't have
any other qemu-system environments setup, but success here looks like
a good sign!

Rich


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fix TLS layout of TLS variant I when there is a gap above TP
  2018-06-01 23:52 [PATCH] Fix TLS layout of TLS variant I when there is a gap above TP Szabolcs Nagy
  2018-06-02  2:59 ` Rich Felker
@ 2018-06-04  7:27 ` Phillip Berndt
  2018-06-11 19:02   ` Phillip Berndt
  1 sibling, 1 reply; 7+ messages in thread
From: Phillip Berndt @ 2018-06-04  7:27 UTC (permalink / raw)
  To: musl

2018-06-02 1:52 GMT+02:00 Szabolcs Nagy <nsz@port70.net>:
> The fix required changing TP_ADJ and __pthread_self on affected
> targets (aarch64, arm and sh) and in the tlsdesc asm the offset to
> access the dtv changed too.
> ---
> passed my simple local-exec tests.


Confirmed working on real aarch64, too. Awesome work, thanks a lot!

(The issue with symbols across different shared objects reusing the
same TLS addresses turned out to be on my end: I forgot to compile
these tests as -fPIC..)

- Phillip


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fix TLS layout of TLS variant I when there is a gap above TP
  2018-06-04  7:27 ` Phillip Berndt
@ 2018-06-11 19:02   ` Phillip Berndt
  2018-06-11 19:14     ` Rich Felker
  0 siblings, 1 reply; 7+ messages in thread
From: Phillip Berndt @ 2018-06-11 19:02 UTC (permalink / raw)
  To: musl

2018-06-04 9:27 GMT+02:00 Phillip Berndt <phillip.berndt@googlemail.com>:
> 2018-06-02 1:52 GMT+02:00 Szabolcs Nagy <nsz@port70.net>:
>> The fix required changing TP_ADJ and __pthread_self on affected
>> targets (aarch64, arm and sh) and in the tlsdesc asm the offset to
>> access the dtv changed too.
>> ---
>> passed my simple local-exec tests.
>
>
> Confirmed working on real aarch64, too. Awesome work, thanks a lot!
>

Got an opportunity to test this on real 32-bit arm hardware, fixes the
issue there for me as well.

Any news on the review & merge of the patch?

Best,
Phillip


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fix TLS layout of TLS variant I when there is a gap above TP
  2018-06-11 19:02   ` Phillip Berndt
@ 2018-06-11 19:14     ` Rich Felker
  0 siblings, 0 replies; 7+ messages in thread
From: Rich Felker @ 2018-06-11 19:14 UTC (permalink / raw)
  To: musl

On Mon, Jun 11, 2018 at 09:02:51PM +0200, Phillip Berndt wrote:
> 2018-06-04 9:27 GMT+02:00 Phillip Berndt <phillip.berndt@googlemail.com>:
> > 2018-06-02 1:52 GMT+02:00 Szabolcs Nagy <nsz@port70.net>:
> >> The fix required changing TP_ADJ and __pthread_self on affected
> >> targets (aarch64, arm and sh) and in the tlsdesc asm the offset to
> >> access the dtv changed too.
> >> ---
> >> passed my simple local-exec tests.
> >
> >
> > Confirmed working on real aarch64, too. Awesome work, thanks a lot!
> >
> 
> Got an opportunity to test this on real 32-bit arm hardware, fixes the
> issue there for me as well.
> 
> Any news on the review & merge of the patch?

Yes, I think it's fine and I've got it queued with a [much longer than
I realized! sorry!] batch of commits to push. Will look over things
and push asap after checking I didn't do anything stupid aside from
leaving them sitting so long.

Rich


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-06-11 19:14 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-01 23:52 [PATCH] Fix TLS layout of TLS variant I when there is a gap above TP Szabolcs Nagy
2018-06-02  2:59 ` Rich Felker
2018-06-02  7:15   ` Szabolcs Nagy
2018-06-03  1:11     ` Rich Felker
2018-06-04  7:27 ` Phillip Berndt
2018-06-11 19:02   ` Phillip Berndt
2018-06-11 19:14     ` Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).