C threads, v. 6.2

mailing list of musl libc
 help / color / mirror / code / Atom feed

* C threads, v. 6.2
@ 2014-08-27 22:11 Jens Gustedt
  2014-08-27 23:46 ` Rich Felker
  0 siblings, 1 reply; 26+ messages in thread
From: Jens Gustedt @ 2014-08-27 22:11 UTC (permalink / raw)
  To: musl


[-- Attachment #1.1: Type: text/plain, Size: 1131 bytes --]

Hello,
so here is a "conservative" version that I would consider a candidate
for release.

 - the "magic" constants are now those that Rich proposed to the glibc
   people. As a consequence many C thread wrappers are no tail calls
   to the POSIX functions but consist in a switch statement that
   translates the error conventions from POSIX to C.

 - this is directly based on the POSIX control structures. Since now
   these use private futexes properly, there is no emergency to
   implement C control structures seperately

 - the biggest code change is for pthread_create.c. Most common code
   has moved to a new TU pthread_exit.c. pthread_create and
   thread_create are implemented in two different TU.

 - a lot of minor changes to avoid dragging in symbols in the wrong
   namespaces.


Jens

-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::



[-- Attachment #1.2: thrd11-v6.2.patch --]
[-- Type: text/x-patch, Size: 56692 bytes --]

diff --git a/arch/arm/bits/alltypes.h.in b/arch/arm/bits/alltypes.h.in
index 183c4c4..333d43f 100644
--- a/arch/arm/bits/alltypes.h.in
+++ b/arch/arm/bits/alltypes.h.in
@@ -19,7 +19,7 @@ TYPEDEF long time_t;
 TYPEDEF long suseconds_t;
 
 TYPEDEF struct { union { int __i[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } pthread_cond_t;
+TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } __pthread_mutex_t;
+TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } __pthread_cond_t;
 TYPEDEF struct { union { int __i[8]; void *__p[8]; } __u; } pthread_rwlock_t;
 TYPEDEF struct { union { int __i[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/i386/bits/alltypes.h.in b/arch/i386/bits/alltypes.h.in
index 8ba8f6f..67cce3b 100644
--- a/arch/i386/bits/alltypes.h.in
+++ b/arch/i386/bits/alltypes.h.in
@@ -33,7 +33,7 @@ TYPEDEF long time_t;
 TYPEDEF long suseconds_t;
 
 TYPEDEF struct { union { int __i[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } pthread_cond_t;
+TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } __pthread_mutex_t;
+TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } __pthread_cond_t;
 TYPEDEF struct { union { int __i[8]; void *__p[8]; } __u; } pthread_rwlock_t;
 TYPEDEF struct { union { int __i[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/microblaze/bits/alltypes.h.in b/arch/microblaze/bits/alltypes.h.in
index a03e1b8..b27a8fe 100644
--- a/arch/microblaze/bits/alltypes.h.in
+++ b/arch/microblaze/bits/alltypes.h.in
@@ -19,7 +19,7 @@ TYPEDEF long time_t;
 TYPEDEF long suseconds_t;
 
 TYPEDEF struct { union { int __i[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } pthread_cond_t;
+TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } __pthread_mutex_t;
+TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } __pthread_cond_t;
 TYPEDEF struct { union { int __i[8]; void *__p[8]; } __u; } pthread_rwlock_t;
 TYPEDEF struct { union { int __i[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/mips/bits/alltypes.h.in b/arch/mips/bits/alltypes.h.in
index a03e1b8..b27a8fe 100644
--- a/arch/mips/bits/alltypes.h.in
+++ b/arch/mips/bits/alltypes.h.in
@@ -19,7 +19,7 @@ TYPEDEF long time_t;
 TYPEDEF long suseconds_t;
 
 TYPEDEF struct { union { int __i[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } pthread_cond_t;
+TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } __pthread_mutex_t;
+TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } __pthread_cond_t;
 TYPEDEF struct { union { int __i[8]; void *__p[8]; } __u; } pthread_rwlock_t;
 TYPEDEF struct { union { int __i[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/or1k/bits/alltypes.h.in b/arch/or1k/bits/alltypes.h.in
index 183c4c4..333d43f 100644
--- a/arch/or1k/bits/alltypes.h.in
+++ b/arch/or1k/bits/alltypes.h.in
@@ -19,7 +19,7 @@ TYPEDEF long time_t;
 TYPEDEF long suseconds_t;
 
 TYPEDEF struct { union { int __i[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } pthread_cond_t;
+TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } __pthread_mutex_t;
+TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } __pthread_cond_t;
 TYPEDEF struct { union { int __i[8]; void *__p[8]; } __u; } pthread_rwlock_t;
 TYPEDEF struct { union { int __i[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/powerpc/bits/alltypes.h.in b/arch/powerpc/bits/alltypes.h.in
index ee7f137..57d7c07 100644
--- a/arch/powerpc/bits/alltypes.h.in
+++ b/arch/powerpc/bits/alltypes.h.in
@@ -19,7 +19,7 @@ TYPEDEF long time_t;
 TYPEDEF long suseconds_t;
 
 TYPEDEF struct { union { int __i[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } pthread_cond_t;
+TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } __pthread_mutex_t;
+TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } __pthread_cond_t;
 TYPEDEF struct { union { int __i[8]; void *__p[8]; } __u; } pthread_rwlock_t;
 TYPEDEF struct { union { int __i[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/sh/bits/alltypes.h.in b/arch/sh/bits/alltypes.h.in
index ee7f137..57d7c07 100644
--- a/arch/sh/bits/alltypes.h.in
+++ b/arch/sh/bits/alltypes.h.in
@@ -19,7 +19,7 @@ TYPEDEF long time_t;
 TYPEDEF long suseconds_t;
 
 TYPEDEF struct { union { int __i[9]; unsigned __s[9]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } pthread_cond_t;
+TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } __pthread_mutex_t;
+TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } __pthread_cond_t;
 TYPEDEF struct { union { int __i[8]; void *__p[8]; } __u; } pthread_rwlock_t;
 TYPEDEF struct { union { int __i[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/x32/bits/alltypes.h.in b/arch/x32/bits/alltypes.h.in
index 8e396c9..02d9e1c 100644
--- a/arch/x32/bits/alltypes.h.in
+++ b/arch/x32/bits/alltypes.h.in
@@ -24,7 +24,7 @@ TYPEDEF long long time_t;
 TYPEDEF long long suseconds_t;
 
 TYPEDEF struct { union { int __i[14]; unsigned long __s[7]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[10]; volatile void *volatile __p[5]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[12]; void *__p[6]; } __u; } pthread_cond_t;
+TYPEDEF struct { union { int __i[10]; volatile void *volatile __p[5]; } __u; } __pthread_mutex_t;
+TYPEDEF struct { union { int __i[12]; void *__p[6]; } __u; } __pthread_cond_t;
 TYPEDEF struct { union { int __i[14]; void *__p[7]; } __u; } pthread_rwlock_t;
 TYPEDEF struct { union { int __i[8]; void *__p[4]; } __u; } pthread_barrier_t;
diff --git a/arch/x86_64/bits/alltypes.h.in b/arch/x86_64/bits/alltypes.h.in
index 7b4f3e7..ea9fc67 100644
--- a/arch/x86_64/bits/alltypes.h.in
+++ b/arch/x86_64/bits/alltypes.h.in
@@ -24,7 +24,7 @@ TYPEDEF long time_t;
 TYPEDEF long suseconds_t;
 
 TYPEDEF struct { union { int __i[14]; unsigned long __s[7]; } __u; } pthread_attr_t;
-TYPEDEF struct { union { int __i[10]; volatile void *volatile __p[5]; } __u; } pthread_mutex_t;
-TYPEDEF struct { union { int __i[12]; void *__p[6]; } __u; } pthread_cond_t;
+TYPEDEF struct { union { int __i[10]; volatile void *volatile __p[5]; } __u; } __pthread_mutex_t;
+TYPEDEF struct { union { int __i[12]; void *__p[6]; } __u; } __pthread_cond_t;
 TYPEDEF struct { union { int __i[14]; void *__p[7]; } __u; } pthread_rwlock_t;
 TYPEDEF struct { union { int __i[8]; void *__p[4]; } __u; } pthread_barrier_t;
diff --git a/include/alltypes.h.in b/include/alltypes.h.in
index c4ca5d5..5e29046 100644
--- a/include/alltypes.h.in
+++ b/include/alltypes.h.in
@@ -43,13 +43,24 @@ TYPEDEF unsigned gid_t;
 TYPEDEF int key_t;
 TYPEDEF unsigned useconds_t;
 
+/* The pairs of equivalent definitions for pthread and C thread types
+ * should always be kept in sync. */
 #ifdef __cplusplus
 TYPEDEF unsigned long pthread_t;
+TYPEDEF unsigned long thrd_t;
 #else
 TYPEDEF struct __pthread * pthread_t;
+TYPEDEF struct __pthread * thrd_t;
 #endif
 TYPEDEF int pthread_once_t;
+TYPEDEF int once_flag;
 TYPEDEF unsigned pthread_key_t;
+TYPEDEF unsigned tss_t;
+TYPEDEF __pthread_cond_t pthread_cond_t;
+TYPEDEF __pthread_cond_t cnd_t;
+TYPEDEF __pthread_mutex_t pthread_mutex_t;
+TYPEDEF __pthread_mutex_t mtx_t;
+
 TYPEDEF int pthread_spinlock_t;
 TYPEDEF struct { unsigned __attr; } pthread_mutexattr_t;
 TYPEDEF struct { unsigned __attr; } pthread_condattr_t;
diff --git a/include/pthread.h b/include/pthread.h
index f7c9568..0e8eeaa 100644
--- a/include/pthread.h
+++ b/include/pthread.h
@@ -16,6 +16,8 @@ extern "C" {
 #define __NEED_pthread_condattr_t
 #define __NEED_pthread_rwlockattr_t
 #define __NEED_pthread_barrierattr_t
+#define __NEED___pthread_mutex_t
+#define __NEED___pthread_cond_t
 #define __NEED_pthread_mutex_t
 #define __NEED_pthread_cond_t
 #define __NEED_pthread_rwlock_t
diff --git a/include/sys/types.h b/include/sys/types.h
index f00db03..a3f7271 100644
--- a/include/sys/types.h
+++ b/include/sys/types.h
@@ -36,6 +36,8 @@ extern "C" {
 #define __NEED_pthread_condattr_t
 #define __NEED_pthread_rwlockattr_t
 #define __NEED_pthread_barrierattr_t
+#define __NEED___pthread_mutex_t
+#define __NEED___pthread_cond_t
 #define __NEED_pthread_mutex_t
 #define __NEED_pthread_cond_t
 #define __NEED_pthread_rwlock_t
diff --git a/include/threads.h b/include/threads.h
new file mode 100644
index 0000000..cc396c3
--- /dev/null
+++ b/include/threads.h
@@ -0,0 +1,170 @@
+#ifndef _THREADS_H
+#define _THREADS_H
+
+/* This one is explicitly allowed to be included. */
+#include <time.h>
+
+#ifdef __GNUC__
+#define _Nonnull(...) __attribute__((__nonnull__(__VA_ARGS__)))
+#else
+#define _Nonnull(...)
+#endif
+
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define __NEED___pthread_cond_t
+#define __NEED___pthread_mutex_t
+#define __NEED_cnd_t
+#define __NEED_mtx_t
+#define __NEED_once_flag
+#define __NEED_thrd_t
+#define __NEED_tss_t
+
+#include <bits/alltypes.h>
+
+typedef int (*thrd_start_t)(void*);
+typedef void (*tss_dtor_t)(void*);
+
+
+#ifndef __THRD_EXPERIMENTAL
+# define __THRD_EXPERIMENTAL 1
+#endif
+
+  /* The following list of 9 integer constants makes up for the binary
+     compatibility of this C thread implementation. You must never
+     link code against versions of the C library that do not agree
+     upon these ABI parameters.
+
+     Additionally this implementation assumes that the 5 types have
+     the same size across C libraries and that these types can be
+     initialized by the default initializer.
+
+     The values for the 9 parameters are not fixed for now. Depending
+     on the choices of other implementations and the evolution of the
+     C standard they may still change. This should happen rarely, but
+     you have to consider the C thread feature to be experimental
+     until then, and be prepared to recompile your binary when linking
+     against a different implementation or a new version.
+
+     The macro __THRD_EXPERIMENTAL will be defined as long as we
+     consider this ABI to be unstable. This comes with some link time
+     checks and an overhead of some bytes. At your own risk you may
+     switch this check off by defining __THRD_EXPERIMENTAL macro to
+     0. */
+
+#define __THRD_SUCCESS      0
+#define __THRD_BUSY         1
+#define __THRD_ERROR        2
+#define __THRD_NOMEM        3
+#define __THRD_TIMEDOUT     4
+#define __MTX_PLAIN         0
+#define __MTX_RECURSIVE     1
+#define __MTX_TIMED         2
+#define TSS_DTOR_ITERATIONS 4
+
+#define __THRD_CONCAT10_(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9)         \
+_0 ## _ ## _1 ## _ ## _2 ## _ ## _3 ## _ ## _4 ## _ ## _5 ## _ ## _6 ## _ ## _7 ## _ ## _8 ## _ ## _9
+
+#define __THRD_CONCAT10(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9) \
+  __THRD_CONCAT10_(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9)
+
+
+#define __THRD_ABI                              \
+__THRD_CONCAT10(__thrd_abi,                     \
+                __THRD_SUCCESS,                 \
+                __THRD_BUSY,                    \
+                __THRD_ERROR,                   \
+                __THRD_NOMEM,                   \
+                __THRD_TIMEDOUT,                \
+                __MTX_PLAIN,                    \
+                __MTX_RECURSIVE,                \
+                __MTX_TIMED,                    \
+                TSS_DTOR_ITERATIONS             \
+                )
+
+#define __THRD_SHFT(X, Y) (((X) << 3) ^ (Y))
+
+enum {
+  __thrd_abi =
+  __THRD_SHFT(sizeof(cnd_t),
+              __THRD_SHFT(sizeof(mtx_t),
+                          __THRD_SHFT(sizeof(once_flag),
+                                      __THRD_SHFT(sizeof(thrd_t),
+                                                  sizeof(tss_t))))),
+};
+
+extern unsigned const __THRD_ABI;
+
+#define __THRD_ABI_CHECK (1/(__THRD_ABI == __thrd_abi))
+
+#if __THRD_EXPERIMENTAL
+# define __THRD_ABI_MARK __attribute__((used)) static unsigned const*const __thrd_abi_mark = &__THRD_ABI
+#else
+# define __THRD_ABI_MARK typedef unsigned __thrd_abi_dummy_type
+#endif
+
+enum {
+  thrd_success = __THRD_SUCCESS,
+  thrd_busy = __THRD_BUSY,
+  thrd_error = __THRD_ERROR,
+  thrd_nomem = __THRD_NOMEM,
+  thrd_timedout = __THRD_TIMEDOUT,
+};
+
+enum {
+  mtx_plain = __MTX_PLAIN,
+  mtx_recursive = __MTX_RECURSIVE,
+  // all mutexes are timed, here. so this is a no-op
+  mtx_timed = __MTX_TIMED,
+};
+
+#define ONCE_FLAG_INIT { 0 }
+#define thread_local _Thread_local
+
+int thrd_create(thrd_t *, thrd_start_t, void *) _Nonnull(1);
+_Noreturn void thrd_exit(int);
+
+int thrd_detach(thrd_t);
+int thrd_join(thrd_t, int *);
+
+int thrd_sleep(const struct timespec *, struct timespec *) _Nonnull(1);
+void thrd_yield(void);
+
+thrd_t thrd_current(void);
+int thrd_equal(thrd_t, thrd_t);
+#define thrd_equal(A, B) ((A) == (B))
+
+void call_once(once_flag *, void (*)(void)) _Nonnull(1, 2);
+
+int mtx_init(mtx_t *, int) _Nonnull(1);
+void mtx_destroy(mtx_t *) _Nonnull(1);
+
+int mtx_lock(mtx_t *) _Nonnull(1);
+int mtx_timedlock(mtx_t *restrict, const struct timespec *restrict) _Nonnull(1);
+int mtx_trylock(mtx_t *) _Nonnull(1);
+int mtx_unlock(mtx_t *) _Nonnull(1);
+
+int cnd_init(cnd_t *) _Nonnull(1);
+void cnd_destroy(cnd_t *) _Nonnull(1);
+
+int cnd_broadcast(cnd_t *) _Nonnull(1);
+int cnd_signal(cnd_t *) _Nonnull(1);
+
+int cnd_timedwait(cnd_t *restrict, mtx_t *restrict, const struct timespec *restrict) _Nonnull(1, 2);
+int cnd_wait(cnd_t *, mtx_t *) _Nonnull(1, 2);
+
+int tss_create(tss_t *, tss_dtor_t) _Nonnull(1);
+void tss_delete(tss_t key);
+
+int tss_set(tss_t, void *);
+void *tss_get(tss_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/time.h b/include/time.h
index dc88070..13dccaa 100644
--- a/include/time.h
+++ b/include/time.h
@@ -129,6 +129,19 @@ int stime(const time_t *);
 time_t timegm(struct tm *);
 #endif
 
+#if __STDC_VERSION__ >= 201112L
+  /* Implementation specific choice: The epoch that the TIME_UTC clock
+     is based upon is the Unix Epoch, that is a struct timespec
+     represents the number of seconds that have elapsed since 00:00:00
+     Coordinated Universal Time (UTC), Thursday, 1 January
+     1970. Because of differences in leap seconds this is not
+     completely equivalent to UTC. */
+  /* Beware that the TIME_UTC constant itself per the standard must be
+     greater than 0. */
+#define TIME_UTC 1
+int timespec_get(struct timespec *, int);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mman/mprotect.c b/src/mman/mprotect.c
index f488486..535787b 100644
--- a/src/mman/mprotect.c
+++ b/src/mman/mprotect.c
@@ -2,10 +2,12 @@
 #include "libc.h"
 #include "syscall.h"
 
-int mprotect(void *addr, size_t len, int prot)
+int __mprotect(void *addr, size_t len, int prot)
 {
 	size_t start, end;
 	start = (size_t)addr & -PAGE_SIZE;
 	end = (size_t)((char *)addr + len + PAGE_SIZE-1) & -PAGE_SIZE;
 	return syscall(SYS_mprotect, start, end-start, prot);
 }
+
+weak_alias(__mprotect, mprotect);
diff --git a/src/sched/thrd_yield.c b/src/sched/thrd_yield.c
new file mode 100644
index 0000000..301e953
--- /dev/null
+++ b/src/sched/thrd_yield.c
@@ -0,0 +1,7 @@
+#include <sched.h>
+#include "syscall.h"
+
+void thrd_yield()
+{
+         (void)syscall(SYS_sched_yield);
+}
diff --git a/src/thread/__thrd_abi.c b/src/thread/__thrd_abi.c
new file mode 100644
index 0000000..e5674e6
--- /dev/null
+++ b/src/thread/__thrd_abi.c
@@ -0,0 +1,3 @@
+#include <threads.h>
+
+unsigned const __THRD_ABI =  __thrd_abi;
diff --git a/src/thread/__timedwait.c b/src/thread/__timedwait.c
index d6f1233..3d62272 100644
--- a/src/thread/__timedwait.c
+++ b/src/thread/__timedwait.c
@@ -4,6 +4,9 @@
 #include "futex.h"
 #include "syscall.h"
 
+int __pthread_setcancelstate(int new, int *old);
+int __clock_gettime(clockid_t clk, struct timespec *ts);
+
 int __timedwait(volatile int *addr, int val,
 	clockid_t clk, const struct timespec *at,
 	void (*cleanup)(void *), void *arg, int priv)
@@ -15,7 +18,7 @@ int __timedwait(volatile int *addr, int val,
 
 	if (at) {
 		if (at->tv_nsec >= 1000000000UL) return EINVAL;
-		if (clock_gettime(clk, &to)) return EINVAL;
+		if (__clock_gettime(clk, &to)) return EINVAL;
 		to.tv_sec = at->tv_sec - to.tv_sec;
 		if ((to.tv_nsec = at->tv_nsec - to.tv_nsec) < 0) {
 			to.tv_sec--;
@@ -25,7 +28,7 @@ int __timedwait(volatile int *addr, int val,
 		top = &to;
 	}
 
-	if (!cleanup) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
+	if (!cleanup) __pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
 	pthread_cleanup_push(cleanup, arg);
 
 	r = -__syscall_cp(SYS_futex, addr, FUTEX_WAIT|priv, val, top);
@@ -33,7 +36,7 @@ int __timedwait(volatile int *addr, int val,
 	if (r != EINTR && r != ETIMEDOUT) r = 0;
 
 	pthread_cleanup_pop(0);
-	if (!cleanup) pthread_setcancelstate(cs, 0);
+	if (!cleanup) __pthread_setcancelstate(cs, 0);
 
 	return r;
 }
diff --git a/src/thread/call_once.c b/src/thread/call_once.c
new file mode 100644
index 0000000..742a707
--- /dev/null
+++ b/src/thread/call_once.c
@@ -0,0 +1,9 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __pthread_once(once_flag *, void (*)(void));
+
+void (call_once)(once_flag *flag, void (*func)(void)) {
+	__THRD_ABI_MARK;
+	(void)__pthread_once(flag, func);
+}
diff --git a/src/thread/cnd_broadcast.c b/src/thread/cnd_broadcast.c
new file mode 100644
index 0000000..b1db667
--- /dev/null
+++ b/src/thread/cnd_broadcast.c
@@ -0,0 +1,9 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __pthread_cond_broadcast(pthread_cond_t *);
+
+int cnd_broadcast(cnd_t * cnd) {
+	int ret = __pthread_cond_broadcast(cnd);
+	return ret ? thrd_error : thrd_success;
+}
diff --git a/src/thread/cnd_destroy.c b/src/thread/cnd_destroy.c
new file mode 100644
index 0000000..11cfc19
--- /dev/null
+++ b/src/thread/cnd_destroy.c
@@ -0,0 +1,17 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __pthread_cond_destroy(pthread_cond_t *);
+
+/* The behavior of cnd_destroy is undefined if cnd is still in
+   use. The choice for pthread_cond_destroy in that situation is to
+   wake up all users before destroying. I am not sure that we should
+   do it like that here, too. Alternatives would be:
+   - complain by using perror or equivalent
+   - assert that there is no waiter
+   - abort when there is a waiter
+   - do nothing
+   */
+void (cnd_destroy)(cnd_t *cnd) {
+	(void)__pthread_cond_destroy(cnd);
+}
diff --git a/src/thread/cnd_init.c b/src/thread/cnd_init.c
new file mode 100644
index 0000000..c7f1edf
--- /dev/null
+++ b/src/thread/cnd_init.c
@@ -0,0 +1,10 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int cnd_init(cnd_t * c)
+{
+	*c = (cnd_t) {
+		0
+	};
+	return thrd_success;
+}
diff --git a/src/thread/cnd_signal.c b/src/thread/cnd_signal.c
new file mode 100644
index 0000000..a8d942d
--- /dev/null
+++ b/src/thread/cnd_signal.c
@@ -0,0 +1,9 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __pthread_cond_signal(pthread_cond_t *);
+
+int cnd_signal(cnd_t * cnd) {
+	int ret = __pthread_cond_signal(cnd);
+	return ret ? thrd_error : thrd_success;
+}
diff --git a/src/thread/cnd_timedwait.c b/src/thread/cnd_timedwait.c
new file mode 100644
index 0000000..cce47a4
--- /dev/null
+++ b/src/thread/cnd_timedwait.c
@@ -0,0 +1,14 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m, const struct timespec *restrict ts);
+
+int cnd_timedwait(cnd_t *restrict cond, mtx_t *restrict mutex, const struct timespec *restrict ts) {
+	int ret = __pthread_cond_timedwait(cond, mutex, ts);
+	switch (ret) {
+	/* May also return EINVAL or EPERM. */
+	default:        return thrd_error;
+	case 0:         return thrd_success;
+	case ETIMEDOUT: return thrd_timedout;
+	}
+}
diff --git a/src/thread/cnd_wait.c b/src/thread/cnd_wait.c
new file mode 100644
index 0000000..39d0a0d
--- /dev/null
+++ b/src/thread/cnd_wait.c
@@ -0,0 +1,11 @@
+#include <threads.h>
+
+int cnd_wait(cnd_t *cond, mtx_t *mutex)
+{
+	__THRD_ABI_MARK;
+	/* Calling cnd_timedwait with a null pointer is an
+	   extension. Such a call is convenient, here since it avoids to
+	   repeat the case analysis that is already done for
+	   cnd_timedwait. */
+	return cnd_timedwait(cond, mutex, 0);
+}
diff --git a/src/thread/mtx_destroy.c b/src/thread/mtx_destroy.c
new file mode 100644
index 0000000..482b781
--- /dev/null
+++ b/src/thread/mtx_destroy.c
@@ -0,0 +1,5 @@
+#include <threads.h>
+
+void (mtx_destroy)(mtx_t *mtx) {
+	/* empty */
+}
diff --git a/src/thread/mtx_init.c b/src/thread/mtx_init.c
new file mode 100644
index 0000000..827a56a
--- /dev/null
+++ b/src/thread/mtx_init.c
@@ -0,0 +1,10 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int mtx_init(mtx_t * m, int type)
+{
+	*m = (pthread_mutex_t) {
+		._m_type = ((type&mtx_recursive) ? PTHREAD_MUTEX_RECURSIVE : 0),
+	};
+	return thrd_success;
+}
diff --git a/src/thread/mtx_lock.c b/src/thread/mtx_lock.c
new file mode 100644
index 0000000..89730f1
--- /dev/null
+++ b/src/thread/mtx_lock.c
@@ -0,0 +1,14 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int mtx_lock(mtx_t *m)
+{
+	__THRD_ABI_MARK;
+	if (m->_m_type == PTHREAD_MUTEX_NORMAL && !a_cas(&m->_m_lock, 0, EBUSY))
+		return thrd_success;
+	/* Calling mtx_timedlock with a null pointer is an
+	   extension. Such a call is convenient, here, since it avoids
+	   to repeat the case analysis that is already done for
+	   mtx_timedlock. */
+	return mtx_timedlock(m, 0);
+}
diff --git a/src/thread/mtx_timedlock.c b/src/thread/mtx_timedlock.c
new file mode 100644
index 0000000..c42a096
--- /dev/null
+++ b/src/thread/mtx_timedlock.c
@@ -0,0 +1,19 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict ts);
+
+int mtx_timedlock(mtx_t *restrict mutex, const struct timespec *restrict ts) {
+	/* In the best of all worlds this is a tail call. */
+	int ret = __pthread_mutex_timedlock(mutex, ts);
+	switch (ret) {
+	/* May also return EINVAL, EPERM, EDEADLK or EAGAIN. EAGAIN is
+	   specially tricky since C11 doesn't define how many recursive
+	   calls can be done. (this *isn't* the maximum amount of nested
+	   calls!) This implementation here deals this with a counter and
+	   detects overflow, so this is definitively UB. */
+	default:        return thrd_error;
+	case 0:         return thrd_success;
+	case ETIMEDOUT: return thrd_timedout;
+	}
+}
diff --git a/src/thread/mtx_trylock.c b/src/thread/mtx_trylock.c
new file mode 100644
index 0000000..5fd823a
--- /dev/null
+++ b/src/thread/mtx_trylock.c
@@ -0,0 +1,22 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __pthread_mutex_trylock(pthread_mutex_t *restrict m);
+
+int mtx_trylock(mtx_t *restrict m) {
+	if (m->_m_type == PTHREAD_MUTEX_NORMAL)
+		return (a_cas(&m->_m_lock, 0, EBUSY) & EBUSY) ? thrd_busy : thrd_success;
+
+	/* In the best of all worlds this is a tail call. */
+	int ret = __pthread_mutex_trylock(m);
+	switch (ret) {
+	/* In case of UB may also return EINVAL, EPERM, EDEADLK or EAGAIN. EAGAIN is
+	   specially tricky since C11 doesn't define how many recursive
+	   calls can be done. (this *isn't* the maximum amount of nested
+	   calls!) This implementation here deals this with a counter and
+	   detects overflow, so this is definitively UB. */
+	default:    return thrd_error;
+	case 0:     return thrd_success;
+	case EBUSY: return thrd_busy;
+	}
+}
diff --git a/src/thread/mtx_unlock.c b/src/thread/mtx_unlock.c
new file mode 100644
index 0000000..d1721a6
--- /dev/null
+++ b/src/thread/mtx_unlock.c
@@ -0,0 +1,11 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __pthread_mutex_unlock(pthread_mutex_t *);
+
+int (mtx_unlock)(mtx_t *mtx) {
+	/* In the best of all worlds this is a tail call. */
+	int ret = __pthread_mutex_unlock(mtx);
+	/* In case of UB may also return EPERM. */
+	return ret ? thrd_error : thrd_success;
+}
diff --git a/src/thread/pthread_cond_broadcast.c b/src/thread/pthread_cond_broadcast.c
index 69f840f..f53b41d 100644
--- a/src/thread/pthread_cond_broadcast.c
+++ b/src/thread/pthread_cond_broadcast.c
@@ -2,7 +2,7 @@
 
 int __private_cond_signal(pthread_cond_t *, int);
 
-int pthread_cond_broadcast(pthread_cond_t *c)
+int __pthread_cond_broadcast(pthread_cond_t *c)
 {
 	if (!c->_c_shared) return __private_cond_signal(c, -1);
 	if (!c->_c_waiters) return 0;
@@ -10,3 +10,5 @@ int pthread_cond_broadcast(pthread_cond_t *c)
 	__wake(&c->_c_seq, -1, 0);
 	return 0;
 }
+
+weak_alias(__pthread_cond_broadcast, pthread_cond_broadcast);
diff --git a/src/thread/pthread_cond_destroy.c b/src/thread/pthread_cond_destroy.c
index 8c55516..b2abeb8 100644
--- a/src/thread/pthread_cond_destroy.c
+++ b/src/thread/pthread_cond_destroy.c
@@ -1,6 +1,6 @@
 #include "pthread_impl.h"
 
-int pthread_cond_destroy(pthread_cond_t *c)
+int __pthread_cond_destroy(pthread_cond_t *c)
 {
 	if (c->_c_shared && c->_c_waiters) {
 		int cnt;
@@ -12,3 +12,5 @@ int pthread_cond_destroy(pthread_cond_t *c)
 	}
 	return 0;
 }
+
+weak_alias(__pthread_cond_destroy, pthread_cond_destroy);
diff --git a/src/thread/pthread_cond_signal.c b/src/thread/pthread_cond_signal.c
index 119c00a..d1d3c77 100644
--- a/src/thread/pthread_cond_signal.c
+++ b/src/thread/pthread_cond_signal.c
@@ -2,7 +2,7 @@
 
 int __private_cond_signal(pthread_cond_t *, int);
 
-int pthread_cond_signal(pthread_cond_t *c)
+int __pthread_cond_signal(pthread_cond_t *c)
 {
 	if (!c->_c_shared) return __private_cond_signal(c, 1);
 	if (!c->_c_waiters) return 0;
@@ -10,3 +10,5 @@ int pthread_cond_signal(pthread_cond_t *c)
 	__wake(&c->_c_seq, 1, 0);
 	return 0;
 }
+
+weak_alias(__pthread_cond_signal, pthread_cond_signal);
diff --git a/src/thread/pthread_cond_timedwait.c b/src/thread/pthread_cond_timedwait.c
index 2d192b0..393d60e 100644
--- a/src/thread/pthread_cond_timedwait.c
+++ b/src/thread/pthread_cond_timedwait.c
@@ -1,5 +1,9 @@
 #include "pthread_impl.h"
 
+void __pthread_testcancel(void);
+int __pthread_mutex_lock(pthread_mutex_t *);
+int __pthread_mutex_unlock(pthread_mutex_t *m);
+
 /*
  * struct waiter
  *
@@ -119,7 +123,7 @@ static void unwait(void *arg)
 	}
 }
 
-int pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m, const struct timespec *restrict ts)
+int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m, const struct timespec *restrict ts)
 {
 	struct waiter node = { .cond = c, .mutex = m };
 	int e, seq, *fut, clock = c->_c_clock;
@@ -130,7 +134,7 @@ int pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict
 	if (ts && ts->tv_nsec >= 1000000000UL)
 		return EINVAL;
 
-	pthread_testcancel();
+	__pthread_testcancel();
 
 	if (c->_c_shared) {
 		node.shared = 1;
@@ -151,7 +155,7 @@ int pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict
 		unlock(&c->_c_lock);
 	}
 
-	pthread_mutex_unlock(m);
+	__pthread_mutex_unlock(m);
 
 	do e = __timedwait(fut, seq, clock, ts, unwait, &node, !node.shared);
 	while (*fut==seq && (!e || e==EINTR));
@@ -197,3 +201,5 @@ int __private_cond_signal(pthread_cond_t *c, int n)
 
 	return 0;
 }
+
+weak_alias(__pthread_cond_timedwait, pthread_cond_timedwait);
diff --git a/src/thread/pthread_cond_wait.c b/src/thread/pthread_cond_wait.c
index 8735bf1..58656f7 100644
--- a/src/thread/pthread_cond_wait.c
+++ b/src/thread/pthread_cond_wait.c
@@ -1,6 +1,8 @@
 #include "pthread_impl.h"
 
+int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m, const struct timespec *restrict ts);
+
 int pthread_cond_wait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m)
 {
-	return pthread_cond_timedwait(c, m, 0);
+	return __pthread_cond_timedwait(c, m, 0);
 }
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index e441bda..193b295 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -4,102 +4,21 @@
 #include "libc.h"
 #include <sys/mman.h>
 #include <string.h>
+#include <threads.h>
+
+void *__mmap(void *, size_t, int, int, int, off_t);
+int __munmap(void *, size_t);
+int __mprotect(void *, size_t, int);
+void __thread_enable(void);
+_Noreturn void __pthread_exit(void *);
+void *__copy_tls(unsigned char *);
+extern volatile size_t __pthread_tsd_size;
 
 static void dummy_0()
 {
 }
 weak_alias(dummy_0, __acquire_ptc);
 weak_alias(dummy_0, __release_ptc);
-weak_alias(dummy_0, __pthread_tsd_run_dtors);
-weak_alias(dummy_0, __do_private_robust_list);
-weak_alias(dummy_0, __do_orphaned_stdio_locks);
-
-_Noreturn void pthread_exit(void *result)
-{
-	pthread_t self = __pthread_self();
-	sigset_t set;
-
-	self->result = result;
-
-	while (self->cancelbuf) {
-		void (*f)(void *) = self->cancelbuf->__f;
-		void *x = self->cancelbuf->__x;
-		self->cancelbuf = self->cancelbuf->__next;
-		f(x);
-	}
-
-	__pthread_tsd_run_dtors();
-
-	__lock(self->exitlock);
-
-	/* Mark this thread dead before decrementing count */
-	__lock(self->killlock);
-	self->dead = 1;
-
-	/* Block all signals before decrementing the live thread count.
-	 * This is important to ensure that dynamically allocated TLS
-	 * is not under-allocated/over-committed, and possibly for other
-	 * reasons as well. */
-	__block_all_sigs(&set);
-
-	/* Wait to unlock the kill lock, which governs functions like
-	 * pthread_kill which target a thread id, until signals have
-	 * been blocked. This precludes observation of the thread id
-	 * as a live thread (with application code running in it) after
-	 * the thread was reported dead by ESRCH being returned. */
-	__unlock(self->killlock);
-
-	/* It's impossible to determine whether this is "the last thread"
-	 * until performing the atomic decrement, since multiple threads
-	 * could exit at the same time. For the last thread, revert the
-	 * decrement and unblock signals to give the atexit handlers and
-	 * stdio cleanup code a consistent state. */
-	if (a_fetch_add(&libc.threads_minus_1, -1)==0) {
-		libc.threads_minus_1 = 0;
-		__restore_sigs(&set);
-		exit(0);
-	}
-
-	if (self->locale != &libc.global_locale) {
-		a_dec(&libc.uselocale_cnt);
-		if (self->locale->ctype_utf8)
-			a_dec(&libc.bytelocale_cnt_minus_1);
-	}
-
-	__do_private_robust_list();
-	__do_orphaned_stdio_locks();
-
-	if (self->detached && self->map_base) {
-		/* Detached threads must avoid the kernel clear_child_tid
-		 * feature, since the virtual address will have been
-		 * unmapped and possibly already reused by a new mapping
-		 * at the time the kernel would perform the write. In
-		 * the case of threads that started out detached, the
-		 * initial clone flags are correct, but if the thread was
-		 * detached later (== 2), we need to clear it here. */
-		if (self->detached == 2) __syscall(SYS_set_tid_address, 0);
-
-		/* The following call unmaps the thread's stack mapping
-		 * and then exits without touching the stack. */
-		__unmapself(self->map_base, self->map_size);
-	}
-
-	for (;;) __syscall(SYS_exit, 0);
-}
-
-void __do_cleanup_push(struct __ptcb *cb)
-{
-	if (!libc.has_thread_pointer) return;
-	struct pthread *self = __pthread_self();
-	cb->__next = self->cancelbuf;
-	self->cancelbuf = cb;
-}
-
-void __do_cleanup_pop(struct __ptcb *cb)
-{
-	if (!libc.has_thread_pointer) return;
-	__pthread_self()->cancelbuf = cb->__next;
-}
 
 static int start(void *p)
 {
@@ -121,28 +40,10 @@ static int start(void *p)
 
 #define ROUND(x) (((x)+PAGE_SIZE-1)&-PAGE_SIZE)
 
-/* pthread_key_create.c overrides this */
-static volatile size_t dummy = 0;
-weak_alias(dummy, __pthread_tsd_size);
-static void *dummy_tsd[1] = { 0 };
-weak_alias(dummy_tsd, __pthread_tsd_main);
-
-static FILE *volatile dummy_file = 0;
-weak_alias(dummy_file, __stdin_used);
-weak_alias(dummy_file, __stdout_used);
-weak_alias(dummy_file, __stderr_used);
-
-static void init_file_lock(FILE *f)
-{
-	if (f && f->lock<0) f->lock = 0;
-}
-
-void *__copy_tls(unsigned char *);
-
 int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp, void *(*entry)(void *), void *restrict arg)
 {
 	int ret;
-	size_t size, guard;
+	size_t size, guard = 0;
 	struct pthread *self, *new;
 	unsigned char *map = 0, *stack = 0, *tsd = 0, *stack_limit;
 	unsigned flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND
@@ -153,16 +54,7 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp
 
 	if (!libc.can_do_threads) return ENOSYS;
 	self = __pthread_self();
-	if (!libc.threaded) {
-		for (FILE *f=libc.ofl_head; f; f=f->next)
-			init_file_lock(f);
-		init_file_lock(__stdin_used);
-		init_file_lock(__stdout_used);
-		init_file_lock(__stderr_used);
-		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
-		self->tsd = (void **)__pthread_tsd_main;
-		libc.threaded = 1;
-	}
+	if (!libc.threaded) __thread_enable();
 	if (attrp) attr = *attrp;
 
 	__acquire_ptc();
@@ -191,14 +83,14 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp
 
 	if (!tsd) {
 		if (guard) {
-			map = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
+			map = __mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
 			if (map == MAP_FAILED) goto fail;
-			if (mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)) {
-				munmap(map, size);
+			if (__mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)) {
+				__munmap(map, size);
 				goto fail;
 			}
 		} else {
-			map = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+			map = __mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
 			if (map == MAP_FAILED) goto fail;
 		}
 		tsd = map + size - __pthread_tsd_size;
@@ -240,7 +132,7 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp
 
 	if (ret < 0) {
 		a_dec(&libc.threads_minus_1);
-		if (map) munmap(map, size);
+		if (map) __munmap(map, size);
 		return EAGAIN;
 	}
 
diff --git a/src/thread/pthread_detach.c b/src/thread/pthread_detach.c
index 651c38e..5ca7855 100644
--- a/src/thread/pthread_detach.c
+++ b/src/thread/pthread_detach.c
@@ -1,11 +1,15 @@
 #include "pthread_impl.h"
 
-int pthread_detach(pthread_t t)
+int __pthread_join(pthread_t t, void **res);
+
+int __pthread_detach(pthread_t t)
 {
 	/* Cannot detach a thread that's already exiting */
 	if (a_swap(t->exitlock, 1))
-		return pthread_join(t, 0);
+		return __pthread_join(t, 0);
 	t->detached = 2;
 	__unlock(t->exitlock);
 	return 0;
 }
+
+weak_alias(__pthread_detach, pthread_detach);
diff --git a/src/thread/pthread_equal.c b/src/thread/pthread_equal.c
index 3e3df4f..38fb45e 100644
--- a/src/thread/pthread_equal.c
+++ b/src/thread/pthread_equal.c
@@ -1,4 +1,4 @@
-#include <pthread.h>
+#include "pthread_impl.h"
 
 int (pthread_equal)(pthread_t a, pthread_t b)
 {
diff --git a/src/thread/pthread_exit.c b/src/thread/pthread_exit.c
new file mode 100644
index 0000000..4f51269
--- /dev/null
+++ b/src/thread/pthread_exit.c
@@ -0,0 +1,130 @@
+#define _GNU_SOURCE
+#include "pthread_impl.h"
+#include "stdio_impl.h"
+#include "libc.h"
+#include <sys/mman.h>
+#include <threads.h>
+
+static void dummy_0()
+{
+}
+weak_alias(dummy_0, __pthread_tsd_run_dtors);
+weak_alias(dummy_0, __do_private_robust_list);
+weak_alias(dummy_0, __do_orphaned_stdio_locks);
+
+_Noreturn void __pthread_exit(void *result)
+{
+	pthread_t self = __pthread_self();
+	sigset_t set;
+
+	self->result = result;
+
+	while (self->cancelbuf) {
+		void (*f)(void *) = self->cancelbuf->__f;
+		void *x = self->cancelbuf->__x;
+		self->cancelbuf = self->cancelbuf->__next;
+		f(x);
+	}
+
+	__pthread_tsd_run_dtors();
+
+	__lock(self->exitlock);
+
+	/* Mark this thread dead before decrementing count */
+	__lock(self->killlock);
+	self->dead = 1;
+
+	/* Block all signals before decrementing the live thread count.
+	 * This is important to ensure that dynamically allocated TLS
+	 * is not under-allocated/over-committed, and possibly for other
+	 * reasons as well. */
+	__block_all_sigs(&set);
+
+	/* Wait to unlock the kill lock, which governs functions like
+	 * pthread_kill which target a thread id, until signals have
+	 * been blocked. This precludes observation of the thread id
+	 * as a live thread (with application code running in it) after
+	 * the thread was reported dead by ESRCH being returned. */
+	__unlock(self->killlock);
+
+	/* It's impossible to determine whether this is "the last thread"
+	 * until performing the atomic decrement, since multiple threads
+	 * could exit at the same time. For the last thread, revert the
+	 * decrement and unblock signals to give the atexit handlers and
+	 * stdio cleanup code a consistent state. */
+	if (a_fetch_add(&libc.threads_minus_1, -1)==0) {
+		libc.threads_minus_1 = 0;
+		__restore_sigs(&set);
+		exit(0);
+	}
+
+	if (self->locale != &libc.global_locale) {
+		a_dec(&libc.uselocale_cnt);
+		if (self->locale->ctype_utf8)
+			a_dec(&libc.bytelocale_cnt_minus_1);
+	}
+
+	__do_private_robust_list();
+	__do_orphaned_stdio_locks();
+
+	if (self->detached && self->map_base) {
+		/* Detached threads must avoid the kernel clear_child_tid
+		 * feature, since the virtual address will have been
+		 * unmapped and possibly already reused by a new mapping
+		 * at the time the kernel would perform the write. In
+		 * the case of threads that started out detached, the
+		 * initial clone flags are correct, but if the thread was
+		 * detached later (== 2), we need to clear it here. */
+		if (self->detached == 2) __syscall(SYS_set_tid_address, 0);
+
+		/* The following call unmaps the thread's stack mapping
+		 * and then exits without touching the stack. */
+		__unmapself(self->map_base, self->map_size);
+	}
+
+	for (;;) __syscall(SYS_exit, 0);
+}
+
+void __do_cleanup_push(struct __ptcb *cb)
+{
+	if (!libc.has_thread_pointer) return;
+	struct pthread *self = __pthread_self();
+	cb->__next = self->cancelbuf;
+	self->cancelbuf = cb;
+}
+
+void __do_cleanup_pop(struct __ptcb *cb)
+{
+	if (!libc.has_thread_pointer) return;
+	__pthread_self()->cancelbuf = cb->__next;
+}
+
+/* pthread_key_create.c overrides this */
+static volatile size_t dummy = 0;
+weak_alias(dummy, __pthread_tsd_size);
+static void *dummy_tsd[1] = { 0 };
+weak_alias(dummy_tsd, __pthread_tsd_main);
+
+static FILE *volatile dummy_file = 0;
+weak_alias(dummy_file, __stdin_used);
+weak_alias(dummy_file, __stdout_used);
+weak_alias(dummy_file, __stderr_used);
+
+static void init_file_lock(FILE *f)
+{
+	if (f && f->lock<0) f->lock = 0;
+}
+
+void __thread_enable(void)
+{
+	for (FILE *f=libc.ofl_head; f; f=f->next)
+		init_file_lock(f);
+	init_file_lock(__stdin_used);
+	init_file_lock(__stdout_used);
+	init_file_lock(__stderr_used);
+	__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
+	__pthread_self()->tsd = (void **)__pthread_tsd_main;
+	libc.threaded = 1;
+}
+
+weak_alias(__pthread_exit, pthread_exit);
diff --git a/src/thread/pthread_getspecific.c b/src/thread/pthread_getspecific.c
index b2a282c..bfc4294 100644
--- a/src/thread/pthread_getspecific.c
+++ b/src/thread/pthread_getspecific.c
@@ -1,7 +1,10 @@
 #include "pthread_impl.h"
 
-void *pthread_getspecific(pthread_key_t k)
+static void *__pthread_getspecific(pthread_key_t k)
 {
 	struct pthread *self = __pthread_self();
 	return self->tsd[k];
 }
+
+weak_alias(__pthread_getspecific, pthread_getspecific);
+weak_alias(__pthread_getspecific, tss_get);
diff --git a/src/thread/pthread_join.c b/src/thread/pthread_join.c
index 719c91c..bca89aa 100644
--- a/src/thread/pthread_join.c
+++ b/src/thread/pthread_join.c
@@ -1,15 +1,19 @@
 #include "pthread_impl.h"
 #include <sys/mman.h>
 
+int __munmap(void *, size_t);
+
 static void dummy(void *p)
 {
 }
 
-int pthread_join(pthread_t t, void **res)
+int __pthread_join(pthread_t t, void **res)
 {
 	int tmp;
 	while ((tmp = t->tid)) __timedwait(&t->tid, tmp, 0, 0, dummy, 0, 0);
 	if (res) *res = t->result;
-	if (t->map_base) munmap(t->map_base, t->map_size);
+	if (t->map_base) __munmap(t->map_base, t->map_size);
 	return 0;
 }
+
+weak_alias(__pthread_join, pthread_join);
diff --git a/src/thread/pthread_key_create.c b/src/thread/pthread_key_create.c
index a9187f7..bfcd597 100644
--- a/src/thread/pthread_key_create.c
+++ b/src/thread/pthread_key_create.c
@@ -9,7 +9,7 @@ static void nodtor(void *dummy)
 {
 }
 
-int pthread_key_create(pthread_key_t *k, void (*dtor)(void *))
+int __pthread_key_create(pthread_key_t *k, void (*dtor)(void *))
 {
 	unsigned i = (uintptr_t)&k / 16 % PTHREAD_KEYS_MAX;
 	unsigned j = i;
@@ -31,7 +31,7 @@ int pthread_key_create(pthread_key_t *k, void (*dtor)(void *))
 	return EAGAIN;
 }
 
-int pthread_key_delete(pthread_key_t k)
+int __pthread_key_delete(pthread_key_t k)
 {
 	keys[k] = 0;
 	return 0;
@@ -53,3 +53,6 @@ void __pthread_tsd_run_dtors()
 		}
 	}
 }
+
+weak_alias(__pthread_key_delete, pthread_key_delete);
+weak_alias(__pthread_key_create, pthread_key_create);
diff --git a/src/thread/pthread_mutex_lock.c b/src/thread/pthread_mutex_lock.c
index 2a9a3aa..993f54c 100644
--- a/src/thread/pthread_mutex_lock.c
+++ b/src/thread/pthread_mutex_lock.c
@@ -1,10 +1,14 @@
 #include "pthread_impl.h"
 
-int pthread_mutex_lock(pthread_mutex_t *m)
+int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict at);
+
+int __pthread_mutex_lock(pthread_mutex_t *m)
 {
 	if ((m->_m_type&15) == PTHREAD_MUTEX_NORMAL
 	    && !a_cas(&m->_m_lock, 0, EBUSY))
 		return 0;
 
-	return pthread_mutex_timedlock(m, 0);
+	return __pthread_mutex_timedlock(m, 0);
 }
+
+weak_alias(__pthread_mutex_lock, pthread_mutex_lock);
diff --git a/src/thread/pthread_mutex_timedlock.c b/src/thread/pthread_mutex_timedlock.c
index ae883f9..16241ee 100644
--- a/src/thread/pthread_mutex_timedlock.c
+++ b/src/thread/pthread_mutex_timedlock.c
@@ -1,6 +1,6 @@
 #include "pthread_impl.h"
 
-int pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict at)
+int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict at)
 {
 	if ((m->_m_type&15) == PTHREAD_MUTEX_NORMAL
 	    && !a_cas(&m->_m_lock, 0, EBUSY))
@@ -30,3 +30,5 @@ int pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *
 	}
 	return r;
 }
+
+weak_alias(__pthread_mutex_timedlock, pthread_mutex_timedlock);
diff --git a/src/thread/pthread_mutex_trylock.c b/src/thread/pthread_mutex_trylock.c
index e851517..9be7930 100644
--- a/src/thread/pthread_mutex_trylock.c
+++ b/src/thread/pthread_mutex_trylock.c
@@ -50,9 +50,11 @@ int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
 	return 0;
 }
 
-int pthread_mutex_trylock(pthread_mutex_t *m)
+static int __pthread_mutex_trylock(pthread_mutex_t *m)
 {
 	if ((m->_m_type&15) == PTHREAD_MUTEX_NORMAL)
 		return a_cas(&m->_m_lock, 0, EBUSY) & EBUSY;
 	return __pthread_mutex_trylock_owner(m);
 }
+
+weak_alias(__pthread_mutex_trylock, pthread_mutex_trylock);
diff --git a/src/thread/pthread_mutex_unlock.c b/src/thread/pthread_mutex_unlock.c
index 46761d9..a7f39c7 100644
--- a/src/thread/pthread_mutex_unlock.c
+++ b/src/thread/pthread_mutex_unlock.c
@@ -3,7 +3,7 @@
 void __vm_lock_impl(int);
 void __vm_unlock_impl(void);
 
-int pthread_mutex_unlock(pthread_mutex_t *m)
+int __pthread_mutex_unlock(pthread_mutex_t *m)
 {
 	pthread_t self;
 	int waiters = m->_m_waiters;
@@ -36,3 +36,5 @@ int pthread_mutex_unlock(pthread_mutex_t *m)
 		__wake(&m->_m_lock, 1, priv);
 	return 0;
 }
+
+weak_alias(__pthread_mutex_unlock, pthread_mutex_unlock);
diff --git a/src/thread/pthread_once.c b/src/thread/pthread_once.c
index 2eb0f93..05ebe69 100644
--- a/src/thread/pthread_once.c
+++ b/src/thread/pthread_once.c
@@ -6,7 +6,7 @@ static void undo(void *control)
 	__wake(control, 1, 1);
 }
 
-int pthread_once(pthread_once_t *control, void (*init)(void))
+int __pthread_once(pthread_once_t *control, void (*init)(void))
 {
 	static int waiters;
 
@@ -34,3 +34,5 @@ int pthread_once(pthread_once_t *control, void (*init)(void))
 		return 0;
 	}
 }
+
+weak_alias(__pthread_once, pthread_once);
diff --git a/src/thread/pthread_setcancelstate.c b/src/thread/pthread_setcancelstate.c
index 060bcdc..2268217 100644
--- a/src/thread/pthread_setcancelstate.c
+++ b/src/thread/pthread_setcancelstate.c
@@ -1,6 +1,6 @@
 #include "pthread_impl.h"
 
-int pthread_setcancelstate(int new, int *old)
+int __pthread_setcancelstate(int new, int *old)
 {
 	if (new > 1U) return EINVAL;
 	if (!libc.has_thread_pointer) return ENOSYS;
@@ -9,3 +9,5 @@ int pthread_setcancelstate(int new, int *old)
 	self->canceldisable = new;
 	return 0;
 }
+
+weak_alias(__pthread_setcancelstate, pthread_setcancelstate);
diff --git a/src/thread/pthread_setcanceltype.c b/src/thread/pthread_setcanceltype.c
index bf0a3f3..d493c1b 100644
--- a/src/thread/pthread_setcanceltype.c
+++ b/src/thread/pthread_setcanceltype.c
@@ -1,11 +1,13 @@
 #include "pthread_impl.h"
 
+void __pthread_testcancel(void);
+
 int pthread_setcanceltype(int new, int *old)
 {
 	struct pthread *self = __pthread_self();
 	if (new > 1U) return EINVAL;
 	if (old) *old = self->cancelasync;
 	self->cancelasync = new;
-	if (new) pthread_testcancel();
+	if (new) __pthread_testcancel();
 	return 0;
 }
diff --git a/src/thread/pthread_setspecific.c b/src/thread/pthread_setspecific.c
index 55e46a8..dd18b96 100644
--- a/src/thread/pthread_setspecific.c
+++ b/src/thread/pthread_setspecific.c
@@ -1,6 +1,6 @@
 #include "pthread_impl.h"
 
-int pthread_setspecific(pthread_key_t k, const void *x)
+int __pthread_setspecific(pthread_key_t k, const void *x)
 {
 	struct pthread *self = __pthread_self();
 	/* Avoid unnecessary COW */
@@ -10,3 +10,5 @@ int pthread_setspecific(pthread_key_t k, const void *x)
 	}
 	return 0;
 }
+
+weak_alias(__pthread_setspecific, pthread_setspecific);
diff --git a/src/thread/pthread_testcancel.c b/src/thread/pthread_testcancel.c
index ba5f7c6..ee48e6d 100644
--- a/src/thread/pthread_testcancel.c
+++ b/src/thread/pthread_testcancel.c
@@ -7,7 +7,9 @@ static void dummy()
 
 weak_alias(dummy, __testcancel);
 
-void pthread_testcancel()
+void __pthread_testcancel()
 {
 	__testcancel();
 }
+
+weak_alias(__pthread_testcancel, pthread_testcancel);
diff --git a/src/thread/thrd_create.c b/src/thread/thrd_create.c
new file mode 100644
index 0000000..f72f992
--- /dev/null
+++ b/src/thread/thrd_create.c
@@ -0,0 +1,98 @@
+#define _GNU_SOURCE
+#include "pthread_impl.h"
+#include "stdio_impl.h"
+#include "libc.h"
+#include <sys/mman.h>
+#include <threads.h>
+
+void *__mmap(void *, size_t, int, int, int, off_t);
+int __munmap(void *, size_t);
+int __mprotect(void *, size_t, int);
+void __thread_enable(void);
+_Noreturn void __pthread_exit(void *);
+void *__copy_tls(unsigned char *);
+extern volatile size_t __pthread_tsd_size;
+
+_Noreturn void thrd_exit(int result) {
+	__pthread_exit((void*)(intptr_t)result);
+}
+
+static void dummy_0()
+{
+}
+weak_alias(dummy_0, __acquire_ptc);
+weak_alias(dummy_0, __release_ptc);
+
+static int start(void *p)
+{
+	thrd_t self = p;
+	int (*start)(void*) = (int(*)(void*)) self->start;
+	thrd_exit(start(self->start_arg));
+	return 0;
+}
+
+#define ROUND(x) (((x)+PAGE_SIZE-1)&-PAGE_SIZE)
+
+int thrd_create(thrd_t *res, thrd_start_t entry, void *arg)
+{
+	int ret = -ENOMEM;
+	size_t guard = ROUND(DEFAULT_GUARD_SIZE);
+	size_t size = guard + ROUND(DEFAULT_STACK_SIZE + libc.tls_size +  __pthread_tsd_size);
+	struct pthread *self, *new;
+	unsigned char *map, *stack, *tsd, *stack_limit;
+	unsigned flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND
+		| CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS
+		| CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | CLONE_DETACHED;
+
+	if (!libc.can_do_threads) return thrd_error;
+	self = __pthread_self();
+	if (!libc.threaded) __thread_enable();
+
+	__acquire_ptc();
+
+	if (guard) {
+		map = __mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
+		if (map == MAP_FAILED) goto CLEANUP;
+		if (__mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)) {
+			__munmap(map, size);
+			goto CLEANUP;
+		}
+	} else {
+		map = __mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+		if (map == MAP_FAILED) goto CLEANUP;
+	}
+	tsd = map + size - __pthread_tsd_size;
+	stack = tsd - libc.tls_size;
+	stack_limit = map + guard;
+
+	new = __copy_tls(tsd - libc.tls_size);
+	new->map_base = map;
+	new->map_size = size;
+	new->stack = stack;
+	new->stack_size = stack - stack_limit;
+	new->start = (void *(*)(void*))entry;
+	new->start_arg = arg;
+	new->self = new;
+	new->tsd = (void *)tsd;
+	new->locale = &libc.global_locale;
+	new->unblock_cancel = self->cancel;
+	new->canary = self->canary;
+
+	a_inc(&libc.threads_minus_1);
+	ret = __clone(start, stack, flags, new, &new->tid, TP_ADJ(new), &new->tid);
+
+ CLEANUP:
+	__release_ptc();
+
+	if (ret > 0) {
+          *res = new;
+          ret = thrd_success;
+        } else if (ret == -ENOMEM) {
+          ret = thrd_nomem;
+        } else {
+          a_dec(&libc.threads_minus_1);
+          if (map) __munmap(map, size);
+          ret = thrd_error;
+	}
+        return ret;
+}
diff --git a/src/thread/thrd_current.c b/src/thread/thrd_current.c
new file mode 100644
index 0000000..1728535
--- /dev/null
+++ b/src/thread/thrd_current.c
@@ -0,0 +1,11 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+/* Not all arch have __pthread_self as a symbol. On some this results
+   in some magic. So this "call" to __pthread_self is not necessary a
+   tail call. In particular, other than the appearance, thrd_current
+   can not be implemented as a weak symbol. */
+pthread_t thrd_current()
+{
+	return __pthread_self();
+}
diff --git a/src/thread/thrd_detach.c b/src/thread/thrd_detach.c
new file mode 100644
index 0000000..211b8fb
--- /dev/null
+++ b/src/thread/thrd_detach.c
@@ -0,0 +1,18 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __munmap(void *, size_t);
+
+int thrd_detach(thrd_t t)
+{
+	/* Cannot detach a thread that's already exiting */
+	if (a_swap(t->exitlock, 1)){
+		int tmp;
+		while ((tmp = t->tid)) __timedwait(&t->tid, tmp, 0, 0, 0, 0, 0);
+		if (t->map_base) __munmap(t->map_base, t->map_size);
+	} else {
+		t->detached = 2;
+		__unlock(t->exitlock);
+	}
+	return thrd_success;
+}
diff --git a/src/thread/thrd_equal.c b/src/thread/thrd_equal.c
new file mode 100644
index 0000000..ac49a44
--- /dev/null
+++ b/src/thread/thrd_equal.c
@@ -0,0 +1,6 @@
+#include <threads.h>
+
+int (thrd_equal)(thrd_t a, thrd_t b)
+{
+	return a==b;
+}
diff --git a/src/thread/thrd_join.c b/src/thread/thrd_join.c
new file mode 100644
index 0000000..a8c7aed
--- /dev/null
+++ b/src/thread/thrd_join.c
@@ -0,0 +1,16 @@
+#include "pthread_impl.h"
+#include <sys/mman.h>
+#include <threads.h>
+
+int __munmap(void *, size_t);
+
+/* C11 threads cannot be canceled, so there is no need for a
+   cancelation function pointer, here. */
+int thrd_join(thrd_t t, int *res)
+{
+	int tmp;
+	while ((tmp = t->tid)) __timedwait(&t->tid, tmp, 0, 0, 0, 0, 0);
+	if (res) *res = (int)(intptr_t)t->result;
+	if (t->map_base) __munmap(t->map_base, t->map_size);
+	return thrd_success;
+}
diff --git a/src/thread/tss_create.c b/src/thread/tss_create.c
new file mode 100644
index 0000000..0cbadc8
--- /dev/null
+++ b/src/thread/tss_create.c
@@ -0,0 +1,13 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __pthread_key_create(pthread_key_t *k, void (*dtor)(void *));
+
+int tss_create(tss_t *tss, tss_dtor_t dtor)
+{
+	__THRD_ABI_MARK;
+	/* Different error returns are possible. C glues them together into
+	   just failure notification. Can't be optimized to a tail call,
+	   unless thrd_error equals EAGAIN. */
+	return __pthread_key_create(tss, dtor) ? thrd_error : thrd_success;
+}
diff --git a/src/thread/tss_delete.c b/src/thread/tss_delete.c
new file mode 100644
index 0000000..d50731f
--- /dev/null
+++ b/src/thread/tss_delete.c
@@ -0,0 +1,8 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __pthread_key_delete(pthread_key_t k);
+
+void (tss_delete)(tss_t key) {
+	(void)__pthread_key_delete(key);
+}
diff --git a/src/thread/tss_set.c b/src/thread/tss_set.c
new file mode 100644
index 0000000..70c4fb7
--- /dev/null
+++ b/src/thread/tss_set.c
@@ -0,0 +1,13 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int tss_set(tss_t k, void *x)
+{
+	struct pthread *self = __pthread_self();
+	/* Avoid unnecessary COW */
+	if (self->tsd[k] != x) {
+		self->tsd[k] = x;
+		self->tsd_used = 1;
+	}
+	return thrd_success;
+}
diff --git a/src/time/thrd_sleep.c b/src/time/thrd_sleep.c
new file mode 100644
index 0000000..6570b0c
--- /dev/null
+++ b/src/time/thrd_sleep.c
@@ -0,0 +1,31 @@
+#include <time.h>
+#include <errno.h>
+#include "syscall.h"
+#include "libc.h"
+#include "threads.h"
+
+/* Roughly __syscall already returns the right thing: 0 if all went
+   well or a negative error indication, otherwise. */
+int thrd_sleep(const struct timespec *req, struct timespec *rem)
+{
+  int ret = __syscall(SYS_nanosleep, req, rem);
+  // compile time comparison, constant propagated
+  if (EINTR != 1) {
+    switch (ret) {
+    case 0: return 0;
+      /* error described by POSIX:                                    */
+      /* EINTR  The nanosleep() function was interrupted by a signal. */
+      /* The C11 wording is:                                          */
+      /* -1 if it has been interrupted by a signal                    */
+    case -EINTR:  return -1;
+      /* error described by POSIX */
+    case -EINVAL: return -2;
+      /* described for linux */
+    case -EFAULT: return -3;
+      /* No other error returns are specified. */
+    default: return INT_MIN;
+    }
+  }
+  // potentially a tail call
+  return ret;
+}
diff --git a/src/time/timespec_get.c b/src/time/timespec_get.c
new file mode 100644
index 0000000..bf78e5a
--- /dev/null
+++ b/src/time/timespec_get.c
@@ -0,0 +1,9 @@
+#include <time.h>
+
+int __clock_gettime(clockid_t clk, struct timespec *ts);
+
+/* the base argument is simply ignored, there is no other implemented
+   value than TIME_UTC. */
+int timespec_get(struct timespec * ts, int base) {
+  return __clock_gettime(CLOCK_REALTIME, ts) < 0 ? 0 : base;
+}

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-27 22:11 C threads, v. 6.2 Jens Gustedt
@ 2014-08-27 23:46 ` Rich Felker
  2014-08-28  9:40   ` Jens Gustedt
  0 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2014-08-27 23:46 UTC (permalink / raw)
  To: musl

On Thu, Aug 28, 2014 at 12:11:45AM +0200, Jens Gustedt wrote:
> diff --git a/arch/arm/bits/alltypes.h.in b/arch/arm/bits/alltypes.h.in
> index 183c4c4..333d43f 100644
> --- a/arch/arm/bits/alltypes.h.in
> +++ b/arch/arm/bits/alltypes.h.in
> @@ -19,7 +19,7 @@ TYPEDEF long time_t;
>  TYPEDEF long suseconds_t;
>  
>  TYPEDEF struct { union { int __i[9]; unsigned __s[9]; } __u; } pthread_attr_t;
> -TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
> -TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } pthread_cond_t;
> +TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } __pthread_mutex_t;
> +TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } __pthread_cond_t;
> [...]
> +TYPEDEF __pthread_cond_t pthread_cond_t;
> +TYPEDEF __pthread_cond_t cnd_t;
> +TYPEDEF __pthread_mutex_t pthread_mutex_t;
> +TYPEDEF __pthread_mutex_t mtx_t;

This changes the C++ ABI by changing the tag for the POSIX types, so
it can't be done this way. We could return to some serious language
lawyering about whether it's valid to access members via the 'wrong'
struct type when we have two identical struct types, or just assume
that's safe for now and do it.

There's also the approach of just having them both be separate struct
types that contain, as their first member, the real struct; then
pointer casts are legal and the problem is solved via a trivial
one-line addition to each function along the lines of:

	struct __actual_mutex *m = (void *)incoming_m_arg;

but I'm okay with deferring a change to do this unless/until it's
deemed a real issue.

> diff --git a/include/threads.h b/include/threads.h
> new file mode 100644
> index 0000000..cc396c3
> --- /dev/null
> +++ b/include/threads.h
> @@ -0,0 +1,170 @@
> +#ifndef _THREADS_H
> +#define _THREADS_H
> +
> +/* This one is explicitly allowed to be included. */
> +#include <time.h>
> +
> +#ifdef __GNUC__
> +#define _Nonnull(...) __attribute__((__nonnull__(__VA_ARGS__)))
> +#else
> +#define _Nonnull(...)
> +#endif

If we want to use attributes like this, it's a separate change from
adding threads. But I'm fairly much against doing it anyway since the
compiler already knows how to make these warnings for standard
functions, without help from the headers.

> +#ifndef __THRD_EXPERIMENTAL
> +# define __THRD_EXPERIMENTAL 1
> +#endif
> +
> +  /* The following list of 9 integer constants makes up for the binary
> +     compatibility of this C thread implementation. You must never
> +     link code against versions of the C library that do not agree
> +     upon these ABI parameters.
> [...]

Getting this committed to musl, or at least making the first release
with it present, should be committing to the ABI, so I don't think we
need any ABI-check type stuff here. But I am curious how it works.
__THRD_ABI_CHECK doesn't seem to be used anywhere. Is the intent just
that you would put it in a program manually?

> +enum {
> +  thrd_success = __THRD_SUCCESS,
> +  thrd_busy = __THRD_BUSY,
> +  thrd_error = __THRD_ERROR,
> +  thrd_nomem = __THRD_NOMEM,
> +  thrd_timedout = __THRD_TIMEDOUT,
> +};
> +
> +enum {
> +  mtx_plain = __MTX_PLAIN,
> +  mtx_recursive = __MTX_RECURSIVE,
> +  // all mutexes are timed, here. so this is a no-op
> +  mtx_timed = __MTX_TIMED,
> +};

Without ABI checking, I think we can just put the values here where
they're immediately visible rather than going through an extra level
of indirection with the preprocessor.

> +#define thread_local _Thread_local

This needs to be omitted for C++11+, I think.

> diff --git a/include/time.h b/include/time.h
> index dc88070..13dccaa 100644
> --- a/include/time.h
> +++ b/include/time.h
> @@ -129,6 +129,19 @@ int stime(const time_t *);
>  time_t timegm(struct tm *);
>  #endif
>  
> +#if __STDC_VERSION__ >= 201112L
> +  /* Implementation specific choice: The epoch that the TIME_UTC clock
> +     is based upon is the Unix Epoch, that is a struct timespec
> +     represents the number of seconds that have elapsed since 00:00:00
> +     Coordinated Universal Time (UTC), Thursday, 1 January
> +     1970. Because of differences in leap seconds this is not
> +     completely equivalent to UTC. */
> +  /* Beware that the TIME_UTC constant itself per the standard must be
> +     greater than 0. */
> +#define TIME_UTC 1
> +int timespec_get(struct timespec *, int);
> +#endif
> +
>  #ifdef __cplusplus
>  }
>  #endif

So far, when it comes to C11 library functionality as opposed to
compiler features that might not be available without compiler
support, the approach we've taken in musl is to expose them
unconditionally. This makes it possible to use the library features
even on compilers without explicit C11 support. Arguments could be
made either way on this but I think we should be consistent. It might
also be more appropriate to factor out timespec_get as a separate
patch since it's nominally separate from threads, although I don't
care much either way. Presumably the main use of it is for arguments
to timedwait/timedlock operations.

> diff --git a/src/mman/mprotect.c b/src/mman/mprotect.c
> index f488486..535787b 100644
> --- a/src/mman/mprotect.c
> +++ b/src/mman/mprotect.c
> @@ -2,10 +2,12 @@
>  #include "libc.h"
>  #include "syscall.h"
>  
> -int mprotect(void *addr, size_t len, int prot)
> +int __mprotect(void *addr, size_t len, int prot)
>  {
>  	size_t start, end;
>  	start = (size_t)addr & -PAGE_SIZE;
>  	end = (size_t)((char *)addr + len + PAGE_SIZE-1) & -PAGE_SIZE;
>  	return syscall(SYS_mprotect, start, end-start, prot);
>  }
> +
> +weak_alias(__mprotect, mprotect);

Here it might be nicer to just use __syscall(SYS_mprotect...) directly
in pthread_create. We don't need the extra page alignment overhead
there, only for the public function, and __syscall is lighter than a
function call anyway.

> diff --git a/src/thread/call_once.c b/src/thread/call_once.c
> new file mode 100644
> index 0000000..742a707
> --- /dev/null
> +++ b/src/thread/call_once.c
> @@ -0,0 +1,9 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +int __pthread_once(once_flag *, void (*)(void));
> +
> +void (call_once)(once_flag *flag, void (*func)(void)) {
> +	__THRD_ABI_MARK;
> +	(void)__pthread_once(flag, func);
> +}

We have questions about cancellation and longjmp, but I think it's
okay to leave it like this for now and address those later.

> diff --git a/src/thread/cnd_broadcast.c b/src/thread/cnd_broadcast.c
> new file mode 100644
> index 0000000..b1db667
> --- /dev/null
> +++ b/src/thread/cnd_broadcast.c
> @@ -0,0 +1,9 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +int __pthread_cond_broadcast(pthread_cond_t *);
> +
> +int cnd_broadcast(cnd_t * cnd) {
> +	int ret = __pthread_cond_broadcast(cnd);
> +	return ret ? thrd_error : thrd_success;
> +}

This could/should actually be a separate function using
__private_signal from pthread_cond_timedwait.c, I think. There's no
reason to pull in the extra process-shared broadcast code from
pthread_cond_broadcast. Right now the latter is small anyway, but it
could possibly become larger if we try to fix some of the problems
with process-shared cond vars (which are hard to fix, unfortunately).

Also I'm fine with omitting the error conversion logic since the
called function is implemented so as never to return an error (and in
fact POSIX defines no errors, so it would have to be an additional
implementation-defined error, which musl strongly avoids). If you want
this to be more clear, a short comment should probably suffice without
dead code to convert error numbers that never occur.

> diff --git a/src/thread/cnd_destroy.c b/src/thread/cnd_destroy.c
> new file mode 100644
> index 0000000..11cfc19
> --- /dev/null
> +++ b/src/thread/cnd_destroy.c
> @@ -0,0 +1,17 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +int __pthread_cond_destroy(pthread_cond_t *);
> +
> +/* The behavior of cnd_destroy is undefined if cnd is still in
> +   use. The choice for pthread_cond_destroy in that situation is to
> +   wake up all users before destroying. I am not sure that we should
> +   do it like that here, too. Alternatives would be:
> +   - complain by using perror or equivalent
> +   - assert that there is no waiter
> +   - abort when there is a waiter
> +   - do nothing
> +   */
> +void (cnd_destroy)(cnd_t *cnd) {
> +	(void)__pthread_cond_destroy(cnd);
> +}

This can just be a NOP now. The non-NOP pthread_cond_destroy is only
for process-shared cond vars. Non-shared ones do not need any action
for destruction.

> diff --git a/src/thread/cnd_timedwait.c b/src/thread/cnd_timedwait.c
> new file mode 100644
> index 0000000..cce47a4
> --- /dev/null
> +++ b/src/thread/cnd_timedwait.c
> @@ -0,0 +1,14 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m, const struct timespec *restrict ts);
> +
> +int cnd_timedwait(cnd_t *restrict cond, mtx_t *restrict mutex, const struct timespec *restrict ts) {
> +	int ret = __pthread_cond_timedwait(cond, mutex, ts);
> +	switch (ret) {
> +	/* May also return EINVAL or EPERM. */
> +	default:        return thrd_error;
> +	case 0:         return thrd_success;
> +	case ETIMEDOUT: return thrd_timedout;
> +	}
> +}

C11 isn't clear on what happens when the timespec is non-normalized.
Should we still just treat this as an error, or attempt to normalize
it first? While EPERM also may happen, the cases that would lead to
this seem to be UB, but I'm fine with your above mapping of it to an
error.

> diff --git a/src/thread/mtx_init.c b/src/thread/mtx_init.c
> new file mode 100644
> index 0000000..827a56a
> --- /dev/null
> +++ b/src/thread/mtx_init.c
> @@ -0,0 +1,10 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +int mtx_init(mtx_t * m, int type)
> +{
> +	*m = (pthread_mutex_t) {
> +		._m_type = ((type&mtx_recursive) ? PTHREAD_MUTEX_RECURSIVE : 0),
> +	};
> +	return thrd_success;
> +}

I'm okay with a 0 here since we assume all over that {0} init for a
mutex is a default type mutex, but you might prefer something like
PTHREAD_MUTEX_NORMAL here instead of 0.

> diff --git a/src/thread/mtx_lock.c b/src/thread/mtx_lock.c
> new file mode 100644
> index 0000000..89730f1
> --- /dev/null
> +++ b/src/thread/mtx_lock.c
> @@ -0,0 +1,14 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +int mtx_lock(mtx_t *m)
> +{
> +	__THRD_ABI_MARK;
> +	if (m->_m_type == PTHREAD_MUTEX_NORMAL && !a_cas(&m->_m_lock, 0, EBUSY))
> +		return thrd_success;
> +	/* Calling mtx_timedlock with a null pointer is an
> +	   extension. Such a call is convenient, here, since it avoids
> +	   to repeat the case analysis that is already done for
> +	   mtx_timedlock. */
> +	return mtx_timedlock(m, 0);
> +}
> diff --git a/src/thread/mtx_timedlock.c b/src/thread/mtx_timedlock.c
> new file mode 100644
> index 0000000..c42a096
> --- /dev/null
> +++ b/src/thread/mtx_timedlock.c
> @@ -0,0 +1,19 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict ts);
> +
> +int mtx_timedlock(mtx_t *restrict mutex, const struct timespec *restrict ts) {

It might be worth duplicating the a_cas attempt here for normal-type;
I'm not sure.

> +	/* In the best of all worlds this is a tail call. */
> +	int ret = __pthread_mutex_timedlock(mutex, ts);
> +	switch (ret) {
> +	/* May also return EINVAL, EPERM, EDEADLK or EAGAIN. EAGAIN is
> +	   specially tricky since C11 doesn't define how many recursive
> +	   calls can be done. (this *isn't* the maximum amount of nested
> +	   calls!) This implementation here deals this with a counter and
> +	   detects overflow, so this is definitively UB. */
> +	default:        return thrd_error;
> +	case 0:         return thrd_success;
> +	case ETIMEDOUT: return thrd_timedout;
> +	}
> +}

I don't think EPERM is possible for locking. And EDEADLK can't happen
with mutex types used for C11. So I think only EINVAL (invalid
timespec) and EAGAIN (recursive lock count overflow) apply. The code
looks fine; this is just a comment on the comments.

> diff --git a/src/thread/mtx_trylock.c b/src/thread/mtx_trylock.c
> new file mode 100644
> index 0000000..5fd823a
> --- /dev/null
> +++ b/src/thread/mtx_trylock.c
> @@ -0,0 +1,22 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +int __pthread_mutex_trylock(pthread_mutex_t *restrict m);
> +
> +int mtx_trylock(mtx_t *restrict m) {
> +	if (m->_m_type == PTHREAD_MUTEX_NORMAL)
> +		return (a_cas(&m->_m_lock, 0, EBUSY) & EBUSY) ? thrd_busy : thrd_success;
> +
> +	/* In the best of all worlds this is a tail call. */
> +	int ret = __pthread_mutex_trylock(m);
> +	switch (ret) {
> +	/* In case of UB may also return EINVAL, EPERM, EDEADLK or EAGAIN. EAGAIN is
> +	   specially tricky since C11 doesn't define how many recursive
> +	   calls can be done. (this *isn't* the maximum amount of nested
> +	   calls!) This implementation here deals this with a counter and
> +	   detects overflow, so this is definitively UB. */
> +	default:    return thrd_error;
> +	case 0:     return thrd_success;
> +	case EBUSY: return thrd_busy;
> +	}
> +}

Same here.

> diff --git a/src/thread/mtx_unlock.c b/src/thread/mtx_unlock.c
> new file mode 100644
> index 0000000..d1721a6
> --- /dev/null
> +++ b/src/thread/mtx_unlock.c
> @@ -0,0 +1,11 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +int __pthread_mutex_unlock(pthread_mutex_t *);
> +
> +int (mtx_unlock)(mtx_t *mtx) {
> +	/* In the best of all worlds this is a tail call. */
> +	int ret = __pthread_mutex_unlock(mtx);
> +	/* In case of UB may also return EPERM. */
> +	return ret ? thrd_error : thrd_success;
> +}

For unlock, C11 makes it UB to unlock a mutex you don't own (even for
recursive type), so the tail call would be legal. I don't have a
strong opinion either way here.

> diff --git a/src/thread/pthread_cond_broadcast.c b/src/thread/pthread_cond_broadcast.c
> index 69f840f..f53b41d 100644
> --- a/src/thread/pthread_cond_broadcast.c
> +++ b/src/thread/pthread_cond_broadcast.c
> @@ -2,7 +2,7 @@
>  
>  int __private_cond_signal(pthread_cond_t *, int);
>  
> -int pthread_cond_broadcast(pthread_cond_t *c)
> +int __pthread_cond_broadcast(pthread_cond_t *c)
>  {
>  	if (!c->_c_shared) return __private_cond_signal(c, -1);
>  	if (!c->_c_waiters) return 0;
> @@ -10,3 +10,5 @@ int pthread_cond_broadcast(pthread_cond_t *c)
>  	__wake(&c->_c_seq, -1, 0);
>  	return 0;
>  }
> +
> +weak_alias(__pthread_cond_broadcast, pthread_cond_broadcast);

This can be avoided if the C11 function doesn't call here but calls
__private_cond_signal directly.

> diff --git a/src/thread/pthread_cond_destroy.c b/src/thread/pthread_cond_destroy.c
> index 8c55516..b2abeb8 100644
> --- a/src/thread/pthread_cond_destroy.c
> +++ b/src/thread/pthread_cond_destroy.c
> @@ -1,6 +1,6 @@
>  #include "pthread_impl.h"
>  
> -int pthread_cond_destroy(pthread_cond_t *c)
> +int __pthread_cond_destroy(pthread_cond_t *c)
>  {
>  	if (c->_c_shared && c->_c_waiters) {
>  		int cnt;
> @@ -12,3 +12,5 @@ int pthread_cond_destroy(pthread_cond_t *c)
>  	}
>  	return 0;
>  }
> +
> +weak_alias(__pthread_cond_destroy, pthread_cond_destroy);

This can be avoided since destruction for private cv is a NOP.

> diff --git a/src/thread/pthread_cond_signal.c b/src/thread/pthread_cond_signal.c
> index 119c00a..d1d3c77 100644
> --- a/src/thread/pthread_cond_signal.c
> +++ b/src/thread/pthread_cond_signal.c
> @@ -2,7 +2,7 @@
>  
>  int __private_cond_signal(pthread_cond_t *, int);
>  
> -int pthread_cond_signal(pthread_cond_t *c)
> +int __pthread_cond_signal(pthread_cond_t *c)
>  {
>  	if (!c->_c_shared) return __private_cond_signal(c, 1);
>  	if (!c->_c_waiters) return 0;
> @@ -10,3 +10,5 @@ int pthread_cond_signal(pthread_cond_t *c)
>  	__wake(&c->_c_seq, 1, 0);
>  	return 0;
>  }
> +
> +weak_alias(__pthread_cond_signal, pthread_cond_signal);

And likewise this can be avoided using __private_cond_signal directly.

> diff --git a/src/thread/pthread_cond_timedwait.c b/src/thread/pthread_cond_timedwait.c
> index 2d192b0..393d60e 100644
> --- a/src/thread/pthread_cond_timedwait.c
> +++ b/src/thread/pthread_cond_timedwait.c
> @@ -1,5 +1,9 @@
>  #include "pthread_impl.h"
>  
> +void __pthread_testcancel(void);
> +int __pthread_mutex_lock(pthread_mutex_t *);
> +int __pthread_mutex_unlock(pthread_mutex_t *m);
> +
>  /*
>   * struct waiter
>   *
> @@ -119,7 +123,7 @@ static void unwait(void *arg)
>  	}
>  }
>  
> -int pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m, const struct timespec *restrict ts)
> +int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m, const struct timespec *restrict ts)
>  {
>  	struct waiter node = { .cond = c, .mutex = m };
>  	int e, seq, *fut, clock = c->_c_clock;
> @@ -130,7 +134,7 @@ int pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict
>  	if (ts && ts->tv_nsec >= 1000000000UL)
>  		return EINVAL;
>  
> -	pthread_testcancel();
> +	__pthread_testcancel();
>  
>  	if (c->_c_shared) {
>  		node.shared = 1;
> @@ -151,7 +155,7 @@ int pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict
>  		unlock(&c->_c_lock);
>  	}
>  
> -	pthread_mutex_unlock(m);
> +	__pthread_mutex_unlock(m);
>  
>  	do e = __timedwait(fut, seq, clock, ts, unwait, &node, !node.shared);
>  	while (*fut==seq && (!e || e==EINTR));
> @@ -197,3 +201,5 @@ int __private_cond_signal(pthread_cond_t *c, int n)
>  
>  	return 0;
>  }
> +
> +weak_alias(__pthread_cond_timedwait, pthread_cond_timedwait);

All this looks necessary and okay.

> diff --git a/src/thread/pthread_cond_wait.c b/src/thread/pthread_cond_wait.c
> index 8735bf1..58656f7 100644
> --- a/src/thread/pthread_cond_wait.c
> +++ b/src/thread/pthread_cond_wait.c
> @@ -1,6 +1,8 @@
>  #include "pthread_impl.h"
>  
> +int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m, const struct timespec *restrict ts);
> +
>  int pthread_cond_wait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m)
>  {
> -	return pthread_cond_timedwait(c, m, 0);
> +	return __pthread_cond_timedwait(c, m, 0);
>  }

This is unnecessary.

> diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
> index e441bda..193b295 100644
> --- a/src/thread/pthread_create.c
> +++ b/src/thread/pthread_create.c
> @@ -4,102 +4,21 @@
>  #include "libc.h"
>  #include <sys/mman.h>
>  #include <string.h>
> +#include <threads.h>
> +
> +void *__mmap(void *, size_t, int, int, int, off_t);
> +int __munmap(void *, size_t);
> +int __mprotect(void *, size_t, int);
> +void __thread_enable(void);
> +_Noreturn void __pthread_exit(void *);
> +void *__copy_tls(unsigned char *);
> +extern volatile size_t __pthread_tsd_size;
>  
>  static void dummy_0()
>  {
>  }
>  weak_alias(dummy_0, __acquire_ptc);
>  weak_alias(dummy_0, __release_ptc);
> -weak_alias(dummy_0, __pthread_tsd_run_dtors);
> -weak_alias(dummy_0, __do_private_robust_list);
> -weak_alias(dummy_0, __do_orphaned_stdio_locks);
> -
> -_Noreturn void pthread_exit(void *result)
> -{
> -	pthread_t self = __pthread_self();
> -	sigset_t set;
> -
> -	self->result = result;
> -
> -	while (self->cancelbuf) {
> -		void (*f)(void *) = self->cancelbuf->__f;
> -		void *x = self->cancelbuf->__x;
> -		self->cancelbuf = self->cancelbuf->__next;
> -		f(x);
> -	}
> -
> -	__pthread_tsd_run_dtors();
> -
> -	__lock(self->exitlock);
> -
> -	/* Mark this thread dead before decrementing count */
> -	__lock(self->killlock);
> -	self->dead = 1;
> -
> -	/* Block all signals before decrementing the live thread count.
> -	 * This is important to ensure that dynamically allocated TLS
> -	 * is not under-allocated/over-committed, and possibly for other
> -	 * reasons as well. */
> -	__block_all_sigs(&set);
> -
> -	/* Wait to unlock the kill lock, which governs functions like
> -	 * pthread_kill which target a thread id, until signals have
> -	 * been blocked. This precludes observation of the thread id
> -	 * as a live thread (with application code running in it) after
> -	 * the thread was reported dead by ESRCH being returned. */
> -	__unlock(self->killlock);
> -
> -	/* It's impossible to determine whether this is "the last thread"
> -	 * until performing the atomic decrement, since multiple threads
> -	 * could exit at the same time. For the last thread, revert the
> -	 * decrement and unblock signals to give the atexit handlers and
> -	 * stdio cleanup code a consistent state. */
> -	if (a_fetch_add(&libc.threads_minus_1, -1)==0) {
> -		libc.threads_minus_1 = 0;
> -		__restore_sigs(&set);
> -		exit(0);
> -	}
> -
> -	if (self->locale != &libc.global_locale) {
> -		a_dec(&libc.uselocale_cnt);
> -		if (self->locale->ctype_utf8)
> -			a_dec(&libc.bytelocale_cnt_minus_1);
> -	}
> -
> -	__do_private_robust_list();
> -	__do_orphaned_stdio_locks();
> -
> -	if (self->detached && self->map_base) {
> -		/* Detached threads must avoid the kernel clear_child_tid
> -		 * feature, since the virtual address will have been
> -		 * unmapped and possibly already reused by a new mapping
> -		 * at the time the kernel would perform the write. In
> -		 * the case of threads that started out detached, the
> -		 * initial clone flags are correct, but if the thread was
> -		 * detached later (== 2), we need to clear it here. */
> -		if (self->detached == 2) __syscall(SYS_set_tid_address, 0);
> -
> -		/* The following call unmaps the thread's stack mapping
> -		 * and then exits without touching the stack. */
> -		__unmapself(self->map_base, self->map_size);
> -	}
> -
> -	for (;;) __syscall(SYS_exit, 0);
> -}
> -
> -void __do_cleanup_push(struct __ptcb *cb)
> -{
> -	if (!libc.has_thread_pointer) return;
> -	struct pthread *self = __pthread_self();
> -	cb->__next = self->cancelbuf;
> -	self->cancelbuf = cb;
> -}
> -
> -void __do_cleanup_pop(struct __ptcb *cb)
> -{
> -	if (!libc.has_thread_pointer) return;
> -	__pthread_self()->cancelbuf = cb->__next;
> -}
>  
>  static int start(void *p)
>  {
> @@ -121,28 +40,10 @@ static int start(void *p)
>  
>  #define ROUND(x) (((x)+PAGE_SIZE-1)&-PAGE_SIZE)
>  
> -/* pthread_key_create.c overrides this */
> -static volatile size_t dummy = 0;
> -weak_alias(dummy, __pthread_tsd_size);
> -static void *dummy_tsd[1] = { 0 };
> -weak_alias(dummy_tsd, __pthread_tsd_main);
> -
> -static FILE *volatile dummy_file = 0;
> -weak_alias(dummy_file, __stdin_used);
> -weak_alias(dummy_file, __stdout_used);
> -weak_alias(dummy_file, __stderr_used);
> -
> -static void init_file_lock(FILE *f)
> -{
> -	if (f && f->lock<0) f->lock = 0;
> -}
> -
> -void *__copy_tls(unsigned char *);
> -
>  int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp, void *(*entry)(void *), void *restrict arg)
>  {
>  	int ret;
> -	size_t size, guard;
> +	size_t size, guard = 0;
>  	struct pthread *self, *new;
>  	unsigned char *map = 0, *stack = 0, *tsd = 0, *stack_limit;
>  	unsigned flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND
> @@ -153,16 +54,7 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp
>  
>  	if (!libc.can_do_threads) return ENOSYS;
>  	self = __pthread_self();
> -	if (!libc.threaded) {
> -		for (FILE *f=libc.ofl_head; f; f=f->next)
> -			init_file_lock(f);
> -		init_file_lock(__stdin_used);
> -		init_file_lock(__stdout_used);
> -		init_file_lock(__stderr_used);
> -		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
> -		self->tsd = (void **)__pthread_tsd_main;
> -		libc.threaded = 1;
> -	}
> +	if (!libc.threaded) __thread_enable();
>  	if (attrp) attr = *attrp;
>  
>  	__acquire_ptc();
> @@ -191,14 +83,14 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp
>  
>  	if (!tsd) {
>  		if (guard) {
> -			map = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
> +			map = __mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
>  			if (map == MAP_FAILED) goto fail;
> -			if (mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)) {
> -				munmap(map, size);
> +			if (__mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)) {
> +				__munmap(map, size);
>  				goto fail;
>  			}
>  		} else {
> -			map = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
> +			map = __mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
>  			if (map == MAP_FAILED) goto fail;
>  		}
>  		tsd = map + size - __pthread_tsd_size;
> @@ -240,7 +132,7 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp
>  
>  	if (ret < 0) {
>  		a_dec(&libc.threads_minus_1);
> -		if (map) munmap(map, size);
> +		if (map) __munmap(map, size);
>  		return EAGAIN;
>  	}
>  

See comments below on thrd_create.

> diff --git a/src/thread/pthread_detach.c b/src/thread/pthread_detach.c
> index 651c38e..5ca7855 100644
> --- a/src/thread/pthread_detach.c
> +++ b/src/thread/pthread_detach.c
> @@ -1,11 +1,15 @@
>  #include "pthread_impl.h"
>  
> -int pthread_detach(pthread_t t)
> +int __pthread_join(pthread_t t, void **res);
> +
> +int __pthread_detach(pthread_t t)
>  {
>  	/* Cannot detach a thread that's already exiting */
>  	if (a_swap(t->exitlock, 1))
> -		return pthread_join(t, 0);
> +		return __pthread_join(t, 0);
>  	t->detached = 2;
>  	__unlock(t->exitlock);
>  	return 0;
>  }
> +
> +weak_alias(__pthread_detach, pthread_detach);

OK.

> diff --git a/src/thread/pthread_equal.c b/src/thread/pthread_equal.c
> index 3e3df4f..38fb45e 100644
> --- a/src/thread/pthread_equal.c
> +++ b/src/thread/pthread_equal.c
> @@ -1,4 +1,4 @@
> -#include <pthread.h>
> +#include "pthread_impl.h"
>  
>  int (pthread_equal)(pthread_t a, pthread_t b)
>  {

This does not seem like a useful change and just increases compile
time.

> diff --git a/src/thread/pthread_exit.c b/src/thread/pthread_exit.c
> new file mode 100644
> index 0000000..4f51269
> --- /dev/null
> +++ b/src/thread/pthread_exit.c
> @@ -0,0 +1,130 @@
> +#define _GNU_SOURCE
> +#include "pthread_impl.h"
> +#include "stdio_impl.h"
> +#include "libc.h"
> +#include <sys/mman.h>
> +#include <threads.h>
> +
> +static void dummy_0()
> +{
> +}
> +weak_alias(dummy_0, __pthread_tsd_run_dtors);
> +weak_alias(dummy_0, __do_private_robust_list);
> +weak_alias(dummy_0, __do_orphaned_stdio_locks);
> +
> +_Noreturn void __pthread_exit(void *result)
> +{
> +	pthread_t self = __pthread_self();
> +	sigset_t set;
> +
> +	self->result = result;
> +
> +	while (self->cancelbuf) {
> +		void (*f)(void *) = self->cancelbuf->__f;
> +		void *x = self->cancelbuf->__x;
> +		self->cancelbuf = self->cancelbuf->__next;
> +		f(x);
> +	}
> +
> +	__pthread_tsd_run_dtors();
> +
> +	__lock(self->exitlock);
> +
> +	/* Mark this thread dead before decrementing count */
> +	__lock(self->killlock);
> +	self->dead = 1;
> +
> +	/* Block all signals before decrementing the live thread count.
> +	 * This is important to ensure that dynamically allocated TLS
> +	 * is not under-allocated/over-committed, and possibly for other
> +	 * reasons as well. */
> +	__block_all_sigs(&set);
> +
> +	/* Wait to unlock the kill lock, which governs functions like
> +	 * pthread_kill which target a thread id, until signals have
> +	 * been blocked. This precludes observation of the thread id
> +	 * as a live thread (with application code running in it) after
> +	 * the thread was reported dead by ESRCH being returned. */
> +	__unlock(self->killlock);
> +
> +	/* It's impossible to determine whether this is "the last thread"
> +	 * until performing the atomic decrement, since multiple threads
> +	 * could exit at the same time. For the last thread, revert the
> +	 * decrement and unblock signals to give the atexit handlers and
> +	 * stdio cleanup code a consistent state. */
> +	if (a_fetch_add(&libc.threads_minus_1, -1)==0) {
> +		libc.threads_minus_1 = 0;
> +		__restore_sigs(&set);
> +		exit(0);
> +	}
> +
> +	if (self->locale != &libc.global_locale) {
> +		a_dec(&libc.uselocale_cnt);
> +		if (self->locale->ctype_utf8)
> +			a_dec(&libc.bytelocale_cnt_minus_1);
> +	}
> +
> +	__do_private_robust_list();
> +	__do_orphaned_stdio_locks();
> +
> +	if (self->detached && self->map_base) {
> +		/* Detached threads must avoid the kernel clear_child_tid
> +		 * feature, since the virtual address will have been
> +		 * unmapped and possibly already reused by a new mapping
> +		 * at the time the kernel would perform the write. In
> +		 * the case of threads that started out detached, the
> +		 * initial clone flags are correct, but if the thread was
> +		 * detached later (== 2), we need to clear it here. */
> +		if (self->detached == 2) __syscall(SYS_set_tid_address, 0);
> +
> +		/* The following call unmaps the thread's stack mapping
> +		 * and then exits without touching the stack. */
> +		__unmapself(self->map_base, self->map_size);
> +	}
> +
> +	for (;;) __syscall(SYS_exit, 0);
> +}
> +
> +void __do_cleanup_push(struct __ptcb *cb)
> +{
> +	if (!libc.has_thread_pointer) return;
> +	struct pthread *self = __pthread_self();
> +	cb->__next = self->cancelbuf;
> +	self->cancelbuf = cb;
> +}
> +
> +void __do_cleanup_pop(struct __ptcb *cb)
> +{
> +	if (!libc.has_thread_pointer) return;
> +	__pthread_self()->cancelbuf = cb->__next;
> +}
> +
> +/* pthread_key_create.c overrides this */
> +static volatile size_t dummy = 0;
> +weak_alias(dummy, __pthread_tsd_size);
> +static void *dummy_tsd[1] = { 0 };
> +weak_alias(dummy_tsd, __pthread_tsd_main);
> +
> +static FILE *volatile dummy_file = 0;
> +weak_alias(dummy_file, __stdin_used);
> +weak_alias(dummy_file, __stdout_used);
> +weak_alias(dummy_file, __stderr_used);
> +
> +static void init_file_lock(FILE *f)
> +{
> +	if (f && f->lock<0) f->lock = 0;
> +}
> +
> +void __thread_enable(void)
> +{
> +	for (FILE *f=libc.ofl_head; f; f=f->next)
> +		init_file_lock(f);
> +	init_file_lock(__stdin_used);
> +	init_file_lock(__stdout_used);
> +	init_file_lock(__stderr_used);
> +	__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
> +	__pthread_self()->tsd = (void **)__pthread_tsd_main;
> +	libc.threaded = 1;
> +}
> +
> +weak_alias(__pthread_exit, pthread_exit);

See comments below on thrd_create.

> diff --git a/src/thread/pthread_getspecific.c b/src/thread/pthread_getspecific.c
> index b2a282c..bfc4294 100644
> --- a/src/thread/pthread_getspecific.c
> +++ b/src/thread/pthread_getspecific.c
> @@ -1,7 +1,10 @@
>  #include "pthread_impl.h"
>  
> -void *pthread_getspecific(pthread_key_t k)
> +static void *__pthread_getspecific(pthread_key_t k)
>  {
>  	struct pthread *self = __pthread_self();
>  	return self->tsd[k];
>  }
> +
> +weak_alias(__pthread_getspecific, pthread_getspecific);
> +weak_alias(__pthread_getspecific, tss_get);

Assuming we accept that multiple standard functions are allowed to
have the same address, I think this is probably okay. (IIRC there's a
DR that implies this is okay, and if so, we should probably apply it
to math later too.)


> diff --git a/src/thread/pthread_join.c b/src/thread/pthread_join.c
> index 719c91c..bca89aa 100644
> --- a/src/thread/pthread_join.c
> +++ b/src/thread/pthread_join.c
> @@ -1,15 +1,19 @@
>  #include "pthread_impl.h"
>  #include <sys/mman.h>
>  
> +int __munmap(void *, size_t);
> +
>  static void dummy(void *p)
>  {
>  }
>  
> -int pthread_join(pthread_t t, void **res)
> +int __pthread_join(pthread_t t, void **res)
>  {
>  	int tmp;
>  	while ((tmp = t->tid)) __timedwait(&t->tid, tmp, 0, 0, dummy, 0, 0);
>  	if (res) *res = t->result;
> -	if (t->map_base) munmap(t->map_base, t->map_size);
> +	if (t->map_base) __munmap(t->map_base, t->map_size);
>  	return 0;
>  }
> +
> +weak_alias(__pthread_join, pthread_join);

OK.

> diff --git a/src/thread/pthread_key_create.c b/src/thread/pthread_key_create.c
> index a9187f7..bfcd597 100644
> --- a/src/thread/pthread_key_create.c
> +++ b/src/thread/pthread_key_create.c
> @@ -9,7 +9,7 @@ static void nodtor(void *dummy)
>  {
>  }
>  
> -int pthread_key_create(pthread_key_t *k, void (*dtor)(void *))
> +int __pthread_key_create(pthread_key_t *k, void (*dtor)(void *))
>  {
>  	unsigned i = (uintptr_t)&k / 16 % PTHREAD_KEYS_MAX;
>  	unsigned j = i;
> @@ -31,7 +31,7 @@ int pthread_key_create(pthread_key_t *k, void (*dtor)(void *))
>  	return EAGAIN;
>  }
>  
> -int pthread_key_delete(pthread_key_t k)
> +int __pthread_key_delete(pthread_key_t k)
>  {
>  	keys[k] = 0;
>  	return 0;
> @@ -53,3 +53,6 @@ void __pthread_tsd_run_dtors()
>  		}
>  	}
>  }
> +
> +weak_alias(__pthread_key_delete, pthread_key_delete);
> +weak_alias(__pthread_key_create, pthread_key_create);

Looks ok.

> diff --git a/src/thread/pthread_mutex_lock.c b/src/thread/pthread_mutex_lock.c
> index 2a9a3aa..993f54c 100644
> --- a/src/thread/pthread_mutex_lock.c
> +++ b/src/thread/pthread_mutex_lock.c
> @@ -1,10 +1,14 @@
>  #include "pthread_impl.h"
>  
> -int pthread_mutex_lock(pthread_mutex_t *m)
> +int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict at);
> +
> +int __pthread_mutex_lock(pthread_mutex_t *m)
>  {
>  	if ((m->_m_type&15) == PTHREAD_MUTEX_NORMAL
>  	    && !a_cas(&m->_m_lock, 0, EBUSY))
>  		return 0;
>  
> -	return pthread_mutex_timedlock(m, 0);
> +	return __pthread_mutex_timedlock(m, 0);
>  }
> +
> +weak_alias(__pthread_mutex_lock, pthread_mutex_lock);

OK.

> diff --git a/src/thread/pthread_mutex_timedlock.c b/src/thread/pthread_mutex_timedlock.c
> index ae883f9..16241ee 100644
> --- a/src/thread/pthread_mutex_timedlock.c
> +++ b/src/thread/pthread_mutex_timedlock.c
> @@ -1,6 +1,6 @@
>  #include "pthread_impl.h"
>  
> -int pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict at)
> +int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict at)
>  {
>  	if ((m->_m_type&15) == PTHREAD_MUTEX_NORMAL
>  	    && !a_cas(&m->_m_lock, 0, EBUSY))
> @@ -30,3 +30,5 @@ int pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *
>  	}
>  	return r;
>  }
> +
> +weak_alias(__pthread_mutex_timedlock, pthread_mutex_timedlock);

OK.

> diff --git a/src/thread/pthread_mutex_trylock.c b/src/thread/pthread_mutex_trylock.c
> index e851517..9be7930 100644
> --- a/src/thread/pthread_mutex_trylock.c
> +++ b/src/thread/pthread_mutex_trylock.c
> @@ -50,9 +50,11 @@ int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
>  	return 0;
>  }
>  
> -int pthread_mutex_trylock(pthread_mutex_t *m)
> +static int __pthread_mutex_trylock(pthread_mutex_t *m)
>  {
>  	if ((m->_m_type&15) == PTHREAD_MUTEX_NORMAL)
>  		return a_cas(&m->_m_lock, 0, EBUSY) & EBUSY;
>  	return __pthread_mutex_trylock_owner(m);
>  }
> +
> +weak_alias(__pthread_mutex_trylock, pthread_mutex_trylock);

OK.

> diff --git a/src/thread/pthread_mutex_unlock.c b/src/thread/pthread_mutex_unlock.c
> index 46761d9..a7f39c7 100644
> --- a/src/thread/pthread_mutex_unlock.c
> +++ b/src/thread/pthread_mutex_unlock.c
> @@ -3,7 +3,7 @@
>  void __vm_lock_impl(int);
>  void __vm_unlock_impl(void);
>  
> -int pthread_mutex_unlock(pthread_mutex_t *m)
> +int __pthread_mutex_unlock(pthread_mutex_t *m)
>  {
>  	pthread_t self;
>  	int waiters = m->_m_waiters;
> @@ -36,3 +36,5 @@ int pthread_mutex_unlock(pthread_mutex_t *m)
>  		__wake(&m->_m_lock, 1, priv);
>  	return 0;
>  }
> +
> +weak_alias(__pthread_mutex_unlock, pthread_mutex_unlock);

OK.

> diff --git a/src/thread/pthread_once.c b/src/thread/pthread_once.c
> index 2eb0f93..05ebe69 100644
> --- a/src/thread/pthread_once.c
> +++ b/src/thread/pthread_once.c
> @@ -6,7 +6,7 @@ static void undo(void *control)
>  	__wake(control, 1, 1);
>  }
>  
> -int pthread_once(pthread_once_t *control, void (*init)(void))
> +int __pthread_once(pthread_once_t *control, void (*init)(void))
>  {
>  	static int waiters;
>  
> @@ -34,3 +34,5 @@ int pthread_once(pthread_once_t *control, void (*init)(void))
>  		return 0;
>  	}
>  }
> +
> +weak_alias(__pthread_once, pthread_once);

OK.

> diff --git a/src/thread/pthread_setcancelstate.c b/src/thread/pthread_setcancelstate.c
> index 060bcdc..2268217 100644
> --- a/src/thread/pthread_setcancelstate.c
> +++ b/src/thread/pthread_setcancelstate.c
> @@ -1,6 +1,6 @@
>  #include "pthread_impl.h"
>  
> -int pthread_setcancelstate(int new, int *old)
> +int __pthread_setcancelstate(int new, int *old)
>  {
>  	if (new > 1U) return EINVAL;
>  	if (!libc.has_thread_pointer) return ENOSYS;
> @@ -9,3 +9,5 @@ int pthread_setcancelstate(int new, int *old)
>  	self->canceldisable = new;
>  	return 0;
>  }
> +
> +weak_alias(__pthread_setcancelstate, pthread_setcancelstate);

Seems ok. Alternatively maybe we could have a simplfied inline version
of this (no error checking) in pthread_impl.h. But for now let's leave
it; I might do that, or it might even become unnecessary, along with
adding the new proposed cancellation mode later.

> diff --git a/src/thread/pthread_setcanceltype.c b/src/thread/pthread_setcanceltype.c
> index bf0a3f3..d493c1b 100644
> --- a/src/thread/pthread_setcanceltype.c
> +++ b/src/thread/pthread_setcanceltype.c
> @@ -1,11 +1,13 @@
>  #include "pthread_impl.h"
>  
> +void __pthread_testcancel(void);
> +
>  int pthread_setcanceltype(int new, int *old)
>  {
>  	struct pthread *self = __pthread_self();
>  	if (new > 1U) return EINVAL;
>  	if (old) *old = self->cancelasync;
>  	self->cancelasync = new;
> -	if (new) pthread_testcancel();
> +	if (new) __pthread_testcancel();
>  	return 0;
>  }

This change looks gratuitous. pthread_setcanceltype is in the POSIX
namespace anyway and isn't being changed for use in C11 (and it has no
use to C11).

> diff --git a/src/thread/pthread_setspecific.c b/src/thread/pthread_setspecific.c
> index 55e46a8..dd18b96 100644
> --- a/src/thread/pthread_setspecific.c
> +++ b/src/thread/pthread_setspecific.c
> @@ -1,6 +1,6 @@
>  #include "pthread_impl.h"
>  
> -int pthread_setspecific(pthread_key_t k, const void *x)
> +int __pthread_setspecific(pthread_key_t k, const void *x)
>  {
>  	struct pthread *self = __pthread_self();
>  	/* Avoid unnecessary COW */
> @@ -10,3 +10,5 @@ int pthread_setspecific(pthread_key_t k, const void *x)
>  	}
>  	return 0;
>  }
> +
> +weak_alias(__pthread_setspecific, pthread_setspecific);

It looks like you have a duplicate tss_set for this rather than using
the above, so either the above is a grauitous change, or the new
tss_set is duplicate code that should be removed. Or did I miss
something?

> diff --git a/src/thread/pthread_testcancel.c b/src/thread/pthread_testcancel.c
> index ba5f7c6..ee48e6d 100644
> --- a/src/thread/pthread_testcancel.c
> +++ b/src/thread/pthread_testcancel.c
> @@ -7,7 +7,9 @@ static void dummy()
>  
>  weak_alias(dummy, __testcancel);
>  
> -void pthread_testcancel()
> +void __pthread_testcancel()
>  {
>  	__testcancel();
>  }
> +
> +weak_alias(__pthread_testcancel, pthread_testcancel);

I think this is okay.

> diff --git a/src/thread/thrd_create.c b/src/thread/thrd_create.c
> new file mode 100644
> index 0000000..f72f992
> --- /dev/null
> +++ b/src/thread/thrd_create.c
> @@ -0,0 +1,98 @@
> +#define _GNU_SOURCE
> +#include "pthread_impl.h"
> +#include "stdio_impl.h"
> +#include "libc.h"
> +#include <sys/mman.h>
> +#include <threads.h>
> +
> +void *__mmap(void *, size_t, int, int, int, off_t);
> +int __munmap(void *, size_t);
> +int __mprotect(void *, size_t, int);
> +void __thread_enable(void);
> +_Noreturn void __pthread_exit(void *);
> +void *__copy_tls(unsigned char *);
> +extern volatile size_t __pthread_tsd_size;
> +
> +_Noreturn void thrd_exit(int result) {
> +	__pthread_exit((void*)(intptr_t)result);
> +}
> +
> +static void dummy_0()
> +{
> +}
> +weak_alias(dummy_0, __acquire_ptc);
> +weak_alias(dummy_0, __release_ptc);
> +
> +static int start(void *p)
> +{
> +	thrd_t self = p;
> +	int (*start)(void*) = (int(*)(void*)) self->start;
> +	thrd_exit(start(self->start_arg));
> +	return 0;
> +}
> +
> +#define ROUND(x) (((x)+PAGE_SIZE-1)&-PAGE_SIZE)
> +
> +int thrd_create(thrd_t *res, thrd_start_t entry, void *arg)
> +{
> +	int ret = -ENOMEM;
> +	size_t guard = ROUND(DEFAULT_GUARD_SIZE);
> +	size_t size = guard + ROUND(DEFAULT_STACK_SIZE + libc.tls_size +  __pthread_tsd_size);
> +	struct pthread *self, *new;
> +	unsigned char *map, *stack, *tsd, *stack_limit;
> +	unsigned flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND
> +		| CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS
> +		| CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | CLONE_DETACHED;
> +
> +	if (!libc.can_do_threads) return thrd_error;
> +	self = __pthread_self();
> +	if (!libc.threaded) __thread_enable();
> +
> +	__acquire_ptc();
> +
> +	if (guard) {
> +		map = __mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
> +		if (map == MAP_FAILED) goto CLEANUP;
> +		if (__mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)) {
> +			__munmap(map, size);
> +			goto CLEANUP;
> +		}
> +	} else {
> +		map = __mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
> +		if (map == MAP_FAILED) goto CLEANUP;
> +	}
> +	tsd = map + size - __pthread_tsd_size;
> +	stack = tsd - libc.tls_size;
> +	stack_limit = map + guard;
> +
> +	new = __copy_tls(tsd - libc.tls_size);
> +	new->map_base = map;
> +	new->map_size = size;
> +	new->stack = stack;
> +	new->stack_size = stack - stack_limit;
> +	new->start = (void *(*)(void*))entry;
> +	new->start_arg = arg;
> +	new->self = new;
> +	new->tsd = (void *)tsd;
> +	new->locale = &libc.global_locale;
> +	new->unblock_cancel = self->cancel;
> +	new->canary = self->canary;
> +
> +	a_inc(&libc.threads_minus_1);
> +	ret = __clone(start, stack, flags, new, &new->tid, TP_ADJ(new), &new->tid);
> +
> + CLEANUP:
> +	__release_ptc();
> +
> +	if (ret > 0) {
> +          *res = new;
> +          ret = thrd_success;
> +        } else if (ret == -ENOMEM) {
> +          ret = thrd_nomem;
> +        } else {
> +          a_dec(&libc.threads_minus_1);
> +          if (map) __munmap(map, size);
> +          ret = thrd_error;
> +	}
> +        return ret;
> +}

Minor indention/formatting issues. But my main concern is the amount
of code duplication, especially delicate implementation details where
it would be easy for them to get out of sync. To me that seems like a
bigger concern than thrd_create pulling in a few hundred bytes more
than it should (for the extra unused functionality in pthread_create).

Didn't we discuss before just passing a special attr pointer to
__pthread_create to effect C11-style creation?

> diff --git a/src/thread/thrd_current.c b/src/thread/thrd_current.c
> new file mode 100644
> index 0000000..1728535
> --- /dev/null
> +++ b/src/thread/thrd_current.c
> @@ -0,0 +1,11 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +/* Not all arch have __pthread_self as a symbol. On some this results
> +   in some magic. So this "call" to __pthread_self is not necessary a
> +   tail call. In particular, other than the appearance, thrd_current
> +   can not be implemented as a weak symbol. */
> +pthread_t thrd_current()
> +{
> +	return __pthread_self();
> +}

OK, but I'm not sure the detailed comment is needed. The exact same
code appears in pthread_self.c.

> diff --git a/src/thread/thrd_detach.c b/src/thread/thrd_detach.c
> new file mode 100644
> index 0000000..211b8fb
> --- /dev/null
> +++ b/src/thread/thrd_detach.c
> @@ -0,0 +1,18 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +int __munmap(void *, size_t);
> +
> +int thrd_detach(thrd_t t)
> +{
> +	/* Cannot detach a thread that's already exiting */
> +	if (a_swap(t->exitlock, 1)){
> +		int tmp;
> +		while ((tmp = t->tid)) __timedwait(&t->tid, tmp, 0, 0, 0, 0, 0);
> +		if (t->map_base) __munmap(t->map_base, t->map_size);
> +	} else {
> +		t->detached = 2;
> +		__unlock(t->exitlock);
> +	}
> +	return thrd_success;
> +}

You already moved pthread_detach to the protected namespace, so this
seems to be duplicate code. And it's a place that has implementation
details (magic numbers like 2, which you could argue shouldn't exist
;-) so I think it's best to avoid spreading these details out over
more places where they could go out of sync, no?

> diff --git a/src/thread/thrd_equal.c b/src/thread/thrd_equal.c
> new file mode 100644
> index 0000000..ac49a44
> --- /dev/null
> +++ b/src/thread/thrd_equal.c
> @@ -0,0 +1,6 @@
> +#include <threads.h>
> +
> +int (thrd_equal)(thrd_t a, thrd_t b)
> +{
> +	return a==b;
> +}

OK.

> diff --git a/src/thread/thrd_join.c b/src/thread/thrd_join.c
> new file mode 100644
> index 0000000..a8c7aed
> --- /dev/null
> +++ b/src/thread/thrd_join.c
> @@ -0,0 +1,16 @@
> +#include "pthread_impl.h"
> +#include <sys/mman.h>
> +#include <threads.h>
> +
> +int __munmap(void *, size_t);
> +
> +/* C11 threads cannot be canceled, so there is no need for a
> +   cancelation function pointer, here. */
> +int thrd_join(thrd_t t, int *res)
> +{
> +	int tmp;
> +	while ((tmp = t->tid)) __timedwait(&t->tid, tmp, 0, 0, 0, 0, 0);
> +	if (res) *res = (int)(intptr_t)t->result;
> +	if (t->map_base) __munmap(t->map_base, t->map_size);
> +	return thrd_success;
> +}

Likewise here there's dupliation.

> diff --git a/src/thread/tss_create.c b/src/thread/tss_create.c
> new file mode 100644
> index 0000000..0cbadc8
> --- /dev/null
> +++ b/src/thread/tss_create.c
> @@ -0,0 +1,13 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +int __pthread_key_create(pthread_key_t *k, void (*dtor)(void *));
> +
> +int tss_create(tss_t *tss, tss_dtor_t dtor)
> +{
> +	__THRD_ABI_MARK;
> +	/* Different error returns are possible. C glues them together into
> +	   just failure notification. Can't be optimized to a tail call,
> +	   unless thrd_error equals EAGAIN. */
> +	return __pthread_key_create(tss, dtor) ? thrd_error : thrd_success;
> +}

OK.

> diff --git a/src/thread/tss_delete.c b/src/thread/tss_delete.c
> new file mode 100644
> index 0000000..d50731f
> --- /dev/null
> +++ b/src/thread/tss_delete.c
> @@ -0,0 +1,8 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +int __pthread_key_delete(pthread_key_t k);
> +
> +void (tss_delete)(tss_t key) {
> +	(void)__pthread_key_delete(key);
> +}

This could probably be done with just an alias, but I'm fine with the
tail-call. It should be a very rarely-used function (ideally,
never-used) since it's rare for there to be any safe way to destroy a
key once you create it.

> diff --git a/src/thread/tss_set.c b/src/thread/tss_set.c
> new file mode 100644
> index 0000000..70c4fb7
> --- /dev/null
> +++ b/src/thread/tss_set.c
> @@ -0,0 +1,13 @@
> +#include "pthread_impl.h"
> +#include <threads.h>
> +
> +int tss_set(tss_t k, void *x)
> +{
> +	struct pthread *self = __pthread_self();
> +	/* Avoid unnecessary COW */
> +	if (self->tsd[k] != x) {
> +		self->tsd[k] = x;
> +		self->tsd_used = 1;
> +	}
> +	return thrd_success;
> +}

As mentioned before, this is duplicate, and it could probably just be
an alias.

> diff --git a/src/time/thrd_sleep.c b/src/time/thrd_sleep.c
> new file mode 100644
> index 0000000..6570b0c
> --- /dev/null
> +++ b/src/time/thrd_sleep.c
> @@ -0,0 +1,31 @@
> +#include <time.h>
> +#include <errno.h>
> +#include "syscall.h"
> +#include "libc.h"
> +#include "threads.h"
> +
> +/* Roughly __syscall already returns the right thing: 0 if all went
> +   well or a negative error indication, otherwise. */
> +int thrd_sleep(const struct timespec *req, struct timespec *rem)
> +{
> +  int ret = __syscall(SYS_nanosleep, req, rem);
> +  // compile time comparison, constant propagated
> +  if (EINTR != 1) {
> +    switch (ret) {
> +    case 0: return 0;
> +      /* error described by POSIX:                                    */
> +      /* EINTR  The nanosleep() function was interrupted by a signal. */
> +      /* The C11 wording is:                                          */
> +      /* -1 if it has been interrupted by a signal                    */
> +    case -EINTR:  return -1;
> +      /* error described by POSIX */
> +    case -EINVAL: return -2;
> +      /* described for linux */
> +    case -EFAULT: return -3;
> +      /* No other error returns are specified. */
> +    default: return INT_MIN;
> +    }
> +  }
> +  // potentially a tail call
> +  return ret;
> +}

__syscall is almost never a "tail call" because it's rarely a call at
all; usually, it's inline asm. :-) But okay. My preference would be
not to do fancy remapping of error codes (especially not when the
codes will differ by arch, since this just encourages programs to
assume they're fixed and then break on the arch where they're
different, if any).

Also indention is inconsistent with musl (tabs for indention, spaces
for alignment).

> diff --git a/src/time/timespec_get.c b/src/time/timespec_get.c
> new file mode 100644
> index 0000000..bf78e5a
> --- /dev/null
> +++ b/src/time/timespec_get.c
> @@ -0,0 +1,9 @@
> +#include <time.h>
> +
> +int __clock_gettime(clockid_t clk, struct timespec *ts);
> +
> +/* the base argument is simply ignored, there is no other implemented
> +   value than TIME_UTC. */
> +int timespec_get(struct timespec * ts, int base) {
> +  return __clock_gettime(CLOCK_REALTIME, ts) < 0 ? 0 : base;
> +}

As mentioned in another email (off-list, IIRC), I'd like to reject
unknown base values so that programs built against a newer musl that
has other clock bases, but which get an older musl at runtime, will
see an error rather than silently using the wrong clock. Does this
make sense?

Rich


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-27 23:46 ` Rich Felker
@ 2014-08-28  9:40   ` Jens Gustedt
  2014-08-28 11:41     ` Szabolcs Nagy
  2014-08-28 16:15     ` Rich Felker
  0 siblings, 2 replies; 26+ messages in thread
From: Jens Gustedt @ 2014-08-28  9:40 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 27730 bytes --]

Hi,
thanks for the fast review and reply, Rich.

I will not reply to all raised issues, otherwise this gets out of
bounds :) Consider the ones where I don't as agreed.

For a start, I didn't intend this to be integrated into musl as just
one patch, this was just meant for easier review my list of perhaps 30
patches that I have divided this currently. Once we agree on the
content, I would regroup and rebase these to reasonable semantical
units, and propose them as 4 or 5 patches, say.

Am Mittwoch, den 27.08.2014, 19:46 -0400 schrieb Rich Felker:
> On Thu, Aug 28, 2014 at 12:11:45AM +0200, Jens Gustedt wrote:
> > diff --git a/arch/arm/bits/alltypes.h.in b/arch/arm/bits/alltypes.h.in
> > index 183c4c4..333d43f 100644
> > --- a/arch/arm/bits/alltypes.h.in
> > +++ b/arch/arm/bits/alltypes.h.in
> > @@ -19,7 +19,7 @@ TYPEDEF long time_t;
> >  TYPEDEF long suseconds_t;
> >  
> >  TYPEDEF struct { union { int __i[9]; unsigned __s[9]; } __u; } pthread_attr_t;
> > -TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
> > -TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } pthread_cond_t;
> > +TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } __pthread_mutex_t;
> > +TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } __pthread_cond_t;
> > [...]
> > +TYPEDEF __pthread_cond_t pthread_cond_t;
> > +TYPEDEF __pthread_cond_t cnd_t;
> > +TYPEDEF __pthread_mutex_t pthread_mutex_t;
> > +TYPEDEF __pthread_mutex_t mtx_t;
> 
> This changes the C++ ABI by changing the tag for the POSIX types, so
> it can't be done this way. We could return to some serious language
> lawyering about whether it's valid to access members via the 'wrong'
> struct type when we have two identical struct types, or just assume
> that's safe for now and do it.

I don't see your point, this doesn't change the tag name, no? (There
is no tag name, here.) This is just typedeffing to the same type as
before so this should be "as allowed" as it was before.

> > +#ifdef __GNUC__
> > +#define _Nonnull(...) __attribute__((__nonnull__(__VA_ARGS__)))
> > +#else
> > +#define _Nonnull(...)
> > +#endif
> 
> If we want to use attributes like this, it's a separate change from
> adding threads. But I'm fairly much against doing it anyway since the
> compiler already knows how to make these warnings for standard
> functions, without help from the headers.

There will be a while, until compilers know these things for C11
thread functions, I think.

BTw, I was tempted to do it the "C" way by using things like
`[static 1]` instead of `*`, but then I thought you wouldn't like
that.

> > +#ifndef __THRD_EXPERIMENTAL
> > +# define __THRD_EXPERIMENTAL 1
> > +#endif
> > +
> > +  /* The following list of 9 integer constants makes up for the binary
> > +     compatibility of this C thread implementation. You must never
> > +     link code against versions of the C library that do not agree
> > +     upon these ABI parameters.
> > [...]
> 
> Getting this committed to musl, or at least making the first release
> with it present, should be committing to the ABI, so I don't think we
> need any ABI-check type stuff here.

right, I'll pull that out

> But I am curious how it works.
> __THRD_ABI_CHECK doesn't seem to be used anywhere. Is the intent just
> that you would put it in a program manually?

yes, that was the use case I had when starting all this. Should be
obsolete by now.

> > +#define thread_local _Thread_local
> 
> This needs to be omitted for C++11+, I think.

right

> > diff --git a/include/time.h b/include/time.h
> > index dc88070..13dccaa 100644
> > --- a/include/time.h
> > +++ b/include/time.h
> > @@ -129,6 +129,19 @@ int stime(const time_t *);
> >  time_t timegm(struct tm *);
> >  #endif
> >  
> > +#if __STDC_VERSION__ >= 201112L
> > +  /* Implementation specific choice: The epoch that the TIME_UTC clock
> > +     is based upon is the Unix Epoch, that is a struct timespec
> > +     represents the number of seconds that have elapsed since 00:00:00
> > +     Coordinated Universal Time (UTC), Thursday, 1 January
> > +     1970. Because of differences in leap seconds this is not
> > +     completely equivalent to UTC. */
> > +  /* Beware that the TIME_UTC constant itself per the standard must be
> > +     greater than 0. */
> > +#define TIME_UTC 1
> > +int timespec_get(struct timespec *, int);
> > +#endif
> > +
> >  #ifdef __cplusplus
> >  }
> >  #endif
> 
> So far, when it comes to C11 library functionality as opposed to
> compiler features that might not be available without compiler
> support, the approach we've taken in musl is to expose them
> unconditionally. This makes it possible to use the library features
> even on compilers without explicit C11 Support.

ok, I'll make that unconditional

> Arguments could be
> made either way on this but I think we should be consistent. It might
> also be more appropriate to factor out timespec_get as a separate
> patch since it's nominally separate from threads, although I don't
> care much either way. Presumably the main use of it is for arguments
> to timedwait/timedlock operations.

see my remark on regrouping into subsets of patches above

> > diff --git a/src/mman/mprotect.c b/src/mman/mprotect.c
> > index f488486..535787b 100644
> > --- a/src/mman/mprotect.c
> > +++ b/src/mman/mprotect.c
> > @@ -2,10 +2,12 @@
> >  #include "libc.h"
> >  #include "syscall.h"
> >  
> > -int mprotect(void *addr, size_t len, int prot)
> > +int __mprotect(void *addr, size_t len, int prot)
> >  {
> >  	size_t start, end;
> >  	start = (size_t)addr & -PAGE_SIZE;
> >  	end = (size_t)((char *)addr + len + PAGE_SIZE-1) & -PAGE_SIZE;
> >  	return syscall(SYS_mprotect, start, end-start, prot);
> >  }
> > +
> > +weak_alias(__mprotect, mprotect);
> 
> Here it might be nicer to just use __syscall(SYS_mprotect...) directly
> in pthread_create. We don't need the extra page alignment overhead
> there, only for the public function, and __syscall is lighter than a
> function call anyway.

I'd rather leave such intrusive changes to the pthread code to you :)

> > diff --git a/src/thread/cnd_broadcast.c b/src/thread/cnd_broadcast.c
> > new file mode 100644
> > index 0000000..b1db667
> > --- /dev/null
> > +++ b/src/thread/cnd_broadcast.c
> > @@ -0,0 +1,9 @@
> > +#include "pthread_impl.h"
> > +#include <threads.h>
> > +
> > +int __pthread_cond_broadcast(pthread_cond_t *);
> > +
> > +int cnd_broadcast(cnd_t * cnd) {
> > +	int ret = __pthread_cond_broadcast(cnd);
> > +	return ret ? thrd_error : thrd_success;
> > +}
> 
> This could/should actually be a separate function using
> __private_signal from pthread_cond_timedwait.c, I think.

right, good idea (and for all the similar or implied changes you
mention after)

> Also I'm fine with omitting the error conversion logic since the
> called function is implemented so as never to return an error (and in
> fact POSIX defines no errors, so it would have to be an additional
> implementation-defined error, which musl strongly avoids). If you want
> this to be more clear, a short comment should probably suffice without
> dead code to convert error numbers that never occur.

ok

> > diff --git a/src/thread/cnd_destroy.c b/src/thread/cnd_destroy.c
> > new file mode 100644
> > index 0000000..11cfc19
> > --- /dev/null
> > +++ b/src/thread/cnd_destroy.c
> > @@ -0,0 +1,17 @@
> > +#include "pthread_impl.h"
> > +#include <threads.h>
> > +
> > +int __pthread_cond_destroy(pthread_cond_t *);
> > +
> > +/* The behavior of cnd_destroy is undefined if cnd is still in
> > +   use. The choice for pthread_cond_destroy in that situation is to
> > +   wake up all users before destroying. I am not sure that we should
> > +   do it like that here, too. Alternatives would be:
> > +   - complain by using perror or equivalent
> > +   - assert that there is no waiter
> > +   - abort when there is a waiter
> > +   - do nothing
> > +   */
> > +void (cnd_destroy)(cnd_t *cnd) {
> > +	(void)__pthread_cond_destroy(cnd);
> > +}
> 
> This can just be a NOP now. The non-NOP pthread_cond_destroy is only
> for process-shared cond vars. Non-shared ones do not need any action
> for destruction.

right, this is definitively an advantage of the new cond code

> > diff --git a/src/thread/cnd_timedwait.c b/src/thread/cnd_timedwait.c
> > new file mode 100644
> > index 0000000..cce47a4
> > --- /dev/null
> > +++ b/src/thread/cnd_timedwait.c
> > @@ -0,0 +1,14 @@
> > +#include "pthread_impl.h"
> > +#include <threads.h>
> > +
> > +int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m, const struct timespec *restrict ts);
> > +
> > +int cnd_timedwait(cnd_t *restrict cond, mtx_t *restrict mutex, const struct timespec *restrict ts) {
> > +	int ret = __pthread_cond_timedwait(cond, mutex, ts);
> > +	switch (ret) {
> > +	/* May also return EINVAL or EPERM. */
> > +	default:        return thrd_error;
> > +	case 0:         return thrd_success;
> > +	case ETIMEDOUT: return thrd_timedout;
> > +	}
> > +}
>
> C11 isn't clear on what happens when the timespec is non-normalized.

yes, I noticed that, too, have that on my list of necessary
clarifications for C11, now

> Should we still just treat this as an error, or attempt to normalize
> it first?

I would leave it as is, I'd rather like to stay as close to the POSIX
behavior as possible

> > diff --git a/src/thread/mtx_init.c b/src/thread/mtx_init.c
> > new file mode 100644
> > index 0000000..827a56a
> > --- /dev/null
> > +++ b/src/thread/mtx_init.c
> > @@ -0,0 +1,10 @@
> > +#include "pthread_impl.h"
> > +#include <threads.h>
> > +
> > +int mtx_init(mtx_t * m, int type)
> > +{
> > +	*m = (pthread_mutex_t) {
> > +		._m_type = ((type&mtx_recursive) ? PTHREAD_MUTEX_RECURSIVE : 0),
> > +	};
> > +	return thrd_success;
> > +}
> 
> I'm okay with a 0 here since we assume all over that {0} init for a
> mutex is a default type mutex, but you might prefer something like
> PTHREAD_MUTEX_NORMAL here instead of 0.

yes

> > diff --git a/src/thread/mtx_lock.c b/src/thread/mtx_lock.c
> > new file mode 100644
> > index 0000000..89730f1
> > --- /dev/null
> > +++ b/src/thread/mtx_lock.c
> > @@ -0,0 +1,14 @@
> > +#include "pthread_impl.h"
> > +#include <threads.h>
> > +
> > +int mtx_lock(mtx_t *m)
> > +{
> > +	__THRD_ABI_MARK;
> > +	if (m->_m_type == PTHREAD_MUTEX_NORMAL && !a_cas(&m->_m_lock, 0, EBUSY))
> > +		return thrd_success;
> > +	/* Calling mtx_timedlock with a null pointer is an
> > +	   extension. Such a call is convenient, here, since it avoids
> > +	   to repeat the case analysis that is already done for
> > +	   mtx_timedlock. */
> > +	return mtx_timedlock(m, 0);
> > +}
> > diff --git a/src/thread/mtx_timedlock.c b/src/thread/mtx_timedlock.c
> > new file mode 100644
> > index 0000000..c42a096
> > --- /dev/null
> > +++ b/src/thread/mtx_timedlock.c
> > @@ -0,0 +1,19 @@
> > +#include "pthread_impl.h"
> > +#include <threads.h>
> > +
> > +int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict ts);
> > +
> > +int mtx_timedlock(mtx_t *restrict mutex, const struct timespec *restrict ts) {
> 
> It might be worth duplicating the a_cas attempt here for normal-type;
> I'm not sure.

that would be premature optimization :)

> > diff --git a/src/thread/mtx_unlock.c b/src/thread/mtx_unlock.c
> > new file mode 100644
> > index 0000000..d1721a6
> > --- /dev/null
> > +++ b/src/thread/mtx_unlock.c
> > @@ -0,0 +1,11 @@
> > +#include "pthread_impl.h"
> > +#include <threads.h>
> > +
> > +int __pthread_mutex_unlock(pthread_mutex_t *);
> > +
> > +int (mtx_unlock)(mtx_t *mtx) {
> > +	/* In the best of all worlds this is a tail call. */
> > +	int ret = __pthread_mutex_unlock(mtx);
> > +	/* In case of UB may also return EPERM. */
> > +	return ret ? thrd_error : thrd_success;
> > +}
> 
> For unlock, C11 makes it UB to unlock a mutex you don't own (even for
> recursive type), so the tail call would be legal. I don't have a
> strong opinion either way here.

I'll remove the comment, this is a left over. Tail call is still not
possible, since moraly thrd_success is not 0 :)

But we could just return thrd_success unconditionnally.

> > diff --git a/src/thread/pthread_cond_wait.c b/src/thread/pthread_cond_wait.c
> > index 8735bf1..58656f7 100644
> > --- a/src/thread/pthread_cond_wait.c
> > +++ b/src/thread/pthread_cond_wait.c
> > @@ -1,6 +1,8 @@
> >  #include "pthread_impl.h"
> >  
> > +int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m, const struct timespec *restrict ts);
> > +
> >  int pthread_cond_wait(pthread_cond_t *restrict c, pthread_mutex_t *restrict m)
> >  {
> > -	return pthread_cond_timedwait(c, m, 0);
> > +	return __pthread_cond_timedwait(c, m, 0);
> >  }
> 
> This is unnecessary.

right

> > diff --git a/src/thread/pthread_equal.c b/src/thread/pthread_equal.c
> > index 3e3df4f..38fb45e 100644
> > --- a/src/thread/pthread_equal.c
> > +++ b/src/thread/pthread_equal.c
> > @@ -1,4 +1,4 @@
> > -#include <pthread.h>
> > +#include "pthread_impl.h"
> >  
> >  int (pthread_equal)(pthread_t a, pthread_t b)
> >  {
> 
> This does not seem like a useful change and just increases compile
> time.

a leftover, sorry

> > diff --git a/src/thread/pthread_setcanceltype.c b/src/thread/pthread_setcanceltype.c
> > index bf0a3f3..d493c1b 100644
> > --- a/src/thread/pthread_setcanceltype.c
> > +++ b/src/thread/pthread_setcanceltype.c
> > @@ -1,11 +1,13 @@
> >  #include "pthread_impl.h"
> >  
> > +void __pthread_testcancel(void);
> > +
> >  int pthread_setcanceltype(int new, int *old)
> >  {
> >  	struct pthread *self = __pthread_self();
> >  	if (new > 1U) return EINVAL;
> >  	if (old) *old = self->cancelasync;
> >  	self->cancelasync = new;
> > -	if (new) pthread_testcancel();
> > +	if (new) __pthread_testcancel();
> >  	return 0;
> >  }
> 
> This change looks gratuitous. pthread_setcanceltype is in the POSIX
> namespace anyway and isn't being changed for use in C11 (and it has no
> use to C11).

right

> > diff --git a/src/thread/pthread_setspecific.c b/src/thread/pthread_setspecific.c
> > index 55e46a8..dd18b96 100644
> > --- a/src/thread/pthread_setspecific.c
> > +++ b/src/thread/pthread_setspecific.c
> > @@ -1,6 +1,6 @@
> >  #include "pthread_impl.h"
> >  
> > -int pthread_setspecific(pthread_key_t k, const void *x)
> > +int __pthread_setspecific(pthread_key_t k, const void *x)
> >  {
> >  	struct pthread *self = __pthread_self();
> >  	/* Avoid unnecessary COW */
> > @@ -10,3 +10,5 @@ int pthread_setspecific(pthread_key_t k, const void *x)
> >  	}
> >  	return 0;
> >  }
> > +
> > +weak_alias(__pthread_setspecific, pthread_setspecific);
> 
> It looks like you have a duplicate tss_set for this rather than using
> the above, so either the above is a grauitous change, or the new
> tss_set is duplicate code that should be removed. Or did I miss
> something?

you are right, this should be omitted

(the tss_get code differs in the const qualification of the second argument)

> > diff --git a/src/thread/thrd_create.c b/src/thread/thrd_create.c
> > new file mode 100644
> > index 0000000..f72f992
> > --- /dev/null
> > +++ b/src/thread/thrd_create.c
> > @@ -0,0 +1,98 @@
> > +#define _GNU_SOURCE
> > +#include "pthread_impl.h"
> > +#include "stdio_impl.h"
> > +#include "libc.h"
> > +#include <sys/mman.h>
> > +#include <threads.h>
> > +
> > +void *__mmap(void *, size_t, int, int, int, off_t);
> > +int __munmap(void *, size_t);
> > +int __mprotect(void *, size_t, int);
> > +void __thread_enable(void);
> > +_Noreturn void __pthread_exit(void *);
> > +void *__copy_tls(unsigned char *);
> > +extern volatile size_t __pthread_tsd_size;
> > +
> > +_Noreturn void thrd_exit(int result) {
> > +	__pthread_exit((void*)(intptr_t)result);
> > +}
> > +
> > +static void dummy_0()
> > +{
> > +}
> > +weak_alias(dummy_0, __acquire_ptc);
> > +weak_alias(dummy_0, __release_ptc);
> > +
> > +static int start(void *p)
> > +{
> > +	thrd_t self = p;
> > +	int (*start)(void*) = (int(*)(void*)) self->start;
> > +	thrd_exit(start(self->start_arg));
> > +	return 0;
> > +}
> > +
> > +#define ROUND(x) (((x)+PAGE_SIZE-1)&-PAGE_SIZE)
> > +
> > +int thrd_create(thrd_t *res, thrd_start_t entry, void *arg)
> > +{
> > +	int ret = -ENOMEM;
> > +	size_t guard = ROUND(DEFAULT_GUARD_SIZE);
> > +	size_t size = guard + ROUND(DEFAULT_STACK_SIZE + libc.tls_size +  __pthread_tsd_size);
> > +	struct pthread *self, *new;
> > +	unsigned char *map, *stack, *tsd, *stack_limit;
> > +	unsigned flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND
> > +		| CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS
> > +		| CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | CLONE_DETACHED;
> > +
> > +	if (!libc.can_do_threads) return thrd_error;
> > +	self = __pthread_self();
> > +	if (!libc.threaded) __thread_enable();
> > +
> > +	__acquire_ptc();
> > +
> > +	if (guard) {
> > +		map = __mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
> > +		if (map == MAP_FAILED) goto CLEANUP;
> > +		if (__mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)) {
> > +			__munmap(map, size);
> > +			goto CLEANUP;
> > +		}
> > +	} else {
> > +		map = __mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
> > +		if (map == MAP_FAILED) goto CLEANUP;
> > +	}
> > +	tsd = map + size - __pthread_tsd_size;
> > +	stack = tsd - libc.tls_size;
> > +	stack_limit = map + guard;
> > +
> > +	new = __copy_tls(tsd - libc.tls_size);
> > +	new->map_base = map;
> > +	new->map_size = size;
> > +	new->stack = stack;
> > +	new->stack_size = stack - stack_limit;
> > +	new->start = (void *(*)(void*))entry;
> > +	new->start_arg = arg;
> > +	new->self = new;
> > +	new->tsd = (void *)tsd;
> > +	new->locale = &libc.global_locale;
> > +	new->unblock_cancel = self->cancel;
> > +	new->canary = self->canary;
> > +
> > +	a_inc(&libc.threads_minus_1);
> > +	ret = __clone(start, stack, flags, new, &new->tid, TP_ADJ(new), &new->tid);
> > +
> > + CLEANUP:
> > +	__release_ptc();
> > +
> > +	if (ret > 0) {
> > +          *res = new;
> > +          ret = thrd_success;
> > +        } else if (ret == -ENOMEM) {
> > +          ret = thrd_nomem;
> > +        } else {
> > +          a_dec(&libc.threads_minus_1);
> > +          if (map) __munmap(map, size);
> > +          ret = thrd_error;
> > +	}
> > +        return ret;
> > +}
> 
> Minor indention/formatting issues. But my main concern is the amount
> of code duplication, especially delicate implementation details where
> it would be easy for them to get out of sync. To me that seems like a
> bigger concern than thrd_create pulling in a few hundred bytes more
> than it should (for the extra unused functionality in pthread_create).
> 
> Didn't we discuss before just passing a special attr pointer to
> __pthread_create to effect C11-style creation?

I did that, and that version could still be revived. But I was not too
happy with it, because it meant intruding into the existing pthread
code. For this version, the pthread code is just reorganized in
different TU, but otherwise there should be no change.

Also I'd really like to advertise C threads as a lightweighted,
stripped down thread implementation, so the short cuts (that are
merely a fall off of the code separation, here) were politically
welcome.

> > diff --git a/src/thread/thrd_current.c b/src/thread/thrd_current.c
> > new file mode 100644
> > index 0000000..1728535
> > --- /dev/null
> > +++ b/src/thread/thrd_current.c
> > @@ -0,0 +1,11 @@
> > +#include "pthread_impl.h"
> > +#include <threads.h>
> > +
> > +/* Not all arch have __pthread_self as a symbol. On some this results
> > +   in some magic. So this "call" to __pthread_self is not necessary a
> > +   tail call. In particular, other than the appearance, thrd_current
> > +   can not be implemented as a weak symbol. */
> > +pthread_t thrd_current()
> > +{
> > +	return __pthread_self();
> > +}
> 
> OK, but I'm not sure the detailed comment is needed. The exact same
> code appears in pthread_self.c.

I certainly have the tendency to comment a bit more than you :)

All this could potentially divert from pthread one day, so I'd like to
have such information in place for future coders or maintainers
(myself, probably)

> > diff --git a/src/thread/thrd_detach.c b/src/thread/thrd_detach.c
> > new file mode 100644
> > index 0000000..211b8fb
> > --- /dev/null
> > +++ b/src/thread/thrd_detach.c
> > @@ -0,0 +1,18 @@
> > +#include "pthread_impl.h"
> > +#include <threads.h>
> > +
> > +int __munmap(void *, size_t);
> > +
> > +int thrd_detach(thrd_t t)
> > +{
> > +	/* Cannot detach a thread that's already exiting */
> > +	if (a_swap(t->exitlock, 1)){
> > +		int tmp;
> > +		while ((tmp = t->tid)) __timedwait(&t->tid, tmp, 0, 0, 0, 0, 0);
> > +		if (t->map_base) __munmap(t->map_base, t->map_size);
> > +	} else {
> > +		t->detached = 2;
> > +		__unlock(t->exitlock);
> > +	}
> > +	return thrd_success;
> > +}
> 
> You already moved pthread_detach to the protected namespace, so this
> seems to be duplicate code.

It is.

> And it's a place that has implementation
> details (magic numbers like 2, which you could argue shouldn't exist
> ;-)

I would !)

> so I think it's best to avoid spreading these details out over
> more places where they could go out of sync, no?

yes, probably, the fact that pthread_detach called pthread_join
somehow disturbed me. I'll reconsider

> > diff --git a/src/thread/tss_delete.c b/src/thread/tss_delete.c
> > new file mode 100644
> > index 0000000..d50731f
> > --- /dev/null
> > +++ b/src/thread/tss_delete.c
> > @@ -0,0 +1,8 @@
> > +#include "pthread_impl.h"
> > +#include <threads.h>
> > +
> > +int __pthread_key_delete(pthread_key_t k);
> > +
> > +void (tss_delete)(tss_t key) {
> > +	(void)__pthread_key_delete(key);
> > +}
> 
> This could probably be done with just an alias, but I'm fine with the
> tail-call.

I thought of this, but I was reluctant to make an alias to a function
with a different calling convention. Couldn't that pollute the return
register in an unpredictable way?

> It should be a very rarely-used function (ideally,
> never-used) since it's rare for there to be any safe way to destroy a
> key once you create it.

Yes, exactly. That's why I also didn't try to make this a #define or
static inline in the haeder file, it's not worth the effort.

> > diff --git a/src/thread/tss_set.c b/src/thread/tss_set.c
> > new file mode 100644
> > index 0000000..70c4fb7
> > --- /dev/null
> > +++ b/src/thread/tss_set.c
> > @@ -0,0 +1,13 @@
> > +#include "pthread_impl.h"
> > +#include <threads.h>
> > +
> > +int tss_set(tss_t k, void *x)
> > +{
> > +	struct pthread *self = __pthread_self();
> > +	/* Avoid unnecessary COW */
> > +	if (self->tsd[k] != x) {
> > +		self->tsd[k] = x;
> > +		self->tsd_used = 1;
> > +	}
> > +	return thrd_success;
> > +}
> 
> As mentioned before, this is duplicate, and it could probably just be
> an alias.

no, I don't think so, it has a different interface, there is no const

> > diff --git a/src/time/thrd_sleep.c b/src/time/thrd_sleep.c
> > new file mode 100644
> > index 0000000..6570b0c
> > --- /dev/null
> > +++ b/src/time/thrd_sleep.c
> > @@ -0,0 +1,31 @@
> > +#include <time.h>
> > +#include <errno.h>
> > +#include "syscall.h"
> > +#include "libc.h"
> > +#include "threads.h"
> > +
> > +/* Roughly __syscall already returns the right thing: 0 if all went
> > +   well or a negative error indication, otherwise. */
> > +int thrd_sleep(const struct timespec *req, struct timespec *rem)
> > +{
> > +  int ret = __syscall(SYS_nanosleep, req, rem);
> > +  // compile time comparison, constant propagated
> > +  if (EINTR != 1) {
> > +    switch (ret) {
> > +    case 0: return 0;
> > +      /* error described by POSIX:                                    */
> > +      /* EINTR  The nanosleep() function was interrupted by a signal. */
> > +      /* The C11 wording is:                                          */
> > +      /* -1 if it has been interrupted by a signal                    */
> > +    case -EINTR:  return -1;
> > +      /* error described by POSIX */
> > +    case -EINVAL: return -2;
> > +      /* described for linux */
> > +    case -EFAULT: return -3;
> > +      /* No other error returns are specified. */
> > +    default: return INT_MIN;
> > +    }
> > +  }
> > +  // potentially a tail call
> > +  return ret;
> > +}
> 
> __syscall is almost never a "tail call" because it's rarely a call at
> all; usually, it's inline asm. :-)

I'll remove that comment and the check for EINTR being 1.

> But okay. My preference would be
> not to do fancy remapping of error codes (especially not when the
> codes will differ by arch, since this just encourages programs to
> assume they're fixed and then break on the arch where they're
> different, if any).

We have to be at least partially fancy, because C enforces -1 for the
interrupt case without really being precise what that should mean. I
just naively assumed that they mean the same as EINTR :)

For EINVAL, as discussed above, I'd expect that the C specification
will be modified to also have the validity check for the req
argument. POSIX has a "shall" here, so I'd like to keep the case well
identified.

For the EFAULT case, I don't have a strong opinion, could be either
way.

> Also indention is inconsistent with musl (tabs for indention, spaces
> for alignment).

ah, still, sorry

> > diff --git a/src/time/timespec_get.c b/src/time/timespec_get.c
> > new file mode 100644
> > index 0000000..bf78e5a
> > --- /dev/null
> > +++ b/src/time/timespec_get.c
> > @@ -0,0 +1,9 @@
> > +#include <time.h>
> > +
> > +int __clock_gettime(clockid_t clk, struct timespec *ts);
> > +
> > +/* the base argument is simply ignored, there is no other implemented
> > +   value than TIME_UTC. */
> > +int timespec_get(struct timespec * ts, int base) {
> > +  return __clock_gettime(CLOCK_REALTIME, ts) < 0 ? 0 : base;
> > +}
> 
> As mentioned in another email (off-list, IIRC), I'd like to reject
> unknown base values so that programs built against a newer musl that
> has other clock bases, but which get an older musl at runtime, will
> see an error rather than silently using the wrong clock. Does this
> make sense?

I makes perfect sense, but I don't recall you having that said as
clearly.

BTW, the choice of CLOCK_REALTIME, here, could be subject of
debade. IIRC this clock doesn't exactly give the seconds since Unix
Epoch, but that value minus some leapseconds. So it doesn't seem to
fulfill the C specification.

One could think of using CLOCK_MONOTONIC_RAW, instead, or
CLOCK_BOOTTIME if it exists. But I am not sure if the bootime of the
machine can be considered as an "epoch" in the sense of the C standard
or even in plain English.

Jens

-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::





[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-28  9:40   ` Jens Gustedt
@ 2014-08-28 11:41     ` Szabolcs Nagy
  2014-08-28 16:15     ` Rich Felker
  1 sibling, 0 replies; 26+ messages in thread
From: Szabolcs Nagy @ 2014-08-28 11:41 UTC (permalink / raw)
  To: musl

* Jens Gustedt <jens.gustedt@inria.fr> [2014-08-28 11:40:50 +0200]:
> Am Mittwoch, den 27.08.2014, 19:46 -0400 schrieb Rich Felker:
> > On Thu, Aug 28, 2014 at 12:11:45AM +0200, Jens Gustedt wrote:
> > > +#define _Nonnull(...) __attribute__((__nonnull__(__VA_ARGS__)))
> > > +#else
> > > +#define _Nonnull(...)
> > > +#endif
> > 
> > If we want to use attributes like this, it's a separate change from
> > adding threads. But I'm fairly much against doing it anyway since the
> > compiler already knows how to make these warnings for standard
> > functions, without help from the headers.
> 
> There will be a while, until compilers know these things for C11
> thread functions, I think.
> 
> BTw, I was tempted to do it the "C" way by using things like
> `[static 1]` instead of `*`, but then I thought you wouldn't like
> that.
> 

hm i think [static 1] would be better than a gcc specific attribute

but gcc <=4.9 and clang <=3.1 don't seem to warn about 0 argument then
(we could still use it though)

probably should be reported to gcc devs

> > > +int thrd_sleep(const struct timespec *req, struct timespec *rem)
> > > +{
> > > +  int ret = __syscall(SYS_nanosleep, req, rem);
> > > +  // compile time comparison, constant propagated
> > > +  if (EINTR != 1) {
> > > +    switch (ret) {
> > > +    case 0: return 0;
> > > +      /* error described by POSIX:                                    */

pls use tabs for those of us who are used to 8 width indent


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-28  9:40   ` Jens Gustedt
  2014-08-28 11:41     ` Szabolcs Nagy
@ 2014-08-28 16:15     ` Rich Felker
  2014-08-28 19:28       ` Jens Gustedt
  1 sibling, 1 reply; 26+ messages in thread
From: Rich Felker @ 2014-08-28 16:15 UTC (permalink / raw)
  To: musl

On Thu, Aug 28, 2014 at 11:40:50AM +0200, Jens Gustedt wrote:
> Hi,
> thanks for the fast review and reply, Rich.
> 
> I will not reply to all raised issues, otherwise this gets out of
> bounds :) Consider the ones where I don't as agreed.
> 
> For a start, I didn't intend this to be integrated into musl as just
> one patch, this was just meant for easier review my list of perhaps 30
> patches that I have divided this currently. Once we agree on the
> content, I would regroup and rebase these to reasonable semantical
> units, and propose them as 4 or 5 patches, say.

OK, that makes sense.

> Am Mittwoch, den 27.08.2014, 19:46 -0400 schrieb Rich Felker:
> > On Thu, Aug 28, 2014 at 12:11:45AM +0200, Jens Gustedt wrote:
> > > diff --git a/arch/arm/bits/alltypes.h.in b/arch/arm/bits/alltypes.h.in
> > > index 183c4c4..333d43f 100644
> > > --- a/arch/arm/bits/alltypes.h.in
> > > +++ b/arch/arm/bits/alltypes.h.in
> > > @@ -19,7 +19,7 @@ TYPEDEF long time_t;
> > >  TYPEDEF long suseconds_t;
> > >  
> > >  TYPEDEF struct { union { int __i[9]; unsigned __s[9]; } __u; } pthread_attr_t;
> > > -TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
> > > -TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } pthread_cond_t;
> > > +TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } __pthread_mutex_t;
> > > +TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } __pthread_cond_t;
> > > [...]
> > > +TYPEDEF __pthread_cond_t pthread_cond_t;
> > > +TYPEDEF __pthread_cond_t cnd_t;
> > > +TYPEDEF __pthread_mutex_t pthread_mutex_t;
> > > +TYPEDEF __pthread_mutex_t mtx_t;
> > 
> > This changes the C++ ABI by changing the tag for the POSIX types, so
> > it can't be done this way. We could return to some serious language
> > lawyering about whether it's valid to access members via the 'wrong'
> > struct type when we have two identical struct types, or just assume
> > that's safe for now and do it.
> 
> I don't see your point, this doesn't change the tag name, no? (There
> is no tag name, here.) This is just typedeffing to the same type as
> before so this should be "as allowed" as it was before.

That's not how it works. In C++, there is no separate namespace for
struct tags versus types. If you define a struct without a tag but
using typedef, the typedef name becomes its effective struct tag,
which is what gets used for name mangling. If you later use typedef
again to make an alias for this type, it does not make a new struct
type with a new tag, just an alias for the existing one. Thus, your
patch as written changes the struct tags for C++ ABI from
"pthread_mutex_t" and "pthread_cond_t" to "__pthread_mutex_t" and
"__pthread_cond_t".

Yes this is ugly. Blame the C++ folks. (Not sure whether it's the
standard's fault or the ABI's fault.)

> > > +#ifdef __GNUC__
> > > +#define _Nonnull(...) __attribute__((__nonnull__(__VA_ARGS__)))
> > > +#else
> > > +#define _Nonnull(...)
> > > +#endif
> > 
> > If we want to use attributes like this, it's a separate change from
> > adding threads. But I'm fairly much against doing it anyway since the
> > compiler already knows how to make these warnings for standard
> > functions, without help from the headers.
> 
> There will be a while, until compilers know these things for C11
> thread functions, I think.
> 
> BTw, I was tempted to do it the "C" way by using things like
> `[static 1]` instead of `*`, but then I thought you wouldn't like
> that.

Actually I like it much better, but I'm not sufficiently familiar with
it to know whether it's a conforming declaration. Is it formally
compatible with the standard prototype? If so, how does it allow the
compiler to enforce anything?

> > > diff --git a/src/mman/mprotect.c b/src/mman/mprotect.c
> > > index f488486..535787b 100644
> > > --- a/src/mman/mprotect.c
> > > +++ b/src/mman/mprotect.c
> > > @@ -2,10 +2,12 @@
> > >  #include "libc.h"
> > >  #include "syscall.h"
> > >  
> > > -int mprotect(void *addr, size_t len, int prot)
> > > +int __mprotect(void *addr, size_t len, int prot)
> > >  {
> > >  	size_t start, end;
> > >  	start = (size_t)addr & -PAGE_SIZE;
> > >  	end = (size_t)((char *)addr + len + PAGE_SIZE-1) & -PAGE_SIZE;
> > >  	return syscall(SYS_mprotect, start, end-start, prot);
> > >  }
> > > +
> > > +weak_alias(__mprotect, mprotect);
> > 
> > Here it might be nicer to just use __syscall(SYS_mprotect...) directly
> > in pthread_create. We don't need the extra page alignment overhead
> > there, only for the public function, and __syscall is lighter than a
> > function call anyway.
> 
> I'd rather leave such intrusive changes to the pthread code to you :)

OK, I can go ahead and make it as a "shrink code and reduce namespace
dependence in preparation for C11 threads" change.

> > > diff --git a/src/thread/mtx_timedlock.c b/src/thread/mtx_timedlock.c
> > > new file mode 100644
> > > index 0000000..c42a096
> > > --- /dev/null
> > > +++ b/src/thread/mtx_timedlock.c
> > > @@ -0,0 +1,19 @@
> > > +#include "pthread_impl.h"
> > > +#include <threads.h>
> > > +
> > > +int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict ts);
> > > +
> > > +int mtx_timedlock(mtx_t *restrict mutex, const struct timespec *restrict ts) {
> > 
> > It might be worth duplicating the a_cas attempt here for normal-type;
> > I'm not sure.
> 
> that would be premature optimization :)

Perhaps. The same optimization helps in some other places, but it's of
course non-essential, so let's revisit it later sometime after commit.

> > > diff --git a/src/thread/mtx_unlock.c b/src/thread/mtx_unlock.c
> > > new file mode 100644
> > > index 0000000..d1721a6
> > > --- /dev/null
> > > +++ b/src/thread/mtx_unlock.c
> > > @@ -0,0 +1,11 @@
> > > +#include "pthread_impl.h"
> > > +#include <threads.h>
> > > +
> > > +int __pthread_mutex_unlock(pthread_mutex_t *);
> > > +
> > > +int (mtx_unlock)(mtx_t *mtx) {
> > > +	/* In the best of all worlds this is a tail call. */
> > > +	int ret = __pthread_mutex_unlock(mtx);
> > > +	/* In case of UB may also return EPERM. */
> > > +	return ret ? thrd_error : thrd_success;
> > > +}
> > 
> > For unlock, C11 makes it UB to unlock a mutex you don't own (even for
> > recursive type), so the tail call would be legal. I don't have a
> > strong opinion either way here.
> 
> I'll remove the comment, this is a left over. Tail call is still not
> possible, since moraly thrd_success is not 0 :)
> 
> But we could just return thrd_success unconditionnally.

Or: thrd_success ? thrd_success : ret;

But really, I'm fine with internal assumptions that success is 0.
That's part of why we would want to define it that way.

> > > diff --git a/src/thread/pthread_setspecific.c b/src/thread/pthread_setspecific.c
> > > index 55e46a8..dd18b96 100644
> > > --- a/src/thread/pthread_setspecific.c
> > > +++ b/src/thread/pthread_setspecific.c
> > > @@ -1,6 +1,6 @@
> > >  #include "pthread_impl.h"
> > >  
> > > -int pthread_setspecific(pthread_key_t k, const void *x)
> > > +int __pthread_setspecific(pthread_key_t k, const void *x)
> > >  {
> > >  	struct pthread *self = __pthread_self();
> > >  	/* Avoid unnecessary COW */
> > > @@ -10,3 +10,5 @@ int pthread_setspecific(pthread_key_t k, const void *x)
> > >  	}
> > >  	return 0;
> > >  }
> > > +
> > > +weak_alias(__pthread_setspecific, pthread_setspecific);
> > 
> > It looks like you have a duplicate tss_set for this rather than using
> > the above, so either the above is a grauitous change, or the new
> > tss_set is duplicate code that should be removed. Or did I miss
> > something?
> 
> you are right, this should be omitted
> 
> (the tss_get code differs in the const qualification of the second argument)

In this case it's still possible to implement tss_set as a wrapper,
just not an alias, but the code is sufficiently small that maybe it's
better just to duplicate it.

> > Didn't we discuss before just passing a special attr pointer to
> > __pthread_create to effect C11-style creation?
> 
> I did that, and that version could still be revived. But I was not too
> happy with it, because it meant intruding into the existing pthread
> code. For this version, the pthread code is just reorganized in
> different TU, but otherwise there should be no change.

From a maintainability standpoint, having two subtly different
versions of the same complex code is a big cost. I wouldn't be
fundamentally opposed to a factoring that avoids this duplication
while allowing the POSIX-specific parts to be optimized out for pure
C11 use, but I think that would be a lot more invasive (mainly in
terms of potential for errors, possibly also for performance, but
unlikely since basically all the time is spent in mmap, mprotect, and
clone anyway) and would be something to look into at a later time, not
while trying to get the initial work on C11 threads in.

> Also I'd really like to advertise C threads as a lightweighted,
> stripped down thread implementation, so the short cuts (that are
> merely a fall off of the code separation, here) were politically
> welcome.

I don't think saving 200 bytes in a pure-C11 static-linked program
while adding 300 bytes to libc.so in code duplication (these numbers
are just guesses but sound about right) really makes it come across as
more lightweight.

> > > diff --git a/src/thread/thrd_current.c b/src/thread/thrd_current.c
> > > new file mode 100644
> > > index 0000000..1728535
> > > --- /dev/null
> > > +++ b/src/thread/thrd_current.c
> > > @@ -0,0 +1,11 @@
> > > +#include "pthread_impl.h"
> > > +#include <threads.h>
> > > +
> > > +/* Not all arch have __pthread_self as a symbol. On some this results
> > > +   in some magic. So this "call" to __pthread_self is not necessary a
> > > +   tail call. In particular, other than the appearance, thrd_current
> > > +   can not be implemented as a weak symbol. */
> > > +pthread_t thrd_current()
> > > +{
> > > +	return __pthread_self();
> > > +}
> > 
> > OK, but I'm not sure the detailed comment is needed. The exact same
> > code appears in pthread_self.c.
> 
> I certainly have the tendency to comment a bit more than you :)

Yes, I have a bias towards omitting comments unless there's some
serious nontrivial logic (and even then sometimes I omit them). When a
comment would be more a matter of "justification of changes" or "why
it's being done this way rather than some other way", I usually use
the git log rather than source-level comments since I feel like the
text is more useful to someone studying the history of the source than
someone trying to understand what it's doing now.

> > > diff --git a/src/thread/thrd_detach.c b/src/thread/thrd_detach.c
> > > new file mode 100644
> > > index 0000000..211b8fb
> > > --- /dev/null
> > > +++ b/src/thread/thrd_detach.c
> > > @@ -0,0 +1,18 @@
> > > +#include "pthread_impl.h"
> > > +#include <threads.h>
> > > +
> > > +int __munmap(void *, size_t);
> > > +
> > > +int thrd_detach(thrd_t t)
> > > +{
> > > +	/* Cannot detach a thread that's already exiting */
> > > +	if (a_swap(t->exitlock, 1)){
> > > +		int tmp;
> > > +		while ((tmp = t->tid)) __timedwait(&t->tid, tmp, 0, 0, 0, 0, 0);
> > > +		if (t->map_base) __munmap(t->map_base, t->map_size);
> > > +	} else {
> > > +		t->detached = 2;
> > > +		__unlock(t->exitlock);
> > > +	}
> > > +	return thrd_success;
> > > +}
> > 
> > You already moved pthread_detach to the protected namespace, so this
> > seems to be duplicate code.
> 
> It is.
> 
> > And it's a place that has implementation
> > details (magic numbers like 2, which you could argue shouldn't exist
> > ;-)
> 
> I would !)
> 
> > so I think it's best to avoid spreading these details out over
> > more places where they could go out of sync, no?
> 
> yes, probably, the fact that pthread_detach called pthread_join
> somehow disturbed me. I'll reconsider

It's not so odd if you think about it -- when the thread to be
detached has already exited, you can't (implementation-wise) detach it
and leave it responsible for cleaning up itself, because it doesn't
exist anymore. Since you know it's exited though, joining and throwing
away the result is possible.

> > > diff --git a/src/thread/tss_delete.c b/src/thread/tss_delete.c
> > > new file mode 100644
> > > index 0000000..d50731f
> > > --- /dev/null
> > > +++ b/src/thread/tss_delete.c
> > > @@ -0,0 +1,8 @@
> > > +#include "pthread_impl.h"
> > > +#include <threads.h>
> > > +
> > > +int __pthread_key_delete(pthread_key_t k);
> > > +
> > > +void (tss_delete)(tss_t key) {
> > > +	(void)__pthread_key_delete(key);
> > > +}
> > 
> > This could probably be done with just an alias, but I'm fine with the
> > tail-call.
> 
> I thought of this, but I was reluctant to make an alias to a function
> with a different calling convention. Couldn't that pollute the return
> register in an unpredictable way?

I missed that -- sorry. Indeed, the way you've done it is fine.

> > > diff --git a/src/thread/tss_set.c b/src/thread/tss_set.c
> > > new file mode 100644
> > > index 0000000..70c4fb7
> > > --- /dev/null
> > > +++ b/src/thread/tss_set.c
> > > @@ -0,0 +1,13 @@
> > > +#include "pthread_impl.h"
> > > +#include <threads.h>
> > > +
> > > +int tss_set(tss_t k, void *x)
> > > +{
> > > +	struct pthread *self = __pthread_self();
> > > +	/* Avoid unnecessary COW */
> > > +	if (self->tsd[k] != x) {
> > > +		self->tsd[k] = x;
> > > +		self->tsd_used = 1;
> > > +	}
> > > +	return thrd_success;
> > > +}
> > 
> > As mentioned before, this is duplicate, and it could probably just be
> > an alias.
> 
> no, I don't think so, it has a different interface, there is no const

Well a wrapper would work, but the function is small/trivial enough
that a wrapper is not significantly smaller or simpler than just
duplicating it like you've done.

> > > +/* Roughly __syscall already returns the right thing: 0 if all went
> > > +   well or a negative error indication, otherwise. */
> > > +int thrd_sleep(const struct timespec *req, struct timespec *rem)
> > > +{
> > > +  int ret = __syscall(SYS_nanosleep, req, rem);
> > > +  // compile time comparison, constant propagated
> > > +  if (EINTR != 1) {
> > > +    switch (ret) {
> > > +    case 0: return 0;
> > > +      /* error described by POSIX:                                    */
> > > +      /* EINTR  The nanosleep() function was interrupted by a signal. */
> > > +      /* The C11 wording is:                                          */
> > > +      /* -1 if it has been interrupted by a signal                    */
> > > +    case -EINTR:  return -1;
> > > +      /* error described by POSIX */
> > > +    case -EINVAL: return -2;
> > > +      /* described for linux */
> > > +    case -EFAULT: return -3;
> > > +      /* No other error returns are specified. */
> > > +    default: return INT_MIN;
> > > +    }
> > > +  }
> > > +  // potentially a tail call
> > > +  return ret;
> > > +}
> > 
> > __syscall is almost never a "tail call" because it's rarely a call at
> > all; usually, it's inline asm. :-)
> 
> I'll remove that comment and the check for EINTR being 1.

OK.

> > But okay. My preference would be
> > not to do fancy remapping of error codes (especially not when the
> > codes will differ by arch, since this just encourages programs to
> > assume they're fixed and then break on the arch where they're
> > different, if any).
> 
> We have to be at least partially fancy, because C enforces -1 for the
> interrupt case without really being precise what that should mean. I
> just naively assumed that they mean the same as EINTR :)

I agree.

> For EINVAL, as discussed above, I'd expect that the C specification
> will be modified to also have the validity check for the req
> argument. POSIX has a "shall" here, so I'd like to keep the case well
> identified.

That seems likely, or they might make it undefined. C <3 UB...

> For the EFAULT case, I don't have a strong opinion, could be either
> way.

EFAULT really isn't an error the caller can expect to get. If there's
any userspace work aside from the syscall, it would be SIGSEGV
instead.

Is there a reason non-EINTR errors need to be mapped to distinct
values, though? I was thinking just 3 cases, 0, EINTR, and default,
would suffice.

> > > diff --git a/src/time/timespec_get.c b/src/time/timespec_get.c
> > > new file mode 100644
> > > index 0000000..bf78e5a
> > > --- /dev/null
> > > +++ b/src/time/timespec_get.c
> > > @@ -0,0 +1,9 @@
> > > +#include <time.h>
> > > +
> > > +int __clock_gettime(clockid_t clk, struct timespec *ts);
> > > +
> > > +/* the base argument is simply ignored, there is no other implemented
> > > +   value than TIME_UTC. */
> > > +int timespec_get(struct timespec * ts, int base) {
> > > +  return __clock_gettime(CLOCK_REALTIME, ts) < 0 ? 0 : base;
> > > +}
> > 
> > As mentioned in another email (off-list, IIRC), I'd like to reject
> > unknown base values so that programs built against a newer musl that
> > has other clock bases, but which get an older musl at runtime, will
> > see an error rather than silently using the wrong clock. Does this
> > make sense?
> 
> I makes perfect sense, but I don't recall you having that said as
> clearly.

Sorry.

> BTW, the choice of CLOCK_REALTIME, here, could be subject of
> debade. IIRC this clock doesn't exactly give the seconds since Unix
> Epoch, but that value minus some leapseconds. So it doesn't seem to
> fulfill the C specification.

With any luck, UTC might eventually be fixed to drop leap seconds, and
then it won't matter for future times (and having your clock set to a
past time is nonsense anyway). But I don't think C is strict on how
the clock corresponds to real-world time; at least it wasn't before
C11. And POSIX time explicitly excludes leap seconds.

> One could think of using CLOCK_MONOTONIC_RAW, instead, or
> CLOCK_BOOTTIME if it exists. But I am not sure if the bootime of the
> machine can be considered as an "epoch" in the sense of the C standard
> or even in plain English.

I think the "implementation defined" "epoch" should actually be
defined as something fixed, not something that varies. And for the
main intended use of the function (passing the timespec to
timedlock/timedwait functions), we need to match the clock the pthread
functions use if we're wrapping the pthread implementations, which
means using CLOCK_REALTIME.

Rich


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-28 16:15     ` Rich Felker
@ 2014-08-28 19:28       ` Jens Gustedt
  2014-08-28 20:00         ` Rich Felker
  0 siblings, 1 reply; 26+ messages in thread
From: Jens Gustedt @ 2014-08-28 19:28 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 13754 bytes --]

Hello,

Am Donnerstag, den 28.08.2014, 12:15 -0400 schrieb Rich Felker:
> > Am Mittwoch, den 27.08.2014, 19:46 -0400 schrieb Rich Felker:
> > > On Thu, Aug 28, 2014 at 12:11:45AM +0200, Jens Gustedt wrote:
> > > > diff --git a/arch/arm/bits/alltypes.h.in b/arch/arm/bits/alltypes.h.in
> > > > index 183c4c4..333d43f 100644
> > > > --- a/arch/arm/bits/alltypes.h.in
> > > > +++ b/arch/arm/bits/alltypes.h.in
> > > > @@ -19,7 +19,7 @@ TYPEDEF long time_t;
> > > >  TYPEDEF long suseconds_t;
> > > >  
> > > >  TYPEDEF struct { union { int __i[9]; unsigned __s[9]; } __u; } pthread_attr_t;
> > > > -TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
> > > > -TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } pthread_cond_t;
> > > > +TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } __pthread_mutex_t;
> > > > +TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } __pthread_cond_t;
> > > > [...]
> > > > +TYPEDEF __pthread_cond_t pthread_cond_t;
> > > > +TYPEDEF __pthread_cond_t cnd_t;
> > > > +TYPEDEF __pthread_mutex_t pthread_mutex_t;
> > > > +TYPEDEF __pthread_mutex_t mtx_t;
> > > 
> > > This changes the C++ ABI by changing the tag for the POSIX types, so
> > > it can't be done this way. We could return to some serious language
> > > lawyering about whether it's valid to access members via the 'wrong'
> > > struct type when we have two identical struct types, or just assume
> > > that's safe for now and do it.
> > 
> > I don't see your point, this doesn't change the tag name, no? (There
> > is no tag name, here.) This is just typedeffing to the same type as
> > before so this should be "as allowed" as it was before.
> 
> That's not how it works. In C++, there is no separate namespace for
> struct tags versus types. If you define a struct without a tag but
> using typedef, the typedef name becomes its effective struct tag,
> which is what gets used for name mangling. If you later use typedef
> again to make an alias for this type, it does not make a new struct
> type with a new tag, just an alias for the existing one. Thus, your
> patch as written changes the struct tags for C++ ABI from
> "pthread_mutex_t" and "pthread_cond_t" to "__pthread_mutex_t" and
> "__pthread_cond_t".

argh

at least it doesn't matter for the standard functions (they are `extern
"C"`) but only for user functions with C++ interfaces.

Well, ok, so if you could come up with some better idea in the future,
let me know.

> > > > +#ifdef __GNUC__
> > > > +#define _Nonnull(...) __attribute__((__nonnull__(__VA_ARGS__)))
> > > > +#else
> > > > +#define _Nonnull(...)
> > > > +#endif
> > > 
> > > If we want to use attributes like this, it's a separate change from
> > > adding threads. But I'm fairly much against doing it anyway since the
> > > compiler already knows how to make these warnings for standard
> > > functions, without help from the headers.
> > 
> > There will be a while, until compilers know these things for C11
> > thread functions, I think.
> > 
> > BTw, I was tempted to do it the "C" way by using things like
> > `[static 1]` instead of `*`, but then I thought you wouldn't like
> > that.
> 
> Actually I like it much better, but I'm not sufficiently familiar with
> it to know whether it's a conforming declaration. Is it formally
> compatible with the standard prototype?

yes, C allows even for crufty things like

void f(int a[static volatile const 42]);

to be equivalent to

void f(int *volatile const a);

as a prototype. Interfaces can contain more information than just the
prototype.

> If so, how does it allow the compiler to enforce anything? 

It doesn't enforce anything, it is more a hint at the same level as
`restrict` or `register`. It is a prerequisite that the user of the
function has to ensure.

As nsz remarked in his reply, most older compilers don't do anything
with it, they just ignore it, though the existence of the nonnull
attribute shows that they would be easily capable of doing so.

The other inconvenience for `static 1` is C++. They haven't adopted
it, so as such this would make the headers incompatible with C++. So
also for this one we would need some preprocessor magic.

And then, also, it is ugly :(

> > > > diff --git a/src/thread/mtx_unlock.c b/src/thread/mtx_unlock.c
> > > > new file mode 100644
> > > > index 0000000..d1721a6
> > > > --- /dev/null
> > > > +++ b/src/thread/mtx_unlock.c
> > > > @@ -0,0 +1,11 @@
> > > > +#include "pthread_impl.h"
> > > > +#include <threads.h>
> > > > +
> > > > +int __pthread_mutex_unlock(pthread_mutex_t *);
> > > > +
> > > > +int (mtx_unlock)(mtx_t *mtx) {
> > > > +	/* In the best of all worlds this is a tail call. */
> > > > +	int ret = __pthread_mutex_unlock(mtx);
> > > > +	/* In case of UB may also return EPERM. */
> > > > +	return ret ? thrd_error : thrd_success;
> > > > +}
> > > 
> > > For unlock, C11 makes it UB to unlock a mutex you don't own (even for
> > > recursive type), so the tail call would be legal. I don't have a
> > > strong opinion either way here.
> > 
> > I'll remove the comment, this is a left over. Tail call is still not
> > possible, since moraly thrd_success is not 0 :)
> > 
> > But we could just return thrd_success unconditionnally.
> 
> Or: thrd_success ? thrd_success : ret;

ok, I'll go for that

> > > > diff --git a/src/thread/pthread_setspecific.c b/src/thread/pthread_setspecific.c
> > > > index 55e46a8..dd18b96 100644
> > > > --- a/src/thread/pthread_setspecific.c
> > > > +++ b/src/thread/pthread_setspecific.c
> > > > @@ -1,6 +1,6 @@
> > > >  #include "pthread_impl.h"
> > > >  
> > > > -int pthread_setspecific(pthread_key_t k, const void *x)
> > > > +int __pthread_setspecific(pthread_key_t k, const void *x)
> > > >  {
> > > >  	struct pthread *self = __pthread_self();
> > > >  	/* Avoid unnecessary COW */
> > > > @@ -10,3 +10,5 @@ int pthread_setspecific(pthread_key_t k, const void *x)
> > > >  	}
> > > >  	return 0;
> > > >  }
> > > > +
> > > > +weak_alias(__pthread_setspecific, pthread_setspecific);
> > > 
> > > It looks like you have a duplicate tss_set for this rather than using
> > > the above, so either the above is a grauitous change, or the new
> > > tss_set is duplicate code that should be removed. Or did I miss
> > > something?
> > 
> > you are right, this should be omitted
> > 
> > (the tss_get code differs in the const qualification of the second argument)
> 
> In this case it's still possible to implement tss_set as a wrapper,

I am really allergic against casts, even more if they are somewhat
hidden. So I'd do much to avoid that.

> just not an alias, but the code is sufficiently small that maybe it's
> better just to duplicate it.

ok

> > > Didn't we discuss before just passing a special attr pointer to
> > > __pthread_create to effect C11-style creation?
> > 
> > I did that, and that version could still be revived. But I was not too
> > happy with it, because it meant intruding into the existing pthread
> > code. For this version, the pthread code is just reorganized in
> > different TU, but otherwise there should be no change.
> 
> From a maintainability standpoint, having two subtly different
> versions of the same complex code is a big cost. I wouldn't be
> fundamentally opposed to a factoring that avoids this duplication
> while allowing the POSIX-specific parts to be optimized out for pure
> C11 use, but I think that would be a lot more invasive (mainly in
> terms of potential for errors, possibly also for performance, but
> unlikely since basically all the time is spent in mmap, mprotect, and
> clone anyway) and would be something to look into at a later time, not
> while trying to get the initial work on C11 threads in.
> 
> > Also I'd really like to advertise C threads as a lightweighted,
> > stripped down thread implementation, so the short cuts (that are
> > merely a fall off of the code separation, here) were politically
> > welcome.
> 
> I don't think saving 200 bytes in a pure-C11 static-linked program
> while adding 300 bytes to libc.so in code duplication (these numbers
> are just guesses but sound about right) really makes it come across as
> more lightweight.

I know. I'll have a look and try to factor these things out, such that
we really can weigh the alternatives.

> > > > diff --git a/src/thread/thrd_detach.c b/src/thread/thrd_detach.c
> > > > new file mode 100644
> > > > index 0000000..211b8fb
> > > > --- /dev/null
> > > > +++ b/src/thread/thrd_detach.c
> > > > @@ -0,0 +1,18 @@
> > > > +#include "pthread_impl.h"
> > > > +#include <threads.h>
> > > > +
> > > > +int __munmap(void *, size_t);
> > > > +
> > > > +int thrd_detach(thrd_t t)
> > > > +{
> > > > +	/* Cannot detach a thread that's already exiting */
> > > > +	if (a_swap(t->exitlock, 1)){
> > > > +		int tmp;
> > > > +		while ((tmp = t->tid)) __timedwait(&t->tid, tmp, 0, 0, 0, 0, 0);
> > > > +		if (t->map_base) __munmap(t->map_base, t->map_size);
> > > > +	} else {
> > > > +		t->detached = 2;
> > > > +		__unlock(t->exitlock);
> > > > +	}
> > > > +	return thrd_success;
> > > > +}
> > > 
> > > You already moved pthread_detach to the protected namespace, so this
> > > seems to be duplicate code.
> > 
> > It is.
> > 
> > > And it's a place that has implementation
> > > details (magic numbers like 2, which you could argue shouldn't exist
> > > ;-)
> > 
> > I would !)
> > 
> > > so I think it's best to avoid spreading these details out over
> > > more places where they could go out of sync, no?
> > 
> > yes, probably, the fact that pthread_detach called pthread_join
> > somehow disturbed me. I'll reconsider
> 
> It's not so odd if you think about it -- when the thread to be
> detached has already exited, you can't (implementation-wise) detach it
> and leave it responsible for cleaning up itself, because it doesn't
> exist anymore. Since you know it's exited though, joining and throwing
> away the result is possible.

But thinking of it and looking into the patches I discovered my real
motivation for doing so. (I should have better commented to remember
things myself.) The thing is that using pthread_join under the hood is
semantically wrong because of the different interfaces. Well we are
only passing in a null pointer, here, but still the type is wrong, C
threads "join" on a int* and not on a void**.

> > > But okay. My preference would be
> > > not to do fancy remapping of error codes (especially not when the
> > > codes will differ by arch, since this just encourages programs to
> > > assume they're fixed and then break on the arch where they're
> > > different, if any).
> > 
> > We have to be at least partially fancy, because C enforces -1 for the
> > interrupt case without really being precise what that should mean. I
> > just naively assumed that they mean the same as EINTR :)
> 
> I agree.
> 
> > For EINVAL, as discussed above, I'd expect that the C specification
> > will be modified to also have the validity check for the req
> > argument. POSIX has a "shall" here, so I'd like to keep the case well
> > identified.
> 
> That seems likely, or they might make it undefined. C <3 UB...
> 
> > For the EFAULT case, I don't have a strong opinion, could be either
> > way.
> 
> EFAULT really isn't an error the caller can expect to get. If there's
> any userspace work aside from the syscall, it would be SIGSEGV
> instead.
> 
> Is there a reason non-EINTR errors need to be mapped to distinct
> values, though? I was thinking just 3 cases, 0, EINTR, and default,
> would suffice.

ok, let's do it like that

> > BTW, the choice of CLOCK_REALTIME, here, could be subject of
> > debade. IIRC this clock doesn't exactly give the seconds since Unix
> > Epoch, but that value minus some leapseconds. So it doesn't seem to
> > fulfill the C specification.
> 
> With any luck, UTC might eventually be fixed to drop leap seconds, and
> then it won't matter for future times (and having your clock set to a
> past time is nonsense anyway). But I don't think C is strict on how
> the clock corresponds to real-world time; at least it wasn't before
> C11. And POSIX time explicitly excludes leap seconds.
> 
> > One could think of using CLOCK_MONOTONIC_RAW, instead, or
> > CLOCK_BOOTTIME if it exists. But I am not sure if the bootime of the
> > machine can be considered as an "epoch" in the sense of the C standard
> > or even in plain English.
> 
> I think the "implementation defined" "epoch" should actually be
> defined as something fixed, not something that varies. And for the
> main intended use of the function (passing the timespec to
> timedlock/timedwait functions), we need to match the clock the pthread
> functions use if we're wrapping the pthread implementations, which
> means using CLOCK_REALTIME.

yes, right

I found another glitch with timespec_get, though. It shouldn't touch
errno. So either we do some stiches around it to save errno and
restore it (for a case that probably never happens) or implement it
directly from the vdso and the syscall. I already have done the later,
and it looks sufficiently simple to be acceptable and not too much
code duplication.

Jens

-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::




[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-28 19:28       ` Jens Gustedt
@ 2014-08-28 20:00         ` Rich Felker
  2014-08-28 20:55           ` Szabolcs Nagy
  2014-08-28 21:34           ` Jens Gustedt
  0 siblings, 2 replies; 26+ messages in thread
From: Rich Felker @ 2014-08-28 20:00 UTC (permalink / raw)
  To: musl

On Thu, Aug 28, 2014 at 09:28:09PM +0200, Jens Gustedt wrote:
> Hello,
> 
> Am Donnerstag, den 28.08.2014, 12:15 -0400 schrieb Rich Felker:
> > > Am Mittwoch, den 27.08.2014, 19:46 -0400 schrieb Rich Felker:
> > > > On Thu, Aug 28, 2014 at 12:11:45AM +0200, Jens Gustedt wrote:
> > > > > diff --git a/arch/arm/bits/alltypes.h.in b/arch/arm/bits/alltypes.h..in
> > > > > index 183c4c4..333d43f 100644
> > > > > --- a/arch/arm/bits/alltypes.h.in
> > > > > +++ b/arch/arm/bits/alltypes.h.in
> > > > > @@ -19,7 +19,7 @@ TYPEDEF long time_t;
> > > > >  TYPEDEF long suseconds_t;
> > > > >  
> > > > >  TYPEDEF struct { union { int __i[9]; unsigned __s[9]; } __u; } pthread_attr_t;
> > > > > -TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
> > > > > -TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } pthread_cond_t;
> > > > > +TYPEDEF struct { union { int __i[6]; volatile void *volatile __p[6]; } __u; } __pthread_mutex_t;
> > > > > +TYPEDEF struct { union { int __i[12]; void *__p[12]; } __u; } __pthread_cond_t;
> > > > > [...]
> > > > > +TYPEDEF __pthread_cond_t pthread_cond_t;
> > > > > +TYPEDEF __pthread_cond_t cnd_t;
> > > > > +TYPEDEF __pthread_mutex_t pthread_mutex_t;
> > > > > +TYPEDEF __pthread_mutex_t mtx_t;
> > > > 
> > > > This changes the C++ ABI by changing the tag for the POSIX types, so
> > > > it can't be done this way. We could return to some serious language
> > > > lawyering about whether it's valid to access members via the 'wrong'
> > > > struct type when we have two identical struct types, or just assume
> > > > that's safe for now and do it.
> > > 
> > > I don't see your point, this doesn't change the tag name, no? (There
> > > is no tag name, here.) This is just typedeffing to the same type as
> > > before so this should be "as allowed" as it was before.
> > 
> > That's not how it works. In C++, there is no separate namespace for
> > struct tags versus types. If you define a struct without a tag but
> > using typedef, the typedef name becomes its effective struct tag,
> > which is what gets used for name mangling. If you later use typedef
> > again to make an alias for this type, it does not make a new struct
> > type with a new tag, just an alias for the existing one. Thus, your
> > patch as written changes the struct tags for C++ ABI from
> > "pthread_mutex_t" and "pthread_cond_t" to "__pthread_mutex_t" and
> > "__pthread_cond_t".
> 
> argh
> 
> at least it doesn't matter for the standard functions (they are `extern
> "C"`) but only for user functions with C++ interfaces.

Right, but it matters for all C++ code containing C++ functions that
use pthread_mutex_t* as an argument. And apparently there's a lot of
such code.

> Well, ok, so if you could come up with some better idea in the future,
> let me know.

I'm not even sure it's an issue. I've seen it argued that aliasing
rules don't even apply here because, when you access something like
m->_m_lock, that's not an "access" to the structure object/type but to
the individual member. If that's true, then as long as the structs
have identical layout, it should be valid to access the members via
either.

Also, what is the relationship between two identical struct or union
types without tags (i.e. the first member of pthread_mutex_t and the
first member of mtx_t, both of which are unions with no tag)?

> > > > > +#ifdef __GNUC__
> > > > > +#define _Nonnull(...) __attribute__((__nonnull__(__VA_ARGS__)))
> > > > > +#else
> > > > > +#define _Nonnull(...)
> > > > > +#endif
> > > > 
> > > > If we want to use attributes like this, it's a separate change from
> > > > adding threads. But I'm fairly much against doing it anyway since the
> > > > compiler already knows how to make these warnings for standard
> > > > functions, without help from the headers.
> > > 
> > > There will be a while, until compilers know these things for C11
> > > thread functions, I think.
> > > 
> > > BTw, I was tempted to do it the "C" way by using things like
> > > `[static 1]` instead of `*`, but then I thought you wouldn't like
> > > that.
> > 
> > Actually I like it much better, but I'm not sufficiently familiar with
> > it to know whether it's a conforming declaration. Is it formally
> > compatible with the standard prototype?
> 
> yes, C allows even for crufty things like
> 
> void f(int a[static volatile const 42]);
> 
> to be equivalent to
> 
> void f(int *volatile const a);
> 
> as a prototype. Interfaces can contain more information than just the
> prototype.

I was asking whether the use of static to mean "pointer to at least
this many elements" used for an argument in a function type resulted
in a distinct function type from the same without static.

> > If so, how does it allow the compiler to enforce anything? 
> 
> It doesn't enforce anything, it is more a hint at the same level as
> `restrict` or `register`. It is a prerequisite that the user of the
> function has to ensure.

So the following is NOT UB?

void foo(int [static 2]);

void bar()
{
	int x;
	foo(&x);
}

I'm guessing not. The same seems to apply to restrict too -- it has no
meaning on the caller side, so the compiler cannot automatically
assume UB when the restrict constraint is violated by the caller. It
can only be used when optimizing on the callee side. I suppose static
is the same -- but in the case of static, the only optimization that
seems to be possible on the callee side is assuming the pointer is
non-null; the length seems irrelevant.

> As nsz remarked in his reply, most older compilers don't do anything
> with it, they just ignore it, though the existence of the nonnull
> attribute shows that they would be easily capable of doing so.
> 
> The other inconvenience for `static 1` is C++. They haven't adopted
> it, so as such this would make the headers incompatible with C++. So
> also for this one we would need some preprocessor magic.
> 
> And then, also, it is ugly :(

Yes. Then let's just omit it for now.

> > > > > diff --git a/src/thread/mtx_unlock.c b/src/thread/mtx_unlock.c
> > > > > new file mode 100644
> > > > > index 0000000..d1721a6
> > > > > --- /dev/null
> > > > > +++ b/src/thread/mtx_unlock.c
> > > > > @@ -0,0 +1,11 @@
> > > > > +#include "pthread_impl.h"
> > > > > +#include <threads.h>
> > > > > +
> > > > > +int __pthread_mutex_unlock(pthread_mutex_t *);
> > > > > +
> > > > > +int (mtx_unlock)(mtx_t *mtx) {
> > > > > +	/* In the best of all worlds this is a tail call. */
> > > > > +	int ret = __pthread_mutex_unlock(mtx);
> > > > > +	/* In case of UB may also return EPERM. */
> > > > > +	return ret ? thrd_error : thrd_success;
> > > > > +}
> > > > 
> > > > For unlock, C11 makes it UB to unlock a mutex you don't own (even for
> > > > recursive type), so the tail call would be legal. I don't have a
> > > > strong opinion either way here.
> > > 
> > > I'll remove the comment, this is a left over. Tail call is still not
> > > possible, since moraly thrd_success is not 0 :)
> > > 
> > > But we could just return thrd_success unconditionnally.
> > 
> > Or: thrd_success ? thrd_success : ret;
> 
> ok, I'll go for that

OK.

> > > > > diff --git a/src/thread/pthread_setspecific.c b/src/thread/pthread_setspecific.c
> > > > > index 55e46a8..dd18b96 100644
> > > > > --- a/src/thread/pthread_setspecific.c
> > > > > +++ b/src/thread/pthread_setspecific.c
> > > > > @@ -1,6 +1,6 @@
> > > > >  #include "pthread_impl.h"
> > > > >  
> > > > > -int pthread_setspecific(pthread_key_t k, const void *x)
> > > > > +int __pthread_setspecific(pthread_key_t k, const void *x)
> > > > >  {
> > > > >  	struct pthread *self = __pthread_self();
> > > > >  	/* Avoid unnecessary COW */
> > > > > @@ -10,3 +10,5 @@ int pthread_setspecific(pthread_key_t k, const void *x)
> > > > >  	}
> > > > >  	return 0;
> > > > >  }
> > > > > +
> > > > > +weak_alias(__pthread_setspecific, pthread_setspecific);
> > > > 
> > > > It looks like you have a duplicate tss_set for this rather than using
> > > > the above, so either the above is a grauitous change, or the new
> > > > tss_set is duplicate code that should be removed. Or did I miss
> > > > something?
> > > 
> > > you are right, this should be omitted
> > > 
> > > (the tss_get code differs in the const qualification of the second argument)
> > 
> > In this case it's still possible to implement tss_set as a wrapper,
> 
> I am really allergic against casts, even more if they are somewhat
> hidden. So I'd do much to avoid that.

There's no cast here. void * converts implicitly to const void *.

> > > > Didn't we discuss before just passing a special attr pointer to
> > > > __pthread_create to effect C11-style creation?
> > > 
> > > I did that, and that version could still be revived. But I was not too
> > > happy with it, because it meant intruding into the existing pthread
> > > code. For this version, the pthread code is just reorganized in
> > > different TU, but otherwise there should be no change.
> > 
> > From a maintainability standpoint, having two subtly different
> > versions of the same complex code is a big cost. I wouldn't be
> > fundamentally opposed to a factoring that avoids this duplication
> > while allowing the POSIX-specific parts to be optimized out for pure
> > C11 use, but I think that would be a lot more invasive (mainly in
> > terms of potential for errors, possibly also for performance, but
> > unlikely since basically all the time is spent in mmap, mprotect, and
> > clone anyway) and would be something to look into at a later time, not
> > while trying to get the initial work on C11 threads in.
> > 
> > > Also I'd really like to advertise C threads as a lightweighted,
> > > stripped down thread implementation, so the short cuts (that are
> > > merely a fall off of the code separation, here) were politically
> > > welcome.
> > 
> > I don't think saving 200 bytes in a pure-C11 static-linked program
> > while adding 300 bytes to libc.so in code duplication (these numbers
> > are just guesses but sound about right) really makes it come across as
> > more lightweight.
> 
> I know. I'll have a look and try to factor these things out, such that
> we really can weigh the alternatives.

Can we look at this as a potential post-merge task? I'm skeptical that
it improves anything; saving maybe 100-200 bytes in the static-linked
C11-only case is probably not worth spreading code out over multiple
functions or files and making the flow of pthread_create less obvious.
I'm willing to look at it if you want to try anyway, but I don't think
it should be holding up getting C11 threads support added.

> > > > You already moved pthread_detach to the protected namespace, so this
> > > > seems to be duplicate code.
> > > 
> > > It is.
> > > 
> > > > And it's a place that has implementation
> > > > details (magic numbers like 2, which you could argue shouldn't exist
> > > > ;-)
> > > 
> > > I would !)
> > > 
> > > > so I think it's best to avoid spreading these details out over
> > > > more places where they could go out of sync, no?
> > > 
> > > yes, probably, the fact that pthread_detach called pthread_join
> > > somehow disturbed me. I'll reconsider
> > 
> > It's not so odd if you think about it -- when the thread to be
> > detached has already exited, you can't (implementation-wise) detach it
> > and leave it responsible for cleaning up itself, because it doesn't
> > exist anymore. Since you know it's exited though, joining and throwing
> > away the result is possible.
> 
> But thinking of it and looking into the patches I discovered my real
> motivation for doing so. (I should have better commented to remember
> things myself.) The thing is that using pthread_join under the hood is
> semantically wrong because of the different interfaces. Well we are
> only passing in a null pointer, here, but still the type is wrong, C
> threads "join" on a int* and not on a void**.

I see. But I don't think it matters. You're already marshalling the
int return value via a cast to void* in thrd_exit and thrd_join, and
in the case of detach, the result is being thrown away anyway. There's
nothing sketchy/UB going on here, while other places already encode
the assumption that void* can faithfully round-trip int values.

> > > BTW, the choice of CLOCK_REALTIME, here, could be subject of
> > > debade. IIRC this clock doesn't exactly give the seconds since Unix
> > > Epoch, but that value minus some leapseconds. So it doesn't seem to
> > > fulfill the C specification.
> > 
> > With any luck, UTC might eventually be fixed to drop leap seconds, and
> > then it won't matter for future times (and having your clock set to a
> > past time is nonsense anyway). But I don't think C is strict on how
> > the clock corresponds to real-world time; at least it wasn't before
> > C11. And POSIX time explicitly excludes leap seconds.
> > 
> > > One could think of using CLOCK_MONOTONIC_RAW, instead, or
> > > CLOCK_BOOTTIME if it exists. But I am not sure if the bootime of the
> > > machine can be considered as an "epoch" in the sense of the C standard
> > > or even in plain English.
> > 
> > I think the "implementation defined" "epoch" should actually be
> > defined as something fixed, not something that varies. And for the
> > main intended use of the function (passing the timespec to
> > timedlock/timedwait functions), we need to match the clock the pthread
> > functions use if we're wrapping the pthread implementations, which
> > means using CLOCK_REALTIME.
> 
> yes, right
> 
> I found another glitch with timespec_get, though. It shouldn't touch
> errno. So either we do some stiches around it to save errno and

Why? I see nowhere in C11 that it's specified not to touch errno. In
general, any standard library function can clobber errno as long as it
does not set it to 0, unless it's specifically documented not to
modify errno on success.

> restore it (for a case that probably never happens) or implement it
> directly from the vdso and the syscall. I already have done the later,
> and it looks sufficiently simple to be acceptable and not too much
> code duplication.

The latter is some fairly major code duplication, especially since we
have fallbacks for pre-SYS_clock_gettime kernels. Eventually I plan to
add emulation/fallback for at least CLOCK_MONOTONIC and
CLOCK_CPUTIME_CLOCKID too, and if we want to later add support for
more clock bases to timespec_get, using the same __clock_gettime
backend allows the fallback code to work there too. So I think it's
best not to try to make the vdsocall/syscall directly.

Rich


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-28 20:00         ` Rich Felker
@ 2014-08-28 20:55           ` Szabolcs Nagy
  2014-08-28 21:38             ` Jens Gustedt
  2014-08-28 21:34           ` Jens Gustedt
  1 sibling, 1 reply; 26+ messages in thread
From: Szabolcs Nagy @ 2014-08-28 20:55 UTC (permalink / raw)
  To: musl

* Rich Felker <dalias@libc.org> [2014-08-28 16:00:29 -0400]:
> I was asking whether the use of static to mean "pointer to at least
> this many elements" used for an argument in a function type resulted
> in a distinct function type from the same without static.

no the type is not different, there is an example in the standard:

http://port70.net/~nsz/c/c11/n1570.html#6.7.6.3p21

the semantics is defined in

http://port70.net/~nsz/c/c11/n1570.html#6.7.6.3p7


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-28 20:00         ` Rich Felker
  2014-08-28 20:55           ` Szabolcs Nagy
@ 2014-08-28 21:34           ` Jens Gustedt
  2014-08-28 21:56             ` Rich Felker
  1 sibling, 1 reply; 26+ messages in thread
From: Jens Gustedt @ 2014-08-28 21:34 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 10547 bytes --]

Am Donnerstag, den 28.08.2014, 16:00 -0400 schrieb Rich Felker:
> On Thu, Aug 28, 2014 at 09:28:09PM +0200, Jens Gustedt wrote:
> > at least it doesn't matter for the standard functions (they are `extern
> > "C"`) but only for user functions with C++ interfaces.
> 
> Right, but it matters for all C++ code containing C++ functions that
> use pthread_mutex_t* as an argument. And apparently there's a lot of
> such code.
> 
> > Well, ok, so if you could come up with some better idea in the future,
> > let me know.
> 
> I'm not even sure it's an issue. I've seen it argued that aliasing
> rules don't even apply here because, when you access something like
> m->_m_lock, that's not an "access" to the structure object/type but to
> the individual member. If that's true, then as long as the structs
> have identical layout, it should be valid to access the members via
> either.

Yes, there is a special rule for struct types in different TU, that
they are compatible when their internal structure is the same
(including alignment) and if their *tag* name is the same.

> Also, what is the relationship between two identical struct or union
> types without tags (i.e. the first member of pthread_mutex_t and the
> first member of mtx_t, both of which are unions with no tag)?

For structs with no tags the situation is more subtle. If you are in
the same TU and declare them in different places they are *not*
compatible, basically they are two different struct. On the other hand
two such struct in different TU are compatible, if they comply to the
above rule of structural equivalence.

And for C++ it really seems that typedef identifiers have yet another
exception for the case that they typedef a struct or union type.

struct stat { int a; };
void stat(struct stat A) { }
struct stut { int a; };
typedef struct { int a; } stut;

is only an error on the typedef.

> > > > > > +#ifdef __GNUC__
> > > > > > +#define _Nonnull(...) __attribute__((__nonnull__(__VA_ARGS__)))
> > > > > > +#else
> > > > > > +#define _Nonnull(...)
> > > > > > +#endif
> > > > > 
> > > > > If we want to use attributes like this, it's a separate change from
> > > > > adding threads. But I'm fairly much against doing it anyway since the
> > > > > compiler already knows how to make these warnings for standard
> > > > > functions, without help from the headers.
> > > > 
> > > > There will be a while, until compilers know these things for C11
> > > > thread functions, I think.
> > > > 
> > > > BTw, I was tempted to do it the "C" way by using things like
> > > > `[static 1]` instead of `*`, but then I thought you wouldn't like
> > > > that.
> > > 
> > > Actually I like it much better, but I'm not sufficiently familiar with
> > > it to know whether it's a conforming declaration. Is it formally
> > > compatible with the standard prototype?
> > 
> > yes, C allows even for crufty things like
> > 
> > void f(int a[static volatile const 42]);
> > 
> > to be equivalent to
> > 
> > void f(int *volatile const a);
> > 
> > as a prototype. Interfaces can contain more information than just the
> > prototype.
> 
> I was asking whether the use of static to mean "pointer to at least
> this many elements" used for an argument in a function type resulted
> in a distinct function type from the same without static.

the answer is no to that, too, the function type is the same

for the first "dimension" only type qualifiers are accounted for the
function type

> > > If so, how does it allow the compiler to enforce anything? 
> > 
> > It doesn't enforce anything, it is more a hint at the same level as
> > `restrict` or `register`. It is a prerequisite that the user of the
> > function has to ensure.
> 
> So the following is NOT UB?
> 
> void foo(int [static 2]);
> 
> void bar()
> {
> 	int x;
> 	foo(&x);
> }
>
> I'm guessing not.

you are guessing correctly, UB only occurs if x[1] is accessed through
foo.

> The same seems to apply to restrict too -- it has no
> meaning on the caller side, so the compiler cannot automatically
> assume UB when the restrict constraint is violated by the caller. It
> can only be used when optimizing on the callee side. I suppose static
> is the same -- but in the case of static, the only optimization that
> seems to be possible on the callee side is assuming the pointer is
> non-null; the length seems irrelevant.

The callee can also be a caller to another (or the same) function and
thus transmit the invariant, there.

Basically this could do some bounds checking for arrays with
statically known sizes.

> > As nsz remarked in his reply, most older compilers don't do anything
> > with it, they just ignore it, though the existence of the nonnull
> > attribute shows that they would be easily capable of doing so.
> > 
> > The other inconvenience for `static 1` is C++. They haven't adopted
> > it, so as such this would make the headers incompatible with C++. So
> > also for this one we would need some preprocessor magic.
> > 
> > And then, also, it is ugly :(
> 
> Yes. Then let's just omit it for now.

ok

(you probably mean that also for the nonnull version, I suppose)

> > > > > > diff --git a/src/thread/pthread_setspecific.c b/src/thread/pthread_setspecific.c
> > > > > > index 55e46a8..dd18b96 100644
> > > > > > --- a/src/thread/pthread_setspecific.c
> > > > > > +++ b/src/thread/pthread_setspecific.c
> > > > > > @@ -1,6 +1,6 @@
> > > > > >  #include "pthread_impl.h"
> > > > > >  
> > > > > > -int pthread_setspecific(pthread_key_t k, const void *x)
> > > > > > +int __pthread_setspecific(pthread_key_t k, const void *x)
> > > > > >  {
> > > > > >  	struct pthread *self = __pthread_self();
> > > > > >  	/* Avoid unnecessary COW */
> > > > > > @@ -10,3 +10,5 @@ int pthread_setspecific(pthread_key_t k, const void *x)
> > > > > >  	}
> > > > > >  	return 0;
> > > > > >  }
> > > > > > +
> > > > > > +weak_alias(__pthread_setspecific, pthread_setspecific);
> > > > > 
> > > > > It looks like you have a duplicate tss_set for this rather than using
> > > > > the above, so either the above is a grauitous change, or the new
> > > > > tss_set is duplicate code that should be removed. Or did I miss
> > > > > something?
> > > > 
> > > > you are right, this should be omitted
> > > > 
> > > > (the tss_get code differs in the const qualification of the second argument)
> > > 
> > > In this case it's still possible to implement tss_set as a wrapper,
> > 
> > I am really allergic against casts, even more if they are somewhat
> > hidden. So I'd do much to avoid that.
> 
> There's no cast here. void * converts implicitly to const void *.

There is a cast inside the pthread_setspecific function which I really
don't like, we discussed that before, I think. I'd rather not use a
function that does const conversion magic under the hood. These are
really badly designed interfaces.

> > I know. I'll have a look and try to factor these things out, such that
> > we really can weigh the alternatives.
> 
> Can we look at this as a potential post-merge task? I'm skeptical that
> it improves anything; saving maybe 100-200 bytes in the static-linked
> C11-only case is probably not worth spreading code out over multiple
> functions or files and making the flow of pthread_create less obvious.
> I'm willing to look at it if you want to try anyway, but I don't think
> it should be holding up getting C11 threads support added.

wouldn't be holding up, I promisse. I'd have to factor this into
digestable patches anyhow, so this should not be much more effort.

> > But thinking of it and looking into the patches I discovered my real
> > motivation for doing so. (I should have better commented to remember
> > things myself.) The thing is that using pthread_join under the hood is
> > semantically wrong because of the different interfaces. Well we are
> > only passing in a null pointer, here, but still the type is wrong, C
> > threads "join" on a int* and not on a void**.
> 
> I see. But I don't think it matters. You're already marshalling the
> int return value via a cast to void* in thrd_exit and thrd_join, and
> in the case of detach, the result is being thrown away anyway. There's
> nothing sketchy/UB going on here, while other places already encode
> the assumption that void* can faithfully round-trip int values.

I know, there is no UB, but I still don't like this kind of
stuff. Basically, I have to read deeply into the code at any time I
touch it to understand that fact that it is not UB. These oddities
should be constrained to two or three well identifiable places where
they are unavoidable, namely the create, start and join functions.

> > I found another glitch with timespec_get, though. It shouldn't touch
> > errno. So either we do some stiches around it to save errno and
> 
> Why? I see nowhere in C11 that it's specified not to touch errno. In
> general, any standard library function can clobber errno as long as it
> does not set it to 0, unless it's specifically documented not to
> modify errno on success.

Somehow I always think that the standard is more constrained than that.

At least footnote 202 suggests somehow that a library function
[w|c]ould save errno and restore it later. I guess this is considered
"being nice".

> > restore it (for a case that probably never happens) or implement it
> > directly from the vdso and the syscall. I already have done the later,
> > and it looks sufficiently simple to be acceptable and not too much
> > code duplication.
> 
> The latter is some fairly major code duplication, especially since we
> have fallbacks for pre-SYS_clock_gettime kernels. Eventually I plan to
> add emulation/fallback for at least CLOCK_MONOTONIC and
> CLOCK_CPUTIME_CLOCKID too, and if we want to later add support for
> more clock bases to timespec_get, using the same __clock_gettime
> backend allows the fallback code to work there too. So I think it's
> best not to try to make the vdsocall/syscall directly.

I didn't find that code to complicated and much of a duplicate. Let's
see.

Jens

-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-28 20:55           ` Szabolcs Nagy
@ 2014-08-28 21:38             ` Jens Gustedt
  0 siblings, 0 replies; 26+ messages in thread
From: Jens Gustedt @ 2014-08-28 21:38 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 1070 bytes --]

Am Donnerstag, den 28.08.2014, 22:55 +0200 schrieb Szabolcs Nagy:
> * Rich Felker <dalias@libc.org> [2014-08-28 16:00:29 -0400]:
> > I was asking whether the use of static to mean "pointer to at least
> > this many elements" used for an argument in a function type resulted
> > in a distinct function type from the same without static.
> 
> no the type is not different, there is an example in the standard:
> 
> http://port70.net/~nsz/c/c11/n1570.html#6.7.6.3p21
> 
> the semantics is defined in
> 
> http://port70.net/~nsz/c/c11/n1570.html#6.7.6.3p7

So I was wrong in my previous answer to Rich, already calling the
function with an array of insufficient length is UB, even if the
function happens to not access out of bounds elements.

Jens

-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-28 21:34           ` Jens Gustedt
@ 2014-08-28 21:56             ` Rich Felker
  2014-08-28 23:25               ` Jens Gustedt
  0 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2014-08-28 21:56 UTC (permalink / raw)
  To: musl

On Thu, Aug 28, 2014 at 11:34:13PM +0200, Jens Gustedt wrote:
> Am Donnerstag, den 28.08.2014, 16:00 -0400 schrieb Rich Felker:
> > On Thu, Aug 28, 2014 at 09:28:09PM +0200, Jens Gustedt wrote:
> > > at least it doesn't matter for the standard functions (they are `extern
> > > "C"`) but only for user functions with C++ interfaces.
> > 
> > Right, but it matters for all C++ code containing C++ functions that
> > use pthread_mutex_t* as an argument. And apparently there's a lot of
> > such code.
> > 
> > > Well, ok, so if you could come up with some better idea in the future,
> > > let me know.
> > 
> > I'm not even sure it's an issue. I've seen it argued that aliasing
> > rules don't even apply here because, when you access something like
> > m->_m_lock, that's not an "access" to the structure object/type but to
> > the individual member. If that's true, then as long as the structs
> > have identical layout, it should be valid to access the members via
> > either.
> 
> Yes, there is a special rule for struct types in different TU, that
> they are compatible when their internal structure is the same
> (including alignment) and if their *tag* name is the same.
> 
> > Also, what is the relationship between two identical struct or union
> > types without tags (i.e. the first member of pthread_mutex_t and the
> > first member of mtx_t, both of which are unions with no tag)?
> 
> For structs with no tags the situation is more subtle. If you are in
> the same TU and declare them in different places they are *not*
> compatible, basically they are two different struct. On the other hand
> two such struct in different TU are compatible, if they comply to the
> above rule of structural equivalence.

Do you have a conclusion from this as to whether what we're doing is
okay? FWIW the mutex and the code manipulating its internals are
always in different TUs.

> > > As nsz remarked in his reply, most older compilers don't do anything
> > > with it, they just ignore it, though the existence of the nonnull
> > > attribute shows that they would be easily capable of doing so.
> > > 
> > > The other inconvenience for `static 1` is C++. They haven't adopted
> > > it, so as such this would make the headers incompatible with C++. So
> > > also for this one we would need some preprocessor magic.
> > > 
> > > And then, also, it is ugly :(
> > 
> > Yes. Then let's just omit it for now.
> 
> ok
> 
> (you probably mean that also for the nonnull version, I suppose)

Yes.

> > > > > > > +weak_alias(__pthread_setspecific, pthread_setspecific);
> > > > > > 
> > > > > > It looks like you have a duplicate tss_set for this rather than using
> > > > > > the above, so either the above is a grauitous change, or the new
> > > > > > tss_set is duplicate code that should be removed. Or did I miss
> > > > > > something?
> > > > > 
> > > > > you are right, this should be omitted
> > > > > 
> > > > > (the tss_get code differs in the const qualification of the second argument)
> > > > 
> > > > In this case it's still possible to implement tss_set as a wrapper,
> > > 
> > > I am really allergic against casts, even more if they are somewhat
> > > hidden. So I'd do much to avoid that.
> > 
> > There's no cast here. void * converts implicitly to const void *.
> 
> There is a cast inside the pthread_setspecific function which I really
> don't like, we discussed that before, I think. I'd rather not use a
> function that does const conversion magic under the hood. These are
> really badly designed interfaces.

Then do you also refrain from using strstr, strchr, etc.? :)

I certainly don't see any harm in passing to a pointer which
originally has type (void *) through a function that's going to
convert it to (const void *) and back to (void *). I could see your
objection making sense if the _original_ type were const qualified,
but here it's not.

In any case I still don't care whether the code gets duplicated or not
since it's trivial. So do whichever you like.

> > > I know. I'll have a look and try to factor these things out, such that
> > > we really can weigh the alternatives.
> > 
> > Can we look at this as a potential post-merge task? I'm skeptical that
> > it improves anything; saving maybe 100-200 bytes in the static-linked
> > C11-only case is probably not worth spreading code out over multiple
> > functions or files and making the flow of pthread_create less obvious.
> > I'm willing to look at it if you want to try anyway, but I don't think
> > it should be holding up getting C11 threads support added.
> 
> wouldn't be holding up, I promisse. I'd have to factor this into
> digestable patches anyhow, so this should not be much more effort.

Without this change, it's a tiny patch to pthread_create.c (basically
just adding one tiny C11 start function and a few namespace fixes).
With it, there's a lot more to do, but my concern isn't whether you
have time to do a proposed refactoring of pthread_create; rather, it's
the amount of review that will need to go into evaluating whether it's
a worthwhile change.

Rich


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-28 21:56             ` Rich Felker
@ 2014-08-28 23:25               ` Jens Gustedt
  2014-08-28 23:38                 ` Rich Felker
  0 siblings, 1 reply; 26+ messages in thread
From: Jens Gustedt @ 2014-08-28 23:25 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 4138 bytes --]

Am Donnerstag, den 28.08.2014, 17:56 -0400 schrieb Rich Felker:
> On Thu, Aug 28, 2014 at 11:34:13PM +0200, Jens Gustedt wrote:
> > Am Donnerstag, den 28.08.2014, 16:00 -0400 schrieb Rich Felker:
> > > On Thu, Aug 28, 2014 at 09:28:09PM +0200, Jens Gustedt wrote:
> > > > at least it doesn't matter for the standard functions (they are `extern
> > > > "C"`) but only for user functions with C++ interfaces.
> > > 
> > > Right, but it matters for all C++ code containing C++ functions that
> > > use pthread_mutex_t* as an argument. And apparently there's a lot of
> > > such code.
> > > 
> > > > Well, ok, so if you could come up with some better idea in the future,
> > > > let me know.
> > > 
> > > I'm not even sure it's an issue. I've seen it argued that aliasing
> > > rules don't even apply here because, when you access something like
> > > m->_m_lock, that's not an "access" to the structure object/type but to
> > > the individual member. If that's true, then as long as the structs
> > > have identical layout, it should be valid to access the members via
> > > either.
> > 
> > Yes, there is a special rule for struct types in different TU, that
> > they are compatible when their internal structure is the same
> > (including alignment) and if their *tag* name is the same.
> > 
> > > Also, what is the relationship between two identical struct or union
> > > types without tags (i.e. the first member of pthread_mutex_t and the
> > > first member of mtx_t, both of which are unions with no tag)?
> > 
> > For structs with no tags the situation is more subtle. If you are in
> > the same TU and declare them in different places they are *not*
> > compatible, basically they are two different struct. On the other hand
> > two such struct in different TU are compatible, if they comply to the
> > above rule of structural equivalence.
> 
> Do you have a conclusion from this as to whether what we're doing is
> okay? FWIW the mutex and the code manipulating its internals are
> always in different TUs.

Yes, what we were doing before and after is ok for C, anyhow. All code
sees exactly the same definitions in the platform specific (generated)
alltypes.h header. All renaming is done through typedef, so there is
no problem at all, this is always the same type, visible through
different names.

And it even would be ok if one TU would only see it as
__pthread_mutex_t and the other TU as pthread_mutex_t, say. As long as
these are typedef to structures with no tags and exactly the same
layout.

The problem only occurs for C++, since they seem to have a concept of
"original name" of a type or so.

> > > > I know. I'll have a look and try to factor these things out, such that
> > > > we really can weigh the alternatives.
> > > 
> > > Can we look at this as a potential post-merge task? I'm skeptical that
> > > it improves anything; saving maybe 100-200 bytes in the static-linked
> > > C11-only case is probably not worth spreading code out over multiple
> > > functions or files and making the flow of pthread_create less obvious.
> > > I'm willing to look at it if you want to try anyway, but I don't think
> > > it should be holding up getting C11 threads support added.
> > 
> > wouldn't be holding up, I promisse. I'd have to factor this into
> > digestable patches anyhow, so this should not be much more effort.
> 
> Without this change, it's a tiny patch to pthread_create.c (basically
> just adding one tiny C11 start function and a few namespace fixes).
> With it, there's a lot more to do, but my concern isn't whether you
> have time to do a proposed refactoring of pthread_create; rather, it's
> the amount of review that will need to go into evaluating whether it's
> a worthwhile change.

ok, I'll keep that in mind

Jens

-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-28 23:25               ` Jens Gustedt
@ 2014-08-28 23:38                 ` Rich Felker
  2014-08-29  7:56                   ` Jens Gustedt
  0 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2014-08-28 23:38 UTC (permalink / raw)
  To: musl

On Fri, Aug 29, 2014 at 01:25:37AM +0200, Jens Gustedt wrote:
> Am Donnerstag, den 28.08.2014, 17:56 -0400 schrieb Rich Felker:
> > On Thu, Aug 28, 2014 at 11:34:13PM +0200, Jens Gustedt wrote:
> > > Am Donnerstag, den 28.08.2014, 16:00 -0400 schrieb Rich Felker:
> > > > On Thu, Aug 28, 2014 at 09:28:09PM +0200, Jens Gustedt wrote:
> > > > > at least it doesn't matter for the standard functions (they are `extern
> > > > > "C"`) but only for user functions with C++ interfaces.
> > > > 
> > > > Right, but it matters for all C++ code containing C++ functions that
> > > > use pthread_mutex_t* as an argument. And apparently there's a lot of
> > > > such code.
> > > > 
> > > > > Well, ok, so if you could come up with some better idea in the future,
> > > > > let me know.
> > > > 
> > > > I'm not even sure it's an issue. I've seen it argued that aliasing
> > > > rules don't even apply here because, when you access something like
> > > > m->_m_lock, that's not an "access" to the structure object/type but to
> > > > the individual member. If that's true, then as long as the structs
> > > > have identical layout, it should be valid to access the members via
> > > > either.
> > > 
> > > Yes, there is a special rule for struct types in different TU, that
> > > they are compatible when their internal structure is the same
> > > (including alignment) and if their *tag* name is the same.
> > > 
> > > > Also, what is the relationship between two identical struct or union
> > > > types without tags (i.e. the first member of pthread_mutex_t and the
> > > > first member of mtx_t, both of which are unions with no tag)?
> > > 
> > > For structs with no tags the situation is more subtle. If you are in
> > > the same TU and declare them in different places they are *not*
> > > compatible, basically they are two different struct. On the other hand
> > > two such struct in different TU are compatible, if they comply to the
> > > above rule of structural equivalence.
> > 
> > Do you have a conclusion from this as to whether what we're doing is
> > okay? FWIW the mutex and the code manipulating its internals are
> > always in different TUs.
> 
> Yes, what we were doing before and after is ok for C, anyhow. All code

Perhaps I should clarify: what I mean by "what we're doing" is
defining pthread_mutex_t and mtx_t as separate structs with identical
contents (a single union with no tag). Your latest version with
__pthread_mutex_t is not a possibility because it changes the C++ ABI.

In principle we could do something like with the definition of
pthread_t where it changes depending on whether the header is being
used in a C or C++ program, but that's quite ugly and not something
I'd much like to do...

Is your conclusion still that it's okay? I think so but I just want to
confirm.

> sees exactly the same definitions in the platform specific (generated)
> alltypes.h header. All renaming is done through typedef, so there is
> no problem at all, this is always the same type, visible through
> different names.
> 
> And it even would be ok if one TU would only see it as
> __pthread_mutex_t and the other TU as pthread_mutex_t, say. As long as
> these are typedef to structures with no tags and exactly the same
> layout.
> 
> The problem only occurs for C++, since they seem to have a concept of
> "original name" of a type or so.

Unless there is a real _practical_ incompatibility, I'm fine with
ignoring C++ technicalities where the underlying implementation is
already valid C.

Rich


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-28 23:38                 ` Rich Felker
@ 2014-08-29  7:56                   ` Jens Gustedt
  2014-08-29  8:02                     ` Jens Gustedt
  2014-08-29 15:56                     ` Rich Felker
  0 siblings, 2 replies; 26+ messages in thread
From: Jens Gustedt @ 2014-08-29  7:56 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 2519 bytes --]

Am Donnerstag, den 28.08.2014, 19:38 -0400 schrieb Rich Felker:
> Perhaps I should clarify: what I mean by "what we're doing" is
> defining pthread_mutex_t and mtx_t as separate structs with identical
> contents (a single union with no tag). Your latest version with
> __pthread_mutex_t is not a possibility because it changes the C++ ABI.
>
> In principle we could do something like with the definition of
> pthread_t where it changes depending on whether the header is being
> used in a C or C++ program, but that's quite ugly and not something
> I'd much like to do...
> 
> Is your conclusion still that it's okay? I think so but I just want to
> confirm.

Ok, now I see.

(for the discussion let's just take mutexes as example, similar would
apply for cv)

Yes, done carefully this would be an option. By carefully meaning that
we have to ensure that all TU see only either type, not both.

For pthread this would be easy, basically nothing changes, good.

For C threads that base themselves on pthread this would be a pain. We
have to call pthread functions with mutex parameters. We couldn't just
simply include the pthread.h header, since this would drag in the
pthread type definitinions.

We could get away with it, by some hackery

** Option 1:

C threads do something like the following in all C thread TU, or have
an intermediate header "threads_impl.h" that does this

typedef mtx_t pthread_mutex_t
#define __DEFINED_pthread_mutex_t

#include "pthread_impl.h"

** Option 2:

In "pthread_impl.h" have:

typedef pthread_mutex_t __pthread_mutex_t;

In "threads_impl.h" have:

typedef mtx_t __pthread_mutex_t;

and then let all __pthread_xxxxx functions that we provide to be used
by the C thread implementation use __pthread_mutex_t.

***

All of this would explode in our face the day a user wants to use
pthread_mutex_t and mtx_t in the same application. A use case could be
that he uses one library that protects CS with pthread_mutex_t and
another that uses mtx_t. Now suddenly we have code that sees two
different types, with possibly subtle bugs due to aliasing.

So in conclusion, it is doable, but I don't like it at all.

Jens

-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-29  7:56                   ` Jens Gustedt
@ 2014-08-29  8:02                     ` Jens Gustedt
  2014-08-29 15:57                       ` Rich Felker
  2014-08-29 15:56                     ` Rich Felker
  1 sibling, 1 reply; 26+ messages in thread
From: Jens Gustedt @ 2014-08-29  8:02 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 1139 bytes --]

Am Freitag, den 29.08.2014, 09:56 +0200 schrieb Jens Gustedt:
> All of this would explode in our face the day a user wants to use
> pthread_mutex_t and mtx_t in the same application. A use case could be
> that he uses one library that protects CS with pthread_mutex_t and
> another that uses mtx_t. Now suddenly we have code that sees two
> different types, with possibly subtle bugs due to aliasing.
> 
> So in conclusion, it is doable, but I don't like it at all.

To give it a positive turn, for the moment I'd prefer to roll this
back and have the two types pthread_mutex_t and pthread_cond_t violate
the namespace rules of libc for the moment. This is not perfect, but
also not a serious drawback.

This would have the advantage of being conservative on the pthread
side and not to delay the schedule.

Jens

-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::




[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-29  7:56                   ` Jens Gustedt
  2014-08-29  8:02                     ` Jens Gustedt
@ 2014-08-29 15:56                     ` Rich Felker
  2014-08-29 18:40                       ` Jens Gustedt
  1 sibling, 1 reply; 26+ messages in thread
From: Rich Felker @ 2014-08-29 15:56 UTC (permalink / raw)
  To: musl

On Fri, Aug 29, 2014 at 09:56:39AM +0200, Jens Gustedt wrote:
> Am Donnerstag, den 28.08.2014, 19:38 -0400 schrieb Rich Felker:
> > Perhaps I should clarify: what I mean by "what we're doing" is
> > defining pthread_mutex_t and mtx_t as separate structs with identical
> > contents (a single union with no tag). Your latest version with
> > __pthread_mutex_t is not a possibility because it changes the C++ ABI.
> >
> > In principle we could do something like with the definition of
> > pthread_t where it changes depending on whether the header is being
> > used in a C or C++ program, but that's quite ugly and not something
> > I'd much like to do...
> > 
> > Is your conclusion still that it's okay? I think so but I just want to
> > confirm.
> 
> Ok, now I see.
> 
> (for the discussion let's just take mutexes as example, similar would
> apply for cv)
> 
> Yes, done carefully this would be an option. By carefully meaning that
> we have to ensure that all TU see only either type, not both.
> 
> For pthread this would be easy, basically nothing changes, good.
> 
> For C threads that base themselves on pthread this would be a pain. We
> have to call pthread functions with mutex parameters. We couldn't just
> simply include the pthread.h header, since this would drag in the
> pthread type definitinions.
> 
> 
> We could get away with it, by some hackery
> 
> ** Option 1:
> 
> C threads do something like the following in all C thread TU, or have
> an intermediate header "threads_impl.h" that does this
> 
> typedef mtx_t pthread_mutex_t
> #define __DEFINED_pthread_mutex_t
> 
> #include "pthread_impl.h"
> 
> ** Option 2:
> 
> In "pthread_impl.h" have:
> 
> typedef pthread_mutex_t __pthread_mutex_t;
> 
> In "threads_impl.h" have:
> 
> typedef mtx_t __pthread_mutex_t;
> 
> and then let all __pthread_xxxxx functions that we provide to be used
> by the C thread implementation use __pthread_mutex_t.

Or maybe, Option 3: Just don't include pthread_impl.h or pthread.h at
all in the C11 wrapper files for these types. These are only a few
files and they don't need access to any pthread types because they're
just wrappers.

> All of this would explode in our face the day a user wants to use
> pthread_mutex_t and mtx_t in the same application. A use case could be
> that he uses one library that protects CS with pthread_mutex_t and
> another that uses mtx_t. Now suddenly we have code that sees two
> different types, with possibly subtle bugs due to aliasing.

How so? The TUs that see both types cannot touch the contents of them
at all, since they're opaque types.

Also I'm still not convinced that any aliasing of the struct objects
happens (there is no access of whole structs, assignment of structs,
etc. in the relevant code), only of the int and void* members, which
have the correct effective types. Obviously there's a dependency on
the layout of the structs matching (no differences in padding, etc.)
but we depend on layout being predictable anyway since this is part of
the target ABI and cannot change.

Rich


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-29  8:02                     ` Jens Gustedt
@ 2014-08-29 15:57                       ` Rich Felker
  2014-08-29 19:01                         ` Jens Gustedt
  0 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2014-08-29 15:57 UTC (permalink / raw)
  To: musl

On Fri, Aug 29, 2014 at 10:02:43AM +0200, Jens Gustedt wrote:
> Am Freitag, den 29.08.2014, 09:56 +0200 schrieb Jens Gustedt:
> > All of this would explode in our face the day a user wants to use
> > pthread_mutex_t and mtx_t in the same application. A use case could be
> > that he uses one library that protects CS with pthread_mutex_t and
> > another that uses mtx_t. Now suddenly we have code that sees two
> > different types, with possibly subtle bugs due to aliasing.
> > 
> > So in conclusion, it is doable, but I don't like it at all.
> 
> To give it a positive turn, for the moment I'd prefer to roll this
> back and have the two types pthread_mutex_t and pthread_cond_t violate
> the namespace rules of libc for the moment. This is not perfect, but
> also not a serious drawback.
> 
> This would have the advantage of being conservative on the pthread
> side and not to delay the schedule.

I don't think this is an acceptable way to proceed. It creates a
C++ ABI that we're planning to remove by changing the struct tags for
these types later (fixing the namespace issue will necessarily break
the C++ ABI).

Rich


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-29 15:56                     ` Rich Felker
@ 2014-08-29 18:40                       ` Jens Gustedt
  0 siblings, 0 replies; 26+ messages in thread
From: Jens Gustedt @ 2014-08-29 18:40 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 5000 bytes --]

Am Freitag, den 29.08.2014, 11:56 -0400 schrieb Rich Felker:
> On Fri, Aug 29, 2014 at 09:56:39AM +0200, Jens Gustedt wrote:
> > Am Donnerstag, den 28.08.2014, 19:38 -0400 schrieb Rich Felker:
> > > Perhaps I should clarify: what I mean by "what we're doing" is
> > > defining pthread_mutex_t and mtx_t as separate structs with identical
> > > contents (a single union with no tag). Your latest version with
> > > __pthread_mutex_t is not a possibility because it changes the C++ ABI.
> > >
> > > In principle we could do something like with the definition of
> > > pthread_t where it changes depending on whether the header is being
> > > used in a C or C++ program, but that's quite ugly and not something
> > > I'd much like to do...
> > > 
> > > Is your conclusion still that it's okay? I think so but I just want to
> > > confirm.
> > 
> > Ok, now I see.
> > 
> > (for the discussion let's just take mutexes as example, similar would
> > apply for cv)
> > 
> > Yes, done carefully this would be an option. By carefully meaning that
> > we have to ensure that all TU see only either type, not both.
> > 
> > For pthread this would be easy, basically nothing changes, good.
> > 
> > For C threads that base themselves on pthread this would be a pain. We
> > have to call pthread functions with mutex parameters. We couldn't just
> > simply include the pthread.h header, since this would drag in the
> > pthread type definitinions.
> > 
> > 
> > We could get away with it, by some hackery
> > 
> > ** Option 1:
> > 
> > C threads do something like the following in all C thread TU, or have
> > an intermediate header "threads_impl.h" that does this
> > 
> > typedef mtx_t pthread_mutex_t
> > #define __DEFINED_pthread_mutex_t
> > 
> > #include "pthread_impl.h"
> > 
> > ** Option 2:
> > 
> > In "pthread_impl.h" have:
> > 
> > typedef pthread_mutex_t __pthread_mutex_t;
> > 
> > In "threads_impl.h" have:
> > 
> > typedef mtx_t __pthread_mutex_t;
> > 
> > and then let all __pthread_xxxxx functions that we provide to be used
> > by the C thread implementation use __pthread_mutex_t.
> 
> Or maybe, Option 3: Just don't include pthread_impl.h or pthread.h at
> all in the C11 wrapper files for these types. These are only a few
> files and they don't need access to any pthread types because they're
> just wrappers.

They are not just wrappers, there are some real functions, at least
for the time being, and this would close down a migration path to
separate implementations.

We could look into it, how the use of pthread_impl.h could be reduced,
but I don't think it can completely be avoided.

> > All of this would explode in our face the day a user wants to use
> > pthread_mutex_t and mtx_t in the same application. A use case could be
> > that he uses one library that protects CS with pthread_mutex_t and
> > another that uses mtx_t. Now suddenly we have code that sees two
> > different types, with possibly subtle bugs due to aliasing.
> 
> How so? The TUs that see both types cannot touch the contents of them
> at all, since they're opaque types.

I have the impression that I still don't get where you are heading
for.

These are *not* opaque types. In both models we have to expose a type
to the user code such that user code may declare a mutex variable, for
example. User code that uses both interfaces and has a pointer to
pthread_mutex_t and another pointer to mtx_t will either "know" that
both are the same type, or that the two types differ. There is no in
between. And according to that one may alias the other (or not).

> Also I'm still not convinced that any aliasing of the struct objects
> happens (there is no access of whole structs, assignment of structs,
> etc. in the relevant code), only of the int and void* members, which
> have the correct effective types.

I am not sure that such an aliasing model is reasonable. All struct
decompose to basic types at the end. What you are saying sounds to me
as if you are claiming that for

struct A0 { double d; int a; } va;
struct B0 { int b; double e; } vb;

the fact that va.a and vb.b are both int leads to some possible
conclusion whether or not they could be aliasing.

My reading of the aliasing rule says that they will never alias since
they are parts of objects of different type. For that argument nothing
changes if we have

struct A1 { int a; } *pa;
struct B1 { int b; } *pb;

for pa->a and pb->b. pa and pb point to differently typed objects so
they can't alias, and so pa->a and pb->b can't alias.

So whether or not pthread_mutex_t and mtx_t are the same type or not
matters.

Jens

-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-29 15:57                       ` Rich Felker
@ 2014-08-29 19:01                         ` Jens Gustedt
  2014-08-30  5:30                           ` Rich Felker
  0 siblings, 1 reply; 26+ messages in thread
From: Jens Gustedt @ 2014-08-29 19:01 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 2823 bytes --]

Am Freitag, den 29.08.2014, 11:57 -0400 schrieb Rich Felker:
> On Fri, Aug 29, 2014 at 10:02:43AM +0200, Jens Gustedt wrote:
> > Am Freitag, den 29.08.2014, 09:56 +0200 schrieb Jens Gustedt:
> > > All of this would explode in our face the day a user wants to use
> > > pthread_mutex_t and mtx_t in the same application. A use case could be
> > > that he uses one library that protects CS with pthread_mutex_t and
> > > another that uses mtx_t. Now suddenly we have code that sees two
> > > different types, with possibly subtle bugs due to aliasing.
> > > 
> > > So in conclusion, it is doable, but I don't like it at all.
> > 
> > To give it a positive turn, for the moment I'd prefer to roll this
> > back and have the two types pthread_mutex_t and pthread_cond_t violate
> > the namespace rules of libc for the moment. This is not perfect, but
> > also not a serious drawback.
> > 
> > This would have the advantage of being conservative on the pthread
> > side and not to delay the schedule.
> 
> I don't think this is an acceptable way to proceed. It creates a
> C++ ABI that we're planning to remove by changing the struct tags for
> these types later (fixing the namespace issue will necessarily break
> the C++ ABI).

I don't know what you are planning, could you please explain?

There is basically one base choice to make:

 - we decide if pthread_mutex_t and mtx_t are seen as two different
   types or not for any application that includes both headers

(This should be made independent of the question if we silently use
the same hidden type, or similar structured type, under the hood.)

For C this choice is not so relevant, since all interfaces are just
pointers to struct, so they are interchangeble, and this helps for the
implementation.

For C++ this is not the same because "type" for them means *typename*,
defined in addition that is determined in some subtle and not so
obvious way.

For backward compatibility, the C++ ABI seems to dictate that there
must be at least one such type that is called pthread_mutex_t. So we
have to keep the type with that typename for them, it is as simple as
that.

Now in a C++ context that choice above boils down to the question

  - is mtx_t a typedef to pthread_mutex_t or is it a proper type?

If we want it to be a proper type (for which I would argue, I think)
we have to think of ways to make C++ believe that the two types are
different, even if we use the same implementation underneath.

Jens

-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-29 19:01                         ` Jens Gustedt
@ 2014-08-30  5:30                           ` Rich Felker
  2014-08-30  7:43                             ` Jens Gustedt
  0 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2014-08-30  5:30 UTC (permalink / raw)
  To: musl

On Fri, Aug 29, 2014 at 09:01:11PM +0200, Jens Gustedt wrote:
> Am Freitag, den 29.08.2014, 11:57 -0400 schrieb Rich Felker:
> > On Fri, Aug 29, 2014 at 10:02:43AM +0200, Jens Gustedt wrote:
> > > Am Freitag, den 29.08.2014, 09:56 +0200 schrieb Jens Gustedt:
> > > > All of this would explode in our face the day a user wants to use
> > > > pthread_mutex_t and mtx_t in the same application. A use case could be
> > > > that he uses one library that protects CS with pthread_mutex_t and
> > > > another that uses mtx_t. Now suddenly we have code that sees two
> > > > different types, with possibly subtle bugs due to aliasing.
> > > > 
> > > > So in conclusion, it is doable, but I don't like it at all.
> > > 
> > > To give it a positive turn, for the moment I'd prefer to roll this
> > > back and have the two types pthread_mutex_t and pthread_cond_t violate
> > > the namespace rules of libc for the moment. This is not perfect, but
> > > also not a serious drawback.
> > > 
> > > This would have the advantage of being conservative on the pthread
> > > side and not to delay the schedule.
> > 
> > I don't think this is an acceptable way to proceed. It creates a
> > C++ ABI that we're planning to remove by changing the struct tags for
> > these types later (fixing the namespace issue will necessarily break
> > the C++ ABI).
> 
> I don't know what you are planning, could you please explain?

Unless the intent is to permanently have namespace violations, mtx_t
must be defined at some point such that it does not have
pthread_mutex_t as its C++ ABI "struct tag". It could have mtx_t
(because the specific name is reserved), or something like __mtx_t
(with a name in a general reserved namespace). This requires being a
different type from pthread_mutex_t.

> There is basically one base choice to make:
> 
>  - we decide if pthread_mutex_t and mtx_t are seen as two different
>    types or not for any application that includes both headers

This is not a choice; it's mandated by the fact that our
pthread_mutex_t has a "struct tag" (in C++) that's a namespace
violation for use as the tag for mtx_t.

However, by the C rules, they're only "different types" when they're
both visible in the same translation unit. To a translation unit where
only one is visible, since the typedef name is not actually part of
the type, just an alias, both are structures without tags, and the one
that is visible is _the same type_ as whichever one it needs to be to
make the code correct.

I don't see any problem if an application has both types visible in
one of it's TUs, since no "aliasing" takes place on the app side. The
tagless structure "struct { union {...} __u; }" (whichever instance of
it) is simply zero-initialized on the application TU side. On the
implementation side, functions like pthread_mutex_trylock access a
tagless structure "struct { union {...} __u; }", of which they have
only one defined: the one referenced by the pthread_mutex_t typedef.

> (This should be made independent of the question if we silently use
> the same hidden type, or similar structured type, under the hood.)
> 
> For C this choice is not so relevant, since all interfaces are just
> pointers to struct, so they are interchangeble, and this helps for the
> implementation.
> 
> For C++ this is not the same because "type" for them means *typename*,
> defined in addition that is determined in some subtle and not so
> obvious way.
> 
> For backward compatibility, the C++ ABI seems to dictate that there
> must be at least one such type that is called pthread_mutex_t. So we
> have to keep the type with that typename for them, it is as simple as
> that.
> 
> Now in a C++ context that choice above boils down to the question
> 
>   - is mtx_t a typedef to pthread_mutex_t or is it a proper type?
> 
> If we want it to be a proper type (for which I would argue, I think)
> we have to think of ways to make C++ believe that the two types are
> different, even if we use the same implementation underneath.

Yes, because of the namespace, C++ has to believe the types are
different. But the (C) implementation of the functions is not subject
to C++ rules about types; it's not C++ code. Thus I think everything
is fine.

If you really still think there's a problem, I still have one trick
I've mentioned before that makes it a 100% non-problem: never using
the pthread_mutex_t or mtx_t type at all internally, but instead using
the type of their first member. I believe I could make this work with
only a few lines of source-level changes, no change to the output
code, and minimal ugliness. Let me know if you still have doubts
whether the above analysis I gave is correct, and if so, I'll give my
trick a try.

Rich

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-30  5:30                           ` Rich Felker
@ 2014-08-30  7:43                             ` Jens Gustedt
  2014-08-30  8:54                               ` Jens Gustedt
  2014-08-31  0:30                               ` Rich Felker
  0 siblings, 2 replies; 26+ messages in thread
From: Jens Gustedt @ 2014-08-30  7:43 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 4668 bytes --]

Am Samstag, den 30.08.2014, 01:30 -0400 schrieb Rich Felker:
> On Fri, Aug 29, 2014 at 09:01:11PM +0200, Jens Gustedt wrote:
> Unless the intent is to permanently have namespace violations, mtx_t
> must be defined at some point such that it does not have
> pthread_mutex_t as its C++ ABI "struct tag". It could have mtx_t
> (because the specific name is reserved), or something like __mtx_t
> (with a name in a general reserved namespace). This requires being a
> different type from pthread_mutex_t.

Are we master of that decision, or do we have to coordinate that with
other C libraries?

> > There is basically one base choice to make:
> > 
> >  - we decide if pthread_mutex_t and mtx_t are seen as two different
> >    types or not for any application that includes both headers
> 
> This is not a choice; it's mandated by the fact that our
> pthread_mutex_t has a "struct tag" (in C++) that's a namespace
> violation for use as the tag for mtx_t.

We could probably also find a trick that has us clean on the C side,
and have namespace violation just in a C++ context :)

> However, by the C rules, they're only "different types" when they're
> both visible in the same translation unit. To a translation unit where
> only one is visible, since the typedef name is not actually part of
> the type, just an alias, both are structures without tags, and the one
> that is visible is _the same type_ as whichever one it needs to be to
> make the code correct.
> 
> I don't see any problem if an application has both types visible in
> one of it's TUs, since no "aliasing" takes place on the app side. The
> tagless structure "struct { union {...} __u; }" (whichever instance of
> it) is simply zero-initialized on the application TU side. On the
> implementation side, functions like pthread_mutex_trylock access a
> tagless structure "struct { union {...} __u; }", of which they have
> only one defined: the one referenced by the pthread_mutex_t typedef.

As I said, on the side of the current C thread implementation that
needs a thorough revision to be sure that none of the TU sees two
types. I'll look into that.

> > (This should be made independent of the question if we silently use
> > the same hidden type, or similar structured type, under the hood.)
> > 
> > For C this choice is not so relevant, since all interfaces are just
> > pointers to struct, so they are interchangeble, and this helps for the
> > implementation.
> > 
> > For C++ this is not the same because "type" for them means *typename*,
> > defined in addition that is determined in some subtle and not so
> > obvious way.
> > 
> > For backward compatibility, the C++ ABI seems to dictate that there
> > must be at least one such type that is called pthread_mutex_t. So we
> > have to keep the type with that typename for them, it is as simple as
> > that.
> > 
> > Now in a C++ context that choice above boils down to the question
> > 
> >   - is mtx_t a typedef to pthread_mutex_t or is it a proper type?
> > 
> > If we want it to be a proper type (for which I would argue, I think)
> > we have to think of ways to make C++ believe that the two types are
> > different, even if we use the same implementation underneath.
> 
> Yes, because of the namespace, C++ has to believe the types are
> different. But the (C) implementation of the functions is not subject
> to C++ rules about types; it's not C++ code. Thus I think everything
> is fine.
> 
> If you really still think there's a problem, I still have one trick
> I've mentioned before that makes it a 100% non-problem: never using
> the pthread_mutex_t or mtx_t type at all internally, but instead using
> the type of their first member. I believe I could make this work with
> only a few lines of source-level changes, no change to the output
> code, and minimal ugliness. Let me know if you still have doubts
> whether the above analysis I gave is correct, and if so, I'll give my
> trick a try.

So let us talk through this, I suspose the main change that you would
do for that is to change the accessor macros such that they have the
additional indirection. I can see that this would easily work for the
pthread TU.

For the C thread TU, what would be the mechanics for them to call one
of the (aliased) pthread functions?

Jens


-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-30  7:43                             ` Jens Gustedt
@ 2014-08-30  8:54                               ` Jens Gustedt
  2014-08-31  0:30                               ` Rich Felker
  1 sibling, 0 replies; 26+ messages in thread
From: Jens Gustedt @ 2014-08-30  8:54 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 1337 bytes --]

Am Samstag, den 30.08.2014, 09:43 +0200 schrieb Jens Gustedt:
> Am Samstag, den 30.08.2014, 01:30 -0400 schrieb Rich Felker:
> > I don't see any problem if an application has both types visible in
> > one of it's TUs, since no "aliasing" takes place on the app side. The
> > tagless structure "struct { union {...} __u; }" (whichever instance of
> > it) is simply zero-initialized on the application TU side. On the
> > implementation side, functions like pthread_mutex_trylock access a
> > tagless structure "struct { union {...} __u; }", of which they have
> > only one defined: the one referenced by the pthread_mutex_t typedef.
> 
> As I said, on the side of the current C thread implementation that
> needs a thorough revision to be sure that none of the TU sees two
> types. I'll look into that.

There are several C thread functions that use internals from
pthread_impl.h.

 - thrd_xxx and tss but these don't use mutex or cv

 - mtx but these only use minimal stuff

so this should be doable.

Jens

-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-30  7:43                             ` Jens Gustedt
  2014-08-30  8:54                               ` Jens Gustedt
@ 2014-08-31  0:30                               ` Rich Felker
  2014-08-31  1:31                                 ` Rich Felker
  1 sibling, 1 reply; 26+ messages in thread
From: Rich Felker @ 2014-08-31  0:30 UTC (permalink / raw)
  To: musl

On Sat, Aug 30, 2014 at 09:43:36AM +0200, Jens Gustedt wrote:
> Am Samstag, den 30.08.2014, 01:30 -0400 schrieb Rich Felker:
> > On Fri, Aug 29, 2014 at 09:01:11PM +0200, Jens Gustedt wrote:
> > Unless the intent is to permanently have namespace violations, mtx_t
> > must be defined at some point such that it does not have
> > pthread_mutex_t as its C++ ABI "struct tag". It could have mtx_t
> > (because the specific name is reserved), or something like __mtx_t
> > (with a name in a general reserved namespace). This requires being a
> > different type from pthread_mutex_t.
> 
> Are we master of that decision, or do we have to coordinate that with
> other C libraries?

In principle we need to coordinate. However, I'm going to strongly
recommend using a struct with no tag (in which case the C++ pseudo-tag
becomes mtx_t) since any other solution has the C aliasing problems
we've discussed (a struct with a tag cannot be the same type as a
struct with no tag, can it?).

> > > There is basically one base choice to make:
> > > 
> > >  - we decide if pthread_mutex_t and mtx_t are seen as two different
> > >    types or not for any application that includes both headers
> > 
> > This is not a choice; it's mandated by the fact that our
> > pthread_mutex_t has a "struct tag" (in C++) that's a namespace
> > violation for use as the tag for mtx_t.
> 
> We could probably also find a trick that has us clean on the C side,
> and have namespace violation just in a C++ context :)

Yes, actually I thought of this option too, but I'd rather not get us
stuck with something ugly like that. :)

> > However, by the C rules, they're only "different types" when they're
> > both visible in the same translation unit. To a translation unit where
> > only one is visible, since the typedef name is not actually part of
> > the type, just an alias, both are structures without tags, and the one
> > that is visible is _the same type_ as whichever one it needs to be to
> > make the code correct.
> > 
> > I don't see any problem if an application has both types visible in
> > one of it's TUs, since no "aliasing" takes place on the app side. The
> > tagless structure "struct { union {...} __u; }" (whichever instance of
> > it) is simply zero-initialized on the application TU side. On the
> > implementation side, functions like pthread_mutex_trylock access a
> > tagless structure "struct { union {...} __u; }", of which they have
> > only one defined: the one referenced by the pthread_mutex_t typedef.
> 
> As I said, on the side of the current C thread implementation that
> needs a thorough revision to be sure that none of the TU sees two
> types. I'll look into that.
> 
> > > (This should be made independent of the question if we silently use
> > > the same hidden type, or similar structured type, under the hood.)
> > > 
> > > For C this choice is not so relevant, since all interfaces are just
> > > pointers to struct, so they are interchangeble, and this helps for the
> > > implementation.
> > > 
> > > For C++ this is not the same because "type" for them means *typename*,
> > > defined in addition that is determined in some subtle and not so
> > > obvious way.
> > > 
> > > For backward compatibility, the C++ ABI seems to dictate that there
> > > must be at least one such type that is called pthread_mutex_t. So we
> > > have to keep the type with that typename for them, it is as simple as
> > > that.
> > > 
> > > Now in a C++ context that choice above boils down to the question
> > > 
> > >   - is mtx_t a typedef to pthread_mutex_t or is it a proper type?
> > > 
> > > If we want it to be a proper type (for which I would argue, I think)
> > > we have to think of ways to make C++ believe that the two types are
> > > different, even if we use the same implementation underneath.
> > 
> > Yes, because of the namespace, C++ has to believe the types are
> > different. But the (C) implementation of the functions is not subject
> > to C++ rules about types; it's not C++ code. Thus I think everything
> > is fine.
> > 
> > If you really still think there's a problem, I still have one trick
> > I've mentioned before that makes it a 100% non-problem: never using
> > the pthread_mutex_t or mtx_t type at all internally, but instead using
> > the type of their first member. I believe I could make this work with
> > only a few lines of source-level changes, no change to the output
> > code, and minimal ugliness. Let me know if you still have doubts
> > whether the above analysis I gave is correct, and if so, I'll give my
> > trick a try.
> 
> So let us talk through this, I suspose the main change that you would
> do for that is to change the accessor macros such that they have the
> additional indirection. I can see that this would easily work for the
> pthread TU.

No, if we did it that way, it would still be a potential aliasing
violation. For example, suppose m has type pthread_mutex_t* but
actually points to an mtx_t. Then m->[anything] is accessing *m with
the wrong effective type.

Instead, the argument name would be changed from "m" to "m0"
everywhere, and the first line of the function would be:

struct __mutex *const m = (struct __mutex *)m0;

This is legal because the first member of both pthread_mutex_t and
mtx_t would be a struct of type struct __mutex, and it's always valid
to convert back and forth between a pointer to a structure and a
pointer to its first member. So the argument m0 would essentially be
being used like a void* to convey to the mutex functions a pointer to
the struct __mutex rather than the containing object.

Then the rest of the function body would be essentially identical to
what it is now, except we would either use m0 again, or cast back,
when calling other mutex functions.

> For the C thread TU, what would be the mechanics for them to call one
> of the (aliased) pthread functions?

With my alternate solution just described, simply including the normal
pthread header and casting the pointer when making the call would be
fully legal.

With the approach we previously discussed, where we have to ensure
that no TU that accesses the contents of a mutex or cv structure can
see both the C11 and POSIX versions, The C11 TUs would have to contain
prototypes for the aliased POSIX functions like:

int __pthread_mutex_lock(mtx_t *);

Note that this is a perfectly correct prototype because mtx_t is just
this TU's typedef name for the tagless "struct { union { ... } __u; }"
that it's using, which is "the same type" as pthread_mutex_lock.c's
pthread_mutex_t.

Rich


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-31  0:30                               ` Rich Felker
@ 2014-08-31  1:31                                 ` Rich Felker
  2014-08-31  2:44                                   ` Rich Felker
  0 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2014-08-31  1:31 UTC (permalink / raw)
  To: musl

On Sat, Aug 30, 2014 at 08:30:34PM -0400, Rich Felker wrote:
> > For the C thread TU, what would be the mechanics for them to call one
> > of the (aliased) pthread functions?
> 
> With my alternate solution just described, simply including the normal
> pthread header and casting the pointer when making the call would be
> fully legal.
> 
> With the approach we previously discussed, where we have to ensure
> that no TU that accesses the contents of a mutex or cv structure can
> see both the C11 and POSIX versions, The C11 TUs would have to contain
> prototypes for the aliased POSIX functions like:
> 
> int __pthread_mutex_lock(mtx_t *);
> 
> Note that this is a perfectly correct prototype because mtx_t is just
> this TU's typedef name for the tagless "struct { union { ... } __u; }"
> that it's using, which is "the same type" as pthread_mutex_lock.c's
> pthread_mutex_t.

Actually, unless the C11 functions actually access the mutex object,
their implementation files don't need to avoid having both types
visible. Only the TUs that dereference the object (i.e. the pthread
ones) need to ensure that only one version of the type is visible.

Rich


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-31  1:31                                 ` Rich Felker
@ 2014-08-31  2:44                                   ` Rich Felker
  2014-08-31  7:09                                     ` Jens Gustedt
  0 siblings, 1 reply; 26+ messages in thread
From: Rich Felker @ 2014-08-31  2:44 UTC (permalink / raw)
  To: musl

On Sat, Aug 30, 2014 at 09:31:11PM -0400, Rich Felker wrote:
> On Sat, Aug 30, 2014 at 08:30:34PM -0400, Rich Felker wrote:
> > > For the C thread TU, what would be the mechanics for them to call one
> > > of the (aliased) pthread functions?
> > 
> > With my alternate solution just described, simply including the normal
> > pthread header and casting the pointer when making the call would be
> > fully legal.
> > 
> > With the approach we previously discussed, where we have to ensure
> > that no TU that accesses the contents of a mutex or cv structure can
> > see both the C11 and POSIX versions, The C11 TUs would have to contain
> > prototypes for the aliased POSIX functions like:
> > 
> > int __pthread_mutex_lock(mtx_t *);
> > 
> > Note that this is a perfectly correct prototype because mtx_t is just
> > this TU's typedef name for the tagless "struct { union { ... } __u; }"
> > that it's using, which is "the same type" as pthread_mutex_lock.c's
> > pthread_mutex_t.
> 
> Actually, unless the C11 functions actually access the mutex object,
> their implementation files don't need to avoid having both types
> visible. Only the TUs that dereference the object (i.e. the pthread
> ones) need to ensure that only one version of the type is visible.

The more I think about it, the more I think the visibility of the
other type is utterly irrelevant.

6.5p7: "An object shall have its stored value accessed only by an
lvalue expression that has one of the following types:

a type compatible with the effective type of the object,"

6.2.7p1: "Moreover, two structure, union, or enumerated types declared
in separate translation units are compatible if their tags and members
satisfy the following requirements: If one is declared with a tag, the
other shall be declared with the same tag. If both are completed
anywhere within their respective translation units, then the following
additional requirements apply: there shall be a one-to-one
correspondence between their members such that each pair of
corresponding members are declared with compatible types; if one
member of the pair is declared with an alignment specifier, the other
is declared with an equivalent alignment specifier; and if one member
of the pair is declared with a name, the other is declared with the
same name. For two structures, corresponding members shall be declared
in the same order. For two structures or unions, corresponding
bit-fields shall have the same widths."

As I read this, structure types declared in separate translation
units, where neither has a tag and both have identical bodies, are
compatible types, and thus allowed to alias. This means:

1. The pthread_mutex_t in pthread_mutex_*.c can alias the
pthread_mutex_t in application TUs.

2. The pthread_mutex_t in pthread_mutex_*.c can alias the
mtx_t in application TUs.

3. The mtx_t in mtx_*.c can alias the pthread_mutex_t in application
TUs.

4. The mtx_t in mtx_*.c can alias the mtx_t in application TUs.

The only aliasing that's not permitted is for the pthread_mutex_t and
mtx_t in the _same_ _TU_ to alias each other.

Is there any error in my above interpretation?

Rich

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: C threads, v. 6.2
  2014-08-31  2:44                                   ` Rich Felker
@ 2014-08-31  7:09                                     ` Jens Gustedt
  0 siblings, 0 replies; 26+ messages in thread
From: Jens Gustedt @ 2014-08-31  7:09 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 2039 bytes --]

Am Samstag, den 30.08.2014, 22:44 -0400 schrieb Rich Felker:
> On Sat, Aug 30, 2014 at 09:31:11PM -0400, Rich Felker wrote:
> > On Sat, Aug 30, 2014 at 08:30:34PM -0400, Rich Felker wrote:
> > > > For the C thread TU, what would be the mechanics for them to call one
> > > > of the (aliased) pthread functions?
> > > 
> > > With my alternate solution just described, simply including the normal
> > > pthread header and casting the pointer when making the call would be
> > > fully legal.
> > > 
> > > With the approach we previously discussed, where we have to ensure
> > > that no TU that accesses the contents of a mutex or cv structure can
> > > see both the C11 and POSIX versions, The C11 TUs would have to contain
> > > prototypes for the aliased POSIX functions like:
> > > 
> > > int __pthread_mutex_lock(mtx_t *);
> > > 
> > > Note that this is a perfectly correct prototype because mtx_t is just
> > > this TU's typedef name for the tagless "struct { union { ... } __u; }"
> > > that it's using, which is "the same type" as pthread_mutex_lock.c's
> > > pthread_mutex_t.
> > 
> > Actually, unless the C11 functions actually access the mutex object,
> > their implementation files don't need to avoid having both types
> > visible. Only the TUs that dereference the object (i.e. the pthread
> > ones) need to ensure that only one version of the type is visible.
> 
> The more I think about it, the more I think the visibility of the
> other type is utterly irrelevant.

Yes, inside the C thread implementation it is. So as you have seen I
already came to the same conclusion, and so I applied that idea to the
latest series of patches.

I think this discussion is settled, then.

Jens


-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2014-08-31  7:09 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-27 22:11 C threads, v. 6.2 Jens Gustedt
2014-08-27 23:46 ` Rich Felker
2014-08-28  9:40   ` Jens Gustedt
2014-08-28 11:41     ` Szabolcs Nagy
2014-08-28 16:15     ` Rich Felker
2014-08-28 19:28       ` Jens Gustedt
2014-08-28 20:00         ` Rich Felker
2014-08-28 20:55           ` Szabolcs Nagy
2014-08-28 21:38             ` Jens Gustedt
2014-08-28 21:34           ` Jens Gustedt
2014-08-28 21:56             ` Rich Felker
2014-08-28 23:25               ` Jens Gustedt
2014-08-28 23:38                 ` Rich Felker
2014-08-29  7:56                   ` Jens Gustedt
2014-08-29  8:02                     ` Jens Gustedt
2014-08-29 15:57                       ` Rich Felker
2014-08-29 19:01                         ` Jens Gustedt
2014-08-30  5:30                           ` Rich Felker
2014-08-30  7:43                             ` Jens Gustedt
2014-08-30  8:54                               ` Jens Gustedt
2014-08-31  0:30                               ` Rich Felker
2014-08-31  1:31                                 ` Rich Felker
2014-08-31  2:44                                   ` Rich Felker
2014-08-31  7:09                                     ` Jens Gustedt
2014-08-29 15:56                     ` Rich Felker
2014-08-29 18:40                       ` Jens Gustedt

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).