mailing list of musl libc
* bug in pthread_cond_broadcast
@ 2014-08-11 23:58 Jens Gustedt
  2014-08-12 16:50 ` Szabolcs Nagy
  0 siblings, 1 reply; 11+ messages in thread
From: Jens Gustedt @ 2014-08-11 23:58 UTC (permalink / raw)
  To: musl


[-- Attachment #1.1: Type: text/plain, Size: 1488 bytes --]

Hi,

I was thinking about our recent discussion on the availability of a
pthread_cond_t variable right after a broadcast (under certain
circumstances). As a byproduct I came up with a test, which you will
find attached to this post.

The idea is that there is one main thread and 10 client threads that
send signals and broadcasts back and forth through 10 phases. There is
a sequence of events (mutex locks and unlocks) that is observable from
all threads, so every thread can deduce when a pthread_cond_t (called
cond_main) has unblocked all waiters of phase i. The client threads
then start the next phase and use it with a different mutex than
before.
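
In condensed form, one phase of the handshake looks as follows (this is
only an excerpt for orientation; the attached pthread_cond_smasher.c is
the authoritative version and uses the same names):

/* client thread, phase i */
mutex_lock(&mut[i]);
++inside[i];
if (inside[i] == threads)               /* last one to arrive */
  condition_signal(&cond_main);
while (i == phase)                      /* wait for main's go-ahead */
  condition_wait(&cond_client, &mut[i]);
mutex_unlock(&mut[i]);

/* main thread, phase i */
while (inside[i] < threads)
  condition_wait(&cond_main, &mut[i]);
if (i < phases-1)
  mutex_lock(&mut[i+1]);                /* block entry into phase i+1 */
condition_broadcast(&cond_client);
phase = i+1;
mutex_unlock(&mut[i]);                  /* the event the clients observe */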

According to our recent discussion, all of this should be legal in
POSIX. For me, this test crashes or deadlocks with musl's pthread
implementation. It runs fine with glibc's pthreads, and with my latest
version of C threads based on musl (also attached).
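
To make the "legal in POSIX" claim concrete: stripped of the phase
machinery, the pattern the test relies on is reuse of a condition
variable with a different mutex once the previous waiters have provably
left their wait. Here is a minimal standalone illustration (mine, not
part of the attachment; it uses pthread_join where the test instead
deduces the unblocking from the observable lock/unlock events):

#include <pthread.h>

static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;
static int go1, go2;

static void *waiter1(void *arg) {
  (void)arg;
  pthread_mutex_lock(&m1);
  while (!go1) pthread_cond_wait(&cond, &m1);
  pthread_mutex_unlock(&m1);
  return 0;
}

static void *waiter2(void *arg) {
  (void)arg;
  pthread_mutex_lock(&m2);
  while (!go2) pthread_cond_wait(&cond, &m2);
  pthread_mutex_unlock(&m2);
  return 0;
}

int main(void) {
  pthread_t t;

  /* first use of cond, dynamically bound to m1 */
  pthread_create(&t, 0, waiter1, 0);
  pthread_mutex_lock(&m1);
  go1 = 1;
  pthread_cond_broadcast(&cond);
  pthread_mutex_unlock(&m1);
  pthread_join(t, 0);   /* waiter1 has certainly returned from its wait */

  /* second use of the same cond, now bound to m2 */
  pthread_create(&t, 0, waiter2, 0);
  pthread_mutex_lock(&m2);
  go2 = 1;
  pthread_cond_broadcast(&cond);
  pthread_mutex_unlock(&m2);
  pthread_join(t, 0);
  return 0;
}

POSIX only constrains concurrent waits on the same condition variable
to use the same mutex; the (dynamic) binding ends when the wait
returns, so the second round should be fine.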

I think the problem for musl's pthread implementation is that while
some clients start to mess around with the new cond/mutex combination,
others are still trapped inside a previous wait on the condition. The
bookkeeping for the pthread_cond_t variable then gets mixed up and the
program fails.

Jens

-- 
:: INRIA Nancy Grand Est ::: AlGorille ::: ICube/ICPS :::
:: ::::::::::::::: office Strasbourg : +33 368854536   ::
:: :::::::::::::::::::::: gsm France : +33 651400183   ::
:: ::::::::::::::: gsm international : +49 15737185122 ::
:: http://icube-icps.unistra.fr/index.php/Jens_Gustedt ::




[-- Attachment #1.2: pthread_cond_smasher.c --]
[-- Type: text/x-csrc, Size: 5387 bytes --]

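/* pthread_cond_smasher.c
 *
 * Stress test for condition variables: one main thread and 10 client
 * threads run through 10 phases.  In phase i the clients wait on
 * cond_client with mut[i]; the last client to arrive signals
 * cond_main, whereupon main broadcasts on cond_client, advances the
 * phase counter and unlocks mut[i].  Both condition variables are thus
 * used with a different mutex in every phase. */
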
#include <stdio.h>
#include <string.h>
#include <time.h>

#if (__STDC_VERSION__ > 201100L) && !defined(__STDC_NO_THREADS__)
# include <threads.h>

# define VERSION "C threads"

typedef mtx_t mutex;
typedef cnd_t condition;
typedef thrd_t thread;
typedef int thread_ret;

# define mutex_init(M) mtx_init((M), mtx_plain)
# define mutex_destroy mtx_destroy
# define mutex_lock mtx_lock
# define mutex_unlock mtx_unlock

# define condition_init(C) cnd_init((C))
# define condition_destroy cnd_destroy
# define condition_wait cnd_wait
# define condition_timedwait cnd_timedwait
# define condition_signal cnd_signal
# define condition_broadcast cnd_broadcast


# define thread_create(ID, START, ARG) thrd_create(ID, START, ARG)
# define thread_join thrd_join

# define gettime(TS) timespec_get((TS), TIME_UTC)

char const* errorstring(int err) {
  switch (err) {
  case thrd_success: return "success";
  case thrd_error: return "generic error";
  case thrd_nomem: return "out of memory";
  case thrd_timedout: return "timedout";
  default: return "unknown error return";
  }
}

#else
# include <pthread.h>

# define VERSION "POSIX threads"

typedef pthread_mutex_t mutex;
typedef pthread_cond_t condition;
typedef pthread_t thread;
typedef void* thread_ret;

# define mutex_init(M) pthread_mutex_init((M), 0)
# define mutex_destroy pthread_mutex_destroy
# define mutex_lock pthread_mutex_lock
# define mutex_unlock pthread_mutex_unlock

# define condition_init(C) pthread_cond_init((C), 0)
# define condition_destroy pthread_cond_destroy
# define condition_wait pthread_cond_wait
# define condition_timedwait pthread_cond_timedwait
# define condition_signal pthread_cond_signal
# define condition_broadcast pthread_cond_broadcast


# define thread_create(ID, START, ARG) pthread_create(ID, 0, START, ARG)
# define thread_join pthread_join

# define gettime(TS) clock_gettime(CLOCK_REALTIME, (TS))

# define errorstring strerror
#endif

#ifdef __GLIBC__
# define LIBRARY "glibc"
#else
# define LIBRARY "unidentified"
#endif

#define trace2(L, ...) fprintf(stderr, __FILE__ ":" #L ": " __VA_ARGS__)
#define trace1(L, ...) trace2(L, __VA_ARGS__)
#ifdef DEBUG
# define trace(...) trace1(__LINE__, __VA_ARGS__)
#else
# define trace(...) do { if (0) trace1(__LINE__, __VA_ARGS__); } while (0)
#endif

#define tell(...) trace1(__LINE__, __VA_ARGS__)

enum {
  phases = 10,
  threads = 10,
};

thread id[threads];
unsigned args[threads];

mutex mut[phases];
unsigned inside[phases];

condition cond_client;
condition cond_main;
unsigned volatile phase;

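/* Each client walks through all phases: in phase i it locks mut[i],
   counts itself in, signals cond_main if it is the last one to arrive,
   and then waits on cond_client until main has advanced the global
   phase counter. */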
thread_ret client(void *arg) {
  unsigned * number = arg;
  for (unsigned i = 0; i < phases; ++i) {
    trace("thread %u in phase %u\n", *number, i);
    mutex_lock(&mut[i]);
    ++inside[i];
    if (inside[i] == threads) {
      trace("thread %u is last, signalling main\n", *number);
      int ret = condition_signal(&cond_main);
      trace("thread %u is last, signalling main, %s\n", *number, errorstring(ret));
    }
    while (i == phase) {
      tell("thread %u in phase %u (%u), waiting\n", *number, i, phase);
      int ret = condition_wait(&cond_client, &mut[i]);
      trace("thread %u in phase %u (%u), finished, %s\n", *number, i, phase, errorstring(ret));
    }
    int ret = mutex_unlock(&mut[i]);
    trace("thread %u in phase %u (%u), has unlocked mutex: %s\n", *number, i, phase, errorstring(ret));
  }
  return 0;
}


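/* Main waits on cond_main until all clients have arrived in the
   current phase, locks the next phase's mutex so that nobody can run
   ahead, broadcasts on cond_client, advances the phase counter, and
   finally unlocks the current mutex, letting the woken clients
   re-acquire it and move on. */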
int main(void) {
  tell("start up of main, using %s, library %s\n", VERSION, LIBRARY);
  condition_init(&cond_client);
  condition_init(&cond_main);
  for (unsigned i = 0; i < phases; ++i) {
    mutex_init(&mut[i]);
  }
  mutex_lock(&mut[0]);

  for (unsigned i = 0; i < threads; ++i) {
    args[i] = i;
    thread_create(&id[i], client, &args[i]);
  }

  while (phase < phases) {
    while (inside[phase] < threads) {
      trace("main seeing %u threads in phase %u, waiting\n", inside[phase], phase);
      int ret = condition_wait(&cond_main, &mut[phase]);
      tell("main seeing %u threads in phase %u, %s\n", inside[phase], phase, errorstring(ret));
    }
    /* now we know that everybody is waiting inside, lock the next
       mutex, if any, such that nobody can enter the next phase
       without our permission. */
    if (phase < phases-1)
      mutex_lock(&mut[phase+1]);
    /* Now signal all clients, update the phase count and release the
       mutex they are waiting for. */
    int ret = condition_broadcast(&cond_client);
    trace("main has broadcast to %u: %s\n", phase, errorstring(ret));
    ++phase;
    ret = mutex_unlock(&mut[phase-1]);
    trace("main has unlocked mutex %u: %s\n", phase-1, errorstring(ret));
  }



  trace("main finished loop\n");

  for (unsigned i = 0; i < threads; ++i) {
    trace("main joining thread %u\n", i);
    int ret = thread_join(id[i], &(thread_ret){0});
    trace("main joining thread %u: %s\n", i, errorstring(ret));
  }

  /* C functions to destroy the control structures don't return error
     information, so we can't check for errors here. */
  for (unsigned i = 0; i < phases; ++i) {
    mutex_destroy(&mut[i]);
  }
  condition_destroy(&cond_main);
  condition_destroy(&cond_client);

  tell("shut down of main, using %s, library %s\n", VERSION, LIBRARY);
}

[-- Attachment #1.3: thrd11-v5.6.patch --]
[-- Type: text/x-patch, Size: 49810 bytes --]

diff --git a/include/alltypes.h.in b/include/alltypes.h.in
index 8478fb4..4cd2cb1 100644
--- a/include/alltypes.h.in
+++ b/include/alltypes.h.in
@@ -58,6 +58,10 @@ TYPEDEF struct { unsigned __attr; } pthread_mutexattr_t;
 TYPEDEF struct { unsigned __attr; } pthread_condattr_t;
 TYPEDEF struct { unsigned __attr; } pthread_barrierattr_t;
 TYPEDEF struct { unsigned __attr[2]; } pthread_rwlockattr_t;
+TYPEDEF pthread_cond_t cnd_t;
+TYPEDEF pthread_mutex_t mtx_t;
+
+
 
 TYPEDEF struct _IO_FILE FILE;
 
diff --git a/include/threads.h b/include/threads.h
new file mode 100644
index 0000000..313fdb4
--- /dev/null
+++ b/include/threads.h
@@ -0,0 +1,200 @@
+#ifndef _THREADS_H
+#define _THREADS_H
+
+/* This one is explicitly allowed to be included. */
+#include <time.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define __NEED_cnd_t
+#define __NEED_mtx_t
+/* Until we come up with a better naming scheme, we need to expose
+   some pthread types. */
+#define __NEED_pthread_cond_t
+#define __NEED_pthread_mutex_t
+#define __NEED_pthread_once_t
+#define __NEED_pthread_t
+#define __NEED_pthread_key_t
+
+#include <bits/alltypes.h>
+#include <bits/errno.h>
+
+typedef struct __mtx_on_heap __mtx_t;
+void __mtx_unref(__mtx_t *);
+__mtx_t* __mtx_getref(mtx_t *);
+
+int __mtx_lock(__mtx_t *);
+int __mtx_trylock(__mtx_t *);
+int __mtx_unlock(__mtx_t *);
+int __mtx_timedlock(__mtx_t *restrict, const struct timespec *restrict);
+
+typedef struct __cnd_on_heap __cnd_t;
+void __cnd_unref(__cnd_t *);
+__cnd_t* __cnd_getref(cnd_t *, __mtx_t *);
+
+int __cnd_signal(__cnd_t *);
+int __cnd_broadcast(__cnd_t *);
+int __cnd_wait(__cnd_t *restrict, __mtx_t *restrict);
+
+typedef pthread_t thrd_t;
+typedef pthread_key_t tss_t;
+typedef int (*thrd_start_t)(void*);
+typedef void (*tss_dtor_t)(void*);
+
+typedef struct once_flag once_flag;
+
+/* Just have a copy of the definition for the moment. This has to be
+   replaced by sharing that declaration in alltypes.h */
+struct __once_cb {
+	void (*__f)(void *);
+	void *__x;
+	struct __ptcb *__next;
+};
+
+struct once_flag {
+  int __cntrl;
+  int __waiters;
+  struct __once_cb __cb;
+};
+
+#ifndef __THRD_EXPERIMENTAL
+# define __THRD_EXPERIMENTAL 1
+#endif
+
+  /* The following list of 9 integer constants determines the binary
+     compatibility of this C thread implementation. You must never
+     link code against versions of the C library that do not agree
+     upon these ABI parameters.
+
+     Additionally this implementation assumes that the 5 types have
+     the same size across C libraries and that these types can be
+     initialized by the default initializer.
+
+     The values for the 9 parameters are not fixed for now. Depending
+     on the choices of other implementations and the evolution of the
+     C standard they may still change. This should happen rarely, but
+     you have to consider the C thread feature to be experimental
+     until then, and be prepared to recompile your binary when linking
+     against a different implementation or a new version.
+
+     The macro __THRD_EXPERIMENTAL will be defined as long as we
+     consider this ABI to be unstable. This comes with some link time
+     checks and an overhead of some bytes. At your own risk you may
+     switch this check off by defining the __THRD_EXPERIMENTAL macro to
+     0. */
+
+#define __THRD_SUCCESS      0
+#define __THRD_BUSY         EBUSY
+#define __THRD_ERROR        EINVAL
+#define __THRD_NOMEM        ENOMEM
+#define __THRD_TIMEDOUT     ETIMEDOUT
+#define __MTX_PLAIN         0
+#define __MTX_RECURSIVE     1
+#define __MTX_TIMED         2
+#define TSS_DTOR_ITERATIONS 4
+
+#define __THRD_CONCAT10_(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9)         \
+_0 ## _ ## _1 ## _ ## _2 ## _ ## _3 ## _ ## _4 ## _ ## _5 ## _ ## _6 ## _ ## _7 ## _ ## _8 ## _ ## _9
+
+#define __THRD_CONCAT10(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9) \
+  __THRD_CONCAT10_(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9)
+
+
+#define __THRD_ABI                              \
+__THRD_CONCAT10(__thrd_abi,                     \
+                __THRD_SUCCESS,                 \
+                __THRD_BUSY,                    \
+                __THRD_ERROR,                   \
+                __THRD_NOMEM,                   \
+                __THRD_TIMEDOUT,                \
+                __MTX_PLAIN,                    \
+                __MTX_RECURSIVE,                \
+                __MTX_TIMED,                    \
+                TSS_DTOR_ITERATIONS             \
+                )
+
+#define __THRD_SHFT(X, Y) (((X) << 3) ^ (Y))
+
+enum {
+  __thrd_abi =
+  __THRD_SHFT(sizeof(cnd_t),
+              __THRD_SHFT(sizeof(mtx_t),
+                          __THRD_SHFT(sizeof(once_flag),
+                                      __THRD_SHFT(sizeof(thrd_t),
+                                                  sizeof(tss_t))))),
+};
+
+extern unsigned const __THRD_ABI;
+
+#define __THRD_ABI_CHECK (1/(__THRD_ABI == __thrd_abi))
+
+#if __THRD_EXPERIMENTAL
+# define __THRD_ABI_MARK __attribute__((used)) static unsigned const*const __thrd_abi_mark = &__THRD_ABI
+#else
+# define __THRD_ABI_MARK typedef unsigned __thrd_abi_dummy_type
+#endif
+
+enum {
+  thrd_success = __THRD_SUCCESS,
+  thrd_busy = __THRD_BUSY,
+  thrd_error = __THRD_ERROR,
+  thrd_nomem = __THRD_NOMEM,
+  thrd_timedout = __THRD_TIMEDOUT,
+};
+
+enum {
+  mtx_plain = __MTX_PLAIN,
+  mtx_recursive = __MTX_RECURSIVE,
+  // all mutexes are timed here, so this is a no-op
+  mtx_timed = __MTX_TIMED,
+};
+
+#define ONCE_FLAG_INIT { .__cntrl = 0, }
+#define thread_local _Thread_local
+
+int thrd_create(thrd_t *, thrd_start_t, void *);
+_Noreturn void thrd_exit(int);
+
+int thrd_detach(thrd_t);
+int thrd_join(thrd_t, int *);
+
+int thrd_sleep(const struct timespec *, struct timespec *);
+void thrd_yield(void);
+
+thrd_t thrd_current(void);
+int thrd_equal(thrd_t, thrd_t);
+#define thrd_equal(A, B) ((A) == (B))
+
+void call_once(once_flag *, void (*)(void));
+
+int mtx_init(mtx_t *, int);
+void mtx_destroy(mtx_t *);
+
+int mtx_lock(mtx_t *);
+int mtx_timedlock(mtx_t *restrict, const struct timespec *restrict);
+int mtx_trylock(mtx_t *);
+int mtx_unlock(mtx_t *);
+
+int cnd_init(cnd_t *);
+void cnd_destroy(cnd_t *);
+
+int cnd_broadcast(cnd_t *);
+int cnd_signal(cnd_t *);
+
+int cnd_timedwait(cnd_t *restrict, mtx_t *restrict, const struct timespec *restrict);
+int cnd_wait(cnd_t *, mtx_t *);
+
+int tss_create(tss_t *, tss_dtor_t);
+void tss_delete(tss_t key);
+
+int tss_set(tss_t, void *);
+void *tss_get(tss_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index 650e811..6f4beab 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -51,6 +51,51 @@ struct __timer {
 	pthread_t thread;
 };
 
+/* For the moment this type has the same layout as
+   pthread_mutex_t. This is subject to change in later versions of the
+   C thread implementation, if only for the simple reason that this
+   would reduce the size of the structure by half. */
+struct __mtx_on_heap {
+	int _mt_typ;
+	int _mt_lck;
+	int _mt_wts;
+	int _mt_pad3;
+	int _mt_pad4;
+	int _mt_cnt;
+	int _mt_rfs;
+	int _mt_pad7;
+	int _mt_pad8;
+	int _mt_pad9;
+};
+
+static inline
+void __mtx_addref(struct __mtx_on_heap *m) {
+	if (m) {
+		a_inc(&m->_mt_rfs);
+	}
+}
+
+struct __cnd_on_heap {
+	int rfs;
+	int tok;
+	struct __mtx_on_heap* mtx;
+};
+
+static inline
+void __cnd_addref(struct __cnd_on_heap *c) {
+	if (c) {
+		a_inc(&c->rfs);
+	}
+}
+
+/* Unless told otherwise this C thread implementation will only use
+   private futexes. */
+#ifdef __MTX_SHARED
+#define THRD_PRIVATE 0
+#else
+#define THRD_PRIVATE 128
+#endif
+
 #define __SU (sizeof(size_t)/sizeof(int))
 
 #define _a_stacksize __u.__s[0]
@@ -66,6 +111,12 @@ struct __timer {
 #define _m_prev __u.__p[3]
 #define _m_next __u.__p[4]
 #define _m_count __u.__i[5]
+#define _mx_type __u.__i[0]
+#define _mx_lock __u.__i[1]
+#define _mx_mtx __u.__p[2]
+#define _mx_prev __u.__p[3] /* unused */
+#define _mx_next __u.__p[4] /* unused */
+#define _mx_count __u.__i[5] /* unused */
 #define _c_mutex __u.__p[0]
 #define _c_seq __u.__i[2]
 #define _c_waiters __u.__i[3]
@@ -74,6 +125,14 @@ struct __timer {
 #define _c_lockwait __u.__i[6]
 #define _c_waiters2 __u.__i[7]
 #define _c_destroy __u.__i[8]
+#define _cx_cnd __u.__p[0]
+#define _cx_seq __u.__i[2]     /* unused */
+#define _cx_waiters __u.__i[3] /* unused */
+#define _cx_clock __u.__i[4]   /* unused */
+#define _cx_lock __u.__i[5]
+#define _cx_lockwait __u.__i[6]/* unused */
+#define _cx_waiters2 __u.__i[7]/* unused */
+#define _cx_destroy __u.__i[8] /* unused */
 #define _rw_lock __u.__i[0]
 #define _rw_waiters __u.__i[1]
 #define _b_lock __u.__i[0]
@@ -83,6 +142,24 @@ struct __timer {
 #define _b_waiters2 __u.__i[4]
 #define _b_inst __u.__p[3]
 
+#if defined(__ATOMIC_ACQUIRE) && defined(__ATOMIC_RELEASE)
+#define a_splck(X)                                                      \
+({                                                                      \
+  int * _lck = (X);                                                     \
+  while (__atomic_test_and_set(_lck, __ATOMIC_ACQUIRE)) (void)0;        \
+  __atomic_thread_fence(__ATOMIC_ACQUIRE);                              \
+ })
+#define a_spunl(X) ({ int * _lck = (X); __atomic_clear(_lck, __ATOMIC_RELEASE); })
+#else
+#define a_splck(X)                                      \
+({                                                      \
+  int * _lck = (X);                                     \
+  while (__sync_lock_test_and_set(_lck, 1)) (void)0;    \
+  __sync_synchronize();                                 \
+ })
+#define a_spunl(X) ({ int * _lck = (X); __sync_lock_release(_lck); })
+#endif
+
 #include "pthread_arch.h"
 
 #define SIGTIMER 32
@@ -111,6 +188,10 @@ void __wait(volatile int *, volatile int *, int, int);
 #define __wake(addr, cnt, priv) \
 	__syscall(SYS_futex, addr, FUTEX_WAKE, (cnt)<0?INT_MAX:(cnt))
 
+void __wait_priv(volatile int *, volatile int *, int);
+#define __wake_priv(addr, cnt) \
+	__syscall(SYS_futex, addr, FUTEX_WAKE|THRD_PRIVATE, (cnt)<0?INT_MAX:(cnt))
+
 void __acquire_ptc();
 void __release_ptc();
 void __inhibit_ptc();
diff --git a/src/mman/mprotect.c b/src/mman/mprotect.c
index f488486..535787b 100644
--- a/src/mman/mprotect.c
+++ b/src/mman/mprotect.c
@@ -2,10 +2,12 @@
 #include "libc.h"
 #include "syscall.h"
 
-int mprotect(void *addr, size_t len, int prot)
+int __mprotect(void *addr, size_t len, int prot)
 {
 	size_t start, end;
 	start = (size_t)addr & -PAGE_SIZE;
 	end = (size_t)((char *)addr + len + PAGE_SIZE-1) & -PAGE_SIZE;
 	return syscall(SYS_mprotect, start, end-start, prot);
 }
+
+weak_alias(__mprotect, mprotect);
diff --git a/src/sched/thrd_yield.c b/src/sched/thrd_yield.c
new file mode 100644
index 0000000..301e953
--- /dev/null
+++ b/src/sched/thrd_yield.c
@@ -0,0 +1,7 @@
+#include <sched.h>
+#include "syscall.h"
+
+void thrd_yield()
+{
+         (void)syscall(SYS_sched_yield);
+}
diff --git a/src/thread/__cnd_getref.c b/src/thread/__cnd_getref.c
new file mode 100644
index 0000000..e96d816
--- /dev/null
+++ b/src/thread/__cnd_getref.c
@@ -0,0 +1,64 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+static inline __cnd_t* __cnd_init(__cnd_t *c, __mtx_t *m) {
+	if (c) {
+		*c = (__cnd_t) {
+			.rfs = 2,
+			 .mtx = m,
+		};
+	}
+	if (m) __mtx_addref(m);
+	return c;
+}
+
+static inline void __cnd_delete(__cnd_t *c) {
+	if (c) {
+		__mtx_unref(c->mtx);
+		c->mtx = 0;
+		free(c);
+	}
+}
+
+void __cnd_unref(__cnd_t *c) {
+	if (c) {
+		if (a_fetch_add(&c->rfs, -1) == 1) {
+			__cnd_delete(c);
+		}
+	}
+}
+
+static inline __cnd_t* __cnd_getref_def(cnd_t *cond, __mtx_t* m, __cnd_t* def) {
+	__cnd_t * ret;
+	__mtx_t * prev = 0;
+	/* Critical section protected by spin lock. */
+	a_splck(&cond->_cx_lock);
+	ret = cond->_cx_cnd;
+	if (ret) {
+		__cnd_addref(ret);
+		if (m && ret->mtx != m) {
+			prev = ret->mtx;
+			__mtx_addref(m);
+			ret->mtx = m;
+		}
+	} else if (def) {
+		ret = def;
+		cond->_cx_cnd = def;
+	}
+	a_spunl(&cond->_cx_lock);
+	if (prev) __mtx_unref(prev);
+	return ret;
+}
+
+__cnd_t* __cnd_getref(cnd_t *cond, __mtx_t * m) {
+	__cnd_t * ret = __cnd_getref_def(cond, m, 0);
+	if (!ret) {
+		__cnd_t * new = __cnd_init(malloc(sizeof *ret), m);
+		if (new) {
+			ret = __cnd_getref_def(cond, m, new);
+			/* somebody sneaked in between the first and second call */
+			if (ret != new) __cnd_delete(new);
+		}
+	}
+	return ret;
+}
diff --git a/src/thread/__mtx_getref.c b/src/thread/__mtx_getref.c
new file mode 100644
index 0000000..22b3db4
--- /dev/null
+++ b/src/thread/__mtx_getref.c
@@ -0,0 +1,49 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+static inline __mtx_t* __mtx_init(__mtx_t *m, int type) {
+	if (m) {
+		*m = (__mtx_t) {
+			._mt_rfs = 2,
+			 ._mt_typ = type,
+		};
+	}
+	return m;
+}
+
+static inline void __mtx_delete(__mtx_t * m) {
+	free(m);
+}
+
+void __mtx_unref(__mtx_t *m) {
+	if (m) {
+		if (a_fetch_add(&m->_mt_rfs, -1) == 1) {
+			__mtx_delete(m);
+		}
+	}
+}
+
+static inline __mtx_t* __mtx_getref_def(mtx_t *mut, __mtx_t* def) {
+	/* Critical section protected by spin lock. */
+	a_splck(&mut->_mx_lock);
+	__mtx_t * ret = mut->_mx_mtx;
+	if (ret) {
+		__mtx_addref(ret);
+	} else if (def) {
+		ret = def;
+		mut->_mx_mtx = def;
+	}
+	a_spunl(&mut->_mx_lock);
+	return ret;
+}
+
+__mtx_t* __mtx_getref(mtx_t *mut) {
+	__mtx_t * ret = __mtx_getref_def(mut, 0);
+	if (!ret) {
+		__mtx_t * new = __mtx_init(malloc(sizeof *ret), mut->_mx_type);
+		ret = __mtx_getref_def(mut, new);
+		/* somebody sneaked in between the first and second call */
+		if (ret != new) __mtx_delete(new);
+	}
+	return ret;
+}
diff --git a/src/thread/__thrd_abi.c b/src/thread/__thrd_abi.c
new file mode 100644
index 0000000..e5674e6
--- /dev/null
+++ b/src/thread/__thrd_abi.c
@@ -0,0 +1,3 @@
+#include <threads.h>
+
+unsigned const __THRD_ABI =  __thrd_abi;
diff --git a/src/thread/__thrd_wait.c b/src/thread/__thrd_wait.c
new file mode 100644
index 0000000..6351cb6
--- /dev/null
+++ b/src/thread/__thrd_wait.c
@@ -0,0 +1,26 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __clock_gettime(clockid_t clk, struct timespec *ts);
+
+int __thrd_wait(volatile int *addr, int val, const struct timespec *at)
+{
+	int r;
+	struct timespec to, *top=0;
+
+	if (at) {
+		if (at->tv_nsec >= 1000000000UL) return EINVAL;
+		if (__clock_gettime(CLOCK_REALTIME, &to)) return EINVAL;
+		to.tv_sec = at->tv_sec - to.tv_sec;
+		if ((to.tv_nsec = at->tv_nsec - to.tv_nsec) < 0) {
+			to.tv_sec--;
+			to.tv_nsec += 1000000000;
+		}
+		if (to.tv_sec < 0) return ETIMEDOUT;
+		top = &to;
+	}
+
+	r = -__syscall_cp(SYS_futex, addr, FUTEX_WAIT|THRD_PRIVATE, val, top);
+	if (r == EINTR || r == EINVAL || r == ETIMEDOUT || r == EWOULDBLOCK) return r;
+	return 0;
+}
diff --git a/src/thread/__timedwait.c b/src/thread/__timedwait.c
index 302273a..3aed2bd 100644
--- a/src/thread/__timedwait.c
+++ b/src/thread/__timedwait.c
@@ -4,6 +4,9 @@
 #include "futex.h"
 #include "syscall.h"
 
+int __pthread_setcancelstate(int new, int *old);
+int __clock_gettime(clockid_t clk, struct timespec *ts);
+
 static int do_wait(volatile int *addr, int val,
 	clockid_t clk, const struct timespec *at, int priv)
 {
@@ -12,7 +15,7 @@ static int do_wait(volatile int *addr, int val,
 
 	if (at) {
 		if (at->tv_nsec >= 1000000000UL) return EINVAL;
-		if (clock_gettime(clk, &to)) return EINVAL;
+		if (__clock_gettime(clk, &to)) return EINVAL;
 		to.tv_sec = at->tv_sec - to.tv_sec;
 		if ((to.tv_nsec = at->tv_nsec - to.tv_nsec) < 0) {
 			to.tv_sec--;
@@ -33,13 +36,13 @@ int __timedwait(volatile int *addr, int val,
 {
 	int r, cs;
 
-	if (!cleanup) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
+	if (!cleanup) __pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
 	pthread_cleanup_push(cleanup, arg);
 
 	r = do_wait(addr, val, clk, at, priv);
 
 	pthread_cleanup_pop(0);
-	if (!cleanup) pthread_setcancelstate(cs, 0);
+	if (!cleanup) __pthread_setcancelstate(cs, 0);
 
 	return r;
 }
diff --git a/src/thread/__wait_priv.c b/src/thread/__wait_priv.c
new file mode 100644
index 0000000..f8358cd
--- /dev/null
+++ b/src/thread/__wait_priv.c
@@ -0,0 +1,14 @@
+#include "pthread_impl.h"
+
+void __wait_priv(volatile int *addr, volatile int *waiters, int val)
+{
+	int spins=10000;
+	while (spins--) {
+		if (*addr==val) a_spin();
+		else return;
+	}
+	if (waiters) a_inc(waiters);
+	while (*addr==val)
+		__syscall(SYS_futex, addr, FUTEX_WAIT|THRD_PRIVATE, val, 0);
+	if (waiters) a_dec(waiters);
+}
diff --git a/src/thread/call_once.c b/src/thread/call_once.c
new file mode 100644
index 0000000..1d3540e
--- /dev/null
+++ b/src/thread/call_once.c
@@ -0,0 +1,40 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+#define pthread_cleanup_push_static(f, x) do { static struct __ptcb __cb; _pthread_cleanup_push(&__cb, f, x)
+
+static void undo(void *flg)
+{
+	once_flag * flag = flg;
+	a_store(&flag->__cntrl, 0);
+	__wake_priv(&flag->__cntrl, 1);
+}
+
+void call_once(once_flag *flag, void (*func)(void))
+{
+	__THRD_ABI_MARK;
+
+	/* Return immediately if init finished before */
+	if (flag->__cntrl == 2) return;
+
+	/* Try to enter initializing state. Three possibilities:
+	 *  0 - we're the first, or a previous initializer was cancelled; run init
+	 *  1 - another thread is running init; wait
+	 *  2 - another thread finished running init; just return */
+
+	for (;;) switch (a_cas(&flag->__cntrl, 0, 1)) {
+	case 0:
+		_pthread_cleanup_push((void*)&flag->__cb, undo, flag);
+		func();
+		_pthread_cleanup_pop((void*)&flag->__cb, 0);
+
+		a_store(&flag->__cntrl, 2);
+		if (flag->__waiters) __wake(&flag->__cntrl, -1, 0);
+		return;
+	case 1:
+		__wait_priv(&flag->__cntrl, &flag->__waiters, 1);
+		continue;
+	case 2:
+		return;
+	}
+}
diff --git a/src/thread/cnd_broadcast.c b/src/thread/cnd_broadcast.c
new file mode 100644
index 0000000..9e81b64
--- /dev/null
+++ b/src/thread/cnd_broadcast.c
@@ -0,0 +1,48 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __cnd_broadcast(__cnd_t *c)
+{
+	int ret = 0;
+	__mtx_t * mtx = c->mtx;
+	/* If the mtx isn't set, this __cnd has never been used for a wait;
+	   nothing to do. */
+	if (mtx) {
+		/* We can upload as many tokens as we wish here, since this
+		   __cnd_t is already unlinked from its cnd_t. It will never be
+		   used for any new waiter. */
+		a_store(&c->tok, INT_MAX);
+		/* Perform the futex requeue, waking one waiter unless we know
+		 * that the calling thread holds the mutex. */
+                int wakeup = !mtx->_mt_typ || (mtx->_mt_lck&INT_MAX)!=__pthread_self()->tid;
+		ret = __syscall(SYS_futex, &c->tok, FUTEX_REQUEUE|THRD_PRIVATE,
+		                0,
+		                INT_MAX, &mtx->_mt_lck, c->tok);
+                /* Do the bookkeeping for the mutex and wake up one thread if needed. */
+		if (ret > wakeup) a_fetch_add(&mtx->_mt_wts, ret-wakeup);
+		if (ret > 0 && wakeup) ret = __syscall(SYS_futex, &mtx->_mt_lck, FUTEX_WAKE|THRD_PRIVATE, 1);
+	}
+	return  ret < 0 ? thrd_error : thrd_success;
+}
+
+int cnd_broadcast(cnd_t * cond)
+{
+	int ret = thrd_success;
+	/* Critical section protected by spin lock */
+	a_splck(&cond->_cx_lock);
+	__cnd_t * c = cond->_cx_cnd;
+	/* If there are waiters, all will be waiting on c. Since we hold the
+	   spinlock, no other waiters can sneak in. Lock them permanently out
+	   of using this one here. As a consequence we don't have to increment
+	   the reference count: there is no change in the total number of
+	   references. */
+	cond->_cx_cnd = 0;
+	a_spunl(&cond->_cx_lock);
+
+	/* If c is 0, there haven't been any waiters yet; nothing to do. */
+	if (c) {
+		ret = __cnd_broadcast(c);
+		__cnd_unref(c);
+	}
+	return ret;
+}
diff --git a/src/thread/cnd_destroy.c b/src/thread/cnd_destroy.c
new file mode 100644
index 0000000..463b137
--- /dev/null
+++ b/src/thread/cnd_destroy.c
@@ -0,0 +1,13 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+void cnd_destroy(cnd_t *cond)
+{
+	/* Critical section protected by spin lock */
+	a_splck(&cond->_cx_lock);
+	__cnd_t * ret = cond->_cx_cnd;
+	cond->_cx_cnd = 0;
+	a_spunl(&cond->_cx_lock);
+
+	if (ret) __cnd_unref(ret);
+}
diff --git a/src/thread/cnd_init.c b/src/thread/cnd_init.c
new file mode 100644
index 0000000..9bcd051
--- /dev/null
+++ b/src/thread/cnd_init.c
@@ -0,0 +1,12 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int cnd_init(cnd_t * c)
+{
+	*c = (cnd_t) {
+		0
+	};
+	__cnd_t* ret = __cnd_getref(c, 0);
+	__cnd_unref(ret);
+	return ret ? thrd_success : thrd_nomem;
+}
diff --git a/src/thread/cnd_signal.c b/src/thread/cnd_signal.c
new file mode 100644
index 0000000..9ed765b
--- /dev/null
+++ b/src/thread/cnd_signal.c
@@ -0,0 +1,25 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int cnd_signal(cnd_t * cond) {
+	int ret = thrd_success;
+	/* Critical section protected by spin lock */
+	a_splck(&cond->_cx_lock);
+	__cnd_t * c = cond->_cx_cnd;
+	if (c) {
+		/* If the mtx isn't set, this __cnd_t has never been used for a
+		   wait; nothing to do. */
+		if (!c->mtx) c = 0;
+		else __cnd_addref(c);
+	}
+	a_spunl(&cond->_cx_lock);
+
+	/* If c is 0, there haven't been any waiters yet; nothing to do. */
+	if (c) {
+		a_inc(&c->tok);
+		ret = __syscall(SYS_futex, &c->tok, FUTEX_WAKE|THRD_PRIVATE, 1);
+		ret = (ret < 0) ? thrd_error : thrd_success;
+		__cnd_unref(c);
+	}
+	return ret;
+}
diff --git a/src/thread/cnd_timedwait.c b/src/thread/cnd_timedwait.c
new file mode 100644
index 0000000..b34604e
--- /dev/null
+++ b/src/thread/cnd_timedwait.c
@@ -0,0 +1,56 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+void __pthread_testcancel(void);
+int __clock_gettime(clockid_t clk, struct timespec *ts);
+int __thrd_wait(volatile int *addr, int val, const struct timespec *at);
+
+static int __cnd_timedwait(__cnd_t *restrict c, __mtx_t *restrict mut, const struct timespec *restrict ts)
+{
+	int e, r;
+	__mtx_unlock(mut);
+
+	do {
+		e = 0;
+		/* get a token if some is available */
+		for (int token = c->tok; token > 0;) {
+			int prev = a_cas(&c->tok, token, token - 1);
+			if (prev == token) {
+				goto RELOCK;
+			}
+			token = prev;
+		}
+
+		e = __thrd_wait(&c->tok, 0, ts);
+
+	} while (!e || e == EINTR || e == EWOULDBLOCK);
+
+	if (e != ETIMEDOUT) return thrd_error;
+
+RELOCK:
+	if ((r=__mtx_lock(mut))) return r;
+
+	switch (e) {
+	case 0:
+		return thrd_success;
+	case ETIMEDOUT:
+		return thrd_timedout;
+	default:
+		return thrd_error;
+	}
+}
+
+int cnd_timedwait(cnd_t *restrict cond, mtx_t *restrict mut, const struct timespec *restrict ts)
+{
+	int ret = thrd_error;
+	__mtx_t * m = __mtx_getref(mut);
+	if (m) {
+		__cnd_t * c = __cnd_getref(cond, m);
+		if (c) {
+			ret = __cnd_timedwait(c, m, ts);
+			__cnd_unref(c);
+		}
+		__mtx_unref(m);
+	}
+	return ret;
+}
diff --git a/src/thread/cnd_wait.c b/src/thread/cnd_wait.c
new file mode 100644
index 0000000..91e89db
--- /dev/null
+++ b/src/thread/cnd_wait.c
@@ -0,0 +1,10 @@
+#include <threads.h>
+
+int cnd_wait(cnd_t *cond, mtx_t *mutex)
+{
+	/* Calling cnd_timedwait with a null timeout pointer is an
+	   extension. Such a call is convenient here since it avoids
+	   repeating the case analysis that is already done for
+	   cnd_timedwait. */
+	return cnd_timedwait(cond, mutex, 0);
+}
diff --git a/src/thread/mtx_destroy.c b/src/thread/mtx_destroy.c
new file mode 100644
index 0000000..c8e4e68
--- /dev/null
+++ b/src/thread/mtx_destroy.c
@@ -0,0 +1,11 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+void (mtx_destroy)(mtx_t *mut) {
+	/* Critical section protected by spin lock */
+	a_splck(&mut->_mx_lock);
+	__mtx_t * ret = mut->_mx_mtx;
+	mut->_mx_mtx = 0;
+	a_spunl(&mut->_mx_lock);
+	if (ret) __mtx_unref(ret);
+}
diff --git a/src/thread/mtx_init.c b/src/thread/mtx_init.c
new file mode 100644
index 0000000..40cc6c1
--- /dev/null
+++ b/src/thread/mtx_init.c
@@ -0,0 +1,12 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int mtx_init(mtx_t * m, int type)
+{
+	*m = (mtx_t) {
+		._mx_type = ((type&mtx_recursive) ? PTHREAD_MUTEX_RECURSIVE : 0),
+	};
+	__mtx_t* ret = __mtx_getref(m);
+	__mtx_unref(ret);
+	return ret ? thrd_success : thrd_error;
+}
diff --git a/src/thread/mtx_lock.c b/src/thread/mtx_lock.c
new file mode 100644
index 0000000..59bc1b2
--- /dev/null
+++ b/src/thread/mtx_lock.c
@@ -0,0 +1,13 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __mtx_lock(__mtx_t *mut)
+{
+	return __mtx_timedlock(mut, 0);
+}
+
+int mtx_lock(mtx_t *mut)
+{
+	__THRD_ABI_MARK;
+	return mtx_timedlock(mut, 0);
+}
diff --git a/src/thread/mtx_timedlock.c b/src/thread/mtx_timedlock.c
new file mode 100644
index 0000000..cf52749
--- /dev/null
+++ b/src/thread/mtx_timedlock.c
@@ -0,0 +1,46 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __thrd_wait(volatile int *addr, int val, const struct timespec *at);
+
+int __mtx_timedlock(__mtx_t *restrict mtx, const struct timespec *restrict at)
+{
+	int r, t;
+
+	if (mtx->_mt_typ == PTHREAD_MUTEX_NORMAL && !a_cas(&mtx->_mt_lck, 0, thrd_busy))
+		return thrd_success;
+
+	for (;;) {
+		r=__mtx_trylock(mtx);
+		if (r != thrd_busy) return r;
+		else {
+			if (!(r=mtx->_mt_lck) || (r&0x40000000)) continue;
+			a_inc(&mtx->_mt_wts);
+			t = r | 0x80000000;
+			a_cas(&mtx->_mt_lck, r, t);
+			r = __thrd_wait(&mtx->_mt_lck, t, at);
+			a_dec(&mtx->_mt_wts);
+			switch (r) {
+			case 0:
+				break;
+			case EINTR:
+				break;
+			case EWOULDBLOCK:
+				break;
+			case ETIMEDOUT:
+				return thrd_timedout;
+			default:
+				return thrd_error;
+			}
+		}
+	}
+}
+
+int mtx_timedlock(mtx_t *restrict mut, const struct timespec *restrict at)
+{
+	__mtx_t * m = __mtx_getref(mut);
+	if (!m) return thrd_error;
+	int ret = __mtx_timedlock(m, at);
+	__mtx_unref(m);
+	return ret;
+}
diff --git a/src/thread/mtx_trylock.c b/src/thread/mtx_trylock.c
new file mode 100644
index 0000000..852fa15
--- /dev/null
+++ b/src/thread/mtx_trylock.c
@@ -0,0 +1,33 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __mtx_trylock(__mtx_t *mtx)
+{
+	int tid, old, own, ret = 0;
+	pthread_t self;
+	if (mtx->_mt_typ == PTHREAD_MUTEX_NORMAL) {
+		return a_cas(&mtx->_mt_lck, 0, thrd_busy) & thrd_busy;
+	} else {
+		self = __pthread_self();
+		tid = self->tid;
+		old = mtx->_mt_lck;
+		own = old & 0x7fffffff;
+		if (own == tid) {
+			if ((unsigned)mtx->_mt_cnt >= INT_MAX) return thrd_error;
+			else mtx->_mt_cnt++;
+		} else {
+			if ((own && !(own & 0x40000000)) || a_cas(&mtx->_mt_lck, old, tid)!=old)
+				return thrd_busy;
+		}
+	}
+	return thrd_success;
+}
+
+int mtx_trylock(mtx_t *mut)
+{
+	__mtx_t * m = __mtx_getref(mut);
+	if (!m) return thrd_error;
+	int ret = __mtx_trylock(m);
+	__mtx_unref(m);
+	return ret;
+}
diff --git a/src/thread/mtx_unlock.c b/src/thread/mtx_unlock.c
new file mode 100644
index 0000000..5c98b07
--- /dev/null
+++ b/src/thread/mtx_unlock.c
@@ -0,0 +1,28 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+int __mtx_unlock(__mtx_t *mtx)
+{
+	int ret = thrd_success;
+	if ((mtx->_mt_typ&3) == PTHREAD_MUTEX_RECURSIVE && mtx->_mt_cnt) {
+		if ((mtx->_mt_lck&0x1fffffff) != __pthread_self()->tid)
+			ret = thrd_error;
+		/* _mt_cnt is the count of additional locks; no real unlock needed */
+		else --mtx->_mt_cnt;
+	} else {
+		if (a_swap(&mtx->_mt_lck, 0)<0 || mtx->_mt_wts)
+			__syscall(SYS_futex, &mtx->_mt_lck, FUTEX_WAKE|THRD_PRIVATE, 1);
+	}
+	return ret;
+}
+
+int mtx_unlock(mtx_t *mut)
+{
+	int ret = thrd_error;
+	__mtx_t * m = __mtx_getref(mut);
+	if (m) {
+		ret = __mtx_unlock(m);
+		__mtx_unref(m);
+	}
+	return ret;
+}
diff --git a/src/thread/pthread_cond_timedwait.c b/src/thread/pthread_cond_timedwait.c
index 99d62cc..f43ca9e 100644
--- a/src/thread/pthread_cond_timedwait.c
+++ b/src/thread/pthread_cond_timedwait.c
@@ -1,5 +1,7 @@
 #include "pthread_impl.h"
 
+void __pthread_testcancel(void);
+
 struct cm {
 	pthread_cond_t *c;
 	pthread_mutex_t *m;
@@ -47,7 +49,7 @@ int pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restrict
 	if (ts && ts->tv_nsec >= 1000000000UL)
 		return EINVAL;
 
-	pthread_testcancel();
+	__pthread_testcancel();
 
 	a_inc(&c->_c_waiters);
 
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index e77e54a..d19d78b 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -3,97 +3,21 @@
 #include "stdio_impl.h"
 #include "libc.h"
 #include <sys/mman.h>
+#include <threads.h>
+
+void *__mmap(void *, size_t, int, int, int, off_t);
+int __munmap(void *, size_t);
+int __mprotect(void *, size_t, int);
+void __thread_enable(void);
+_Noreturn void __pthread_exit(void *);
+void *__copy_tls(unsigned char *);
+extern volatile size_t __pthread_tsd_size;
 
 static void dummy_0()
 {
 }
 weak_alias(dummy_0, __acquire_ptc);
 weak_alias(dummy_0, __release_ptc);
-weak_alias(dummy_0, __pthread_tsd_run_dtors);
-
-_Noreturn void pthread_exit(void *result)
-{
-	pthread_t self = __pthread_self();
-	sigset_t set;
-
-	self->result = result;
-
-	while (self->cancelbuf) {
-		void (*f)(void *) = self->cancelbuf->__f;
-		void *x = self->cancelbuf->__x;
-		self->cancelbuf = self->cancelbuf->__next;
-		f(x);
-	}
-
-	__pthread_tsd_run_dtors();
-
-	__lock(self->exitlock);
-
-	/* Mark this thread dead before decrementing count */
-	__lock(self->killlock);
-	self->dead = 1;
-
-	/* Block all signals before decrementing the live thread count.
-	 * This is important to ensure that dynamically allocated TLS
-	 * is not under-allocated/over-committed, and possibly for other
-	 * reasons as well. */
-	__block_all_sigs(&set);
-
-	/* Wait to unlock the kill lock, which governs functions like
-	 * pthread_kill which target a thread id, until signals have
-	 * been blocked. This precludes observation of the thread id
-	 * as a live thread (with application code running in it) after
-	 * the thread was reported dead by ESRCH being returned. */
-	__unlock(self->killlock);
-
-	/* It's impossible to determine whether this is "the last thread"
-	 * until performing the atomic decrement, since multiple threads
-	 * could exit at the same time. For the last thread, revert the
-	 * decrement and unblock signals to give the atexit handlers and
-	 * stdio cleanup code a consistent state. */
-	if (a_fetch_add(&libc.threads_minus_1, -1)==0) {
-		libc.threads_minus_1 = 0;
-		__restore_sigs(&set);
-		exit(0);
-	}
-
-	if (self->locale != &libc.global_locale) {
-		a_dec(&libc.uselocale_cnt);
-		if (self->locale->ctype_utf8)
-			a_dec(&libc.bytelocale_cnt_minus_1);
-	}
-
-	if (self->detached && self->map_base) {
-		/* Detached threads must avoid the kernel clear_child_tid
-		 * feature, since the virtual address will have been
-		 * unmapped and possibly already reused by a new mapping
-		 * at the time the kernel would perform the write. In
-		 * the case of threads that started out detached, the
-		 * initial clone flags are correct, but if the thread was
-		 * detached later (== 2), we need to clear it here. */
-		if (self->detached == 2) __syscall(SYS_set_tid_address, 0);
-
-		/* The following call unmaps the thread's stack mapping
-		 * and then exits without touching the stack. */
-		__unmapself(self->map_base, self->map_size);
-	}
-
-	for (;;) __syscall(SYS_exit, 0);
-}
-
-void __do_cleanup_push(struct __ptcb *cb)
-{
-	if (!libc.has_thread_pointer) return;
-	struct pthread *self = __pthread_self();
-	cb->__next = self->cancelbuf;
-	self->cancelbuf = cb;
-}
-
-void __do_cleanup_pop(struct __ptcb *cb)
-{
-	if (!libc.has_thread_pointer) return;
-	__pthread_self()->cancelbuf = cb->__next;
-}
 
 static int start(void *p)
 {
@@ -115,28 +39,10 @@ static int start(void *p)
 
 #define ROUND(x) (((x)+PAGE_SIZE-1)&-PAGE_SIZE)
 
-/* pthread_key_create.c overrides this */
-static volatile size_t dummy = 0;
-weak_alias(dummy, __pthread_tsd_size);
-static void *dummy_tsd[1] = { 0 };
-weak_alias(dummy_tsd, __pthread_tsd_main);
-
-static FILE *volatile dummy_file = 0;
-weak_alias(dummy_file, __stdin_used);
-weak_alias(dummy_file, __stdout_used);
-weak_alias(dummy_file, __stderr_used);
-
-static void init_file_lock(FILE *f)
-{
-	if (f && f->lock<0) f->lock = 0;
-}
-
-void *__copy_tls(unsigned char *);
-
 int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp, void *(*entry)(void *), void *restrict arg)
 {
 	int ret;
-	size_t size, guard;
+	size_t size, guard = 0;
 	struct pthread *self, *new;
 	unsigned char *map = 0, *stack = 0, *tsd = 0, *stack_limit;
 	unsigned flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND
@@ -147,16 +53,7 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp
 
 	if (!libc.can_do_threads) return ENOSYS;
 	self = __pthread_self();
-	if (!libc.threaded) {
-		for (FILE *f=libc.ofl_head; f; f=f->next)
-			init_file_lock(f);
-		init_file_lock(__stdin_used);
-		init_file_lock(__stdout_used);
-		init_file_lock(__stderr_used);
-		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
-		self->tsd = (void **)__pthread_tsd_main;
-		libc.threaded = 1;
-	}
+	if (!libc.threaded) __thread_enable();
 	if (attrp) attr = *attrp;
 
 	__acquire_ptc();
@@ -184,14 +81,14 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp
 
 	if (!tsd) {
 		if (guard) {
-			map = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
+			map = __mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
 			if (map == MAP_FAILED) goto fail;
-			if (mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)) {
-				munmap(map, size);
+			if (__mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)) {
+				__munmap(map, size);
 				goto fail;
 			}
 		} else {
-			map = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+			map = __mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
 			if (map == MAP_FAILED) goto fail;
 		}
 		tsd = map + size - __pthread_tsd_size;
@@ -233,7 +130,7 @@ int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attrp
 
 	if (ret < 0) {
 		a_dec(&libc.threads_minus_1);
-		if (map) munmap(map, size);
+		if (map) __munmap(map, size);
 		return EAGAIN;
 	}
 
diff --git a/src/thread/pthread_exit.c b/src/thread/pthread_exit.c
new file mode 100644
index 0000000..c6f4e60
--- /dev/null
+++ b/src/thread/pthread_exit.c
@@ -0,0 +1,128 @@
+#define _GNU_SOURCE
+#include "pthread_impl.h"
+#include "stdio_impl.h"
+#include "libc.h"
+#include <sys/mman.h>
+#include <threads.h>
+
+static void dummy_0()
+{
+}
+weak_alias(dummy_0, __pthread_tsd_run_dtors);
+weak_alias(dummy_0, __tss_run_dtors);
+
+_Noreturn void __pthread_exit(void *result)
+{
+	pthread_t self = __pthread_self();
+	sigset_t set;
+
+	self->result = result;
+
+	while (self->cancelbuf) {
+		void (*f)(void *) = self->cancelbuf->__f;
+		void *x = self->cancelbuf->__x;
+		self->cancelbuf = self->cancelbuf->__next;
+		f(x);
+	}
+
+	__pthread_tsd_run_dtors();
+
+	__tss_run_dtors();
+
+	__lock(self->exitlock);
+
+	/* Mark this thread dead before decrementing count */
+	__lock(self->killlock);
+	self->dead = 1;
+
+	/* Block all signals before decrementing the live thread count.
+	 * This is important to ensure that dynamically allocated TLS
+	 * is not under-allocated/over-committed, and possibly for other
+	 * reasons as well. */
+	__block_all_sigs(&set);
+
+	/* Wait to unlock the kill lock, which governs functions like
+	 * pthread_kill which target a thread id, until signals have
+	 * been blocked. This precludes observation of the thread id
+	 * as a live thread (with application code running in it) after
+	 * the thread was reported dead by ESRCH being returned. */
+	__unlock(self->killlock);
+
+	/* It's impossible to determine whether this is "the last thread"
+	 * until performing the atomic decrement, since multiple threads
+	 * could exit at the same time. For the last thread, revert the
+	 * decrement and unblock signals to give the atexit handlers and
+	 * stdio cleanup code a consistent state. */
+	if (a_fetch_add(&libc.threads_minus_1, -1)==0) {
+		libc.threads_minus_1 = 0;
+		__restore_sigs(&set);
+		exit(0);
+	}
+
+	if (self->locale != &libc.global_locale) {
+		a_dec(&libc.uselocale_cnt);
+		if (self->locale->ctype_utf8)
+			a_dec(&libc.bytelocale_cnt_minus_1);
+	}
+
+	if (self->detached && self->map_base) {
+		/* Detached threads must avoid the kernel clear_child_tid
+		 * feature, since the virtual address will have been
+		 * unmapped and possibly already reused by a new mapping
+		 * at the time the kernel would perform the write. In
+		 * the case of threads that started out detached, the
+		 * initial clone flags are correct, but if the thread was
+		 * detached later (== 2), we need to clear it here. */
+		if (self->detached == 2) __syscall(SYS_set_tid_address, 0);
+
+		/* The following call unmaps the thread's stack mapping
+		 * and then exits without touching the stack. */
+		__unmapself(self->map_base, self->map_size);
+	}
+
+	for (;;) __syscall(SYS_exit, 0);
+}
+weak_alias(__pthread_exit, pthread_exit);
+
+void __do_cleanup_push(struct __ptcb *cb)
+{
+	if (!libc.has_thread_pointer) return;
+	struct pthread *self = __pthread_self();
+	cb->__next = self->cancelbuf;
+	self->cancelbuf = cb;
+}
+
+void __do_cleanup_pop(struct __ptcb *cb)
+{
+	if (!libc.has_thread_pointer) return;
+	__pthread_self()->cancelbuf = cb->__next;
+}
+
+/* pthread_key_create.c overrides this */
+static volatile size_t dummy = 0;
+weak_alias(dummy, __pthread_tsd_size);
+static void *dummy_tsd[1] = { 0 };
+weak_alias(dummy_tsd, __pthread_tsd_main);
+
+static FILE *volatile dummy_file = 0;
+weak_alias(dummy_file, __stdin_used);
+weak_alias(dummy_file, __stdout_used);
+weak_alias(dummy_file, __stderr_used);
+
+static void init_file_lock(FILE *f)
+{
+	if (f && f->lock<0) f->lock = 0;
+}
+
+void __thread_enable(void)
+{
+	for (FILE *f=libc.ofl_head; f; f=f->next)
+		init_file_lock(f);
+	init_file_lock(__stdin_used);
+	init_file_lock(__stdout_used);
+	init_file_lock(__stderr_used);
+	__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
+	__pthread_self()->tsd = (void **)__pthread_tsd_main;
+	libc.threaded = 1;
+}
+
diff --git a/src/thread/pthread_setcancelstate.c b/src/thread/pthread_setcancelstate.c
index 060bcdc..2268217 100644
--- a/src/thread/pthread_setcancelstate.c
+++ b/src/thread/pthread_setcancelstate.c
@@ -1,6 +1,6 @@
 #include "pthread_impl.h"
 
-int pthread_setcancelstate(int new, int *old)
+int __pthread_setcancelstate(int new, int *old)
 {
 	if (new > 1U) return EINVAL;
 	if (!libc.has_thread_pointer) return ENOSYS;
@@ -9,3 +9,5 @@ int pthread_setcancelstate(int new, int *old)
 	self->canceldisable = new;
 	return 0;
 }
+
+weak_alias(__pthread_setcancelstate, pthread_setcancelstate);
diff --git a/src/thread/pthread_setcanceltype.c b/src/thread/pthread_setcanceltype.c
index bf0a3f3..d493c1b 100644
--- a/src/thread/pthread_setcanceltype.c
+++ b/src/thread/pthread_setcanceltype.c
@@ -1,11 +1,13 @@
 #include "pthread_impl.h"
 
+void __pthread_testcancel(void);
+
 int pthread_setcanceltype(int new, int *old)
 {
 	struct pthread *self = __pthread_self();
 	if (new > 1U) return EINVAL;
 	if (old) *old = self->cancelasync;
 	self->cancelasync = new;
-	if (new) pthread_testcancel();
+	if (new) __pthread_testcancel();
 	return 0;
 }
diff --git a/src/thread/pthread_testcancel.c b/src/thread/pthread_testcancel.c
index ba5f7c6..ee48e6d 100644
--- a/src/thread/pthread_testcancel.c
+++ b/src/thread/pthread_testcancel.c
@@ -7,7 +7,9 @@ static void dummy()
 
 weak_alias(dummy, __testcancel);
 
-void pthread_testcancel()
+void __pthread_testcancel()
 {
 	__testcancel();
 }
+
+weak_alias(__pthread_testcancel, pthread_testcancel);
diff --git a/src/thread/thrd_create.c b/src/thread/thrd_create.c
new file mode 100644
index 0000000..f72f992
--- /dev/null
+++ b/src/thread/thrd_create.c
@@ -0,0 +1,98 @@
+#define _GNU_SOURCE
+#include "pthread_impl.h"
+#include "stdio_impl.h"
+#include "libc.h"
+#include <sys/mman.h>
+#include <threads.h>
+
+void *__mmap(void *, size_t, int, int, int, off_t);
+int __munmap(void *, size_t);
+int __mprotect(void *, size_t, int);
+void __thread_enable(void);
+_Noreturn void __pthread_exit(void *);
+void *__copy_tls(unsigned char *);
+extern volatile size_t __pthread_tsd_size;
+
+_Noreturn void thrd_exit(int result) {
+	__pthread_exit((void*)(intptr_t)result);
+}
+
+static void dummy_0()
+{
+}
+weak_alias(dummy_0, __acquire_ptc);
+weak_alias(dummy_0, __release_ptc);
+
+static int start(void *p)
+{
+	thrd_t self = p;
+	int (*start)(void*) = (int(*)(void*)) self->start;
+	thrd_exit(start(self->start_arg));
+	return 0;
+}
+
+#define ROUND(x) (((x)+PAGE_SIZE-1)&-PAGE_SIZE)
+
+int thrd_create(thrd_t *res, thrd_start_t entry, void *arg)
+{
+	int ret = -ENOMEM;
+	size_t guard = ROUND(DEFAULT_GUARD_SIZE);
+	size_t size = guard + ROUND(DEFAULT_STACK_SIZE + libc.tls_size +  __pthread_tsd_size);
+	struct pthread *self, *new;
+	unsigned char *map, *stack, *tsd, *stack_limit;
+	unsigned flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND
+		| CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS
+		| CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | CLONE_DETACHED;
+
+	if (!libc.can_do_threads) return thrd_error;
+	self = __pthread_self();
+	if (!libc.threaded) __thread_enable();
+
+	__acquire_ptc();
+
+	if (guard) {
+		map = __mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
+		if (map == MAP_FAILED) goto CLEANUP;
+		if (__mprotect(map+guard, size-guard, PROT_READ|PROT_WRITE)) {
+			__munmap(map, size);
+			goto CLEANUP;
+		}
+	} else {
+		map = __mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+		if (map == MAP_FAILED) goto CLEANUP;
+	}
+	tsd = map + size - __pthread_tsd_size;
+	stack = tsd - libc.tls_size;
+	stack_limit = map + guard;
+
+	new = __copy_tls(tsd - libc.tls_size);
+	new->map_base = map;
+	new->map_size = size;
+	new->stack = stack;
+	new->stack_size = stack - stack_limit;
+	new->start = (void *(*)(void*))entry;
+	new->start_arg = arg;
+	new->self = new;
+	new->tsd = (void *)tsd;
+	new->locale = &libc.global_locale;
+	new->unblock_cancel = self->cancel;
+	new->canary = self->canary;
+
+	a_inc(&libc.threads_minus_1);
+	ret = __clone(start, stack, flags, new, &new->tid, TP_ADJ(new), &new->tid);
+
+ CLEANUP:
+	__release_ptc();
+
+	if (ret > 0) {
+          *res = new;
+          ret = thrd_success;
+        } else if (ret == -ENOMEM) {
+          ret = thrd_nomem;
+        } else {
+          a_dec(&libc.threads_minus_1);
+          if (map) __munmap(map, size);
+          ret = thrd_error;
+	}
+        return ret;
+}
diff --git a/src/thread/thrd_current.c b/src/thread/thrd_current.c
new file mode 100644
index 0000000..1728535
--- /dev/null
+++ b/src/thread/thrd_current.c
@@ -0,0 +1,11 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+/* Not all archs have __pthread_self as a symbol; on some it expands
+   to inline magic. So this "call" to __pthread_self is not necessarily
+   a tail call. In particular, despite appearances, thrd_current
+   cannot be implemented as a weak alias of __pthread_self. */
+pthread_t thrd_current()
+{
+	return __pthread_self();
+}
diff --git a/src/thread/thrd_detach.c b/src/thread/thrd_detach.c
new file mode 100644
index 0000000..dd29308
--- /dev/null
+++ b/src/thread/thrd_detach.c
@@ -0,0 +1,17 @@
+#include "pthread_impl.h"
+#include <threads.h>
+#include <sys/mman.h>
+
+int thrd_detach(thrd_t t)
+{
+	/* Cannot detach a thread that's already exiting */
+	if (a_swap(t->exitlock, 1)){
+		int tmp;
+		while ((tmp = t->tid)) __timedwait(&t->tid, tmp, 0, 0, 0, 0, 0);
+		if (t->map_base) munmap(t->map_base, t->map_size);
+	} else {
+		t->detached = 2;
+		__unlock(t->exitlock);
+	}
+	return thrd_success;
+}
diff --git a/src/thread/thrd_equal.c b/src/thread/thrd_equal.c
new file mode 100644
index 0000000..ac49a44
--- /dev/null
+++ b/src/thread/thrd_equal.c
@@ -0,0 +1,6 @@
+#include <threads.h>
+
+int (thrd_equal)(thrd_t a, thrd_t b)
+{
+	return a==b;
+}
diff --git a/src/thread/thrd_join.c b/src/thread/thrd_join.c
new file mode 100644
index 0000000..0446975
--- /dev/null
+++ b/src/thread/thrd_join.c
@@ -0,0 +1,14 @@
+#include "pthread_impl.h"
+#include <sys/mman.h>
+#include <threads.h>
+
+/* C11 threads cannot be canceled, so there is no need for a
+   cancelation function pointer here. */
+int thrd_join(thrd_t t, int *res)
+{
+	int tmp;
+	while ((tmp = t->tid)) __timedwait(&t->tid, tmp, 0, 0, 0, 0, 0);
+	if (res) *res = (int)(intptr_t)t->result;
+	if (t->map_base) munmap(t->map_base, t->map_size);
+	return thrd_success;
+}
diff --git a/src/thread/tss_create.c b/src/thread/tss_create.c
new file mode 100644
index 0000000..2338d59
--- /dev/null
+++ b/src/thread/tss_create.c
@@ -0,0 +1,97 @@
+#include "pthread_impl.h"
+#include <threads.h>
+
+/* A future version of C might prescribe a minimum number of TSS keys. */
+#define __TSS_MAX_MIN 64
+
+#ifndef TSS_MAX
+# define TSS_MAX __TSS_MAX_MIN
+#endif
+
+#define __LOW_BIT(X) ((X+0U) & -(X+0U))
+
+#define STRINGIFY_(X) #X
+#define STRINGIFY(X) STRINGIFY_(X)
+#define WARN__(X) _Pragma(#X)
+#define WARN_(X) WARN__(message (X))
+#define WARN(X) WARN_(X)
+
+#if (TSS_MAX) < __TSS_MAX_MIN
+WARN("TSS_MAX " STRINGIFY(TSS_MAX) " replaced by minimal value " STRINGIFY(__TSS_MAX_MIN))
+# undef TSS_MAX
+# define TSS_MAX __TSS_MAX_MIN
+#endif
+
+#if (TSS_MAX) != __LOW_BIT(TSS_MAX)
+# error TSS_MAX must be a power of 2
+#endif
+
+enum { tss_max = (TSS_MAX), };
+
+static tss_dtor_t dtors[tss_max];
+
+static void nodtor(void *dummy)
+{
+}
+
+/* This should use _Thread_local, which must exist for C11 threads.
+   Since musl might get compiled with pre-C11 options, we use gcc's
+   __thread extension instead. */
+static __thread void * data[tss_max];
+/* Thread local data is accounted for with a usage counter */
+static __thread tss_t used;
+
+int tss_create(tss_t *tss, tss_dtor_t dtor)
+{
+	unsigned start = (uintptr_t)&tss / 16 % tss_max;
+	if (!dtor) dtor = nodtor;
+
+	for (unsigned i = 0; i < tss_max; ++i) {
+		unsigned j = (start+i)%tss_max;
+		if (!a_cas_p(dtors+j, 0, (void *)dtor)) {
+			*tss = j;
+			return thrd_success;
+		}
+	}
+	*tss = -1;
+	return thrd_error;
+}
+
+void tss_delete(tss_t k)
+{
+	/* tss_delete has no failure path, so check the argument. */
+	if (k < tss_max) dtors[k] = 0;
+}
+
+int tss_set(tss_t k, void *x)
+{
+	if (k >= tss_max || !dtors[k]) return thrd_error;
+	if (data[k] != x) {
+		/* tss_set can be used to add or remove a tss. compute
+		   the difference in usage. */
+		used += (!!x - !!data[k]);
+		data[k] = x;
+	}
+	return thrd_success;
+}
+
+void * tss_get(tss_t k) {
+	return dtors[k] ? data[k] : 0;
+}
+
+
+void __tss_run_dtors(void)
+{
+	for (unsigned j=0; used && j<TSS_DTOR_ITERATIONS; j++) {
+		for (unsigned i=0; i<tss_max; i++) {
+			if (data[i]) {
+				void *tmp = data[i];
+				data[i] = 0;
+				--used;
+                                // slot i might have been deleted, check
+				if (dtors[i]) dtors[i](tmp);
+                               	if (!used) return;
+			}
+		}
+	}
+}
diff --git a/src/time/thrd_sleep.c b/src/time/thrd_sleep.c
new file mode 100644
index 0000000..6570b0c
--- /dev/null
+++ b/src/time/thrd_sleep.c
@@ -0,0 +1,31 @@
+#include <time.h>
+#include <errno.h>
+#include "syscall.h"
+#include "libc.h"
+#include "threads.h"
+
+/* Roughly, __syscall already returns the right thing: 0 if all went
+   well, or a negative error indication otherwise. */
+int thrd_sleep(const struct timespec *req, struct timespec *rem)
+{
+  int ret = __syscall(SYS_nanosleep, req, rem);
+  // compile time comparison, constant propagated
+  if (EINTR != 1) {
+    switch (ret) {
+    case 0: return 0;
+      /* error described by POSIX:                                    */
+      /* EINTR  The nanosleep() function was interrupted by a signal. */
+      /* The C11 wording is:                                          */
+      /* -1 if it has been interrupted by a signal                    */
+    case -EINTR:  return -1;
+      /* error described by POSIX */
+    case -EINVAL: return -2;
+      /* described for linux */
+    case -EFAULT: return -3;
+      /* No other error returns are specified. */
+    default: return INT_MIN;
+    }
+  }
+  // potentially a tail call
+  return ret;
+}

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]


end of thread

Thread overview: 11+ messages
2014-08-11 23:58 bug in pthread_cond_broadcast Jens Gustedt
2014-08-12 16:50 ` Szabolcs Nagy
2014-08-12 17:19   ` Rich Felker
2014-08-12 18:18     ` Jens Gustedt
2014-08-12 21:20       ` Rich Felker
2014-08-12 22:50         ` Jens Gustedt
2014-08-12 23:33           ` Rich Felker
2014-08-13  4:11             ` Rich Felker
2014-08-13  0:30           ` Rich Felker
2014-08-13  7:00             ` Jens Gustedt
2014-08-13 12:34               ` Rich Felker
