mailing list of musl libc
 help / color / mirror / code / Atom feed
From: Jens Gustedt <Jens.Gustedt@inria.fr>
To: musl@lists.openwall.com
Subject: [PATCH 4/8] determine the existence of private futexes at the first thread creation
Date: Tue, 20 Jun 2017 22:35:28 +0200	[thread overview]
Message-ID: <f21ea165c38a4dfc9ac0b9a5b95f2a5622697213.1498228733.git.Jens.Gustedt@inria.fr> (raw)
In-Reply-To: <cover.1498228733.git.Jens.Gustedt@inria.fr>

The current strategie to deal with kernels that don't implement private
futexes is to

 - test if the call works with FUTEX_PRIVATE
 - fallback to a call without if it doesn't

This forces an overhead for both sides, Linux'es with and without private
futexes. For those with, it adds a superflouous branch instruction to all
calls. For those without, it add the whole call overhead of a syscall.

The idea of this patch is simple. We determine dynamically but only once
when we switch from 1 thread to multiple threads if private futexes are
implemented. This reduces the overhead to
 - one system call in total
 - one load of a global and an OR of the flags at each futex call

In addition this system call will be as short as possible, because it
futexes on a local variable where nobody is to woken, and all this is in
a context (thread creation) that is clearly dominated by a lot of other
things.

This patch also clears an inconsistency in the code that had
FUTEX_PRIVATE written out as 128 in many places, which made it really
hard to digest and to identify what was going on.
---
 src/internal/pthread_impl.h         | 11 +++++------
 src/thread/__timedwait.c            |  3 +--
 src/thread/__wait.c                 |  5 ++---
 src/thread/pthread_barrier_wait.c   |  3 +--
 src/thread/pthread_cond_timedwait.c |  3 +--
 src/thread/pthread_create.c         | 10 +++++++++-
 6 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index e1dd4421..02929e07 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -126,18 +126,17 @@ void __vm_unlock(void);
 int __timedwait(volatile int *, int, clockid_t, const struct timespec *, int);
 int __timedwait_cp(volatile int *, int, clockid_t, const struct timespec *, int);
 void __wait(volatile int *, volatile int *, int, int);
+extern int __futex_private;
 static inline void __wake(volatile void *addr, int cnt, int priv)
 {
-	if (priv) priv = 128;
+	if (priv) priv = __futex_private;
 	if (cnt<0) cnt = INT_MAX;
-	__syscall(SYS_futex, addr, FUTEX_WAKE|priv, cnt) != -ENOSYS ||
-	__syscall(SYS_futex, addr, FUTEX_WAKE, cnt);
+	__syscall(SYS_futex, addr, FUTEX_WAKE|priv, cnt);
 }
 static inline void __futexwait(volatile void *addr, int val, int priv)
 {
-	if (priv) priv = 128;
-	__syscall(SYS_futex, addr, FUTEX_WAIT|priv, val) != -ENOSYS ||
-	__syscall(SYS_futex, addr, FUTEX_WAIT, val);
+	if (priv) priv = __futex_private;
+	__syscall(SYS_futex, addr, FUTEX_WAIT|priv, val);
 }
 
 void __acquire_ptc(void);
diff --git a/src/thread/__timedwait.c b/src/thread/__timedwait.c
index 13d8465a..7f0f03e6 100644
--- a/src/thread/__timedwait.c
+++ b/src/thread/__timedwait.c
@@ -14,7 +14,7 @@ int __timedwait_cp(volatile int *addr, int val,
 	int r;
 	struct timespec to, *top=0;
 
-	if (priv) priv = 128;
+	if (priv) priv = __futex_private;
 
 	if (at) {
 		if (at->tv_nsec >= 1000000000UL) return EINVAL;
@@ -29,7 +29,6 @@ int __timedwait_cp(volatile int *addr, int val,
 	}
 
 	r = -__syscall_cp(SYS_futex, addr, FUTEX_WAIT|priv, val, top);
-	if (r == ENOSYS) r = -__syscall_cp(SYS_futex, addr, FUTEX_WAIT, val, top);
 	if (r != EINTR && r != ETIMEDOUT && r != ECANCELED) r = 0;
 
 	return r;
diff --git a/src/thread/__wait.c b/src/thread/__wait.c
index dc33c1a3..a3391b79 100644
--- a/src/thread/__wait.c
+++ b/src/thread/__wait.c
@@ -3,15 +3,14 @@
 void __wait(volatile int *addr, volatile int *waiters, int val, int priv)
 {
 	int spins=100;
-	if (priv) priv = FUTEX_PRIVATE;
+	if (priv) priv = __futex_private;
 	while (spins-- && (!waiters || !*waiters)) {
 		if (*addr==val) a_spin();
 		else return;
 	}
 	if (waiters) a_inc(waiters);
 	while (*addr==val) {
-		__syscall(SYS_futex, addr, FUTEX_WAIT|priv, val, 0) != -ENOSYS
-		|| __syscall(SYS_futex, addr, FUTEX_WAIT, val, 0);
+		__syscall(SYS_futex, addr, FUTEX_WAIT|priv, val, 0);
 	}
 	if (waiters) a_dec(waiters);
 }
diff --git a/src/thread/pthread_barrier_wait.c b/src/thread/pthread_barrier_wait.c
index 06b83db9..a89051e5 100644
--- a/src/thread/pthread_barrier_wait.c
+++ b/src/thread/pthread_barrier_wait.c
@@ -84,8 +84,7 @@ int pthread_barrier_wait(pthread_barrier_t *b)
 			a_spin();
 		a_inc(&inst->finished);
 		while (inst->finished == 1)
-			__syscall(SYS_futex,&inst->finished,FUTEX_WAIT|128,1,0) != -ENOSYS
-			|| __syscall(SYS_futex,&inst->finished,FUTEX_WAIT,1,0);
+			__syscall(SYS_futex,&inst->finished,FUTEX_WAIT|__futex_private,1,0);
 		return PTHREAD_BARRIER_SERIAL_THREAD;
 	}
 
diff --git a/src/thread/pthread_cond_timedwait.c b/src/thread/pthread_cond_timedwait.c
index 3526ecfb..6d92ea88 100644
--- a/src/thread/pthread_cond_timedwait.c
+++ b/src/thread/pthread_cond_timedwait.c
@@ -54,8 +54,7 @@ static inline void unlock_requeue(volatile int *l, volatile int *r, int w)
 {
 	a_store(l, 0);
 	if (w) __wake(l, 1, 1);
-	else __syscall(SYS_futex, l, FUTEX_REQUEUE|128, 0, 1, r) != -ENOSYS
-		|| __syscall(SYS_futex, l, FUTEX_REQUEUE, 0, 1, r);
+	else __syscall(SYS_futex, l, FUTEX_REQUEUE|__futex_private, 0, 1, r);
 }
 
 enum {
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index ca5c6b90..26945022 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -10,6 +10,8 @@ void *__mmap(void *, size_t, int, int, int, off_t);
 int __munmap(void *, size_t);
 int __mprotect(void *, size_t, int);
 
+int __futex_private = 0;
+
 static void dummy_0()
 {
 }
@@ -277,7 +279,13 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 	new->unblock_cancel = self->cancel;
 	new->CANARY = self->CANARY;
 
-	a_inc(&libc.threads_minus_1);
+	if (!a_fetch_add(&libc.threads_minus_1, 1)) {
+          // As long as we only have one thread, test if this supports
+          // private futexes.
+          __lock_t dummy = { 0 };
+          if (__syscall(SYS_futex, dummy.lc, FUTEX_WAKE|FUTEX_PRIVATE, 0) != -ENOSYS)
+		__futex_private = FUTEX_PRIVATE;
+        }
 	ret = __clone((c11 ? start_c11 : start), stack, flags, new, &new->tid, TP_ADJ(new), &new->tid);
 
 	__release_ptc();


  parent reply	other threads:[~2017-06-20 20:35 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-23 14:38 [PATCH 0/8] the new __lock and follow up patches Jens Gustedt
2017-06-16  7:11 ` [PATCH 1/8] (V2) a new lock algorithm with lock value and CS counts in the same atomic int Jens Gustedt
2017-12-20 21:58   ` Rich Felker
2017-12-21 11:06     ` Jens Gustedt
2017-12-21 23:34       ` Rich Felker
2018-01-03 14:08         ` automated coding style Jens Gustedt
2018-01-11  4:41           ` Samuel Holland
2018-01-11  8:28             ` Jens Gustedt
2017-06-20 13:25 ` [PATCH 3/8] revise the definition of multiple basic locks in the code Jens Gustedt
2017-06-20 19:08 ` [PATCH 6/8] use the new lock algorithm for malloc Jens Gustedt
2017-06-23 15:01   ` Jens Gustedt
2017-06-20 19:41 ` [PATCH 2/8] consistently use the LOCK an UNLOCK macros Jens Gustedt
2017-06-20 19:44 ` [PATCH 5/8] separate the fast parts of __lock and __unlock into a .h file that may be used by other TU Jens Gustedt
2017-06-20 20:35 ` Jens Gustedt [this message]
2017-06-23 17:05   ` [PATCH 4/8] determine the existence of private futexes at the first thread creation Rich Felker
2017-06-23 17:16     ` Jens Gustedt
2017-06-23 21:48     ` Jens Gustedt
2017-06-23 22:08       ` Rich Felker
2017-06-23 23:42         ` Jens Gustedt
2017-06-24  0:53           ` Rich Felker
2017-06-24  8:00             ` Jens Gustedt
2017-06-24 10:12               ` flag 128 Jens Gustedt
2017-06-24 22:28                 ` Rich Felker
2017-06-24 22:23   ` [PATCH 4/8] determine the existence of private futexes at the first thread creation Rich Felker
2017-06-22 21:17 ` [PATCH 7/8] implement __unlock_requeue Jens Gustedt
2017-06-22 21:42 ` [PATCH 8/8] implement the local lock for conditions with __lock & Co Jens Gustedt
2017-06-23 14:57 ` [PATCH 0/8] the new __lock and follow up patches Jens Gustedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f21ea165c38a4dfc9ac0b9a5b95f2a5622697213.1498228733.git.Jens.Gustedt@inria.fr \
    --to=jens.gustedt@inria.fr \
    --cc=musl@lists.openwall.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).