mailing list of musl libc
From: Rich Felker <dalias@libc.org>
To: musl@lists.openwall.com
Subject: Fixing atomic asm constraints
Date: Fri, 11 Jul 2014 18:53:26 -0400	[thread overview]
Message-ID: <20140711225326.GA13611@brightrain.aerifal.cx> (raw)

[-- Attachment #1: Type: text/plain, Size: 2340 bytes --]

It came to my attention as part of reviewing and testing the or1k port
(actually I was aware of this issue before but somehow missed it for
powerpc) that using "r"(ptr) input constraints for objects modified by
inline asm is not valid, unless the asm block is volatile-qualified:

https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Volatile

However, the better way to tell the compiler that pointed-to data will
be accessed/modified is to use a "+m"(*ptr) operand:

https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Clobbers

Presently, we're using the "+m"(*ptr) form on x86[_64] and arm, the
volatile-plus-"r"(ptr) form on mips and microblaze, and neither on
powerpc. Thus powerpc is clearly broken. I've attached a patch
(untested) that should fix powerpc and also make the mips and
microblaze versions consistent, but before applying it, I want to make
sure that using only the "+m"(*ptr) approach (plus "memory" in the
clobber list, of course) is actually sufficient.
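
For concreteness, this is roughly what the "+m" form looks like on a
trivial example -- an x86-style swap written from scratch here as an
illustration, not copied from the musl sources:

	static inline int a_swap(volatile int *x, int v)
	{
		/* xchg with a memory operand is implicitly locked;
		 * "+m"(*x) tells the compiler that the pointed-to int
		 * is both read and written by the asm, so no separate
		 * "r"(x) address input is needed. */
		__asm__ ( "xchg %0, %1" : "+r"(v), "+m"(*x) : : "memory" );
		return v;
	}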

My impression is that there is only one scenario where the semantics
might differ. Consider:

{
	int x;
	__asm__ __volatile__ ( ... : : "r"(&x) : "memory" );
}

vs.

{
	int x;
	__asm__ ( ... : "+m"(x) : : "memory" );
}

In the first case, the asm being volatile forces it to be emitted. In
the second case, since the address of x has not leaked and since the
asm is not "needed" (by the AST) except for possibly writing x, whose
lifetime is immediately ending, it seems like the compiler could choose
to omit the entire block, thereby also omitting any other side effects
(e.g. synchronizing memory between cores) that the asm might have.

I'm not aware of anywhere in musl where this scenario would arise
(using atomics on an object whose lifetime is ending) but perhaps
making the asm volatile everywhere is actually more correct
semantically anyway. On the other hand, if LTO caused an optimization
like case 2 above to be applied to the last unlock
before freeing a reference-counted object, I think omitting the
atomics and the memory-synchronization would actually be the correct
optimizing behavior -- nothing else can see the object at this point,
so it's acceptable for the unlock to be a nop.
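
Concretely, the sort of code I have in mind looks something like the
following sketch (made-up object layout and hypothetical lock/unlock
helpers, just to show the shape of the problem):

	struct obj {
		volatile int lock;
		volatile int refcnt;
		/* ... payload ... */
	};

	void obj_release(struct obj *o)
	{
		lock(&o->lock);		/* hypothetical helpers built on the atomics */
		int last = a_fetch_add(&o->refcnt, -1) == 1;
		unlock(&o->lock);	/* last access before the object dies */
		if (last) free(o);
	}

If LTO can prove that no other reference to o exists once the count
hits zero, dropping the atomic store and barrier in that final unlock
is harmless -- nothing else will ever observe o again.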

If we do opt for volatile asm everywhere, should we also use the
"+m"(ptr) operand form just for the sake of being explicit that the
asm accesses the atomic object?
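
That is, something along these lines (reusing the illustrative
x86-style swap from above):

	static inline int a_swap(volatile int *x, int v)
	{
		/* __volatile__ forces the asm to be emitted even if v
		 * and *x appear dead; "+m"(*x) still makes the access
		 * to the atomic object explicit to the compiler. */
		__asm__ __volatile__ ( "xchg %0, %1"
			: "+r"(v), "+m"(*x) : : "memory" );
		return v;
	}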

Rich

[-- Attachment #2: asm_constraints.diff --]
[-- Type: text/plain, Size: 5945 bytes --]

diff --git a/arch/microblaze/atomic.h b/arch/microblaze/atomic.h
index 818bcc0..215db9e 100644
--- a/arch/microblaze/atomic.h
+++ b/arch/microblaze/atomic.h
@@ -22,23 +22,10 @@ static inline int a_ctz_64(uint64_t x)
 	return a_ctz_l(y);
 }
 
-static inline int a_cas_1(volatile int *p, int t, int s)
-{
-	register int tmp;
-	do {
-		__asm__ __volatile__ ("lwx %0, %1, r0"
-			: "=r"(tmp) : "r"(p) : "memory");
-		if (tmp != t) return tmp;
-		__asm__ __volatile__ ("swx %2, %1, r0 ; addic %0, r0, 0"
-			: "=r"(tmp) : "r"(p), "r"(s) : "cc", "memory");
-	} while (tmp);
-	return t;
-}
-
 static inline int a_cas(volatile int *p, int t, int s)
 {
 	register int old, tmp;
-	__asm__ __volatile__ (
+	__asm__(
 		"	addi %0, r0, 0\n"
 		"1:	lwx %0, %2, r0\n"
 		"	rsubk %1, %0, %3\n"
@@ -47,8 +34,8 @@ static inline int a_cas(volatile int *p, int t, int s)
 		"	addic %1, r0, 0\n"
 		"	bnei %1, 1b\n"
 		"1:	"
-		: "=&r"(old), "=&r"(tmp)
-		: "r"(p), "r"(t), "r"(s)
+		: "=&r"(old), "=&r"(tmp), "+m"(*p)
+		: "r"(t), "r"(s)
 		: "cc", "memory" );
 	return old;
 }
@@ -66,15 +53,15 @@ static inline long a_cas_l(volatile void *p, long t, long s)
 static inline int a_swap(volatile int *x, int v)
 {
 	register int old, tmp;
-	__asm__ __volatile__ (
+	__asm__(
 		"	addi %0, r0, 0\n"
 		"1:	lwx %0, %2, r0\n"
 		"	swx %3, %2, r0\n"
 		"	addic %1, r0, 0\n"
 		"	bnei %1, 1b\n"
 		"1:	"
-		: "=&r"(old), "=&r"(tmp)
-		: "r"(x), "r"(v)
+		: "=&r"(old), "=&r"(tmp), "+m"(*x)
+		: "r"(v)
 		: "cc", "memory" );
 	return old;
 }
@@ -82,7 +69,7 @@ static inline int a_swap(volatile int *x, int v)
 static inline int a_fetch_add(volatile int *x, int v)
 {
 	register int new, tmp;
-	__asm__ __volatile__ (
+	__asm__(
 		"	addi %0, r0, 0\n"
 		"1:	lwx %0, %2, r0\n"
 		"	addk %0, %0, %3\n"
@@ -90,8 +77,8 @@ static inline int a_fetch_add(volatile int *x, int v)
 		"	addic %1, r0, 0\n"
 		"	bnei %1, 1b\n"
 		"1:	"
-		: "=&r"(new), "=&r"(tmp)
-		: "r"(x), "r"(v)
+		: "=&r"(new), "=&r"(tmp), "+m"(*x)
+		: "r"(v)
 		: "cc", "memory" );
 	return new-v;
 }
diff --git a/arch/mips/atomic.h b/arch/mips/atomic.h
index 69dcdf4..fd623f6 100644
--- a/arch/mips/atomic.h
+++ b/arch/mips/atomic.h
@@ -25,7 +25,7 @@ static inline int a_ctz_64(uint64_t x)
 static inline int a_cas(volatile int *p, int t, int s)
 {
 	int dummy;
-	__asm__ __volatile__(
+	__asm__(
 		".set push\n"
 		".set mips2\n"
 		".set noreorder\n"
@@ -37,7 +37,7 @@ static inline int a_cas(volatile int *p, int t, int s)
 		"	nop\n"
 		"1:	\n"
 		".set pop\n"
-		: "=&r"(t), "=&r"(dummy) : "r"(p), "r"(t), "r"(s) : "memory" );
+		: "=&r"(t), "=&r"(dummy), "+m"(*p) : "r"(t), "r"(s) : "memory" );
         return t;
 }
 
@@ -55,7 +55,7 @@ static inline long a_cas_l(volatile void *p, long t, long s)
 static inline int a_swap(volatile int *x, int v)
 {
 	int old, dummy;
-	__asm__ __volatile__(
+	__asm__(
 		".set push\n"
 		".set mips2\n"
 		".set noreorder\n"
@@ -66,14 +66,14 @@ static inline int a_swap(volatile int *x, int v)
 		"	nop\n"
 		"1:	\n"
 		".set pop\n"
-		: "=&r"(old), "=&r"(dummy) : "r"(x), "r"(v) : "memory" );
+		: "=&r"(old), "=&r"(dummy), "+m"(*x) : "r"(v) : "memory" );
         return old;
 }
 
 static inline int a_fetch_add(volatile int *x, int v)
 {
 	int old, dummy;
-	__asm__ __volatile__(
+	__asm__(
 		".set push\n"
 		".set mips2\n"
 		".set noreorder\n"
@@ -84,14 +84,14 @@ static inline int a_fetch_add(volatile int *x, int v)
 		"	nop\n"
 		"1:	\n"
 		".set pop\n"
-		: "=&r"(old), "=&r"(dummy) : "r"(x), "r"(v) : "memory" );
+		: "=&r"(old), "=&r"(dummy), "+m"(*x) : "r"(v) : "memory" );
         return old;
 }
 
 static inline void a_inc(volatile int *x)
 {
 	int dummy;
-	__asm__ __volatile__(
+	__asm__(
 		".set push\n"
 		".set mips2\n"
 		".set noreorder\n"
@@ -102,13 +102,13 @@ static inline void a_inc(volatile int *x)
 		"	nop\n"
 		"1:	\n"
 		".set pop\n"
-		: "=&r"(dummy) : "r"(x) : "memory" );
+		: "=&r"(dummy), "+m"(*x) : : "memory" );
 }
 
 static inline void a_dec(volatile int *x)
 {
 	int dummy;
-	__asm__ __volatile__(
+	__asm__(
 		".set push\n"
 		".set mips2\n"
 		".set noreorder\n"
@@ -119,13 +119,13 @@ static inline void a_dec(volatile int *x)
 		"	nop\n"
 		"1:	\n"
 		".set pop\n"
-		: "=&r"(dummy) : "r"(x) : "memory" );
+		: "=&r"(dummy), "+m"(*x) : : "memory" );
 }
 
 static inline void a_store(volatile int *p, int x)
 {
 	int dummy;
-	__asm__ __volatile__(
+	__asm__(
 		".set push\n"
 		".set mips2\n"
 		".set noreorder\n"
@@ -136,7 +136,7 @@ static inline void a_store(volatile int *p, int x)
 		"	nop\n"
 		"1:	\n"
 		".set pop\n"
-		: "=&r"(dummy) : "r"(p), "r"(x) : "memory" );
+		: "=&r"(dummy), "+m"(*p) : "r"(x) : "memory" );
 }
 
 static inline void a_spin()
@@ -151,7 +151,7 @@ static inline void a_crash()
 static inline void a_and(volatile int *p, int v)
 {
 	int dummy;
-	__asm__ __volatile__(
+	__asm__(
 		".set push\n"
 		".set mips2\n"
 		".set noreorder\n"
@@ -162,13 +162,13 @@ static inline void a_and(volatile int *p, int v)
 		"	nop\n"
 		"1:	\n"
 		".set pop\n"
-		: "=&r"(dummy) : "r"(p), "r"(v) : "memory" );
+		: "=&r"(dummy), "+m"(*p) : "r"(v) : "memory" );
 }
 
 static inline void a_or(volatile int *p, int v)
 {
 	int dummy;
-	__asm__ __volatile__(
+	__asm__(
 		".set push\n"
 		".set mips2\n"
 		".set noreorder\n"
@@ -179,7 +179,7 @@ static inline void a_or(volatile int *p, int v)
 		"	nop\n"
 		"1:	\n"
 		".set pop\n"
-		: "=&r"(dummy) : "r"(p), "r"(v) : "memory" );
+		: "=&r"(dummy), "+m"(*p) : "r"(v) : "memory" );
 }
 
 static inline void a_or_l(volatile void *p, long v)
diff --git a/arch/powerpc/atomic.h b/arch/powerpc/atomic.h
index d52ee0c..05951a2 100644
--- a/arch/powerpc/atomic.h
+++ b/arch/powerpc/atomic.h
@@ -31,7 +31,7 @@ static inline int a_cas(volatile int *p, int t, int s)
 		"	stwcx. %3, 0, %1\n"
 		"	bne- 1b\n"
 		"1:	\n"
-		: "=&r"(t) : "r"(p), "r"(t), "r"(s) : "cc", "memory" );
+		: "=&r"(t), "+m"(*p) : "r"(t), "r"(s) : "cc", "memory" );
         return t;
 }
 
