mailing list of musl libc
 help / color / mirror / code / Atom feed
* [PATCH] remove a_ctz_l from atomic_arch.h
@ 2017-12-05  2:05 Andre McCurdy
  2017-12-15  4:41 ` Rich Felker
  0 siblings, 1 reply; 2+ messages in thread
From: Andre McCurdy @ 2017-12-05  2:05 UTC (permalink / raw)
  To: musl; +Cc: Andre McCurdy

Arch specific code should now define only a_ctz_32 and/or a_ctz_64
and a_ctz_l will always be defined generically.

Provide an ARM specific a_ctz_32 helper function for ARM architecture
versions for which it can be implemented efficiently via the "rbit"
instruction (ie all Thumb-2 capable versions of ARM v6 and above).

Provide a more optimal generic a_ctz_32 for targets which support
a_clz_32 but not a_ctz_32 (e.g. ARMv5) and define generic a_ctz_64 in
terms of a_ctz_32 to make better use of architure specific a_ctz_32.

Signed-off-by: Andre McCurdy <armccurdy@gmail.com>
---
 arch/arm/atomic_arch.h  | 12 ++++++++++++
 arch/i386/atomic_arch.h |  6 +++---
 arch/x32/atomic_arch.h  |  4 ++--
 src/internal/atomic.h   | 42 +++++++++++++++++++++++-------------------
 4 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/arch/arm/atomic_arch.h b/arch/arm/atomic_arch.h
index 6e2e3b4..62458b4 100644
--- a/arch/arm/atomic_arch.h
+++ b/arch/arm/atomic_arch.h
@@ -91,4 +91,16 @@ static inline int a_clz_32(uint32_t x)
 	return x;
 }
 
+#if __ARM_ARCH_6T2__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
+
+#define a_ctz_32 a_ctz_32
+static inline int a_ctz_32(uint32_t x)
+{
+	uint32_t xr;
+	__asm__ ("rbit %0, %1" : "=r"(xr) : "r"(x));
+	return a_clz_32(xr);
+}
+
+#endif
+
 #endif
diff --git a/arch/i386/atomic_arch.h b/arch/i386/atomic_arch.h
index 7d2a48a..047fb68 100644
--- a/arch/i386/atomic_arch.h
+++ b/arch/i386/atomic_arch.h
@@ -92,10 +92,10 @@ static inline int a_ctz_64(uint64_t x)
 	return r;
 }
 
-#define a_ctz_l a_ctz_l
-static inline int a_ctz_l(unsigned long x)
+#define a_ctz_32 a_ctz_32
+static inline int a_ctz_32(uint32_t x)
 {
-	long r;
+	int r;
 	__asm__( "bsf %1,%0" : "=r"(r) : "r"(x) );
 	return r;
 }
diff --git a/arch/x32/atomic_arch.h b/arch/x32/atomic_arch.h
index a744c29..918c2d4 100644
--- a/arch/x32/atomic_arch.h
+++ b/arch/x32/atomic_arch.h
@@ -106,8 +106,8 @@ static inline int a_ctz_64(uint64_t x)
 	return x;
 }
 
-#define a_ctz_l a_ctz_l
-static inline int a_ctz_l(unsigned long x)
+#define a_ctz_32 a_ctz_32
+static inline int a_ctz_32(uint32_t x)
 {
 	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
 	return x;
diff --git a/src/internal/atomic.h b/src/internal/atomic.h
index ab473dd..f938879 100644
--- a/src/internal/atomic.h
+++ b/src/internal/atomic.h
@@ -251,6 +251,22 @@ static inline void a_crash()
 }
 #endif
 
+#ifndef a_ctz_32
+#define a_ctz_32 a_ctz_32
+static inline int a_ctz_32(uint32_t x)
+{
+#ifdef a_clz_32
+	return 31-a_clz_32(x&-x);
+#else
+	static const char debruijn32[32] = {
+		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
+		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
+	};
+	return debruijn32[(x&-x)*0x076be629 >> 27];
+#endif
+}
+#endif
+
 #ifndef a_ctz_64
 #define a_ctz_64 a_ctz_64
 static inline int a_ctz_64(uint64_t x)
@@ -261,22 +277,23 @@ static inline int a_ctz_64(uint64_t x)
 		63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
 		51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
 	};
-	static const char debruijn32[32] = {
-		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
-		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
-	};
 	if (sizeof(long) < 8) {
 		uint32_t y = x;
 		if (!y) {
 			y = x>>32;
-			return 32 + debruijn32[(y&-y)*0x076be629 >> 27];
+			return 32 + a_ctz_32(y);
 		}
-		return debruijn32[(y&-y)*0x076be629 >> 27];
+		return a_ctz_32(y);
 	}
 	return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58];
 }
 #endif
 
+static inline int a_ctz_l(unsigned long x)
+{
+	return (sizeof(long) < 8) ? a_ctz_32(x) : a_ctz_64(x);
+}
+
 #ifndef a_clz_64
 #define a_clz_64 a_clz_64
 static inline int a_clz_64(uint64_t x)
@@ -298,17 +315,4 @@ static inline int a_clz_64(uint64_t x)
 }
 #endif
 
-#ifndef a_ctz_l
-#define a_ctz_l a_ctz_l
-static inline int a_ctz_l(unsigned long x)
-{
-	static const char debruijn32[32] = {
-		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
-		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
-	};
-	if (sizeof(long) == 8) return a_ctz_64(x);
-	return debruijn32[(x&-x)*0x076be629 >> 27];
-}
-#endif
-
 #endif
-- 
1.9.1



^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] remove a_ctz_l from atomic_arch.h
  2017-12-05  2:05 [PATCH] remove a_ctz_l from atomic_arch.h Andre McCurdy
@ 2017-12-15  4:41 ` Rich Felker
  0 siblings, 0 replies; 2+ messages in thread
From: Rich Felker @ 2017-12-15  4:41 UTC (permalink / raw)
  To: musl

On Mon, Dec 04, 2017 at 06:05:21PM -0800, Andre McCurdy wrote:
> Arch specific code should now define only a_ctz_32 and/or a_ctz_64
> and a_ctz_l will always be defined generically.
> 
> Provide an ARM specific a_ctz_32 helper function for ARM architecture
> versions for which it can be implemented efficiently via the "rbit"
> instruction (ie all Thumb-2 capable versions of ARM v6 and above).
> 
> Provide a more optimal generic a_ctz_32 for targets which support
> a_clz_32 but not a_ctz_32 (e.g. ARMv5) and define generic a_ctz_64 in
> terms of a_ctz_32 to make better use of architure specific a_ctz_32.
> 
> Signed-off-by: Andre McCurdy <armccurdy@gmail.com>
> ---
>  arch/arm/atomic_arch.h  | 12 ++++++++++++
>  arch/i386/atomic_arch.h |  6 +++---
>  arch/x32/atomic_arch.h  |  4 ++--
>  src/internal/atomic.h   | 42 +++++++++++++++++++++++-------------------
>  4 files changed, 40 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/arm/atomic_arch.h b/arch/arm/atomic_arch.h
> index 6e2e3b4..62458b4 100644
> --- a/arch/arm/atomic_arch.h
> +++ b/arch/arm/atomic_arch.h
> @@ -91,4 +91,16 @@ static inline int a_clz_32(uint32_t x)
>  	return x;
>  }
>  
> +#if __ARM_ARCH_6T2__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
> +
> +#define a_ctz_32 a_ctz_32
> +static inline int a_ctz_32(uint32_t x)
> +{
> +	uint32_t xr;
> +	__asm__ ("rbit %0, %1" : "=r"(xr) : "r"(x));
> +	return a_clz_32(xr);
> +}
> +
> +#endif
> +
>  #endif
> diff --git a/arch/i386/atomic_arch.h b/arch/i386/atomic_arch.h
> index 7d2a48a..047fb68 100644
> --- a/arch/i386/atomic_arch.h
> +++ b/arch/i386/atomic_arch.h
> @@ -92,10 +92,10 @@ static inline int a_ctz_64(uint64_t x)
>  	return r;
>  }
>  
> -#define a_ctz_l a_ctz_l
> -static inline int a_ctz_l(unsigned long x)
> +#define a_ctz_32 a_ctz_32
> +static inline int a_ctz_32(uint32_t x)
>  {
> -	long r;
> +	int r;
>  	__asm__( "bsf %1,%0" : "=r"(r) : "r"(x) );
>  	return r;
>  }
> diff --git a/arch/x32/atomic_arch.h b/arch/x32/atomic_arch.h
> index a744c29..918c2d4 100644
> --- a/arch/x32/atomic_arch.h
> +++ b/arch/x32/atomic_arch.h
> @@ -106,8 +106,8 @@ static inline int a_ctz_64(uint64_t x)
>  	return x;
>  }
>  
> -#define a_ctz_l a_ctz_l
> -static inline int a_ctz_l(unsigned long x)
> +#define a_ctz_32 a_ctz_32
> +static inline int a_ctz_32(uint32_t x)
>  {
>  	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
>  	return x;
> diff --git a/src/internal/atomic.h b/src/internal/atomic.h
> index ab473dd..f938879 100644
> --- a/src/internal/atomic.h
> +++ b/src/internal/atomic.h
> @@ -251,6 +251,22 @@ static inline void a_crash()
>  }
>  #endif
>  
> +#ifndef a_ctz_32
> +#define a_ctz_32 a_ctz_32
> +static inline int a_ctz_32(uint32_t x)
> +{
> +#ifdef a_clz_32
> +	return 31-a_clz_32(x&-x);
> +#else
> +	static const char debruijn32[32] = {
> +		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
> +		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
> +	};
> +	return debruijn32[(x&-x)*0x076be629 >> 27];
> +#endif
> +}
> +#endif
> +
>  #ifndef a_ctz_64
>  #define a_ctz_64 a_ctz_64
>  static inline int a_ctz_64(uint64_t x)
> @@ -261,22 +277,23 @@ static inline int a_ctz_64(uint64_t x)
>  		63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
>  		51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
>  	};
> -	static const char debruijn32[32] = {
> -		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
> -		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
> -	};
>  	if (sizeof(long) < 8) {
>  		uint32_t y = x;
>  		if (!y) {
>  			y = x>>32;
> -			return 32 + debruijn32[(y&-y)*0x076be629 >> 27];
> +			return 32 + a_ctz_32(y);
>  		}
> -		return debruijn32[(y&-y)*0x076be629 >> 27];
> +		return a_ctz_32(y);
>  	}
>  	return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58];
>  }
>  #endif
>  
> +static inline int a_ctz_l(unsigned long x)
> +{
> +	return (sizeof(long) < 8) ? a_ctz_32(x) : a_ctz_64(x);
> +}
> +
>  #ifndef a_clz_64
>  #define a_clz_64 a_clz_64
>  static inline int a_clz_64(uint64_t x)
> @@ -298,17 +315,4 @@ static inline int a_clz_64(uint64_t x)
>  }
>  #endif
>  
> -#ifndef a_ctz_l
> -#define a_ctz_l a_ctz_l
> -static inline int a_ctz_l(unsigned long x)
> -{
> -	static const char debruijn32[32] = {
> -		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
> -		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
> -	};
> -	if (sizeof(long) == 8) return a_ctz_64(x);
> -	return debruijn32[(x&-x)*0x076be629 >> 27];
> -}
> -#endif
> -
>  #endif
> -- 
> 1.9.1

Overall I think this looks good. I want to recheck it again before
committing, and I'd like to separate out the ARM rbit addition from
the refactoring as a separate commit, but I can do that when I merge
unless you really want to do it first.

Rich


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-12-15  4:41 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-05  2:05 [PATCH] remove a_ctz_l from atomic_arch.h Andre McCurdy
2017-12-15  4:41 ` Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).