[PATCH] powerpc64le: Add single instruction math functions

mailing list of musl libc
 help / color / mirror / code / Atom feed

* [PATCH] powerpc64le: Add single instruction math functions
@ 2017-06-21 14:53 David Edelsohn
  2017-06-23 19:35 ` Rich Felker
  0 siblings, 1 reply; 23+ messages in thread
From: David Edelsohn @ 2017-06-21 14:53 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 271 bytes --]

The following two patches are a start at single instruction math
functions for PowerPC64 architecture.  Although PPC64LE Linux and
ELFv2 ABI require Power8 as the minimum architecture, I have added
guards that fall back to C code for earlier architectures.

Thanks, David

[-- Attachment #2: 0001-Add-single-instruction-math-functions-for-Power8.patch --]
[-- Type: text/x-patch, Size: 8010 bytes --]

From 4eb34b23ba5c9cd932e2ee2fd27fb1f3a6e2f4b0 Mon Sep 17 00:00:00 2001
From: David Edelsohn <dje.gcc@gmail.com>
Date: Fri, 9 Jun 2017 15:40:40 +0000
Subject: [PATCH 1/2] Add single instruction math functions for Power8

---
 src/math/powerpc64/ceil.c   |  7 +++++++
 src/math/powerpc64/ceilf.c  |  7 +++++++
 src/math/powerpc64/fabs.c   |  7 +++++++
 src/math/powerpc64/fabsf.c  |  7 +++++++
 src/math/powerpc64/floor.c  |  7 +++++++
 src/math/powerpc64/floorf.c |  7 +++++++
 src/math/powerpc64/fma.c    |  7 +++++++
 src/math/powerpc64/fmaf.c   |  7 +++++++
 src/math/powerpc64/fmax.c   |  7 +++++++
 src/math/powerpc64/fmaxf.c  |  7 +++++++
 src/math/powerpc64/fmin.c   |  7 +++++++
 src/math/powerpc64/fminf.c  |  7 +++++++
 src/math/powerpc64/lrint.c  | 10 ++++++++++
 src/math/powerpc64/lrintf.c | 10 ++++++++++
 src/math/powerpc64/round.c  |  7 +++++++
 src/math/powerpc64/roundf.c |  7 +++++++
 src/math/powerpc64/sqrt.c   |  7 +++++++
 src/math/powerpc64/sqrtf.c  |  7 +++++++
 src/math/powerpc64/trunc.c  |  7 +++++++
 src/math/powerpc64/truncf.c |  7 +++++++
 20 files changed, 146 insertions(+)
 create mode 100644 src/math/powerpc64/ceil.c
 create mode 100644 src/math/powerpc64/ceilf.c
 create mode 100644 src/math/powerpc64/fabs.c
 create mode 100644 src/math/powerpc64/fabsf.c
 create mode 100644 src/math/powerpc64/floor.c
 create mode 100644 src/math/powerpc64/floorf.c
 create mode 100644 src/math/powerpc64/fma.c
 create mode 100644 src/math/powerpc64/fmaf.c
 create mode 100644 src/math/powerpc64/fmax.c
 create mode 100644 src/math/powerpc64/fmaxf.c
 create mode 100644 src/math/powerpc64/fmin.c
 create mode 100644 src/math/powerpc64/fminf.c
 create mode 100644 src/math/powerpc64/lrint.c
 create mode 100644 src/math/powerpc64/lrintf.c
 create mode 100644 src/math/powerpc64/round.c
 create mode 100644 src/math/powerpc64/roundf.c
 create mode 100644 src/math/powerpc64/sqrt.c
 create mode 100644 src/math/powerpc64/sqrtf.c
 create mode 100644 src/math/powerpc64/trunc.c
 create mode 100644 src/math/powerpc64/truncf.c

diff --git a/src/math/powerpc64/ceil.c b/src/math/powerpc64/ceil.c
new file mode 100644
index 0000000..b2e6d35
--- /dev/null
+++ b/src/math/powerpc64/ceil.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double ceil(double x)
+{
+	__asm__ ("frip %0, %1" : "=d"(x) : "d"(x));
+	return x;
+}
diff --git a/src/math/powerpc64/ceilf.c b/src/math/powerpc64/ceilf.c
new file mode 100644
index 0000000..97d69c7
--- /dev/null
+++ b/src/math/powerpc64/ceilf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float ceilf(float x)
+{
+	__asm__ ("frip %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/powerpc64/fabs.c b/src/math/powerpc64/fabs.c
new file mode 100644
index 0000000..6123c75
--- /dev/null
+++ b/src/math/powerpc64/fabs.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fabs(double x)
+{
+	__asm__ ("fabs %0, %1" : "=d"(x) : "d"(x));
+	return x;
+}
diff --git a/src/math/powerpc64/fabsf.c b/src/math/powerpc64/fabsf.c
new file mode 100644
index 0000000..e9e4564
--- /dev/null
+++ b/src/math/powerpc64/fabsf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fabsf(float x)
+{
+	__asm__ ("fabs %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/powerpc64/floor.c b/src/math/powerpc64/floor.c
new file mode 100644
index 0000000..d40ba65
--- /dev/null
+++ b/src/math/powerpc64/floor.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double floor(double x)
+{
+	__asm__ ("frim %0, %1" : "=d"(x) : "d"(x));
+	return x;
+}
diff --git a/src/math/powerpc64/floorf.c b/src/math/powerpc64/floorf.c
new file mode 100644
index 0000000..34ea5ee
--- /dev/null
+++ b/src/math/powerpc64/floorf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float floorf(float x)
+{
+	__asm__ ("frim %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/powerpc64/fma.c b/src/math/powerpc64/fma.c
new file mode 100644
index 0000000..5aebd1a
--- /dev/null
+++ b/src/math/powerpc64/fma.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fma(double x, double y, double z)
+{
+	__asm__ ("fmadd %0, %1, %2, %3" : "=d"(x) : "d"(x), "d"(y), "d"(z));
+	return x;
+}
diff --git a/src/math/powerpc64/fmaf.c b/src/math/powerpc64/fmaf.c
new file mode 100644
index 0000000..c678fef
--- /dev/null
+++ b/src/math/powerpc64/fmaf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fmaf(float x, float y, float z)
+{
+	__asm__ ("fmadds %0, %1, %2, %3" : "=f"(x) : "f"(x), "f"(y), "f"(z));
+	return x;
+}
diff --git a/src/math/powerpc64/fmax.c b/src/math/powerpc64/fmax.c
new file mode 100644
index 0000000..a9c507e
--- /dev/null
+++ b/src/math/powerpc64/fmax.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fmax(double x, double y)
+{
+	__asm__ ("xsmaxdp %x0, %x1, %x2" : "=ws"(x) : "ws"(x), "ws"(y));
+	return x;
+}
diff --git a/src/math/powerpc64/fmaxf.c b/src/math/powerpc64/fmaxf.c
new file mode 100644
index 0000000..f2dbd4b
--- /dev/null
+++ b/src/math/powerpc64/fmaxf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fmaxf(float x, float y)
+{
+	__asm__ ("xsmaxdp %x0, %x1, %x2" : "=ww"(x) : "ww"(x), "ww"(y));
+	return x;
+}
diff --git a/src/math/powerpc64/fmin.c b/src/math/powerpc64/fmin.c
new file mode 100644
index 0000000..62504d6
--- /dev/null
+++ b/src/math/powerpc64/fmin.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fmin(double x, double y)
+{
+	__asm__ ("xsmindp %x0, %x1, %x2" : "=ws"(x) : "ws"(x), "ws"(y));
+	return x;
+}
diff --git a/src/math/powerpc64/fminf.c b/src/math/powerpc64/fminf.c
new file mode 100644
index 0000000..4d19262
--- /dev/null
+++ b/src/math/powerpc64/fminf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fminf(float x, float y)
+{
+	__asm__ ("xsmindp %x0, %x1, %x2" : "=ww"(x) : "ww"(x), "ww"(y));
+	return x;
+}
diff --git a/src/math/powerpc64/lrint.c b/src/math/powerpc64/lrint.c
new file mode 100644
index 0000000..3e5269d
--- /dev/null
+++ b/src/math/powerpc64/lrint.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+long lrint(double x)
+{
+	long n;
+	__asm__ (
+		"fctid %1, %1\n"
+		"mfvsrd %0, %1\n" : "=r"(n), "+d"(x));
+	return n;
+}
diff --git a/src/math/powerpc64/lrintf.c b/src/math/powerpc64/lrintf.c
new file mode 100644
index 0000000..2db1a5e
--- /dev/null
+++ b/src/math/powerpc64/lrintf.c
@@ -0,0 +1,10 @@
+#include <math.h>
+
+long lrintf(float x)
+{
+	long n;
+	__asm__ (
+		"fctid %1, %1\n"
+		"mfvsrd %0, %1\n" : "=r"(n), "+f"(x));
+	return n;
+}
diff --git a/src/math/powerpc64/round.c b/src/math/powerpc64/round.c
new file mode 100644
index 0000000..ea396d9
--- /dev/null
+++ b/src/math/powerpc64/round.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double round(double x)
+{
+	__asm__ ("frin %0, %1" : "=d"(x) : "d"(x));
+	return x;
+}
diff --git a/src/math/powerpc64/roundf.c b/src/math/powerpc64/roundf.c
new file mode 100644
index 0000000..15c3439
--- /dev/null
+++ b/src/math/powerpc64/roundf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float roundf(float x)
+{
+	__asm__ ("frin %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/powerpc64/sqrt.c b/src/math/powerpc64/sqrt.c
new file mode 100644
index 0000000..13bb98d
--- /dev/null
+++ b/src/math/powerpc64/sqrt.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double sqrt(double x)
+{
+	__asm__ ("fsqrt %0, %1" : "=d"(x) : "d"(x));
+	return x;
+}
diff --git a/src/math/powerpc64/sqrtf.c b/src/math/powerpc64/sqrtf.c
new file mode 100644
index 0000000..b6ecb10
--- /dev/null
+++ b/src/math/powerpc64/sqrtf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float sqrtf(float x)
+{
+	__asm__ ("fsqrts %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/powerpc64/trunc.c b/src/math/powerpc64/trunc.c
new file mode 100644
index 0000000..abd3048
--- /dev/null
+++ b/src/math/powerpc64/trunc.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double trunc(double x)
+{
+	__asm__ ("friz %0, %1" : "=d"(x) : "d"(x));
+	return x;
+}
diff --git a/src/math/powerpc64/truncf.c b/src/math/powerpc64/truncf.c
new file mode 100644
index 0000000..3cd0e84
--- /dev/null
+++ b/src/math/powerpc64/truncf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float truncf(float x)
+{
+	__asm__ ("friz %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
-- 
1.8.3.1


[-- Attachment #3: 0002-Add-lround-implementations.patch --]
[-- Type: text/x-patch, Size: 7211 bytes --]

From f15e7f41ac8c557516503343f8284f25e954dd9d Mon Sep 17 00:00:00 2001
From: David Edelsohn <dje.gcc@gmail.com>
Date: Wed, 21 Jun 2017 14:32:34 +0000
Subject: [PATCH 2/2] Add lround implementations. Protect implementations with
 fallback for older architectures.

---
 src/math/powerpc64/ceil.c    |  8 ++++++++
 src/math/powerpc64/ceilf.c   |  8 ++++++++
 src/math/powerpc64/floor.c   |  8 ++++++++
 src/math/powerpc64/floorf.c  |  8 ++++++++
 src/math/powerpc64/fmax.c    |  8 ++++++++
 src/math/powerpc64/fmaxf.c   |  8 ++++++++
 src/math/powerpc64/fmin.c    |  8 ++++++++
 src/math/powerpc64/fminf.c   |  8 ++++++++
 src/math/powerpc64/lrint.c   | 12 +++++++++---
 src/math/powerpc64/lrintf.c  | 12 +++++++++---
 src/math/powerpc64/lround.c  | 18 ++++++++++++++++++
 src/math/powerpc64/lroundf.c | 18 ++++++++++++++++++
 src/math/powerpc64/round.c   |  8 ++++++++
 src/math/powerpc64/roundf.c  |  8 ++++++++
 src/math/powerpc64/trunc.c   |  8 ++++++++
 src/math/powerpc64/truncf.c  |  8 ++++++++
 16 files changed, 150 insertions(+), 6 deletions(-)
 create mode 100644 src/math/powerpc64/lround.c
 create mode 100644 src/math/powerpc64/lroundf.c

diff --git a/src/math/powerpc64/ceil.c b/src/math/powerpc64/ceil.c
index b2e6d35..4b01133 100644
--- a/src/math/powerpc64/ceil.c
+++ b/src/math/powerpc64/ceil.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#ifdef _ARCH_PWR5X
+
 double ceil(double x)
 {
 	__asm__ ("frip %0, %1" : "=d"(x) : "d"(x));
 	return x;
 }
+
+#else
+
+#include "../ceil.c"
+
+#endif
diff --git a/src/math/powerpc64/ceilf.c b/src/math/powerpc64/ceilf.c
index 97d69c7..59ba396 100644
--- a/src/math/powerpc64/ceilf.c
+++ b/src/math/powerpc64/ceilf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#ifdef _ARCH_PWR5X
+
 float ceilf(float x)
 {
 	__asm__ ("frip %0, %1" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../ceilf.c"
+
+#endif
diff --git a/src/math/powerpc64/floor.c b/src/math/powerpc64/floor.c
index d40ba65..4e68044 100644
--- a/src/math/powerpc64/floor.c
+++ b/src/math/powerpc64/floor.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#ifdef _ARCH_PWR5X
+
 double floor(double x)
 {
 	__asm__ ("frim %0, %1" : "=d"(x) : "d"(x));
 	return x;
 }
+
+#else
+
+#include "../floor.c"
+
+#endif
diff --git a/src/math/powerpc64/floorf.c b/src/math/powerpc64/floorf.c
index 34ea5ee..e1031ef 100644
--- a/src/math/powerpc64/floorf.c
+++ b/src/math/powerpc64/floorf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#ifdef _ARCH_PWR5X
+
 float floorf(float x)
 {
 	__asm__ ("frim %0, %1" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../floorf.c"
+
+#endif
diff --git a/src/math/powerpc64/fmax.c b/src/math/powerpc64/fmax.c
index a9c507e..992df7f 100644
--- a/src/math/powerpc64/fmax.c
+++ b/src/math/powerpc64/fmax.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#ifdef __VSX__
+
 double fmax(double x, double y)
 {
 	__asm__ ("xsmaxdp %x0, %x1, %x2" : "=ws"(x) : "ws"(x), "ws"(y));
 	return x;
 }
+
+#else
+
+#include "../fmax.c"
+
+#endif
diff --git a/src/math/powerpc64/fmaxf.c b/src/math/powerpc64/fmaxf.c
index f2dbd4b..345a234 100644
--- a/src/math/powerpc64/fmaxf.c
+++ b/src/math/powerpc64/fmaxf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#ifdef __VSX__
+
 float fmaxf(float x, float y)
 {
 	__asm__ ("xsmaxdp %x0, %x1, %x2" : "=ww"(x) : "ww"(x), "ww"(y));
 	return x;
 }
+
+#else
+
+#include "../fmaxf.c"
+
+#endif
diff --git a/src/math/powerpc64/fmin.c b/src/math/powerpc64/fmin.c
index 62504d6..adf71ba 100644
--- a/src/math/powerpc64/fmin.c
+++ b/src/math/powerpc64/fmin.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#ifdef __VSX__
+
 double fmin(double x, double y)
 {
 	__asm__ ("xsmindp %x0, %x1, %x2" : "=ws"(x) : "ws"(x), "ws"(y));
 	return x;
 }
+
+#else
+
+#include "../fmin.c"
+
+#endif
diff --git a/src/math/powerpc64/fminf.c b/src/math/powerpc64/fminf.c
index 4d19262..faf0e47 100644
--- a/src/math/powerpc64/fminf.c
+++ b/src/math/powerpc64/fminf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#ifdef __VSX__
+
 float fminf(float x, float y)
 {
 	__asm__ ("xsmindp %x0, %x1, %x2" : "=ww"(x) : "ww"(x), "ww"(y));
 	return x;
 }
+
+#else
+
+#include "../fminf.c"
+
+#endif
diff --git a/src/math/powerpc64/lrint.c b/src/math/powerpc64/lrint.c
index 3e5269d..4e4b2e0 100644
--- a/src/math/powerpc64/lrint.c
+++ b/src/math/powerpc64/lrint.c
@@ -1,10 +1,16 @@
 #include <math.h>
 
+#ifdef _ARCH_PWR5X
+
 long lrint(double x)
 {
 	long n;
-	__asm__ (
-		"fctid %1, %1\n"
-		"mfvsrd %0, %1\n" : "=r"(n), "+d"(x));
+	__asm__ ("fctid %0, %1" : "=d"(n) : "d"(x));
 	return n;
 }
+
+#else
+
+#include "../lrint.c"
+
+#endif
diff --git a/src/math/powerpc64/lrintf.c b/src/math/powerpc64/lrintf.c
index 2db1a5e..9070fc0 100644
--- a/src/math/powerpc64/lrintf.c
+++ b/src/math/powerpc64/lrintf.c
@@ -1,10 +1,16 @@
 #include <math.h>
 
+#ifdef _ARCH_PWR5X
+
 long lrintf(float x)
 {
 	long n;
-	__asm__ (
-		"fctid %1, %1\n"
-		"mfvsrd %0, %1\n" : "=r"(n), "+f"(x));
+	__asm__ ("fctid %0, %1" : "=d"(n) : "f"(x));
 	return n;
 }
+
+#else
+
+#include "../lrintf.c"
+
+#endif
diff --git a/src/math/powerpc64/lround.c b/src/math/powerpc64/lround.c
new file mode 100644
index 0000000..ee4d114
--- /dev/null
+++ b/src/math/powerpc64/lround.c
@@ -0,0 +1,18 @@
+#include <math.h>
+
+#ifdef __VSX__
+
+long lround(double x)
+{
+	long n;
+	__asm__ (
+		"xsrdpi %1, %1\n"
+		"fctid %0, %1\n" : "=d"(n), "+d"(x));
+	return n;
+}
+
+#else
+
+#include "../lround.c"
+
+#endif
diff --git a/src/math/powerpc64/lroundf.c b/src/math/powerpc64/lroundf.c
new file mode 100644
index 0000000..033094f
--- /dev/null
+++ b/src/math/powerpc64/lroundf.c
@@ -0,0 +1,18 @@
+#include <math.h>
+
+#ifdef __VSX__
+
+long lroundf(float x)
+{
+	long n;
+	__asm__ (
+		"xsrdpi %1, %1\n"
+		"fctid %0, %1\n" : "=d"(n), "+f"(x));
+	return n;
+}
+
+#else
+
+#include "../lroundf.c"
+
+#endif
diff --git a/src/math/powerpc64/round.c b/src/math/powerpc64/round.c
index ea396d9..4b9318e 100644
--- a/src/math/powerpc64/round.c
+++ b/src/math/powerpc64/round.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#ifdef _ARCH_PWR5X
+
 double round(double x)
 {
 	__asm__ ("frin %0, %1" : "=d"(x) : "d"(x));
 	return x;
 }
+
+#else
+
+#include "../round.c"
+
+#endif
diff --git a/src/math/powerpc64/roundf.c b/src/math/powerpc64/roundf.c
index 15c3439..ae93f99 100644
--- a/src/math/powerpc64/roundf.c
+++ b/src/math/powerpc64/roundf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#ifdef _ARCH_PWR5X
+
 float roundf(float x)
 {
 	__asm__ ("frin %0, %1" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../roundf.c"
+
+#endif
diff --git a/src/math/powerpc64/trunc.c b/src/math/powerpc64/trunc.c
index abd3048..5791854 100644
--- a/src/math/powerpc64/trunc.c
+++ b/src/math/powerpc64/trunc.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#ifdef _ARCH_PWR5X
+
 double trunc(double x)
 {
 	__asm__ ("friz %0, %1" : "=d"(x) : "d"(x));
 	return x;
 }
+
+#else
+
+#include "../trunc.c"
+
+#endif
diff --git a/src/math/powerpc64/truncf.c b/src/math/powerpc64/truncf.c
index 3cd0e84..94e638f 100644
--- a/src/math/powerpc64/truncf.c
+++ b/src/math/powerpc64/truncf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#ifdef _ARCH_PWR5X
+
 float truncf(float x)
 {
 	__asm__ ("friz %0, %1" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../truncf.c"
+
+#endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-21 14:53 [PATCH] powerpc64le: Add single instruction math functions David Edelsohn
@ 2017-06-23 19:35 ` Rich Felker
  2017-06-23 19:53   ` David Edelsohn
  0 siblings, 1 reply; 23+ messages in thread
From: Rich Felker @ 2017-06-23 19:35 UTC (permalink / raw)
  To: musl

On Wed, Jun 21, 2017 at 10:53:13AM -0400, David Edelsohn wrote:
> The following two patches are a start at single instruction math
> functions for PowerPC64 architecture.  Although PPC64LE Linux and
> ELFv2 ABI require Power8 as the minimum architecture, I have added
> guards that fallback to C code for earlier architectures.

Indeed, musl uses the ELFv2 ABI (minus its gratuitous mandate of
minimum ISA level) for both little and big endian powerpc64, and I
think we have users of both (people running it on old powerbooks,
etc.).

Am I reading correctly that sqrt, fma, and fabs are available even in
the lowest powerpc64 ISA, and don't need preprocessor conditionals?

Rich

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-23 19:35 ` Rich Felker
@ 2017-06-23 19:53   ` David Edelsohn
  2017-06-24  0:46     ` A. Wilcox
  0 siblings, 1 reply; 23+ messages in thread
From: David Edelsohn @ 2017-06-23 19:53 UTC (permalink / raw)
  To: musl

On Fri, Jun 23, 2017 at 3:35 PM, Rich Felker <dalias@libc.org> wrote:
> On Wed, Jun 21, 2017 at 10:53:13AM -0400, David Edelsohn wrote:
>> The following two patches are a start at single instruction math
>> functions for PowerPC64 architecture.  Although PPC64LE Linux and
>> ELFv2 ABI require Power8 as the minimum architecture, I have added
>> guards that fallback to C code for earlier architectures.
>
> Indeed, musl uses the ELFv2 ABI (minus its gratuitous mandate of
> minimum ISA level) for both little and big endian powerpc64, and I
> think we have users of both (people running it on old powerbooks,
> etc.).
>
> Am I reading correctly that sqrt, fma, and fabs are available even in
> the lowest powerpc64 ISA, and don't need preprocessor conditionals?

fabs and fma are part of the base ISA for Power processors that
include floating point support.  fsqrt was originally an optional feature
in the distant past (General Purpose group of optional instructions),
but is required in the ISA for Power processors.

Thanks, David


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-23 19:53   ` David Edelsohn
@ 2017-06-24  0:46     ` A. Wilcox
  2017-06-24  0:55       ` Rich Felker
  2017-06-24  3:05       ` David Edelsohn
  0 siblings, 2 replies; 23+ messages in thread
From: A. Wilcox @ 2017-06-24  0:46 UTC (permalink / raw)
  To: musl

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256

On 23/06/17 14:53, David Edelsohn wrote:
> On Fri, Jun 23, 2017 at 3:35 PM, Rich Felker <dalias@libc.org> 
> wrote:
>> On Wed, Jun 21, 2017 at 10:53:13AM -0400, David Edelsohn wrote:
>>> The following two patches are a start at single instruction 
>>> math functions for PowerPC64 architecture.  Although PPC64LE 
>>> Linux and ELFv2 ABI require Power8 as the minimum
>>> architecture, I have added guards that fallback to C code for
>>> earlier architectures.
>> 
>> Indeed, musl uses the ELFv2 ABI (minus its gratuitous mandate of
>>  minimum ISA level) for both little and big endian powerpc64,
>> and I think we have users of both (people running it on old 
>> powerbooks, etc.).
>> 
>> Am I reading correctly that sqrt, fma, and fabs are available 
>> even in the lowest powerpc64 ISA, and don't need preprocessor 
>> conditionals?
> 
> fabs and fma are part of the base ISA for Power processors that 
> include floating point support.  fsqrt originally was optional 
> feature in the distant past (General Purpose group of optional 
> instructions), but is required in the ISA for Power processors.
> 
> Thanks, David
> 

Chiming in as one of the heavy users/developers involved with musl/ppc.

I can confirm FSQRT exists on the oldest PPC64 chip I have - a 970FX
from 2003 - but it is indeed optional. (I didn't bother checking the
actual IBM chips because they're all on the higher end.) The Linux
kernel actually has support for emulating the instruction on PowerPC
chips where it wasn't implemented. See arch/powerpc/math-emu/fsqrt.c
in the kernel tree.

It depends on CONFIG_MATH_EMULATION_HW_UNIMPLEMENTED (or
CONFIG_MATH_EMULATION_FULL), but most (all?) distros that ship PPC
kernels have that knob turned on, as far as I can tell.

So this should be safe. The worst case scenario is that distros would
need to twiddle a config knob in the kernel.

All the best,
- --arw

- -- 
A. Wilcox (awilfox)
Project Lead, Adélie Linux
http://adelielinux.org
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2

iQIcBAEBCAAGBQJZTbZrAAoJEMspy1GSK50UPQIQAJS//JXGNUSjHtlOsK2ngxTz
0lOFPDSMTJHmTTXVGJIfoM/Rw0NtAKO6I9+eBbi08eN+Bmh6swRDK+/WQeIft8xx
Ayyi0fCNFhrEFLCc/M+SE5cwSym2gD7o0XyjNGhi5OM4rqOCybHUzTk5KW2Yxoyr
MUG7tqFgXCX8LOkWauGDkAmr9A6NjCN6GZazR7ubrsqId7KRxPGUtZRNZhfl+/cQ
sR5TcTF1yy1Ify6D0wyHIJlIdQPE65i+L+lAr9FGQSNl/NdOQA56hRiijCLwhMNJ
GNSGS6xPMWX0I7bV/gas15CRmJZyNC5sc+5Gm61/wToHgsclYVWJNJ52NXfSEWCA
LzEBfZ+6xoZgKQqBDumGEPSRoaRp8sYTTv5mgmlMBs76mOvjyd8j1p9gYrfzFoPx
Th1EX/CAlgUq84f2Nx1MyDq47ukeJKjWEGfsJe3+yUHYYpENYIKYgvKfx01UGkqR
vx7Cshtz6zjhadruxWAkQ3oqEJwJOfbBXKCITUMuUxFLHK47Ij4yJ/VZaAwiasLn
9zcTEtsq+Iu4xj/drNNPzVm2KbHTDfcKaQynowvuOx9KgyOpv+UprDXr2Koi5SCD
5+jS26QzcflGOF6WLvY+pKbDO3l1GPRHc57MhPcjJVVdYWE2NoVbcq+BhYw9ljTq
KIiCkOVyagFRViWyrzvO
=eogy
-----END PGP SIGNATURE-----


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-24  0:46     ` A. Wilcox
@ 2017-06-24  0:55       ` Rich Felker
  2017-06-24  3:05       ` David Edelsohn
  1 sibling, 0 replies; 23+ messages in thread
From: Rich Felker @ 2017-06-24  0:55 UTC (permalink / raw)
  To: musl

On Fri, Jun 23, 2017 at 07:46:38PM -0500, A. Wilcox wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA256
> 
> On 23/06/17 14:53, David Edelsohn wrote:
> > On Fri, Jun 23, 2017 at 3:35 PM, Rich Felker <dalias@libc.org> 
> > wrote:
> >> On Wed, Jun 21, 2017 at 10:53:13AM -0400, David Edelsohn wrote:
> >>> The following two patches are a start at single instruction 
> >>> math functions for PowerPC64 architecture.  Although PPC64LE 
> >>> Linux and ELFv2 ABI require Power8 as the minimum
> >>> architecture, I have added guards that fallback to C code for
> >>> earlier architectures.
> >> 
> >> Indeed, musl uses the ELFv2 ABI (minus its gratuitous mandate of
> >>  minimum ISA level) for both little and big endian powerpc64,
> >> and I think we have users of both (people running it on old 
> >> powerbooks, etc.).
> >> 
> >> Am I reading correctly that sqrt, fma, and fabs are available 
> >> even in the lowest powerpc64 ISA, and don't need preprocessor 
> >> conditionals?
> > 
> > fabs and fma are part of the base ISA for Power processors that 
> > include floating point support.  fsqrt originally was optional 
> > feature in the distant past (General Purpose group of optional 
> > instructions), but is required in the ISA for Power processors.
> > 
> > Thanks, David
> > 
> 
> Chiming in as one of the heavy users/developers involved with musl/ppc.
> 
> I can confirm FSQRT exists on the oldest PPC64 chip I have - a 970FX
> from 2003 - but it is indeed optional. (I didn't bother checking the
> actual IBM chips because they're all on the higher end.) The Linux
> kernel actually has support for emulating the instruction on PowerPC
> chips where it wasn't implemented. See arch/powerpc/math-emu/fsqrt.c
> in the kernel tree.
> 
> It depends on CONFIG_MATH_EMULATION_HW_UNIMPLEMENTED (or
> CONFIG_MATH_EMULATION_FULL), but most (all?) distros that ship PPC
> kernels have that knob turned on, as far as I can tell.
> 
> So this should be safe. The worst case scenario is that distros would
> need to twiddle a config knob in the kernel.
> 
> All the best,

Thanks for the feedback. If it ends up being problematic, but gcc has
a way to tell if -march is for a model with or without it, please feel
free to submit a patch to make the use conditional.

Rich


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-24  0:46     ` A. Wilcox
  2017-06-24  0:55       ` Rich Felker
@ 2017-06-24  3:05       ` David Edelsohn
  2017-06-24  3:32         ` Rich Felker
  2017-06-24  3:38         ` A. Wilcox
  1 sibling, 2 replies; 23+ messages in thread
From: David Edelsohn @ 2017-06-24  3:05 UTC (permalink / raw)
  To: musl

Note that I only added the optimizations to the "powerpc64" port, not
the "powerpc" port.  The powerpc64 port only supports PPC64LE Linux,
which does not include PPC970.  The comments about emulation are not
relevant to the "powerpc64" port.

If someone wants to copy the support to "powerpc" and add the
additional tests, that's fine.

Thanks, David


On Fri, Jun 23, 2017 at 8:46 PM, A. Wilcox <awilfox@adelielinux.org> wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA256
>
> On 23/06/17 14:53, David Edelsohn wrote:
>> On Fri, Jun 23, 2017 at 3:35 PM, Rich Felker <dalias@libc.org>
>> wrote:
>>> On Wed, Jun 21, 2017 at 10:53:13AM -0400, David Edelsohn wrote:
>>>> The following two patches are a start at single instruction
>>>> math functions for PowerPC64 architecture.  Although PPC64LE
>>>> Linux and ELFv2 ABI require Power8 as the minimum
>>>> architecture, I have added guards that fallback to C code for
>>>> earlier architectures.
>>>
>>> Indeed, musl uses the ELFv2 ABI (minus its gratuitous mandate of
>>>  minimum ISA level) for both little and big endian powerpc64,
>>> and I think we have users of both (people running it on old
>>> powerbooks, etc.).
>>>
>>> Am I reading correctly that sqrt, fma, and fabs are available
>>> even in the lowest powerpc64 ISA, and don't need preprocessor
>>> conditionals?
>>
>> fabs and fma are part of the base ISA for Power processors that
>> include floating point support.  fsqrt originally was optional
>> feature in the distant past (General Purpose group of optional
>> instructions), but is required in the ISA for Power processors.
>>
>> Thanks, David
>>
>
> Chiming in as one of the heavy users/developers involved with musl/ppc.
>
> I can confirm FSQRT exists on the oldest PPC64 chip I have - a 970FX
> from 2003 - but it is indeed optional. (I didn't bother checking the
> actual IBM chips because they're all on the higher end.) The Linux
> kernel actually has support for emulating the instruction on PowerPC
> chips where it wasn't implemented. See arch/powerpc/math-emu/fsqrt.c
> in the kernel tree.
>
> It depends on CONFIG_MATH_EMULATION_HW_UNIMPLEMENTED (or
> CONFIG_MATH_EMULATION_FULL), but most (all?) distros that ship PPC
> kernels have that knob turned on, as far as I can tell.
>
> So this should be safe. The worst case scenario is that distros would
> need to twiddle a config knob in the kernel.
>
> All the best,
> - --arw
>
> - --
> A. Wilcox (awilfox)
> Project Lead, Adélie Linux
> http://adelielinux.org
> -----BEGIN PGP SIGNATURE-----
> Version: GnuPG v2
>
> iQIcBAEBCAAGBQJZTbZrAAoJEMspy1GSK50UPQIQAJS//JXGNUSjHtlOsK2ngxTz
> 0lOFPDSMTJHmTTXVGJIfoM/Rw0NtAKO6I9+eBbi08eN+Bmh6swRDK+/WQeIft8xx
> Ayyi0fCNFhrEFLCc/M+SE5cwSym2gD7o0XyjNGhi5OM4rqOCybHUzTk5KW2Yxoyr
> MUG7tqFgXCX8LOkWauGDkAmr9A6NjCN6GZazR7ubrsqId7KRxPGUtZRNZhfl+/cQ
> sR5TcTF1yy1Ify6D0wyHIJlIdQPE65i+L+lAr9FGQSNl/NdOQA56hRiijCLwhMNJ
> GNSGS6xPMWX0I7bV/gas15CRmJZyNC5sc+5Gm61/wToHgsclYVWJNJ52NXfSEWCA
> LzEBfZ+6xoZgKQqBDumGEPSRoaRp8sYTTv5mgmlMBs76mOvjyd8j1p9gYrfzFoPx
> Th1EX/CAlgUq84f2Nx1MyDq47ukeJKjWEGfsJe3+yUHYYpENYIKYgvKfx01UGkqR
> vx7Cshtz6zjhadruxWAkQ3oqEJwJOfbBXKCITUMuUxFLHK47Ij4yJ/VZaAwiasLn
> 9zcTEtsq+Iu4xj/drNNPzVm2KbHTDfcKaQynowvuOx9KgyOpv+UprDXr2Koi5SCD
> 5+jS26QzcflGOF6WLvY+pKbDO3l1GPRHc57MhPcjJVVdYWE2NoVbcq+BhYw9ljTq
> KIiCkOVyagFRViWyrzvO
> =eogy
> -----END PGP SIGNATURE-----


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-24  3:05       ` David Edelsohn
@ 2017-06-24  3:32         ` Rich Felker
  2017-06-24  3:38         ` A. Wilcox
  1 sibling, 0 replies; 23+ messages in thread
From: Rich Felker @ 2017-06-24  3:32 UTC (permalink / raw)
  To: musl

On Fri, Jun 23, 2017 at 11:05:08PM -0400, David Edelsohn wrote:
> Note that I only added the optimizations to the "powerpc64" port, not
> the "powerpc" port.  The powerpc64 port only support PPC64LE Linux,
> which does not include PPC970.  The comments about emulation are not
> relevant to the "powerpc64" port.
> 
> If someone wants to copy the support to "powerpc" and add the
> additional tests, that's fine.

The musl powerpc64 arch is for both little and big endian powerpc64
linux, and supports ppc970. It uses the "elfv2" abi for both, but does
not impose any minimum isa level limit like the official elfv2 abi
docs mandate. Since we did not have any existing support for the
legacy abi traditionally used on big endian, and since we needed
something without double-double, the obvious clean choice was to use
the new abi for both, but there was (and is) interest in running on
older hardware than power8.

Rich


> On Fri, Jun 23, 2017 at 8:46 PM, A. Wilcox <awilfox@adelielinux.org> wrote:
> > -----BEGIN PGP SIGNED MESSAGE-----
> > Hash: SHA256
> >
> > On 23/06/17 14:53, David Edelsohn wrote:
> >> On Fri, Jun 23, 2017 at 3:35 PM, Rich Felker <dalias@libc.org>
> >> wrote:
> >>> On Wed, Jun 21, 2017 at 10:53:13AM -0400, David Edelsohn wrote:
> >>>> The following two patches are a start at single instruction
> >>>> math functions for PowerPC64 architecture.  Although PPC64LE
> >>>> Linux and ELFv2 ABI require Power8 as the minimum
> >>>> architecture, I have added guards that fallback to C code for
> >>>> earlier architectures.
> >>>
> >>> Indeed, musl uses the ELFv2 ABI (minus its gratuitous mandate of
> >>>  minimum ISA level) for both little and big endian powerpc64,
> >>> and I think we have users of both (people running it on old
> >>> powerbooks, etc.).
> >>>
> >>> Am I reading correctly that sqrt, fma, and fabs are available
> >>> even in the lowest powerpc64 ISA, and don't need preprocessor
> >>> conditionals?
> >>
> >> fabs and fma are part of the base ISA for Power processors that
> >> include floating point support.  fsqrt originally was optional
> >> feature in the distant past (General Purpose group of optional
> >> instructions), but is required in the ISA for Power processors.
> >>
> >> Thanks, David
> >>
> >
> > Chiming in as one of the heavy users/developers involved with musl/ppc.
> >
> > I can confirm FSQRT exists on the oldest PPC64 chip I have - a 970FX
> > from 2003 - but it is indeed optional. (I didn't bother checking the
> > actual IBM chips because they're all on the higher end.) The Linux
> > kernel actually has support for emulating the instruction on PowerPC
> > chips where it wasn't implemented. See arch/powerpc/math-emu/fsqrt.c
> > in the kernel tree.
> >
> > It depends on CONFIG_MATH_EMULATION_HW_UNIMPLEMENTED (or
> > CONFIG_MATH_EMULATION_FULL), but most (all?) distros that ship PPC
> > kernels have that knob turned on, as far as I can tell.
> >
> > So this should be safe. The worst case scenario is that distros would
> > need to twiddle a config knob in the kernel.
> >
> > All the best,
> > - --arw
> >
> > - --
> > A. Wilcox (awilfox)
> > Project Lead, Adélie Linux
> > http://adelielinux.org
> > -----BEGIN PGP SIGNATURE-----
> > Version: GnuPG v2
> >
> > iQIcBAEBCAAGBQJZTbZrAAoJEMspy1GSK50UPQIQAJS//JXGNUSjHtlOsK2ngxTz
> > 0lOFPDSMTJHmTTXVGJIfoM/Rw0NtAKO6I9+eBbi08eN+Bmh6swRDK+/WQeIft8xx
> > Ayyi0fCNFhrEFLCc/M+SE5cwSym2gD7o0XyjNGhi5OM4rqOCybHUzTk5KW2Yxoyr
> > MUG7tqFgXCX8LOkWauGDkAmr9A6NjCN6GZazR7ubrsqId7KRxPGUtZRNZhfl+/cQ
> > sR5TcTF1yy1Ify6D0wyHIJlIdQPE65i+L+lAr9FGQSNl/NdOQA56hRiijCLwhMNJ
> > GNSGS6xPMWX0I7bV/gas15CRmJZyNC5sc+5Gm61/wToHgsclYVWJNJ52NXfSEWCA
> > LzEBfZ+6xoZgKQqBDumGEPSRoaRp8sYTTv5mgmlMBs76mOvjyd8j1p9gYrfzFoPx
> > Th1EX/CAlgUq84f2Nx1MyDq47ukeJKjWEGfsJe3+yUHYYpENYIKYgvKfx01UGkqR
> > vx7Cshtz6zjhadruxWAkQ3oqEJwJOfbBXKCITUMuUxFLHK47Ij4yJ/VZaAwiasLn
> > 9zcTEtsq+Iu4xj/drNNPzVm2KbHTDfcKaQynowvuOx9KgyOpv+UprDXr2Koi5SCD
> > 5+jS26QzcflGOF6WLvY+pKbDO3l1GPRHc57MhPcjJVVdYWE2NoVbcq+BhYw9ljTq
> > KIiCkOVyagFRViWyrzvO
> > =eogy
> > -----END PGP SIGNATURE-----


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-24  3:05       ` David Edelsohn
  2017-06-24  3:32         ` Rich Felker
@ 2017-06-24  3:38         ` A. Wilcox
  2017-06-24 20:53           ` David Edelsohn
  1 sibling, 1 reply; 23+ messages in thread
From: A. Wilcox @ 2017-06-24  3:38 UTC (permalink / raw)
  To: musl

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256

On 23/06/17 22:05, David Edelsohn wrote:
> Note that I only added the optimizations to the "powerpc64" port,
> not the "powerpc" port.  The powerpc64 port only supports PPC64LE
> Linux, which does not include PPC970.  The comments about emulation
> are not relevant to the "powerpc64" port.

On 23/06/17 14:35, Rich Felker wrote:
> Indeed, musl uses the ELFv2 ABI (minus its gratuitous mandate of 
> minimum ISA level) for both little and big endian powerpc64, and I
> think we have users of both (people running it on old powerbooks,
> etc.).

These two statements contradict each other.  Also, I have made a very
minimal big-endian build of musl for the ppc64 architecture, but I
haven't had any time to test it.

There was never a 64-bit PowerPC laptop that I am aware of; the highest
spec PowerPC laptop would have been a Daystar with a 32-bit MPC7448.
However, IBM, Apple, Tyan, and a few other manufacturers have released
big-endian 64-bit PowerPC hardware in both workstation and server form.

Let us also not forget that LoPAPR[1] defines (at R1-2.7-1 in my copy,
version 1.1 dated 24 March 2016) that Power Architecture platforms
"must by default operate with Big-Endian addressing".

Are you aware of any little-endian specific code in musl/powerpc64?  I
assume that libc-test would probably catch most of it when I am able
to run it, but until then, it would be nice to know if there is
anything I need to work on in the meantime.

Best,
- --arw

[1]: https://da.gd/LoPAPR

- -- 
A. Wilcox (awilfox)
Project Lead, Adélie Linux
http://adelielinux.org
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2

iQIcBAEBCAAGBQJZTd6cAAoJEMspy1GSK50UAbIP/A7IxGHo1Zc3hEvWKPm233C2
zyk72OGdpJJ0RdLywOq2xR5wVLZSNAahnGVQyX1XN/SHx+QQxlI/VJ+mNJLdi641
UyUW3YkCasgci6+IwarO0xkFX3t1Rs4sMslNWSpSP0Uvffj/TsTkt1JtWzTwoB5y
0z9e+92jSx2HmSDay/TOt5fZghFENHWvpMdf+lJIT+nSKY7dVRZ1le896KR0c/RX
+XIv9i7ilKqkBACvOU8T68+muU2uTD/JuuqmXlSwY/6V7gmm0Pi4F+YLYxwvkPFZ
VwHvo2Dnei+zvcjEyFGYGjQ4qSXAjYCtYDLjFZLPmabXr3FQ8nJhGQ6hRGIwP6ut
hXbZB1JJYefGoyqpMmjYcB2WBfN79McLK431nelru9rfFe+emi7CGCU7w7JTa22+
4cRHhoHEf+q2OtfC9SB2C4ps2wpazbFU+E9nUYnO6QiTFpY9D3/hNPOno4+SvLwo
3A6ZO43paS4IW/Qaq7BAwD7pyGMSKErwqDlqdKcghkDBA7o54vZb+PANXaYwJ/hh
sFxvGmZZvfsGu0FSBcbDzxZn3YjttSVCUzUewLsvOG8l1dVgfPuZ6EuekovHrwh1
OR1GmBd8lTa3C6KeBsZCXmOijYYhZ+NNpkSS8Pv/mPmyxyPTexwwPIH1NbFKEWsC
9Sx4LY7rH2Ww9MdodN4A
=K5um
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-24  3:38         ` A. Wilcox
@ 2017-06-24 20:53           ` David Edelsohn
  2017-06-24 22:44             ` Rich Felker
  2017-06-24 23:57             ` A. Wilcox
  0 siblings, 2 replies; 23+ messages in thread
From: David Edelsohn @ 2017-06-24 20:53 UTC (permalink / raw)
  To: musl

On Fri, Jun 23, 2017 at 11:38 PM, A. Wilcox <awilfox@adelielinux.org> wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA256
>
> On 23/06/17 22:05, David Edelsohn wrote:
>> Note that I only added the optimizations to the "powerpc64" port,
>> not the "powerpc" port.  The powerpc64 port only supports PPC64LE
>> Linux, which does not include PPC970.  The comments about emulation
>> are not relevant to the "powerpc64" port.
>
> On 23/06/17 14:35, Rich Felker wrote:
>> Indeed, musl uses the ELFv2 ABI (minus its gratuitous mandate of
>> minimum ISA level) for both little and big endian powerpc64, and I
>> think we have users of both (people running it on old powerbooks,
>> etc.).

The ABIs are not endian-specific.  ELFv1 can operate as little endian
(and did for a brief period as a transition), ELFv2 can operate as big
endian. PowerPC64 Linux only will be 64 bit little endian going
forward, although the existing big endian, ELFv1 Linux distributions
will continue to be supported.  There is no infrastructure or
distribution into which a PPC64BE ELFv2 libc can be installed.

A PPC64 big endian ELFv2 port is an interesting exercise, but does not
match or interact with any other Linux distributions or toolchains.
All of the PPC64 BE Linux ports are based on ELFv1 and have no
intention of changing.

I am not exactly certain what FreeBSD is planning.

>
> These two statements contradict each other.  Also, I have made a very
> minimal big-endian build of musl for the ppc64 architecture, but I
> haven't had any time to test it.
>
> There was never a 64-bit PowerPC laptop that I am aware of; the highest
> spec PowerPC laptop would have been a Daystar with a 32-bit MPC7448.
> However, IBM, Apple, Tyan, and a few other manufacturers have released
> big-endian 64-bit PowerPC hardware in both workstation and server form.
>
> Let us also not forget that LoPAPR[1] defines (at R1-2.7-1 in my copy,
> version 1.1 dated 24 March 2016) that Power Architecture platforms
> "must by default operate with Big-Endian addressing".

I think that you're inferring too much into this statement.  The
platform has to interoperate with big-endian addressing, especially
for firmware that assumes big endian, but that does not mean that
operating systems must support big endian user space applications.

>
> Are you aware of any little-endian specific code in musl/powerpc64?  I
> assume that libc-test would probably catch most of it when I am able
> to run it, but until then, it would be nice to know if there is
> anything I need to work on in the meantime.

The PPC64 port of Musl does not assume little endian addressing, but
Musl currently only supports ELFv2.  All of the toolchains and
operating systems that support ELFv2 are little endian.  All of the
big endian toolchains and operating systems are designed for ELFv1.
There is no overlap.

I added the macro tests for portability and completeness.

The only ports of Musl that will function on existing, supported,
big-endian PowerPC systems are the 32 bit "powerpc" port and an
unimplemented PPC64 BE ELFv1 port.

Thanks, David

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-24 20:53           ` David Edelsohn
@ 2017-06-24 22:44             ` Rich Felker
  2017-06-24 23:57             ` A. Wilcox
  1 sibling, 0 replies; 23+ messages in thread
From: Rich Felker @ 2017-06-24 22:44 UTC (permalink / raw)
  To: musl

On Sat, Jun 24, 2017 at 04:53:58PM -0400, David Edelsohn wrote:
> On Fri, Jun 23, 2017 at 11:38 PM, A. Wilcox <awilfox@adelielinux.org> wrote:
> > -----BEGIN PGP SIGNED MESSAGE-----
> > Hash: SHA256
> >
> > On 23/06/17 22:05, David Edelsohn wrote:
> >> Note that I only added the optimizations to the "powerpc64" port,
> >> not the "powerpc" port.  The powerpc64 port only supports PPC64LE
> >> Linux, which does not include PPC970.  The comments about emulation
> >> are not relevant to the "powerpc64" port.
> >
> > On 23/06/17 14:35, Rich Felker wrote:
> >> Indeed, musl uses the ELFv2 ABI (minus its gratuitous mandate of
> >> minimum ISA level) for both little and big endian powerpc64, and I
> >> think we have users of both (people running it on old powerbooks,
> >> etc.).
> 
> The ABIs are not endian-specific.  ELFv1 can operate as little endian
> (and did for a brief period as a transition), ELFv2 can operate as big
> endian. PowerPC64 Linux only will be 64 bit little endian going
> forward, although the existing big endian, ELFv1 Linux distributions
> will continue to be supported.  There is no infrastructure or
> distribution into which a PPC64BE ELFv2 libc can be installed.

Any distro that builds from source and uses musl should work. I
suspect Sabotage can do it. Rob Landley's minimal mkroot builds should
also get you a system you can boot.

> A PPC64 big endian ELFv2 port is an interesting exercise, but does not
> match or interact with any other Linux distributions or toolchains.

That's already the case if you're using musl anyway, and part of why
the decision made sense.

> All of the PPC64 BE Linux ports are based on ELFv1 and have no
> intention of changing.

I think the kernel simply supports both ELFv1 and ELFv2 userspace
anyway and doesn't care what binaries you run, though I didn't check.
The only place where it could care anyway is the function pointers
used for signal handlers.

> > Are you aware of any little-endian specific code in musl/powerpc64?  I
> > assume that libc-test would probably catch most of it when I am able
> > to run it, but until then, it would be nice to know if there is
> > anything I need to work on in the meantime.
> 
> The PPC64 port of Musl does not assume little endian addressing, but
> Musl currently only supports ELFv2.  All of the toolchains and
> operating systems that support ELFv2 are little endian.  All of the
> big endian toolchains and operating systems are designed for ELFv1.
> There is no overlap.

git clone https://github.com/richfelker/musl-cross-make.git \
&& cd musl-cross-make && make TARGET=powerpc64-linux-musl

There are no patches needed for the ABI to work, just --with-abi=elfv2
which musl-cross-make always uses for ppc64. qemu-ppc64 (user level)
runs the binaries it produces just fine. I don't have actual hardware
or a kernel for system emulation handy but I don't see any reason to
expect it not to work.

> I added the macro tests for portability and completeness.
> 
> The only ports of Musl that will function on existing, supported,
> big-endian PowerPC systems are the 32 bit "powerpc" port and an
> unimplemented PPC64 BE ELFv1 port.

I guess "supported" is the key word here, in particular whose
perspective you're asking that something be supported from.

Rich


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-24 20:53           ` David Edelsohn
  2017-06-24 22:44             ` Rich Felker
@ 2017-06-24 23:57             ` A. Wilcox
  2017-06-25  0:10               ` Rich Felker
  2017-06-25  3:24               ` David Edelsohn
  1 sibling, 2 replies; 23+ messages in thread
From: A. Wilcox @ 2017-06-24 23:57 UTC (permalink / raw)
  To: musl

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256

On 24/06/17 15:53, David Edelsohn wrote:
> On Fri, Jun 23, 2017 at 11:38 PM, A. Wilcox 
> <awilfox@adelielinux.org> wrote: The ABIs are not endian-specific. 
> ELFv1 can operate as little endian (and did for a brief period as
> a transition), ELFv2 can operate as big endian. PowerPC64 Linux
> only will be 64 bit little endian going forward, although the
> existing big endian, ELFv1 Linux distributions will continue to be 
> supported.  There is no infrastructure or distribution into which
> a PPC64BE ELFv2 libc can be installed.

Is there a technical reason why PowerPC64 Linux will only be
little-endian?  Do the Power8 / Power9 chips not support BE mode for
Linux officially?

I have an IRC log where I was chatting with an IBM engineer and they
said there is no reason P8/P9 can't run Linux in BE mode.  The only
reason to run LE mode is for better GPU support since most of the
Radeons run their framebuffers in that mode (so you'd have much better
performance, esp when using them for GPGPU).  But that could have been
then, and not now, since that conversation was almost a year ago.

> A PPC64 big endian ELFv2 port is an interesting exercise, but does 
> not match or interact with any other Linux distributions or 
> toolchains. All of the PPC64 BE Linux ports are based on ELFv1 and 
> have no intention of changing.

Except those based on musl?  I mean, we at Adélie haven't /shipped/
anything PPC64 yet, but I have very good reasons for that (and will
get to them later in this email).

> I am not exactly certain what FreeBSD is planning.

https://svnweb.freebsd.org/base?view=revision&revision=291668

FreeBSD supports either ABI in either endianness, but defaults to
ELFv2 for both BE and LE with any compiler that supports it.

>> Let us also not forget that LoPAPR[1] defines (at R1-2.7-1 in my 
>> copy, version 1.1 dated 24 March 2016) that Power Architecture 
>> platforms "must by default operate with Big-Endian addressing".
> 
> I think that you're inferring too much into this statement.  The 
> platform has to interoperate with big-endian addressing, especially
> for firmware that assumes big endian, but that does not mean that
> operating systems must support big endian user space applications.

To me, the standard is completely clear that the operating system
(this being the kernel, i.e. Linux itself, and base userland, i.e.
musl) needs to support both to be compliant with the LoPAPR standard;
at least §2.7, §4.2.3, §B.5.1.

Of course, each distro can pick what it wants to support, what tool
chain to use, and so on.  But the base system needs to support both.

>> Are you aware of any little-endian specific code in 
>> musl/powerpc64?  I assume that libc-test would probably catch 
>> most of it when I am able to run it, but until then, it would be 
>> nice to know if there is anything I need to work on in the 
>> meantime.
> 
> The PPC64 port of Musl does not assume little endian addressing, 
> but Musl currently only supports ELFv2.  All of the toolchains and
>  operating systems that support ELFv2 are little endian.  All of 
> the big endian toolchains and operating systems are designed for 
> ELFv1. There is no overlap.

Except Adélie, Sabotage, and anyone who is creating their own
environment without using a distribution.  Or are you saying that GCC
assumes LE with ELFv2?

That is the primary reason I haven't shipped any PPC64 image yet.  In
addition to the usual badness of porting an entire distro worth of
packages to a platform nobody has really used yet (had a similar time
with musl on MIPS64 and 32-bit PowerPC), I'm a bit uneasy on the
toolchain itself being able to understand what Rich has said.  Since
ELFv2 says that Power8 is the minimum ISA, gcc can do whatever it
wants, and I'm not sure if -mcpu=power6 (specific lower ISA) or
- -mcpu=powerpc64 (generic) will affect its code output when it sees
- -mabi=elfv2.  So I'm going to need to put any PPC64 image through a
much more rigorous test than I did any other platform.

> I added the macro tests for portability and completeness.
> 
> The only ports of Musl that will function on existing, supported, 
> big-endian PowerPC systems are the 32 bit "powerpc" port and an 
> unimplemented PPC64 BE ELFv1 port.

Except Rich specifically said that he did not want an ELFv1 port for
64-bit PowerPC when I asked him, so I don't think that's going to happen.

Again, do you have a _technical_ reason that I cannot spend next
weekend making a PPC64 BE image using musl + ELFv2 ABI?  Or is this
political / community in nature?

I apologise if my words seem strong, but I do not take this lightly.
We have a number of users clamouring for us to save their older PPC64
hardware from unmaintained AIX, unmaintained Debian, or in some cases
ten-or-more year old fruity OSes.  I obviously do not expect ABI
compatibility with decades-old non-Linux Unixes.  However, if there
needs to be an ELFv1 port for a technical reason, I may have to
investigate maintaining the port myself.

Regards,
- --arw

- -- 
A. Wilcox (awilfox)
Project Lead, Adélie Linux
http://adelielinux.org
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2

iQIcBAEBCAAGBQJZTvxTAAoJEMspy1GSK50UtNQP/3lceX5Aq9+uV15PkM54x6tr
N0dlZOn5MGoPmyiudHy1Sj7olotwMTcGJXSJNNjGYibN72U6DcuxBCaorL1eGwjd
n8dOUgua6vS2NurpfGLRtrBlOQpd+Hr/J3wP8OjMp+yUIvyZIUpvyBWqThTosaXm
R5AP/NmjzWLJ5kc9Gv0hJv7Y3iT/5fCo3j5dx5FsZSqpE5U/fuzdEds9y6crHmaK
Wkj6Vc6ejPYTz5LwjYFEY68Unz+du+gQ/DA67MFhG6feaaEUhv1ikqP1ThtjrFCn
KOHH1tjRs3nTg+KkKR5LQbycW/39fmW9Mbk0kG5R1WQHvfqTRCff7b6HfiOpY1d9
ZFVxGyUZOSn7xSFHiY/gF5fb7hzh/r0TajDxtdN64Ap9jun/f+p+PPauWtFY5lPz
N0DRV4+P8HeudcnZZbthhejQNL3Enq2rVxEvFJP1y13WHtsmDbLNFgFld2aIBlLC
pqUjf8joh6cLMQn3+rbXAeZ4Rx1u0e7+eVhroH51EQhM24K4cqRWlwhXc48afoF8
qob0UZlT6DJ5LVSXiTPm+Ox3+4sr44490DhdJ+oNFhyfrztfg9VEi3lzFlkAPhQX
YPaX6An80HxVKuEO4YJKQPkCThuJqbNG+2McgUBm+qL/AW4OByxO3WF/yXjyc3I0
JFtCuwWth5YlO8Re0WC1
=pI9q
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-24 23:57             ` A. Wilcox
@ 2017-06-25  0:10               ` Rich Felker
  2017-06-25  1:41                 ` Rich Felker
  2017-06-29 13:49                 ` David Edelsohn
  2017-06-25  3:24               ` David Edelsohn
  1 sibling, 2 replies; 23+ messages in thread
From: Rich Felker @ 2017-06-25  0:10 UTC (permalink / raw)
  To: musl

On Sat, Jun 24, 2017 at 06:57:23PM -0500, A. Wilcox wrote:
> Except Adélie, Sabotage, and anyone who is creating their own
> environment without using a distribution.  Or are you saying that GCC
> assumes LE with ELFv2?
> 
> That is the primary reason I haven't shipped any PPC64 image yet.  In
> addition to the usual badness of porting an entire distro worth of
> packages to a platform nobody has really used yet (had a similar time
> with musl on MIPS64 and 32-bit PowerPC), I'm a bit uneasy on the
> toolchain itself being able to understand what Rich has said.  Since
> ELFv2 says that Power8 is the minimum ISA, gcc can do whatever it
> wants, and I'm not sure if -mcpu=power6 (specific lower ISA) or
> - -mcpu=powerpc64 (generic) will affect its code output when it sees
> - -mabi=elfv2.  So I'm going to need to put any PPC64 image through a
> much more rigorous test than I did any other platform.

I don't see any reason GCC would introduce a problem here. It should
always honor -march, and the default -march for the
powerpc64-linux-musl (elfv2 of course) toolchain I just built seems to
be POWER4 according to the predefined macros.

> > I added the macro tests for portability and completeness.
> > 
> > The only ports of Musl that will function on existing, supported, 
> > big-endian PowerPC systems are the 32 bit "powerpc" port and an 
> > unimplemented PPC64 BE ELFv1 port.
> 
> 
> Except Rich specifically said that he did not want an ELFv1 port for
> 64-bit PowerPC when I asked him, so I don't think that's going to happen

To clarify, my view is that it does not make sense to add a new port
that differs only in ABI, unless it's an ABI variant that's actually
necessary for reasonable support of some actual hardware (like
softfloat, fdpic for nommu, etc.). That is not the case here.

Also, note that it's not like glibc-linked elfv1 ppc64 binaries would
be safe to use with musl even if we did have such a port; at the very
least the representation of long double would have to mismatch (just
like it does on 32-bit powerpc).

> Again, do you have a _technical_ reason that I cannot spend next
> weekend making a PPC64 BE image using musl + ELFv2 ABI?  Or is this
> political / community in nature?

I think it's mostly just a misunderstanding. It may have started out
from political decisions, but I don't think people are _trying_ to be
political about it at this point.

Rich

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-25  0:10               ` Rich Felker
@ 2017-06-25  1:41                 ` Rich Felker
  2017-06-29 13:49                 ` David Edelsohn
  1 sibling, 0 replies; 23+ messages in thread
From: Rich Felker @ 2017-06-25  1:41 UTC (permalink / raw)
  To: musl

On Sat, Jun 24, 2017 at 08:10:24PM -0400, Rich Felker wrote:
> On Sat, Jun 24, 2017 at 06:57:23PM -0500, A. Wilcox wrote:
> > Except Adélie, Sabotage, and anyone who is creating their own
> > environment without using a distribution.  Or are you saying that GCC
> > assumes LE with ELFv2?
> > 
> > That is the primary reason I haven't shipped any PPC64 image yet.  In
> > addition to the usual badness of porting an entire distro worth of
> > packages to a platform nobody has really used yet (had a similar time
> > with musl on MIPS64 and 32-bit PowerPC), I'm a bit uneasy on the
> > toolchain itself being able to understand what Rich has said.  Since
> > ELFv2 says that Power8 is the minimum ISA, gcc can do whatever it
> > wants, and I'm not sure if -mcpu=power6 (specific lower ISA) or
> > - -mcpu=powerpc64 (generic) will affect its code output when it sees
> > - -mabi=elfv2.  So I'm going to need to put any PPC64 image through a
> > much more rigorous test than I did any other platform.
> 
> I don't see any reason GCC would introduce a problem here. It should
> always honor -march, and the default -march for the
> powerpc64-linux-musl (elfv2 of course) toolchain I just built seems to
> be POWER4 according to the predefined macros.

I just did a quick review (grep -r ELFv2 gcc/config/rs6000) and I
don't see anything that would cause gcc to generate code for a
different isa level than it otherwise would. The option purely
controls the calling convention and related issues.

One unfortunate thing I did discover in the process is that neither
powerpc64 ABI admits sibcall (tail call) to a function that's not
local (static or hidden/protected). This is a consequence of the ABI
GOT register being call-preserved rather than call-clobbered. This is
a common design issue (mistake IMO) affecting several archs, including
SH in the default ABI (but not in the FDPIC ABI, which made the choice
to change the GOT register to call-clobbered).

Rich


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-24 23:57             ` A. Wilcox
  2017-06-25  0:10               ` Rich Felker
@ 2017-06-25  3:24               ` David Edelsohn
  2017-06-25 14:28                 ` Rich Felker
  1 sibling, 1 reply; 23+ messages in thread
From: David Edelsohn @ 2017-06-25  3:24 UTC (permalink / raw)
  To: musl

On Sat, Jun 24, 2017 at 7:57 PM, A. Wilcox <awilfox@adelielinux.org> wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA256
>
> On 24/06/17 15:53, David Edelsohn wrote:
>> On Fri, Jun 23, 2017 at 11:38 PM, A. Wilcox
>> <awilfox@adelielinux.org> wrote: The ABIs are not endian-specific.
>> ELFv1 can operate as little endian (and did for a brief period as
>> a transition), ELFv2 can operate as big endian. PowerPC64 Linux
>> only will be 64 bit little endian going forward, although the
>> existing big endian, ELFv1 Linux distributions will continue to be
>> supported.  There is no infrastructure or distribution into which
>> a PPC64BE ELFv2 libc can be installed.
>
> Is there a technical reason why PowerPC64 Linux will only be
> little-endian?  Do the Power8 / Power9 chips not support BE mode for
> Linux officially?

The processors support operation in either endian mode.

>
> I have an IRC log where I was chatting with an IBM engineer and they
> said there is no reason P8/P9 can't run Linux in BE mode.  The only
> reason to run LE mode is for better GPU support since most of the
> Radeons run their framebuffers in that mode (so you'd have much better
> performance, esp when using them for GPGPU).  But that could have been
> then, and not now, since that conversation was almost a year ago.
>
>
>> A PPC64 big endian ELFv2 port is an interesting exercise, but does
>> not match or interact with any other Linux distributions or
>> toolchains. All of the PPC64 BE Linux ports are based on ELFv1 and
>> have no intention of changing.
>
> Except those based on musl?  I mean, we at Adélie haven't /shipped/
> anything PPC64 yet, but I have very good reasons for that (and will
> get to them later in this email).

Because the PowerPC software ecosystem is based on and designed to
those assumptions.  All of the JITs are based on that.  All of the
optimized libraries are based on that.  All of the hand-written
assembly code is based on that.

Some test ABI and endianness separately, some don't.  It definitely is
less well tested, if at all.

You can do whatever you want, but it has been difficult enough fixing
all of the poor assumptions in the entire Open Source and proprietary
source ecosystem for the change to PPC64LE ELFv2.  If you and Adelie
want to take on that challenge for PPC64BE ELFv2, great.  The
OpenPower Foundation and its members are not going to fight that
battle.

>
>> I am not exactly certain what FreeBSD is planning.
>
> https://svnweb.freebsd.org/base?view=revision&revision=291668
>
> FreeBSD supports either ABI in either endianness, but defaults to
> ELFv2 for both BE and LE with any compiler that supports it.

And this is why FreeBSD remains challenged.  As I wrote above, all of
the PPC64BE software ecosystem is written for ELFv1, so this cuts off
FreeBSD from that entire ecosystem.  Even packages in the FreeBSD
ports won't build and work correctly because the code for PPC assumes
the endianness and the ABI.

>
>>> Let us also not forget that LoPAPR[1] defines (at R1-2.7-1 in my
>>> copy, version 1.1 dated 24 March 2016) that Power Architecture
>>> platforms "must by default operate with Big-Endian addressing".
>>
>> I think that you're inferring too much into this statement.  The
>> platform has to interoperate with big-endian addressing, especially
>> for firmware that assumes big endian, but that does not mean that
>> operating systems must support big endian user space applications.
>
> To me, the standard is completely clear that the operating system
> (this being the kernel, i.e. Linux itself, and base userland, i.e.
> musl) needs to support both to be compliant with the LoPAPR standard;
> at least §2.7, §4.2.3, §B.5.1.

The sections refer to the processor and the platform, not the OSes and
user applications.  Boot and RTAS (runtime abstraction services) is
big endian.  The hypervisor may be big endian.  The OS must be able to
interact with the system -- the underlying system -- in big endian
order.  It says nothing about presenting a big endian environment to
user programs.

>
> Of course, each distro can pick what it wants to support, what tool
> chain to use, and so on.  But the base system needs to support both.
>
>
>>> Are you aware of any little-endian specific code in
>>> musl/powerpc64?  I assume that libc-test would probably catch
>>> most of it when I am able to run it, but until then, it would be
>>> nice to know if there is anything I need to work on in the
>>> meantime.
>>
>> The PPC64 port of Musl does not assume little endian addressing,
>> but Musl currently only supports ELFv2.  All of the toolchains and
>>  operating systems that support ELFv2 are little endian.  All of
>> the big endian toolchains and operating systems are designed for
>> ELFv1. There is no overlap.
>
>
> Except Adélie, Sabotage, and anyone who is creating their own
> environment without using a distribution.  Or are you saying that GCC
> assumes LE with ELFv2?

GCC doesn't assume LE = ELFv2, but not all code generation for PPC is
that flexible.

>
> That is the primary reason I haven't shipped any PPC64 image yet.  In
> addition to the usual badness of porting an entire distro worth of
> packages to a platform nobody has really used yet (had a similar time
> with musl on MIPS64 and 32-bit PowerPC), I'm a bit uneasy on the
> toolchain itself being able to understand what Rich has said.  Since
> ELFv2 says that Power8 is the minimum ISA, gcc can do whatever it
> wants, and I'm not sure if -mcpu=power6 (specific lower ISA) or
> - -mcpu=powerpc64 (generic) will affect its code output when it sees
> - -mabi=elfv2.  So I'm going to need to put any PPC64 image through a
> much more rigorous test than I did any other platform.
>
>
>> I added the macro tests for portability and completeness.
>>
>> The only ports of Musl that will function on existing, supported,
>> big-endian PowerPC systems are the 32 bit "powerpc" port and an
>> unimplemented PPC64 BE ELFv1 port.
>
>
> Except Rich specifically said that he did not want an ELFv1 port for
> 64-bit PowerPC when I asked him, so I don't think that's going to happen.
>
> Again, do you have a _technical_ reason that I cannot spend next
> weekend making a PPC64 BE image using musl + ELFv2 ABI?  Or is this
> political / community in nature?
>
> I apologise if my words seem strong, but I do not take this lightly.
> We have a number of users clamouring for us to save their older PPC64
> hardware from unmaintained AIX, unmaintained Debian, or in some cases
> ten-or-more year old fruity OSes.  I obviously do not expect ABI
> compatibility with decades-old non-Linux Unixes.  However, if there
> needs to be an ELFv1 port for a technical reason, I may have to
> investigate maintaining the port myself.

As I wrote above, the entire external ecosystem makes the endianness /
ABI assumption.  Golang assumes this.  OpenJDK assumes this.  ATLAS
BLAS and OpenBLAS assume this.  GMP assumes this.  PyPy assumes this.
Mono assumes this. libffi assumes this. Erlang probably assumes this.
FFMPEG, x264, libvpx assume this.  MongoDB may assume this. NVIDIA
nvcc assumes this.  Etc., etc., etc.

It's not that the packages fundamentally cannot be fixed, but the
FLOSS ecosystem is much larger, richer, complex and more
interdependent.  If one wants to create an embedded system, one can
exert control over the entire software ecosystem.  For a
Linux-compatible system, one cannot.

If you accept that some parts of the software ecosystem simply won't
build or function correctly for your system and configuration, or some
packages randomly will stop building or stop functioning correctly
after a package is updated, fine.  If you want to track it all down,
fine.  I hope that you are incorporating all of that into the price
that you are charging your customers and that they understand the
additional risk.  When you and your customers rely on the larger Linux
ecosystem, you are relying on a lot of assets and resources outside of
your control.

Thanks, David

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-25  3:24               ` David Edelsohn
@ 2017-06-25 14:28                 ` Rich Felker
  2017-06-25 14:56                   ` David Edelsohn
  2018-09-27 22:53                   ` A. Wilcox
  0 siblings, 2 replies; 23+ messages in thread
From: Rich Felker @ 2017-06-25 14:28 UTC (permalink / raw)
  To: musl

On Sat, Jun 24, 2017 at 11:24:59PM -0400, David Edelsohn wrote:
> > Except those based on musl?  I mean, we at Adélie haven't /shipped/
> > anything PPC64 yet, but I have very good reasons for that (and will
> > get to them later in this email).
> 
> Because the PowerPC software ecosystem is based on and designed to
> those assumptions.  All of the JITs are based on that.  All of the
> optimized libraries are based on that.  All of the hand-written
> assembly code is based on that.
> 
> Some test ABI and endianness separately, some don't.  It definitely is
> less well tested, if at all.
> 
> You can do whatever you want, but it has been difficult enough fixing
> all of the poor assumptions in the entire Open Source and proprietary
> source ecosystem for the change to PPC64LE ELFv2.  If you and Adelie
> want to take on that challenge for PPC64BE ELFv2, great.  The
> OpenPower Foundation and its members are not going to fight that
> battle.

I see where you're coming from, but I don't see where it's
significantly harder than fighting with and fixing software that
doesn't work with musl due to gratuitous (or sometimes moderately
reasonable) glibcisms. Having this type of ABI issue increases the
number of such cases a bit, but I don't expect it to be a significant
portion of the overall work.

> > I apologise if my words seem strong, but I do not take this lightly.
> > We have a number of users clamouring for us to save their older PPC64
> > hardware from unmaintained AIX, unmaintained Debian, or in some cases
> > ten-or-more year old fruity OSes.  I obviously do not expect ABI
> > compatibility with decades-old non-Linux Unixes.  However, if there
> > needs to be an ELFv1 port for a technical reason, I may have to
> > investigate maintaining the port myself.
> 
> As I wrote above, the entire external ecosystem makes the endianness /
> ABI assumption.  Golang assumes this.  OpenJDK assumes this.  ATLAS
> BLAS and OpenBLAS assume this.  GMP assumes this.  PyPy assumes this.
> Mono assumes this. libffi assumes this. Erlang probably assumes this.
> FFMPEG, x264, libvpx assume this.  MongoDB may assume this. NVIDIA
> nvcc assumes this.  Etc., etc., etc.

Several of these are trivially fixed with --disable-asm or similar --
at least gmp, ffmpeg, x264, and libvpx should fall in that category.
Obviously it's desirable to get the asm working to improve
performance, but it can be done incrementally. It also should be
possible to heuristically test for this kind of thing by grepping for
ppc64 asm function prologue in the sources.

Only stuff that actually does codegen (compilers, jits, etc.) has a
fundamental reason to be affected, and for the most part fixing it
should just be a matter of fixing the conditionals that look for
endianness to look for _CALL_ELF==2 where that's what they really
meant to do.

> It's not that the packages fundamentally cannot be fixed, but the
> FLOSS ecosystem is much larger, richer, complex and more
> interdependent.  If one wants to create an embedded system, one can
> exert control over the entire software ecosystem.  For a
> Linux-compatible system, one cannot.
> 
> If you accept that some parts of the software ecosystem simply won't
> build or function correctly for your system and configuration, or some
> packages randomly will stop building or stop functioning correctly
> after a package is updated, fine.

These are already risks inherent in using a musl-based system with
upstream packages that are developed on glibc and don't pay attention
to portability issues. We have a very good history of the distros
using musl making efforts to patch these kinds of things, send patches
upstream, and educate upstreams without attacking or patronizing them.
Sometimes upstream regressions happen, but adequate testing should
catch them.

Rich

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-25 14:28                 ` Rich Felker
@ 2017-06-25 14:56                   ` David Edelsohn
  2018-09-27 22:53                   ` A. Wilcox
  1 sibling, 0 replies; 23+ messages in thread
From: David Edelsohn @ 2017-06-25 14:56 UTC (permalink / raw)
  To: musl

On Sun, Jun 25, 2017 at 10:28 AM, Rich Felker <dalias@libc.org> wrote:
> On Sat, Jun 24, 2017 at 11:24:59PM -0400, David Edelsohn wrote:
>> > Except those based on musl?  I mean, we at Adélie haven't /shipped/
>> > anything PPC64 yet, but I have very good reasons for that (and will
>> > get to them later in this email).
>>
>> Because the PowerPC software ecosystem is based on and designed to
>> those assumptions.  All of the JITs are based on that.  All of the
>> optimized libraries are based on that.  All of the hand-written
>> assembly code is based on that.
>>
>> Some test ABI and endianness separately, some don't.  It definitely is
>> less well tested, if at all.
>>
>> You can do whatever you want, but it has been difficult enough fixing
>> all of the poor assumptions in the entire Open Source and proprietary
>> source ecosystem for the change to PPC64LE ELFv2.  If you and Adelie
>> want to take on that challenge for PPC64BE ELFv2, great.  The
>> OpenPower Foundation and its members are not going to fight that
>> battle.
>
> I see where you're coming from, but I don't see where it's
> significantly harder than fighting with and fixing software that
> doesn't work with musl due to gratuitous (or sometimes moderately
> reasonable) glibcisms. Having this type of ABI issue increases the
> number of such cases a bit, but I don't expect it to be a significant
> portion of the overall work.
>
>> > I apologise if my words seem strong, but I do not take this lightly.
>> > We have a number of users clamouring for us to save their older PPC64
>> > hardware from unmaintained AIX, unmaintained Debian, or in some cases
>> > ten-or-more year old fruity OSes.  I obviously do not expect ABI
>> > compatibility with decades-old non-Linux Unixes.  However, if there
>> > needs to be an ELFv1 port for a technical reason, I may have to
>> > investigate maintaining the port myself.
>>
>> As I wrote above, the entire external ecosystem makes the endianness /
>> ABI assumption.  Golang assumes this.  OpenJDK assumes this.  ATLAS
>> BLAS and OpenBLAS assume this.  GMP assumes this.  PyPy assumes this.
>> Mono assumes this. libffi assumes this. Erlang probably assumes this.
>> FFMPEG, x264, libvpx assume this.  MongoDB may assume this. NVIDIA
>> nvcc assumes this.  Etc., etc., etc.
>
> Several of these are trivially fixed with --disable-asm or similar --
> at least gmp, ffmpeg, x264, and libvpx should fall in that category.
> Obviously it's desirable to get the asm working to improve
> performance, but it can be done incrementally. It also should be
> possible to heuristically test for this kind of thing by grepping for
> ppc64 asm function prologue in the sources.
>
> Only stuff that actually does codegen (compilers, jits, etc.) has a
> fundamental reason to be affected, and for the most part fixing it
> should just be a matter of fixing the conditionals that look for
> endianness to look for _CALL_ELF==2 where that's what they really
> meant to do.
>
>> It's not that the packages fundamentally cannot be fixed, but the
>> FLOSS ecosystem is much larger, richer, complex and more
>> interdependent.  If one wants to create an embedded system, one can
>> exert control over the entire software ecosystem.  For a
>> Linux-compatible system, one cannot.
>>
>> If you accept that some parts of the software ecosystem simply won't
>> build or function correctly for your system and configuration, or some
>> packages randomly will stop building or stop functioning correctly
>> after a package is updated, fine.
>
> These are already risks inherent in using a musl-based system with
> upstream packages that are developed on glibc and don't pay attention
> to portability issues. We have a very good history of the distros
> using musl making efforts to patch these kinds of things, send patches
> upstream, and educate upstreams without attacking or patronizing them.
> Sometimes upstream regressions happen, but adequate testing should
> catch them.

Rich,

I am not arguing against big-endian PowerPC nor a powerpc64 BE ELFv2
port of musl nor building powerpc64 BE ELFv2 Linux.  Mr. Wilcox asked
about the technical challenges and I replied.

The companies backing OpenPower have chosen a particular direction and
they are investing resources in that direction.  The developers, most
of whom work for those companies and their partners, are working in
that direction.  Most of the paid developers will not invest a lot of
time and effort in BE functionality and support.  PowerPC already is a
small market, so further fragmentation isn't helpful.  Without
corporate backing, the creation of an alternate Linux configuration
and ecosystem is a Herculean task.

Again, I am not and never was arguing against Mr. Wilcox's plans nor
against including powerpc64 BE ELFv2 support in musl libc.  As I wrote
when I submitted the patch, I added the macro tests for completeness
and to allow future flexibility.

Thanks, David


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-25  0:10               ` Rich Felker
  2017-06-25  1:41                 ` Rich Felker
@ 2017-06-29 13:49                 ` David Edelsohn
  2017-06-29 16:05                   ` Rich Felker
  1 sibling, 1 reply; 23+ messages in thread
From: David Edelsohn @ 2017-06-29 13:49 UTC (permalink / raw)
  To: musl

On Sat, Jun 24, 2017 at 8:10 PM, Rich Felker <dalias@libc.org> wrote:
> On Sat, Jun 24, 2017 at 06:57:23PM -0500, A. Wilcox wrote:
>> Except Adélie, Sabotage, and anyone who is creating their own
>> environment without using a distribution.  Or are you saying that GCC
>> assumes LE with ELFv2?
>>
>> That is the primary reason I haven't shipped any PPC64 image yet.  In
>> addition to the usual badness of porting an entire distro worth of
>> packages to a platform nobody has really used yet (had a similar time
>> with musl on MIPS64 and 32-bit PowerPC), I'm a bit uneasy on the
>> toolchain itself being able to understand what Rich has said.  Since
>> ELFv2 says that Power8 is the minimum ISA, gcc can do whatever it
>> wants, and I'm not sure if -mcpu=power6 (specific lower ISA) or
>> -mcpu=powerpc64 (generic) will affect its code output when it sees
>> -mabi=elfv2.  So I'm going to need to put any PPC64 image through a
>> much more rigorous test than I did any other platform.
>
> I don't see any reason GCC would introduce a problem here. It should
> always honor -march, and the default -march for the
> powerpc64-linux-musl (elfv2 of course) toolchain I just built seems to
> be POWER4 according to the predefined macros.
>
>> > I added the macro tests for portability and completeness.
>> >
>> > The only ports of Musl that will function on existing, supported,
>> > big-endian PowerPC systems are the 32 bit "powerpc" port and an
>> > unimplemented PPC64 BE ELFv1 port.
>>
>>
>> Except Rich specifically said that he did not want an ELFv1 port for
>> 64-bit PowerPC when I asked him, so I don't think that's going to happen
>
> To clarify, my view is that it does not make sense to add a new port
> that differs only in ABI, unless it's an ABI variant that's actually
> necessary for reasonable support of some actual hardware (like
> softfloat, fdpic for nommu, etc.). That is not the case here.

A colleague of mine reminded me that ELFv2 ABI specifies POWER8 as the
minimum hardware (not little-endian).  The implementation of ELFv2 can
operate on earlier hardware, but binaries may not be forward
compatible because of VSX.  Because of the calling convention of VSX
registers in ELFv2, the stack may be corrupted if an application built
without VSX support is linked with a library that does support VSX.
One cannot mix and match musl libc built for POWER4 or PPC970 and musl
libc built for POWER7.

Thanks, David


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-29 13:49                 ` David Edelsohn
@ 2017-06-29 16:05                   ` Rich Felker
  2017-06-29 17:00                     ` David Edelsohn
  0 siblings, 1 reply; 23+ messages in thread
From: Rich Felker @ 2017-06-29 16:05 UTC (permalink / raw)
  To: musl

On Thu, Jun 29, 2017 at 09:49:34AM -0400, David Edelsohn wrote:
> On Sat, Jun 24, 2017 at 8:10 PM, Rich Felker <dalias@libc.org> wrote:
> > On Sat, Jun 24, 2017 at 06:57:23PM -0500, A. Wilcox wrote:
> >> Except Adélie, Sabotage, and anyone who is creating their own
> >> environment without using a distribution.  Or are you saying that GCC
> >> assumes LE with ELFv2?
> >>
> >> That is the primary reason I haven't shipped any PPC64 image yet.  In
> >> addition to the usual badness of porting an entire distro worth of
> >> packages to a platform nobody has really used yet (had a similar time
> >> with musl on MIPS64 and 32-bit PowerPC), I'm a bit uneasy on the
> >> toolchain itself being able to understand what Rich has said.  Since
> >> ELFv2 says that Power8 is the minimum ISA, gcc can do whatever it
> >> wants, and I'm not sure if -mcpu=power6 (specific lower ISA) or
> >> -mcpu=powerpc64 (generic) will affect its code output when it sees
> >> -mabi=elfv2.  So I'm going to need to put any PPC64 image through a
> >> much more rigorous test than I did any other platform.
> >
> > I don't see any reason GCC would introduce a problem here. It should
> > always honor -march, and the default -march for the
> > powerpc64-linux-musl (elfv2 of course) toolchain I just built seems to
> > be POWER4 according to the predefined macros.
> >
> >> > I added the macro tests for portability and completeness.
> >> >
> >> > The only ports of Musl that will function on existing, supported,
> >> > big-endian PowerPC systems are the 32 bit "powerpc" port and an
> >> > unimplemented PPC64 BE ELFv1 port.
> >>
> >>
> >> Except Rich specifically said that he did not want an ELFv1 port for
> >> 64-bit PowerPC when I asked him, so I don't think that's going to happen
> >
> > To clarify, my view is that it does not make sense to add a new port
> > that differs only in ABI, unless it's an ABI variant that's actually
> > necessary for reasonable support of some actual hardware (like
> > softfloat, fdpic for nommu, etc.). That is not the case here.
> 
> A colleague of mine reminded me that ELFv2 ABI specifies POWER8 as the
> minimum hardware (not little-endian).

This is a gratuitous requirement and has nothing to do with the
meaning of ELFv2 we're using (and likewise not with the gcc
--with-abi=elfv2).

> The implementation of ELFv2 can
> operate on earlier hardware, but binaries may not be forward
> compatible because of VSX.  Because of the calling convention of VSX
> registers in ELFv2, the stack may be corrupted if an application built
> without VSX support is linked with a library that does support VSX.
> One cannot mix and match musl libc built for POWER4 or PPC970 and musl
> libc built for POWER7.

I don't think this is accurate. If it is then it's a serious bug we
need to fix, and it should have been discussed at the time the port
was added...

Can you provide a citation for the usage of VSX registers in the
calling convention, and how you think that affects the stack?

Rich


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-29 16:05                   ` Rich Felker
@ 2017-06-29 17:00                     ` David Edelsohn
  2017-06-29 17:59                       ` Rich Felker
  0 siblings, 1 reply; 23+ messages in thread
From: David Edelsohn @ 2017-06-29 17:00 UTC (permalink / raw)
  To: musl

On Thu, Jun 29, 2017 at 12:05 PM, Rich Felker <dalias@libc.org> wrote:
> On Thu, Jun 29, 2017 at 09:49:34AM -0400, David Edelsohn wrote:
>> On Sat, Jun 24, 2017 at 8:10 PM, Rich Felker <dalias@libc.org> wrote:
>> > On Sat, Jun 24, 2017 at 06:57:23PM -0500, A. Wilcox wrote:
>> >> Except Adélie, Sabotage, and anyone who is creating their own
>> >> environment without using a distribution.  Or are you saying that GCC
>> >> assumes LE with ELFv2?
>> >>
>> >> That is the primary reason I haven't shipped any PPC64 image yet.  In
>> >> addition to the usual badness of porting an entire distro worth of
>> >> packages to a platform nobody has really used yet (had a similar time
>> >> with musl on MIPS64 and 32-bit PowerPC), I'm a bit uneasy on the
>> >> toolchain itself being able to understand what Rich has said.  Since
>> >> ELFv2 says that Power8 is the minimum ISA, gcc can do whatever it
>> >> wants, and I'm not sure if -mcpu=power6 (specific lower ISA) or
>> >> -mcpu=powerpc64 (generic) will affect its code output when it sees
>> >> -mabi=elfv2.  So I'm going to need to put any PPC64 image through a
>> >> much more rigorous test than I did any other platform.
>> >
>> > I don't see any reason GCC would introduce a problem here. It should
>> > always honor -march, and the default -march for the
>> > powerpc64-linux-musl (elfv2 of course) toolchain I just built seems to
>> > be POWER4 according to the predefined macros.
>> >
>> >> > I added the macro tests for portability and completeness.
>> >> >
>> >> > The only ports of Musl that will function on existing, supported,
>> >> > big-endian PowerPC systems are the 32 bit "powerpc" port and an
>> >> > unimplemented PPC64 BE ELFv1 port.
>> >>
>> >>
>> >> Except Rich specifically said that he did not want an ELFv1 port for
>> >> 64-bit PowerPC when I asked him, so I don't think that's going to happen
>> >
>> > To clarify, my view is that it does not make sense to add a new port
>> > that differs only in ABI, unless it's an ABI variant that's actually
>> > necessary for reasonable support of some actual hardware (like
>> > softfloat, fdpic for nommu, etc.). That is not the case here.
>>
>> A colleague of mine reminded me that ELFv2 ABI specifies POWER8 as the
>> minimum hardware (not little-endian).
>
> This is a gratuitous requirement and has nothing to do with the
> meaning of ELFv2 we're using (and likewise not with the gcc
> --with-abi=elfv2).

2.1.1. Processor Architecture

This ABI is predicated on, at a minimum, Power ISA version 2.7 and
contains additional implementation characteristics.


>
>> The implementation of ELFv2 can
>> operate on earlier hardware, but binaries may not be forward
>> compatible because of VSX.  Because of the calling convention of VSX
>> registers in ELFv2, the stack may be corrupted if an application built
>> without VSX support is linked with a library that does support VSX.
>> One cannot mix and match musl libc built for POWER4 or PPC970 and musl
>> libc built for POWER7.
>
> I don't think this is accurate. If it is then it's a serious bug we
> need to fix, and it should have been discussed at the time the port
> was added...

This is not an implementation detail in the library, it is the calling
convention in the compilers.

>
> Can you provide a citation for the usage of VSX registers in the
> calling convention, and how you think that affects the stack?

Table 2.22 Vector Register Roles in Section 2.2.1.1 Register Roles.
The definition of volatile and non-volatile registers for vector
registers affects the amount of stack allocated and the saving of
non-volatile registers.


What is the status of the PPC64LE math optimization patch?

Thanks, David


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-29 17:00                     ` David Edelsohn
@ 2017-06-29 17:59                       ` Rich Felker
  2017-06-30  1:07                         ` David Edelsohn
  0 siblings, 1 reply; 23+ messages in thread
From: Rich Felker @ 2017-06-29 17:59 UTC (permalink / raw)
  To: musl

On Thu, Jun 29, 2017 at 01:00:51PM -0400, David Edelsohn wrote:
> >> A colleague of mine reminded me that ELFv2 ABI specifies POWER8 as the
> >> minimum hardware (not little-endian).
> >
> > This is a gratuitous requirement and has nothing to do with the
> > meaning of ELFv2 we're using (and likewise not with the gcc
> > --with-abi=elfv2).
> 
> 2.1.1. Processor Architecture
> 
> This ABI is predicated on, at a minimum, Power ISA version 2.7 and
> contains additional implementation characteristics.

Yes, I understand that it's there but this "requirement" is orthogonal
to the actual interface boundaries the ABI defines. ARM's EABI has a
similar gratuitous baseline of v4t; the compiler can satisfy all the
interface boundary requirements even on v4 and probably lower if it
wants to, but nobody has implemented that.

> >> The implementation of ELFv2 can
> >> operate on earlier hardware, but binaries may not be forward
> >> compatible because of VSX.  Because of the calling convention of VSX
> >> registers in ELFv2, the stack may be corrupted if an application built
> >> without VSX support is linked with a library that does support VSX.
> >> One cannot mix and match musl libc built for POWER4 or PPC970 and musl
> >> libc built for POWER7.
> >
> > I don't think this is accurate. If it is then it's a serious bug we
> > need to fix, and it should have been discussed at the time the port
> > was added...
> 
> This is not an implementation detail in the library, it is the calling
> convention in the compilers.
> 
> >
> > Can you provide a citation for the usage of VSX registers in the
> > calling convention, and how you think that affects the stack?
> 
> Table 2.22 Vector Register Roles in Section 2.2.1.1 Register Roles.
> The definition of volatile and non-volatile registers for vector
> registers affects the amount of stack allocated and the saving of
> non-volatile registers.

Are you saying the caller has to allocate space that the callee might
use to preserve call-saved registers it wants to use, and that the
amount saved depends on ISA level? If so I'll look for the associated
logic in GCC and see what it's doing. If that's the case it should be
reserving space regardless of whether it's built for an ISA level with
the registers or not.

> What is the status of the PPC64LE math optimization patch?

It's merged, I just forgot to push. So is the s390x one. Pushing now.

Rich


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-29 17:59                       ` Rich Felker
@ 2017-06-30  1:07                         ` David Edelsohn
  2017-06-30  2:11                           ` Rich Felker
  0 siblings, 1 reply; 23+ messages in thread
From: David Edelsohn @ 2017-06-30  1:07 UTC (permalink / raw)
  To: musl

On Thu, Jun 29, 2017 at 1:59 PM, Rich Felker <dalias@libc.org> wrote:
> On Thu, Jun 29, 2017 at 01:00:51PM -0400, David Edelsohn wrote:
>> >> A colleague of mine reminded me that ELFv2 ABI specifies POWER8 as the
>> >> minimum hardware (not little-endian).
>> >
>> > This is a gratuitous requirement and has nothing to do with the
>> > meaning of ELFv2 we're using (and likewise not with the gcc
>> > --with-abi=elfv2).
>>
>> 2.1.1. Processor Architecture
>>
>> This ABI is predicated on, at a minimum, Power ISA version 2.7 and
>> contains additional implementation characteristics.
>
> Yes, I understand that it's there but this "requirement" is orthogonal
> to the actual interface boundaries the ABI defines. ARM's EABI has a
> similar gratuitous baseline of v4t; the compiler can satisfy all the
> interface boundary requirements even on v4 and probably lower if it
> wants to, but nobody has implemented that.
>
>> >> The implementation of ELFv2 can
>> >> operate on earlier hardware, but binaries may not be forward
>> >> compatible because of VSX.  Because of the calling convention of VSX
>> >> registers in ELFv2, the stack may be corrupted if an application built
>> >> without VSX support is linked with a library that does support VSX.
>> >> One cannot mix and match musl libc built for POWER4 or PPC970 and musl
>> >> libc built for POWER7.
>> >
>> > I don't think this is accurate. If it is then it's a serious bug we
>> > need to fix, and it should have been discussed at the time the port
>> > was added...
>>
>> This is not an implementation detail in the library, it is the calling
>> convention in the compilers.
>>
>> >
>> > Can you provide a citation for the usage of VSX registers in the
>> > calling convention, and how you think that affects the stack?
>>
>> Table 2.22 Vector Register Roles in Section 2.2.1.1 Register Roles.
>> The definition of volatile and non-volatile registers for vector
>> registers affects the amount of stack allocated and the saving of
>> non-volatile registers.
>
> Are you saying the caller has to allocate space that the callee might
> use to preserve call-saved registers it wants to use, and that the
> amount saved depends on ISA level? If so I'll look for the associated
> logic in GCC and see what it's doing. If that's the case it should be
> reserving space regardless of whether it's built for an ISA level with
> the registers or not.

I thought that there was a change in the volatile status of the VSR
registers.  I don't remember if this affected the stack frame or
stdarg.  There was some corner case.

How can the toolchain save space for registers that it doesn't know about?

- David


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-30  1:07                         ` David Edelsohn
@ 2017-06-30  2:11                           ` Rich Felker
  0 siblings, 0 replies; 23+ messages in thread
From: Rich Felker @ 2017-06-30  2:11 UTC (permalink / raw)
  To: musl

On Thu, Jun 29, 2017 at 09:07:16PM -0400, David Edelsohn wrote:
> On Thu, Jun 29, 2017 at 1:59 PM, Rich Felker <dalias@libc.org> wrote:
> > On Thu, Jun 29, 2017 at 01:00:51PM -0400, David Edelsohn wrote:
> >> >> A colleague of mine reminded me that ELFv2 ABI specifies POWER8 as the
> >> >> minimum hardware (not little-endian).
> >> >
> >> > This is a gratuitous requirement and has nothing to do with the
> >> > meaning of ELFv2 we're using (and likewise not with the gcc
> >> > --with-abi=elfv2).
> >>
> >> 2.1.1. Processor Architecture
> >>
> >> This ABI is predicated on, at a minimum, Power ISA version 2.7 and
> >> contains additional implementation characteristics.
> >
> > Yes, I understand that it's there but this "requirement" is orthogonal
> > to the actual interface boundaries the ABI defines. ARM's EABI has a
> > similar gratuitous baseline of v4t; the compiler can satisfy all the
> > interface boundary requirements even on v4 and probably lower if it
> > wants to, but nobody has implemented that.
> >
> >> >> The implementation of ELFv2 can
> >> >> operate on earlier hardware, but binaries may not be forward
> >> >> compatible because of VSX.  Because of the calling convention of VSX
> >> >> registers in ELFv2, the stack may be corrupted if an application built
> >> >> without VSX support is linked with a library that does support VSX.
> >> >> One cannot mix and match musl libc built for POWER4 or PPC970 and musl
> >> >> libc built for POWER7.
> >> >
> >> > I don't think this is accurate. If it is then it's a serious bug we
> >> > need to fix, and it should have been discussed at the time the port
> >> > was added...
> >>
> >> This is not an implementation detail in the library, it is the calling
> >> convention in the compilers.
> >>
> >> >
> >> > Can you provide a citation for the usage of VSX registers in the
> >> > calling convention, and how you think that affects the stack?
> >>
> >> Table 2.22 Vector Register Roles in Section 2.2.1.1 Register Roles.
> >> The definition of volatile and non-volatile registers for vector
> >> registers affects the amount of stack allocated and the saving of
> >> non-volatile registers.
> >
> > Are you saying the caller has to allocate space that the callee might
> > use to preserve call-saved registers it wants to use, and that the
> > amount saved depends on ISA level? If so I'll look for the associated
> > logic in GCC and see what it's doing. If that's the case it should be
> > reserving space regardless of whether it's built for an ISA level with
> > the registers or not.
> 
> I thought that there was a change in the volatile status of the VSR
> registers.  I don't remember if this affected the stack frame or
> stdarg.  There was some corner case.
> 
> How can the toolchain save space for registers that it doesn't know about?

Normally (on clean ABIs), the caller is not responsible for setting up
room on the stack for the callee to backup call-saved registers it may
want to use. Rather, the callee does this, and therefore it can choose
the exact amount of space for the actual registers it wants to spill.
If the caller is responsible for allocating the stack space into which
the callee might need to spill, this both wastes lots of stack space
(when the callee doesn't actually need to spill much or anything) and
introduces ABI breakage whenever new call-saved registers are added.

I suspect the PPC64 ELFv2 ABI is doing all this right. On page 44 I
found:

"For the purpose of function calls, the right half of VSX registers,
corresponding to the classic floating-point registers (that is, vsr0 -
vsr31), is volatile."

Which suggests they put some attention into not breaking things when
the caller and callee are working with different ISA levels.

Rich


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] powerpc64le: Add single instruction math functions
  2017-06-25 14:28                 ` Rich Felker
  2017-06-25 14:56                   ` David Edelsohn
@ 2018-09-27 22:53                   ` A. Wilcox
  1 sibling, 0 replies; 23+ messages in thread
From: A. Wilcox @ 2018-09-27 22:53 UTC (permalink / raw)
  To: musl

[-- Attachment #1.1: Type: text/plain, Size: 5762 bytes --]

A little update for those interested.

On 06/25/17 09:28, Rich Felker wrote:
> On Sat, Jun 24, 2017 at 11:24:59PM -0400, David Edelsohn wrote:
>>> Except those based on musl?  I mean, we at Adélie haven't /shipped/
>>> anything PPC64 yet, but I have very good reasons for that (and will
>>> get to them later in this email).

We have definitely shipped on PPC64 now.  We were actually the first
distro in history (to my knowledge) to ship musl on PPC64.

That's PPC64, as in 64-bit big endian PowerPC.

KDE, LXQt, Firefox, Thunderbird, the whole lot.  I'm using it on an iMac
G5, several G5 towers, and a Talos 2.  I actually threw out my last
x86_64 system once the Talos 2 arrived.

>> You can do whatever you want, but it has been difficult enough fixing
>> all of the poor assumptions in the entire Open Source and proprietary
>> source ecosystem for the change to PPC64LE ELFv2.  If you and Adelie
>> want to take on that challenge for PPC64BE ELFv2, great.  The
>> OpenPower Foundation and its members are not going to fight that
>> battle.

Keep talking like you speak for the Foundation...

https://twitter.com/hughhalf/status/1027109176563064833

> I see where you're coming from, but I don't see where it's
> significantly harder than fighting with and fixing software that
> doesn't work with musl due to gratuitous (or sometimes moderately
> reasonable) glibcisms. Having this type of ABI issue increases the
> number of such cases a bit, but I don't expect it to be a significant
> portion of the overall work.

Rich is correct; it hasn't been significant, and it has been very little
challenge when something is broken.

>>> I apologise if my words seem strong, but I do not take this lightly.
>>> We have a number of users clamouring for us to save their older PPC64
>>> hardware from unmaintained AIX, unmaintained Debian, or in some cases
>>> ten-or-more year old fruity OSes.  I obviously do not expect ABI
>>> compatibility with decades-old non-Linux Unixes.  However, if there
>>> needs to be an ELFv1 port for a technical reason, I may have to
>>> investigate maintaining the port myself.
>>
>> As I wrote above, the entire external ecosystem makes the endianness /
>> ABI assumption.  Golang assumes this.  OpenJDK assumes this.  ATLAS
>> BLAS and OpenBLAS assume this.  GMP assumes this.  PyPy assumes this.
>> Mono assumes this. libffi assumes this. Erlang probably assumes this.
>> FFMPEG, x264, libvpx assume this.  MongoDB may assume this. NVIDIA
>> nvcc assumes this.  Etc., etc., etc.

* Golang: incorrect.  The code is written correctly, and Go runs
properly on BE PPC64 (though not correctly; this is unrelated to ELF ABI).

* OpenJDK: correct, patch is fermenting.

* BLAS/OpenBLAS: untested as of yet.

* GMP: incorrect.  Works fine and passes all tests on musl without any
patching.

* PyPy: incorrect, as above.

* Mono: incorrect, as above.

* libffi: incorrect, but did require patching for musl's insistence on
using IEEE long doubles instead of IBM long doubles.

* FFmpeg, x264: incorrect.  Work fine, though did need a single patch to
make AltiVec support build properly; this affected ppc32 and ppc64/ELFv1
as well.

* libvpx: does not support PowerPC in any capacity anyway.

* MongoDB: untested as of yet.

* NVIDIA nvcc: violates our package policy in many ways, proprietary
license being the largest.  Also requires LE as far as I can tell, so
completely orthogonal to supporting BE.

> Several of these are trivially fixed with --disable-asm or similar --
> at least gmp, ffmpeg, x264, and libvpx should fall in that category.
> Obviously it's desirable to get the asm working to improve
> performance, but it can be done incrementally. It also should be
> possible to heuristically test for this kind of thing by grepping for
> ppc64 asm function prologue in the sources.

The only --disable-asm required was for OpenSSL, and we have a patch
fermenting to fix that as well.  (Most of their checks use _CALL_ELF
correctly, but a few do not.)

> Only stuff that actually does codegen (compilers, jits, etc.) has a
> fundamental reason to be affected, and for the most part fixing it
> should just be a matter of fixing the conditionals that look for
> endianness to look for _CALL_ELF==2 where that's what they really
> meant to do.

One semi-important JIT that wasn't named was ORC, which is used by
gstreamer to help multimedia performance.  I assume that wasn't named
because nobody merged LE support either?

We managed to patch it for ELFv2 cleanly, supporting both musl in BE
*and* all the ELFv2 LE distros including glibc. :)

We're also about to land a Valgrind patch to support ELFv2 on BE.  I
believe the last major issue that we haven't yet sent upstream is
OpenJDK.  (OpenSSL is a performance regression, but functionally complete.)

>> It's not that the packages fundamentally cannot be fixed, but the
>> FLOSS ecosystem is much larger, richer, more complex and more
>> interdependent.  If one wants to create an embedded system, one can
>> exert control over the entire software ecosystem.  For a
>> Linux-compatible system, one cannot.
>>
>> If you accept that some parts of the software ecosystem simply won't
>> build or function correctly for your system and configuration, or some
>> packages randomly will stop building or stop functioning correctly
>> after a package is updated, fine.

We do not accept that in the slightest.  And neither should anyone else.

We accept that parts of the software ecosystem need to be *fixed*.

Best to you and yours,
--arw

-- 
A. Wilcox (awilfox)
Project Lead, Adélie Linux
https://www.adelielinux.org

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2018-09-27 22:53 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-21 14:53 [PATCH] powerpc64le: Add single instruction math functions David Edelsohn
2017-06-23 19:35 ` Rich Felker
2017-06-23 19:53   ` David Edelsohn
2017-06-24  0:46     ` A. Wilcox
2017-06-24  0:55       ` Rich Felker
2017-06-24  3:05       ` David Edelsohn
2017-06-24  3:32         ` Rich Felker
2017-06-24  3:38         ` A. Wilcox
2017-06-24 20:53           ` David Edelsohn
2017-06-24 22:44             ` Rich Felker
2017-06-24 23:57             ` A. Wilcox
2017-06-25  0:10               ` Rich Felker
2017-06-25  1:41                 ` Rich Felker
2017-06-29 13:49                 ` David Edelsohn
2017-06-29 16:05                   ` Rich Felker
2017-06-29 17:00                     ` David Edelsohn
2017-06-29 17:59                       ` Rich Felker
2017-06-30  1:07                         ` David Edelsohn
2017-06-30  2:11                           ` Rich Felker
2017-06-25  3:24               ` David Edelsohn
2017-06-25 14:28                 ` Rich Felker
2017-06-25 14:56                   ` David Edelsohn
2018-09-27 22:53                   ` A. Wilcox

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).