[PATCH] s390x: Add single instruction math functions

mailing list of musl libc
 help / color / mirror / code / Atom feed

* [PATCH] s390x: Add single instruction math functions
@ 2017-06-09 14:51 David Edelsohn
  2017-06-10 15:36 ` Szabolcs Nagy
  0 siblings, 1 reply; 34+ messages in thread
From: David Edelsohn @ 2017-06-09 14:51 UTC (permalink / raw)
  To: musl

The following patch is a start at single instruction math functions
for s390x architecture to increase performance.

Thanks, David
---
 src/math/s390x/ceil.c       | 7 +++++++
 src/math/s390x/ceilf.c      | 7 +++++++
 src/math/s390x/ceill.c      | 7 +++++++
 src/math/s390x/fabs.c       | 7 +++++++
 src/math/s390x/fabsf.c      | 7 +++++++
 src/math/s390x/fabsl.c      | 7 +++++++
 src/math/s390x/floor.c      | 7 +++++++
 src/math/s390x/floorf.c     | 7 +++++++
 src/math/s390x/floorl.c     | 7 +++++++
 src/math/s390x/nearbyint.c  | 7 +++++++
 src/math/s390x/nearbyintf.c | 7 +++++++
 src/math/s390x/nearbyintl.c | 7 +++++++
 src/math/s390x/rint.c       | 7 +++++++
 src/math/s390x/rintf.c      | 7 +++++++
 src/math/s390x/rintl.c      | 7 +++++++
 src/math/s390x/round.c      | 7 +++++++
 src/math/s390x/roundf.c     | 7 +++++++
 src/math/s390x/roundl.c     | 7 +++++++
 src/math/s390x/sqrt.c       | 7 +++++++
 src/math/s390x/sqrtf.c      | 7 +++++++
 src/math/s390x/sqrtl.c      | 7 +++++++
 src/math/s390x/trunc.c      | 7 +++++++
 src/math/s390x/truncf.c     | 7 +++++++
 src/math/s390x/truncl.c     | 7 +++++++
 24 files changed, 168 insertions(+)
 create mode 100644 src/math/s390x/ceil.c
 create mode 100644 src/math/s390x/ceilf.c
 create mode 100644 src/math/s390x/ceill.c
 create mode 100644 src/math/s390x/fabs.c
 create mode 100644 src/math/s390x/fabsf.c
 create mode 100644 src/math/s390x/fabsl.c
 create mode 100644 src/math/s390x/floor.c
 create mode 100644 src/math/s390x/floorf.c
 create mode 100644 src/math/s390x/floorl.c
 create mode 100644 src/math/s390x/nearbyint.c
 create mode 100644 src/math/s390x/nearbyintf.c
 create mode 100644 src/math/s390x/nearbyintl.c
 create mode 100644 src/math/s390x/rint.c
 create mode 100644 src/math/s390x/rintf.c
 create mode 100644 src/math/s390x/rintl.c
 create mode 100644 src/math/s390x/round.c
 create mode 100644 src/math/s390x/roundf.c
 create mode 100644 src/math/s390x/roundl.c
 create mode 100644 src/math/s390x/sqrt.c
 create mode 100644 src/math/s390x/sqrtf.c
 create mode 100644 src/math/s390x/sqrtl.c
 create mode 100644 src/math/s390x/trunc.c
 create mode 100644 src/math/s390x/truncf.c
 create mode 100644 src/math/s390x/truncl.c

diff --git a/src/math/s390x/ceil.c b/src/math/s390x/ceil.c
new file mode 100644
index 0000000..2d0b422
--- /dev/null
+++ b/src/math/s390x/ceil.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double ceil(double x) /* s390x-specific ceil(): applies one fidbra instruction to x and returns the result */
+{
+ __asm__ ("fidbra %0, 6, %1, 4" : "=f"(x) : "f"(x)); /* %0/%1 are FP registers both bound to x. NOTE(review): mask 6 presumably selects round-toward-+infinity and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/ceilf.c b/src/math/s390x/ceilf.c
new file mode 100644
index 0000000..94260e6
--- /dev/null
+++ b/src/math/s390x/ceilf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float ceilf(float x) /* s390x-specific ceilf(): applies one fiebra instruction to x and returns the result */
+{
+ __asm__ ("fiebra %0, 6, %1, 4" : "=f"(x) : "f"(x)); /* %0/%1 are FP registers both bound to x. NOTE(review): mask 6 presumably selects round-toward-+infinity and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/ceill.c b/src/math/s390x/ceill.c
new file mode 100644
index 0000000..2ee4a5b
--- /dev/null
+++ b/src/math/s390x/ceill.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double ceill(long double x) /* s390x-specific ceill(): applies one fixbra instruction to x and returns the result */
+{
+ __asm__ ("fixbra %0, 6, %1, 4" : "=f"(x) : "f"(x)); /* long double operands use the "f" constraint too. NOTE(review): mask 6 presumably selects round-toward-+infinity and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/fabs.c b/src/math/s390x/fabs.c
new file mode 100644
index 0000000..0c569a2
--- /dev/null
+++ b/src/math/s390x/fabs.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fabs(double x) /* s390x-specific fabs(): applies one lpdbr instruction to x and returns the result */
+{
+ __asm__ ("lpdbr %0, %1" : "=f"(x) : "f"(x)); /* %0 = output FP register, %1 = input FP register, both bound to x. NOTE(review): lpdbr appears to set the condition code and no "cc" clobber is declared -- verify whether one is needed */
+ return x;
+}
diff --git a/src/math/s390x/fabsf.c b/src/math/s390x/fabsf.c
new file mode 100644
index 0000000..99f884c
--- /dev/null
+++ b/src/math/s390x/fabsf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fabsf(float x) /* s390x-specific fabsf(): applies one lpebr instruction to x and returns the result */
+{
+ __asm__ ("lpebr %0, %1" : "=f"(x) : "f"(x)); /* %0 = output FP register, %1 = input FP register, both bound to x. NOTE(review): lpebr appears to set the condition code and no "cc" clobber is declared -- verify whether one is needed */
+ return x;
+}
diff --git a/src/math/s390x/fabsl.c b/src/math/s390x/fabsl.c
new file mode 100644
index 0000000..f543ef0
--- /dev/null
+++ b/src/math/s390x/fabsl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double fabsl(long double x) /* s390x-specific fabsl(): applies one lpxbr instruction to x and returns the result */
+{
+ __asm__ ("lpxbr %0, %1" : "=f"(x) : "f"(x)); /* long double operands use the "f" constraint too. NOTE(review): lpxbr appears to set the condition code and no "cc" clobber is declared -- verify whether one is needed */
+ return x;
+}
diff --git a/src/math/s390x/floor.c b/src/math/s390x/floor.c
new file mode 100644
index 0000000..d4958eb
--- /dev/null
+++ b/src/math/s390x/floor.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double floor(double x) /* s390x-specific floor(): applies one fidbra instruction to x and returns the result */
+{
+ __asm__ ("fidbra %0, 7, %1, 4" : "=f"(x) : "f"(x)); /* %0/%1 are FP registers both bound to x. NOTE(review): mask 7 presumably selects round-toward--infinity and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/floorf.c b/src/math/s390x/floorf.c
new file mode 100644
index 0000000..af06471
--- /dev/null
+++ b/src/math/s390x/floorf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float floorf(float x) /* s390x-specific floorf(): applies one fiebra instruction to x and returns the result */
+{
+ __asm__ ("fiebra %0, 7, %1, 4" : "=f"(x) : "f"(x)); /* %0/%1 are FP registers both bound to x. NOTE(review): mask 7 presumably selects round-toward--infinity and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/floorl.c b/src/math/s390x/floorl.c
new file mode 100644
index 0000000..0df4be1
--- /dev/null
+++ b/src/math/s390x/floorl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double floorl(long double x) /* s390x-specific floorl(): applies one fixbra instruction to x and returns the result */
+{
+ __asm__ ("fixbra %0, 7, %1, 4" : "=f"(x) : "f"(x)); /* long double operands use the "f" constraint too. NOTE(review): mask 7 presumably selects round-toward--infinity and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/nearbyint.c b/src/math/s390x/nearbyint.c
new file mode 100644
index 0000000..0d3359f
--- /dev/null
+++ b/src/math/s390x/nearbyint.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double nearbyint(double x) /* s390x-specific nearbyint(): applies one fidbra instruction to x and returns the result */
+{
+ __asm__ ("fidbra %0, 0, %1, 4" : "=f"(x) : "f"(x)); /* %0/%1 are FP registers both bound to x. NOTE(review): mask 0 presumably means "use the current rounding mode" and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/nearbyintf.c b/src/math/s390x/nearbyintf.c
new file mode 100644
index 0000000..3ad8695
--- /dev/null
+++ b/src/math/s390x/nearbyintf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float nearbyintf(float x) /* s390x-specific nearbyintf(): applies one fiebra instruction to x and returns the result */
+{
+ __asm__ ("fiebra %0, 0, %1, 4" : "=f"(x) : "f"(x)); /* %0/%1 are FP registers both bound to x. NOTE(review): mask 0 presumably means "use the current rounding mode" and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/nearbyintl.c b/src/math/s390x/nearbyintl.c
new file mode 100644
index 0000000..9d900f9
--- /dev/null
+++ b/src/math/s390x/nearbyintl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double nearbyintl(long double x) /* s390x-specific nearbyintl(): applies one fixbra instruction to x and returns the result */
+{
+ __asm__ ("fixbra %0, 0, %1, 4" : "=f"(x) : "f"(x)); /* long double operands use the "f" constraint too. NOTE(review): mask 0 presumably means "use the current rounding mode" and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/rint.c b/src/math/s390x/rint.c
new file mode 100644
index 0000000..bdd62b3
--- /dev/null
+++ b/src/math/s390x/rint.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double rint(double x) /* s390x-specific rint(): applies one fidbr instruction to x and returns the result */
+{
+ __asm__ ("fidbr %0, 0, %1" : "=f"(x) : "f"(x)); /* three-operand fidbr form (no fourth mask, unlike the fidbra users). NOTE(review): mask 0 presumably means "use the current rounding mode", with inexact left enabled -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/rintf.c b/src/math/s390x/rintf.c
new file mode 100644
index 0000000..c1e98c5
--- /dev/null
+++ b/src/math/s390x/rintf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float rintf(float x) /* s390x-specific rintf(): applies one fiebr instruction to x and returns the result */
+{
+ __asm__ ("fiebr %0, 0, %1" : "=f"(x) : "f"(x)); /* three-operand fiebr form (no fourth mask, unlike the fiebra users). NOTE(review): mask 0 presumably means "use the current rounding mode", with inexact left enabled -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/rintl.c b/src/math/s390x/rintl.c
new file mode 100644
index 0000000..4856825
--- /dev/null
+++ b/src/math/s390x/rintl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double rintl(long double x) /* s390x-specific rintl(): applies one fixbr instruction to x and returns the result */
+{
+ __asm__ ("fixbr %0, 0, %1" : "=f"(x) : "f"(x)); /* three-operand fixbr form (no fourth mask, unlike the fixbra users). NOTE(review): mask 0 presumably means "use the current rounding mode", with inexact left enabled -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/round.c b/src/math/s390x/round.c
new file mode 100644
index 0000000..10b3159
--- /dev/null
+++ b/src/math/s390x/round.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double round(double x) /* s390x-specific round(): applies one fidbra instruction to x and returns the result */
+{
+ __asm__ ("fidbra %0, 1, %1, 4" : "=f"(x) : "f"(x)); /* %0/%1 are FP registers both bound to x. NOTE(review): mask 1 presumably selects round-to-nearest with ties away from zero and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/roundf.c b/src/math/s390x/roundf.c
new file mode 100644
index 0000000..28758ce
--- /dev/null
+++ b/src/math/s390x/roundf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float roundf(float x) /* s390x-specific roundf(): applies one fiebra instruction to x and returns the result */
+{
+ __asm__ ("fiebra %0, 1, %1, 4" : "=f"(x) : "f"(x)); /* %0/%1 are FP registers both bound to x. NOTE(review): mask 1 presumably selects round-to-nearest with ties away from zero and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/roundl.c b/src/math/s390x/roundl.c
new file mode 100644
index 0000000..deef38e
--- /dev/null
+++ b/src/math/s390x/roundl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double roundl(long double x) /* s390x-specific roundl(): applies one fixbra instruction to x and returns the result */
+{
+ __asm__ ("fixbra %0, 1, %1, 4" : "=f"(x) : "f"(x)); /* long double operands use the "f" constraint too. NOTE(review): mask 1 presumably selects round-to-nearest with ties away from zero and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/sqrt.c b/src/math/s390x/sqrt.c
new file mode 100644
index 0000000..7407a5c
--- /dev/null
+++ b/src/math/s390x/sqrt.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double sqrt(double x) /* s390x-specific sqrt(): applies one sqdbr instruction to x and returns the result */
+{
+ __asm__ ("sqdbr %0, %1" : "=f"(x) : "f"(x)); /* %0 = output FP register, %1 = input FP register; both are bound to x, so the result overwrites the argument */
+ return x;
+}
diff --git a/src/math/s390x/sqrtf.c b/src/math/s390x/sqrtf.c
new file mode 100644
index 0000000..fbfdf6a
--- /dev/null
+++ b/src/math/s390x/sqrtf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float sqrtf(float x) /* s390x-specific sqrtf(): applies one sqebr instruction to x and returns the result */
+{
+ __asm__ ("sqebr %0, %1" : "=f"(x) : "f"(x)); /* %0 = output FP register, %1 = input FP register; both are bound to x, so the result overwrites the argument */
+ return x;
+}
diff --git a/src/math/s390x/sqrtl.c b/src/math/s390x/sqrtl.c
new file mode 100644
index 0000000..9b14d67
--- /dev/null
+++ b/src/math/s390x/sqrtl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double sqrtl(long double x) /* s390x-specific sqrtl(): applies one sqxbr instruction to x and returns the result */
+{
+ __asm__ ("sqxbr %0, %1" : "=f"(x) : "f"(x)); /* long double operands use the "f" constraint too; %0 (output) and %1 (input) are both bound to x */
+ return x;
+}
diff --git a/src/math/s390x/trunc.c b/src/math/s390x/trunc.c
new file mode 100644
index 0000000..24d9ed7
--- /dev/null
+++ b/src/math/s390x/trunc.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double trunc(double x) /* s390x-specific trunc(): applies one fidbra instruction to x and returns the result */
+{
+ __asm__ ("fidbra %0, 5, %1, 4" : "=f"(x) : "f"(x)); /* %0/%1 are FP registers both bound to x. NOTE(review): mask 5 presumably selects round-toward-zero and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/truncf.c b/src/math/s390x/truncf.c
new file mode 100644
index 0000000..a59e52a
--- /dev/null
+++ b/src/math/s390x/truncf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float truncf(float x) /* s390x-specific truncf(): applies one fiebra instruction to x and returns the result */
+{
+ __asm__ ("fiebra %0, 5, %1, 4" : "=f"(x) : "f"(x)); /* %0/%1 are FP registers both bound to x. NOTE(review): mask 5 presumably selects round-toward-zero and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
diff --git a/src/math/s390x/truncl.c b/src/math/s390x/truncl.c
new file mode 100644
index 0000000..98afa2d
--- /dev/null
+++ b/src/math/s390x/truncl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double truncl(long double x) /* s390x-specific truncl(): applies one fixbra instruction to x and returns the result */
+{
+ __asm__ ("fixbra %0, 5, %1, 4" : "=f"(x) : "f"(x)); /* long double operands use the "f" constraint too. NOTE(review): mask 5 presumably selects round-toward-zero and mask 4 suppresses the inexact exception -- confirm in z/Architecture PoP */
+ return x;
+}
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-09 14:51 [PATCH] s390x: Add single instruction math functions David Edelsohn
@ 2017-06-10 15:36 ` Szabolcs Nagy
  2017-06-10 17:25   ` David Edelsohn
  0 siblings, 1 reply; 34+ messages in thread
From: Szabolcs Nagy @ 2017-06-10 15:36 UTC (permalink / raw)
  To: musl; +Cc: David Edelsohn

* David Edelsohn <dje.gcc@gmail.com> [2017-06-09 10:51:25 -0400]:
> The following patch is a start at single instruction math functions
> for s390x architecture to increase performance.

looks good, i wonder why gcc does not have builtins support for
ceil, floor, nearbyint, round and trunc

(on aarch64 the builtins expand to single instruction with
-fno-math-errno, but on s390x they remain libc calls

at some point we plan to have some internal header to turn
math/string calls into builtins when compiling musl with gcc)


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-10 15:36 ` Szabolcs Nagy
@ 2017-06-10 17:25   ` David Edelsohn
  2017-06-10 18:29     ` Szabolcs Nagy
  0 siblings, 1 reply; 34+ messages in thread
From: David Edelsohn @ 2017-06-10 17:25 UTC (permalink / raw)
  To: musl

On Sat, Jun 10, 2017 at 11:36 AM, Szabolcs Nagy <nsz@port70.net> wrote:
> * David Edelsohn <dje.gcc@gmail.com> [2017-06-09 10:51:25 -0400]:
>> The following patch is a start at single instruction math functions
>> for s390x architecture to increase performance.
>
> looks good, i wonder why gcc does not have builtins support for
> ceil, floor, nearbyint, round and trunc
>
> (on aarch64 the builtins expand to single instruction with
> -fno-math-errno, but on s390x they remain libc calls

Both the functions and builtins are converted to single instructions
for me.  What architecture level is your GCC assuming?

> at some point we plan to have some internal header to turn
> math/string calls into builtins when compiling musl with gcc)

I have a patch set ready for powerpc64 teed up after this patch.

Thanks, David


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-10 17:25   ` David Edelsohn
@ 2017-06-10 18:29     ` Szabolcs Nagy
  2017-06-10 18:53       ` David Edelsohn
  0 siblings, 1 reply; 34+ messages in thread
From: Szabolcs Nagy @ 2017-06-10 18:29 UTC (permalink / raw)
  To: musl; +Cc: David Edelsohn

* David Edelsohn <dje.gcc@gmail.com> [2017-06-10 13:25:00 -0400]:
> On Sat, Jun 10, 2017 at 11:36 AM, Szabolcs Nagy <nsz@port70.net> wrote:
> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-09 10:51:25 -0400]:
> >> The following patch is a start at single instruction math functions
> >> for s390x architecture to increase performance.
> >
> > looks good, i wonder why gcc does not have builtins support for
> > ceil, floor, nearbyint, round and trunc
> >
> > (on aarch64 the builtins expand to single instruction with
> > -fno-math-errno, but on s390x they remain libc calls
> 
> Both the functions and builtins are converted to single instructions
> for me.  What architecture level is your GCC assuming?
> 

i think it's the default s390x config

$ s390x-linux-musl-gcc -v
Using built-in specs.
COLLECT_GCC=s390x-linux-musl-gcc
COLLECT_LTO_WRAPPER=/home/nsz/w/mcm/output/bin/../libexec/gcc/s390x-linux-musl/6.3.0/lto-wrapper
Target: s390x-linux-musl
Configured with: ../src_toolchain/configure --enable-languages=c,c++ CFLAGS='-g0 -Os' CXXFLAGS='-g0 -Os' LDFLAGS=-s --disable-nls --with-debug-prefix-map=/home/nsz/w/mcm/build-s390x-linux-musl= --enable-languages=c,c++ --disable-libquadmath --disable-libquadmath-support --disable-decimal-float --disable-multilib --disable-libcilkrts --disable-libvtv --disable-libgomp --disable-libitm --disable-werror --target=s390x-linux-musl --prefix= --libdir=/lib --disable-multilib --with-sysroot=/s390x-linux-musl --enable-tls --disable-libmudflap --disable-libsanitizer --disable-gnu-indirect-function --disable-libmpx --enable-libstdcxx-time --with-build-sysroot=/home/nsz/w/mcm/build-s390x-linux-musl/obj_sysroot
Thread model: posix
gcc version 6.3.0 (GCC) 
$ cat a.c
double f(double x)
{
	return __builtin_ceil(x);
}
$ s390x-linux-musl-gcc -O3 -fno-math-errno -S a.c -o -
	.machinemode zarch
	.machine "z900"
.text
	.align	8
.globl f
	.type	f, @function
f:
.LFB0:
	.cfi_startproc
	jg	ceil
	.cfi_endproc
.LFE0:
	.size	f, .-f
	.ident	"GCC: (GNU) 6.3.0"
	.section	.note.GNU-stack,"",@progbits


> > at some point we plan to have some internal header to turn
> > math/string calls into builtins when compiling musl with gcc)
> 
> I have a patch set ready for powerpc64 teed up after this patch.
> 
> Thanks, David


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-10 18:29     ` Szabolcs Nagy
@ 2017-06-10 18:53       ` David Edelsohn
  2017-06-10 19:48         ` Rich Felker
  0 siblings, 1 reply; 34+ messages in thread
From: David Edelsohn @ 2017-06-10 18:53 UTC (permalink / raw)
  To: musl

On Sat, Jun 10, 2017 at 2:29 PM, Szabolcs Nagy <nsz@port70.net> wrote:
> * David Edelsohn <dje.gcc@gmail.com> [2017-06-10 13:25:00 -0400]:
>> On Sat, Jun 10, 2017 at 11:36 AM, Szabolcs Nagy <nsz@port70.net> wrote:
>> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-09 10:51:25 -0400]:
>> >> The following patch is a start at single instruction math functions
>> >> for s390x architecture to increase performance.
>> >
>> > looks good, i wonder why gcc does not have builtins support for
>> > ceil, floor, nearbyint, round and trunc
>> >
>> > (on aarch64 the builtins expand to single instruction with
>> > -fno-math-errno, but on s390x they remain libc calls
>>
>> Both the functions and builtins are converted to single instructions
>> for me.  What architecture level is your GCC assuming?
>>
>
> i think it's the default s390x config
>
> $ s390x-linux-musl-gcc -v
> Using built-in specs.
> COLLECT_GCC=s390x-linux-musl-gcc
> COLLECT_LTO_WRAPPER=/home/nsz/w/mcm/output/bin/../libexec/gcc/s390x-linux-musl/6.3.0/lto-wrapper
> Target: s390x-linux-musl
> Configured with: ../src_toolchain/configure --enable-languages=c,c++ CFLAGS='-g0 -Os' CXXFLAGS='-g0 -Os' LDFLAGS=-s --disable-nls --with-debug-prefix-map=/home/nsz/w/mcm/build-s390x-linux-musl= --enable-languages=c,c++ --disable-libquadmath --disable-libquadmath-support --disable-decimal-float --disable-multilib --disable-libcilkrts --disable-libvtv --disable-libgomp --disable-libitm --disable-werror --target=s390x-linux-musl --prefix= --libdir=/lib --disable-multilib --with-sysroot=/s390x-linux-musl --enable-tls --disable-libmudflap --disable-libsanitizer --disable-gnu-indirect-function --disable-libmpx --enable-libstdcxx-time --with-build-sysroot=/home/nsz/w/mcm/build-s390x-linux-musl/obj_sysroot
> Thread model: posix
> gcc version 6.3.0 (GCC)
> $ cat a.c
> double f(double x)
> {
>         return __builtin_ceil(x);
> }
> $ s390x-linux-musl-gcc -O3 -fno-math-errno -S a.c -o -
>         .machinemode zarch
>         .machine "z900"

Note the default architecture is z900 from 2005-2006.  The FP
instructions were added with the z196 processors in 2010.

s390x-linux-musl probably should default to a much newer processor
level, such as at least z196 or zEC12

- David


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-10 18:53       ` David Edelsohn
@ 2017-06-10 19:48         ` Rich Felker
  2017-06-10 20:22           ` David Edelsohn
  0 siblings, 1 reply; 34+ messages in thread
From: Rich Felker @ 2017-06-10 19:48 UTC (permalink / raw)
  To: musl

On Sat, Jun 10, 2017 at 02:53:14PM -0400, David Edelsohn wrote:
> On Sat, Jun 10, 2017 at 2:29 PM, Szabolcs Nagy <nsz@port70.net> wrote:
> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-10 13:25:00 -0400]:
> >> On Sat, Jun 10, 2017 at 11:36 AM, Szabolcs Nagy <nsz@port70.net> wrote:
> >> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-09 10:51:25 -0400]:
> >> >> The following patch is a start at single instruction math functions
> >> >> for s390x architecture to increase performance.
> >> >
> >> > looks good, i wonder why gcc does not have builtins support for
> >> > ceil, floor, nearbyint, round and trunc
> >> >
> >> > (on aarch64 the builtins expand to single instruction with
> >> > -fno-math-errno, but on s390x they remain libc calls
> >>
> >> Both the functions and builtins are converted to single instructions
> >> for me.  What architecture level is your GCC assuming?
> >>
> >
> > i think it's the default s390x config
> >
> > $ s390x-linux-musl-gcc -v
> > Using built-in specs.
> > COLLECT_GCC=s390x-linux-musl-gcc
> > COLLECT_LTO_WRAPPER=/home/nsz/w/mcm/output/bin/../libexec/gcc/s390x-linux-musl/6.3.0/lto-wrapper
> > Target: s390x-linux-musl
> > Configured with: ../src_toolchain/configure --enable-languages=c,c++ CFLAGS='-g0 -Os' CXXFLAGS='-g0 -Os' LDFLAGS=-s --disable-nls --with-debug-prefix-map=/home/nsz/w/mcm/build-s390x-linux-musl= --enable-languages=c,c++ --disable-libquadmath --disable-libquadmath-support --disable-decimal-float --disable-multilib --disable-libcilkrts --disable-libvtv --disable-libgomp --disable-libitm --disable-werror --target=s390x-linux-musl --prefix= --libdir=/lib --disable-multilib --with-sysroot=/s390x-linux-musl --enable-tls --disable-libmudflap --disable-libsanitizer --disable-gnu-indirect-function --disable-libmpx --enable-libstdcxx-time --with-build-sysroot=/home/nsz/w/mcm/build-s390x-linux-musl/obj_sysroot
> > Thread model: posix
> > gcc version 6.3.0 (GCC)
> > $ cat a.c
> > double f(double x)
> > {
> >         return __builtin_ceil(x);
> > }
> > $ s390x-linux-musl-gcc -O3 -fno-math-errno -S a.c -o -
> >         .machinemode zarch
> >         .machine "z900"
> 
> Note the default architecture is z900 from 2005-2006.  The FP
> instructions were added with the z196 processors in 2010.

In that case the patch should probably have the code inside something
like:

#ifdef __Z196__ // or whatever the predef macro for the ISA level is
// your code here
#else
#include "../foo.c"
#endif

See src/math/arm/sqrt.c for a similar example.

> s390x-linux-musl probably should default to a much newer processor
> level, such as at least z196 or zEC12

musl's policy is to just follow whatever ISA level the compiler is
configured for; you can set this at musl build time with CFLAGS or use
a default built into the toolchain at toolchain build time
(--with-arch, I think).

Rich


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-10 19:48         ` Rich Felker
@ 2017-06-10 20:22           ` David Edelsohn
  2017-06-10 21:28             ` Szabolcs Nagy
  2017-06-10 21:37             ` Rich Felker
  0 siblings, 2 replies; 34+ messages in thread
From: David Edelsohn @ 2017-06-10 20:22 UTC (permalink / raw)
  To: musl

On Sat, Jun 10, 2017 at 3:48 PM, Rich Felker <dalias@libc.org> wrote:
> On Sat, Jun 10, 2017 at 02:53:14PM -0400, David Edelsohn wrote:
>> On Sat, Jun 10, 2017 at 2:29 PM, Szabolcs Nagy <nsz@port70.net> wrote:
>> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-10 13:25:00 -0400]:
>> >> On Sat, Jun 10, 2017 at 11:36 AM, Szabolcs Nagy <nsz@port70.net> wrote:
>> >> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-09 10:51:25 -0400]:
>> >> >> The following patch is a start at single instruction math functions
>> >> >> for s390x architecture to increase performance.
>> >> >
>> >> > looks good, i wonder why gcc does not have builtins support for
>> >> > ceil, floor, nearbyint, round and trunc
>> >> >
>> >> > (on aarch64 the builtins expand to single instruction with
>> >> > -fno-math-errno, but on s390x they remain libc calls
>> >>
>> >> Both the functions and builtins are converted to single instructions
>> >> for me.  What architecture level is your GCC assuming?
>> >>
>> >
>> > i think it's the default s390x config
>> >
>> > $ s390x-linux-musl-gcc -v
>> > Using built-in specs.
>> > COLLECT_GCC=s390x-linux-musl-gcc
>> > COLLECT_LTO_WRAPPER=/home/nsz/w/mcm/output/bin/../libexec/gcc/s390x-linux-musl/6.3.0/lto-wrapper
>> > Target: s390x-linux-musl
>> > Configured with: ../src_toolchain/configure --enable-languages=c,c++ CFLAGS='-g0 -Os' CXXFLAGS='-g0 -Os' LDFLAGS=-s --disable-nls --with-debug-prefix-map=/home/nsz/w/mcm/build-s390x-linux-musl= --enable-languages=c,c++ --disable-libquadmath --disable-libquadmath-support --disable-decimal-float --disable-multilib --disable-libcilkrts --disable-libvtv --disable-libgomp --disable-libitm --disable-werror --target=s390x-linux-musl --prefix= --libdir=/lib --disable-multilib --with-sysroot=/s390x-linux-musl --enable-tls --disable-libmudflap --disable-libsanitizer --disable-gnu-indirect-function --disable-libmpx --enable-libstdcxx-time --with-build-sysroot=/home/nsz/w/mcm/build-s390x-linux-musl/obj_sysroot
>> > Thread model: posix
>> > gcc version 6.3.0 (GCC)
>> > $ cat a.c
>> > double f(double x)
>> > {
>> >         return __builtin_ceil(x);
>> > }
>> > $ s390x-linux-musl-gcc -O3 -fno-math-errno -S a.c -o -
>> >         .machinemode zarch
>> >         .machine "z900"
>>
>> Note the default architecture is z900 from 2005-2006.  The FP
>> instructions were added with the z196 processors in 2010.
>
> In that case the patch should probably have the code inside something
> like:
>
> #ifdef __Z196__ // or whatever the predef macro for the ISA level is
> // your code here
> #else
> #include "../foo.c"
> #endif
>
> See src/math/arm/sqrt.c for a similar example.
>
>> s390x-linux-musl probably should default to a much newer processor
>> level, such as at least z196 or zEC12
>
> musl's policy is to just follow whatever ISA level the compiler is
> configured for; you can set this at musl build time with CFLAGS or use
> a default built into the toolchain at toolchain build time
> (--with-arch, I think).

Musl already defaults to the later ISA in the rest of the s390x port.

- David


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-10 20:22           ` David Edelsohn
@ 2017-06-10 21:28             ` Szabolcs Nagy
  2017-06-10 21:44               ` David Edelsohn
  2017-06-10 21:37             ` Rich Felker
  1 sibling, 1 reply; 34+ messages in thread
From: Szabolcs Nagy @ 2017-06-10 21:28 UTC (permalink / raw)
  To: musl

* David Edelsohn <dje.gcc@gmail.com> [2017-06-10 16:22:24 -0400]:
> On Sat, Jun 10, 2017 at 3:48 PM, Rich Felker <dalias@libc.org> wrote:
> > On Sat, Jun 10, 2017 at 02:53:14PM -0400, David Edelsohn wrote:
> >> On Sat, Jun 10, 2017 at 2:29 PM, Szabolcs Nagy <nsz@port70.net> wrote:
> >> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-10 13:25:00 -0400]:
> >> >> On Sat, Jun 10, 2017 at 11:36 AM, Szabolcs Nagy <nsz@port70.net> wrote:
> >> >> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-09 10:51:25 -0400]:
> >> >> >> The following patch is a start at single instruction math functions
> >> >> >> for s390x architecture to increase performance.
> >> >> >
> >> >> > looks good, i wonder why gcc does not have builtins support for
> >> >> > ceil, floor, nearbyint, round and trunc
> >> >> >
> >> >> > (on aarch64 the builtins expand to single instruction with
> >> >> > -fno-math-errno, but on s390x they remain libc calls
> >> >>
> >> >> Both the functions and builtins are converted to single instructions
> >> >> for me.  What architecture level is your GCC assuming?
> >> >>
> >> >
> >> > i think it's the default s390x config
> >> >
> >> > $ s390x-linux-musl-gcc -v
> >> > Using built-in specs.
> >> > COLLECT_GCC=s390x-linux-musl-gcc
> >> > COLLECT_LTO_WRAPPER=/home/nsz/w/mcm/output/bin/../libexec/gcc/s390x-linux-musl/6.3.0/lto-wrapper
> >> > Target: s390x-linux-musl
> >> > Configured with: ../src_toolchain/configure --enable-languages=c,c++ CFLAGS='-g0 -Os' CXXFLAGS='-g0 -Os' LDFLAGS=-s --disable-nls --with-debug-prefix-map=/home/nsz/w/mcm/build-s390x-linux-musl= --enable-languages=c,c++ --disable-libquadmath --disable-libquadmath-support --disable-decimal-float --disable-multilib --disable-libcilkrts --disable-libvtv --disable-libgomp --disable-libitm --disable-werror --target=s390x-linux-musl --prefix= --libdir=/lib --disable-multilib --with-sysroot=/s390x-linux-musl --enable-tls --disable-libmudflap --disable-libsanitizer --disable-gnu-indirect-function --disable-libmpx --enable-libstdcxx-time --with-build-sysroot=/home/nsz/w/mcm/build-s390x-linux-musl/obj_sysroot
> >> > Thread model: posix
> >> > gcc version 6.3.0 (GCC)
> >> > $ cat a.c
> >> > double f(double x)
> >> > {
> >> >         return __builtin_ceil(x);
> >> > }
> >> > $ s390x-linux-musl-gcc -O3 -fno-math-errno -S a.c -o -
> >> >         .machinemode zarch
> >> >         .machine "z900"
> >>
> >> Note the default architecture is z900 from 2005-2006.  The FP
> >> instructions were added with the z196 processors in 2010.
> >
> > In that case the patch should probably have the code inside something
> > like:
> >
> > #ifdef __Z196__ // or whatever the predef macro for the ISA level is
> > // your code here
> > #else
> > #include "../foo.c"
> > #endif
> >
> > See src/math/arm/sqrt.c for a similar example.
> >
> >> s390x-linux-musl probably should default to a much newer processor
> >> level, such as at least z196 or zEC12
> >
> > musl's policy is to just follow whatever ISA level the compiler is
> > configured for; you can set this at musl build time with CFLAGS or use
> > a default built into the toolchain at toolchain build time
> > (--with-arch, I think).
> 
> Musl already defaults to the later ISA in the rest of the s390x port.

would it be hard to support all s390x isa levels?


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-10 20:22           ` David Edelsohn
  2017-06-10 21:28             ` Szabolcs Nagy
@ 2017-06-10 21:37             ` Rich Felker
  1 sibling, 0 replies; 34+ messages in thread
From: Rich Felker @ 2017-06-10 21:37 UTC (permalink / raw)
  To: musl

On Sat, Jun 10, 2017 at 04:22:24PM -0400, David Edelsohn wrote:
> On Sat, Jun 10, 2017 at 3:48 PM, Rich Felker <dalias@libc.org> wrote:
> > On Sat, Jun 10, 2017 at 02:53:14PM -0400, David Edelsohn wrote:
> >> On Sat, Jun 10, 2017 at 2:29 PM, Szabolcs Nagy <nsz@port70.net> wrote:
> >> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-10 13:25:00 -0400]:
> >> >> On Sat, Jun 10, 2017 at 11:36 AM, Szabolcs Nagy <nsz@port70.net> wrote:
> >> >> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-09 10:51:25 -0400]:
> >> >> >> The following patch is a start at single instruction math functions
> >> >> >> for s390x architecture to increase performance.
> >> >> >
> >> >> > looks good, i wonder why gcc does not have builtins support for
> >> >> > ceil, floor, nearbyint, round and trunc
> >> >> >
> >> >> > (on aarch64 the builtins expand to single instruction with
> >> >> > -fno-math-errno, but on s390x they remain libc calls
> >> >>
> >> >> Both the functions and builtins are converted to single instructions
> >> >> for me.  What architecture level is your GCC assuming?
> >> >>
> >> >
> >> > i think it's the default s390x config
> >> >
> >> > $ s390x-linux-musl-gcc -v
> >> > Using built-in specs.
> >> > COLLECT_GCC=s390x-linux-musl-gcc
> >> > COLLECT_LTO_WRAPPER=/home/nsz/w/mcm/output/bin/../libexec/gcc/s390x-linux-musl/6.3.0/lto-wrapper
> >> > Target: s390x-linux-musl
> >> > Configured with: ../src_toolchain/configure --enable-languages=c,c++ CFLAGS='-g0 -Os' CXXFLAGS='-g0 -Os' LDFLAGS=-s --disable-nls --with-debug-prefix-map=/home/nsz/w/mcm/build-s390x-linux-musl= --enable-languages=c,c++ --disable-libquadmath --disable-libquadmath-support --disable-decimal-float --disable-multilib --disable-libcilkrts --disable-libvtv --disable-libgomp --disable-libitm --disable-werror --target=s390x-linux-musl --prefix= --libdir=/lib --disable-multilib --with-sysroot=/s390x-linux-musl --enable-tls --disable-libmudflap --disable-libsanitizer --disable-gnu-indirect-function --disable-libmpx --enable-libstdcxx-time --with-build-sysroot=/home/nsz/w/mcm/build-s390x-linux-musl/obj_sysroot
> >> > Thread model: posix
> >> > gcc version 6.3.0 (GCC)
> >> > $ cat a.c
> >> > double f(double x)
> >> > {
> >> >         return __builtin_ceil(x);
> >> > }
> >> > $ s390x-linux-musl-gcc -O3 -fno-math-errno -S a.c -o -
> >> >         .machinemode zarch
> >> >         .machine "z900"
> >>
> >> Note the default architecture is z900 from 2005-2006.  The FP
> >> instructions were added with the z196 processors in 2010.
> >
> > In that case the patch should probably have the code inside something
> > like:
> >
> > #ifdef __Z196__ // or whatever the predef macro for the ISA level is
> > // your code here
> > #else
> > #include "../foo.c"
> > #endif
> >
> > See src/math/arm/sqrt.c for a similar example.
> >
> >> s390x-linux-musl probably should default to a much newer processor
> >> level, such as at least z196 or zEC12
> >
> > musl's policy is to just follow whatever ISA level the compiler is
> > configured for; you can set this at musl build time with CFLAGS or use
> > a default built into the toolchain at toolchain build time
> > (--with-arch, I think).
> 
> Musl already defaults to the later ISA in the rest of the s390x port.

If that's the case it's not a regression and probably okay; someone
can add support for earlier ISA levels as a separate patch if anyone
cares about support for them.

Can you point out in a reply where we already assume later ISA level,
so that someone reading this thread later has it documented?

Rich


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-10 21:28             ` Szabolcs Nagy
@ 2017-06-10 21:44               ` David Edelsohn
  2017-06-10 21:48                 ` David Edelsohn
  0 siblings, 1 reply; 34+ messages in thread
From: David Edelsohn @ 2017-06-10 21:44 UTC (permalink / raw)
  To: musl

On Sat, Jun 10, 2017 at 5:28 PM, Szabolcs Nagy <nsz@port70.net> wrote:
> * David Edelsohn <dje.gcc@gmail.com> [2017-06-10 16:22:24 -0400]:
>> On Sat, Jun 10, 2017 at 3:48 PM, Rich Felker <dalias@libc.org> wrote:
>> > On Sat, Jun 10, 2017 at 02:53:14PM -0400, David Edelsohn wrote:
>> >> On Sat, Jun 10, 2017 at 2:29 PM, Szabolcs Nagy <nsz@port70.net> wrote:
>> >> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-10 13:25:00 -0400]:
>> >> >> On Sat, Jun 10, 2017 at 11:36 AM, Szabolcs Nagy <nsz@port70.net> wrote:
>> >> >> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-09 10:51:25 -0400]:
>> >> >> >> The following patch is a start at single instruction math functions
>> >> >> >> for s390x architecture to increase performance.
>> >> >> >
>> >> >> > looks good, i wonder why gcc does not have builtins support for
>> >> >> > ceil, floor, nearbyint, round and trunc
>> >> >> >
>> >> >> > (on aarch64 the builtins expand to single instruction with
>> >> >> > -fno-math-errno, but on s390x they remain libc calls
>> >> >>
>> >> >> Both the functions and builtins are converted to single instructions
>> >> >> for me.  What architecture level is your GCC assuming?
>> >> >>
>> >> >
>> >> > i think it's the default s390x config
>> >> >
>> >> > $ s390x-linux-musl-gcc -v
>> >> > Using built-in specs.
>> >> > COLLECT_GCC=s390x-linux-musl-gcc
>> >> > COLLECT_LTO_WRAPPER=/home/nsz/w/mcm/output/bin/../libexec/gcc/s390x-linux-musl/6.3.0/lto-wrapper
>> >> > Target: s390x-linux-musl
>> >> > Configured with: ../src_toolchain/configure --enable-languages=c,c++ CFLAGS='-g0 -Os' CXXFLAGS='-g0 -Os' LDFLAGS=-s --disable-nls --with-debug-prefix-map=/home/nsz/w/mcm/build-s390x-linux-musl= --enable-languages=c,c++ --disable-libquadmath --disable-libquadmath-support --disable-decimal-float --disable-multilib --disable-libcilkrts --disable-libvtv --disable-libgomp --disable-libitm --disable-werror --target=s390x-linux-musl --prefix= --libdir=/lib --disable-multilib --with-sysroot=/s390x-linux-musl --enable-tls --disable-libmudflap --disable-libsanitizer --disable-gnu-indirect-function --disable-libmpx --enable-libstdcxx-time --with-build-sysroot=/home/nsz/w/mcm/build-s390x-linux-musl/obj_sysroot
>> >> > Thread model: posix
>> >> > gcc version 6.3.0 (GCC)
>> >> > $ cat a.c
>> >> > double f(double x)
>> >> > {
>> >> >         return __builtin_ceil(x);
>> >> > }
>> >> > $ s390x-linux-musl-gcc -O3 -fno-math-errno -S a.c -o -
>> >> >         .machinemode zarch
>> >> >         .machine "z900"
>> >>
>> >> Note the default architecture is z900 from 2005-2006.  The FP
>> >> instructions were added with the z196 processors in 2010.
>> >
>> > In that case the patch should probably have the code inside something
>> > like:
>> >
>> > #ifdef __Z196__ // or whatever the predef macro for the ISA level is
>> > // your code here
>> > #else
>> > #include "../foo.c"
>> > #endif
>> >
>> > See src/math/arm/sqrt.c for a similar example.
>> >
>> >> s390x-linux-musl probably should default to a much newer processor
>> >> level, such as at least z196 or zEC12
>> >
>> > musl's policy is to just follow whatever ISA level the compiler is
>> > configured for; you can set this at musl build time with CFLAGS or use
>> > a default built into the toolchain at toolchain build time
>> > (--with-arch, I think).
>>
>> Musl already defaults to the later ISA in the rest of the s390x port.
>
> would it be hard to support all s390x isa levels?

It's a waste of effort and will hurt performance on newer processors.

No user of Musl and Alpine is going to -- or even /can/ -- run it on
older processors.  All of the Docker containers and underlying Linux
distributions don't support the older processors.

Thanks, David


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-10 21:44               ` David Edelsohn
@ 2017-06-10 21:48                 ` David Edelsohn
  2017-06-11  2:20                   ` Rich Felker
  0 siblings, 1 reply; 34+ messages in thread
From: David Edelsohn @ 2017-06-10 21:48 UTC (permalink / raw)
  To: musl

On Sat, Jun 10, 2017 at 5:44 PM, David Edelsohn <dje.gcc@gmail.com> wrote:
> On Sat, Jun 10, 2017 at 5:28 PM, Szabolcs Nagy <nsz@port70.net> wrote:
>> * David Edelsohn <dje.gcc@gmail.com> [2017-06-10 16:22:24 -0400]:
>>> On Sat, Jun 10, 2017 at 3:48 PM, Rich Felker <dalias@libc.org> wrote:
>>> > On Sat, Jun 10, 2017 at 02:53:14PM -0400, David Edelsohn wrote:
>>> >> On Sat, Jun 10, 2017 at 2:29 PM, Szabolcs Nagy <nsz@port70.net> wrote:
>>> >> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-10 13:25:00 -0400]:
>>> >> >> On Sat, Jun 10, 2017 at 11:36 AM, Szabolcs Nagy <nsz@port70.net> wrote:
>>> >> >> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-09 10:51:25 -0400]:
>>> >> >> >> The following patch is a start at single instruction math functions
>>> >> >> >> for s390x architecture to increase performance.
>>> >> >> >
>>> >> >> > looks good, i wonder why gcc does not have builtins support for
>>> >> >> > ceil, floor, nearbyint, round and trunc
>>> >> >> >
>>> >> >> > (on aarch64 the builtins expand to single instruction with
>>> >> >> > -fno-math-errno, but on s390x they remain libc calls
>>> >> >>
>>> >> >> Both the functions and builtins are converted to single instructions
>>> >> >> for me.  What architecture level is your GCC assuming?
>>> >> >>
>>> >> >
>>> >> > i think it's the default s390x config
>>> >> >
>>> >> > $ s390x-linux-musl-gcc -v
>>> >> > Using built-in specs.
>>> >> > COLLECT_GCC=s390x-linux-musl-gcc
>>> >> > COLLECT_LTO_WRAPPER=/home/nsz/w/mcm/output/bin/../libexec/gcc/s390x-linux-musl/6.3.0/lto-wrapper
>>> >> > Target: s390x-linux-musl
>>> >> > Configured with: ../src_toolchain/configure --enable-languages=c,c++ CFLAGS='-g0 -Os' CXXFLAGS='-g0 -Os' LDFLAGS=-s --disable-nls --with-debug-prefix-map=/home/nsz/w/mcm/build-s390x-linux-musl= --enable-languages=c,c++ --disable-libquadmath --disable-libquadmath-support --disable-decimal-float --disable-multilib --disable-libcilkrts --disable-libvtv --disable-libgomp --disable-libitm --disable-werror --target=s390x-linux-musl --prefix= --libdir=/lib --disable-multilib --with-sysroot=/s390x-linux-musl --enable-tls --disable-libmudflap --disable-libsanitizer --disable-gnu-indirect-function --disable-libmpx --enable-libstdcxx-time --with-build-sysroot=/home/nsz/w/mcm/build-s390x-linux-musl/obj_sysroot
>>> >> > Thread model: posix
>>> >> > gcc version 6.3.0 (GCC)
>>> >> > $ cat a.c
>>> >> > double f(double x)
>>> >> > {
>>> >> >         return __builtin_ceil(x);
>>> >> > }
>>> >> > $ s390x-linux-musl-gcc -O3 -fno-math-errno -S a.c -o -
>>> >> >         .machinemode zarch
>>> >> >         .machine "z900"
>>> >>
>>> >> Note the default architecture is z900 from 2005-2006.  The FP
>>> >> instructions were added with the z196 processors in 2010.
>>> >
>>> > In that case the patch should probably have the code inside something
>>> > like:
>>> >
>>> > #ifdef __Z196__ // or whatever the predef macro for the ISA level is
>>> > // your code here
>>> > #else
>>> > #include "../foo.c"
>>> > #endif
>>> >
>>> > See src/math/arm/sqrt.c for a similar example.
>>> >
>>> >> s390x-linux-musl probably should default to a much newer processor
>>> >> level, such as at least z196 or zEC12
>>> >
>>> > musl's policy is to just follow whatever ISA level the compiler is
>>> > configured for; you can set this at musl build time with CFLAGS or use
>>> > a default built into the toolchain at toolchain build time
>>> > (--with-arch, I think).
>>>
>>> Musl already defaults to the later ISA in the rest of the s390x port.
>>
>> would it be hard to support all s390x isa levels?
>
> It's a waste of effort and will hurt performance on newer processors.
>
> No user of Musl and Alpine is going to -- or even /can/ -- run it on
> older processors.  All of the Docker containers and underlying Linux
> distributions don't support the older processors.

When I worked with Bobby Bingham to create the s390x port of Musl, I
said that he could assume newer processors.  Also, I don't believe
that LLVM supports the earlier processors.  I believe that he assumed
some more recent instructions in other parts of the code.

Thanks, David


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-10 21:48                 ` David Edelsohn
@ 2017-06-11  2:20                   ` Rich Felker
  2017-06-11 10:19                     ` Szabolcs Nagy
  0 siblings, 1 reply; 34+ messages in thread
From: Rich Felker @ 2017-06-11  2:20 UTC (permalink / raw)
  To: musl

On Sat, Jun 10, 2017 at 05:48:05PM -0400, David Edelsohn wrote:
> On Sat, Jun 10, 2017 at 5:44 PM, David Edelsohn <dje.gcc@gmail.com> wrote:
> > On Sat, Jun 10, 2017 at 5:28 PM, Szabolcs Nagy <nsz@port70.net> wrote:
> >> * David Edelsohn <dje.gcc@gmail.com> [2017-06-10 16:22:24 -0400]:
> >>> On Sat, Jun 10, 2017 at 3:48 PM, Rich Felker <dalias@libc.org> wrote:
> >>> > On Sat, Jun 10, 2017 at 02:53:14PM -0400, David Edelsohn wrote:
> >>> >> On Sat, Jun 10, 2017 at 2:29 PM, Szabolcs Nagy <nsz@port70.net> wrote:
> >>> >> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-10 13:25:00 -0400]:
> >>> >> >> On Sat, Jun 10, 2017 at 11:36 AM, Szabolcs Nagy <nsz@port70.net> wrote:
> >>> >> >> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-09 10:51:25 -0400]:
> >>> >> >> >> The following patch is a start at single instruction math functions
> >>> >> >> >> for s390x architecture to increase performance.
> >>> >> >> >
> >>> >> >> > looks good, i wonder why gcc does not have builtins support for
> >>> >> >> > ceil, floor, nearbyint, round and trunc
> >>> >> >> >
> >>> >> >> > (on aarch64 the builtins expand to single instruction with
> >>> >> >> > -fno-math-errno, but on s390x they remain libc calls
> >>> >> >>
> >>> >> >> Both the functions and builtins are converted to single instructions
> >>> >> >> for me.  What architecture level is your GCC assuming?
> >>> >> >>
> >>> >> >
> >>> >> > i think it's the default s390x config
> >>> >> >
> >>> >> > $ s390x-linux-musl-gcc -v
> >>> >> > Using built-in specs.
> >>> >> > COLLECT_GCC=s390x-linux-musl-gcc
> >>> >> > COLLECT_LTO_WRAPPER=/home/nsz/w/mcm/output/bin/../libexec/gcc/s390x-linux-musl/6.3.0/lto-wrapper
> >>> >> > Target: s390x-linux-musl
> >>> >> > Configured with: ../src_toolchain/configure --enable-languages=c,c++ CFLAGS='-g0 -Os' CXXFLAGS='-g0 -Os' LDFLAGS=-s --disable-nls --with-debug-prefix-map=/home/nsz/w/mcm/build-s390x-linux-musl= --enable-languages=c,c++ --disable-libquadmath --disable-libquadmath-support --disable-decimal-float --disable-multilib --disable-libcilkrts --disable-libvtv --disable-libgomp --disable-libitm --disable-werror --target=s390x-linux-musl --prefix= --libdir=/lib --disable-multilib --with-sysroot=/s390x-linux-musl --enable-tls --disable-libmudflap --disable-libsanitizer --disable-gnu-indirect-function --disable-libmpx --enable-libstdcxx-time --with-build-sysroot=/home/nsz/w/mcm/build-s390x-linux-musl/obj_sysroot
> >>> >> > Thread model: posix
> >>> >> > gcc version 6.3.0 (GCC)
> >>> >> > $ cat a.c
> >>> >> > double f(double x)
> >>> >> > {
> >>> >> >         return __builtin_ceil(x);
> >>> >> > }
> >>> >> > $ s390x-linux-musl-gcc -O3 -fno-math-errno -S a.c -o -
> >>> >> >         .machinemode zarch
> >>> >> >         .machine "z900"
> >>> >>
> >>> >> Note the default architecture is z900 from 2005-2006.  The FP
> >>> >> instructions were added with the z196 processors in 2010.
> >>> >
> >>> > In that case the patch should probably have the code inside something
> >>> > like:
> >>> >
> >>> > #ifdef __Z196__ // or whatever the predef macro for the ISA level is
> >>> > // your code here
> >>> > #else
> >>> > #include "../foo.c"
> >>> > #endif
> >>> >
> >>> > See src/math/arm/sqrt.c for a similar example.
> >>> >
> >>> >> s390x-linux-musl probably should default to a much newer processor
> >>> >> level, such as at least z196 or zEC12
> >>> >
> >>> > musl's policy is to just follow whatever ISA level the compiler is
> >>> > configured for; you can set this at musl build time with CFLAGS or use
> >>> > a default built into the toolchain at toolchain build time
> >>> > (--with-arch, I think).
> >>>
> >>> Musl already defaults to the later ISA in the rest of the s390x port.
> >>
> >> would it be hard to support all s390x isa levels?
> >
> > It's a waste of effort and will hurt performance on newer processors.

I don't think this is accurate. There is no asm in any
performance-relevant code paths now. If there were, it could be
conditionally built for the compiler's ISA level (-march) based on
predefined macros.

> > No user of Musl and Alpine is going to -- or even /can/ -- run it on
> > older processors.  All of the Docker containers and underlying Linux
> > distributions don't support the older processors.
> 
> When I worked with Bobby Bingham to create the s390x port of Musl, I
> said that he could assume newer processors.  Also, I don't believe
> that LLVM supports the earlier processors.  I believe that he assumed
> some more recent instructions in other parts of the code.

That seems doubtful; the amount of asm in musl is minimal and unlikely
to benefit from later ISA levels; all the instructions I see look like
very basic stuff that would always have been available.

Now, what likely is accurate is your claim that nobody is using musl
on lower ISA levels, so maybe it doesn't matter.

Rich


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-11  2:20                   ` Rich Felker
@ 2017-06-11 10:19                     ` Szabolcs Nagy
  2017-06-11 15:04                       ` Rich Felker
  2017-06-12  2:46                       ` David Edelsohn
  0 siblings, 2 replies; 34+ messages in thread
From: Szabolcs Nagy @ 2017-06-11 10:19 UTC (permalink / raw)
  To: musl

* Rich Felker <dalias@libc.org> [2017-06-10 22:20:44 -0400]:
> On Sat, Jun 10, 2017 at 05:48:05PM -0400, David Edelsohn wrote:
> > When I worked with Bobby Bingham to create the s390x port of Musl, I
> > said that he could assume newer processors.  Also, I don't believe
> > that LLVM supports the earlier processors.  I believe that he assumed
> > some more recent instructions in other parts of the code.
> 
> That seems doubtful; the amount of asm in musl is minimal and unlikely
> to benefit from later ISA levels; all the instructions I see look like
> very basic stuff that would always have been available.
> 
> Now, what likely is accurate is your claim that nobody is using musl
> on lower ISA levels, so maybe it doesn't matter.

well i am using s390x musl with lower isa level for compile tests

and it seems gas rejects unrecognized opcodes so the new inline
asm does not compile for me.

i think either musl configure should make sure the cc targets
the right isa level or the code should handle it with ifdefs


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-11 10:19                     ` Szabolcs Nagy
@ 2017-06-11 15:04                       ` Rich Felker
  2017-06-11 16:45                         ` Szabolcs Nagy
  2017-06-12  2:46                       ` David Edelsohn
  1 sibling, 1 reply; 34+ messages in thread
From: Rich Felker @ 2017-06-11 15:04 UTC (permalink / raw)
  To: musl

On Sun, Jun 11, 2017 at 12:19:55PM +0200, Szabolcs Nagy wrote:
> * Rich Felker <dalias@libc.org> [2017-06-10 22:20:44 -0400]:
> > On Sat, Jun 10, 2017 at 05:48:05PM -0400, David Edelsohn wrote:
> > > When I worked with Bobby Bingham to create the s390x port of Musl, I
> > > said that he could assume newer processors.  Also, I don't believe
> > > that LLVM supports the earlier processors.  I believe that he assumed
> > > some more recent instructions in other parts of the code.
> > 
> > That seems doubtful; the amount of asm in musl is minimal and unlikely
> > to benefit from later ISA levels; all the instructions I see look like
> > very basic stuff that would always have been available.
> > 
> > Now, what likely is accurate is your claim that nobody is using musl
> > on lower ISA levels, so maybe it doesn't matter.
> 
> well i am using s390x musl with lower isa level for compile tests
> 
> and it seems gas rejects unrecognized opcodes so the new inline
> asm does not compile for me.
> 
> i think either musl configure should make sure the cc targets
> the right isa level or the code should handle it with ifdefs

Do you know the right predefined macros to check the ISA level? David,
any objection to including the #if/#ifdef since at least someone seems
to want ability to build for minimum ISA level?

Rich


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-11 15:04                       ` Rich Felker
@ 2017-06-11 16:45                         ` Szabolcs Nagy
  2017-06-11 21:45                           ` Rich Felker
  0 siblings, 1 reply; 34+ messages in thread
From: Szabolcs Nagy @ 2017-06-11 16:45 UTC (permalink / raw)
  To: musl

* Rich Felker <dalias@libc.org> [2017-06-11 11:04:38 -0400]:
> On Sun, Jun 11, 2017 at 12:19:55PM +0200, Szabolcs Nagy wrote:
> > * Rich Felker <dalias@libc.org> [2017-06-10 22:20:44 -0400]:
> > > On Sat, Jun 10, 2017 at 05:48:05PM -0400, David Edelsohn wrote:
> > > > When I worked with Bobby Bingham to create the s390x port of Musl, I
> > > > said that he could assume newer processors.  Also, I don't believe
> > > > that LLVM supports the earlier processors.  I believe that he assumed
> > > > some more recent instructions in other parts of the code.
> > > 
> > > That seems doubtful; the amount of asm in musl is minimal and unlikely
> > > to benefit from later ISA levels; all the instructions I see look like
> > > very basic stuff that would always have been available.
> > > 
> > > Now, what likely is accurate is your claim that nobody is using musl
> > > on lower ISA levels, so maybe it doesn't matter.
> > 
> > well i am using s390x musl with lower isa level for compile tests
> > 
> > and it seems gas rejects unrecognized opcodes so the new inline
> > asm does not compile for me.
> > 
> > i think either musl configure should make sure the cc targets
> > the right isa level or the code should handle it with ifdefs
> 
> Do you know the right predefined macros to check the ISA level? David,
> any objection to including the #if/#ifdef since at least someone seems
> to want ability to build for minimum ISA level?
> 

i see no diff between

s390x-linux-musl-gcc -E -dM -

and

s390x-linux-musl-gcc -E -dM -march=z196 -

so it's not possible to tell at compile time, but the asm
is different so configure can detect it:

$ s390x-linux-musl-gcc -S -xc /dev/null -o -
	.machinemode zarch
	.machine "z900"
	.ident	"GCC: (GNU) 6.3.0"
	.section	.note.GNU-stack,"",@progbits
$ s390x-linux-musl-gcc -S -xc /dev/null -o - -march=z196
	.machinemode zarch
	.machine "z196"
	.ident	"GCC: (GNU) 6.3.0"
	.section	.note.GNU-stack,"",@progbits


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-11 16:45                         ` Szabolcs Nagy
@ 2017-06-11 21:45                           ` Rich Felker
  0 siblings, 0 replies; 34+ messages in thread
From: Rich Felker @ 2017-06-11 21:45 UTC (permalink / raw)
  To: musl

On Sun, Jun 11, 2017 at 06:45:00PM +0200, Szabolcs Nagy wrote:
> * Rich Felker <dalias@libc.org> [2017-06-11 11:04:38 -0400]:
> > On Sun, Jun 11, 2017 at 12:19:55PM +0200, Szabolcs Nagy wrote:
> > > * Rich Felker <dalias@libc.org> [2017-06-10 22:20:44 -0400]:
> > > > On Sat, Jun 10, 2017 at 05:48:05PM -0400, David Edelsohn wrote:
> > > > > When I worked with Bobby Bingham to create the s390x port of Musl, I
> > > > > said that he could assume newer processors.  Also, I don't believe
> > > > > that LLVM supports the earlier processors.  I believe that he assumed
> > > > > some more recent instructions in other parts of the code.
> > > > 
> > > > That seems doubtful; the amount of asm in musl is minimal and unlikely
> > > > to benefit from later ISA levels; all the instructions I see look like
> > > > very basic stuff that would always have been available.
> > > > 
> > > > Now, what likely is accurate is your claim that nobody is using musl
> > > > on lower ISA levels, so maybe it doesn't matter.
> > > 
> > > well i am using s390x musl with lower isa level for compile tests
> > > 
> > > and it seems gas rejects unrecognized opcodes so the new inline
> > > asm does not compile for me.
> > > 
> > > i think either musl configure should make sure the cc targets
> > > the right isa level or the code should handle it with ifdefs
> > 
> > Do you know the right predefined macros to check the ISA level? David,
> > any objection to including the #if/#ifdef since at least someone seems
> > to want ability to build for minimum ISA level?
> > 
> 
> i see no diff between
> 
> s390x-linux-musl-gcc -E -dM -
> 
> and
> 
> s390x-linux-musl-gcc -E -dM -march=z196 -
> 
> so it's not possible to tell at compile time, but the asm
> is different so configure can detect it:
> 
> $ s390x-linux-musl-gcc -S -xc /dev/null -o -
> 	.machinemode zarch
> 	.machine "z900"
> 	.ident	"GCC: (GNU) 6.3.0"
> 	.section	.note.GNU-stack,"",@progbits
> $ s390x-linux-musl-gcc -S -xc /dev/null -o - -march=z196
> 	.machinemode zarch
> 	.machine "z196"
> 	.ident	"GCC: (GNU) 6.3.0"
> 	.section	.note.GNU-stack,"",@progbits

That's... bleh. This should probably be fixed on the gcc side...

Rich


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-11 10:19                     ` Szabolcs Nagy
  2017-06-11 15:04                       ` Rich Felker
@ 2017-06-12  2:46                       ` David Edelsohn
  2017-06-12  4:36                         ` Tuan M. Hoang
  2017-06-12  9:03                         ` Szabolcs Nagy
  1 sibling, 2 replies; 34+ messages in thread
From: David Edelsohn @ 2017-06-12  2:46 UTC (permalink / raw)
  To: musl

On Sun, Jun 11, 2017 at 6:19 AM, Szabolcs Nagy <nsz@port70.net> wrote:
> * Rich Felker <dalias@libc.org> [2017-06-10 22:20:44 -0400]:
>> On Sat, Jun 10, 2017 at 05:48:05PM -0400, David Edelsohn wrote:
>> > When I worked with Bobby Bingham to create the s390x port of Musl, I
>> > said that he could assume newer processors.  Also, I don't believe
>> > that LLVM supports the earlier processors.  I believe that he assumed
>> > some more recent instructions in other parts of the code.
>>
>> That seems doubtful; the amount of asm in musl is minimal and unlikely
>> to benefit from later ISA levels; all the instructions I see look like
>> very basic stuff that would always have been available.
>>
>> Now, what likely is accurate is your claim that nobody is using musl
>> on lower ISA levels, so maybe it doesn't matter.
>
> well i am using s390x musl with lower isa level for compile tests
>
> and it seems gas rejects unrecognized opcodes so the new inline
> asm does not compile for me.
>
> i think either musl configure should make sure the cc targets
> the right isa level or the code should handle it with ifdefs

I have asked the IBM toolchain team how to distinguish the
architecture level at compile time, or at least distinguish support
for the FP rounding instruction.

Are you actually running s390x musl on a system earlier than z196
architecture level?  The public IBM LinuxONE Cloud is running on a z13
system, many generations newer than even z196.

The following IBM table of supported and tested systems

https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html

shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
requires at least zEC12.

I can't find any official hardware requirements description for Alpine
Linux. I tend to doubt that users would run it on older hardware,
especially hardware no longer supported by other, modern Linux
distributions.

Building musl libc on older hardware is a nice accomplishment, but
investing effort and complexity to maintain support probably isn't
useful to any musl libc user and probably isn't a productive use of
developer resources.

I will continue to inquire if there is a simple technique to accomplish this.

Thanks, David

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-12  2:46                       ` David Edelsohn
@ 2017-06-12  4:36                         ` Tuan M. Hoang
  2017-06-12  9:03                         ` Szabolcs Nagy
  1 sibling, 0 replies; 34+ messages in thread
From: Tuan M. Hoang @ 2017-06-12  4:36 UTC (permalink / raw)
  To: musl

On 06/11/2017 10:46 PM, David Edelsohn wrote:
> 
> I can't find any official hardware requirements description for Alpine
> Linux. I tend to doubt that user would run it on older hardware,
> especially hardware no longer supported by other, modern Linux
> distributions.
> 

Indeed, Alpine Linux also supports only z196 and later machines:

https://git.alpinelinux.org/cgit/aports/tree/main/gcc/APKBUILD#n253

https://git.alpinelinux.org/cgit/aports/tree/main/linux-vanilla/config-vanilla.s390x#n390

Tuan,




^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-12  2:46                       ` David Edelsohn
  2017-06-12  4:36                         ` Tuan M. Hoang
@ 2017-06-12  9:03                         ` Szabolcs Nagy
  2017-06-12 13:28                           ` David Edelsohn
  1 sibling, 1 reply; 34+ messages in thread
From: Szabolcs Nagy @ 2017-06-12  9:03 UTC (permalink / raw)
  To: musl

* David Edelsohn <dje.gcc@gmail.com> [2017-06-11 22:46:09 -0400]:

> On Sun, Jun 11, 2017 at 6:19 AM, Szabolcs Nagy <nsz@port70.net> wrote:
> > * Rich Felker <dalias@libc.org> [2017-06-10 22:20:44 -0400]:
> >> On Sat, Jun 10, 2017 at 05:48:05PM -0400, David Edelsohn wrote:
> >> > When I worked with Bobby Bingham to create the s390x port of Musl, I
> >> > said that he could assume newer processors.  Also, I don't believe
> >> > that LLVM supports the earlier processors.  I believe that he assumed
> >> > some more recent instructions in other parts of the code.
> >>
> >> That seems doubtful; the amount of asm in musl is minimal and unlikely
> >> to benefit from later ISA levels; all the instructions I see look like
> >> very basic stuff that would always have been available.
> >>
> >> Now, what likely is accurate is your claim that nobody is using musl
> >> on lower ISA levels, so maybe it doesn't matter.
> >
> > well i am using s390x musl with lower isa level for compile tests
> >
> > and it seems gas rejects unrecognized opcodes so the new inline
> > asm does not compile for me.
> >
> > i think either musl configure should make sure the cc targets
> > the right isa level or the code should handle it with ifdefs
> 
> I have asked the IBM toolchain team how to distinguish the
> architecture level at compile time, or at least distinguish support
> for the FP rounding instruction.
> 
> Are you actually running s390x musl on a system earlier than z196
> architecture level?  The public IBM LinuxONE Cloud is running on a z13
> system, many generations newer than even z196.
> 

no, i only have a cross compiler, but since the toolchain build script
uses default settings i get z900 arch, if that's not good then gcc
config default should be fixed for musl.

> The following IBM table of supported and tested systems
> 
> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
> 
> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
> requires at least zEC12.
> 
> I can't find any official hardware requirements description for Alpine
> Linux. I tend to doubt that user would run it on older hardware,
> especially hardware no longer supported by other, modern Linux
> distributions.
> 
> Building musl libc on older hardware is a nice accomplishment, but
> investing effort and complexity to maintain support probably isn't
> useful to any musl libc user and probably isn't a productive use of
> developer resources.
> 
> I will continue to inquire if there is a simple technique to accomplish this.
> 
> Thanks, David


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-12  9:03                         ` Szabolcs Nagy
@ 2017-06-12 13:28                           ` David Edelsohn
  2017-06-12 13:54                             ` David Edelsohn
  2017-06-14 23:44                             ` Rich Felker
  0 siblings, 2 replies; 34+ messages in thread
From: David Edelsohn @ 2017-06-12 13:28 UTC (permalink / raw)
  To: musl

On Mon, Jun 12, 2017 at 5:03 AM, Szabolcs Nagy <nsz@port70.net> wrote:
> * David Edelsohn <dje.gcc@gmail.com> [2017-06-11 22:46:09 -0400]:
>
>> On Sun, Jun 11, 2017 at 6:19 AM, Szabolcs Nagy <nsz@port70.net> wrote:
>> > * Rich Felker <dalias@libc.org> [2017-06-10 22:20:44 -0400]:
>> >> On Sat, Jun 10, 2017 at 05:48:05PM -0400, David Edelsohn wrote:
>> >> > When I worked with Bobby Bingham to create the s390x port of Musl, I
>> >> > said that he could assume newer processors.  Also, I don't believe
>> >> > that LLVM supports the earlier processors.  I believe that he assumed
>> >> > some more recent instructions in other parts of the code.
>> >>
>> >> That seems doubtful; the amount of asm in musl is minimal and unlikely
>> >> to benefit from later ISA levels; all the instructions I see look like
>> >> very basic stuff that would always have been available.
>> >>
>> >> Now, what likely is accurate is your claim that nobody is using musl
>> >> on lower ISA levels, so maybe it doesn't matter.
>> >
>> > well i am using s390x musl with lower isa level for compile tests
>> >
>> > and it seems gas rejects unrecognized opcodes so the new inline
>> > asm does not compile for me.
>> >
>> > i think either musl configure should make sure the cc targets
>> > the right isa level or the code should handle it with ifdefs
>>
>> I have asked the IBM toolchain team how to distinguish the
>> architecture level at compile time, or at least distinguish support
>> for the FP rounding instruction.
>>
>> Are you actually running s390x musl on a system earlier than z196
>> architecture level?  The public IBM LinuxONE Cloud is running on a z13
>> system, many generations newer than even z196.
>>
>
> no, i only have a cross compiler, but since the toolchain build script
> uses default settings i get z900 arch, if that's not good then gcc
> config default should be fixed for musl.
>
>> The following IBM table of supported and tested systems
>>
>> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
>>
>> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
>> requires at least zEC12.
>>
>> I can't find any official hardware requirements description for Alpine
>> Linux. I tend to doubt that user would run it on older hardware,
>> especially hardware no longer supported by other, modern Linux
>> distributions.
>>
>> Building musl libc on older hardware is a nice accomplishment, but
>> investing effort and complexity to maintain support probably isn't
>> useful to any musl libc user and probably isn't a productive use of
>> developer resources.
>>
>> I will continue to inquire if there is a simple technique to accomplish this.

Apparently GCC 7.1 added architecture macros.

As Tuan referenced, Alpine Linux also requires z196 as the minimum
architecture level.  I believe that it would be better for s390-musl
to default to the z196 ISA than for musl to require GCC 7.1.

Thanks, David


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-12 13:28                           ` David Edelsohn
@ 2017-06-12 13:54                             ` David Edelsohn
  2017-06-12 20:28                               ` Szabolcs Nagy
  2017-06-14 23:44                             ` Rich Felker
  1 sibling, 1 reply; 34+ messages in thread
From: David Edelsohn @ 2017-06-12 13:54 UTC (permalink / raw)
  To: musl

On Mon, Jun 12, 2017 at 9:28 AM, David Edelsohn <dje.gcc@gmail.com> wrote:
> On Mon, Jun 12, 2017 at 5:03 AM, Szabolcs Nagy <nsz@port70.net> wrote:
>> * David Edelsohn <dje.gcc@gmail.com> [2017-06-11 22:46:09 -0400]:
>>
>>> On Sun, Jun 11, 2017 at 6:19 AM, Szabolcs Nagy <nsz@port70.net> wrote:
>>> > * Rich Felker <dalias@libc.org> [2017-06-10 22:20:44 -0400]:
>>> >> On Sat, Jun 10, 2017 at 05:48:05PM -0400, David Edelsohn wrote:
>>> >> > When I worked with Bobby Bingham to create the s390x port of Musl, I
>>> >> > said that he could assume newer processors.  Also, I don't believe
>>> >> > that LLVM supports the earlier processors.  I believe that he assumed
>>> >> > some more recent instructions in other parts of the code.
>>> >>
>>> >> That seems doubtful; the amount of asm in musl is minimal and unlikely
>>> >> to benefit from later ISA levels; all the instructions I see look like
>>> >> very basic stuff that would always have been available.
>>> >>
>>> >> Now, what likely is accurate is your claim that nobody is using musl
>>> >> on lower ISA levels, so maybe it doesn't matter.
>>> >
>>> > well i am using s390x musl with lower isa level for compile tests
>>> >
>>> > and it seems gas rejects unrecognized opcodes so the new inline
>>> > asm does not compile for me.
>>> >
>>> > i think either musl configure should make sure the cc targets
>>> > the right isa level or the code should handle it with ifdefs
>>>
>>> I have asked the IBM toolchain team how to distinguish the
>>> architecture level at compile time, or at least distinguish support
>>> for the FP rounding instruction.
>>>
>>> Are you actually running s390x musl on a system earlier than z196
>>> architecture level?  The public IBM LinuxONE Cloud is running on a z13
>>> system, many generations newer than even z196.
>>>
>>
>> no, i only have a cross compiler, but since the toolchain build script
>> uses default settings i get z900 arch, if that's not good then gcc
>> config default should be fixed for musl.
>>
>>> The following IBM table of supported and tested systems
>>>
>>> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
>>>
>>> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
>>> requires at least zEC12.
>>>
>>> I can't find any official hardware requirements description for Alpine
>>> Linux. I tend to doubt that user would run it on older hardware,
>>> especially hardware no longer supported by other, modern Linux
>>> distributions.
>>>
>>> Building musl libc on older hardware is a nice accomplishment, but
>>> investing effort and complexity to maintain support probably isn't
>>> useful to any musl libc user and probably isn't a productive use of
>>> developer resources.
>>>
>>> I will continue to inquire if there is a simple technique to accomplish this.
>
> Apparently GCC 7.1 added architecture macros.
>
> As Tuan referenced, Alpine Linux also requires z196 as the minimum
> architecture level.  I believe that it would be better for s390-musl
> to default to z196 ISA than musl to require GCC 7.1.

Would a patch such as the following be acceptable?

Thanks, David

diff --git a/configure b/configure
index c2db298..a9e0256 100755
--- a/configure
+++ b/configure
@@ -494,6 +494,15 @@ fnmatch '-mtune=*|*\ -mtune=*' "$CC $CFLAGS" || tryldflag C
 fi

 #
+# On s390x, default to z196 architecture and zEC12 tuning to support newer math
+# instructions.
+#
+if test "$ARCH" = "s390x" ; then
+fnmatch '-march=*|*\ -march=*' "$CC $CFLAGS" || tryldflag CFLAGS_AUTO -march=z196
+fnmatch '-mtune=*|*\ -mtune=*' "$CC $CFLAGS" || tryldflag CFLAGS_AUTO -mtune=zEC12
+fi
+
+#
 # Even with -std=c99, gcc accepts some constructs which are constraint
 # violations. We want to treat these as errors regardless of whether
 # other purely stylistic warnings are enabled -- especially implicit


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-12 13:54                             ` David Edelsohn
@ 2017-06-12 20:28                               ` Szabolcs Nagy
  2017-06-12 21:02                                 ` David Edelsohn
  0 siblings, 1 reply; 34+ messages in thread
From: Szabolcs Nagy @ 2017-06-12 20:28 UTC (permalink / raw)
  To: musl

* David Edelsohn <dje.gcc@gmail.com> [2017-06-12 09:54:54 -0400]:
> On Mon, Jun 12, 2017 at 9:28 AM, David Edelsohn <dje.gcc@gmail.com> wrote:
> > On Mon, Jun 12, 2017 at 5:03 AM, Szabolcs Nagy <nsz@port70.net> wrote:
> >> * David Edelsohn <dje.gcc@gmail.com> [2017-06-11 22:46:09 -0400]:
> >>> The following IBM table of supported and tested systems
> >>>
> >>> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
> >>>
> >>> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
> >>> requires at least zEC12.
> >>>
> >>> I can't find any official hardware requirements description for Alpine
> >>> Linux. I tend to doubt that user would run it on older hardware,
> >>> especially hardware no longer supported by other, modern Linux
> >>> distributions.
> >>>
> >>> Building musl libc on older hardware is a nice accomplishment, but
> >>> investing effort and complexity to maintain support probably isn't
> >>> useful to any musl libc user and probably isn't a productive use of
> >>> developer resources.
> >>>
> >>> I will continue to inquire if there is a simple technique to accomplish this.
> >
> > Apparently GCC 7.1 added architecture macros.
> >
> > As Tuan referenced, Alpine Linux also requires z196 as the minimum
> > architecture level.  I believe that it would be better for s390-musl
> > to default to z196 ISA than musl to require GCC 7.1.
> 
> Would a patch such as the following be acceptable?
> 
> Thanks, David
> 
> diff --git a/configure b/configure
> index c2db298..a9e0256 100755
> --- a/configure
> +++ b/configure
> @@ -494,6 +494,15 @@ fnmatch '-mtune=*|*\ -mtune=*' "$CC $CFLAGS" || tryldflag C
>  fi
> 
>  #
> +# On s390x, default to z196 architecture and zEC12 tuning to support newer math
> +# instructions.
> +#
> +if test "$ARCH" = "s390x" ; then
> +fnmatch '-march=*|*\ -march=*' "$CC $CFLAGS" || tryldflag CFLAGS_AUTO
> -march=z196
> +fnmatch '-mtune=*|*\ -mtune=*' "$CC $CFLAGS" || tryldflag CFLAGS_AUTO
> -mtune=zEC12
> +fi
> +
> +#
>  # Even with -std=c99, gcc accepts some constructs which are constraint
>  # violations. We want to treat these as errors regardless of whether
>  # other purely stylistic warnings are enabled -- especially implicit

well the toolchain may be configured for a different/newer cpu
and then we may not want to override that.. what about

diff --git a/configure b/configure
index c2db298c..bcaf3a7d 100755
--- a/configure
+++ b/configure
@@ -656,6 +656,12 @@ trycppif __LITTLE_ENDIAN__ "$t" && SUBARCH=${SUBARCH}le
 trycppif _SOFT_FLOAT "$t" && fail "$0: error: soft-float not supported on powerpc64"
 fi
 
+if test "$ARCH" = "s390x" ; then
+echo 'float x; void f(){__asm__("fiebra %0,6,%1,4":"=f"(x):"f"(x));}' > "$tmpc"
+$CC $CFLAGS_C99FSE $CPPFLAGS $CFLAGS -c -o /dev/null "$tmpc" >/dev/null 2>&1 ||
+  fail "$0: error: s390x isa level is too low, use at least -march=z196"
+fi
+
 if test "$ARCH" = "sh" ; then
 tryflag CFLAGS_AUTO -Wa,--isa=any
 trycppif __BIG_ENDIAN__ "$t" && SUBARCH=${SUBARCH}eb



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-12 20:28                               ` Szabolcs Nagy
@ 2017-06-12 21:02                                 ` David Edelsohn
  2017-06-13 15:55                                   ` Szabolcs Nagy
  0 siblings, 1 reply; 34+ messages in thread
From: David Edelsohn @ 2017-06-12 21:02 UTC (permalink / raw)
  To: musl

On Mon, Jun 12, 2017 at 4:28 PM, Szabolcs Nagy <nsz@port70.net> wrote:
> * David Edelsohn <dje.gcc@gmail.com> [2017-06-12 09:54:54 -0400]:
>> On Mon, Jun 12, 2017 at 9:28 AM, David Edelsohn <dje.gcc@gmail.com> wrote:
>> > On Mon, Jun 12, 2017 at 5:03 AM, Szabolcs Nagy <nsz@port70.net> wrote:
>> >> * David Edelsohn <dje.gcc@gmail.com> [2017-06-11 22:46:09 -0400]:
>> >>> The following IBM table of supported and tested systems
>> >>>
>> >>> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
>> >>>
>> >>> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
>> >>> requires at least zEC12.
>> >>>
>> >>> I can't find any official hardware requirements description for Alpine
>> >>> Linux. I tend to doubt that user would run it on older hardware,
>> >>> especially hardware no longer supported by other, modern Linux
>> >>> distributions.
>> >>>
>> >>> Building musl libc on older hardware is a nice accomplishment, but
>> >>> investing effort and complexity to maintain support probably isn't
>> >>> useful to any musl libc user and probably isn't a productive use of
>> >>> developer resources.
>> >>>
>> >>> I will continue to inquire if there is a simple technique to accomplish this.
>> >
>> > Apparently GCC 7.1 added architecture macros.
>> >
>> > As Tuan referenced, Alpine Linux also requires z196 as the minimum
>> > architecture level.  I believe that it would be better for s390-musl
>> > to default to z196 ISA than musl to require GCC 7.1.
>>
>> Would a patch such as the following be acceptable?
>>
>> Thanks, David
>>
>> diff --git a/configure b/configure
>> index c2db298..a9e0256 100755
>> --- a/configure
>> +++ b/configure
>> @@ -494,6 +494,15 @@ fnmatch '-mtune=*|*\ -mtune=*' "$CC $CFLAGS" || tryldflag C
>>  fi
>>
>>  #
>> +# On s390x, default to z196 architecture and zEC12 tuning to support newer math
>> +# instructions.
>> +#
>> +if test "$ARCH" = "s390x" ; then
>> +fnmatch '-march=*|*\ -march=*' "$CC $CFLAGS" || tryldflag CFLAGS_AUTO
>> -march=z196
>> +fnmatch '-mtune=*|*\ -mtune=*' "$CC $CFLAGS" || tryldflag CFLAGS_AUTO
>> -mtune=zEC12
>> +fi
>> +
>> +#
>>  # Even with -std=c99, gcc accepts some constructs which are constraint
>>  # violations. We want to treat these as errors regardless of whether
>>  # other purely stylistic warnings are enabled -- especially implicit
>
> well the toolchain may be configured for a different/newer cpu
> and then we may not want to override that.. what about
>
> diff --git a/configure b/configure
> index c2db298c..bcaf3a7d 100755
> --- a/configure
> +++ b/configure
> @@ -656,6 +656,12 @@ trycppif __LITTLE_ENDIAN__ "$t" && SUBARCH=${SUBARCH}le
>  trycppif _SOFT_FLOAT "$t" && fail "$0: error: soft-float not supported on powerpc64"
>  fi
>
> +if test "$ARCH" = "s390x" ; then
> +echo 'float x; void f(){__asm__("fiebra %0,6,%1,4":"=f"(x):"f"(x));}' > "$tmpc"
> +$CC $CFLAGS_C99FSE $CPPFLAGS $CFLAGS -c -o /dev/null "$tmpc" >/dev/null 2>&1 ||
> +  fail "$0: error: s390x isa level is too low, use at least -march=z196"
> +fi
> +
>  if test "$ARCH" = "sh" ; then
>  tryflag CFLAGS_AUTO -Wa,--isa=any
>  trycppif __BIG_ENDIAN__ "$t" && SUBARCH=${SUBARCH}eb

Why is the x86 configure logic that I used as a template correct?

Thanks, David


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-12 21:02                                 ` David Edelsohn
@ 2017-06-13 15:55                                   ` Szabolcs Nagy
  2017-06-14 23:34                                     ` Rich Felker
  0 siblings, 1 reply; 34+ messages in thread
From: Szabolcs Nagy @ 2017-06-13 15:55 UTC (permalink / raw)
  To: musl

* David Edelsohn <dje.gcc@gmail.com> [2017-06-12 17:02:27 -0400]:
> On Mon, Jun 12, 2017 at 4:28 PM, Szabolcs Nagy <nsz@port70.net> wrote:
> > * David Edelsohn <dje.gcc@gmail.com> [2017-06-12 09:54:54 -0400]:
> >> On Mon, Jun 12, 2017 at 9:28 AM, David Edelsohn <dje.gcc@gmail.com> wrote:
> >> > On Mon, Jun 12, 2017 at 5:03 AM, Szabolcs Nagy <nsz@port70.net> wrote:
> >> >> * David Edelsohn <dje.gcc@gmail.com> [2017-06-11 22:46:09 -0400]:
> >> >>> The following IBM table of supported and tested systems
> >> >>>
> >> >>> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
> >> >>>
> >> >>> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
> >> >>> requires at least zEC12.
> >> >>>
> >> >>> I can't find any official hardware requirements description for Alpine
> >> >>> Linux. I tend to doubt that user would run it on older hardware,
> >> >>> especially hardware no longer supported by other, modern Linux
> >> >>> distributions.
> >> >>>
> >> >>> Building musl libc on older hardware is a nice accomplishment, but
> >> >>> investing effort and complexity to maintain support probably isn't
> >> >>> useful to any musl libc user and probably isn't a productive use of
> >> >>> developer resources.
> >> >>>
> >> >>> I will continue to inquire if there is a simple technique to accomplish this.
> >> >
> >> > Apparently GCC 7.1 added architecture macros.
> >> >
> >> > As Tuan referenced, Alpine Linux also requires z196 as the minimum
> >> > architecture level.  I believe that it would be better for s390-musl
> >> > to default to z196 ISA than musl to require GCC 7.1.
> >>
> >> Would a patch such as the following be acceptable?
> >>
> >> Thanks, David
> >>
> >> diff --git a/configure b/configure
> >> index c2db298..a9e0256 100755
> >> --- a/configure
> >> +++ b/configure
> >> @@ -494,6 +494,15 @@ fnmatch '-mtune=*|*\ -mtune=*' "$CC $CFLAGS" || tryldflag C
> >>  fi
> >>
> >>  #
> >> +# On s390x, default to z196 architecture and zEC12 tuning to support newer math
> >> +# instructions.
> >> +#
> >> +if test "$ARCH" = "s390x" ; then
> >> +fnmatch '-march=*|*\ -march=*' "$CC $CFLAGS" || tryldflag CFLAGS_AUTO
> >> -march=z196
> >> +fnmatch '-mtune=*|*\ -mtune=*' "$CC $CFLAGS" || tryldflag CFLAGS_AUTO
> >> -mtune=zEC12
> >> +fi
> >> +
> >> +#
> >>  # Even with -std=c99, gcc accepts some constructs which are constraint
> >>  # violations. We want to treat these as errors regardless of whether
> >>  # other purely stylistic warnings are enabled -- especially implicit
> >
> > well the toolchain may be configured for a different/newer cpu
> > and then we may not want to override that.. what about
> >
> > diff --git a/configure b/configure
> > index c2db298c..bcaf3a7d 100755
> > --- a/configure
> > +++ b/configure
> > @@ -656,6 +656,12 @@ trycppif __LITTLE_ENDIAN__ "$t" && SUBARCH=${SUBARCH}le
> >  trycppif _SOFT_FLOAT "$t" && fail "$0: error: soft-float not supported on powerpc64"
> >  fi
> >
> > +if test "$ARCH" = "s390x" ; then
> > +echo 'float x; void f(){__asm__("fiebra %0,6,%1,4":"=f"(x):"f"(x));}' > "$tmpc"
> > +$CC $CFLAGS_C99FSE $CPPFLAGS $CFLAGS -c -o /dev/null "$tmpc" >/dev/null 2>&1 ||
> > +  fail "$0: error: s390x isa level is too low, use at least -march=z196"
> > +fi
> > +
> >  if test "$ARCH" = "sh" ; then
> >  tryflag CFLAGS_AUTO -Wa,--isa=any
> >  trycppif __BIG_ENDIAN__ "$t" && SUBARCH=${SUBARCH}eb
> 
> Why is the x86 configure logic that I used as a template correct?
> 

hm i didnt know about that
then the patch is probably ok.

> Thanks, David


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-13 15:55                                   ` Szabolcs Nagy
@ 2017-06-14 23:34                                     ` Rich Felker
  2017-06-14 23:40                                       ` A. Wilcox
  0 siblings, 1 reply; 34+ messages in thread
From: Rich Felker @ 2017-06-14 23:34 UTC (permalink / raw)
  To: musl

On Tue, Jun 13, 2017 at 05:55:13PM +0200, Szabolcs Nagy wrote:
> > > diff --git a/configure b/configure
> > > index c2db298c..bcaf3a7d 100755
> > > --- a/configure
> > > +++ b/configure
> > > @@ -656,6 +656,12 @@ trycppif __LITTLE_ENDIAN__ "$t" && SUBARCH=${SUBARCH}le
> > >  trycppif _SOFT_FLOAT "$t" && fail "$0: error: soft-float not supported on powerpc64"
> > >  fi
> > >
> > > +if test "$ARCH" = "s390x" ; then
> > > +echo 'float x; void f(){__asm__("fiebra %0,6,%1,4":"=f"(x):"f"(x));}' > "$tmpc"
> > > +$CC $CFLAGS_C99FSE $CPPFLAGS $CFLAGS -c -o /dev/null "$tmpc" >/dev/null 2>&1 ||
> > > +  fail "$0: error: s390x isa level is too low, use at least -march=z196"
> > > +fi
> > > +
> > >  if test "$ARCH" = "sh" ; then
> > >  tryflag CFLAGS_AUTO -Wa,--isa=any
> > >  trycppif __BIG_ENDIAN__ "$t" && SUBARCH=${SUBARCH}eb
> > 
> > Why is the x86 configure logic that I used as a template correct?
> 
> hm i didnt know about that
> then the patch is probably ok.

The x86 logic is actually implementing policy to the opposite effect,
ignoring the toolchain's default -march and forcing the minimum
baseline isa (i486) unless the user manually puts a specific -march in
CFLAGS. I'm doubtful that it's actually a good idea to be doing that
any more; at least it's inconsistent with how other archs are treated
(using the toolchain's default).

Rich


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-14 23:34                                     ` Rich Felker
@ 2017-06-14 23:40                                       ` A. Wilcox
  0 siblings, 0 replies; 34+ messages in thread
From: A. Wilcox @ 2017-06-14 23:40 UTC (permalink / raw)
  To: musl

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256

On 14/06/17 18:34, Rich Felker wrote:
> The x86 logic is actually implementing policy to the opposite 
> effect, ignoring the toolchain's default -march and forcing the 
> minimum baseline isa (i486) unless the user manually puts a 
> specific -march in CFLAGS. I'm doubtful that it's actually a good 
> idea to be doing that any more; at least it's inconsistent with
> how other archs are treated (using the toolchain's default).
> 
> Rich
> 


Just as a note from the Adélie camp, we have two separate toolchains
and CHOST values, i486-foxkit-linux-musl for the "generic" 486 x86
ISA, and pentium3-foxkit-linux-musl for the "modern" SSE x86 ISA.

So I had no idea musl was still forcing i486 when being built without
a specified -march.  That's interesting.  And probably not necessary.

Best,
- --arw

- -- 
A. Wilcox (awilfox)
Project Lead, Adélie Linux
http://adelielinux.org
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2

iQIcBAEBCAAGBQJZQclfAAoJEMspy1GSK50U3fsQAKQIQG4nZkEzAvjM0mJMhgkU
RlxRxDvtER7wyvYFxKeQPzn3mMcmrokhHUvXVSTYp3XuWK/bjA0oJB0K1JBQmQnd
pZbZa68EEUaIJv1AmlTkgmcmjQpEo0DAtMUk1vHs6As/AVU/brJYCTPTsR5yCxaG
ctTvfN+imTUlDiw+nZF1LTUA+J9iNoiVYV4xwG1z5db+ucCkiWaruPno5Y+qVxKi
dDDTOOwV+W2LVq821Jh3yEDo/SKnQ51uf+Lm2t4hOQT/gsLlNjQKT1emAir/Di1G
mjpqsS3Lx8xWi9xNOYDSyZS8VCDXXmISLDTKqOQveljUjCCe4WNN725LyJFFRBJo
Wl5lJlJtk46s7KVX9MyRQRbdqCnvNc6mTPjoFKVbSIgtLZIpH1zDfbewoWDrTyO0
LfKC7TclWJSbWOB+x2ASu4B3CC+g1SqwOSNqjQ5laX1hs/fU9e2s4vIuyNsX3bbo
/0E77zt1gl011iS4/YP2UtdoDa9vgOk3IiLyofnLRGO1/PcVWEKhhLbB3JvRRxq4
RwRFnRL0VuQscTm07ikHPk60YjFAGkjxon/TyUlgPnvSYjmZ1xdJldw6SBzm3EyY
t0aj6IYBtebLgCSUqpSSv+4FUaWWvHUqcKqqZPnMSrvruFiSq87Hhrg/0DPYNXJb
xtx8p9p8a0X4rcvkHCsK
=RRK3
-----END PGP SIGNATURE-----


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-12 13:28                           ` David Edelsohn
  2017-06-12 13:54                             ` David Edelsohn
@ 2017-06-14 23:44                             ` Rich Felker
  2017-06-15 12:18                               ` David Edelsohn
  2017-06-18 17:12                               ` David Edelsohn
  1 sibling, 2 replies; 34+ messages in thread
From: Rich Felker @ 2017-06-14 23:44 UTC (permalink / raw)
  To: musl

On Mon, Jun 12, 2017 at 09:28:52AM -0400, David Edelsohn wrote:
> >> The following IBM table of supported and tested systems
> >>
> >> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
> >>
> >> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
> >> requires at least zEC12.
> >>
> >> I can't find any official hardware requirements description for Alpine
> >> Linux. I tend to doubt that user would run it on older hardware,
> >> especially hardware no longer supported by other, modern Linux
> >> distributions.
> >>
> >> Building musl libc on older hardware is a nice accomplishment, but
> >> investing effort and complexity to maintain support probably isn't
> >> useful to any musl libc user and probably isn't a productive use of
> >> developer resources.
> >>
> >> I will continue to inquire if there is a simple technique to accomplish this.
> 
> Apparently GCC 7.1 added architecture macros.
> 
> As Tuan referenced, Alpine Linux also requires z196 as the minimum
> architecture level.  I believe that it would be better for s390-musl
> to default to z196 ISA than musl to require GCC 7.1.

I agree we shouldn't "require GCC 7.1", but using the macros does not
imply such a requirement. For example:

	#if __ARCH__ >= 10

would only use the asm on z196+ (if I got the number right) with GCC
7.1+ (no asm on older compilers), whereas:

	#if __ARCH__ >= 10 || !defined(__ARCH__)

would use the asm on z196+ or on compilers too old to provide __ARCH__
(and building for a more minimal baseline ISA would not be supported
on such compilers unless you manually add -D__ARCH__=5 or whatever to
CFLAGS).

I'm fine with waiting to add those pp conditionals until if/when
someone actually wants to use the lower baseline ISA, if you don't
want to do it now. I am hesitant to add new ISA-forcing logic to
configure, though (see the other reply on that). Would it be bad to
have the build fail with low default -march? If so, maybe the
configure logic could check for !defined(__ARCH__) and then do a
compile test to define __ARCH__ on its own, and we could use the above
logic?

Rich


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-14 23:44                             ` Rich Felker
@ 2017-06-15 12:18                               ` David Edelsohn
  2017-06-18 17:12                               ` David Edelsohn
  1 sibling, 0 replies; 34+ messages in thread
From: David Edelsohn @ 2017-06-15 12:18 UTC (permalink / raw)
  To: musl

On Wed, Jun 14, 2017 at 7:44 PM, Rich Felker <dalias@libc.org> wrote:
> On Mon, Jun 12, 2017 at 09:28:52AM -0400, David Edelsohn wrote:
>> >> The following IBM table of supported and tested systems
>> >>
>> >> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
>> >>
>> >> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
>> >> requires at least zEC12.
>> >>
>> >> I can't find any official hardware requirements description for Alpine
>> >> Linux. I tend to doubt that user would run it on older hardware,
>> >> especially hardware no longer supported by other, modern Linux
>> >> distributions.
>> >>
>> >> Building musl libc on older hardware is a nice accomplishment, but
>> >> investing effort and complexity to maintain support probably isn't
>> >> useful to any musl libc user and probably isn't a productive use of
>> >> developer resources.
>> >>
>> >> I will continue to inquire if there is a simple technique to accomplish this.
>>
>> Apparently GCC 7.1 added architecture macros.
>>
>> As Tuan referenced, Alpine Linux also requires z196 as the minimum
>> architecture level.  I believe that it would be better for s390-musl
>> to default to z196 ISA than musl to require GCC 7.1.
>
> I agree we shouldn't "require GCC 7.1", but using the macros does not
> imply such a requirement. For example:
>
>         #if __ARCH__ >= 10
>
> would only use the asm on z196+ (if I got the number right) with GCC
> 7.1+ (no asm on older compilers), whereas:
>
>         #if __ARCH__ >= 10 || !defined(__ARCH__)
>
> would use the asm on z196+ or on compilers too old to provide __ARCH__
> (and building for a more minimal baseline ISA would not be supported
> on such compilers unless you manually add -D__ARCH__=5 or whatever to
> CFLAGS).
>
> I'm fine with waiting to add those pp conditionals until if/when
> someone actually wants to use the lower baseline ISA, if you don't
> want to do it now.

The above comment seems to state that it's okay not to add the
__ARCH__ logic for now, until someone actually shows a need.  Which is
great.

> I am hesitant to add new ISA-forcing logic to
> configure, though (see the other reply on that). Would it be bad to
> have the build fail with low default -march? If so, maybe the
> configure logic could check for !defined(__ARCH__) and then do a
> compile test to define __ARCH__ on its own, and we could use the above
> logic?

And this second comment seems to state that you want the __ARCH__
logic and additional configure logic to set __ARCH__ to a default
value that supports z196 if the compiler does not define __ARCH__.

These two statements seem contradictory.

This is an awful lot of time and effort on the part of everyone in
this discussion for architecture support that never will be utilized.

Thanks, David


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-14 23:44                             ` Rich Felker
  2017-06-15 12:18                               ` David Edelsohn
@ 2017-06-18 17:12                               ` David Edelsohn
  2017-06-21  0:49                                 ` Rich Felker
  1 sibling, 1 reply; 34+ messages in thread
From: David Edelsohn @ 2017-06-18 17:12 UTC (permalink / raw)
  To: musl

How can we move forward with this patch?

I would prefer to avoid the __ARCH__ complexity until there is a clear
user requirement.

Thanks, David


On Wed, Jun 14, 2017 at 7:44 PM, Rich Felker <dalias@libc.org> wrote:
> On Mon, Jun 12, 2017 at 09:28:52AM -0400, David Edelsohn wrote:
>> >> The following IBM table of supported and tested systems
>> >>
>> >> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
>> >>
>> >> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
>> >> requires at least zEC12.
>> >>
>> >> I can't find any official hardware requirements description for Alpine
>> >> Linux. I tend to doubt that user would run it on older hardware,
>> >> especially hardware no longer supported by other, modern Linux
>> >> distributions.
>> >>
>> >> Building musl libc on older hardware is a nice accomplishment, but
>> >> investing effort and complexity to maintain support probably isn't
>> >> useful to any musl libc user and probably isn't a productive use of
>> >> developer resources.
>> >>
>> >> I will continue to inquire if there is a simple technique to accomplish this.
>>
>> Apparently GCC 7.1 added architecture macros.
>>
>> As Tuan referenced, Alpine Linux also requires z196 as the minimum
>> architecture level.  I believe that it would be better for s390-musl
>> to default to z196 ISA than musl to require GCC 7.1.
>
> I agree we shouldn't "require GCC 7.1", but using the macros does not
> imply such a requirement. For example:
>
>         #if __ARCH__ >= 10
>
> would only use the asm on z196+ (if I got the number right) with GCC
> 7.1+ (no asm on older compilers), whereas:
>
>         #if __ARCH__ >= 10 || !defined(__ARCH__)
>
> would use the asm on z196+ or on compilers too old to provide __ARCH__
> (and building for a more minimal baseline ISA would not be supported
> on such compilers unless you manually add -D__ARCH__=5 or whatever to
> CFLAGS).
>
> I'm fine with waiting to add those pp conditionals until if/when
> someone actually wants to use the lower baseline ISA, if you don't
> want to do it now. I am hesitant to add new ISA-forcing logic to
> configure, though (see the other reply on that). Would it be bad to
> have the build fail with low default -march? If so, maybe the
> configure logic could check for !defined(__ARCH__) and then do a
> compile test to define __ARCH__ on its own, and we could use the above
> logic?
>
> Rich


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-18 17:12                               ` David Edelsohn
@ 2017-06-21  0:49                                 ` Rich Felker
  2017-06-21  1:07                                   ` David Edelsohn
  0 siblings, 1 reply; 34+ messages in thread
From: Rich Felker @ 2017-06-21  0:49 UTC (permalink / raw)
  To: musl

On Sun, Jun 18, 2017 at 01:12:15PM -0400, David Edelsohn wrote:
> How can we move forward with this patch?
> 
> I would prefer to avoid the __ARCH__ complexity until there is a clear
> user requirement.
> 
> Thanks, David

Rob Landley informed me that the s390x environment he's building with
mkroot (https://github.com/landley/mkroot) for testing under qemu
system level emulation is running a kernel built for z900. If qemu can
emulate newer machines, this may just be an oversight that can be
changed by reconfiguring, but it does indicate that z900 seems to be
supported by kernel, and that there's at least someone using the
baseline ISA level now.

For what it's worth I agree that we've spent an inordinate amount of
time on this topic, and I apologize. I just don't want it to turn into
a regression.

Rich


> On Wed, Jun 14, 2017 at 7:44 PM, Rich Felker <dalias@libc.org> wrote:
> > On Mon, Jun 12, 2017 at 09:28:52AM -0400, David Edelsohn wrote:
> >> >> The following IBM table of supported and tested systems
> >> >>
> >> >> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
> >> >>
> >> >> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
> >> >> requires at least zEC12.
> >> >>
> >> >> I can't find any official hardware requirements description for Alpine
> >> >> Linux. I tend to doubt that user would run it on older hardware,
> >> >> especially hardware no longer supported by other, modern Linux
> >> >> distributions.
> >> >>
> >> >> Building musl libc on older hardware is a nice accomplishment, but
> >> >> investing effort and complexity to maintain support probably isn't
> >> >> useful to any musl libc user and probably isn't a productive use of
> >> >> developer resources.
> >> >>
> >> >> I will continue to inquire if there is a simple technique to accomplish this.
> >>
> >> Apparently GCC 7.1 added architecture macros.
> >>
> >> As Tuan referenced, Alpine Linux also requires z196 as the minimum
> >> architecture level.  I believe that it would be better for s390-musl
> >> to default to z196 ISA than musl to require GCC 7.1.
> >
> > I agree we shouldn't "require GCC 7.1", but using the macros does not
> > imply such a requirement. For example:
> >
> >         #if __ARCH__ >= 10
> >
> > would only use the asm on z196+ (if I got the number right) with GCC
> > 7.1+ (no asm on older compilers), whereas:
> >
> >         #if __ARCH__ >= 10 || !defined(__ARCH__)
> >
> > would use the asm on z196+ or on compilers too old to provide __ARCH__
> > (and building for a more minimal baseline ISA would not be supported
> > on such compilers unless you manually add -D__ARCH__=5 or whatever to
> > CFLAGS).
> >
> > I'm fine with waiting to add those pp conditionals until if/when
> > someone actually wants to use the lower baseline ISA, if you don't
> > want to do it now. I am hesitant to add new ISA-forcing logic to
> > configure, though (see the other reply on that). Would it be bad to
> > have the build fail with low default -march? If so, maybe the
> > configure logic could check for !defined(__ARCH__) and then do a
> > compile test to define __ARCH__ on its own, and we could use the above
> > logic?
> >
> > Rich


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-21  0:49                                 ` Rich Felker
@ 2017-06-21  1:07                                   ` David Edelsohn
  2017-06-21  1:20                                     ` Rich Felker
  0 siblings, 1 reply; 34+ messages in thread
From: David Edelsohn @ 2017-06-21  1:07 UTC (permalink / raw)
  To: musl

Rich,

Another option is a test recommended by a colleague

#if (__HTM__ || __ARCH__ > z196)

__HTM__ is defined in earlier releases of GCC and is enabled in zEC12,
so it can be used as a proxy for the architecture in earlier compiler
releases.

Would that be acceptable?

Thanks, David

On Tue, Jun 20, 2017 at 8:49 PM, Rich Felker <dalias@libc.org> wrote:
> On Sun, Jun 18, 2017 at 01:12:15PM -0400, David Edelsohn wrote:
>> How can we move forward with this patch?
>>
>> I would prefer to avoid the __ARCH__ complexity until there is a clear
>> user requirement.
>>
>> Thanks, David
>
> Rob Landley informed me that the s390x environment he's building with
> mkroot (https://github.com/landley/mkroot) for testing under qemu
> system level emulation is running a kernel built for z900. If qemu can
> emulate newer machines, this may just be an oversight that can be
> changed by reconfiguring, but it does indicate that z900 seems to be
> supported by the kernel, and that there's at least someone using the
> baseline ISA level now.
>
> For what it's worth I agree that we've spent an inordinate amount of
> time on this topic, and I apologize. I just don't want it to turn into
> a regression.
>
> Rich
>
>
>> On Wed, Jun 14, 2017 at 7:44 PM, Rich Felker <dalias@libc.org> wrote:
>> > On Mon, Jun 12, 2017 at 09:28:52AM -0400, David Edelsohn wrote:
>> >> >> The following IBM table of supported and tested systems
>> >> >>
>> >> >> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
>> >> >>
>> >> >> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
>> >> >> requires at least zEC12.
>> >> >>
>> >> >> I can't find any official hardware requirements description for Alpine
>> >> >> Linux. I tend to doubt that users would run it on older hardware,
>> >> >> especially hardware no longer supported by other, modern Linux
>> >> >> distributions.
>> >> >>
>> >> >> Building musl libc on older hardware is a nice accomplishment, but
>> >> >> investing effort and complexity to maintain support probably isn't
>> >> >> useful to any musl libc user and probably isn't a productive use of
>> >> >> developer resources.
>> >> >>
>> >> >> I will continue to inquire if there is a simple technique to accomplish this.
>> >>
>> >> Apparently GCC 7.1 added architecture macros.
>> >>
>> >> As Tuan referenced, Alpine Linux also requires z196 as the minimum
>> >> architecture level.  I believe that it would be better for s390-musl
>> >> to default to z196 ISA than for musl to require GCC 7.1.
>> >
>> > I agree we shouldn't "require GCC 7.1", but using the macros does not
>> > imply such a requirement. For example:
>> >
>> >         #if __ARCH__ >= 10
>> >
>> > would only use the asm on z196+ (if I got the number right) with GCC
>> > 7.1+ (no asm on older compilers), whereas:
>> >
>> >         #if __ARCH__ >= 10 || !defined(__ARCH__)
>> >
>> > would use the asm on z196+ or on compilers too old to provide __ARCH__
>> > (and building for a more minimal baseline ISA would not be supported
>> > on such compilers unless you manually add -D__ARCH__=5 or whatever to
>> > CFLAGS).
>> >
>> > I'm fine with waiting to add those pp conditionals until if/when
>> > someone actually wants to use the lower baseline ISA, if you don't
>> > want to do it now. I am hesitant to add new ISA-forcing logic to
>> > configure, though (see the other reply on that). Would it be bad to
>> > have the build fail with low default -march? If so, maybe the
>> > configure logic could check for !defined(__ARCH__) and then do a
>> > compile test to define __ARCH__ on its own, and we could use the above
>> > logic?
>> >
>> > Rich


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-21  1:07                                   ` David Edelsohn
@ 2017-06-21  1:20                                     ` Rich Felker
  2017-06-21  3:34                                       ` David Edelsohn
  0 siblings, 1 reply; 34+ messages in thread
From: Rich Felker @ 2017-06-21  1:20 UTC (permalink / raw)
  To: musl

On Tue, Jun 20, 2017 at 09:07:08PM -0400, David Edelsohn wrote:
> Rich,
> 
> Another option is a test recommended by a colleague
> 
> #if (__HTM__ || __ARCH__ > z196)
> 
> __HTM__ is defined in earlier releases of GCC and is enabled in zEC12,
> so it can be used as a proxy for the architecture in earlier compiler
> releases.
> 
> Would that be acceptable?

Indeed, __HTM__ gets defined for me on gcc 6.3 with -march=zEC12 but
not with -march=z196. If that's acceptable to you I think it's okay;
users could also build with "-march=z196 -D__ARCH__=???"  to get the
math insns on baseline z196. BTW what is the actual value for __ARCH__
indicating z196? I think I figured it out from the gcc source as 10.
The "z196" you used in the above example isn't really a macro, is it?
If so that's a bad namespace violation in gcc that needs to be
fixed...

Rich


> On Tue, Jun 20, 2017 at 8:49 PM, Rich Felker <dalias@libc.org> wrote:
> > On Sun, Jun 18, 2017 at 01:12:15PM -0400, David Edelsohn wrote:
> >> How can we move forward with this patch?
> >>
> >> I would prefer to avoid the __ARCH__ complexity until there is a clear
> >> user requirement.
> >>
> >> Thanks, David
> >
> > Rob Landley informed me that the s390x environment he's building with
> > mkroot (https://github.com/landley/mkroot) for testing under qemu
> > system level emulation is running a kernel built for z900. If qemu can
> > emulate newer machines, this may just be an oversight that can be
> > changed by reconfiguring, but it does indicate that z900 seems to be
> > supported by the kernel, and that there's at least someone using the
> > baseline ISA level now.
> >
> > For what it's worth I agree that we've spent an inordinate amount of
> > time on this topic, and I apologize. I just don't want it to turn into
> > a regression.
> >
> > Rich
> >
> >
> >> On Wed, Jun 14, 2017 at 7:44 PM, Rich Felker <dalias@libc.org> wrote:
> >> > On Mon, Jun 12, 2017 at 09:28:52AM -0400, David Edelsohn wrote:
> >> >> >> The following IBM table of supported and tested systems
> >> >> >>
> >> >> >> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
> >> >> >>
> >> >> >> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
> >> >> >> requires at least zEC12.
> >> >> >>
> >> >> >> I can't find any official hardware requirements description for Alpine
> >> >> >> Linux. I tend to doubt that users would run it on older hardware,
> >> >> >> especially hardware no longer supported by other, modern Linux
> >> >> >> distributions.
> >> >> >>
> >> >> >> Building musl libc on older hardware is a nice accomplishment, but
> >> >> >> investing effort and complexity to maintain support probably isn't
> >> >> >> useful to any musl libc user and probably isn't a productive use of
> >> >> >> developer resources.
> >> >> >>
> >> >> >> I will continue to inquire if there is a simple technique to accomplish this.
> >> >>
> >> >> Apparently GCC 7.1 added architecture macros.
> >> >>
> >> >> As Tuan referenced, Alpine Linux also requires z196 as the minimum
> >> >> architecture level.  I believe that it would be better for s390-musl
> >> >> to default to z196 ISA than for musl to require GCC 7.1.
> >> >
> >> > I agree we shouldn't "require GCC 7.1", but using the macros does not
> >> > imply such a requirement. For example:
> >> >
> >> >         #if __ARCH__ >= 10
> >> >
> >> > would only use the asm on z196+ (if I got the number right) with GCC
> >> > 7.1+ (no asm on older compilers), whereas:
> >> >
> >> >         #if __ARCH__ >= 10 || !defined(__ARCH__)
> >> >
> >> > would use the asm on z196+ or on compilers too old to provide __ARCH__
> >> > (and building for a more minimal baseline ISA would not be supported
> >> > on such compilers unless you manually add -D__ARCH__=5 or whatever to
> >> > CFLAGS).
> >> >
> >> > I'm fine with waiting to add those pp conditionals until if/when
> >> > someone actually wants to use the lower baseline ISA, if you don't
> >> > want to do it now. I am hesitant to add new ISA-forcing logic to
> >> > configure, though (see the other reply on that). Would it be bad to
> >> > have the build fail with low default -march? If so, maybe the
> >> > configure logic could check for !defined(__ARCH__) and then do a
> >> > compile test to define __ARCH__ on its own, and we could use the above
> >> > logic?
> >> >
> >> > Rich


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-21  1:20                                     ` Rich Felker
@ 2017-06-21  3:34                                       ` David Edelsohn
  2017-06-23 19:31                                         ` Rich Felker
  0 siblings, 1 reply; 34+ messages in thread
From: David Edelsohn @ 2017-06-21  3:34 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 4800 bytes --]

__ARCH__ = 9 for z196.

Attached are the original patch to add the FP math instructions and a
second patch that uses the test proposed by my colleague and follows
the template of arm/sqrt.c, including the generic C code for earlier
architectures.

Thanks, David

On Tue, Jun 20, 2017 at 9:20 PM, Rich Felker <dalias@libc.org> wrote:
> On Tue, Jun 20, 2017 at 09:07:08PM -0400, David Edelsohn wrote:
>> Rich,
>>
>> Another option is a test recommended by a colleague
>>
>> #if (__HTM__ || __ARCH__ > z196)
>>
>> __HTM__ is defined in earlier releases of GCC and is enabled in zEC12,
>> so it can be used as a proxy for the architecture in earlier compiler
>> releases.
>>
>> Would that be acceptable?
>
> Indeed, __HTM__ gets defined for me on gcc 6.3 with -march=zEC12 but
> not with -march=z196. If that's acceptable to you I think it's okay;
> users could also build with "-march=z196 -D__ARCH__=???"  to get the
> math insns on baseline z196. BTW what is the actual value for __ARCH__
> indicating z196? I think I figured it out from the gcc source as 10.
> The "z196" you used in the above example isn't really a macro, is it?
> If so that's a bad namespace violation in gcc that needs to be
> fixed...
>
> Rich
>
>
>> On Tue, Jun 20, 2017 at 8:49 PM, Rich Felker <dalias@libc.org> wrote:
>> > On Sun, Jun 18, 2017 at 01:12:15PM -0400, David Edelsohn wrote:
>> >> How can we move forward with this patch?
>> >>
>> >> I would prefer to avoid the __ARCH__ complexity until there is a clear
>> >> user requirement.
>> >>
>> >> Thanks, David
>> >
>> > Rob Landley informed me that the s390x environment he's building with
>> > mkroot (https://github.com/landley/mkroot) for testing under qemu
>> > system level emulation is running a kernel built for z900. If qemu can
>> > emulate newer machines, this may just be an oversight that can be
>> > changed by reconfiguring, but it does indicate that z900 seems to be
>> > supported by the kernel, and that there's at least someone using the
>> > baseline ISA level now.
>> >
>> > For what it's worth I agree that we've spent an inordinate amount of
>> > time on this topic, and I apologize. I just don't want it to turn into
>> > a regression.
>> >
>> > Rich
>> >
>> >
>> >> On Wed, Jun 14, 2017 at 7:44 PM, Rich Felker <dalias@libc.org> wrote:
>> >> > On Mon, Jun 12, 2017 at 09:28:52AM -0400, David Edelsohn wrote:
>> >> >> >> The following IBM table of supported and tested systems
>> >> >> >>
>> >> >> >> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
>> >> >> >>
>> >> >> >> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
>> >> >> >> requires at least zEC12.
>> >> >> >>
>> >> >> >> I can't find any official hardware requirements description for Alpine
>> >> >> >> Linux. I tend to doubt that users would run it on older hardware,
>> >> >> >> especially hardware no longer supported by other, modern Linux
>> >> >> >> distributions.
>> >> >> >>
>> >> >> >> Building musl libc on older hardware is a nice accomplishment, but
>> >> >> >> investing effort and complexity to maintain support probably isn't
>> >> >> >> useful to any musl libc user and probably isn't a productive use of
>> >> >> >> developer resources.
>> >> >> >>
>> >> >> >> I will continue to inquire if there is a simple technique to accomplish this.
>> >> >>
>> >> >> Apparently GCC 7.1 added architecture macros.
>> >> >>
>> >> >> As Tuan referenced, Alpine Linux also requires z196 as the minimum
>> >> >> architecture level.  I believe that it would be better for s390-musl
>> >> >> to default to z196 ISA than for musl to require GCC 7.1.
>> >> >
>> >> > I agree we shouldn't "require GCC 7.1", but using the macros does not
>> >> > imply such a requirement. For example:
>> >> >
>> >> >         #if __ARCH__ >= 10
>> >> >
>> >> > would only use the asm on z196+ (if I got the number right) with GCC
>> >> > 7.1+ (no asm on older compilers), whereas:
>> >> >
>> >> >         #if __ARCH__ >= 10 || !defined(__ARCH__)
>> >> >
>> >> > would use the asm on z196+ or on compilers too old to provide __ARCH__
>> >> > (and building for a more minimal baseline ISA would not be supported
>> >> > on such compilers unless you manually add -D__ARCH__=5 or whatever to
>> >> > CFLAGS).
>> >> >
>> >> > I'm fine with waiting to add those pp conditionals until if/when
>> >> > someone actually wants to use the lower baseline ISA, if you don't
>> >> > want to do it now. I am hesitant to add new ISA-forcing logic to
>> >> > configure, though (see the other reply on that). Would it be bad to
>> >> > have the build fail with low default -march? If so, maybe the
>> >> > configure logic could check for !defined(__ARCH__) and then do a
>> >> > compile test to define __ARCH__ on its own, and we could use the above
>> >> > logic?
>> >> >
>> >> > Rich

[-- Attachment #2: 0001-Add-single-instruction-s390x-math-functions.patch --]
[-- Type: application/octet-stream, Size: 9121 bytes --]

From a2191414271c21eaf8ee756f848b92e6d0e5ae73 Mon Sep 17 00:00:00 2001
From: David Edelsohn <dje.gcc@gmail.com>
Date: Fri, 9 Jun 2017 10:28:27 -0400
Subject: [PATCH 1/2] Add single instruction s390x math functions.

---
 src/math/s390x/ceil.c       | 7 +++++++
 src/math/s390x/ceilf.c      | 7 +++++++
 src/math/s390x/ceill.c      | 7 +++++++
 src/math/s390x/fabs.c       | 7 +++++++
 src/math/s390x/fabsf.c      | 7 +++++++
 src/math/s390x/fabsl.c      | 7 +++++++
 src/math/s390x/floor.c      | 7 +++++++
 src/math/s390x/floorf.c     | 7 +++++++
 src/math/s390x/floorl.c     | 7 +++++++
 src/math/s390x/nearbyint.c  | 7 +++++++
 src/math/s390x/nearbyintf.c | 7 +++++++
 src/math/s390x/nearbyintl.c | 7 +++++++
 src/math/s390x/rint.c       | 7 +++++++
 src/math/s390x/rintf.c      | 7 +++++++
 src/math/s390x/rintl.c      | 7 +++++++
 src/math/s390x/round.c      | 7 +++++++
 src/math/s390x/roundf.c     | 7 +++++++
 src/math/s390x/roundl.c     | 7 +++++++
 src/math/s390x/sqrt.c       | 7 +++++++
 src/math/s390x/sqrtf.c      | 7 +++++++
 src/math/s390x/sqrtl.c      | 7 +++++++
 src/math/s390x/trunc.c      | 7 +++++++
 src/math/s390x/truncf.c     | 7 +++++++
 src/math/s390x/truncl.c     | 7 +++++++
 24 files changed, 168 insertions(+)
 create mode 100644 src/math/s390x/ceil.c
 create mode 100644 src/math/s390x/ceilf.c
 create mode 100644 src/math/s390x/ceill.c
 create mode 100644 src/math/s390x/fabs.c
 create mode 100644 src/math/s390x/fabsf.c
 create mode 100644 src/math/s390x/fabsl.c
 create mode 100644 src/math/s390x/floor.c
 create mode 100644 src/math/s390x/floorf.c
 create mode 100644 src/math/s390x/floorl.c
 create mode 100644 src/math/s390x/nearbyint.c
 create mode 100644 src/math/s390x/nearbyintf.c
 create mode 100644 src/math/s390x/nearbyintl.c
 create mode 100644 src/math/s390x/rint.c
 create mode 100644 src/math/s390x/rintf.c
 create mode 100644 src/math/s390x/rintl.c
 create mode 100644 src/math/s390x/round.c
 create mode 100644 src/math/s390x/roundf.c
 create mode 100644 src/math/s390x/roundl.c
 create mode 100644 src/math/s390x/sqrt.c
 create mode 100644 src/math/s390x/sqrtf.c
 create mode 100644 src/math/s390x/sqrtl.c
 create mode 100644 src/math/s390x/trunc.c
 create mode 100644 src/math/s390x/truncf.c
 create mode 100644 src/math/s390x/truncl.c

diff --git a/src/math/s390x/ceil.c b/src/math/s390x/ceil.c
new file mode 100644
index 0000000..2d0b422
--- /dev/null
+++ b/src/math/s390x/ceil.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double ceil(double x)
+{
+	__asm__ ("fidbra %0, 6, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/ceilf.c b/src/math/s390x/ceilf.c
new file mode 100644
index 0000000..94260e6
--- /dev/null
+++ b/src/math/s390x/ceilf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float ceilf(float x)
+{
+	__asm__ ("fiebra %0, 6, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/ceill.c b/src/math/s390x/ceill.c
new file mode 100644
index 0000000..2ee4a5b
--- /dev/null
+++ b/src/math/s390x/ceill.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double ceill(long double x)
+{
+	__asm__ ("fixbra %0, 6, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/fabs.c b/src/math/s390x/fabs.c
new file mode 100644
index 0000000..0c569a2
--- /dev/null
+++ b/src/math/s390x/fabs.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double fabs(double x)
+{
+	__asm__ ("lpdbr %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/fabsf.c b/src/math/s390x/fabsf.c
new file mode 100644
index 0000000..99f884c
--- /dev/null
+++ b/src/math/s390x/fabsf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float fabsf(float x)
+{
+	__asm__ ("lpebr %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/fabsl.c b/src/math/s390x/fabsl.c
new file mode 100644
index 0000000..f543ef0
--- /dev/null
+++ b/src/math/s390x/fabsl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double fabsl(long double x)
+{
+	__asm__ ("lpxbr %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/floor.c b/src/math/s390x/floor.c
new file mode 100644
index 0000000..d4958eb
--- /dev/null
+++ b/src/math/s390x/floor.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double floor(double x)
+{
+	__asm__ ("fidbra %0, 7, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/floorf.c b/src/math/s390x/floorf.c
new file mode 100644
index 0000000..af06471
--- /dev/null
+++ b/src/math/s390x/floorf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float floorf(float x)
+{
+	__asm__ ("fiebra %0, 7, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/floorl.c b/src/math/s390x/floorl.c
new file mode 100644
index 0000000..0df4be1
--- /dev/null
+++ b/src/math/s390x/floorl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double floorl(long double x)
+{
+	__asm__ ("fixbra %0, 7, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/nearbyint.c b/src/math/s390x/nearbyint.c
new file mode 100644
index 0000000..0d3359f
--- /dev/null
+++ b/src/math/s390x/nearbyint.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double nearbyint(double x)
+{
+	__asm__ ("fidbra %0, 0, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/nearbyintf.c b/src/math/s390x/nearbyintf.c
new file mode 100644
index 0000000..3ad8695
--- /dev/null
+++ b/src/math/s390x/nearbyintf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float nearbyintf(float x)
+{
+	__asm__ ("fiebra %0, 0, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/nearbyintl.c b/src/math/s390x/nearbyintl.c
new file mode 100644
index 0000000..9d900f9
--- /dev/null
+++ b/src/math/s390x/nearbyintl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double nearbyintl(long double x)
+{
+	__asm__ ("fixbra %0, 0, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/rint.c b/src/math/s390x/rint.c
new file mode 100644
index 0000000..bdd62b3
--- /dev/null
+++ b/src/math/s390x/rint.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double rint(double x)
+{
+	__asm__ ("fidbr %0, 0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/rintf.c b/src/math/s390x/rintf.c
new file mode 100644
index 0000000..c1e98c5
--- /dev/null
+++ b/src/math/s390x/rintf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float rintf(float x)
+{
+	__asm__ ("fiebr %0, 0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/rintl.c b/src/math/s390x/rintl.c
new file mode 100644
index 0000000..4856825
--- /dev/null
+++ b/src/math/s390x/rintl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double rintl(long double x)
+{
+	__asm__ ("fixbr %0, 0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/round.c b/src/math/s390x/round.c
new file mode 100644
index 0000000..10b3159
--- /dev/null
+++ b/src/math/s390x/round.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double round(double x)
+{
+	__asm__ ("fidbra %0, 1, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/roundf.c b/src/math/s390x/roundf.c
new file mode 100644
index 0000000..28758ce
--- /dev/null
+++ b/src/math/s390x/roundf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float roundf(float x)
+{
+	__asm__ ("fiebra %0, 1, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/roundl.c b/src/math/s390x/roundl.c
new file mode 100644
index 0000000..deef38e
--- /dev/null
+++ b/src/math/s390x/roundl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double roundl(long double x)
+{
+	__asm__ ("fixbra %0, 1, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/sqrt.c b/src/math/s390x/sqrt.c
new file mode 100644
index 0000000..7407a5c
--- /dev/null
+++ b/src/math/s390x/sqrt.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double sqrt(double x)
+{
+	__asm__ ("sqdbr %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/sqrtf.c b/src/math/s390x/sqrtf.c
new file mode 100644
index 0000000..fbfdf6a
--- /dev/null
+++ b/src/math/s390x/sqrtf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float sqrtf(float x)
+{
+	__asm__ ("sqebr %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/sqrtl.c b/src/math/s390x/sqrtl.c
new file mode 100644
index 0000000..9b14d67
--- /dev/null
+++ b/src/math/s390x/sqrtl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double sqrtl(long double x)
+{
+	__asm__ ("sqxbr %0, %1" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/trunc.c b/src/math/s390x/trunc.c
new file mode 100644
index 0000000..24d9ed7
--- /dev/null
+++ b/src/math/s390x/trunc.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+double trunc(double x)
+{
+	__asm__ ("fidbra %0, 5, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/truncf.c b/src/math/s390x/truncf.c
new file mode 100644
index 0000000..a59e52a
--- /dev/null
+++ b/src/math/s390x/truncf.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+float truncf(float x)
+{
+	__asm__ ("fiebra %0, 5, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
diff --git a/src/math/s390x/truncl.c b/src/math/s390x/truncl.c
new file mode 100644
index 0000000..98afa2d
--- /dev/null
+++ b/src/math/s390x/truncl.c
@@ -0,0 +1,7 @@
+#include <math.h>
+
+long double truncl(long double x)
+{
+	__asm__ ("fixbra %0, 5, %1, 4" : "=f"(x) : "f"(x));
+	return x;
+}
-- 
1.8.3.1


[-- Attachment #3: 0002-Add-support-for-pre-z196-architecture.patch --]
[-- Type: application/octet-stream, Size: 10236 bytes --]

From 87d9584ec29f9e5b0cffa5c8dc30a599142c8b30 Mon Sep 17 00:00:00 2001
From: David Edelsohn <dje.gcc@gmail.com>
Date: Tue, 20 Jun 2017 23:23:28 -0400
Subject: [PATCH 2/2] Add support for pre-z196 architecture.

---
 src/math/s390x/ceil.c       | 8 ++++++++
 src/math/s390x/ceilf.c      | 8 ++++++++
 src/math/s390x/ceill.c      | 8 ++++++++
 src/math/s390x/fabs.c       | 8 ++++++++
 src/math/s390x/fabsf.c      | 8 ++++++++
 src/math/s390x/fabsl.c      | 8 ++++++++
 src/math/s390x/floor.c      | 8 ++++++++
 src/math/s390x/floorf.c     | 8 ++++++++
 src/math/s390x/floorl.c     | 8 ++++++++
 src/math/s390x/nearbyint.c  | 8 ++++++++
 src/math/s390x/nearbyintf.c | 8 ++++++++
 src/math/s390x/nearbyintl.c | 8 ++++++++
 src/math/s390x/rint.c       | 8 ++++++++
 src/math/s390x/rintf.c      | 8 ++++++++
 src/math/s390x/rintl.c      | 8 ++++++++
 src/math/s390x/round.c      | 8 ++++++++
 src/math/s390x/roundf.c     | 8 ++++++++
 src/math/s390x/roundl.c     | 8 ++++++++
 src/math/s390x/sqrt.c       | 8 ++++++++
 src/math/s390x/sqrtf.c      | 8 ++++++++
 src/math/s390x/sqrtl.c      | 8 ++++++++
 src/math/s390x/trunc.c      | 8 ++++++++
 src/math/s390x/truncf.c     | 8 ++++++++
 src/math/s390x/truncl.c     | 8 ++++++++
 24 files changed, 192 insertions(+)

diff --git a/src/math/s390x/ceil.c b/src/math/s390x/ceil.c
index 2d0b422..9cfabec 100644
--- a/src/math/s390x/ceil.c
+++ b/src/math/s390x/ceil.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 double ceil(double x)
 {
 	__asm__ ("fidbra %0, 6, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../ceil.c"
+
+#endif
diff --git a/src/math/s390x/ceilf.c b/src/math/s390x/ceilf.c
index 94260e6..74dce68 100644
--- a/src/math/s390x/ceilf.c
+++ b/src/math/s390x/ceilf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 float ceilf(float x)
 {
 	__asm__ ("fiebra %0, 6, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../ceilf.c"
+
+#endif
diff --git a/src/math/s390x/ceill.c b/src/math/s390x/ceill.c
index 2ee4a5b..abafa08 100644
--- a/src/math/s390x/ceill.c
+++ b/src/math/s390x/ceill.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 long double ceill(long double x)
 {
 	__asm__ ("fixbra %0, 6, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../ceill.c"
+
+#endif
diff --git a/src/math/s390x/fabs.c b/src/math/s390x/fabs.c
index 0c569a2..3ee2835 100644
--- a/src/math/s390x/fabs.c
+++ b/src/math/s390x/fabs.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 double fabs(double x)
 {
 	__asm__ ("lpdbr %0, %1" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../fabs.c"
+
+#endif
diff --git a/src/math/s390x/fabsf.c b/src/math/s390x/fabsf.c
index 99f884c..e0d9951 100644
--- a/src/math/s390x/fabsf.c
+++ b/src/math/s390x/fabsf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 float fabsf(float x)
 {
 	__asm__ ("lpebr %0, %1" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../fabsf.c"
+
+#endif
diff --git a/src/math/s390x/fabsl.c b/src/math/s390x/fabsl.c
index f543ef0..2858182 100644
--- a/src/math/s390x/fabsl.c
+++ b/src/math/s390x/fabsl.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 long double fabsl(long double x)
 {
 	__asm__ ("lpxbr %0, %1" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../fabsl.c"
+
+#endif
diff --git a/src/math/s390x/floor.c b/src/math/s390x/floor.c
index d4958eb..626aea1 100644
--- a/src/math/s390x/floor.c
+++ b/src/math/s390x/floor.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 double floor(double x)
 {
 	__asm__ ("fidbra %0, 7, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../floor.c"
+
+#endif
diff --git a/src/math/s390x/floorf.c b/src/math/s390x/floorf.c
index af06471..5f4fc41 100644
--- a/src/math/s390x/floorf.c
+++ b/src/math/s390x/floorf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 float floorf(float x)
 {
 	__asm__ ("fiebra %0, 7, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../floorf.c"
+
+#endif
diff --git a/src/math/s390x/floorl.c b/src/math/s390x/floorl.c
index 0df4be1..8411a3e 100644
--- a/src/math/s390x/floorl.c
+++ b/src/math/s390x/floorl.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 long double floorl(long double x)
 {
 	__asm__ ("fixbra %0, 7, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../floorl.c"
+
+#endif
diff --git a/src/math/s390x/nearbyint.c b/src/math/s390x/nearbyint.c
index 0d3359f..56dea06 100644
--- a/src/math/s390x/nearbyint.c
+++ b/src/math/s390x/nearbyint.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 double nearbyint(double x)
 {
 	__asm__ ("fidbra %0, 0, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../nearbyint.c"
+
+#endif
diff --git a/src/math/s390x/nearbyintf.c b/src/math/s390x/nearbyintf.c
index 3ad8695..6155adc 100644
--- a/src/math/s390x/nearbyintf.c
+++ b/src/math/s390x/nearbyintf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 float nearbyintf(float x)
 {
 	__asm__ ("fiebra %0, 0, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../nearbyintf.c"
+
+#endif
diff --git a/src/math/s390x/nearbyintl.c b/src/math/s390x/nearbyintl.c
index 9d900f9..4e186e1 100644
--- a/src/math/s390x/nearbyintl.c
+++ b/src/math/s390x/nearbyintl.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 long double nearbyintl(long double x)
 {
 	__asm__ ("fixbra %0, 0, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../nearbyintl.c"
+
+#endif
diff --git a/src/math/s390x/rint.c b/src/math/s390x/rint.c
index bdd62b3..7f1f335 100644
--- a/src/math/s390x/rint.c
+++ b/src/math/s390x/rint.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 double rint(double x)
 {
 	__asm__ ("fidbr %0, 0, %1" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../rint.c"
+
+#endif
diff --git a/src/math/s390x/rintf.c b/src/math/s390x/rintf.c
index c1e98c5..17fd1fe 100644
--- a/src/math/s390x/rintf.c
+++ b/src/math/s390x/rintf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 float rintf(float x)
 {
 	__asm__ ("fiebr %0, 0, %1" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../rintf.c"
+
+#endif
diff --git a/src/math/s390x/rintl.c b/src/math/s390x/rintl.c
index 4856825..d31e833 100644
--- a/src/math/s390x/rintl.c
+++ b/src/math/s390x/rintl.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 long double rintl(long double x)
 {
 	__asm__ ("fixbr %0, 0, %1" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../rintl.c"
+
+#endif
diff --git a/src/math/s390x/round.c b/src/math/s390x/round.c
index 10b3159..6ea6fdc 100644
--- a/src/math/s390x/round.c
+++ b/src/math/s390x/round.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 double round(double x)
 {
 	__asm__ ("fidbra %0, 1, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../round.c"
+
+#endif
diff --git a/src/math/s390x/roundf.c b/src/math/s390x/roundf.c
index 28758ce..43ea2c2 100644
--- a/src/math/s390x/roundf.c
+++ b/src/math/s390x/roundf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 float roundf(float x)
 {
 	__asm__ ("fiebra %0, 1, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../roundf.c"
+
+#endif
diff --git a/src/math/s390x/roundl.c b/src/math/s390x/roundl.c
index deef38e..7f4d11d 100644
--- a/src/math/s390x/roundl.c
+++ b/src/math/s390x/roundl.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 long double roundl(long double x)
 {
 	__asm__ ("fixbra %0, 1, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../roundl.c"
+
+#endif
diff --git a/src/math/s390x/sqrt.c b/src/math/s390x/sqrt.c
index 7407a5c..e326b2c 100644
--- a/src/math/s390x/sqrt.c
+++ b/src/math/s390x/sqrt.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 double sqrt(double x)
 {
 	__asm__ ("sqdbr %0, %1" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../sqrt.c"
+
+#endif
diff --git a/src/math/s390x/sqrtf.c b/src/math/s390x/sqrtf.c
index fbfdf6a..65af7a9 100644
--- a/src/math/s390x/sqrtf.c
+++ b/src/math/s390x/sqrtf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 float sqrtf(float x)
 {
 	__asm__ ("sqebr %0, %1" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../sqrtf.c"
+
+#endif
diff --git a/src/math/s390x/sqrtl.c b/src/math/s390x/sqrtl.c
index 9b14d67..4918b0b 100644
--- a/src/math/s390x/sqrtl.c
+++ b/src/math/s390x/sqrtl.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 long double sqrtl(long double x)
 {
 	__asm__ ("sqxbr %0, %1" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../sqrtl.c"
+
+#endif
diff --git a/src/math/s390x/trunc.c b/src/math/s390x/trunc.c
index 24d9ed7..4178ef0 100644
--- a/src/math/s390x/trunc.c
+++ b/src/math/s390x/trunc.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 double trunc(double x)
 {
 	__asm__ ("fidbra %0, 5, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../trunc.c"
+
+#endif
diff --git a/src/math/s390x/truncf.c b/src/math/s390x/truncf.c
index a59e52a..90b667e 100644
--- a/src/math/s390x/truncf.c
+++ b/src/math/s390x/truncf.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 float truncf(float x)
 {
 	__asm__ ("fiebra %0, 5, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../truncf.c"
+
+#endif
diff --git a/src/math/s390x/truncl.c b/src/math/s390x/truncl.c
index 98afa2d..9535114 100644
--- a/src/math/s390x/truncl.c
+++ b/src/math/s390x/truncl.c
@@ -1,7 +1,15 @@
 #include <math.h>
 
+#if defined (__HTM__) || __ARCH__ >= 9
+
 long double truncl(long double x)
 {
 	__asm__ ("fixbra %0, 5, %1, 4" : "=f"(x) : "f"(x));
 	return x;
 }
+
+#else
+
+#include "../truncl.c"
+
+#endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] s390x: Add single instruction math functions
  2017-06-21  3:34                                       ` David Edelsohn
@ 2017-06-23 19:31                                         ` Rich Felker
  0 siblings, 0 replies; 34+ messages in thread
From: Rich Felker @ 2017-06-23 19:31 UTC (permalink / raw)
  To: musl

On Tue, Jun 20, 2017 at 11:34:55PM -0400, David Edelsohn wrote:
> __ARCH__ = 9 for z196.
> 
> Attached is the original patch to add the FP math instructions and a
> second patch using the test proposed by my colleague and following the
> template of arm/sqrt.c to include the generic C code for earlier
> architectures.

OK, I'm merging these two and committing them with a note on the
motivation. I also did basic testing of builds with and without
support for the math instructions and they built successfully, so I
think all is well. Thanks.

Rich


> On Tue, Jun 20, 2017 at 9:20 PM, Rich Felker <dalias@libc.org> wrote:
> > On Tue, Jun 20, 2017 at 09:07:08PM -0400, David Edelsohn wrote:
> >> Rich,
> >>
> >> Another option is a test recommended by a colleague
> >>
> >> #if (__HTM__ || __ARCH__ > z196)
> >>
> >> __HTM__ is defined in earlier releases of GCC and is enabled in zEC12,
> >> so it can be used as a proxy for the architecture in earlier compiler
> >> releases.
> >>
> >> Would that be acceptable?
> >
> > Indeed, __HTM__ gets defined for me on gcc 6.3 with -march=zEC12 but
> > not with -march=z196. If that's acceptable to you I think it's okay;
> > users could also build with "-march=z196 -D__ARCH__=???"  to get the
> > math insns on baseline z196. BTW what is the actual value for __ARCH__
> > indicating z196? I think I figured it out from the gcc source as 10.
> > The "z196" you used in the above example isn't really a macro, is it?
> > If so that's a bad namespace violation in gcc that needs to be
> > fixed...
> >
> > Rich
> >
> >
> >> On Tue, Jun 20, 2017 at 8:49 PM, Rich Felker <dalias@libc.org> wrote:
> >> > On Sun, Jun 18, 2017 at 01:12:15PM -0400, David Edelsohn wrote:
> >> >> How can we move forward with this patch?
> >> >>
> >> >> I would prefer to avoid the __ARCH__ complexity until there is a clear
> >> >> user requirement.
> >> >>
> >> >> Thanks, David
> >> >
> >> > Rob Landley informed me that the s390x environment he's building with
> >> > mkroot (https://github.com/landley/mkroot) for testing under qemu
> >> > system level emulation is running a kernel built for z900. If qemu can
> >> > emulate newer machines, this may just be an oversight that can be
> >> > changed by reconfiguring, but it does indicate that z900 seems to be
> >> > supported by kernel, and that there's at least someone using the
> >> > baseline ISA level now.
> >> >
> >> > For what it's worth I agree that we've spent an inordinate amount of
> >> > time on this topic, and I apologize. I just don't want it to turn into
> >> > a regression.
> >> >
> >> > Rich
> >> >
> >> >
> >> >> On Wed, Jun 14, 2017 at 7:44 PM, Rich Felker <dalias@libc.org> wrote:
> >> >> > On Mon, Jun 12, 2017 at 09:28:52AM -0400, David Edelsohn wrote:
> >> >> >> >> The following IBM table of supported and tested systems
> >> >> >> >>
> >> >> >> >> https://www-03.ibm.com/systems/z/os/linux/resources/testedplatforms.html
> >> >> >> >>
> >> >> >> >> shows that RHEL 7 and SLES 12 require at least z196, and Ubuntu 16.04
> >> >> >> >> requires at least zEC12.
> >> >> >> >>
> >> >> >> >> I can't find any official hardware requirements description for Alpine
> >> >> >> >> Linux. I tend to doubt that users would run it on older hardware,
> >> >> >> >> especially hardware no longer supported by other, modern Linux
> >> >> >> >> distributions.
> >> >> >> >>
> >> >> >> >> Building musl libc on older hardware is a nice accomplishment, but
> >> >> >> >> investing effort and complexity to maintain support probably isn't
> >> >> >> >> useful to any musl libc user and probably isn't a productive use of
> >> >> >> >> developer resources.
> >> >> >> >>
> >> >> >> >> I will continue to inquire if there is a simple technique to accomplish this.
> >> >> >>
> >> >> >> Apparently GCC 7.1 added architecture macros.
> >> >> >>
> >> >> >> As Tuan referenced, Alpine Linux also requires z196 as the minimum
> >> >> >> architecture level.  I believe that it would be better for s390-musl
> >> >> >> to default to z196 ISA than for musl to require GCC 7.1.
> >> >> >
> >> >> > I agree we shouldn't "require GCC 7.1", but using the macros does not
> >> >> > imply such a requirement. For example:
> >> >> >
> >> >> >         #if __ARCH__ >= 10
> >> >> >
> >> >> > would only use the asm on z196+ (if I got the number right) with GCC
> >> >> > 7.1+ (no asm on older compilers), whereas:
> >> >> >
> >> >> >         #if __ARCH__ >= 10 || !defined(__ARCH__)
> >> >> >
> >> >> > would use the asm on z196+ or on compilers too old to provide __ARCH__
> >> >> > (and building for a more minimal baseline ISA would not be supported
> >> >> > on such compilers unless you manually add -D__ARCH__=5 or whatever to
> >> >> > CFLAGS).
> >> >> >
> >> >> > I'm fine with waiting to add those pp conditionals until if/when
> >> >> > someone actually wants to use the lower baseline ISA, if you don't
> >> >> > want to do it now. I am hesitant to add new ISA-forcing logic to
> >> >> > configure, though (see the other reply on that). Would it be bad to
> >> >> > have the build fail with low default -march? If so, maybe the
> >> >> > configure logic could check for !defined(__ARCH__) and then do a
> >> >> > compile test to define __ARCH__ on its own, and we could use the above
> >> >> > logic?
> >> >> >
> >> >> > Rich





^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2017-06-23 19:31 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-09 14:51 [PATCH] s390x: Add single instruction math functions David Edelsohn
2017-06-10 15:36 ` Szabolcs Nagy
2017-06-10 17:25   ` David Edelsohn
2017-06-10 18:29     ` Szabolcs Nagy
2017-06-10 18:53       ` David Edelsohn
2017-06-10 19:48         ` Rich Felker
2017-06-10 20:22           ` David Edelsohn
2017-06-10 21:28             ` Szabolcs Nagy
2017-06-10 21:44               ` David Edelsohn
2017-06-10 21:48                 ` David Edelsohn
2017-06-11  2:20                   ` Rich Felker
2017-06-11 10:19                     ` Szabolcs Nagy
2017-06-11 15:04                       ` Rich Felker
2017-06-11 16:45                         ` Szabolcs Nagy
2017-06-11 21:45                           ` Rich Felker
2017-06-12  2:46                       ` David Edelsohn
2017-06-12  4:36                         ` Tuan M. Hoang
2017-06-12  9:03                         ` Szabolcs Nagy
2017-06-12 13:28                           ` David Edelsohn
2017-06-12 13:54                             ` David Edelsohn
2017-06-12 20:28                               ` Szabolcs Nagy
2017-06-12 21:02                                 ` David Edelsohn
2017-06-13 15:55                                   ` Szabolcs Nagy
2017-06-14 23:34                                     ` Rich Felker
2017-06-14 23:40                                       ` A. Wilcox
2017-06-14 23:44                             ` Rich Felker
2017-06-15 12:18                               ` David Edelsohn
2017-06-18 17:12                               ` David Edelsohn
2017-06-21  0:49                                 ` Rich Felker
2017-06-21  1:07                                   ` David Edelsohn
2017-06-21  1:20                                     ` Rich Felker
2017-06-21  3:34                                       ` David Edelsohn
2017-06-23 19:31                                         ` Rich Felker
2017-06-10 21:37             ` Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).