From 9b89d49cd9dae3eb1fd9745e7fc4b91f52d1659f Mon Sep 17 00:00:00 2001 From: Markus Wichmann Date: Sat, 16 Mar 2024 09:51:37 +0100 Subject: [PATCH 1/2] Add internal CPUID machinery. This is meant to provide a way for implementation-internal optimizations to be enabled without a __hwcap flag. The CPUID instruction provides an enormous amount of information, and capturing all of it unconditionally would be incredibly wasteful, especially with the diversity of implementations out there. So instead I condense exactly the information needed down to one bit per feature, for each interesting feature. For starters, the only features in here are the FMA and FMA4 extensions, but this leaves another 62 bits for other miscellaneous enhancements. I had initially planned to put the call to __init_cpuid() into the arch-specific CRT code, but it is not valid in there. I need access to static variables, and this is not possible in the PIE and dynamic linking cases directly after _start, but only after the relocations have been processed. So now I have put it in __libc_start_main, which every process will call between having processed the relocations and running application code. 
--- src/env/__libc_start_main.c | 2 ++ src/internal/x86_64/cpuid.c | 40 +++++++++++++++++++++++++++++++++++++ src/internal/x86_64/cpuid.h | 12 +++++++++++ 3 files changed, 54 insertions(+) create mode 100644 src/internal/x86_64/cpuid.c create mode 100644 src/internal/x86_64/cpuid.h diff --git a/src/env/__libc_start_main.c b/src/env/__libc_start_main.c index c5b277bd..7d7e9f9b 100644 --- a/src/env/__libc_start_main.c +++ b/src/env/__libc_start_main.c @@ -9,6 +9,7 @@ static void dummy(void) {} weak_alias(dummy, _init); +weak_alias(dummy, __init_cpuid); /* weak alias to the empty dummy(); presumably the no-op fallback wherever cpuid.c is not built - confirm */ extern weak hidden void (*const __init_array_start)(void), (*const __init_array_end)(void); @@ -38,6 +39,7 @@ void __init_libc(char **envp, char *pn) __init_tls(aux); __init_ssp((void *)aux[AT_RANDOM]); + __init_cpuid(); /* runs before the early return for non-setuid processes just below */ if (aux[AT_UID]==aux[AT_EUID] && aux[AT_GID]==aux[AT_EGID] && !aux[AT_SECURE]) return; diff --git a/src/internal/x86_64/cpuid.c b/src/internal/x86_64/cpuid.c new file mode 100644 index 00000000..6218ad62 --- /dev/null +++ b/src/internal/x86_64/cpuid.c @@ -0,0 +1,40 @@ +#include "x86_64/cpuid.h" + +uint64_t __cpuid; /* feature bitmask; X86_FEAT_* bits are OR-ed in by __init_cpuid() below */ + +struct regs { /* the four 32-bit register outputs of one CPUID invocation */ + uint32_t ax, bx, cx, dx; +}; + +static inline struct regs cpuid(uint32_t fn) /* run CPUID with EAX = fn and return the resulting EAX/EBX/ECX/EDX */ +{ + struct regs ret; + __asm__("cpuid" : "=a"(ret.ax), "=b"(ret.bx), "=c"(ret.cx), "=d"(ret.dx) : "a"(fn)); /* only EAX is supplied as input; ECX (sub-leaf) is not set */ + return ret; +} + +static inline int cpu_has_fma(void) /* nonzero if leaf 1 reports bit 12 of ECX */ +{ + struct regs r = cpuid(1); + return r.cx & 0x1000; /* NOTE(review): tests only the CPU feature bit; OSXSAVE/XCR0 (OS-enabled AVX state) is not checked here - confirm that is acceptable for users of X86_FEAT_FMA */ +} + +static inline int cpu_is_amd(void) /* 1 if the leaf 0 vendor string is "AuthenticAMD", else 0 */ +{ + struct regs r = cpuid(0); + return r.bx == 0x68747541 && r.cx == 0x444d4163 && r.dx == 0x69746e65; /* little-endian ASCII: EBX "Auth", EDX "enti", ECX "cAMD" */ +} + +static inline int cpu_has_fma4(void) /* nonzero if extended leaf 0x80000001 reports bit 16 of ECX */ +{ + struct regs r = cpuid(0x80000001); /* NOTE(review): the maximum extended leaf (0x80000000) is not queried first; the only caller gates this on cpu_is_amd() */ + return r.cx & 0x10000; +} + +void __init_cpuid(void) /* probe the CPU and record X86_FEAT_FMA / X86_FEAT_FMA4 in __cpuid; called from __init_libc() */ +{ + if (cpu_has_fma()) + __cpuid |= X86_FEAT_FMA; + if (cpu_is_amd() && cpu_has_fma4()) /* && short-circuits: the extended leaf is read only when the vendor check passes */ + __cpuid |= X86_FEAT_FMA4; +} diff --git a/src/internal/x86_64/cpuid.h b/src/internal/x86_64/cpuid.h new file mode 100644 index 00000000..40b66d3e --- /dev/null +++ b/src/internal/x86_64/cpuid.h @@ -0,0 +1,12 @@ 
+#ifndef X86_64_CPUID_H +#define X86_64_CPUID_H + +#include <stdint.h> +#include <features.h> +extern hidden uint64_t __cpuid; +void __init_cpuid(void); + +#define X86_FEAT_FMA 1 +#define X86_FEAT_FMA4 2 + +#endif -- 2.39.2