From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on inbox.vuxu.org X-Spam-Level: X-Spam-Status: No, score=-3.3 required=5.0 tests=HEADER_FROM_DIFFERENT_DOMAINS, MAILING_LIST_MULTI,RCVD_IN_DNSWL_MED,RCVD_IN_MSPIKE_H5, RCVD_IN_MSPIKE_WL,RCVD_IN_ZEN_BLOCKED_OPENDNS autolearn=ham autolearn_force=no version=3.4.4 Received: from second.openwall.net (second.openwall.net [193.110.157.125]) by inbox.vuxu.org (Postfix) with SMTP id 1617F2CC45 for ; Thu, 30 Oct 2025 13:10:02 +0100 (CET) Received: (qmail 13595 invoked by uid 550); 30 Oct 2025 12:09:45 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-ID: Reply-To: musl@lists.openwall.com x-ms-reactions: disallow Received: (qmail 13537 invoked from network); 30 Oct 2025 12:09:44 -0000 From: Pincheng Wang To: musl@lists.openwall.com Cc: pincheng.plct@isrc.iscas.ac.cn Date: Thu, 30 Oct 2025 20:09:08 +0800 Message-Id: <20251030120908.47909-2-pincheng.plct@isrc.iscas.ac.cn> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20251030120908.47909-1-pincheng.plct@isrc.iscas.ac.cn> References: <20251030120908.47909-1-pincheng.plct@isrc.iscas.ac.cn> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-CM-TRANSID:qwCowADXQWx7VQNp55s8AA--.2106S3 X-Coremail-Antispam: 1UD129KBjvJXoWxWF1kCw43Jw45Cw1fZrWfuFg_yoWrtF4xpa 18Cry3Gr4av3s7WFs3Ww1qvF43K3yF9Fn8W3Wa93W8Z3y8JFZIyFnrAa4UZFWxJF1Uur4a 9r4jgry5u3WUAaUanT9S1TB71UUUUU7qnTZGkaVYY2UrUUUUjbIjqfuFe4nvWSU5nxnvy2 9KBjDU0xBIdaVrnRJUUU9I14x267AKxVWUJVW8JwAFc2x0x2IEx4CE42xK8VAvwI8IcIk0 rVWrJVCq3wAFIxvE14AKwVWUJVWUGwA2048vs2IY020E87I2jVAFwI0_Jr4l82xGYIkIc2 x26xkF7I0E14v26r18M28lY4IEw2IIxxk0rwA2F7IY1VAKz4vEj48ve4kI8wA2z4x0Y4vE 2Ix0cI8IcVAFwI0_Jr0_JF4l84ACjcxK6xIIjxv20xvEc7CjxVAFwI0_Jr0_Gr1l84ACjc xK6I8E87Iv67AKxVWUJVW8JwA2z4x0Y4vEx4A2jsIEc7CjxVAFwI0_Gr0_Gr1UM2AIxVAI cxkEcVAq07x20xvEncxIr21l5I8CrVACY4xI64kE6c02F40Ex7xfMcIj6xIIjxv20xvE14 
v26r1j6r18McIj6I8E87Iv67AKxVWUJVW8JwAm72CE4IkC6x0Yz7v_Jr0_Gr1lF7xvr2IY c2Ij64vIr41lF7I21c0EjII2zVCS5cI20VAGYxC7MxAIw28IcxkI7VAKI48JMxC20s026x CaFVCjc4AY6r1j6r4UMI8I3I0E5I8CrVAFwI0_Jr0_Jr4lx2IqxVCjr7xvwVAFwI0_JrI_ JrWlx4CE17CEb7AF67AKxVWUXVWUAwCIc40Y0x0EwIxGrwCI42IY6xIIjxv20xvE14v26r 1j6r1xMIIF0xvE2Ix0cI8IcVCY1x0267AKxVWUJVW8JwCI42IY6xAIw20EY4v20xvaj40_ Jr0_JF4lIxAIcVC2z280aVAFwI0_Jr0_Gr1lIxAIcVC2z280aVCY1x0267AKxVWUJVW8Jb IYCTnIWIevJa73UjIFyTuYvjfU5VbkUUUUU X-Originating-IP: [120.227.57.48] X-CM-SenderInfo: pslquxhhqjh1xofwqxxvufhxpvfd2hldfou0/ Subject: [musl] [PATCH v3 1/1] riscv64: add runtime-detected vector optimized memset Add a RISC-V vector extension optimized memset implementation with runtime CPU capability detection via AT_HWCAP. The implementation provides both vector and scalar variants in a single binary. At process startup, __init_riscv_string_optimizations() queries AT_HWCAP to detect RVV support and selects the appropriate implementation via function pointer dispatch. This allows the same libc to run correctly on both vector-capable and non-vector RISC-V CPUs. The vector implementation uses m8 register grouping and processes data in vector-length chunks, providing significant performance improvements on RVV-capable hardware while maintaining compatibility with non-vector RISC-V systems. 
Signed-off-by: Pincheng Wang --- src/env/__libc_start_main.c | 3 +++ src/internal/libc.h | 3 +++ src/string/riscv64/memset.c | 4 ++++ src/string/riscv64/memset_dispatch.c | 36 ++++++++++++++++++++++++++++ src/string/riscv64/memset_vector.S | 28 ++++++++++++++++++++++ 5 files changed, 74 insertions(+) create mode 100644 src/string/riscv64/memset.c create mode 100644 src/string/riscv64/memset_dispatch.c create mode 100644 src/string/riscv64/memset_vector.S diff --git a/src/env/__libc_start_main.c b/src/env/__libc_start_main.c index c5b277bd..c23e63f4 100644 --- a/src/env/__libc_start_main.c +++ b/src/env/__libc_start_main.c @@ -38,6 +38,9 @@ void __init_libc(char **envp, char *pn) __init_tls(aux); __init_ssp((void *)aux[AT_RANDOM]); +#ifdef __riscv /* NOTE(review): this guard is presumably also true on 32-bit RISC-V targets, while this patch adds the definition only under src/string/riscv64/; confirm such builds still link */ + __init_riscv_string_optimizations(); +#endif if (aux[AT_UID]==aux[AT_EUID] && aux[AT_GID]==aux[AT_EGID] && !aux[AT_SECURE]) return; diff --git a/src/internal/libc.h b/src/internal/libc.h index 619bba86..28860b06 100644 --- a/src/internal/libc.h +++ b/src/internal/libc.h @@ -40,6 +40,9 @@ extern hidden struct __libc __libc; hidden void __init_libc(char **, char *); hidden void __init_tls(size_t *); hidden void __init_ssp(void *); +#ifdef __riscv +hidden void __init_riscv_string_optimizations(void); +#endif hidden void __libc_start_init(void); hidden void __funcs_on_exit(void); hidden void __funcs_on_quick_exit(void); diff --git a/src/string/riscv64/memset.c b/src/string/riscv64/memset.c new file mode 100644 index 00000000..11fa3032 --- /dev/null +++ b/src/string/riscv64/memset.c @@ -0,0 +1,4 @@ +/* Compile the generic C memset from ../memset.c under the name __memset_scalar, leaving the public memset symbol to memset_dispatch.c */ +#define memset __memset_scalar +#include "../memset.c" +#undef memset diff --git a/src/string/riscv64/memset_dispatch.c b/src/string/riscv64/memset_dispatch.c new file mode 100644 index 00000000..8e36d0f9 --- /dev/null +++ b/src/string/riscv64/memset_dispatch.c @@ -0,0 +1,36 @@ +#include +#include +#include +#include "libc.h" + +void *__memset_scalar(void *s, int 
c, size_t n); +void *__memset_vect(void *s, int c, size_t n); /* NOTE(review): neither prototype carries the hidden visibility given to __memset_ptr below; confirm whether exporting both helpers is intended */ + +/* Active memset implementation. The static initializer follows the compile-time __riscv_vector macro; __init_riscv_string_optimizations() later overwrites it from AT_HWCAP. */ +__attribute__((visibility("hidden"))) +#ifndef __riscv_vector +void *(*__memset_ptr)(void *, int, size_t) = __memset_scalar; +#else +void *(*__memset_ptr)(void *, int, size_t) = __memset_vect; +#endif + +void *memset(void *s, int c, size_t n) /* public entry point: forwards all arguments to the function __memset_ptr currently points at */ +{ + return __memset_ptr(s, c, n); +} + +static __inline int __has_rvv_via_hwcap(void) /* returns 1 when the V bit is set in AT_HWCAP, else 0 */ +{ + const unsigned long V_bit = (1ul << ('V' - 'A')); /* bit position = distance of letter V from letter A */ + unsigned long hwcap = __getauxval(AT_HWCAP); + return (hwcap & V_bit) != 0; +} + +__attribute__((visibility("hidden"))) +void __init_riscv_string_optimizations(void) /* picks the vector or scalar memset from AT_HWCAP; this patch calls it from __init_libc */ +{ + if (__has_rvv_via_hwcap()) + __memset_ptr = __memset_vect; + else + __memset_ptr = __memset_scalar; +} diff --git a/src/string/riscv64/memset_vector.S b/src/string/riscv64/memset_vector.S new file mode 100644 index 00000000..513645d3 --- /dev/null +++ b/src/string/riscv64/memset_vector.S @@ -0,0 +1,28 @@ + .text + .global __memset_vect + .option push + .option arch, +v /* enable V instructions until the matching .option pop */ +/* void *__memset_vect(void *s, int c, size_t n) + * a0 = s (dest), a1 = c (fill byte), a2 = n (size) + * Returns a0, which is never modified; t0 is the running store address. + */ +__memset_vect: + mv t0, a0 /* t0 walks the destination so a0 stays intact as the return value */ + beqz a2, .Ldone_vect /* nothing to store when n == 0 */ + + /* Fill the v0 register group with a1 once, at the maximum e8/m8 vector length. */ + vsetvli t1, zero, e8, m8, ta, ma + vmv.v.x v0, a1 + +.Lbulk_vect: + vsetvli t1, a2, e8, m8, ta, ma /* t1 = vl = bytes handled this pass, derived from the a2 bytes left */ + vse8.v v0, (t0) /* store vl fill bytes at t0 */ + add t0, t0, t1 /* advance destination by vl */ + sub a2, a2, t1 /* remaining -= vl */ + bnez a2, .Lbulk_vect /* loop while bytes remain */ + /* fallthrough */ + +.Ldone_vect: + ret +.size __memset_vect, .-__memset_vect /* NOTE(review): no .type directive accompanies this .size; check against the other assembly sources in the tree */ +.option pop -- 2.39.5