From: Pincheng Wang <pincheng.plct@isrc.iscas.ac.cn>
To: musl@lists.openwall.com
Cc: pincheng.plct@isrc.iscas.ac.cn
Subject: [musl] [PATCH 1/1] riscv64: optimize memset implementation with vector extension
Date: Thu, 25 Sep 2025 21:15:57 +0800 [thread overview]
Message-ID: <20250925131557.8907-2-pincheng.plct@isrc.iscas.ac.cn> (raw)
In-Reply-To: <20250925131557.8907-1-pincheng.plct@isrc.iscas.ac.cn>
Use a head-tail filling strategy for small sizes and a dynamic vsetvli
approach for the vector loop to reduce branch overhead. Add conditional
compilation to fall back to a scalar implementation when __riscv_vector
is not defined at compile time.
Signed-off-by: Pincheng Wang <pincheng.plct@isrc.iscas.ac.cn>
---
src/string/riscv64/memset.S | 101 ++++++++++++++++++++++++++++++++++++
1 file changed, 101 insertions(+)
create mode 100644 src/string/riscv64/memset.S
diff --git a/src/string/riscv64/memset.S b/src/string/riscv64/memset.S
new file mode 100644
index 00000000..5fc6ee14
--- /dev/null
+++ b/src/string/riscv64/memset.S
@@ -0,0 +1,101 @@
+#ifdef __riscv_vector
+
+ .text
+ .global memset
+ .type memset, %function
+/* void *memset(void *s, int c, size_t n) -- RVV variant.
+ * In: a0 = s, a1 = c (only the low byte is stored), a2 = n. Out: a0 = s.
+ * Clobbers t0-t3, a2, vl/vtype and the LMUL=8 register group v0-v7.
+ */
+memset:
+ beqz a2, .Ldone /* n == 0: nothing to store */
+ mv t0, a0 /* t0 = write cursor; a0 stays intact as the return value */
+ li t3, 8
+ bltu a2, t3, .Lsmall /* 1 <= n <= 7: scalar head/tail stores */
+
+ /* Splat the fill byte across v0-v7 once, with vl = VLMAX. */
+ vsetvli t1, zero, e8, m8, ta, ma
+ vmv.v.x v0, a1
+
+.Lbulk:
+ vsetvli t1, a2, e8, m8, ta, ma /* t1 = bytes this pass, at most a2 */
+ vse8.v v0, (t0)
+ add t0, t0, t1 /* advance the cursor ... */
+ sub a2, a2, t1 /* ... and shrink the remaining count */
+ bnez a2, .Lbulk
+ ret
+
+/* Small sizes (1 <= n <= 7): overlapping byte stores from both ends.
+ * Each stage only writes offsets that are in bounds for every n that
+ * reaches it, so neither a per-byte loop nor vsetvli is needed.
+ */
+.Lsmall:
+ /* Stage 1: s[0], s[n-1] */
+ add t2, t0, a2 /* t2 = s + n (one past the last byte) */
+ sb a1, 0(t0)
+ sb a1, -1(t2)
+ li t3, 2
+ bleu a2, t3, .Ldone /* n <= 2 is complete */
+
+ /* Stage 2 (n >= 3): s[1], s[2], s[n-2], s[n-3] */
+ sb a1, 1(t0)
+ sb a1, 2(t0)
+ sb a1, -2(t2)
+ sb a1, -3(t2)
+ li t3, 6
+ bleu a2, t3, .Ldone /* n <= 6 is complete */
+
+ /* Stage 3 (only n == 7 gets here): s[3] and s[n-4] are the same byte */
+ sb a1, 3(t0)
+ sb a1, -4(t2)
+
+.Ldone:
+ ret
+.size memset, .-memset
+
+#else /* !__riscv_vector */
+
+ .text
+ .global memset
+ .type memset, %function
+/* void *memset(void *s, int c, size_t n) -- scalar fallback (no RVV).
+ * a0 = s (returned unchanged), a1 = c, a2 = n; clobbers a1 and t2-t5.
+ * Up to 3 bytes at each end use sb; n >= 7 adds aligned sw/sd stores. */
+memset:
+ beqz a2, .Ldone /* n == 0: nothing to store */
+ add t2, a0, a2 /* t2 = s + n (one past the last byte) */
+ sb a1, 0(a0) /* s[0] */
+ sb a1, -1(t2) /* s[n-1] */
+ li t3, 2
+ bleu a2, t3, .Ldone /* n <= 2 is complete */
+ sb a1, 1(a0) /* n >= 3: s[1], s[2], s[n-2], s[n-3] */
+ sb a1, 2(a0)
+ sb a1, -2(t2)
+ sb a1, -3(t2)
+ li t3, 6
+ bleu a2, t3, .Ldone /* n <= 6 is complete */
+ andi a1, a1, 0xff /* n >= 7: replicate the byte into all 8 bytes of a1 */
+ slli t3, a1, 8
+ or a1, a1, t3 /* 2 copies */
+ slli t3, a1, 16
+ or a1, a1, t3 /* 4 copies */
+ slli t3, a1, 32
+ or a1, a1, t3 /* 8 copies */
+ addi t4, a0, 3
+ andi t4, t4, -4 /* t4 = s rounded up to 4, so s <= t4 <= s+3 */
+ andi t5, t2, -4 /* t5 = end rounded down to 4, so end-3 <= t5 <= end */
+ sw a1, 0(t4) /* [t4, t4+4): in bounds because n >= 7 */
+ sw a1, -4(t5) /* [t5-4, t5): may overlap the store above */
+ addi t4, t4, 4
+ andi t4, t4, -8 /* 8-aligned start: previous t4 or t4+4 */
+ andi t5, t5, -8 /* 8-aligned stop: previous t5 or t5-4 */
+ bgeu t4, t5, .Ldone /* the two sw stores already met: done */
+.Lloop:
+ sd a1, 0(t4) /* aligned doubleword inside [s, s+n) */
+ addi t4, t4, 8
+ bltu t4, t5, .Lloop
+.Ldone:
+ ret
+.size memset, .-memset
+
+#endif
--
2.39.5
next prev parent reply other threads:[~2025-09-25 13:16 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-25 13:15 [musl] [PATCH 0/1] riscv64: Add RVV optimized memset implementation Pincheng Wang
2025-09-25 13:15 ` Pincheng Wang [this message]
2025-09-25 15:30 ` [musl] [PATCH 1/1] riscv64: optimize memset implementation with vector extension Yao Zi
2025-09-26 0:31 ` Pincheng Wang
2025-09-26 3:37 ` Markus Wichmann
2025-09-26 11:21 ` Pincheng Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250925131557.8907-2-pincheng.plct@isrc.iscas.ac.cn \
--to=pincheng.plct@isrc.iscas.ac.cn \
--cc=musl@lists.openwall.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/musl/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).