From: zhangfei <zhang_fei_0403@163.com>
To: dalias@libc.org, musl@lists.openwall.com
Cc: zhangfei <zhangfei@nj.iscas.ac.cn>
Subject: [musl] [PATCH 1/3] RISC-V: Optimize memset
Date: Wed, 7 Jun 2023 18:07:08 +0800 [thread overview]
Message-ID: <20230607100710.4286-2-zhang_fei_0403@163.com> (raw)
In-Reply-To: <20230607100710.4286-1-zhang_fei_0403@163.com>
From: zhangfei <zhangfei@nj.iscas.ac.cn>
This code is based on linux/arch/riscv/lib/memset.S. Removed macro definition and modified
to support RISCV64.
When the amount of data in the source code is less than 16 bytes or after loop tail
processing, byte storage is used. Here we refer to musl/src/string/memset.c, and modify it
to fill head and tail with minimal branching.
Signed-off-by: Zhang Fei<zhangfei@nj.iscas.ac.cn>
---
src/string/riscv64/memset.S | 136 ++++++++++++++++++++++++++++++++++++
1 file changed, 136 insertions(+)
create mode 100644 src/string/riscv64/memset.S
diff --git a/src/string/riscv64/memset.S b/src/string/riscv64/memset.S
new file mode 100644
index 0000000..f8663d7
--- /dev/null
+++ b/src/string/riscv64/memset.S
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013 Regents of the University of California
+ */
+
+#define SZREG 8
+#define REG_S sd
+
+.global memset
+.type memset,@function
+memset:
+ move t0, a0 /* Preserve return value */
+
+ /* Defer to byte-oriented fill for small sizes */
+ sltiu a3, a2, 16
+ bnez a3, 4f
+
+ /*
+ * Round to nearest XLEN-aligned address
+ * greater than or equal to start address
+ */
+ addi a3, t0, SZREG-1
+ andi a3, a3, ~(SZREG-1)
+ beq a3, t0, 2f /* Skip if already aligned */
+ /* Handle initial misalignment */
+ sub a4, a3, t0
+1:
+ sb a1, 0(t0)
+ addi t0, t0, 1
+ bltu t0, a3, 1b
+ sub a2, a2, a4 /* Update count */
+
+2:
+ andi a1, a1, 0xff
+ slli a3, a1, 8
+ or a1, a3, a1
+ slli a3, a1, 16
+ or a1, a3, a1
+ slli a3, a1, 32
+ or a1, a3, a1
+
+ /* Calculate end address */
+ andi a4, a2, ~(SZREG-1)
+ add a3, t0, a4
+
+ andi a4, a4, 31*SZREG /* Calculate remainder */
+ beqz a4, 3f /* Shortcut if no remainder */
+ neg a4, a4
+ addi a4, a4, 32*SZREG /* Calculate initial offset */
+
+ /* Adjust start address with offset */
+ sub t0, t0, a4
+
+ /* Jump into loop body */
+ /* Assumes 64-bit instruction lengths */
+ la a5, 3f
+ srli a4, a4, 1
+ add a5, a5, a4
+ jr a5
+3:
+ REG_S a1, 0(t0)
+ REG_S a1, SZREG(t0)
+ REG_S a1, 2*SZREG(t0)
+ REG_S a1, 3*SZREG(t0)
+ REG_S a1, 4*SZREG(t0)
+ REG_S a1, 5*SZREG(t0)
+ REG_S a1, 6*SZREG(t0)
+ REG_S a1, 7*SZREG(t0)
+ REG_S a1, 8*SZREG(t0)
+ REG_S a1, 9*SZREG(t0)
+ REG_S a1, 10*SZREG(t0)
+ REG_S a1, 11*SZREG(t0)
+ REG_S a1, 12*SZREG(t0)
+ REG_S a1, 13*SZREG(t0)
+ REG_S a1, 14*SZREG(t0)
+ REG_S a1, 15*SZREG(t0)
+ REG_S a1, 16*SZREG(t0)
+ REG_S a1, 17*SZREG(t0)
+ REG_S a1, 18*SZREG(t0)
+ REG_S a1, 19*SZREG(t0)
+ REG_S a1, 20*SZREG(t0)
+ REG_S a1, 21*SZREG(t0)
+ REG_S a1, 22*SZREG(t0)
+ REG_S a1, 23*SZREG(t0)
+ REG_S a1, 24*SZREG(t0)
+ REG_S a1, 25*SZREG(t0)
+ REG_S a1, 26*SZREG(t0)
+ REG_S a1, 27*SZREG(t0)
+ REG_S a1, 28*SZREG(t0)
+ REG_S a1, 29*SZREG(t0)
+ REG_S a1, 30*SZREG(t0)
+ REG_S a1, 31*SZREG(t0)
+ addi t0, t0, 32*SZREG
+ bltu t0, a3, 3b
+ andi a2, a2, SZREG-1 /* Update count */
+
+4:
+ /* Handle trailing misalignment */
+ beqz a2, 6f
+ add a3, t0, a2
+5:
+ /* Fill head and tail with minimal branching. Each
+ * conditional ensures that all the subsequently used
+ * offsets are well-defined and in the dest region. */
+ sb a1, 0(t0)
+ sb a1, -1(a3)
+ li a4, 2
+ bgeu a4, a2, 6f
+
+ sb a1, 1(t0)
+ sb a1, 2(t0)
+ sb a1, -2(a3)
+ sb a1, -3(a3)
+ li a4, 6
+ bgeu a4, a2, 6f
+
+ sb a1, 3(t0)
+ sb a1, -4(a3)
+ li a4, 8
+ bgeu a4, a2, 6f
+
+ sb a1, 4(t0)
+ sb a1, 5(t0)
+ sb a1, -5(a3)
+ li a4, 11
+ bgeu a4, a2, 6f
+
+ sb a1, 6(t0)
+ sb a1, -6(a3)
+ sb a1, -7(a3)
+ li a4, 14
+ bgeu a4, a2, 6f
+
+ sb a1, 7(t0)
+6:
+ ret
--
2.34.1
next prev parent reply other threads:[~2023-06-07 10:08 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-07 10:07 [musl] [PATCH 0/3] RISC-V: Optimize memset, memcpy and memmove zhangfei
2023-06-07 10:07 ` zhangfei [this message]
2023-06-07 12:57 ` [musl] [PATCH 1/3] RISC-V: Optimize memset Rich Felker
2023-06-07 10:07 ` [musl] [PATCH 2/3] RISC-V: Optimize memcpy zhangfei
2023-06-07 10:07 ` [musl] [PATCH 3/3] RISC-V: Optimize memmove zhangfei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230607100710.4286-2-zhang_fei_0403@163.com \
--to=zhang_fei_0403@163.com \
--cc=dalias@libc.org \
--cc=musl@lists.openwall.com \
--cc=zhangfei@nj.iscas.ac.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/musl/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).