mailing list of musl libc
 help / color / mirror / code / Atom feed
ea6168705f36aa43edda8d5034ac6dd475b56008 blob 1122 bytes (raw)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
 
# void *memset(void *dest, int c, size_t n)
#
# x86-64, AT&T syntax.  Registers as read by the code below:
#   %rdi = dest, %sil = fill byte (low 8 bits of c), %rdx = n
# Returns the original dest in %rax.
# Writes %rax, %rcx, %rdx, %rsi, %rdi, %r8 and the flags; uses no stack.
# NOTE(review): "rep stosq" only fills upward when DF is clear on entry;
# the code does not clear it itself -- confirm the caller's ABI guarantees it.
.global memset
.type memset,@function
memset:
	movzbq	%sil, %rax		# rax = c truncated to one byte
	cmpq	$16, %rdx
	jb	.Lsmall			# n < 16: overlapping stores, no loop

	testl	%esi, %esi
	jnz	.Lbroadcast		# unlikely
.Lbroadcast_done:

	movq	%rdi, %r8		# keep dest; stosq advances %rdi

	testb	$7, %dil
	jnz	.Lmisaligned		# unlikely
.Lqword_fill:

	leaq	-1(%rdx), %rcx
	shrq	$3, %rcx		# rcx = (n - 1) / 8 qwords to store
	movq	%rax, -8(%rdi,%rdx)	# last 8 bytes; may overlap the run below
	rep stosq

	movq	%r8, %rax
	ret

.Lbroadcast:
	# 64-bit imul has 3-7 cycles latency
	movabsq	$0x0101010101010101, %rsi
	imulq	%rsi, %rax		# replicate the fill byte into all 8 bytes
	jmp	.Lbroadcast_done

# 8-byte alignment gives ~25% speedup on "rep stosq" memsets
# to L1 cache, compared to intentionally misaligned ones.
# It is a smaller win of ~15% on larger memsets to L2 too.
# Measured on Intel Sandy Bridge CPU (i7-2620M, 2.70GHz)
.Lmisaligned:
	movq	%rax, (%rdi)		# first 8 bytes, stored unaligned
.Ladvance:
	# Step dest forward (and n down) one byte at a time until dest is
	# 8-byte aligned.  At most 7 steps; n >= 16 here, so n stays >= 9.
	incq	%rdi
	decq	%rdx
	testb	$7, %dil
	jnz	.Ladvance
	jmp	.Lqword_fill


.Lsmall:
	# n is 0..15, so the low 32 bits of %rdx hold it completely.
	testl	%edx, %edx
	jz	.Lsmall_done

	movb	%al, (%rdi)		# byte 0
	movb	%al, -1(%rdi,%rdx)	# byte n-1
	cmpl	$2, %edx
	jbe	.Lsmall_done		# n = 1..2 finished

	movb	%al, 1(%rdi)		# byte 1
	movb	%al, -2(%rdi,%rdx)	# byte n-2
	# 32-bit imul has 3-4 cycles latency
	imull	$0x01010101, %eax	# replicate the fill byte into 4 bytes
	cmpl	$4, %edx
	jbe	.Lsmall_done		# n = 3..4 finished

	movl	%eax, (%rdi)		# bytes 0..3
	movl	%eax, -4(%rdi,%rdx)	# bytes n-4..n-1
	cmpl	$8, %edx
	jbe	.Lsmall_done		# n = 5..8 finished

	movl	%eax, 4(%rdi)		# bytes 4..7
	movl	%eax, -8(%rdi,%rdx)	# bytes n-8..n-5
.Lsmall_done:
	movq	%rdi, %rax		# %rdi is unmodified on this path
	ret
debug log:

solving 5c9e333 ...
found 5c9e333 in https://inbox.vuxu.org/musl/1423761423-30050-2-git-send-email-vda.linux@googlemail.com/
found 523caa0 in https://inbox.vuxu.org/musl/1423761423-30050-1-git-send-email-vda.linux@googlemail.com/
found 3cc8fcf in https://git.vuxu.org/mirror/musl/
preparing index
index prepared:
100644 3cc8fcf6b718295f4c977da2d7957d7d53d9c421	src/string/x86_64/memset.s

applying [1/2] https://inbox.vuxu.org/musl/1423761423-30050-1-git-send-email-vda.linux@googlemail.com/
diff --git a/src/string/x86_64/memset.s b/src/string/x86_64/memset.s
index 3cc8fcf..523caa0 100644


applying [2/2] https://inbox.vuxu.org/musl/1423761423-30050-2-git-send-email-vda.linux@googlemail.com/
diff --git a/src/string/x86_64/memset.s b/src/string/x86_64/memset.s
index 523caa0..5c9e333 100644

Checking patch src/string/x86_64/memset.s...
Applied patch src/string/x86_64/memset.s cleanly.
Checking patch src/string/x86_64/memset.s...
Applied patch src/string/x86_64/memset.s cleanly.

index at:
100644 ea6168705f36aa43edda8d5034ac6dd475b56008	src/string/x86_64/memset.s

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).