1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# void *memset(void *dest, int c, size_t n)
#
# Fill n bytes at dest with the low byte of c and return dest.
# GNU as, AT&T syntax, x86-64 System V calling convention.
#
#   In:       rdi = dest, esi = c (only the low byte is used), rdx = n
#   Out:      rax = dest
#   Clobbers: rcx, rsi, rdi, r8, flags; the stack is not touched
#
# The routine never executes cld, so it depends on the direction flag
# being clear on entry (the System V ABI states this for every call);
# otherwise rep stosq would walk downward.
.global memset
.type memset,@function
memset:
	movzbq	%sil,%rax		# rax = fill byte, zero-extended
	cmp	$16,%rdx
	jb	.Lsmall			# n < 16: overlapping scalar stores
	test	%sil,%sil		# judge the byte itself, not the upper bits of c
	jnz	.Lwiden			# unlikely: nonzero fill must be replicated
.Lbulk:
	# Here n >= 16 and rax holds the fill byte in all eight lanes.
	lea	-1(%rdx),%rcx
	mov	%rdi,%r8		# rep stosq advances rdi, so keep dest aside
	shr	$3,%rcx			# rcx = (n-1)/8 qwords written from the front
	mov	%rax,-8(%rdi,%rdx)	# final 8 bytes; may overlap the qword run
	rep stosq
	mov	%r8,%rax		# return the original dest
	ret

.Lwiden:
	# Kept off the fall-through path: a 64-bit imul has 3-7 cycles latency.
	mov	$0x101010101010101,%rsi
	imul	%rsi,%rax		# copy the byte into every lane of rax
	jmp	.Lbulk

.Lsmall:
	# n is in 0..15 here, so edx carries the whole count.  Every step
	# stores one item anchored at the start of the buffer and one
	# anchored at its end; the two may overlap, which avoids branching
	# on the exact length.
	test	%edx,%edx
	jz	.Ldone			# n = 0: nothing to store
	mov	%al,(%rdi)
	mov	%al,-1(%rdi,%rdx)
	cmp	$2,%edx
	jbe	.Ldone			# n = 1 or 2 is complete
	mov	%al,1(%rdi)
	mov	%al,-2(%rdi,%rdx)
	# Widen only now: a 32-bit imul has 3-4 cycles latency.
	imul	$0x1010101,%eax		# eax = fill byte in four lanes
	cmp	$4,%edx
	jbe	.Ldone			# n = 3 or 4 is complete
	mov	%eax,(%rdi)
	mov	%eax,-4(%rdi,%rdx)
	cmp	$8,%edx
	jbe	.Ldone			# n = 5..8 is complete
	mov	%eax,4(%rdi)
	mov	%eax,-8(%rdi,%rdx)	# n = 9..15 is complete
.Ldone:
	mov	%rdi,%rax		# rdi was never modified on this path
	ret
.size memset,.-memset
|