* Bug in llvm compiler
@ 2015-09-24 17:40 Sebastian Gniazdowski
0 siblings, 0 replies; only message in thread
From: Sebastian Gniazdowski @ 2015-09-24 17:40 UTC (permalink / raw)
To: zsh-workers
[-- Attachment #1: Type: text/plain, Size: 1714 bytes --]
Hello,
I'm working on an optimization of string.c. The point is that the
functions there run strlen() and then generally discard the returned
length by calling strcpy(). Consider this:
http://www.opensource.apple.com/source/Libc/Libc-997.1.1/string/strcpy.c
Of course there are other implementations that do not run
strlen() a second time:
http://www.opensource.apple.com/source/Libc/Libc-262/i386/gen/strcpy.c
but still - utilizing the information allows for optimizations.
I think I've encountered a bug in Apple's llvm. Changing line 11 of
the patch from "if( l < 8 ) {" to "if( l < 0 ) {" causes the script
to run for 2.5 seconds. Changing it to "if( l < 1 ) {" restores the
running time of 2 seconds. Looking at the generated assembly shows that
"if( l < 0 ) {" is treated as "if( 0 )" and only the memcpy() part is
emitted. That's fine, but why does that optimized version run slower,
bearing in mind that "if( l < 1 ) {" is an impossible condition (every
string has at least 1 byte)? The problem doesn't reproduce on FreeBSD
10.1 and Ubuntu 12.10; running times are equal there. I thought I would
show the asm source; maybe someone will find something interesting in
it. In general this seems to be a bug that should perhaps be taken into
account, as Zsh uses memcpy() in various places (also: google
"-Wno-builtin-memcpy-chk-size"). As for string.c, I will provide a
memcpy() implementation taken from glibc or another library.
The compiler is:
# gcc --version
Configured with:
--prefix=/Applications/Xcode.app/Contents/Developer/usr
--with-gxx-include-dir=/usr/include/c++/4.2.1
Apple LLVM version 5.0 (clang-500.2.79) (based on LLVM 3.3svn)
Target: x86_64-apple-darwin13.1.0
Thread model: posix
Best regards,
Sebastian Gniazdowski
[-- Attachment #2: copymemory.patch --]
[-- Type: application/octet-stream, Size: 4391 bytes --]
diff --git a/Src/string.c b/Src/string.c
index 04e7446..f78fcba 100644
--- a/Src/string.c
+++ b/Src/string.c
@@ -28,16 +28,33 @@
#include "zsh.mdh"
+#define copymemory_maybestring(dest,src,l,type) do \
+ { \
+ if( l < 8 ) { \
+ type *d_; \
+ const type *s_; \
+ d_ = dest; \
+ s_ = src; \
+ while( (*d_++ = *s_++) ) {} \
+ } else { \
+ memcpy(dest, src, l*sizeof(type)); \
+ } \
+ } while(0);
+
/**/
mod_export char *
dupstring(const char *s)
{
char *t;
+ size_t lenw0;
if (!s)
return NULL;
- t = (char *) zhalloc(strlen((char *)s) + 1);
- strcpy(t, s);
+
+ lenw0 = 1 + strlen((char *)s);
+ t = (char *) zhalloc(lenw0 * sizeof(char));
+ copymemory_maybestring(t, s, lenw0, char);
+
return t;
}
@@ -46,11 +63,15 @@ mod_export char *
ztrdup(const char *s)
{
char *t;
+ size_t lenw0;
if (!s)
return NULL;
- t = (char *)zalloc(strlen((char *)s) + 1);
- strcpy(t, s);
+
+ lenw0= 1 + strlen((char *)s);
+ t = (char *)zalloc(lenw0 * sizeof(char));
+ copymemory_maybestring(t, s, lenw0, char);
+
return t;
}
@@ -61,11 +82,15 @@ mod_export wchar_t *
wcs_ztrdup(const wchar_t *s)
{
wchar_t *t;
+ size_t lenw0;
if (!s)
return NULL;
- t = (wchar_t *)zalloc(sizeof(wchar_t) * (wcslen((wchar_t *)s) + 1));
- wcscpy(t, s);
+
+ lenw0 = 1 + wcslen((wchar_t *)s);
+ t = (wchar_t *)zalloc(lenw0 * sizeof(wchar_t));
+ copymemory_maybestring(t, s, lenw0, wchar_t);
+
return t;
}
/**/
@@ -80,13 +105,14 @@ tricat(char const *s1, char const *s2, char const *s3)
{
/* This version always uses permanently-allocated space. */
char *ptr;
- size_t l1 = strlen(s1);
- size_t l2 = strlen(s2);
-
- ptr = (char *)zalloc(l1 + l2 + strlen(s3) + 1);
- strcpy(ptr, s1);
- strcpy(ptr + l1, s2);
- strcpy(ptr + l1 + l2, s3);
+ size_t l1w0 = 1 + strlen(s1);
+ size_t l2w0 = 1 + strlen(s2);
+ size_t l3w0 = 1 + strlen(s3);
+
+ ptr = (char *)zalloc((l1w0 + l2w0 + l3w0 - 2) * sizeof(char));
+ copymemory_maybestring(ptr, s1, l1w0, char);
+ copymemory_maybestring(ptr + l1w0 - 1, s2, l2w0, char);
+ copymemory_maybestring(ptr + l1w0 - 1 + l2w0 - 1, s3, l3w0, char);
return ptr;
}
@@ -95,13 +121,14 @@ mod_export char *
zhtricat(char const *s1, char const *s2, char const *s3)
{
char *ptr;
- size_t l1 = strlen(s1);
- size_t l2 = strlen(s2);
-
- ptr = (char *)zhalloc(l1 + l2 + strlen(s3) + 1);
- strcpy(ptr, s1);
- strcpy(ptr + l1, s2);
- strcpy(ptr + l1 + l2, s3);
+ size_t l1w0 = 1 + strlen(s1);
+ size_t l2w0 = 1 + strlen(s2);
+ size_t l3w0 = 1 + strlen(s3);
+
+ ptr = (char *)zhalloc((l1w0 + l2w0 + l3w0 - 2) * sizeof(char));
+ copymemory_maybestring(ptr, s1, l1w0, char);
+ copymemory_maybestring(ptr + l1w0 - 1, s2, l2w0, char);
+ copymemory_maybestring(ptr + l1w0 - 1 + l2w0 - 1, s3, l3w0, char);
return ptr;
}
@@ -113,11 +140,12 @@ dyncat(const char *s1, const char *s2)
{
/* This version always uses space from the current heap. */
char *ptr;
- size_t l1 = strlen(s1);
+ size_t l1w0 = 1 + strlen(s1);
+ size_t l2w0 = 1 + strlen(s2);
- ptr = (char *)zhalloc(l1 + strlen(s2) + 1);
- strcpy(ptr, s1);
- strcpy(ptr + l1, s2);
+ ptr = (char *)zhalloc(l1w0 + l2w0 - 1);
+ copymemory_maybestring(ptr, s1, l1w0, char);
+ copymemory_maybestring(ptr + l1w0 - 1, s2, l2w0, char);
return ptr;
}
@@ -127,11 +155,12 @@ bicat(const char *s1, const char *s2)
{
/* This version always uses permanently-allocated space. */
char *ptr;
- size_t l1 = strlen(s1);
+ size_t l1w0 = 1 + strlen(s1);
+ size_t l2w0 = 1 + strlen(s2);
- ptr = (char *)zalloc(l1 + strlen(s2) + 1);
- strcpy(ptr, s1);
- strcpy(ptr + l1, s2);
+ ptr = (char *)zalloc(l1w0 + l2w0 - 1);
+ copymemory_maybestring(ptr, s1, l1w0, char);
+ copymemory_maybestring(ptr + l1w0 - 1, s2, l2w0, char);
return ptr;
}
[-- Attachment #3: out_if_smaller_t0.asm --]
[-- Type: application/octet-stream, Size: 9358 bytes --]
.section __TEXT,__text,regular,pure_instructions
.globl _dupstring
.align 4, 0x90
_dupstring: ## @dupstring
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp3:
.cfi_def_cfa_offset 16
Ltmp4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp5:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
Ltmp6:
.cfi_offset %rbx, -40
Ltmp7:
.cfi_offset %r14, -32
Ltmp8:
.cfi_offset %r15, -24
movq %rdi, %r14
testq %r14, %r14
je LBB0_1
## BB#2:
movq %r14, %rdi
callq _strlen
movq %rax, %rbx
incq %rbx
movq %rbx, %rdi
callq _zhalloc
movq %rax, %r15
movq %r15, %rdi
movq %r14, %rsi
movq %rbx, %rdx
callq _memcpy
jmp LBB0_3
LBB0_1:
xorl %r15d, %r15d
LBB0_3:
movq %r15, %rax
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _ztrdup
.align 4, 0x90
_ztrdup: ## @ztrdup
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp12:
.cfi_def_cfa_offset 16
Ltmp13:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp14:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
Ltmp15:
.cfi_offset %rbx, -40
Ltmp16:
.cfi_offset %r14, -32
Ltmp17:
.cfi_offset %r15, -24
movq %rdi, %r14
testq %r14, %r14
je LBB1_1
## BB#2:
movq %r14, %rdi
callq _strlen
movq %rax, %rbx
incq %rbx
movq %rbx, %rdi
callq _zalloc
movq %rax, %r15
movq %r15, %rdi
movq %r14, %rsi
movq %rbx, %rdx
callq _memcpy
jmp LBB1_3
LBB1_1:
xorl %r15d, %r15d
LBB1_3:
movq %r15, %rax
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _wcs_ztrdup
.align 4, 0x90
_wcs_ztrdup: ## @wcs_ztrdup
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp21:
.cfi_def_cfa_offset 16
Ltmp22:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp23:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
Ltmp24:
.cfi_offset %rbx, -40
Ltmp25:
.cfi_offset %r14, -32
Ltmp26:
.cfi_offset %r15, -24
movq %rdi, %r14
testq %r14, %r14
je LBB2_1
## BB#2:
movq %r14, %rdi
callq _wcslen
leaq 4(,%rax,4), %r15
movq %r15, %rdi
callq _zalloc
movq %rax, %rbx
movq %rbx, %rdi
movq %r14, %rsi
movq %r15, %rdx
callq _memcpy
jmp LBB2_3
LBB2_1:
xorl %ebx, %ebx
LBB2_3:
movq %rbx, %rax
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _tricat
.align 4, 0x90
_tricat: ## @tricat
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp30:
.cfi_def_cfa_offset 16
Ltmp31:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp32:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $24, %rsp
Ltmp33:
.cfi_offset %rbx, -56
Ltmp34:
.cfi_offset %r12, -48
Ltmp35:
.cfi_offset %r13, -40
Ltmp36:
.cfi_offset %r14, -32
Ltmp37:
.cfi_offset %r15, -24
movq %rdx, %r15
movq %r15, -48(%rbp) ## 8-byte Spill
movq %rsi, %r14
movq %r14, -56(%rbp) ## 8-byte Spill
movq %rdi, %r13
callq _strlen
movq %rax, %rbx
movq %r14, %rdi
callq _strlen
movq %rax, %r14
leaq (%rbx,%r14), %r12
movq %r15, %rdi
callq _strlen
movq %rax, -64(%rbp) ## 8-byte Spill
leaq 1(%rax,%r12), %rdi
callq _zalloc
movq %rax, %r15
leaq 1(%rbx), %rdx
movq %r15, %rdi
movq %r13, %rsi
callq _memcpy
addq %r15, %rbx
leaq 1(%r14), %rdx
movq %rbx, %rdi
movq -56(%rbp), %rsi ## 8-byte Reload
callq _memcpy
addq %r15, %r12
movq -64(%rbp), %rax ## 8-byte Reload
leaq 1(%rax), %rdx
movq %r12, %rdi
movq -48(%rbp), %rsi ## 8-byte Reload
callq _memcpy
movq %r15, %rax
addq $24, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _zhtricat
.align 4, 0x90
_zhtricat: ## @zhtricat
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp41:
.cfi_def_cfa_offset 16
Ltmp42:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp43:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $24, %rsp
Ltmp44:
.cfi_offset %rbx, -56
Ltmp45:
.cfi_offset %r12, -48
Ltmp46:
.cfi_offset %r13, -40
Ltmp47:
.cfi_offset %r14, -32
Ltmp48:
.cfi_offset %r15, -24
movq %rdx, %r15
movq %r15, -48(%rbp) ## 8-byte Spill
movq %rsi, %r14
movq %r14, -56(%rbp) ## 8-byte Spill
movq %rdi, %r13
callq _strlen
movq %rax, %rbx
movq %r14, %rdi
callq _strlen
movq %rax, %r14
leaq (%rbx,%r14), %r12
movq %r15, %rdi
callq _strlen
movq %rax, -64(%rbp) ## 8-byte Spill
leaq 1(%rax,%r12), %rdi
callq _zhalloc
movq %rax, %r15
leaq 1(%rbx), %rdx
movq %r15, %rdi
movq %r13, %rsi
callq _memcpy
addq %r15, %rbx
leaq 1(%r14), %rdx
movq %rbx, %rdi
movq -56(%rbp), %rsi ## 8-byte Reload
callq _memcpy
addq %r15, %r12
movq -64(%rbp), %rax ## 8-byte Reload
leaq 1(%rax), %rdx
movq %r12, %rdi
movq -48(%rbp), %rsi ## 8-byte Reload
callq _memcpy
movq %r15, %rax
addq $24, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _dyncat
.align 4, 0x90
_dyncat: ## @dyncat
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp52:
.cfi_def_cfa_offset 16
Ltmp53:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp54:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
pushq %rax
Ltmp55:
.cfi_offset %rbx, -56
Ltmp56:
.cfi_offset %r12, -48
Ltmp57:
.cfi_offset %r13, -40
Ltmp58:
.cfi_offset %r14, -32
Ltmp59:
.cfi_offset %r15, -24
movq %rsi, %r14
movq %rdi, %r15
callq _strlen
movq %rax, %rbx
movq %r14, %rdi
callq _strlen
movq %rax, %r12
leaq 1(%rbx,%r12), %rdi
callq _zhalloc
movq %rax, %r13
leaq 1(%rbx), %rdx
movq %r13, %rdi
movq %r15, %rsi
callq _memcpy
addq %r13, %rbx
leaq 1(%r12), %rdx
movq %rbx, %rdi
movq %r14, %rsi
callq _memcpy
movq %r13, %rax
addq $8, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _bicat
.align 4, 0x90
_bicat: ## @bicat
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp63:
.cfi_def_cfa_offset 16
Ltmp64:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp65:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
pushq %rax
Ltmp66:
.cfi_offset %rbx, -56
Ltmp67:
.cfi_offset %r12, -48
Ltmp68:
.cfi_offset %r13, -40
Ltmp69:
.cfi_offset %r14, -32
Ltmp70:
.cfi_offset %r15, -24
movq %rsi, %r14
movq %rdi, %r15
callq _strlen
movq %rax, %rbx
movq %r14, %rdi
callq _strlen
movq %rax, %r12
leaq 1(%rbx,%r12), %rdi
callq _zalloc
movq %rax, %r13
leaq 1(%rbx), %rdx
movq %r13, %rdi
movq %r15, %rsi
callq _memcpy
addq %r13, %rbx
leaq 1(%r12), %rdx
movq %rbx, %rdi
movq %r14, %rsi
callq _memcpy
movq %r13, %rax
addq $8, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _dupstrpfx
.align 4, 0x90
_dupstrpfx: ## @dupstrpfx
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp74:
.cfi_def_cfa_offset 16
Ltmp75:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp76:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
Ltmp77:
.cfi_offset %rbx, -40
Ltmp78:
.cfi_offset %r14, -32
Ltmp79:
.cfi_offset %r15, -24
movl %esi, %r15d
movq %rdi, %r14
leal 1(%r15), %eax
movslq %eax, %rdi
callq _zhalloc
movq %rax, %rbx
movslq %r15d, %r15
movq %rbx, %rdi
movq %r14, %rsi
movq %r15, %rdx
callq _memcpy
movb $0, (%rbx,%r15)
movq %rbx, %rax
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _ztrduppfx
.align 4, 0x90
_ztrduppfx: ## @ztrduppfx
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp83:
.cfi_def_cfa_offset 16
Ltmp84:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp85:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
Ltmp86:
.cfi_offset %rbx, -40
Ltmp87:
.cfi_offset %r14, -32
Ltmp88:
.cfi_offset %r15, -24
movl %esi, %r15d
movq %rdi, %r14
leal 1(%r15), %eax
movslq %eax, %rdi
callq _zalloc
movq %rax, %rbx
movslq %r15d, %r15
movq %rbx, %rdi
movq %r14, %rsi
movq %r15, %rdx
callq _memcpy
movb $0, (%rbx,%r15)
movq %rbx, %rax
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _appstr
.align 4, 0x90
_appstr: ## @appstr
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp92:
.cfi_def_cfa_offset 16
Ltmp93:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp94:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
Ltmp95:
.cfi_offset %rbx, -40
Ltmp96:
.cfi_offset %r14, -32
Ltmp97:
.cfi_offset %r15, -24
movq %rsi, %r14
movq %rdi, %rbx
callq _strlen
movq %rax, %r15
movq %r14, %rdi
callq _strlen
leaq 1(%r15,%rax), %rsi
movq %rbx, %rdi
callq _realloc
movq %rax, %rdi
movq %r14, %rsi
movq $-1, %rdx
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
jmp ___strcat_chk ## TAILCALL
.cfi_endproc
.globl _strend
.align 4, 0x90
_strend: ## @strend
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp101:
.cfi_def_cfa_offset 16
Ltmp102:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp103:
.cfi_def_cfa_register %rbp
pushq %rbx
pushq %rax
Ltmp104:
.cfi_offset %rbx, -24
movq %rdi, %rbx
cmpb $0, (%rbx)
je LBB10_2
## BB#1:
movq %rbx, %rdi
callq _strlen
leaq -1(%rax,%rbx), %rbx
LBB10_2:
movq %rbx, %rax
addq $8, %rsp
popq %rbx
popq %rbp
ret
.cfi_endproc
.subsections_via_symbols
[-- Attachment #4: out_if_smaller_t1.asm --]
[-- Type: application/octet-stream, Size: 14100 bytes --]
.section __TEXT,__text,regular,pure_instructions
.globl _dupstring
.align 4, 0x90
_dupstring: ## @dupstring
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp3:
.cfi_def_cfa_offset 16
Ltmp4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp5:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
Ltmp6:
.cfi_offset %rbx, -40
Ltmp7:
.cfi_offset %r14, -32
Ltmp8:
.cfi_offset %r15, -24
movq %rdi, %r14
xorl %ebx, %ebx
testq %r14, %r14
je LBB0_5
## BB#1:
movq %r14, %rdi
callq _strlen
movq %rax, %r15
leaq 1(%r15), %rdi
callq _zhalloc
movq %rax, %rbx
incq %r15
je LBB0_2
## BB#4:
movq %rbx, %rdi
movq %r14, %rsi
movq %r15, %rdx
callq _memcpy
jmp LBB0_5
LBB0_2:
xorl %eax, %eax
.align 4, 0x90
LBB0_3: ## %.preheader
## =>This Inner Loop Header: Depth=1
movb (%r14,%rax), %cl
movb %cl, (%rbx,%rax)
incq %rax
testb %cl, %cl
jne LBB0_3
LBB0_5: ## %.loopexit
movq %rbx, %rax
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _ztrdup
.align 4, 0x90
_ztrdup: ## @ztrdup
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp12:
.cfi_def_cfa_offset 16
Ltmp13:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp14:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
Ltmp15:
.cfi_offset %rbx, -40
Ltmp16:
.cfi_offset %r14, -32
Ltmp17:
.cfi_offset %r15, -24
movq %rdi, %r14
xorl %ebx, %ebx
testq %r14, %r14
je LBB1_5
## BB#1:
movq %r14, %rdi
callq _strlen
movq %rax, %r15
leaq 1(%r15), %rdi
callq _zalloc
movq %rax, %rbx
incq %r15
je LBB1_2
## BB#4:
movq %rbx, %rdi
movq %r14, %rsi
movq %r15, %rdx
callq _memcpy
jmp LBB1_5
LBB1_2:
xorl %eax, %eax
.align 4, 0x90
LBB1_3: ## %.preheader
## =>This Inner Loop Header: Depth=1
movb (%r14,%rax), %cl
movb %cl, (%rbx,%rax)
incq %rax
testb %cl, %cl
jne LBB1_3
LBB1_5: ## %.loopexit
movq %rbx, %rax
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _wcs_ztrdup
.align 4, 0x90
_wcs_ztrdup: ## @wcs_ztrdup
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp21:
.cfi_def_cfa_offset 16
Ltmp22:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp23:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
Ltmp24:
.cfi_offset %rbx, -48
Ltmp25:
.cfi_offset %r12, -40
Ltmp26:
.cfi_offset %r14, -32
Ltmp27:
.cfi_offset %r15, -24
movq %rdi, %rbx
xorl %r14d, %r14d
testq %rbx, %rbx
je LBB2_5
## BB#1:
movq %rbx, %rdi
callq _wcslen
movq %rax, %r12
leaq 4(,%r12,4), %r15
incq %r12
movq %r15, %rdi
callq _zalloc
movq %rax, %r14
testq %r12, %r12
je LBB2_2
## BB#4:
movq %r14, %rdi
movq %rbx, %rsi
movq %r15, %rdx
callq _memcpy
jmp LBB2_5
LBB2_2:
movq %r14, %rax
.align 4, 0x90
LBB2_3: ## %.preheader
## =>This Inner Loop Header: Depth=1
movl (%rbx), %ecx
movl %ecx, (%rax)
addq $4, %rax
addq $4, %rbx
testl %ecx, %ecx
jne LBB2_3
LBB2_5: ## %.loopexit
movq %r14, %rax
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _tricat
.align 4, 0x90
_tricat: ## @tricat
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp31:
.cfi_def_cfa_offset 16
Ltmp32:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp33:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $24, %rsp
Ltmp34:
.cfi_offset %rbx, -56
Ltmp35:
.cfi_offset %r12, -48
Ltmp36:
.cfi_offset %r13, -40
Ltmp37:
.cfi_offset %r14, -32
Ltmp38:
.cfi_offset %r15, -24
movq %rdx, %r14
movq %rsi, %rbx
movq %rdi, %r13
callq _strlen
movq %rax, %r15
movq %rbx, %rdi
callq _strlen
movq %rax, -56(%rbp) ## 8-byte Spill
leaq (%r15,%rax), %r12
movq %r14, %rdi
callq _strlen
movq %rax, -48(%rbp) ## 8-byte Spill
leaq 1(%rax,%r12), %rdi
callq _zalloc
incq -56(%rbp) ## 8-byte Folded Spill
incq -48(%rbp) ## 8-byte Folded Spill
movq %r15, %rdx
incq %rdx
je LBB3_1
## BB#3:
movq %rax, %rdi
movq %rax, -64(%rbp) ## 8-byte Spill
movq %r13, %rsi
callq _memcpy
movq -64(%rbp), %rax ## 8-byte Reload
jmp LBB3_4
LBB3_1:
movq %rax, %rdx
.align 4, 0x90
LBB3_2: ## %.preheader8
## =>This Inner Loop Header: Depth=1
movb (%r13), %cl
movb %cl, (%rdx)
incq %rdx
incq %r13
testb %cl, %cl
jne LBB3_2
LBB3_4: ## %.loopexit9
addq %rax, %r15
movq -56(%rbp), %rdx ## 8-byte Reload
testq %rdx, %rdx
je LBB3_5
## BB#6:
movq %r15, %rdi
movq %rax, %r15
movq %rbx, %rsi
callq _memcpy
movq %r15, %rax
jmp LBB3_7
.align 4, 0x90
LBB3_5: ## %.preheader6
## =>This Inner Loop Header: Depth=1
movb (%rbx), %cl
movb %cl, (%r15)
incq %r15
incq %rbx
testb %cl, %cl
jne LBB3_5
LBB3_7: ## %.loopexit7
addq %rax, %r12
movq -48(%rbp), %rdx ## 8-byte Reload
testq %rdx, %rdx
je LBB3_8
## BB#9:
movq %r12, %rdi
movq %rax, %rbx
movq %r14, %rsi
callq _memcpy
movq %rbx, %rax
jmp LBB3_10
.align 4, 0x90
LBB3_8: ## %.preheader
## =>This Inner Loop Header: Depth=1
movb (%r14), %cl
movb %cl, (%r12)
incq %r12
incq %r14
testb %cl, %cl
jne LBB3_8
LBB3_10: ## %.loopexit
addq $24, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _zhtricat
.align 4, 0x90
_zhtricat: ## @zhtricat
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp42:
.cfi_def_cfa_offset 16
Ltmp43:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp44:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $24, %rsp
Ltmp45:
.cfi_offset %rbx, -56
Ltmp46:
.cfi_offset %r12, -48
Ltmp47:
.cfi_offset %r13, -40
Ltmp48:
.cfi_offset %r14, -32
Ltmp49:
.cfi_offset %r15, -24
movq %rdx, %r14
movq %rsi, %rbx
movq %rdi, %r13
callq _strlen
movq %rax, %r15
movq %rbx, %rdi
callq _strlen
movq %rax, -56(%rbp) ## 8-byte Spill
leaq (%r15,%rax), %r12
movq %r14, %rdi
callq _strlen
movq %rax, -48(%rbp) ## 8-byte Spill
leaq 1(%rax,%r12), %rdi
callq _zhalloc
incq -56(%rbp) ## 8-byte Folded Spill
incq -48(%rbp) ## 8-byte Folded Spill
movq %r15, %rdx
incq %rdx
je LBB4_1
## BB#3:
movq %rax, %rdi
movq %rax, -64(%rbp) ## 8-byte Spill
movq %r13, %rsi
callq _memcpy
movq -64(%rbp), %rax ## 8-byte Reload
jmp LBB4_4
LBB4_1:
movq %rax, %rdx
.align 4, 0x90
LBB4_2: ## %.preheader8
## =>This Inner Loop Header: Depth=1
movb (%r13), %cl
movb %cl, (%rdx)
incq %rdx
incq %r13
testb %cl, %cl
jne LBB4_2
LBB4_4: ## %.loopexit9
addq %rax, %r15
movq -56(%rbp), %rdx ## 8-byte Reload
testq %rdx, %rdx
je LBB4_5
## BB#6:
movq %r15, %rdi
movq %rax, %r15
movq %rbx, %rsi
callq _memcpy
movq %r15, %rax
jmp LBB4_7
.align 4, 0x90
LBB4_5: ## %.preheader6
## =>This Inner Loop Header: Depth=1
movb (%rbx), %cl
movb %cl, (%r15)
incq %r15
incq %rbx
testb %cl, %cl
jne LBB4_5
LBB4_7: ## %.loopexit7
addq %rax, %r12
movq -48(%rbp), %rdx ## 8-byte Reload
testq %rdx, %rdx
je LBB4_8
## BB#9:
movq %r12, %rdi
movq %rax, %rbx
movq %r14, %rsi
callq _memcpy
movq %rbx, %rax
jmp LBB4_10
.align 4, 0x90
LBB4_8: ## %.preheader
## =>This Inner Loop Header: Depth=1
movb (%r14), %cl
movb %cl, (%r12)
incq %r12
incq %r14
testb %cl, %cl
jne LBB4_8
LBB4_10: ## %.loopexit
addq $24, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _dyncat
.align 4, 0x90
_dyncat: ## @dyncat
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp53:
.cfi_def_cfa_offset 16
Ltmp54:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp55:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
pushq %rax
Ltmp56:
.cfi_offset %rbx, -56
Ltmp57:
.cfi_offset %r12, -48
Ltmp58:
.cfi_offset %r13, -40
Ltmp59:
.cfi_offset %r14, -32
Ltmp60:
.cfi_offset %r15, -24
movq %rsi, %r14
movq %rdi, %rbx
callq _strlen
movq %rax, %r13
movq %r14, %rdi
callq _strlen
movq %rax, %r12
leaq 1(%r13,%r12), %rdi
callq _zhalloc
movq %rax, %r15
incq %r12
movq %r13, %rdx
incq %rdx
je LBB5_1
## BB#3:
movq %r15, %rdi
movq %rbx, %rsi
callq _memcpy
jmp LBB5_4
LBB5_1:
movq %r15, %rax
.align 4, 0x90
LBB5_2: ## %.preheader3
## =>This Inner Loop Header: Depth=1
movb (%rbx), %cl
movb %cl, (%rax)
incq %rax
incq %rbx
testb %cl, %cl
jne LBB5_2
LBB5_4: ## %.loopexit4
addq %r15, %r13
testq %r12, %r12
je LBB5_5
## BB#6:
movq %r13, %rdi
movq %r14, %rsi
movq %r12, %rdx
callq _memcpy
jmp LBB5_7
.align 4, 0x90
LBB5_5: ## %.preheader
## =>This Inner Loop Header: Depth=1
movb (%r14), %al
movb %al, (%r13)
incq %r13
incq %r14
testb %al, %al
jne LBB5_5
LBB5_7: ## %.loopexit
movq %r15, %rax
addq $8, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _bicat
.align 4, 0x90
_bicat: ## @bicat
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp64:
.cfi_def_cfa_offset 16
Ltmp65:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp66:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
pushq %rax
Ltmp67:
.cfi_offset %rbx, -56
Ltmp68:
.cfi_offset %r12, -48
Ltmp69:
.cfi_offset %r13, -40
Ltmp70:
.cfi_offset %r14, -32
Ltmp71:
.cfi_offset %r15, -24
movq %rsi, %r14
movq %rdi, %rbx
callq _strlen
movq %rax, %r13
movq %r14, %rdi
callq _strlen
movq %rax, %r12
leaq 1(%r13,%r12), %rdi
callq _zalloc
movq %rax, %r15
incq %r12
movq %r13, %rdx
incq %rdx
je LBB6_1
## BB#3:
movq %r15, %rdi
movq %rbx, %rsi
callq _memcpy
jmp LBB6_4
LBB6_1:
movq %r15, %rax
.align 4, 0x90
LBB6_2: ## %.preheader3
## =>This Inner Loop Header: Depth=1
movb (%rbx), %cl
movb %cl, (%rax)
incq %rax
incq %rbx
testb %cl, %cl
jne LBB6_2
LBB6_4: ## %.loopexit4
addq %r15, %r13
testq %r12, %r12
je LBB6_5
## BB#6:
movq %r13, %rdi
movq %r14, %rsi
movq %r12, %rdx
callq _memcpy
jmp LBB6_7
.align 4, 0x90
LBB6_5: ## %.preheader
## =>This Inner Loop Header: Depth=1
movb (%r14), %al
movb %al, (%r13)
incq %r13
incq %r14
testb %al, %al
jne LBB6_5
LBB6_7: ## %.loopexit
movq %r15, %rax
addq $8, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _dupstrpfx
.align 4, 0x90
_dupstrpfx: ## @dupstrpfx
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp75:
.cfi_def_cfa_offset 16
Ltmp76:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp77:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
Ltmp78:
.cfi_offset %rbx, -40
Ltmp79:
.cfi_offset %r14, -32
Ltmp80:
.cfi_offset %r15, -24
movl %esi, %r15d
movq %rdi, %r14
leal 1(%r15), %eax
movslq %eax, %rdi
callq _zhalloc
movq %rax, %rbx
movslq %r15d, %r15
movq %rbx, %rdi
movq %r14, %rsi
movq %r15, %rdx
callq _memcpy
movb $0, (%rbx,%r15)
movq %rbx, %rax
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _ztrduppfx
.align 4, 0x90
_ztrduppfx: ## @ztrduppfx
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp84:
.cfi_def_cfa_offset 16
Ltmp85:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp86:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
Ltmp87:
.cfi_offset %rbx, -40
Ltmp88:
.cfi_offset %r14, -32
Ltmp89:
.cfi_offset %r15, -24
movl %esi, %r15d
movq %rdi, %r14
leal 1(%r15), %eax
movslq %eax, %rdi
callq _zalloc
movq %rax, %rbx
movslq %r15d, %r15
movq %rbx, %rdi
movq %r14, %rsi
movq %r15, %rdx
callq _memcpy
movb $0, (%rbx,%r15)
movq %rbx, %rax
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
ret
.cfi_endproc
.globl _appstr
.align 4, 0x90
_appstr: ## @appstr
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp93:
.cfi_def_cfa_offset 16
Ltmp94:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp95:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
Ltmp96:
.cfi_offset %rbx, -40
Ltmp97:
.cfi_offset %r14, -32
Ltmp98:
.cfi_offset %r15, -24
movq %rsi, %r14
movq %rdi, %rbx
callq _strlen
movq %rax, %r15
movq %r14, %rdi
callq _strlen
leaq 1(%r15,%rax), %rsi
movq %rbx, %rdi
callq _realloc
movq %rax, %rdi
movq %r14, %rsi
movq $-1, %rdx
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
jmp ___strcat_chk ## TAILCALL
.cfi_endproc
.globl _strend
.align 4, 0x90
_strend: ## @strend
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp102:
.cfi_def_cfa_offset 16
Ltmp103:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp104:
.cfi_def_cfa_register %rbp
pushq %rbx
pushq %rax
Ltmp105:
.cfi_offset %rbx, -24
movq %rdi, %rbx
cmpb $0, (%rbx)
je LBB10_2
## BB#1:
movq %rbx, %rdi
callq _strlen
leaq -1(%rax,%rbx), %rbx
LBB10_2:
movq %rbx, %rax
addq $8, %rsp
popq %rbx
popq %rbp
ret
.cfi_endproc
.subsections_via_symbols
[-- Attachment #5: opttest3.zsh --]
[-- Type: application/octet-stream, Size: 247 bytes --]
#!/usr/local/bin/zsh-0cpmem-5.1.1-dev-0
#!/usr/local/bin/zsh-1cpmem-5.1.1-dev-0
zmodload zsh/zprof
# strtest: build up a string by repeated appends.
# Starts from an empty string and a counter of 5000; the loop condition
# post-decrements i, and each pass copies the current contents of a into b
# and then appends the counter's value to a.
# NOTE(review): presumably meant to stress the string duplication and
# concatenation code touched by the patch -- confirm which zsh functions
# this actually hits.
strtest() {
a=""
i=5000
while (( i -- )); do
# snapshot the accumulated string before growing it
b=$a
a+="$i"
done
}
strtest
strtest
strtest
strtest
strtest
zprof
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2015-09-24 17:41 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-24 17:40 Bug in llvm compiler Sebastian Gniazdowski
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/zsh/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).