zsh-workers
 help / color / mirror / code / Atom feed
* Bug in llvm compiler
@ 2015-09-24 17:40 Sebastian Gniazdowski
  0 siblings, 0 replies; only message in thread
From: Sebastian Gniazdowski @ 2015-09-24 17:40 UTC (permalink / raw)
  To: zsh-workers

[-- Attachment #1: Type: text/plain, Size: 1714 bytes --]

Hello,
I'm writing an optimization of string.c. The point is that the functions
there run strlen() and then, in general, discard the returned
information by running strcpy(). Consider this:

http://www.opensource.apple.com/source/Libc/Libc-997.1.1/string/strcpy.c

Of course, there are different implementations that will not run
strlen() a second time:

http://www.opensource.apple.com/source/Libc/Libc-262/i386/gen/strcpy.c

but still - utilizing the information allows for optimizations.

I think I've encountered a bug in Apple's LLVM. Changing line 11 of
the patch from "if( l < 8 ) {" to "if( l < 0 ) {" causes the script
to run for 2.5 seconds. Changing it to "if( l < 1 ) {" restores the
running time of 2 seconds. Looking at the generated assembly shows that
"if( l < 0 ) {" is treated as "if( 0 )" and only the memcpy() part is
emitted. That's fine, but why does that optimized version run slower,
bearing in mind that "if( l < 1 ) {" is an impossible condition (every
string has at least 1 byte)? The problem doesn't reproduce on FreeBSD
10.1 and Ubuntu 12.10; running times are equal there. I thought I would
show the asm source; maybe someone will find something interesting in
it. In general, this seems to be a bug that should perhaps be taken into
account, as Zsh uses memcpy() in various places (see also: google
"-Wno-builtin-memcpy-chk-size"). As for string.c, I will provide
a memcpy() implementation taken from glibc or another library.

The compiler is:
# gcc --version
Configured with:
--prefix=/Applications/Xcode.app/Contents/Developer/usr
--with-gxx-include-dir=/usr/include/c++/4.2.1
Apple LLVM version 5.0 (clang-500.2.79) (based on LLVM 3.3svn)
Target: x86_64-apple-darwin13.1.0
Thread model: posix

Best regards,
Sebastian Gniazdowski

[-- Attachment #2: copymemory.patch --]
[-- Type: application/octet-stream, Size: 4391 bytes --]

diff --git a/Src/string.c b/Src/string.c
index 04e7446..f78fcba 100644
--- a/Src/string.c
+++ b/Src/string.c
@@ -28,16 +28,33 @@
 
 #include "zsh.mdh"
 
+#define copymemory_maybestring(dest,src,l,type) do      \
+            {                                           \
+                if( l < 8 ) {                           \
+                    type *d_;                           \
+                    const type *s_;                     \
+                    d_ = dest;                          \
+                    s_ = src;                           \
+                    while( (*d_++ = *s_++) ) {}         \
+                } else {                                \
+                    memcpy(dest, src, l*sizeof(type));  \
+                }                                       \
+            } while(0);
+
 /**/
 mod_export char *
 dupstring(const char *s)
 {
     char *t;
+    size_t lenw0;
 
     if (!s)
 	return NULL;
-    t = (char *) zhalloc(strlen((char *)s) + 1);
-    strcpy(t, s);
+
+    lenw0 = 1 + strlen((char *)s);
+    t = (char *) zhalloc(lenw0 * sizeof(char));
+    copymemory_maybestring(t, s, lenw0, char);
+
     return t;
 }
 
@@ -46,11 +63,15 @@ mod_export char *
 ztrdup(const char *s)
 {
     char *t;
+    size_t lenw0;
 
     if (!s)
 	return NULL;
-    t = (char *)zalloc(strlen((char *)s) + 1);
-    strcpy(t, s);
+
+    lenw0= 1 + strlen((char *)s);
+    t = (char *)zalloc(lenw0 * sizeof(char));
+    copymemory_maybestring(t, s, lenw0, char);
+
     return t;
 }
 
@@ -61,11 +82,15 @@ mod_export wchar_t *
 wcs_ztrdup(const wchar_t *s)
 {
     wchar_t *t;
+    size_t lenw0;
 
     if (!s)
 	return NULL;
-    t = (wchar_t *)zalloc(sizeof(wchar_t) * (wcslen((wchar_t *)s) + 1));
-    wcscpy(t, s);
+
+    lenw0 = 1 + wcslen((wchar_t *)s);
+    t = (wchar_t *)zalloc(lenw0 * sizeof(wchar_t));
+    copymemory_maybestring(t, s, lenw0, wchar_t);
+
     return t;
 }
 /**/
@@ -80,13 +105,14 @@ tricat(char const *s1, char const *s2, char const *s3)
 {
     /* This version always uses permanently-allocated space. */
     char *ptr;
-    size_t l1 = strlen(s1);
-    size_t l2 = strlen(s2);
-
-    ptr = (char *)zalloc(l1 + l2 + strlen(s3) + 1);
-    strcpy(ptr, s1);
-    strcpy(ptr + l1, s2);
-    strcpy(ptr + l1 + l2, s3);
+    size_t l1w0 = 1 + strlen(s1);
+    size_t l2w0 = 1 + strlen(s2);
+    size_t l3w0 = 1 + strlen(s3);
+
+    ptr = (char *)zalloc((l1w0 + l2w0 + l3w0 - 2) * sizeof(char));
+    copymemory_maybestring(ptr, s1, l1w0, char);
+    copymemory_maybestring(ptr + l1w0 - 1, s2, l2w0, char);
+    copymemory_maybestring(ptr + l1w0 - 1 + l2w0 - 1, s3, l3w0, char);
     return ptr;
 }
 
@@ -95,13 +121,14 @@ mod_export char *
 zhtricat(char const *s1, char const *s2, char const *s3)
 {
     char *ptr;
-    size_t l1 = strlen(s1);
-    size_t l2 = strlen(s2);
-
-    ptr = (char *)zhalloc(l1 + l2 + strlen(s3) + 1);
-    strcpy(ptr, s1);
-    strcpy(ptr + l1, s2);
-    strcpy(ptr + l1 + l2, s3);
+    size_t l1w0 = 1 + strlen(s1);
+    size_t l2w0 = 1 + strlen(s2);
+    size_t l3w0 = 1 + strlen(s3);
+
+    ptr = (char *)zhalloc((l1w0 + l2w0 + l3w0 - 2) * sizeof(char));
+    copymemory_maybestring(ptr, s1, l1w0, char);
+    copymemory_maybestring(ptr + l1w0 - 1, s2, l2w0, char);
+    copymemory_maybestring(ptr + l1w0 - 1 + l2w0 - 1, s3, l3w0, char);
     return ptr;
 }
 
@@ -113,11 +140,12 @@ dyncat(const char *s1, const char *s2)
 {
     /* This version always uses space from the current heap. */
     char *ptr;
-    size_t l1 = strlen(s1);
+    size_t l1w0 = 1 + strlen(s1);
+    size_t l2w0 = 1 + strlen(s2);
 
-    ptr = (char *)zhalloc(l1 + strlen(s2) + 1);
-    strcpy(ptr, s1);
-    strcpy(ptr + l1, s2);
+    ptr = (char *)zhalloc(l1w0 + l2w0 - 1);
+    copymemory_maybestring(ptr, s1, l1w0, char);
+    copymemory_maybestring(ptr + l1w0 - 1, s2, l2w0, char);
     return ptr;
 }
 
@@ -127,11 +155,12 @@ bicat(const char *s1, const char *s2)
 {
     /* This version always uses permanently-allocated space. */
     char *ptr;
-    size_t l1 = strlen(s1);
+    size_t l1w0 = 1 + strlen(s1);
+    size_t l2w0 = 1 + strlen(s2);
 
-    ptr = (char *)zalloc(l1 + strlen(s2) + 1);
-    strcpy(ptr, s1);
-    strcpy(ptr + l1, s2);
+    ptr = (char *)zalloc(l1w0 + l2w0 - 1);
+    copymemory_maybestring(ptr, s1, l1w0, char);
+    copymemory_maybestring(ptr + l1w0 - 1, s2, l2w0, char);
     return ptr;
 }
 

[-- Attachment #3: out_if_smaller_t0.asm --]
[-- Type: application/octet-stream, Size: 9358 bytes --]

	.section	__TEXT,__text,regular,pure_instructions
	.globl	_dupstring
	.align	4, 0x90
_dupstring:                             ## @dupstring
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp3:
	.cfi_def_cfa_offset 16
Ltmp4:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp5:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%rbx
	pushq	%rax
Ltmp6:
	.cfi_offset %rbx, -40
Ltmp7:
	.cfi_offset %r14, -32
Ltmp8:
	.cfi_offset %r15, -24
	movq	%rdi, %r14
	testq	%r14, %r14
	je	LBB0_1
## BB#2:
	movq	%r14, %rdi
	callq	_strlen
	movq	%rax, %rbx
	incq	%rbx
	movq	%rbx, %rdi
	callq	_zhalloc
	movq	%rax, %r15
	movq	%r15, %rdi
	movq	%r14, %rsi
	movq	%rbx, %rdx
	callq	_memcpy
	jmp	LBB0_3
LBB0_1:
	xorl	%r15d, %r15d
LBB0_3:
	movq	%r15, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_ztrdup
	.align	4, 0x90
_ztrdup:                                ## @ztrdup
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp12:
	.cfi_def_cfa_offset 16
Ltmp13:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp14:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%rbx
	pushq	%rax
Ltmp15:
	.cfi_offset %rbx, -40
Ltmp16:
	.cfi_offset %r14, -32
Ltmp17:
	.cfi_offset %r15, -24
	movq	%rdi, %r14
	testq	%r14, %r14
	je	LBB1_1
## BB#2:
	movq	%r14, %rdi
	callq	_strlen
	movq	%rax, %rbx
	incq	%rbx
	movq	%rbx, %rdi
	callq	_zalloc
	movq	%rax, %r15
	movq	%r15, %rdi
	movq	%r14, %rsi
	movq	%rbx, %rdx
	callq	_memcpy
	jmp	LBB1_3
LBB1_1:
	xorl	%r15d, %r15d
LBB1_3:
	movq	%r15, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_wcs_ztrdup
	.align	4, 0x90
_wcs_ztrdup:                            ## @wcs_ztrdup
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp21:
	.cfi_def_cfa_offset 16
Ltmp22:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp23:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%rbx
	pushq	%rax
Ltmp24:
	.cfi_offset %rbx, -40
Ltmp25:
	.cfi_offset %r14, -32
Ltmp26:
	.cfi_offset %r15, -24
	movq	%rdi, %r14
	testq	%r14, %r14
	je	LBB2_1
## BB#2:
	movq	%r14, %rdi
	callq	_wcslen
	leaq	4(,%rax,4), %r15
	movq	%r15, %rdi
	callq	_zalloc
	movq	%rax, %rbx
	movq	%rbx, %rdi
	movq	%r14, %rsi
	movq	%r15, %rdx
	callq	_memcpy
	jmp	LBB2_3
LBB2_1:
	xorl	%ebx, %ebx
LBB2_3:
	movq	%rbx, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_tricat
	.align	4, 0x90
_tricat:                                ## @tricat
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp30:
	.cfi_def_cfa_offset 16
Ltmp31:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp32:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	subq	$24, %rsp
Ltmp33:
	.cfi_offset %rbx, -56
Ltmp34:
	.cfi_offset %r12, -48
Ltmp35:
	.cfi_offset %r13, -40
Ltmp36:
	.cfi_offset %r14, -32
Ltmp37:
	.cfi_offset %r15, -24
	movq	%rdx, %r15
	movq	%r15, -48(%rbp)         ## 8-byte Spill
	movq	%rsi, %r14
	movq	%r14, -56(%rbp)         ## 8-byte Spill
	movq	%rdi, %r13
	callq	_strlen
	movq	%rax, %rbx
	movq	%r14, %rdi
	callq	_strlen
	movq	%rax, %r14
	leaq	(%rbx,%r14), %r12
	movq	%r15, %rdi
	callq	_strlen
	movq	%rax, -64(%rbp)         ## 8-byte Spill
	leaq	1(%rax,%r12), %rdi
	callq	_zalloc
	movq	%rax, %r15
	leaq	1(%rbx), %rdx
	movq	%r15, %rdi
	movq	%r13, %rsi
	callq	_memcpy
	addq	%r15, %rbx
	leaq	1(%r14), %rdx
	movq	%rbx, %rdi
	movq	-56(%rbp), %rsi         ## 8-byte Reload
	callq	_memcpy
	addq	%r15, %r12
	movq	-64(%rbp), %rax         ## 8-byte Reload
	leaq	1(%rax), %rdx
	movq	%r12, %rdi
	movq	-48(%rbp), %rsi         ## 8-byte Reload
	callq	_memcpy
	movq	%r15, %rax
	addq	$24, %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_zhtricat
	.align	4, 0x90
_zhtricat:                              ## @zhtricat
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp41:
	.cfi_def_cfa_offset 16
Ltmp42:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp43:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	subq	$24, %rsp
Ltmp44:
	.cfi_offset %rbx, -56
Ltmp45:
	.cfi_offset %r12, -48
Ltmp46:
	.cfi_offset %r13, -40
Ltmp47:
	.cfi_offset %r14, -32
Ltmp48:
	.cfi_offset %r15, -24
	movq	%rdx, %r15
	movq	%r15, -48(%rbp)         ## 8-byte Spill
	movq	%rsi, %r14
	movq	%r14, -56(%rbp)         ## 8-byte Spill
	movq	%rdi, %r13
	callq	_strlen
	movq	%rax, %rbx
	movq	%r14, %rdi
	callq	_strlen
	movq	%rax, %r14
	leaq	(%rbx,%r14), %r12
	movq	%r15, %rdi
	callq	_strlen
	movq	%rax, -64(%rbp)         ## 8-byte Spill
	leaq	1(%rax,%r12), %rdi
	callq	_zhalloc
	movq	%rax, %r15
	leaq	1(%rbx), %rdx
	movq	%r15, %rdi
	movq	%r13, %rsi
	callq	_memcpy
	addq	%r15, %rbx
	leaq	1(%r14), %rdx
	movq	%rbx, %rdi
	movq	-56(%rbp), %rsi         ## 8-byte Reload
	callq	_memcpy
	addq	%r15, %r12
	movq	-64(%rbp), %rax         ## 8-byte Reload
	leaq	1(%rax), %rdx
	movq	%r12, %rdi
	movq	-48(%rbp), %rsi         ## 8-byte Reload
	callq	_memcpy
	movq	%r15, %rax
	addq	$24, %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_dyncat
	.align	4, 0x90
_dyncat:                                ## @dyncat
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp52:
	.cfi_def_cfa_offset 16
Ltmp53:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp54:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	pushq	%rax
Ltmp55:
	.cfi_offset %rbx, -56
Ltmp56:
	.cfi_offset %r12, -48
Ltmp57:
	.cfi_offset %r13, -40
Ltmp58:
	.cfi_offset %r14, -32
Ltmp59:
	.cfi_offset %r15, -24
	movq	%rsi, %r14
	movq	%rdi, %r15
	callq	_strlen
	movq	%rax, %rbx
	movq	%r14, %rdi
	callq	_strlen
	movq	%rax, %r12
	leaq	1(%rbx,%r12), %rdi
	callq	_zhalloc
	movq	%rax, %r13
	leaq	1(%rbx), %rdx
	movq	%r13, %rdi
	movq	%r15, %rsi
	callq	_memcpy
	addq	%r13, %rbx
	leaq	1(%r12), %rdx
	movq	%rbx, %rdi
	movq	%r14, %rsi
	callq	_memcpy
	movq	%r13, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_bicat
	.align	4, 0x90
_bicat:                                 ## @bicat
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp63:
	.cfi_def_cfa_offset 16
Ltmp64:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp65:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	pushq	%rax
Ltmp66:
	.cfi_offset %rbx, -56
Ltmp67:
	.cfi_offset %r12, -48
Ltmp68:
	.cfi_offset %r13, -40
Ltmp69:
	.cfi_offset %r14, -32
Ltmp70:
	.cfi_offset %r15, -24
	movq	%rsi, %r14
	movq	%rdi, %r15
	callq	_strlen
	movq	%rax, %rbx
	movq	%r14, %rdi
	callq	_strlen
	movq	%rax, %r12
	leaq	1(%rbx,%r12), %rdi
	callq	_zalloc
	movq	%rax, %r13
	leaq	1(%rbx), %rdx
	movq	%r13, %rdi
	movq	%r15, %rsi
	callq	_memcpy
	addq	%r13, %rbx
	leaq	1(%r12), %rdx
	movq	%rbx, %rdi
	movq	%r14, %rsi
	callq	_memcpy
	movq	%r13, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_dupstrpfx
	.align	4, 0x90
_dupstrpfx:                             ## @dupstrpfx
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp74:
	.cfi_def_cfa_offset 16
Ltmp75:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp76:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%rbx
	pushq	%rax
Ltmp77:
	.cfi_offset %rbx, -40
Ltmp78:
	.cfi_offset %r14, -32
Ltmp79:
	.cfi_offset %r15, -24
	movl	%esi, %r15d
	movq	%rdi, %r14
	leal	1(%r15), %eax
	movslq	%eax, %rdi
	callq	_zhalloc
	movq	%rax, %rbx
	movslq	%r15d, %r15
	movq	%rbx, %rdi
	movq	%r14, %rsi
	movq	%r15, %rdx
	callq	_memcpy
	movb	$0, (%rbx,%r15)
	movq	%rbx, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_ztrduppfx
	.align	4, 0x90
_ztrduppfx:                             ## @ztrduppfx
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp83:
	.cfi_def_cfa_offset 16
Ltmp84:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp85:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%rbx
	pushq	%rax
Ltmp86:
	.cfi_offset %rbx, -40
Ltmp87:
	.cfi_offset %r14, -32
Ltmp88:
	.cfi_offset %r15, -24
	movl	%esi, %r15d
	movq	%rdi, %r14
	leal	1(%r15), %eax
	movslq	%eax, %rdi
	callq	_zalloc
	movq	%rax, %rbx
	movslq	%r15d, %r15
	movq	%rbx, %rdi
	movq	%r14, %rsi
	movq	%r15, %rdx
	callq	_memcpy
	movb	$0, (%rbx,%r15)
	movq	%rbx, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_appstr
	.align	4, 0x90
_appstr:                                ## @appstr
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp92:
	.cfi_def_cfa_offset 16
Ltmp93:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp94:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%rbx
	pushq	%rax
Ltmp95:
	.cfi_offset %rbx, -40
Ltmp96:
	.cfi_offset %r14, -32
Ltmp97:
	.cfi_offset %r15, -24
	movq	%rsi, %r14
	movq	%rdi, %rbx
	callq	_strlen
	movq	%rax, %r15
	movq	%r14, %rdi
	callq	_strlen
	leaq	1(%r15,%rax), %rsi
	movq	%rbx, %rdi
	callq	_realloc
	movq	%rax, %rdi
	movq	%r14, %rsi
	movq	$-1, %rdx
	addq	$8, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	jmp	___strcat_chk           ## TAILCALL
	.cfi_endproc

	.globl	_strend
	.align	4, 0x90
_strend:                                ## @strend
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp101:
	.cfi_def_cfa_offset 16
Ltmp102:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp103:
	.cfi_def_cfa_register %rbp
	pushq	%rbx
	pushq	%rax
Ltmp104:
	.cfi_offset %rbx, -24
	movq	%rdi, %rbx
	cmpb	$0, (%rbx)
	je	LBB10_2
## BB#1:
	movq	%rbx, %rdi
	callq	_strlen
	leaq	-1(%rax,%rbx), %rbx
LBB10_2:
	movq	%rbx, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	ret
	.cfi_endproc


.subsections_via_symbols

[-- Attachment #4: out_if_smaller_t1.asm --]
[-- Type: application/octet-stream, Size: 14100 bytes --]

	.section	__TEXT,__text,regular,pure_instructions
	.globl	_dupstring
	.align	4, 0x90
_dupstring:                             ## @dupstring
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp3:
	.cfi_def_cfa_offset 16
Ltmp4:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp5:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%rbx
	pushq	%rax
Ltmp6:
	.cfi_offset %rbx, -40
Ltmp7:
	.cfi_offset %r14, -32
Ltmp8:
	.cfi_offset %r15, -24
	movq	%rdi, %r14
	xorl	%ebx, %ebx
	testq	%r14, %r14
	je	LBB0_5
## BB#1:
	movq	%r14, %rdi
	callq	_strlen
	movq	%rax, %r15
	leaq	1(%r15), %rdi
	callq	_zhalloc
	movq	%rax, %rbx
	incq	%r15
	je	LBB0_2
## BB#4:
	movq	%rbx, %rdi
	movq	%r14, %rsi
	movq	%r15, %rdx
	callq	_memcpy
	jmp	LBB0_5
LBB0_2:
	xorl	%eax, %eax
	.align	4, 0x90
LBB0_3:                                 ## %.preheader
                                        ## =>This Inner Loop Header: Depth=1
	movb	(%r14,%rax), %cl
	movb	%cl, (%rbx,%rax)
	incq	%rax
	testb	%cl, %cl
	jne	LBB0_3
LBB0_5:                                 ## %.loopexit
	movq	%rbx, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_ztrdup
	.align	4, 0x90
_ztrdup:                                ## @ztrdup
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp12:
	.cfi_def_cfa_offset 16
Ltmp13:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp14:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%rbx
	pushq	%rax
Ltmp15:
	.cfi_offset %rbx, -40
Ltmp16:
	.cfi_offset %r14, -32
Ltmp17:
	.cfi_offset %r15, -24
	movq	%rdi, %r14
	xorl	%ebx, %ebx
	testq	%r14, %r14
	je	LBB1_5
## BB#1:
	movq	%r14, %rdi
	callq	_strlen
	movq	%rax, %r15
	leaq	1(%r15), %rdi
	callq	_zalloc
	movq	%rax, %rbx
	incq	%r15
	je	LBB1_2
## BB#4:
	movq	%rbx, %rdi
	movq	%r14, %rsi
	movq	%r15, %rdx
	callq	_memcpy
	jmp	LBB1_5
LBB1_2:
	xorl	%eax, %eax
	.align	4, 0x90
LBB1_3:                                 ## %.preheader
                                        ## =>This Inner Loop Header: Depth=1
	movb	(%r14,%rax), %cl
	movb	%cl, (%rbx,%rax)
	incq	%rax
	testb	%cl, %cl
	jne	LBB1_3
LBB1_5:                                 ## %.loopexit
	movq	%rbx, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_wcs_ztrdup
	.align	4, 0x90
_wcs_ztrdup:                            ## @wcs_ztrdup
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp21:
	.cfi_def_cfa_offset 16
Ltmp22:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp23:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r12
	pushq	%rbx
Ltmp24:
	.cfi_offset %rbx, -48
Ltmp25:
	.cfi_offset %r12, -40
Ltmp26:
	.cfi_offset %r14, -32
Ltmp27:
	.cfi_offset %r15, -24
	movq	%rdi, %rbx
	xorl	%r14d, %r14d
	testq	%rbx, %rbx
	je	LBB2_5
## BB#1:
	movq	%rbx, %rdi
	callq	_wcslen
	movq	%rax, %r12
	leaq	4(,%r12,4), %r15
	incq	%r12
	movq	%r15, %rdi
	callq	_zalloc
	movq	%rax, %r14
	testq	%r12, %r12
	je	LBB2_2
## BB#4:
	movq	%r14, %rdi
	movq	%rbx, %rsi
	movq	%r15, %rdx
	callq	_memcpy
	jmp	LBB2_5
LBB2_2:
	movq	%r14, %rax
	.align	4, 0x90
LBB2_3:                                 ## %.preheader
                                        ## =>This Inner Loop Header: Depth=1
	movl	(%rbx), %ecx
	movl	%ecx, (%rax)
	addq	$4, %rax
	addq	$4, %rbx
	testl	%ecx, %ecx
	jne	LBB2_3
LBB2_5:                                 ## %.loopexit
	movq	%r14, %rax
	popq	%rbx
	popq	%r12
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_tricat
	.align	4, 0x90
_tricat:                                ## @tricat
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp31:
	.cfi_def_cfa_offset 16
Ltmp32:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp33:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	subq	$24, %rsp
Ltmp34:
	.cfi_offset %rbx, -56
Ltmp35:
	.cfi_offset %r12, -48
Ltmp36:
	.cfi_offset %r13, -40
Ltmp37:
	.cfi_offset %r14, -32
Ltmp38:
	.cfi_offset %r15, -24
	movq	%rdx, %r14
	movq	%rsi, %rbx
	movq	%rdi, %r13
	callq	_strlen
	movq	%rax, %r15
	movq	%rbx, %rdi
	callq	_strlen
	movq	%rax, -56(%rbp)         ## 8-byte Spill
	leaq	(%r15,%rax), %r12
	movq	%r14, %rdi
	callq	_strlen
	movq	%rax, -48(%rbp)         ## 8-byte Spill
	leaq	1(%rax,%r12), %rdi
	callq	_zalloc
	incq	-56(%rbp)               ## 8-byte Folded Spill
	incq	-48(%rbp)               ## 8-byte Folded Spill
	movq	%r15, %rdx
	incq	%rdx
	je	LBB3_1
## BB#3:
	movq	%rax, %rdi
	movq	%rax, -64(%rbp)         ## 8-byte Spill
	movq	%r13, %rsi
	callq	_memcpy
	movq	-64(%rbp), %rax         ## 8-byte Reload
	jmp	LBB3_4
LBB3_1:
	movq	%rax, %rdx
	.align	4, 0x90
LBB3_2:                                 ## %.preheader8
                                        ## =>This Inner Loop Header: Depth=1
	movb	(%r13), %cl
	movb	%cl, (%rdx)
	incq	%rdx
	incq	%r13
	testb	%cl, %cl
	jne	LBB3_2
LBB3_4:                                 ## %.loopexit9
	addq	%rax, %r15
	movq	-56(%rbp), %rdx         ## 8-byte Reload
	testq	%rdx, %rdx
	je	LBB3_5
## BB#6:
	movq	%r15, %rdi
	movq	%rax, %r15
	movq	%rbx, %rsi
	callq	_memcpy
	movq	%r15, %rax
	jmp	LBB3_7
	.align	4, 0x90
LBB3_5:                                 ## %.preheader6
                                        ## =>This Inner Loop Header: Depth=1
	movb	(%rbx), %cl
	movb	%cl, (%r15)
	incq	%r15
	incq	%rbx
	testb	%cl, %cl
	jne	LBB3_5
LBB3_7:                                 ## %.loopexit7
	addq	%rax, %r12
	movq	-48(%rbp), %rdx         ## 8-byte Reload
	testq	%rdx, %rdx
	je	LBB3_8
## BB#9:
	movq	%r12, %rdi
	movq	%rax, %rbx
	movq	%r14, %rsi
	callq	_memcpy
	movq	%rbx, %rax
	jmp	LBB3_10
	.align	4, 0x90
LBB3_8:                                 ## %.preheader
                                        ## =>This Inner Loop Header: Depth=1
	movb	(%r14), %cl
	movb	%cl, (%r12)
	incq	%r12
	incq	%r14
	testb	%cl, %cl
	jne	LBB3_8
LBB3_10:                                ## %.loopexit
	addq	$24, %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_zhtricat
	.align	4, 0x90
_zhtricat:                              ## @zhtricat
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp42:
	.cfi_def_cfa_offset 16
Ltmp43:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp44:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	subq	$24, %rsp
Ltmp45:
	.cfi_offset %rbx, -56
Ltmp46:
	.cfi_offset %r12, -48
Ltmp47:
	.cfi_offset %r13, -40
Ltmp48:
	.cfi_offset %r14, -32
Ltmp49:
	.cfi_offset %r15, -24
	movq	%rdx, %r14
	movq	%rsi, %rbx
	movq	%rdi, %r13
	callq	_strlen
	movq	%rax, %r15
	movq	%rbx, %rdi
	callq	_strlen
	movq	%rax, -56(%rbp)         ## 8-byte Spill
	leaq	(%r15,%rax), %r12
	movq	%r14, %rdi
	callq	_strlen
	movq	%rax, -48(%rbp)         ## 8-byte Spill
	leaq	1(%rax,%r12), %rdi
	callq	_zhalloc
	incq	-56(%rbp)               ## 8-byte Folded Spill
	incq	-48(%rbp)               ## 8-byte Folded Spill
	movq	%r15, %rdx
	incq	%rdx
	je	LBB4_1
## BB#3:
	movq	%rax, %rdi
	movq	%rax, -64(%rbp)         ## 8-byte Spill
	movq	%r13, %rsi
	callq	_memcpy
	movq	-64(%rbp), %rax         ## 8-byte Reload
	jmp	LBB4_4
LBB4_1:
	movq	%rax, %rdx
	.align	4, 0x90
LBB4_2:                                 ## %.preheader8
                                        ## =>This Inner Loop Header: Depth=1
	movb	(%r13), %cl
	movb	%cl, (%rdx)
	incq	%rdx
	incq	%r13
	testb	%cl, %cl
	jne	LBB4_2
LBB4_4:                                 ## %.loopexit9
	addq	%rax, %r15
	movq	-56(%rbp), %rdx         ## 8-byte Reload
	testq	%rdx, %rdx
	je	LBB4_5
## BB#6:
	movq	%r15, %rdi
	movq	%rax, %r15
	movq	%rbx, %rsi
	callq	_memcpy
	movq	%r15, %rax
	jmp	LBB4_7
	.align	4, 0x90
LBB4_5:                                 ## %.preheader6
                                        ## =>This Inner Loop Header: Depth=1
	movb	(%rbx), %cl
	movb	%cl, (%r15)
	incq	%r15
	incq	%rbx
	testb	%cl, %cl
	jne	LBB4_5
LBB4_7:                                 ## %.loopexit7
	addq	%rax, %r12
	movq	-48(%rbp), %rdx         ## 8-byte Reload
	testq	%rdx, %rdx
	je	LBB4_8
## BB#9:
	movq	%r12, %rdi
	movq	%rax, %rbx
	movq	%r14, %rsi
	callq	_memcpy
	movq	%rbx, %rax
	jmp	LBB4_10
	.align	4, 0x90
LBB4_8:                                 ## %.preheader
                                        ## =>This Inner Loop Header: Depth=1
	movb	(%r14), %cl
	movb	%cl, (%r12)
	incq	%r12
	incq	%r14
	testb	%cl, %cl
	jne	LBB4_8
LBB4_10:                                ## %.loopexit
	addq	$24, %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_dyncat
	.align	4, 0x90
_dyncat:                                ## @dyncat
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp53:
	.cfi_def_cfa_offset 16
Ltmp54:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp55:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	pushq	%rax
Ltmp56:
	.cfi_offset %rbx, -56
Ltmp57:
	.cfi_offset %r12, -48
Ltmp58:
	.cfi_offset %r13, -40
Ltmp59:
	.cfi_offset %r14, -32
Ltmp60:
	.cfi_offset %r15, -24
	movq	%rsi, %r14
	movq	%rdi, %rbx
	callq	_strlen
	movq	%rax, %r13
	movq	%r14, %rdi
	callq	_strlen
	movq	%rax, %r12
	leaq	1(%r13,%r12), %rdi
	callq	_zhalloc
	movq	%rax, %r15
	incq	%r12
	movq	%r13, %rdx
	incq	%rdx
	je	LBB5_1
## BB#3:
	movq	%r15, %rdi
	movq	%rbx, %rsi
	callq	_memcpy
	jmp	LBB5_4
LBB5_1:
	movq	%r15, %rax
	.align	4, 0x90
LBB5_2:                                 ## %.preheader3
                                        ## =>This Inner Loop Header: Depth=1
	movb	(%rbx), %cl
	movb	%cl, (%rax)
	incq	%rax
	incq	%rbx
	testb	%cl, %cl
	jne	LBB5_2
LBB5_4:                                 ## %.loopexit4
	addq	%r15, %r13
	testq	%r12, %r12
	je	LBB5_5
## BB#6:
	movq	%r13, %rdi
	movq	%r14, %rsi
	movq	%r12, %rdx
	callq	_memcpy
	jmp	LBB5_7
	.align	4, 0x90
LBB5_5:                                 ## %.preheader
                                        ## =>This Inner Loop Header: Depth=1
	movb	(%r14), %al
	movb	%al, (%r13)
	incq	%r13
	incq	%r14
	testb	%al, %al
	jne	LBB5_5
LBB5_7:                                 ## %.loopexit
	movq	%r15, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_bicat
	.align	4, 0x90
_bicat:                                 ## @bicat
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp64:
	.cfi_def_cfa_offset 16
Ltmp65:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp66:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	pushq	%rax
Ltmp67:
	.cfi_offset %rbx, -56
Ltmp68:
	.cfi_offset %r12, -48
Ltmp69:
	.cfi_offset %r13, -40
Ltmp70:
	.cfi_offset %r14, -32
Ltmp71:
	.cfi_offset %r15, -24
	movq	%rsi, %r14
	movq	%rdi, %rbx
	callq	_strlen
	movq	%rax, %r13
	movq	%r14, %rdi
	callq	_strlen
	movq	%rax, %r12
	leaq	1(%r13,%r12), %rdi
	callq	_zalloc
	movq	%rax, %r15
	incq	%r12
	movq	%r13, %rdx
	incq	%rdx
	je	LBB6_1
## BB#3:
	movq	%r15, %rdi
	movq	%rbx, %rsi
	callq	_memcpy
	jmp	LBB6_4
LBB6_1:
	movq	%r15, %rax
	.align	4, 0x90
LBB6_2:                                 ## %.preheader3
                                        ## =>This Inner Loop Header: Depth=1
	movb	(%rbx), %cl
	movb	%cl, (%rax)
	incq	%rax
	incq	%rbx
	testb	%cl, %cl
	jne	LBB6_2
LBB6_4:                                 ## %.loopexit4
	addq	%r15, %r13
	testq	%r12, %r12
	je	LBB6_5
## BB#6:
	movq	%r13, %rdi
	movq	%r14, %rsi
	movq	%r12, %rdx
	callq	_memcpy
	jmp	LBB6_7
	.align	4, 0x90
LBB6_5:                                 ## %.preheader
                                        ## =>This Inner Loop Header: Depth=1
	movb	(%r14), %al
	movb	%al, (%r13)
	incq	%r13
	incq	%r14
	testb	%al, %al
	jne	LBB6_5
LBB6_7:                                 ## %.loopexit
	movq	%r15, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_dupstrpfx
	.align	4, 0x90
_dupstrpfx:                             ## @dupstrpfx
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp75:
	.cfi_def_cfa_offset 16
Ltmp76:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp77:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%rbx
	pushq	%rax
Ltmp78:
	.cfi_offset %rbx, -40
Ltmp79:
	.cfi_offset %r14, -32
Ltmp80:
	.cfi_offset %r15, -24
	movl	%esi, %r15d
	movq	%rdi, %r14
	leal	1(%r15), %eax
	movslq	%eax, %rdi
	callq	_zhalloc
	movq	%rax, %rbx
	movslq	%r15d, %r15
	movq	%rbx, %rdi
	movq	%r14, %rsi
	movq	%r15, %rdx
	callq	_memcpy
	movb	$0, (%rbx,%r15)
	movq	%rbx, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_ztrduppfx
	.align	4, 0x90
_ztrduppfx:                             ## @ztrduppfx
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp84:
	.cfi_def_cfa_offset 16
Ltmp85:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp86:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%rbx
	pushq	%rax
Ltmp87:
	.cfi_offset %rbx, -40
Ltmp88:
	.cfi_offset %r14, -32
Ltmp89:
	.cfi_offset %r15, -24
	movl	%esi, %r15d
	movq	%rdi, %r14
	leal	1(%r15), %eax
	movslq	%eax, %rdi
	callq	_zalloc
	movq	%rax, %rbx
	movslq	%r15d, %r15
	movq	%rbx, %rdi
	movq	%r14, %rsi
	movq	%r15, %rdx
	callq	_memcpy
	movb	$0, (%rbx,%r15)
	movq	%rbx, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
	.cfi_endproc

	.globl	_appstr
	.align	4, 0x90
_appstr:                                ## @appstr
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp93:
	.cfi_def_cfa_offset 16
Ltmp94:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp95:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%rbx
	pushq	%rax
Ltmp96:
	.cfi_offset %rbx, -40
Ltmp97:
	.cfi_offset %r14, -32
Ltmp98:
	.cfi_offset %r15, -24
	movq	%rsi, %r14
	movq	%rdi, %rbx
	callq	_strlen
	movq	%rax, %r15
	movq	%r14, %rdi
	callq	_strlen
	leaq	1(%r15,%rax), %rsi
	movq	%rbx, %rdi
	callq	_realloc
	movq	%rax, %rdi
	movq	%r14, %rsi
	movq	$-1, %rdx
	addq	$8, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	jmp	___strcat_chk           ## TAILCALL
	.cfi_endproc

	.globl	_strend
	.align	4, 0x90
_strend:                                ## @strend
	.cfi_startproc
## BB#0:
	pushq	%rbp
Ltmp102:
	.cfi_def_cfa_offset 16
Ltmp103:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Ltmp104:
	.cfi_def_cfa_register %rbp
	pushq	%rbx
	pushq	%rax
Ltmp105:
	.cfi_offset %rbx, -24
	movq	%rdi, %rbx
	cmpb	$0, (%rbx)
	je	LBB10_2
## BB#1:
	movq	%rbx, %rdi
	callq	_strlen
	leaq	-1(%rax,%rbx), %rbx
LBB10_2:
	movq	%rbx, %rax
	addq	$8, %rsp
	popq	%rbx
	popq	%rbp
	ret
	.cfi_endproc


.subsections_via_symbols

[-- Attachment #5: opttest3.zsh --]
[-- Type: application/octet-stream, Size: 247 bytes --]

#!/usr/local/bin/zsh-0cpmem-5.1.1-dev-0
#!/usr/local/bin/zsh-1cpmem-5.1.1-dev-0

zmodload zsh/zprof

strtest() {
    a=""

    i=5000
    while (( i -- )); do
        b=$a
        a+="$i"
    done
}

strtest
strtest
strtest
strtest
strtest

zprof

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2015-09-24 17:41 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-24 17:40 Bug in llvm compiler Sebastian Gniazdowski

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).