From mboxrd@z Thu Jan 1 00:00:00 1970 X-Msuck: nntp://news.gmane.org/gmane.linux.lib.musl.general/12299 Path: news.gmane.org!.POSTED!not-for-mail From: John Reiser Newsgroups: gmane.linux.lib.musl.general Subject: Re: [PATCH] Add comments to i386 assembly source Date: Mon, 1 Jan 2018 14:57:02 -0800 Message-ID: <5caf910a-dd98-6836-c70f-6a98cf8a9d22@bitwagon.com> References: <20171223094545.rmx6xtmucyz5xzap@voyager> <72c68934-4445-c83d-7bbc-004953b2f9e9@bitwagon.com> <20171231154926.GG1627@brightrain.aerifal.cx> <20180101195224.tpkl5g5w66rzwzz3@voyager> Reply-To: musl@lists.openwall.com NNTP-Posting-Host: blaine.gmane.org Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit X-Trace: blaine.gmane.org 1514847327 29376 195.159.176.226 (1 Jan 2018 22:55:27 GMT) X-Complaints-To: usenet@blaine.gmane.org NNTP-Posting-Date: Mon, 1 Jan 2018 22:55:27 +0000 (UTC) User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.4.0 To: musl@lists.openwall.com Original-X-From: musl-return-12315-gllmg-musl=m.gmane.org@lists.openwall.com Mon Jan 01 23:55:23 2018 Return-path: Envelope-to: gllmg-musl@m.gmane.org Original-Received: from mother.openwall.net ([195.42.179.200]) by blaine.gmane.org with smtp (Exim 4.84_2) (envelope-from ) id 1eW8zE-0006uc-C1 for gllmg-musl@m.gmane.org; Mon, 01 Jan 2018 23:55:16 +0100 Original-Received: (qmail 3971 invoked by uid 550); 1 Jan 2018 22:57:17 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-ID: Original-Received: (qmail 3947 invoked from network); 1 Jan 2018 22:57:16 -0000 In-Reply-To: <20180101195224.tpkl5g5w66rzwzz3@voyager> Content-Language: en-US Xref: news.gmane.org gmane.linux.lib.musl.general:12299 Archived-At: There's a bug. clone() is a user-level function that can be used independently of the musl internal implementation of threads. 
Thus when clone() in musl/src/linux/clone.c calls
	return __syscall_ret(__clone(func, stack, flags, arg, ptid, tls, ctid));
then the i386 implementation of __clone has no guarantee about the value in %gs,
and it is a bug to assume that (%gs >> 3) fits in 8 bits.

The code in musl/src/thread/i386/clone.s wastes up to 12 bytes when aligning
the new stack, by aligning before [pre-]allocating space for the one argument
to the thread function.

This code fixes the %gs bug, wastes no stack space in the new thread,
and is 6 bytes smaller (83 ==> 77; -7.2%):

===== musl/src/thread/i386/clone.s
__NR_clone = 120
NBPW = 4  /* Number of Bytes Per Word */

/*
 * __clone(func, stack, flags, arg, ptid, tls, ctid)
 *
 * Issues the clone system call via int $128 with
 *	%eax = __NR_clone
 *	%ebx = flags
 *	%ecx = new stack pointer (16-byte aligned, with arg stored at its top)
 *	%edx = ptid
 *	%esi = address of a 4-word segment descriptor built on the current stack
 *	%edi = ctid
 *
 * The descriptor words, in ascending address order, are:
 *	segment # (%gs >> 3), tls, limit, flags
 * (presumably the kernel's struct user_desc layout -- see set_thread_area(2)).
 *
 * Where the syscall returns 0, func(arg) is called on the new stack and its
 * return value is passed to the exit syscall (%eax = 1).
 * Where it returns non-zero, the descriptor is discarded, the saved registers
 * are restored, and that value is returned to the caller in %eax.
 */
	.text
	.global __clone
	.type __clone,@function
__clone:  /* clone(func, stack, flags, arg, ptid, tls, ctid) */
	push %esi  /* non-standard; save .text space */
	lea 2*NBPW(%esp),%esi  /* &func (skip saved %esi and return address) */
	push %ebx
	push %ebp
	push %edi

	/* 'cld' must be in effect upon entry to a .globl function */
	/* Each lodsl fetches the next argument into %eax and advances %esi. */
	lodsl; xchg %eax,%ebp  /* func (save) */
	lodsl; lea -NBPW(%eax),%ecx  /* stack; pre-allocate space for 1 arg */
	lodsl; xchg %eax,%ebx  /* flags */
	and $-16,%ecx  /* 16-byte align new stack */
	lodsl; mov %eax,(%ecx)  /* arg to new thread */
	lodsl; xchg %eax,%edx  /* ptid */

	/* Build the segment descriptor on the current stack, last word first. */
	push $0x51  /* flags */
	/*
	 * limit: 20-bit maximum.  With the limit-in-pages bit set in flags this
	 * spans the whole 4 GiB address space.  A shorter limit (e.g. 0xffff,
	 * 256 MiB) would make %gs-relative accesses at negative offsets from the
	 * thread pointer fault, because the limit check treats them as large
	 * unsigned offsets.
	 */
	push $0xfffff  /* limit */
	lodsl; push %eax  /* tls */
	/* Clear %eax first so the full 16-bit selector survives the shift. */
	xor %eax,%eax; mov %gs,%ax; shr $3,%eax; push %eax  /* segment # */

	mov (%esi),%edi  /* ctid */
	mov %esp,%esi  /* &segment_descriptor on current stack */
	/* push/pop sets all 32 bits of %eax without assuming its upper bytes */
	push $__NR_clone; pop %eax
	int $128
	test %eax,%eax
	jnz 1f  /* non-zero result: return it to the caller */

	/* Zero result: running on the new stack. */
	mov %ebp,%eax  /* func */
	xor %ebp,%ebp  /* end chain of stack frames */
	call *%eax  /* func(arg) */
	mov %eax,%ebx  /* rv is arg1 to syscall */
	xor %eax,%eax
	inc %eax  /* __NR_exit */
	int $128
	hlt  /* only reached if the exit syscall returns */

1:
	add $16,%esp  /* discard the 4-word segment descriptor */
	pop %edi
	pop %ebp
	pop %ebx
	pop %esi
	ret
=====

--