From mboxrd@z Thu Jan 1 00:00:00 1970 X-Msuck: nntp://news.gmane.org/gmane.linux.lib.musl.general/1384 Path: news.gmane.org!not-for-mail From: Rich Felker Newsgroups: gmane.linux.lib.musl.general Subject: ARM floating point setjmp/longjmp support Date: Sat, 28 Jul 2012 20:31:39 -0400 Message-ID: <20120729003139.GA4649@brightrain.aerifal.cx> Reply-To: musl@lists.openwall.com NNTP-Posting-Host: plane.gmane.org Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="HcAYCG3uE/tztfnV" X-Trace: dough.gmane.org 1343521903 27756 80.91.229.3 (29 Jul 2012 00:31:43 GMT) X-Complaints-To: usenet@dough.gmane.org NNTP-Posting-Date: Sun, 29 Jul 2012 00:31:43 +0000 (UTC) To: musl@lists.openwall.com Original-X-From: musl-return-1385-gllmg-musl=m.gmane.org@lists.openwall.com Sun Jul 29 02:31:44 2012 Return-path: Envelope-to: gllmg-musl@plane.gmane.org Original-Received: from mother.openwall.net ([195.42.179.200]) by plane.gmane.org with smtp (Exim 4.69) (envelope-from ) id 1SvHQ7-00056Q-Te for gllmg-musl@plane.gmane.org; Sun, 29 Jul 2012 02:31:44 +0200 Original-Received: (qmail 5264 invoked by uid 550); 29 Jul 2012 00:31:42 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: Original-Received: (qmail 5256 invoked from network); 29 Jul 2012 00:31:42 -0000 Content-Disposition: inline User-Agent: Mutt/1.5.21 (2010-09-15) Xref: news.gmane.org gmane.linux.lib.musl.general:1384 Archived-At: --HcAYCG3uE/tztfnV Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Hi, The attached patch is intended to add runtime-conditional support for saving FPA/VFP/iWMMXt call-preserved registers across setjmp/longjmp. It's modelled on what the code in uClibc does, so I'm not sure if it's correct, but it seems to be, and it avoids using mnemonics that could fail depending on -march/-mfpu settings. 
Could someone familiar with ARM take a look at it and see if it makes sense, or even better, if it works? I'm a little bit doubtful about whether the iWMMXt stuff is needed (i.e. whether it's part of any ABI one could reasonably expect to be honored) or worth the cost, and I'm unsure if there are other optional coprocessor registers we should be checking for and possibly saving. Also, there's the question of whether the conditional-execution coprocessor instructions will generate illegal instruction exceptions on machines without the coprocessor when the condition check fails. One site (http://www.peter-cockerell.net/aalp/html/app-a.html) claims they're safe, but I'd like to see something more authoritative. Rich --HcAYCG3uE/tztfnV Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="arm-fp-sjlj.diff" diff --git a/src/setjmp/arm/longjmp.s b/src/setjmp/arm/longjmp.s index 690a008..11e6b58 100644 --- a/src/setjmp/arm/longjmp.s +++ b/src/setjmp/arm/longjmp.s @@ -8,6 +8,31 @@ longjmp: movs r0,r1 moveq r0,#1 ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp,sp,lr} - tst lr,#1 + + adr r1,1f + ldr r2,1f + ldr r1,[r1,r2] + + tst r1,#0x20 + ldcne p2, cr4, [ip], #48 + + tst r1,#0x40 + ldcne p11, cr8, [ip], #68 + ldrne r2, [ip], #4 + mcrne p10, 7, r2, cr1, cr0, 0 + + tst r1,#0x200 + beq 2f + ldcl p1, cr10, [ip], #8 + ldcl p1, cr11, [ip], #8 + ldcl p1, cr12, [ip], #8 + ldcl p1, cr13, [ip], #8 + ldcl p1, cr14, [ip], #8 + ldcl p1, cr15, [ip], #8 + +2: tst lr,#1 moveq pc,lr bx lr + +.hidden __hwcap +1: .word __hwcap-1b diff --git a/src/setjmp/arm/setjmp.s b/src/setjmp/arm/setjmp.s index 6985caa..763f959 100644 --- a/src/setjmp/arm/setjmp.s +++ b/src/setjmp/arm/setjmp.s @@ -10,6 +10,31 @@ setjmp: mov ip,r0 stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp,sp,lr} mov r0,#0 - tst lr,#1 + + adr r1,1f + ldr r2,1f + ldr r1,[r1,r2] + + tst r1,#0x20 + stcne p2, cr4, [ip], #48 + + tst r1,#0x40 + stcne p11, cr8, [ip], #68 + mrcne p10, 7, r2, cr1, cr0, 0 + strne r2, [ip], #4 + + tst 
r1,#0x200 + beq 2f + stcl p1, cr10, [ip], #8 + stcl p1, cr11, [ip], #8 + stcl p1, cr12, [ip], #8 + stcl p1, cr13, [ip], #8 + stcl p1, cr14, [ip], #8 + stcl p1, cr15, [ip], #8 + +2: tst lr,#1 moveq pc,lr bx lr + +.hidden __hwcap +1: .word __hwcap-1b --HcAYCG3uE/tztfnV--