Another issue is that 0l/vl seems to output the wrong bits for single-precision floats in little-endian mode, for a similar reason: it uses bytes 4-7 instead of 0-3. This seems to fix it: % diff /sys/src/cmd/vl/asm.c asm.c 672c672,675 < buf.dbuf[l] = cast[fnuxi8[i+4]]; --- > if(little) > buf.dbuf[l] = cast[fnuxi8[i]]; > else > buf.dbuf[l] = cast[fnuxi8[i+4]]; An alternative fix would be to simply use fnuxi4 instead of fnuxi8, so that both BE and LE would work (I guess; I don't have a BE machine to test). Is there any (probably historical) reason for not using fnuxi4? I am asking because /sys/src/cmd/vl/l.h has: EXTERN char fnuxi4[4]; /* for 3l [sic] */ Thanks, - cherry On Tue, Dec 24, 2013 at 12:20 PM, cherry wrote: > Hello Fans, > > It seems 0l/vl in little-endian mode outputs the wrong ordering of fp > registers in MOVD. For a double, which is stored in an even-odd pair of fp > registers, the least significant bits should be held in the even-numbered > register, regardless of the endianness. When moving from/to memory, in LE > mode the first 4 bytes should go to even-numbered registers, which is > different from BE mode. > > A patch is submitted. 
> > % patch/diff 0l-movd-fpreg-order > /sys/src/cmd/vl/asm.c > asm.c.orig:1019,1026 - > /n/sources/patch/0l-movd-fpreg-order/asm.c:1019,1031 > o1 = OP_IRR(opirr(ALAST), v>>16, REGZERO, REGTMP); > o2 = OP_IRR(opirr(AOR), v, REGTMP, REGTMP); > o3 = OP_RRR(oprrr(AADDU), r, REGTMP, REGTMP); > - o4 = OP_IRR(opirr(AMOVF+ALAST), 0, REGTMP, p->to.reg+1); > - o5 = OP_IRR(opirr(AMOVF+ALAST), 4, REGTMP, p->to.reg); > + if(little) { > + o4 = OP_IRR(opirr(AMOVF+ALAST), 0, REGTMP, > p->to.reg); > + o5 = OP_IRR(opirr(AMOVF+ALAST), 4, REGTMP, > p->to.reg+1); > + } else { > + o4 = OP_IRR(opirr(AMOVF+ALAST), 0, REGTMP, > p->to.reg+1); > + o5 = OP_IRR(opirr(AMOVF+ALAST), 4, REGTMP, > p->to.reg); > + } > break; > case 16: > o1 = OP_IRR(opirr(ALAST), v>>16, REGZERO, REGTMP); > asm.c.orig:1029,1036 - > /n/sources/patch/0l-movd-fpreg-order/asm.c:1034,1046 > o4 = OP_IRR(opirr(AMOVF+ALAST), 0, REGTMP, p->to.reg); > break; > case 8: > - o1 = OP_IRR(opirr(AMOVF+ALAST), v, r, p->to.reg+1); > - o2 = OP_IRR(opirr(AMOVF+ALAST), v+4, r, p->to.reg); > + if(little) { > + o1 = OP_IRR(opirr(AMOVF+ALAST), v, r, p->to.reg); > + o2 = OP_IRR(opirr(AMOVF+ALAST), v+4, r, p->to.reg+1); > + } else { > + o1 = OP_IRR(opirr(AMOVF+ALAST), v, r, p->to.reg+1); > + o2 = OP_IRR(opirr(AMOVF+ALAST), v+4, r, p->to.reg); > + } > break; > case 4: > o1 = OP_IRR(opirr(AMOVF+ALAST), v, r, p->to.reg); > asm.c.orig:1050,1057 - > /n/sources/patch/0l-movd-fpreg-order/asm.c:1060,1072 > o1 = OP_IRR(opirr(ALAST), v>>16, REGZERO, REGTMP); > o2 = OP_IRR(opirr(AOR), v, REGTMP, REGTMP); > o3 = OP_RRR(oprrr(AADDU), r, REGTMP, REGTMP); > - o4 = OP_IRR(opirr(AMOVF), 0, REGTMP, p->from.reg+1); > - o5 = OP_IRR(opirr(AMOVF), 4, REGTMP, p->from.reg); > + if(little) { > + o4 = OP_IRR(opirr(AMOVF), 0, REGTMP, p->from.reg); > + o5 = OP_IRR(opirr(AMOVF), 4, REGTMP, p->from.reg+1); > + } else { > + o4 = OP_IRR(opirr(AMOVF), 0, REGTMP, p->from.reg+1); > + o5 = OP_IRR(opirr(AMOVF), 4, REGTMP, p->from.reg); > + } > break; > case 16: > if(r == 
REGTMP) > asm.c.orig:1062,1069 - > /n/sources/patch/0l-movd-fpreg-order/asm.c:1077,1089 > o4 = OP_IRR(opirr(AMOVF), 0, REGTMP, p->from.reg); > break; > case 8: > - o1 = OP_IRR(opirr(AMOVF), v, r, p->from.reg+1); > - o2 = OP_IRR(opirr(AMOVF), v+4, r, p->from.reg); > + if(little) { > + o1 = OP_IRR(opirr(AMOVF), v, r, p->from.reg); > + o2 = OP_IRR(opirr(AMOVF), v+4, r, p->from.reg+1); > + } else { > + o1 = OP_IRR(opirr(AMOVF), v, r, p->from.reg+1); > + o2 = OP_IRR(opirr(AMOVF), v+4, r, p->from.reg); > + } > break; > case 4: > o1 = OP_IRR(opirr(AMOVF), v, r, p->from.reg); > > Let me know if I missed anything. > > Thanks and Merry Christmas. > - cherry > >