> A not-very-thorough search at tuhs turned up V9/jerq/src/lib/j/bitblt.c
> It appears to be a pre-Reiser bitblt, not what was asked for.

The Reiser code is in the V8 jerq tarball that Dan Cross donated:
v8jerq.tar.bz2

It is in file blit/src/libj/bitblt.s (attached below for convenience).
It is 750 lines of 68K assembler. It does not appear to have been
ported to the Bellmac 32 CPU. Maybe it did not make sense in that context.

Paul

=====
#
#	bitblt(sm,r,dm,p,fc)
#	Bitmap *sm,*dm;
#	Rectangle r;
#	Point p;
#	int fc;
#
# by John F. Reiser summer 1982
#
# Depending on the case at hand, generate very good code and execute it.
#
# ---------------------------------------------------------------------
# Reviewer's reading notes (derived from the code below)
#
# Flow:
#   1. Clip r against sm's rect and, translated to p, against dm's rect.
#      The code at L5 clips using the low word of each register; it is
#      run once for y, then L50 swaps the register halves and runs it
#      again for x, continuing at L55.  If either pass finds size-1 < 0
#      the routine returns immediately through "ret".
#   2. Build the two edge masks and the inner word count; when sm == dm,
#      reverse the vertical and/or horizontal scan direction to
#      compensate for overlap of source and destination.
#   3. Call addr() twice: %a2 = addr(sm,r.origin), %a0 = addr(dm,p).
#      The second call is entered with "jmp" after the address of L82
#      (narrow) or L85 (wide) has been pushed, so addr() returns
#      straight into the chosen case.  The dm,p arguments are still on
#      the stack from then on; "ret8" removes them.
#   4. Narrow case (w-1 <= 15): run one of eight fixed loops
#      op[MSCX]nw[rl] selected through nrwtab.
#      Wide case: assemble a loop into gensiz bytes reserved on the
#      stack, load the run-time registers, and "jmp (%sp)" into it.
#      The generated code ends with "jmp (%a5)", %a5 -> retgen.
#
# fc indexes nrwtab and widtab (scaled by 4) in the order M, S, C, X:
# store, or, and-with-complement, exclusive-or.
#
# Stack addressing: no frame pointer is set up.  The parameter offsets
# are defined as if relative to %fp and are rebased onto %sp by adding
# NREG*4-4 (NREG saved registers), plus 8 more once dm,p are pushed.
#
# Generator idiom: "short MOVWI+nnn" (likewise MOVLI, ADDWI) emits only
# the opcode word of an immediate-operand instruction.  The instruction
# written on the next line is NOT executed at that point; its encoding
# is the immediate data.  For example
#	short MOVWI+05300
#	swap.w %d0
# stores the opcode of "swap.w %d0" through (%a5)+, i.e. appends that
# instruction to the code being generated.  These pairs must stay
# adjacent, one statement per line.
#
# NOTE(review): jumping into freshly written stack memory assumes no
# instruction cache has to be flushed first -- confirm before reusing
# this on anything other than the original target.
# ---------------------------------------------------------------------
#

# offsets in a Point
	set x,0
	set y,2
# offsets in a Rectangle
	set origin,0
	set corner,4
# offsets in a Bitmap
	set base,0
	set width,4
	set rect,6
# parameter offsets from %fp
	set sm,8
	set r,12
	set dm,20
	set p,24
	set fc,28

	set NREG,11

	global bitblt
bitblt:
	movm.l &0x3f3e,-(%sp)	# save C registers
	movm.l NREG*4-4+sm(%sp),&0x001f
		# d1=r.o.x,,r.o.y; d2=r.c.x,,r.c.y; d4=p.x,,p.y;
	mov.l %d0,%a4	# sm
	mov.l %d3,%a5	# dm
	mov.w NREG*4-4+fc(%sp),%a6	# a6.w == fc
	movm.l rect(%a4),&0x9	# d0=sm.o.x,,sm.o.y; d3=sm.c.x,,sm.c.y;
	movm.l rect(%a5),&0x60	# d5=dm.o.x,,dm.o.y; d6=dm.c.x,,dm.c.y;

	lea.l $L50(%pc),%a0	# continuation after the first (y) clipping pass
L5:
		# clip r.y to sm.y
	mov.w %d0,%d7	# sm.o.y
	sub.w %d1,%d7	# - r.o.y
	ble.b L10
	mov.w %d0,%d1	# r.o.y = sm.o.y; /* r.o.y was above sm.rect */
	add.w %d7,%d4	# p.y parallels r.o.y
L10:
	cmp.w %d2,%d3	# r.c.y : sm.c.y
	ble.b L20
	mov.w %d3,%d2	# r.c.y = sm.c.y; /* bottom of r was below sm.rect */
L20:
		# clip (r.y at p.y) to dm.y
	mov.w %d5,%d7	# dm.o.y
	sub.w %d4,%d7	# -p.y
	ble.b L30
	mov.w %d5,%d4	# p.y = dm.o.y; /* p.y was above dm.rect */
	add.w %d7,%d1	# r.o.y parallels p.y
L30:
	mov.w %d1,%d7	# r.o.y
	add.w %d6,%d7	# + dm.c.y
	sub.w %d4,%d7	# - p.y /* == max y that dm.rect allows in r */
	cmp.w %d2,%d7	# r.c.y : limit
	ble.b L40
	mov.w %d7,%d2	# r.c.y = limit
L40:
	mov.w %d2,%d7	# r.c.y
	sub.w %d1,%d7	# - r.o.y
	sub.w &1,%d7	# /* == h-1 in bits */
	blt.b ret	# nothing left after clipping
	jmp (%a0)	# L50 after the y pass, L55 after the x pass

# Exit ladder: each entry point undoes one more layer of stack use.
retgen:
	lea.l gensiz(%sp),%sp	# release the generated-code buffer
ret8:
	add.l &8,%sp	# release the dm,p arguments pushed for addr()
ret:
	movm.l (%sp)+,&0x7cfc	# restore the registers saved at entry
	rts

L50:
		# mirror in pi/4 and reuse same code to clip x
	swap.w %d0; swap.w %d1; swap.w %d2; swap.w %d3
	swap.w %d4; swap.w %d5; swap.w %d6; swap.w %d7
	lea.l $L55(%pc),%a0
	br.b L5

L55:
	mov.l %d1,%a1
	mov.l %d4,%d6
#
# So far
#	%d7 == h-1,,w-1
#	%d6 == p.y,,p.x
#	%a6.w == fc
#	%a5 == dm
#	%a4 == sm
#	%a1 == r.o.y,,r.o.x
#
# Compute masks, and width in words
#
	mov.w %d6,%d0	# p.x /* left endpoint of dst */
	mov.w %d7,%d1	# w-1
	add.w %d6,%d1	# right endpoint

	mov.l &-1,%d3
	mov.l &15,%d2
	and.w %d0,%d2
	lsr.w %d2,%d3	# mask1
	mov.l &-1,%d5
	mov.l &15,%d2
	and.w %d1,%d2
	add.w &1,%d2
	lsr.w %d2,%d5
	not.w %d5	# mask2
	swap.w %d5
	mov.w %d3,%d5	# mask2,,mask1

	asr.w &4,%d0
	asr.w &4,%d1
	sub.w %d0,%d1
	sub.w &1,%d1	# inner-loop width in words

	mov.l &0,%d4	# assume LtoR
	mov.w width(%a5),%d3
	add.w %d3,%d3
	mov.w width(%a4),%d2
	add.w %d2,%d2
#
# So far
#	%d7 == h-1,,w-1 in bits
#	%d6 == p.y,,p.x
#	%d5 == mask2,,mask1
#	%d4 == 0 (LtoR)
#	%d3.w == dm width in bytes
#	%d2.w == sm width in bytes
#	%d1.w == inner-loop width in words
#	%a6.w == fc
#	%a5 == dm
#	%a4 == sm
#	%a1 == r.o.y,,r.o.x
#
# If necessary, compensate for overlap of source and destination
#
	cmp.l %a4,%a5
	bne.b L80	# overlap not possible
	mov.l %d6,%d0	# p.y,,p.x
	mov.w %a1,%d0	# p.y,,r.o.x
	cmp.l %a1,%d0	# r.o.y : p.y
	bge.b L60	# if (r.o.y < p.y)
	mov.l %d7,%d0	# h-1,,w-1
	clr.w %d0	# h-1,,0
	add.l %d0,%a1	# r.o.y += h-1;
	add.l %d0,%d6	# p.y += h-1;
	neg.w %d3	# wdst = -wdst;
	neg.w %d2	# wsrc = -wsrc;
L60:
	cmp.w %d7,&16
	blt.b L70	# l<->r swap not needed for narrow
	cmp.w %d6,%a1	# p.x : r.o.x
	ble.b L70	# if (r.o.x < p.x)
	mov.l %a1,%d0
	add.w %d7,%d0
	mov.l %d0,%a1	# r.o.x += w-1;
	add.w %d7,%d6	# p.x += w-1;
	mov.l &-1,%d4	# RtoL
	swap.w %d5	# masks in other order
L70:
L80:
#
# Locate actual starting points
#
	mov.l %d6,%d0	# p.y,,p.x
	swap.w %d0
	mov.l %d0,-(%sp)	# p
	mov.l %a5,-(%sp)	# dm

	mov.l &15,%d0
	lea.l $L82(%pc),%a0	# assume narrow
	cmp.w %d7,%d0	# w-1 : 15
	ble.b L81	# guessed correctly
	lea.l $L85(%pc),%a0	# wide
L81:
	mov.l %a0,-(%sp)	# on return, go directly to wide/narrow code
	add.w %a6,%a6; add.w %a6,%a6	# with 4*fc

	mov.w %d1,%d7	# h-1 in bits,,inner width in words
	and.l %d0,%d6	# 0,,bit offset of p.x
	mov.l %a1,%d1	# r.o.y,,r.o.x
	and.w %d1,%d0	# bit offset of r.o.x
	sub.w %d0,%d6	# BO(p.x) - BO(r.o.x) /* amount of right rotation */
	swap.w %d1	# r.o.x,,r.o.y
	mov.l %d1,-(%sp)	# r.o
	mov.l %a4,-(%sp)	# sm
	lea.l addr,%a3
	jsr (%a3)
	mov.l %a0,%a2	# src = addr(sm,r.origin);
	add.l &8,%sp
	jmp (%a3)	# %a0 = addr(dm,p);

#
# Narrow case: the whole row fits in one 32-bit access.
# On entry (from addr's return): %d5 == mask2,,mask1, %d6.w == rotation,
# %d7 == h-1,,inner width, %a6.w == 4*fc, %a2 -> src, %a0 -> dst.
#
L82:
	mov.l &0,%d4
	mov.w %d5,%d4	# 0,,mask1
	swap.w %d5	# mask1,,mask2 (proper long mask; maybe 16 bits too wide)
	and.w %d5,%d4	# check for overlap of mask1 and mask2
	beq.b L83	# no overlap ==> %d5 already correct
	mov.l %d4,%d5	# overlap ==> reduce %d5 by 16 bits
	swap.w %d5	# and put it in the proper half
L83:
	swap.w %d7	# ,,height-1
	lea.l $nrwtab(%pc,%a6.w),%a6	# -> optab
	tst.w %d6	# amount of right rotation
	bge.b L84
	neg.w %d6	# negative ==> rotate left by the magnitude
	add.l &2,%a6	# second entry of the pair is the "l" loop
L84:
	add.w (%a6),%a6	# each entry is an offset relative to itself
	jmp (%a6)

nrwtab:
	short opMnwr-nrwtab- 0, opMnwl-nrwtab- 2
	short opSnwr-nrwtab- 4, opSnwl-nrwtab- 6
	short opCnwr-nrwtab- 8, opCnwl-nrwtab-10
	short opXnwr-nrwtab-12, opXnwl-nrwtab-14

# Fixed narrow loops.  In each: %d6 = rotate count, %d5 = long mask,
# %d2/%d3 = src/dst row step in bytes, %d7 = rows-1.

# store: dst = dst ^ ((rot(src) ^ dst) & mask)
opMnwr:
	mov.l (%a2),%d0
	mov.l (%a0),%d1
	ror.l %d6,%d0
	eor.l %d1,%d0
	and.l %d5,%d0
	eor.l %d1,%d0
	mov.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opMnwr
	br ret8

opMnwl:
	mov.l (%a2),%d0
	mov.l (%a0),%d1
	rol.l %d6,%d0
	eor.l %d1,%d0
	and.l %d5,%d0
	eor.l %d1,%d0
	mov.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opMnwl
	br ret8

# or: dst |= rot(src) & mask
opSnwr:
	mov.l (%a2),%d0
	ror.l %d6,%d0
	and.l %d5,%d0
	or.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opSnwr
	br ret8

opSnwl:
	mov.l (%a2),%d0
	rol.l %d6,%d0
	and.l %d5,%d0
	or.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opSnwl
	br ret8

# clear: dst &= ~(rot(src) & mask)
opCnwr:
	mov.l (%a2),%d0
	ror.l %d6,%d0
	and.l %d5,%d0
	not.l %d0
	and.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opCnwr
	br ret8

opCnwl:
	mov.l (%a2),%d0
	rol.l %d6,%d0
	and.l %d5,%d0
	not.l %d0
	and.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opCnwl
	br ret8

# xor: dst ^= rot(src) & mask
opXnwr:
	mov.l (%a2),%d0
	ror.l %d6,%d0
	and.l %d5,%d0
	eor.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opXnwr
	br ret8

opXnwl:
	mov.l (%a2),%d0
	rol.l %d6,%d0
	and.l %d5,%d0
	eor.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opXnwl
	br ret8

# Opcode templates used by the generator (register fields are added in).
	set DBR,0x51c8
	set MOVLI,0x2000+074	# mov.l &...,
	set MOVWI,0x3000+074	# mov.w &...,
	set ADDWI,0x0640	# add.w &...,

# Flag bit numbers kept in the high word of %d6 while generating.
# Bit 31 of %d6 (set at L85 from %d4) marks a right-to-left scan.
	set FDFRAG,16	# first destination is a fragment
	set LDFRAG,17	# last destination is a fragment
	set NSHF1,18
	set FD2D,19	# first destination should store 2 words
	set LD2D,20	# last destination should store 2 words
	set FSTORE,21
	set DST1L,24	# dst inner count is 0
	set SRC1L,25	# Nsrc is 2

	set gensiz,80	# bytes of stack reserved for generated code

# Per-function-code templates, 4 bytes each, indexed by 4*fc:
# word 0 is the store/operate instruction, word 1 is an optional
# pre-operation ("not.w %d0" for clear) or 0 for none (tested at L730).
widtab:
	mov.w %d0,(%a0)+; short 0
	or.w %d0,(%a0)+; short 0
	and.w %d0,(%a0)+; not.w %d0
	eor.w %d0,(%a0)+; short 0

#
# So far
#	%d7 == h-1 (bits),,w (words)
#	%d6 == 0,,rotate count
#	%d5 == mask2,,mask1
#	%d4 == -RtoL
#	%d3.w == wdst (bytes)
#	%d2.w == wsrc (bytes)
#	%a6.w == 4*fc
#	%a2 -> src
#	%a0 -> dst
#
L85:
	lea.l $widtab(%pc,%a6.w),%a6
	tst.w %d4; bpl.b L300; bset &31,%d6
L300:
	mov.w %d7,%d0	# inner word count
	bne.b L304; bset &DST1L,%d6
L304:
	add.w &1,%d0	# Nsrc = 1+Ninner
	mov.w %d0,%a1	# + ...
	add.w &1,%d0	# Ndst = 1+Ninner+1
	add.w %d0,%d0	# magnitude of dst addressing side effects
	tst.l %d6; bpl.b L310
	neg.w %d0; add.l &2,%a0	# RtoL
L310:
	sub.w %d0,%d3	# compensate dst for autoincrement

	mov.w %d5,%d4	# mask1
	swap.w %d5	# mask2
	cmp.w %d4,&-1; beq.b L320; bset &FDFRAG,%d6
L320:
	cmp.w %d5,&-1; seq.b %d1; beq.b L330; bset &LDFRAG,%d6
L330:
	tst.w %d6; bne.b L360	# not NOSHIFT

	add.w &1,%a1	# Nsrc = 1+Ninner+1
	mov.l %d6,%d0; swap.w %d0; ext.w %d0	# 0,,flag bits
	asr.w &1,%d7; roxl.w &1,%d0	# account for inner words odd
	mov.b $nstab(%pc,%d0.w),%d0
	bpl.b L340; add.w &1,%d7
L340:
	add.b %d0,%d0
	bpl.b L350; sub.w &1,%d7
L350:
	swap.w %d0; eor.l %d0,%d6	# the bits
	btst &DST1L,%d6; bne.b L355
	btst &FD2D,%d6; beq.b L410
L355:
	ext.l %d4; bmi.b L410; swap.w %d4; not.w %d4	# NOSHIFT mask1 .l
	br.b L410	# NOSHIFT mask2 .l

# NOSHIFT adjustment table, indexed by the low flag bits of %d6 shifted
# left once with the "inner count is odd" bit brought in at bit 0.
nstab:
	byte 0x82,0x80,0x04,0x80	# 0x80: +1 inner; 0x40: -1 inner
	byte 0x02,0x00,0x44,0x00	# 0x04: FD2D; 0x02: NSHF1 no first word

L360:
	ext.w %d1; sub.w %d1,%d7	# extend inner loop
	mov.l &0xf,%d0	# 0 1 7 8 9 e f
	add.w &8,%d6	# 8 9 f 0 1 6 7
	and.w %d0,%d6
	sub.w &8,%d6	# 0 1 7 -8 -7 -2 -1 X=C= sign
	mov.w %d6,%d1; bge.b L367	# X unchanged
	neg.w %d1	# 8 7 2 1 X=C= 1
L367:
	roxl.w &1,%d1	# 0 2 e 11 f 5 3
	and.w %d0,%d1	# 0 2 e 1 f 5 3
	lsl.w &8,%d1	# magic position
	short ADDWI+001	# add.w &<opcode of next line>,%d1
	ror.l &8,%d0
	mov.w %d1,%a3	# the rotate instruction

	mov.l &0,%d1; not.w %d1	# 0,,-1
	ror.l %d6,%d1	# where the bits are after a rotate
	mov.w %d1,%d0; and.w %d4,%d0; beq.b L370	# 1 src word covers dst frag
	not.w %d1; and.w %d4,%d1; beq.b L370
	add.w &1,%a1; br.b L390	# fragment needs another src word
L370:
	sub.w &1,%d7	# .l takes an inner word
	bset &FD2D,%d6
	ext.l %d4; bmi.b L390
	swap.w %d4; not.w %d4	# mask1 .l
L390:
	swap.w %d1
	mov.w %d1,%d0; and.w %d5,%d0; beq.b L400	# 1 src word covers dst frag
	not.w %d1; and.w %d5,%d1; beq.b L400
	add.w &1,%a1; br.b L420	# fragment needs another src word
L400:
	dbr %d7,L405	# .l takes an inner word
	clr.w %d7; br.b L420	# nothing there to take
L405:
L410:
	bset &LD2D,%d6
	ext.l %d5; bmi.b L420
	swap.w %d5; not.w %d5	# mask2 .l
L420:
		# fc is re-read from the caller's frame; +8 skips the dm,p still pushed
	tst.w NREG*4-4+fc+8(%sp); bne.b L430; bset &FSTORE,%d6
L430:
	mov.w %a1,%d0	# Nsrc
	add.w %d0,%d0	# magnitude of src addressing side effects
	tst.l %d6; bpl.b L431
	neg.w %d0; add.l &2,%a2	# RtoL
L431:
	sub.w %d0,%d2	# compensate src for autoincrement

	lea.l -gensiz(%sp),%sp	# reserve the code buffer on the stack
	mov.l %sp,%a5	# %a5 -> next generated instruction
	swap.w %d3
	swap.w %d2

	cmp.w %a1,&2; bgt L445	# more than 2 source words ==> general generator
	short MOVWI+00000	# mov.w &<opcode of next line>,%d0
	mov.l (%a2)+,%d0
	tst.l %d6; bpl.b L432; add.w &010,%d0	# RtoL
L432:
	mov.w %d0,(%a5)+
	mov.l &0,%d1; mov.w &-0x1000,%d2; mov.w &0100,%d3
	lea.l $L438(%pc),%a1	# inline "and" parameter for the shared tail at L700

	mov.l &-1,%d0	# prepare bits to decide on "swap"
	tst.w %d6; bpl.b L432d; neg.w %d6
	lsl.l %d6,%d0; br.b L432e
L432d:
	lsr.l %d6,%d0
L432e:
	btst &DST1L,%d6; beq.b L434
	bset &FD2D,%d6; bne.b L432a
	ext.l %d4; bmi.b L432a; swap.w %d4; not.w %d4	# mask1 .l
L432a:
	bset &LD2D,%d6; bne.b L432b
	ext.l %d5; bmi.b L432b; swap.w %d5; not.w %d5	# mask2 .l
L432b:
	and.l %d5,%d4; mov.l %d4,%d5	# single .l does it all
	add.l &1,%d4; beq L730	# all 32 bits
	sub.l &1,%d4	# need an "and"

	and.l %d5,%d0
	cmp.l %d5,%d0
	beq.b L432c
	short MOVWI+05300	# emit the instruction on the next line
	swap.w %d0
L432c:
	tst.w %d6; bne L690	# and a rotate
	br.b L437	# NOSHIFT
L434:
	mov.w %a3,(%a5)+	# the rotate instr
	short MOVWI+05300	# emit the instruction on the next line
	mov.l %d0,%d1	# copy after rotate

	and.l %d4,%d0
	cmp.l %d4,%d0
	seq.b %d0; neg.b %d0; ext.w %d0
	short ADDWI+000	# add.w &<opcode of next line>,%d0
	swap.w %d0
	mov.w %d0,(%a5)+
	lea.l $L436(%pc),%a1
	br.b L437
# L436 and L438 are used like genwid call sites: the "and" is the inline
# parameter read through (%a1), and L700's "jmp 2(%a1)" resumes after it.
L436:
	and.w %d4,%d0
	mov.w &01001,%d1; clr.w %d2; clr.w %d3
	lea.l $L438(%pc),%a1
L437:
	br L700
L438:
	and.w %d5,%d0
	br L545

L445:
#
# During compilation
#	%d7 == h-1,,w
#	%d6 == flags,,rotate count
#	%d5 == mask2
#	%d4 == mask1
#	%d3 == dst_dW,,bits for xxx.[wl]
#	%d2 == src_dW,,bits for mov.[wl]
#	%d1.w == parity
#	%a6 -> optab
#	%a5 -> next generated instruction
#	%a4 -> top of inner loop
#	%a3.w == rotate instruction
#	%a2 -> src
#	%a1 -> fragment "and" instruction
#	%a0 -> dst
#
	tst.w %d6; bne.b L480	# not NOSHIFT ==> always need first word
	btst &NSHF1,%d6; bne.b L485	# interplay of NOSHIFT, odd, FDFRAG
L480:
	mov.l &1,%d1
	and.w %d7,%d1	# parity of inner word count
	lsl.w &2,%d1	# even ==> frag in %d0, odd ==> frag in %d1
	bsr genwid	# generate for first word
	and.w %d4,%d0
L485:
	cmp.w %d7,&2; ble.b L490	# inner dbr always falls through
	btst &FSTORE,%d6; beq.b L490	# no conflict "mov field" vs. %d6
	short MOVWI+05300	# init inner count
	mov.w %a4,%d6
L490:
	mov.l %a5,%a4	# top of inner loop
	asr.w &1,%d7	# check inner word count
	blt.b L540	# single .l does it all
	bcc.b L500	# even
	beq.b L520	# 1
	short MOVWI+05300
	br.b L500	# jump into middle of inner loop
	add.l &1,%a4	# remember to fixup "br.b"
	add.w &1,%d7	# middle entry ==> no dbr offset
L500:
	beq.b L530	# no inner words at all
	mov.l &4,%d1	# use %d1 in
	bsr.b genwid	# even half of inner loop
	short 0
L510:
	mov.w %a4,%d0; neg.w %d0
	bclr &0,%d0; beq.b L520
	add.w %a5,%d0; mov.b %d0,(%a4)+	# fixup "br.b" into middle
L520:
	mov.l &0,%d1	# use %d0 in
	bsr.b genwid	# odd half of inner loop
	short 0
	sub.w &1,%d7	# offset for inner dbr loop
	ble.b L530	# dbr always falls through
	mov.w &DBR+6,(%a5)+
	sub.l %a5,%a4; mov.w %a4,(%a5)+	# dbr displacement
L530:
	btst &LDFRAG,%d6; beq.b L540	# omit "and" for full last word
	mov.l &4,%d1
	bsr.b genwid
	and.w %d5,%d0
L540:
	tst.w %d7; ble.b L545	# no inner loop
	btst &FSTORE,%d6; bne.b L545	# possible conflict "mov field" vs. %d6
	short MOVWI+05300	# init inner count
	mov.w %a4,%d6
L545:
	swap.w %d3; tst.w %d3; beq.b L546	# wdst is full width of bitmap
	mov.w %d3,%a1	# dst_dW
	short MOVWI+05300	# emit the instruction on the next line
	add.w %a1,%a0
L546:
	swap.w %d2; tst.w %d2; beq.b L547	# wsrc is full width of bitmap
	mov.w %d2,%a3	# src_dW
	short MOVWI+05300	# emit the instruction on the next line
	add.w %a3,%a2
L547:
	mov.w &DBR+7,(%a5)+
	mov.l %sp,%a4	# top of outer loop
	cmp.b (%a4),&0x60; bne.b L548	# not br.b
	mov.b 1(%a4),%d0; ext.w %d0; lea.l 2(%a4,%d0.w),%a4	# collapse branches
L548:
	sub.l %a5,%a4; mov.w %a4,(%a5)+	# dbr displacement
	short MOVWI+05300	# final generated instruction: exit via %a5
	jmp (%a5)

		# generation finished: load run-time registers and run the code
	mov.w %d7,%a4	# init inner count
	mov.w %d7,%d6	# init inner count, 2nd case
	swap.w %d7	# h-1
	lea.l $retgen(%pc),%a5
	jmp (%sp)

#
# genwid: append to the code at (%a5)+ the instructions handling one
# destination word (or long): fetch, rotate, optional mask, and the
# store/operate instruction taken from the table at %a6.
#
#   in:	%d1.w = 0 or 4, byte index into genget selecting the template
#	that fetches into %d0 or into %d1.
#	The 16-bit word immediately after the caller's bsr is an inline
#	parameter: 0 means an inner-loop word; otherwise it is the "and"
#	instruction for a fragment, and its low bit (shifted out by lsr
#	and tested with bcs) marks the last word.
#	%d6 flags,,rotate count, %a3.w rotate instruction, %d4/%d5 masks.
#   out:	returns by "jmp 2(%a1)", i.e. past the inline parameter.
#	%d0-%d3 and %a1 are overwritten.
#
genwid:
	mov.l (%sp)+,%a1	# -> inline parameter
	mov.l $genget(%pc,%d1.w),%d0
	tst.w %d1; beq.b L550; mov.w &01001,%d1; swap.w %d1	# parity bits
L550:
	clr.w %d2; clr.w %d3	# .[wl] bits default to .w
	tst.l %d6; bpl.b L560; add.w &010,%d0	# RtoL
L560:
	tst.w %d6; bne.b L569	# not NOSHIFT
	bclr &9,%d0	# NOSHIFT always %d0
	mov.w (%a1),%d1; bne.b L564	# not inner loop
	btst &FSTORE,%d6; beq.b L562	# not "mov"
	mov.l &070,%d1; and.w %d0,%d1
	lsl.w &3,%d1; or.w %d1,%d0	# copy RtoL mode
	add.w &-0x1000,%d0	# .w ==> .l
	mov.w %d0,(%a5)+
L561:
	jmp 2(%a1)

# Fetch templates; read as data by genwid, never executed in place.
genget:
	swap.w %d0; mov.w (%a2)+,%d0
	swap.w %d1; mov.w (%a2)+,%d1

L562:
	mov.w &-0x1000,%d2; mov.w &0100,%d3	# .w +=> .l
	add.w %d2,%d0
L563:
	mov.l &0,%d1	# NOSHIFT always %d0
	br L698	# assemble the fetch, then do the op
L564:
	lsr.w &1,%d1; bcs.b L562	# NOSHIFT always LD2D
	btst &FD2D,%d6; bne.b L562
	br.b L563	# alas, .w

L569:
	mov.w (%a1),%d1; beq.b L630	# inner loop
L570:
	lsr.w &1,%d1; bcs.b L580	# last word
	add.w &-0x1000,%d0	# force fetch .l
	mov.w %d0,(%a5)+	# the fetch .l
	short MOVLI+00000	# mov.l &<opcodes of next two lines>,%d0
	mov.l %d0,%d1
	swap.w %d0
	clr.w %d1; eor.l %d1,%d0	# parity for mov.l %d[01],%d[10]
	tst.l %d1; sne.b %d1; sub.b %d1,%d0	# parity for swap.w %d[01]
	mov.l %d0,(%a5)	# ran out of registers
	mov.l &0x4c80ec,%d0	# microcoded bits
	tst.l %d6; bpl.b L572; ror.l &1,%d0	# RtoL
L572:
	tst.w %d6; bpl.b L574; ror.l &2,%d0	# rol
L574:
	btst &FD2D,%d6; beq.b L576; ror.l &4,%d0	# first op .l
	mov.w &-0x1000,%d2; mov.w &0100,%d3	# .w +=> .l corrections
L576:
	ror.l &1,%d0; bpl.b L578	# "swap" not needed
	add.l &2,%a5
	ror.l &8,%d0; bpl.b L577	# existing "swap" parity OK
	eor.w &1,(%a5)
L577:
	ror.l &8,%d0; bpl.b L578	# existing order OK
	sub.l &2,%a5
	mov.l (%a5),%d0; swap.w %d0; mov.l %d0,(%a5)
	add.l &2,%a5
L578:
	add.l &2,%a5
	swap.w %d1	# junk,,parity
	br.b L690

L580:
	btst &LD2D,%d6; beq.b L630	# operator .w
	mov.w &-0x1000,%d2	# mov.w +=> mov.l
	mov.w &0100,%d3	# xxx.w +=> xxx.l
L630:
	tst.l %d6; smi.b %d1
	eor.b %d6,%d1; bpl.b L650	# rotation in same direction as scan
	swap.w %d0	# interchange "swap" and "mov"
L650:
	mov.l %d0,(%a5)+
	swap.w %d1	# junk,,parity
	mov.w (%a1),%d0; lsr.w &1,%d0; bcs.b L660	# last word
	short MOVWI+000	# mov.w &<opcode of next line>,%d0
	mov.l %d0,%d1
	eor.w %d1,%d0
	mov.w %d0,(%a5)+
	br.b L690
L660:
	tst.l %d6; bmi.b L690	# RtoL
	btst &LD2D,%d6; beq.b L690	# not .l
	tst.w %d6; bpl.b L670	# ror
	sub.l &2,%a5; br.b L690	# no "swap"
L670:
	mov.w -4(%a5),(%a5)+	# extra "swap"
L690:
	mov.w %a3,%d0
	eor.b %d1,%d0
L698:
	mov.w %d0,(%a5)+	# the rotate instruction
# L700: shared tail -- optional "and", optional "not", then the operator.
# Also entered directly (with %a1 preset) from the short path at L437.
L700:
	mov.w (%a1),%d0; beq.b L730	# inner loop
	btst &0,%d0; bne.b L705	# last word
	btst &FDFRAG,%d6; beq.b L730	# no "and"
L705:
	add.w %d3,%d0; add.w %d1,%d0; sub.b %d1,%d0	# and.[wl] %d[45],%d[01]
	btst &FSTORE,%d6; beq.b L720	# "mov" partial word
	swap.w %d0	# save the "and"
	short MOVWI+00000	# ,%d0
	mov.w (%a0),%d6
	add.w %d2,%d0	# mov.[wl]
	tst.l %d6; bpl.b L710; add.w &020,%d0	# RtoL; "(%a0)" ==> "-(%a0)"
L710:
	mov.w %d0,(%a5)+	# instr to fetch memory part of word
	short MOVWI+00000	# ,%d0
	eor.w %d6,%d0
	add.w %d3,%d0; add.b %d1,%d0	# eor.[wl] %d6,%d[01]
	swap.w %d0; mov.l %d0,(%a5)+; swap.w %d0; mov.w %d0,(%a5)+
	mov.w %d2,%d0; add.b %d1,%d0	# mov.[wl] %d[01],
	mov.l &-0100,%d1	# RtoL correction, if necessary
	br.b L770
L720:
	mov.w %d0,(%a5)+	# "and" for non-mov operators
L730:
	mov.w 2(%a6),%d0; beq.b L740	# not F_CLR
	add.w %d3,%d0; add.b %d1,%d0	# not.[wl] %d[01]
	mov.w %d0,(%a5)+
L740:
	btst &FSTORE,%d6; beq.b L790	# non-"mov"
	mov.w %d2,%d0; add.b %d1,%d0	# mov.[wl] %d[01],
	mov.l &0100,%d1	# RtoL correction, if necessary
L770:
	add.w (%a6),%d0
	tst.l %d6; bpl.b L780
	add.w %d1,%d0	# RtoL correction
L780:
	mov.w %d0,(%a5)+
	jmp 2(%a1)
L790:
	mov.w %d1,%d0; clr.b %d0; add.w %d3,%d0	# xxx.[wl] %d[01]
	mov.l &010,%d1	# RtoL correction, if necessary
	br.b L770

#
# During execution
#	%d[01] == rotator
#	%d2 [reserved for texture bits]
#	%d3 [reserved for texture index]
#	%d4 == mask1
#	%d5 == mask2
#	%d6.w == inner count
#	%d7.w == outer count
#	%a0 -> dst
#	%a1 == dst_dW
#	%a2 -> src
#	%a3 == src_dW
#	%a4.w == inner count init
#	%a5 -> retgen
#	%a6 [reserved for -> texture]
#