> A not-very-thorough search at tuhs turned up V9/jerq/src/lib/j/bitblt.c
> It appears to be a pre-Reiser bitblt, not what was asked for.


The Reiser code is in the V8 jerq tarball that Dan Cross donated:
v8jerq.tar.bz2 <https://www.tuhs.org/Archive/Distributions/Research/Dan_Cross_v8/v8jerq.tar.bz2>

It is in file blit/src/libj/bitblt.s (attached below for convenience). It is 750 lines of 68K assembler. It does not appear to have been ported to the Bellmac 32 CPU. Maybe it did not make sense in that context.

Paul

=====

#
#  bitblt(sm,r,dm,p,fc)
#  Bitmap *sm,*dm;
#  Rectangle r;
#  Point p;
#  int fc;
#
#  by John F. Reiser  summer 1982
#
#  Depending on the case at hand, generate very good code and execute it.
#

		# offsets in a Point
		# (x and y are 2 bytes apart: 16-bit coordinates)
	set x,0
	set y,2
		# offsets in a Rectangle
		# (two Points: origin, then corner)
	set origin,0
	set corner,4
		# offsets in a Bitmap
		# (width is doubled below to get bytes, so it counts 16-bit words)
	set base,0
	set width,4
	set rect,6
		# parameter offsets from %fp
		# (no frame is linked in bitblt; the code reaches the arguments
		#  as NREG*4-4+offset(%sp) once the registers have been saved)
	set sm,8
	set r,12
	set dm,20
	set p,24
	set fc,28

		# number of longs pushed by the movm.l at entry to bitblt
	set NREG,11

	global bitblt
#
#  Entry: save registers, fetch the arguments, and clip.
#
#  Register pairs are written "hi,,lo" (high word,,low word).  The clip
#  code at L5 looks at the low words only.  It runs twice: first on y,
#  then (after L50 swaps the halves of %d0-%d7) on x.  %a0 holds the
#  continuation address for the current pass.  As the comments on the
#  compares indicate, "cmp a,b" followed by "ble" branches when a <= b.
#
bitblt:
	movm.l &0x3f3e,-(%sp)		# save C registers
		# the mask has NREG bits set, so NREG longs are now on the
		# stack and the first argument is at NREG*4-4+sm(%sp)
	movm.l NREG*4-4+sm(%sp),&0x001f
		# d0=sm; d3=dm;  (copied to %a4/%a5 just below)
		# d1=r.o.x,,r.o.y; d2=r.c.x,,r.c.y; d4=p.x,,p.y;
	mov.l %d0,%a4	# sm
	mov.l %d3,%a5	# dm
	mov.w NREG*4-4+fc(%sp),%a6	# a6.w == fc
	movm.l rect(%a4),&0x9	# d0=sm.o.x,,sm.o.y; d3=sm.c.x,,sm.c.y;
	movm.l rect(%a5),&0x60	# d5=dm.o.x,,dm.o.y; d6=dm.c.x,,dm.c.y;

		# after the first (y) pass continue at L50
	lea.l $L50(%pc),%a0
L5:
		# clip r.y to sm.y
	mov.w %d0,%d7	# sm.o.y
	sub.w %d1,%d7	# - r.o.y
	ble.b L10
	mov.w %d0,%d1	# r.o.y = sm.o.y; /* r.o.y was above sm.rect */
	add.w %d7,%d4	# p.y parallels r.o.y
L10:
	cmp.w %d2,%d3	# r.c.y : sm.c.y
	ble.b L20
	mov.w %d3,%d2	# r.c.y = sm.c.y; /* bottom of r was below sm.rect */
L20:
		# clip (r.y at p.y) to dm.y
	mov.w %d5,%d7	# dm.o.y
	sub.w %d4,%d7	# -p.y
	ble.b L30
	mov.w %d5,%d4	# p.y = dm.o.y; /* p.y was above dm.rect */
	add.w %d7,%d1	# r.o.y parallels p.y
L30:
	mov.w %d1,%d7	# r.o.y
	add.w %d6,%d7	# + dm.c.y
	sub.w %d4,%d7	# - p.y  /* == max y that dm.rect allows in r */
	cmp.w %d2,%d7	# r.c.y : limit
	ble.b L40
	mov.w %d7,%d2	# r.c.y = limit
L40:
	mov.w %d2,%d7	# r.c.y
	sub.w %d1,%d7	# - r.o.y
	sub.w &1,%d7	# /* == h-1  in bits */
		# nothing left after clipping ==> return without drawing
	blt.b ret
	jmp (%a0)

		# Return paths.  The generated wide-case code exits through
		# retgen, which releases the gensiz-byte code buffer.  ret8
		# drops the two longs (p and dm) that were pushed as the
		# arguments of the second addr call.  ret restores the
		# registers saved at entry.
retgen:
	lea.l gensiz(%sp),%sp
ret8:
	add.l &8,%sp
ret:
	movm.l (%sp)+,&0x7cfc
	rts

L50:
		# mirror in pi/4 and reuse same code to clip x
		# (%d7 is swapped too, so h-1 moves to its high word and the
		#  second pass leaves w-1 in its low word)
	swap.w %d0; swap.w %d1; swap.w %d2; swap.w %d3
	swap.w %d4; swap.w %d5; swap.w %d6; swap.w %d7
	lea.l $L55(%pc),%a0
	br.b L5

#
#  Clipping is done.  Compute the edge masks and the inner word count,
#  choose the scan direction when source and destination are the same
#  Bitmap, then call addr twice to get the starting src and dst word
#  addresses.  The second call is entered by jmp, so that its return
#  lands directly in the narrow (L82) or wide (L85) code.
#
L55:
	mov.l %d1,%a1
	mov.l %d4,%d6
#
#  So far
#	%d7 == h-1,,w-1
#	%d6 == p.y,,p.x
#	%a6.w == fc
#	%a5 == dm
#	%a4 == sm
#	%a1 == r.o.y,,r.o.x
#
#  Compute masks, and width in words
#
	mov.w %d6,%d0		# p.x  /* left endpoint of dst */
	mov.w %d7,%d1		# w-1
	add.w %d6,%d1		# right endpoint

		# mask1 = 0xffff >> (left & 15)
	mov.l &-1,%d3
	mov.l &15,%d2
	and.w %d0,%d2
	lsr.w %d2,%d3		# mask1
		# mask2 = ~(0xffff >> ((right & 15) + 1))
	mov.l &-1,%d5
	mov.l &15,%d2
	and.w %d1,%d2
	add.w &1,%d2
	lsr.w %d2,%d5
	not.w %d5		# mask2
	swap.w %d5
	mov.w %d3,%d5		# mask2,,mask1

		# (right >> 4) - (left >> 4) - 1: the number of dst words
		# strictly between the first and the last one
	asr.w &4,%d0
	asr.w &4,%d1
	sub.w %d0,%d1
	sub.w &1,%d1		# inner-loop width in words

	mov.l &0,%d4		# assume LtoR
		# Bitmap width is doubled to get the row stride in bytes
	mov.w width(%a5),%d3
	add.w %d3,%d3
	mov.w width(%a4),%d2
	add.w %d2,%d2
#
#  So far
#	%d7 == h-1,,w-1  in bits
#	%d6 == p.y,,p.x
#	%d5 == mask2,,mask1
#	%d4 == 0  (LtoR)
#	%d3.w == dm width in bytes
#	%d2.w == sm width in bytes
#	%d1.w == inner-loop width in words
#	%a6.w == fc
#	%a5 == dm
#	%a4 == sm
#	%a1 == r.o.y,,r.o.x
#
#  If necessary, compensate for overlap of source and destination
#
	cmp.l %a4,%a5
	bne.b L80		# overlap not possible
		# give %d0 the same low word as %a1 so that the long compare
		# is decided by the y halves alone
	mov.l %d6,%d0	# p.y,,p.x
	mov.w %a1,%d0	# p.y,,r.o.x
	cmp.l %a1,%d0	# r.o.y : p.y
	bge.b L60	# if (r.o.y < p.y)
		# copying downwards onto itself: start at the last row and
		# step upwards through both bitmaps
	mov.l %d7,%d0	# h-1,,w-1
	clr.w %d0		# h-1,,0
	add.l %d0,%a1	# r.o.y += h-1;
	add.l %d0,%d6	# p.y += h-1;
	neg.w %d3		# wdst = -wdst;
	neg.w %d2		# wsrc = -wsrc;
L60:
	cmp.w %d7,&16
	blt.b L70		# l<->r swap not needed for narrow
	cmp.w %d6,%a1	# p.x : r.o.x
	ble.b L70	# if (r.o.x < p.x)
		# copying rightwards onto itself: start at the right-hand end
		# of each row and scan right to left
	mov.l %a1,%d0
	add.w %d7,%d0
	mov.l %d0,%a1	# r.o.x += w-1;
	add.w %d7,%d6	# p.x += w-1;
	mov.l &-1,%d4	# RtoL
	swap.w %d5		# masks in other order
L70:
L80:
#
#  Locate actual starting points
#
		# push the arguments of the second call, addr(dm,p), first;
		# they stay on the stack until ret8 removes them
	mov.l %d6,%d0	# p.y,,p.x
	swap.w %d0
	mov.l %d0,-(%sp)	# p
	mov.l %a5,-(%sp)	# dm

	mov.l &15,%d0
	lea.l $L82(%pc),%a0	# assume narrow
	cmp.w %d7,%d0		# w-1 : 15
	ble.b L81		# guessed correctly
	lea.l $L85(%pc),%a0	# wide
L81:
	mov.l %a0,-(%sp)	# on return, go directly to wide/narrow code
		# fc is scaled by 4 to index nrwtab and widtab, whose
		# entries are 4 bytes per function code
	add.w %a6,%a6; add.w %a6,%a6	# with 4*fc

	mov.w %d1,%d7		# h-1 in bits,,inner width in words
	and.l %d0,%d6		# 0,,bit offset of p.x
	mov.l %a1,%d1		# r.o.y,,r.o.x
	and.w %d1,%d0		# bit offset of r.o.x
	sub.w %d0,%d6		# BO(p.x) - BO(r.o.x) /* amount of right rotation */
	swap.w %d1		# r.o.x,,r.o.y
	mov.l %d1,-(%sp)	# r.o
	mov.l %a4,-(%sp)	# sm
		# addr is defined outside this file; per the comments below
		# it returns its result in %a0.
		# NOTE(review): the sequence relies on addr preserving the
		# data registers and %a1-%a6 and leaving its arguments on the
		# stack for the caller to pop -- confirm against addr.
	lea.l addr,%a3
	jsr (%a3)
	mov.l %a0,%a2		# src = addr(sm,r.origin);
	add.l &8,%sp
		# entered by jmp with the L82/L85 address on top of the
		# stack, so addr's return continues there
	jmp (%a3)		# %a0 = addr(dm,p);
#
#  Narrow case (w-1 <= 15), entered on return from addr(dm,p) with
#  %a2 -> src and %a0 -> dst.  No code is generated: each row is one
#  32-bit read-modify-write in one of the eight loops that follow
#  nrwtab.  This part builds the 32-bit mask in %d5 and dispatches on
#  the function code and the direction of rotation.
#
L82:
	mov.l &0,%d4
	mov.w %d5,%d4	# 0,,mask1
	swap.w %d5		# mask1,,mask2  (proper long mask; maybe 16 bits too wide)
	and.w %d5,%d4	# check for overlap of mask1 and mask2
	beq.b L83		# no overlap ==> %d5 already correct
		# both edges lie in the same word: the mask becomes
		# (mask1 & mask2),,0
	mov.l %d4,%d5	# overlap ==> reduce %d5 by 16 bits
	swap.w %d5		# and put it in the proper half
L83:
	swap.w %d7		# ,,height-1
	lea.l $nrwtab(%pc,%a6.w),%a6	# -> optab
		# each function code has two entries: rotate right, then
		# rotate left.  A negative count selects the second entry
		# and is made positive.
	tst.w %d6		# amount of right rotation
	bge.b L84
	neg.w %d6
	add.l &2,%a6
L84:
		# each entry is the distance from itself to its routine, so
		# adding it to its own address gives the routine address
	add.w (%a6),%a6
	jmp (%a6)

		# one pair of self-relative offsets per function code; the
		# constant subtracted from each is that entry's own offset
		# within the table
nrwtab:
	short opMnwr-nrwtab- 0, opMnwl-nrwtab- 2
	short opSnwr-nrwtab- 4, opSnwl-nrwtab- 6
	short opCnwr-nrwtab- 8, opCnwl-nrwtab-10
	short opXnwr-nrwtab-12, opXnwl-nrwtab-14

#
#  Narrow row loops, one per (function code, rotate direction).
#  Common register use:
#	%a2 -> src long	%a0 -> dst long
#	%d6 == rotate count	%d5 == 32-bit mask
#	%d2 == src row stride	%d3 == dst row stride  (bytes, may be negative)
#	%d7.w == rows-1 (dbr counter)
#  Each loop ends by branching to ret8.
#
#  opMnw[rl]:  dst = dst ^ ((rot(src) ^ dst) & mask)
#	i.e. the masked bits of dst are replaced by the rotated source
#
opMnwr:
	mov.l (%a2),%d0
	mov.l (%a0),%d1
	ror.l %d6,%d0
	eor.l %d1,%d0
	and.l %d5,%d0
	eor.l %d1,%d0
	mov.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opMnwr
	br ret8

opMnwl:
	mov.l (%a2),%d0
	mov.l (%a0),%d1
	rol.l %d6,%d0
	eor.l %d1,%d0
	and.l %d5,%d0
	eor.l %d1,%d0
	mov.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opMnwl
	br ret8

#
#  opSnw[rl]:  dst |= rot(src) & mask
#
opSnwr:
	mov.l (%a2),%d0
	ror.l %d6,%d0
	and.l %d5,%d0
	or.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opSnwr
	br ret8

opSnwl:
	mov.l (%a2),%d0
	rol.l %d6,%d0
	and.l %d5,%d0
	or.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opSnwl
	br ret8

#
#  opCnw[rl]:  dst &= ~(rot(src) & mask)
#
opCnwr:
	mov.l (%a2),%d0
	ror.l %d6,%d0
	and.l %d5,%d0
	not.l %d0
	and.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opCnwr
	br ret8

opCnwl:
	mov.l (%a2),%d0
	rol.l %d6,%d0
	and.l %d5,%d0
	not.l %d0
	and.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opCnwl
	br ret8

#
#  opXnw[rl]:  dst ^= rot(src) & mask
#
opXnwr:
	mov.l (%a2),%d0
	ror.l %d6,%d0
	and.l %d5,%d0
	eor.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opXnwr
	br ret8

opXnwl:
	mov.l (%a2),%d0
	rol.l %d6,%d0
	and.l %d5,%d0
	eor.l %d0,(%a0)
	add.w %d2,%a2
	add.w %d3,%a0
	dbr %d7,opXnwl
	br ret8

	set DBR,0x51c8
		# DBR is the opcode word of a dbr; the generator adds 6 or 7
		# to it to select %d6 (inner count) or %d7 (outer count).
		#
		# MOVLI/MOVWI/ADDWI are opcode words of instructions that take
		# an immediate operand.  Writing "short MOVWI+dst" directly in
		# front of an ordinary instruction makes that instruction's
		# encoding the immediate operand.  So "short MOVWI+05300"
		# followed by X appends the opcode of X to the generated code
		# through (%a5)+, and "short MOVWI+00000" loads it into %d0.
	set MOVLI,0x2000+074	# mov.l &...,
	set MOVWI,0x3000+074	# mov.w &...,
	set ADDWI,0x0640	# add.w &...,

		# flag bits kept in the high word of %d6 during code
		# generation (the low word is the rotate count); bit 31 is
		# set separately for a right-to-left scan
	set FDFRAG,16	# first destination is a fragment
	set LDFRAG,17	# last destination is a fragment
	set NSHF1,18
	set FD2D,19	# first destination should store 2 words
	set LD2D,20	# last destination should store 2 words
	set FSTORE,21
	set DST1L,24	# dst inner count is 0
	set SRC1L,25	# Nsrc is 2

		# bytes reserved on the stack for the generated code
	set gensiz,80

		# one 4-byte entry per function code, indexed by 4*fc:
		# word 0 is the template of the instruction that applies the
		# operator to the destination; word 2 is an extra instruction
		# generated before it (only the "and" entry has one), else 0
widtab:
	mov.w %d0,(%a0)+; short 0
	or.w %d0,(%a0)+; short 0
	and.w %d0,(%a0)+; not.w %d0
	eor.w %d0,(%a0)+; short 0

#
#  Wide case (w-1 > 15), entered on return from addr(dm,p).
#  Classify the transfer into flag bits in the high word of %d6, widen
#  the edge masks to longs where a first/last store can cover two
#  words, work out how far the pointers advance per row, and reserve
#  gensiz bytes of stack for the code to be generated.  Transfers
#  needing at most 2 source words per row are generated right here
#  (L432..L438, using the tail of genwid); all others go to L445.
#
#  So far
#	%d7 == h-1 (bits),,w (words)
#	%d6 == 0,,rotate count
#	%d5 == mask2,,mask1
#	%d4 == -RtoL
#	%d3.w == wdst (bytes)
#	%d2.w == wsrc (bytes)
#	%a6.w == 4*fc
#	%a2 -> src
#	%a0 -> dst
#
L85:
	lea.l $widtab(%pc,%a6.w),%a6
		# record a right-to-left scan as the sign bit of %d6, so that
		# "tst.l %d6; bpl/bmi" tests the direction from here on
	tst.w %d4; bpl.b L300; bset &31,%d6
L300:
	mov.w %d7,%d0		# inner word count
	bne.b L304; bset &DST1L,%d6
L304:
	add.w &1,%d0		# Nsrc = 1+Ninner
	mov.w %d0,%a1		#   + ...
	add.w &1,%d0		# Ndst = 1+Ninner+1
	add.w %d0,%d0		# magnitude of dst addressing side effects
	tst.l %d6; bpl.b L310
	neg.w %d0; add.l &2,%a0		# RtoL
L310:
	sub.w %d0,%d3		# compensate dst for autoincrement

	mov.w %d5,%d4		# mask1
	swap.w %d5		# mask2

		# a mask other than 0xffff means that edge word is only
		# partly written
	cmp.w %d4,&-1;            beq.b L320; bset &FDFRAG,%d6
L320:

		# %d1.b = 0xff when the last word is a full word (used at L360)
	cmp.w %d5,&-1; seq.b %d1; beq.b L330; bset &LDFRAG,%d6
L330:

	tst.w %d6; bne.b L360	# not NOSHIFT
		# NOSHIFT (rotate count 0).  nstab is indexed by
		# LDFRAG*4 + FDFRAG*2 + (inner count odd); the entry adjusts
		# the halved inner count and supplies flag bits.
	add.w &1,%a1		# Nsrc = 1+Ninner+1
	mov.l %d6,%d0; swap.w %d0; ext.w %d0	# 0,,flag bits
	asr.w &1,%d7; roxl.w &1,%d0	# account for inner words odd
	mov.b $nstab(%pc,%d0.w),%d0
	bpl.b L340; add.w &1,%d7
L340:
		# doubling moves the 0x40 bit into the sign and lines the
		# 0x04/0x02 bits up with FD2D/NSHF1 for the swap below
	add.b %d0,%d0
	bpl.b L350; sub.w &1,%d7
L350:
	swap.w %d0; eor.l %d0,%d6	# the bits
	btst &DST1L,%d6; bne.b L355
	btst &FD2D,%d6; beq.b L410
L355:
		# widen the word mask to a long: sign extension is already
		# right when bit 15 is set; otherwise the mask goes to the
		# high word and the low word becomes all ones
	ext.l %d4; bmi.b L410; swap.w %d4; not.w %d4	# NOSHIFT mask1 .l
	br.b L410		# NOSHIFT mask2 .l
nstab:
	byte 0x82,0x80,0x04,0x80	# 0x80: +1 inner;  0x40: -1 inner
	byte 0x02,0x00,0x44,0x00	# 0x04: FD2D;      0x02: NSHF1 no first word
L360:
		# %d1.b is 0xff when the last word is full, so this adds 1
		# to the inner count in that case
	ext.w %d1; sub.w %d1,%d7	# extend inner loop

		# Reduce the rotate count to -8..7 and build the opcode of
		# the rotate instruction for it.  The columns in the comments
		# follow the possible values of the count modulo 16.
	mov.l &0xf,%d0		# 0  1     7  8  9     e  f
	add.w &8,%d6		# 8  9     f  0  1     6  7
	and.w %d0,%d6
	sub.w &8,%d6		# 0  1     7 -8 -7    -2 -1  X=C= sign
	mov.w %d6,%d1; bge.b L367	#                    X unchanged
	neg.w %d1   		#             8  7     2  1  X=C= 1
L367:
	roxl.w &1,%d1		# 0  2     e 11  f     5  3
	and.w %d0,%d1		# 0  2     e  1  f     5  3
	lsl.w &8,%d1		# magic position
		# add the opcode of "ror.l &8,%d0" to the count/direction
		# bits just positioned in %d1
	short ADDWI+001
	  ror.l &8,%d0
	mov.w %d1,%a3		# the rotate instruction

	mov.l &0,%d1; not.w %d1		# 0,,-1
	ror.l %d6,%d1		# where the bits are after a rotate

	mov.w %d1,%d0; and.w %d4,%d0; beq.b L370	# 1 src word covers dst frag
	not.w %d1;     and.w %d4,%d1; beq.b L370
	add.w &1,%a1; br.b L390		# fragment needs another src word
L370:
	sub.w &1,%d7		# .l takes an inner word
	bset &FD2D,%d6
	ext.l %d4; bmi.b L390
	swap.w %d4; not.w %d4	# mask1 .l
L390:

	swap.w %d1

	mov.w %d1,%d0; and.w %d5,%d0; beq.b L400	# 1 src word covers dst frag
	not.w %d1;     and.w %d5,%d1; beq.b L400
	add.w &1,%a1; br.b L420		# fragment needs another src word
L400:
	dbr %d7,L405		# .l takes an inner word
	clr.w %d7; br.b L420	# nothing there to take
L405:
L410:
	bset &LD2D,%d6
	ext.l %d5; bmi.b L420
	swap.w %d5; not.w %d5	# mask2 .l
L420:

		# fc is re-read from the caller's argument; the extra 8 in
		# the offset accounts for the two longs still on the stack.
		# fc == 0 selects the "mov" (store) operator.
	tst.w NREG*4-4+fc+8(%sp); bne.b L430; bset &FSTORE,%d6
L430:
	mov.w %a1,%d0		# Nsrc
	add.w %d0,%d0		# magnitude of src addressing side effects
	tst.l %d6; bpl.b L431
	neg.w %d0; add.l &2,%a2		# RtoL
L431:
	sub.w %d0,%d2		# compensate src for autoincrement

		# reserve the code buffer; %a5 is the emit pointer.  The row
		# strides move to the high words of %d3/%d2 so that the low
		# words are free for the .w/.l opcode corrections.
	lea.l -gensiz(%sp),%sp
	mov.l %sp,%a5
	swap.w %d3
	swap.w %d2

		# more than 2 source words per row ==> general generator
	cmp.w %a1,&2; bgt L445
		# short case: start the row with a single long fetch
	short MOVWI+00000
	  mov.l (%a2)+,%d0
		# adding 010 turns the (%a2)+ source into -(%a2)
	tst.l %d6; bpl.b L432; add.w &010,%d0	# RtoL
L432:
	mov.w %d0,(%a5)+
		# %d2/%d3 low words are the corrections added to a template
		# to turn mov.w into mov.l and xxx.w into xxx.l
	mov.l &0,%d1; mov.w &-0x1000,%d2; mov.w &0100,%d3
		# %a1 -> inline "and" template; the shared tail at L700
		# returns to 2(%a1), i.e. to the "br L545" after L438
	lea.l $L438(%pc),%a1
	mov.l &-1,%d0		# prepare bits to decide on "swap"
	tst.w %d6; bpl.b L432d; neg.w %d6
	lsl.l %d6,%d0; br.b L432e
L432d:
	lsr.l %d6,%d0
L432e:
	btst &DST1L,%d6; beq.b L434
	bset &FD2D,%d6; bne.b L432a
	ext.l %d4; bmi.b L432a; swap.w %d4; not.w %d4	# mask1 .l
L432a:
	bset &LD2D,%d6; bne.b L432b
	ext.l %d5; bmi.b L432b; swap.w %d5; not.w %d5	# mask2 .l
L432b:
	and.l %d5,%d4; mov.l %d4,%d5	# single .l does it all
	add.l &1,%d4; beq L730		# all 32 bits
	sub.l &1,%d4		# need an "and"
	and.l %d5,%d0
	cmp.l %d5,%d0
	beq.b L432c
	short MOVWI+05300
	  swap.w %d0
L432c:
	tst.w %d6; bne L690	# and a rotate
	br.b L437		# NOSHIFT
L434:
	mov.w %a3,(%a5)+	# the rotate instr
	short MOVWI+05300
	  mov.l %d0,%d1		# copy after rotate
	and.l %d4,%d0
	cmp.l %d4,%d0
		# %d0 becomes 1 when the two sides are equal, else 0; it is
		# added to the "swap.w %d0" opcode, so the emitted swap acts
		# on %d1 in the first case
	seq.b %d0; neg.b %d0; ext.w %d0
	short ADDWI+000
	  swap.w %d0
	mov.w %d0,(%a5)+
		# first word: template at L436.  The shared tail returns
		# to 2(%a1), which sets up the second word (template at
		# L438) and runs the tail once more.
	lea.l $L436(%pc),%a1
	br.b L437
L436:
	  and.w %d4,%d0
	mov.w &01001,%d1; clr.w %d2; clr.w %d3
	lea.l $L438(%pc),%a1
L437:
	br L700
L438:
	  and.w %d5,%d0
	br L545
#
#  General wide case: generate the code for one row, then run it.
#  The row consists of: first word, an inner loop unrolled by two
#  (its halves use %d0 and %d1 alternately), last word, pointer
#  adjustment for the next row, outer dbr on %d7, and a final
#  "jmp (%a5)" that leaves through retgen.
#  Each "bsr genwid" is followed by one inline parameter word: the
#  "and" template for an edge word, or 0 for an inner-loop word.
#  genwid returns to the instruction after that word.
#
L445:
#
#  During compilation
#	%d7 == h-1,,w
#	%d6 == flags,,rotate count
#	%d5 == mask2
#	%d4 == mask1
#	%d3 == dst_dW,,bits for xxx.[wl]
#	%d2 == src_dW,,bits for mov.[wl]
#	%d1.w == parity
#	%a6 -> optab
#	%a5 -> next generated instruction
#	%a4 -> top of inner loop
#	%a3.w == rotate instruction
#	%a2 -> src
#	%a1 -> fragment "and" instruction
#	%a0 -> dst
#
	tst.w %d6; bne.b L480	# not NOSHIFT ==> always need first word
	btst &NSHF1,%d6; bne.b L485	# interplay of NOSHIFT, odd, FDFRAG
L480:
	mov.l &1,%d1
	and.w %d7,%d1		# parity of inner word count
	lsl.w &2,%d1		# even ==> frag in %d0, odd ==> frag in %d1
	bsr genwid		# generate for first word
	  and.w %d4,%d0
L485:
	cmp.w %d7,&2; ble.b L490	# inner dbr always falls through
	btst &FSTORE,%d6; beq.b L490	# no conflict "mov field" vs. %d6
	short MOVWI+05300		# init inner count
	  mov.w %a4,%d6
L490:
	mov.l %a5,%a4		# top of inner loop
		# halve the count: the loop body handles two words.  The
		# branches below use N, C and Z as left by the asr.
	asr.w &1,%d7		# check inner word count
	blt.b L540		# single .l does it all
	bcc.b L500		# even
	beq.b L520		# 1
		# odd count above 1: emit a br.b whose displacement byte is
		# filled in at L510.  Leaving %a4 odd (pointing at that byte)
		# is what marks the fixup as pending.
	short MOVWI+05300
	  br.b L500		# jump into middle of inner loop
	add.l &1,%a4		# remember to fixup "br.b"
	add.w &1,%d7		# middle entry ==> no dbr offset
L500:
	beq.b L530		# no inner words at all
	mov.l &4,%d1		# use %d1 in
	bsr.b genwid		# even half of inner loop
	  short 0
L510:
		# %a4 odd ==> a br.b is waiting for its displacement:
		# store (%a5 - (address of the br.b) - 2) into it, which
		# also leaves %a4 at the true top of the loop
	mov.w %a4,%d0; neg.w %d0
	bclr &0,%d0; beq.b L520
	add.w %a5,%d0; mov.b %d0,(%a4)+		# fixup "br.b" into middle
L520:
	mov.l &0,%d1		# use %d0 in
	bsr.b genwid		# odd half of inner loop
	  short 0
	sub.w &1,%d7		# offset for inner dbr loop
	ble.b L530		# dbr always falls through
		# emit "dbr %d6," followed by its displacement back to the
		# top of the inner loop
	mov.w &DBR+6,(%a5)+
	sub.l %a5,%a4; mov.w %a4,(%a5)+	# dbr displacement
L530:

	btst &LDFRAG,%d6; beq.b L540	# omit "and" for full last word
	mov.l &4,%d1
	bsr.b genwid
	  and.w %d5,%d0
L540:

	tst.w %d7; ble.b L545	# no inner loop
	btst &FSTORE,%d6; bne.b L545	# possible conflict "mov field" vs. %d6
	short MOVWI+05300		# init inner count
	  mov.w %a4,%d6
L545:
		# End of row.  Bring the row strides back to the low words,
		# load them into the run-time registers %a1/%a3, and emit the
		# pointer adjustments unless they are zero.
	swap.w %d3; tst.w %d3; beq.b L546	# wdst is full width of bitmap
	mov.w %d3,%a1		# dst_dW
	short MOVWI+05300
	  add.w %a1,%a0
L546:
	swap.w %d2; tst.w %d2; beq.b L547	# wsrc is full width of bitmap
	mov.w %d2,%a3		# src_dW
	short MOVWI+05300
	  add.w %a3,%a2
L547:
		# emit "dbr %d7," back to the start of the generated code
		# (the buffer begins at %sp)
	mov.w &DBR+7,(%a5)+
	mov.l %sp,%a4		# top of outer loop
		# if the row starts with a br.b, loop straight to its target
	cmp.b (%a4),&0x60; bne.b L548		# not br.b
	mov.b 1(%a4),%d0; ext.w %d0; lea.l 2(%a4,%d0.w),%a4	# collapse branches
L548:
	sub.l %a5,%a4; mov.w %a4,(%a5)+	# dbr displacement
		# last generated instruction: leave through %a5, which is
		# pointed at retgen below
	short MOVWI+05300
	  jmp (%a5)

		# switch to the run-time register assignment (see the
		# "During execution" table at the end of the file) and jump
		# into the generated code at the base of the buffer
	mov.w %d7,%a4	# init inner count
	mov.w %d7,%d6	# init inner count, 2nd case
	swap.w %d7   	# h-1
	lea.l $retgen(%pc),%a5
	jmp (%sp)

#
#  genwid: append to the generated code (at (%a5)+) the instructions
#  for one destination word or long: fetch from src, rotate, optional
#  "and" with an edge mask, and the operator taken from the optab
#  entry at %a6.
#
#  Called by bsr with one inline parameter word after the call:
#	and.w %d4,%d0	first word
#	and.w %d5,%d0	last word
#	0		inner-loop word
#  Only bit 0 of the parameter (the low bit of the mask register
#  number) is tested to tell first from last.
#  On entry %d1 is 0 or 4 and selects the %d0 or the %d1 fetch template
#  at genget.  Returns to the instruction after the parameter word.
#  L690, L698, L700 and L730 are also entered from the short path near
#  L432, with %a1 pointing at an inline "and" template.
#
genwid:
	mov.l (%sp)+,%a1	# -> inline parameter
		# fetch the two-instruction template (swap, then mov) as one long
	mov.l $genget(%pc,%d1.w),%d0
		# 01001 has the low bit of both register fields set;
		# presumably it is what turns %d0 into %d1 in the templates
		# it is added to -- NOTE(review): confirm
	tst.w %d1; beq.b L550; mov.w &01001,%d1; swap.w %d1	# parity bits
L550:
	clr.w %d2; clr.w %d3	# .[wl] bits default to .w
		# adding 010 turns the (%a2)+ source into -(%a2)
	tst.l %d6; bpl.b L560; add.w &010,%d0	# RtoL
L560:
	tst.w %d6; bne.b L569	# not NOSHIFT
	bclr &9,%d0		# NOSHIFT always %d0
	mov.w (%a1),%d1; bne.b L564	# not inner loop
	btst &FSTORE,%d6; beq.b L562	# not "mov"
		# NOSHIFT inner-loop "mov": a single memory-to-memory mov.l
		# from src to dst is the whole job
	mov.l &070,%d1; and.w %d0,%d1
	lsl.w &3,%d1; or.w %d1,%d0	# copy RtoL mode
	add.w &-0x1000,%d0		# .w ==> .l
	mov.w %d0,(%a5)+
L561:
	jmp 2(%a1)
		# fetch templates, read as data through the mov.l at genwid:
		# the first pair uses %d0, the second (offset 4) uses %d1
genget:
	swap.w %d0; mov.w (%a2)+,%d0
	swap.w %d1; mov.w (%a2)+,%d1

L562:
	mov.w &-0x1000,%d2; mov.w &0100,%d3	# .w +=> .l
	add.w %d2,%d0
L563:
	mov.l &0,%d1	# NOSHIFT always %d0
	br L698		# assemble the fetch, then do the op
L564:
	lsr.w &1,%d1; bcs.b L562	# NOSHIFT always LD2D
	btst &FD2D,%d6; bne.b L562
	br.b L563		# alas, .w
L569:
	mov.w (%a1),%d1; beq.b L630	# inner loop
L570:
	lsr.w &1,%d1; bcs.b L580	# last word
		# first word of a shifted row: fetch a long, then emit a
		# "mov.l"/"swap.w" pair whose registers, order and presence
		# are adjusted below according to the bit string in %d0
	add.w &-0x1000,%d0		# force fetch .l
	mov.w %d0,(%a5)+		# the fetch .l
	short MOVLI+00000
	  mov.l %d0,%d1
	  swap.w %d0
	clr.w %d1; eor.l %d1,%d0	# parity for mov.l %d[01],%d[10]
	tst.l %d1; sne.b %d1; sub.b %d1,%d0	# parity for swap.w %d[01]
	mov.l %d0,(%a5)		# ran out of registers
		# NOTE(review): the constant below is a packed decision
		# table selected by successive rotates; the meaning of its
		# individual bits is not derivable from the comments here.
	mov.l &0x4c80ec,%d0	# microcoded bits
	tst.l %d6; bpl.b L572; ror.l &1,%d0	# RtoL
L572:
	tst.w %d6; bpl.b L574; ror.l &2,%d0	# rol
L574:
	btst &FD2D,%d6; beq.b L576; ror.l &4,%d0	# first op .l
	mov.w &-0x1000,%d2; mov.w &0100,%d3	# .w +=> .l corrections
L576:
	ror.l &1,%d0; bpl.b L578	# "swap" not needed
	add.l &2,%a5
	ror.l &8,%d0; bpl.b L577	# existing "swap" parity OK
	eor.w &1,(%a5)
L577:
	ror.l &8,%d0; bpl.b L578	# existing order OK
	sub.l &2,%a5
	mov.l (%a5),%d0; swap.w %d0; mov.l %d0,(%a5)
	add.l &2,%a5
L578:
	add.l &2,%a5
	swap.w %d1		# junk,,parity
	br.b L690
L580:
	btst &LD2D,%d6; beq.b L630	# operator .w
	mov.w &-0x1000,%d2		# mov.w +=> mov.l
	mov.w &0100,%d3 		# xxx.w +=> xxx.l
L630:
		# %d1.b = 0xff for a right-to-left scan; the eor with the low
		# byte of the rotate count then compares the two signs
	tst.l %d6; smi.b %d1
	eor.b %d6,%d1; bpl.b L650	# rotation in same direction as scan
	swap.w %d0		# interchange "swap" and "mov"
L650:
	mov.l %d0,(%a5)+

	swap.w %d1		# junk,,parity
	mov.w (%a1),%d0; lsr.w &1,%d0; bcs.b L660	# last word
	short MOVWI+000
	  mov.l %d0,%d1
	eor.w %d1,%d0
	mov.w %d0,(%a5)+
	br.b L690
L660:
	tst.l %d6; bmi.b L690		# RtoL
	btst &LD2D,%d6; beq.b L690	# not .l
	tst.w %d6; bpl.b L670		# ror
	sub.l &2,%a5; br.b L690		# no "swap"
L670:
	mov.w -4(%a5),(%a5)+		# extra "swap"
L690:
		# rotate opcode from %a3, with the register parity applied
	mov.w %a3,%d0
	eor.b %d1,%d0
L698:
	mov.w %d0,(%a5)+	# the rotate instruction
L700:

		# Optional "and" with the edge mask.  %d0 is the inline
		# parameter: 0 for an inner word, bit 0 set for the last word.
	mov.w (%a1),%d0; beq.b L730	# inner loop
	btst &0,%d0; bne.b L705		# last word
	btst &FDFRAG,%d6; beq.b L730	# no "and"
L705:
	add.w %d3,%d0; add.w %d1,%d0; sub.b %d1,%d0	# and.[wl] %d[45],%d[01]
	btst &FSTORE,%d6; beq.b L720
		# "mov" partial word
		# generated sequence: fetch the dst word into %d6, eor it
		# into the rotated source, "and" with the mask, eor again,
		# then store -- only the masked bits of dst change
	swap.w %d0		# save the "and"
	short MOVWI+00000	# ,%d0
	  mov.w (%a0),%d6
	add.w %d2,%d0		# mov.[wl]
	tst.l %d6; bpl.b L710; add.w &020,%d0	# RtoL; "(%a0)" ==> "-(%a0)"
L710:
	mov.w %d0,(%a5)+	# instr to fetch memory part of word
	short MOVWI+00000 	# ,%d0
	  eor.w %d6,%d0
	add.w %d3,%d0; add.b %d1,%d0	# eor.[wl] %d6,%d[01]
		# emits eor, and, eor: %d0 holds and,,eor at this point
	swap.w %d0; mov.l %d0,(%a5)+; swap.w %d0; mov.w %d0,(%a5)+
	mov.w %d2,%d0; add.b %d1,%d0	# mov.[wl] %d[01],
	mov.l &-0100,%d1	# RtoL correction, if necessary
	br.b L770
L720:
	mov.w %d0,(%a5)+	# "and" for non-mov operators
L730:
		# second word of the optab entry: an extra instruction to
		# generate before the operator, or 0 for none
	mov.w 2(%a6),%d0; beq.b L740	# not F_CLR
	add.w %d3,%d0; add.b %d1,%d0	# not.[wl] %d[01]
	mov.w %d0,(%a5)+
L740:
	btst &FSTORE,%d6; beq.b L790	# non-"mov"
	mov.w %d2,%d0; add.b %d1,%d0	# mov.[wl] %d[01],
	mov.l &0100,%d1 	# RtoL correction, if necessary
L770:
		# add the operator template from the optab entry, apply the
		# right-to-left addressing correction, emit, and return past
		# the inline parameter
	add.w (%a6),%d0
	tst.l %d6; bpl.b L780
	add.w %d1,%d0 	# RtoL correction
L780:
	mov.w %d0,(%a5)+
	jmp 2(%a1)

L790:
	mov.w %d1,%d0; clr.b %d0; add.w %d3,%d0		# xxx.[wl] %d[01]
	mov.l &010,%d1		# RtoL correction, if necessary
	br.b L770

#
#  During execution
#	%d[01] == rotator
#	%d2 [reserved for texture bits]
#	%d3 [reserved for texture index]
#	%d4 == mask1
#	%d5 == mask2
#	%d6.w == inner count
#	%d7.w == outer count
#	%a0 -> dst
#	%a1 == dst_dW
#	%a2 -> src
#	%a3 == src_dW
#	%a4.w == inner count init
#	%a5 -> retgen
#	%a6 [reserved for -> texture]
#