From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on inbox.vuxu.org X-Spam-Level: X-Spam-Status: No, score=-1.1 required=5.0 tests=DKIM_SIGNED,DKIM_VALID, DKIM_VALID_AU,FREEMAIL_FROM,MAILING_LIST_MULTI,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.4 Received: (qmail 18489 invoked from network); 3 Apr 2022 12:31:31 -0000 Received: from minnie.tuhs.org (45.79.103.53) by inbox.vuxu.org with ESMTPUTF8; 3 Apr 2022 12:31:31 -0000 Received: by minnie.tuhs.org (Postfix, from userid 112) id 165339D67F; Sun, 3 Apr 2022 22:31:31 +1000 (AEST) Received: from minnie.tuhs.org (localhost [127.0.0.1]) by minnie.tuhs.org (Postfix) with ESMTP id 6157B9D667; Sun, 3 Apr 2022 22:30:26 +1000 (AEST) Authentication-Results: minnie.tuhs.org; dkim=pass (2048-bit key; unprotected) header.d=gmail.com header.i=@gmail.com header.b="oT8nNU/4"; dkim-atps=neutral Received: by minnie.tuhs.org (Postfix, from userid 112) id F10D59D667; Sun, 3 Apr 2022 22:26:24 +1000 (AEST) Received: from mail-pj1-f47.google.com (mail-pj1-f47.google.com [209.85.216.47]) by minnie.tuhs.org (Postfix) with ESMTPS id 1F4F19D663 for ; Sun, 3 Apr 2022 22:26:21 +1000 (AEST) Received: by mail-pj1-f47.google.com with SMTP id kw18so65608pjb.5 for ; Sun, 03 Apr 2022 05:26:21 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=mime-version:references:in-reply-to:from:date:message-id:subject:to :cc; bh=s1nnQLfJUY7nUG3fqrqENw/cpVgFODCHAjAU7gl+irg=; b=oT8nNU/4ZhpC3WTFDQ5L/aLhWaUY1xH6cjq6Na1P5I3CL1yGeyM7u9Gz93kBkOcB4+ K+eAoAR/CVl1VjWGHtjbiJsAPTN/BAe9UjHntz1srwEnvQY/NQALb2aMgQeH+x+iiQeM bUBjhY7uAi95BoiL5awucUKE+iBZHd0EhEquEmTMzRU+GBUVlu5N6C7h3/2eMU6HKki2 F9iSLvOLkweLwDo0FmrD0xRLKfQsZi/ZGo5V5f6asR+/TfcxyxtbgLEA6/xCJGVO8B1X I08ccl4BW2qzVnHy0mdCv/+rCNAzR5knpXm7Sw8UFNIv6aTaoub4MyBgN2Ni6EKtc4RG pZrg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; 
h=x-gm-message-state:mime-version:references:in-reply-to:from:date :message-id:subject:to:cc; bh=s1nnQLfJUY7nUG3fqrqENw/cpVgFODCHAjAU7gl+irg=; b=km/t9sDWddZNOGPCaYsUZopG1E9I4j4l37wbrd936pJ6nZCvedIaA1/3Om1ofgD1XV eTIfcb+ALDN4pwL7/cn37r9jxw1oxCmZH4oVPIioClWOwjTR0SASSgh6dnN4Agws0oP2 rS5uLzy0vl0VFj32a4dfR6x6OwztIvJ942iYyP9FQoPoxGys876mRezu/2hGrc8C9qwL TMrW40xuSdbIBrNADUMcICErRUhAHM9csK6lq+Fy5jAicYBJ6snOGRKhw6tZ/HsjszaL Woz11tNrTtsN25DilfBDeEkNJbS20SH5us/o/zwaE/YieNt4EzdTANegzAIONsgyiPTP 40xw== X-Gm-Message-State: AOAM532mdYFz3ryfyEIGYE1gwvzK7d7OF/vGOOisp40n/jEUNTVd11I2 9aUQrrxIsEHqN6ozVvJG5H9Eh3HMOQpIRLkezPI= X-Google-Smtp-Source: ABdhPJx07j1JcQsAynWviUeqRHQx5GdG18Kb4Eg+xo1Pa5bhJivlpxOa+ywG+BjS5sivfA+QxGm28W13GHuxiAWIt0c= X-Received: by 2002:a17:90b:352:b0:1c6:77e:a4f7 with SMTP id fh18-20020a17090b035200b001c6077ea4f7mr21122020pjb.77.1648988780133; Sun, 03 Apr 2022 05:26:20 -0700 (PDT) MIME-Version: 1.0 References: In-Reply-To: From: Rob Pike Date: Sun, 3 Apr 2022 22:26:07 +1000 Message-ID: To: Paul Ruizendaal Content-Type: text/plain; charset="UTF-8" Subject: Re: [TUHS] A Reiser tour do force X-BeenThere: tuhs@minnie.tuhs.org X-Mailman-Version: 2.1.26 Precedence: list List-Id: The Unix Heritage Society mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: TUHS main list Errors-To: tuhs-bounces@minnie.tuhs.org Sender: "TUHS" Ah, yes, that's the one. -rob On Sun, Apr 3, 2022 at 9:03 PM Paul Ruizendaal via TUHS wrote: > > A not-very-thorough search at tuhs turned up V9/jerq/src/lib/j/bitblt.c > It appears to be a pre-Reiser bitblt, not what was asked for. > > > The Reiser code is in the V8 jerq tarball that Dan Cross donated: > v8jerq.tar.bz2 > > It is in file blit/src/libj/bitblt.s (attached below for convenience). It is 750 lines of 68K assembler. It does not appear to have been ported to the Bellmac 32 CPU. Maybe it did not make sense in that context. 
> > Paul > > ===== > > # > # bitblt(sm,r,dm,p,fc) > # Bitmap *sm,*dm; > # Rectangle r; > # Point p; > # int fc; > # > # by John F. Reiser summer 1982 > # > # Depending on the case at hand, generate very good code and execute it. > # > > # offsets in a Point > set x,0 > set y,2 > # offsets in a Rectangle > set origin,0 > set corner,4 > # offsets in a Bitmap > set base,0 > set width,4 > set rect,6 > # parameter offsets from %fp > set sm,8 > set r,12 > set dm,20 > set p,24 > set fc,28 > > set NREG,11 > # NREG: number of registers pushed by the movm.l at entry (its mask 0x3f3e has 11 bits set). > # The parameter offsets above are %fp-relative, but no frame is linked here, so the > # arguments are addressed as NREG*4-4+offset(%sp). > > global bitblt > bitblt: > movm.l &0x3f3e,-(%sp) # save C registers > movm.l NREG*4-4+sm(%sp),&0x001f > # d1=r.o.x,,r.o.y; d2=r.c.x,,r.c.y; d4=p.x,,p.y; > mov.l %d0,%a4 # sm > mov.l %d3,%a5 # dm > mov.w NREG*4-4+fc(%sp),%a6 # a6.w == fc > movm.l rect(%a4),&0x9 # d0=sm.o.x,,sm.o.y; d3=sm.c.x,,sm.c.y; > movm.l rect(%a5),&0x60 # d5=dm.o.x,,dm.o.y; d6=dm.c.x,,dm.c.y; > > # %a0 = where the clip code at L5 continues: L50 after the first pass, L55 after the second > lea.l $L50(%pc),%a0 > L5: > # clip r.y to sm.y > mov.w %d0,%d7 # sm.o.y > sub.w %d1,%d7 # - r.o.y > ble.b L10 > mov.w %d0,%d1 # r.o.y = sm.o.y; /* r.o.y was above sm.rect */ > add.w %d7,%d4 # p.y parallels r.o.y > L10: > cmp.w %d2,%d3 # r.c.y : sm.c.y > ble.b L20 > mov.w %d3,%d2 # r.c.y = sm.c.y; /* bottom of r was below sm.rect */ > L20: > # clip (r.y at p.y) to dm.y > mov.w %d5,%d7 # dm.o.y > sub.w %d4,%d7 # -p.y > ble.b L30 > mov.w %d5,%d4 # p.y = dm.o.y; /* p.y was above dm.rect */ > add.w %d7,%d1 # r.o.y parallels p.y > L30: > mov.w %d1,%d7 # r.o.y > add.w %d6,%d7 # + dm.c.y > sub.w %d4,%d7 # - p.y /* == max y that dm.rect allows in r */ > cmp.w %d2,%d7 # r.c.y : limit > ble.b L40 > mov.w %d7,%d2 # r.c.y = limit > L40: > mov.w %d2,%d7 # r.c.y > sub.w %d1,%d7 # - r.o.y > sub.w &1,%d7 # /* == h-1 in bits */ > blt.b ret > jmp (%a0) > > # exits: retgen first drops the gensiz bytes reserved for generated code, > # ret8 drops 8 bytes of stack, ret restores the saved registers and returns > retgen: > lea.l gensiz(%sp),%sp > ret8: > add.l &8,%sp > ret: > movm.l (%sp)+,&0x7cfc > rts > > L50: > # mirror in pi/4 and reuse same code to clip x > swap.w %d0; swap.w %d1; swap.w %d2; swap.w %d3 > swap.w %d4; swap.w %d5; swap.w %d6; swap.w %d7 > lea.l $L55(%pc),%a0 > 
br.b L5 > > L55: > mov.l %d1,%a1 > mov.l %d4,%d6 > # > # So far > # %d7 == h-1,,w-1 > # %d6 == p.y,,p.x > # %a6.w == fc > # %a5 == dm > # %a4 == sm > # %a1 == r.o.y,,r.o.x > # > # Compute masks, and width in words > # > mov.w %d6,%d0 # p.x /* left endpoint of dst */ > mov.w %d7,%d1 # w-1 > add.w %d6,%d1 # right endpoint > > mov.l &-1,%d3 > mov.l &15,%d2 > and.w %d0,%d2 > lsr.w %d2,%d3 # mask1 > mov.l &-1,%d5 > mov.l &15,%d2 > and.w %d1,%d2 > add.w &1,%d2 > lsr.w %d2,%d5 > not.w %d5 # mask2 > swap.w %d5 > mov.w %d3,%d5 # mask2,,mask1 > > asr.w &4,%d0 > asr.w &4,%d1 > sub.w %d0,%d1 > sub.w &1,%d1 # inner-loop width in words > > mov.l &0,%d4 # assume LtoR > mov.w width(%a5),%d3 > add.w %d3,%d3 > mov.w width(%a4),%d2 > add.w %d2,%d2 > # > # So far > # %d7 == h-1,,w-1 in bits > # %d6 == p.y,,p.x > # %d5 == mask2,,mask1 > # %d4 == 0 (LtoR) > # %d3.w == dm width in bytes > # %d2.w == sm width in bytes > # %d1.w == inner-loop width in words > # %a6.w == fc > # %a5 == dm > # %a4 == sm > # %a1 == r.o.y,,r.o.x > # > # If necessary, compensate for overlap of source and destination > # > cmp.l %a4,%a5 > bne.b L80 # overlap not possible > mov.l %d6,%d0 # p.y,,p.x > mov.w %a1,%d0 # p.y,,r.o.x > cmp.l %a1,%d0 # r.o.y : p.y > bge.b L60 # if (r.o.y < p.y) > mov.l %d7,%d0 # h-1,,w-1 > clr.w %d0 # h-1,,0 > add.l %d0,%a1 # r.o.y += h-1; > add.l %d0,%d6 # p.y += h-1; > neg.w %d3 # wdst = -wdst; > neg.w %d2 # wsrc = -wsrc; > L60: > cmp.w %d7,&16 > blt.b L70 # l<->r swap not needed for narrow > cmp.w %d6,%a1 # p.x : r.o.x > ble.b L70 # if (r.o.x < p.x) > mov.l %a1,%d0 > add.w %d7,%d0 > mov.l %d0,%a1 # r.o.x += w-1; > add.w %d7,%d6 # p.x += w-1; > mov.l &-1,%d4 # RtoL > swap.w %d5 # masks in other order > L70: > L80: > # > # Locate actual starting points > # (the p and dm pushed first are the arguments of the second addr call) > # > mov.l %d6,%d0 # p.y,,p.x > swap.w %d0 > mov.l %d0,-(%sp) # p > mov.l %a5,-(%sp) # dm > > mov.l &15,%d0 > lea.l $L82(%pc),%a0 # assume narrow > cmp.w %d7,%d0 # w-1 : 15 > ble.b L81 # guessed correctly > lea.l $L85(%pc),%a0 # wide > 
L81: > mov.l %a0,-(%sp) # on return, go directly to wide/narrow code > add.w %a6,%a6; add.w %a6,%a6 # with 4*fc > > mov.w %d1,%d7 # h-1 in bits,,inner width in words > and.l %d0,%d6 # 0,,bit offset of p.x > mov.l %a1,%d1 # r.o.y,,r.o.x > and.w %d1,%d0 # bit offset of r.o.x > sub.w %d0,%d6 # BO(p.x) - BO(r.o.x) /* amount of right rotation */ > swap.w %d1 # r.o.x,,r.o.y > mov.l %d1,-(%sp) # r.o > mov.l %a4,-(%sp) # sm > lea.l addr,%a3 > jsr (%a3) > mov.l %a0,%a2 # src = addr(sm,r.origin); > add.l &8,%sp > jmp (%a3) # %a0 = addr(dm,p); > # addr is entered with jmp here, so (presumably via its rts) control continues at the L82/L85 > # address pushed at L81, with dm and p still on the stack for ret8 to drop > L82: > mov.l &0,%d4 > mov.w %d5,%d4 # 0,,mask1 > swap.w %d5 # mask1,,mask2 (proper long mask; maybe 16 bits too wide) > and.w %d5,%d4 # check for overlap of mask1 and mask2 > beq.b L83 # no overlap ==> %d5 already correct > mov.l %d4,%d5 # overlap ==> reduce %d5 by 16 bits > swap.w %d5 # and put it in the proper half > L83: > swap.w %d7 # ,,height-1 > lea.l $nrwtab(%pc,%a6.w),%a6 # -> optab > tst.w %d6 # amount of right rotation > bge.b L84 > neg.w %d6 > add.l &2,%a6 > L84: > add.w (%a6),%a6 > jmp (%a6) > > # nrwtab, indexed by 4*fc: per function code a pair of 16-bit offsets, each relative to its > # own table slot; the second of the pair (rotate left) is chosen when %d6 is negative > nrwtab: > short opMnwr-nrwtab- 0, opMnwl-nrwtab- 2 > short opSnwr-nrwtab- 4, opSnwl-nrwtab- 6 > short opCnwr-nrwtab- 8, opCnwl-nrwtab-10 > short opXnwr-nrwtab-12, opXnwl-nrwtab-14 > > # Narrow loops, %d7+1 passes, stepping src by %d2 and dst by %d3 each pass. At (%a0): > # M replaces the bits selected by mask %d5 with the source, S ors the source in, > # C clears the bits the source selects, X exclusive-ors it; ...r rotates right, ...l left. > opMnwr: > mov.l (%a2),%d0 > mov.l (%a0),%d1 > ror.l %d6,%d0 > eor.l %d1,%d0 > and.l %d5,%d0 > eor.l %d1,%d0 > mov.l %d0,(%a0) > add.w %d2,%a2 > add.w %d3,%a0 > dbr %d7,opMnwr > br ret8 > > opMnwl: > mov.l (%a2),%d0 > mov.l (%a0),%d1 > rol.l %d6,%d0 > eor.l %d1,%d0 > and.l %d5,%d0 > eor.l %d1,%d0 > mov.l %d0,(%a0) > add.w %d2,%a2 > add.w %d3,%a0 > dbr %d7,opMnwl > br ret8 > > opSnwr: > mov.l (%a2),%d0 > ror.l %d6,%d0 > and.l %d5,%d0 > or.l %d0,(%a0) > add.w %d2,%a2 > add.w %d3,%a0 > dbr %d7,opSnwr > br ret8 > > opSnwl: > mov.l (%a2),%d0 > rol.l %d6,%d0 > and.l %d5,%d0 > or.l %d0,(%a0) > add.w %d2,%a2 > add.w %d3,%a0 > dbr %d7,opSnwl > br ret8 > > opCnwr: > mov.l (%a2),%d0 > ror.l %d6,%d0 > and.l %d5,%d0 > not.l %d0 > and.l %d0,(%a0) > add.w %d2,%a2 > 
add.w %d3,%a0 > dbr %d7,opCnwr > br ret8 > > opCnwl: > mov.l (%a2),%d0 > rol.l %d6,%d0 > and.l %d5,%d0 > not.l %d0 > and.l %d0,(%a0) > add.w %d2,%a2 > add.w %d3,%a0 > dbr %d7,opCnwl > br ret8 > > opXnwr: > mov.l (%a2),%d0 > ror.l %d6,%d0 > and.l %d5,%d0 > eor.l %d0,(%a0) > add.w %d2,%a2 > add.w %d3,%a0 > dbr %d7,opXnwr > br ret8 > > opXnwl: > mov.l (%a2),%d0 > rol.l %d6,%d0 > and.l %d5,%d0 > eor.l %d0,(%a0) > add.w %d2,%a2 > add.w %d3,%a0 > dbr %d7,opXnwl > br ret8 > > set DBR,0x51c8 > set MOVLI,0x2000+074 # mov.l &..., > set MOVWI,0x3000+074 # mov.w &..., > set ADDWI,0x0640 # add.w &..., > # These are placed with `short` directly in front of an ordinary instruction, so that > # instruction's opcode becomes the immediate operand: e.g. `short MOVWI+05300` > # followed by X assembles a mov.w that stores the opcode of X through (%a5)+. > > # bit numbers of flags kept in the upper half of %d6 (bit 31 is set separately for RtoL) > set FDFRAG,16 # first destination is a fragment > set LDFRAG,17 # last destination is a fragment > set NSHF1,18 > set FD2D,19 # first destination should store 2 words > set LD2D,20 # last destination should store 2 words > set FSTORE,21 > set DST1L,24 # dst inner count is 0 > set SRC1L,25 # Nsrc is 2 > > set gensiz,80 > > # widtab, indexed by 4*fc: the instruction that applies %d0 to the destination, followed by > # a word that is either 0 or a not.w to be generated in front of it (tested at L730) > widtab: > mov.w %d0,(%a0)+; short 0 > or.w %d0,(%a0)+; short 0 > and.w %d0,(%a0)+; not.w %d0 > eor.w %d0,(%a0)+; short 0 > > # > # So far > # %d7 == h-1 (bits),,w (words) > # %d6 == 0,,rotate count > # %d5 == mask2,,mask1 > # %d4 == -RtoL > # %d3.w == wdst (bytes) > # %d2.w == wsrc (bytes) > # %a6.w == 4*fc > # %a2 -> src > # %a0 -> dst > # > L85: > lea.l $widtab(%pc,%a6.w),%a6 > tst.w %d4; bpl.b L300; bset &31,%d6 > L300: > mov.w %d7,%d0 # inner word count > bne.b L304; bset &DST1L,%d6 > L304: > add.w &1,%d0 # Nsrc = 1+Ninner > mov.w %d0,%a1 # + ... 
> add.w &1,%d0 # Ndst = 1+Ninner+1 > add.w %d0,%d0 # magnitude of dst addressing side effects > tst.l %d6; bpl.b L310 > neg.w %d0; add.l &2,%a0 # RtoL > L310: > sub.w %d0,%d3 # compensate dst for autoincrement > > mov.w %d5,%d4 # mask1 > swap.w %d5 # mask2 > > cmp.w %d4,&-1; beq.b L320; bset &FDFRAG,%d6 > L320: > > cmp.w %d5,&-1; seq.b %d1; beq.b L330; bset &LDFRAG,%d6 > L330: > > tst.w %d6; bne.b L360 # not NOSHIFT > add.w &1,%a1 # Nsrc = 1+Ninner+1 > mov.l %d6,%d0; swap.w %d0; ext.w %d0 # 0,,flag bits > asr.w &1,%d7; roxl.w &1,%d0 # account for inner words odd > mov.b $nstab(%pc,%d0.w),%d0 > bpl.b L340; add.w &1,%d7 > L340: > add.b %d0,%d0 > bpl.b L350; sub.w &1,%d7 > L350: > swap.w %d0; eor.l %d0,%d6 # the bits > btst &DST1L,%d6; bne.b L355 > btst &FD2D,%d6; beq.b L410 > L355: > ext.l %d4; bmi.b L410; swap.w %d4; not.w %d4 # NOSHIFT mask1 .l > br.b L410 # NOSHIFT mask2 .l > nstab: > byte 0x82,0x80,0x04,0x80 # 0x80: +1 inner; 0x40: -1 inner > byte 0x02,0x00,0x44,0x00 # 0x04: FD2D; 0x02: NSHF1 no first word > L360: > ext.w %d1; sub.w %d1,%d7 # extend inner loop > > mov.l &0xf,%d0 # 0 1 7 8 9 e f > add.w &8,%d6 # 8 9 f 0 1 6 7 > and.w %d0,%d6 > sub.w &8,%d6 # 0 1 7 -8 -7 -2 -1 X=C= sign > mov.w %d6,%d1; bge.b L367 # X unchanged > neg.w %d1 # 8 7 2 1 X=C= 1 > L367: > roxl.w &1,%d1 # 0 2 e 11 f 5 3 > and.w %d0,%d1 # 0 2 e 1 f 5 3 > lsl.w &8,%d1 # magic position > # the next two lines assemble one add.w to %d1 whose immediate is the opcode of `ror.l &8,%d0` > short ADDWI+001 > ror.l &8,%d0 > mov.w %d1,%a3 # the rotate instruction > > mov.l &0,%d1; not.w %d1 # 0,,-1 > ror.l %d6,%d1 # where the bits are after a rotate > > mov.w %d1,%d0; and.w %d4,%d0; beq.b L370 # 1 src word covers dst frag > not.w %d1; and.w %d4,%d1; beq.b L370 > add.w &1,%a1; br.b L390 # fragment needs another src word > L370: > sub.w &1,%d7 # .l takes an inner word > bset &FD2D,%d6 > ext.l %d4; bmi.b L390 > swap.w %d4; not.w %d4 # mask1 .l > L390: > > swap.w %d1 > > mov.w %d1,%d0; and.w %d5,%d0; beq.b L400 # 1 src word covers dst frag > not.w %d1; and.w %d5,%d1; beq.b L400 > add.w &1,%a1; br.b 
L420 # fragment needs another src word > L400: > dbr %d7,L405 # .l takes an inner word > clr.w %d7; br.b L420 # nothing there to take > L405: > L410: > bset &LD2D,%d6 > ext.l %d5; bmi.b L420 > swap.w %d5; not.w %d5 # mask2 .l > L420: > > # fc is re-read from the argument list (+8 for the p and dm still on the stack); > # FSTORE is set when it is zero > tst.w NREG*4-4+fc+8(%sp); bne.b L430; bset &FSTORE,%d6 > L430: > mov.w %a1,%d0 # Nsrc > add.w %d0,%d0 # magnitude of src addressing side effects > tst.l %d6; bpl.b L431 > neg.w %d0; add.l &2,%a2 # RtoL > L431: > sub.w %d0,%d2 # compensate src for autoincrement > > # reserve gensiz bytes on the stack for the code generated below; %a5 walks through it > lea.l -gensiz(%sp),%sp > mov.l %sp,%a5 > swap.w %d3 > swap.w %d2 > > cmp.w %a1,&2; bgt L445 > short MOVWI+00000 > mov.l (%a2)+,%d0 > tst.l %d6; bpl.b L432; add.w &010,%d0 # RtoL > L432: > mov.w %d0,(%a5)+ > mov.l &0,%d1; mov.w &-0x1000,%d2; mov.w &0100,%d3 > lea.l $L438(%pc),%a1 > mov.l &-1,%d0 # prepare bits to decide on "swap" > tst.w %d6; bpl.b L432d; neg.w %d6 > lsl.l %d6,%d0; br.b L432e > L432d: > lsr.l %d6,%d0 > L432e: > btst &DST1L,%d6; beq.b L434 > bset &FD2D,%d6; bne.b L432a > ext.l %d4; bmi.b L432a; swap.w %d4; not.w %d4 # mask1 .l > L432a: > bset &LD2D,%d6; bne.b L432b > ext.l %d5; bmi.b L432b; swap.w %d5; not.w %d5 # mask2 .l > L432b: > and.l %d5,%d4; mov.l %d4,%d5 # single .l does it all > add.l &1,%d4; beq L730 # all 32 bits > sub.l &1,%d4 # need an "and" > and.l %d5,%d0 > cmp.l %d5,%d0 > beq.b L432c > short MOVWI+05300 > swap.w %d0 > L432c: > tst.w %d6; bne L690 # and a rotate > br.b L437 # NOSHIFT > L434: > mov.w %a3,(%a5)+ # the rotate instr > short MOVWI+05300 > mov.l %d0,%d1 # copy after rotate > and.l %d4,%d0 > cmp.l %d4,%d0 > seq.b %d0; neg.b %d0; ext.w %d0 > short ADDWI+000 > swap.w %d0 > mov.w %d0,(%a5)+ > lea.l $L436(%pc),%a1 > br.b L437 > L436: > and.w %d4,%d0 > mov.w &01001,%d1; clr.w %d2; clr.w %d3 > lea.l $L438(%pc),%a1 > L437: > br L700 > L438: > and.w %d5,%d0 > br L545 > L445: > # > # During compilation > # %d7 == h-1,,w > # %d6 == flags,,rotate count > # %d5 == mask2 > # %d4 == mask1 > # %d3 == dst_dW,,bits for xxx.[wl] > # %d2 == 
src_dW,,bits for mov.[wl] > # %d1.w == parity > # %a6 -> optab > # %a5 -> next generated instruction > # %a4 -> top of inner loop > # %a3.w == rotate instruction > # %a2 -> src > # %a1 -> fragment "and" instruction > # %a0 -> dst > # > tst.w %d6; bne.b L480 # not NOSHIFT ==> always need first word > btst &NSHF1,%d6; bne.b L485 # interplay of NOSHIFT, odd, FDFRAG > L480: > mov.l &1,%d1 > and.w %d7,%d1 # parity of inner word count > lsl.w &2,%d1 # even ==> frag in %d0, odd ==> frag in %d1 > # the word after each bsr to genwid is an inline parameter, not executed here: > # genwid returns to 2 bytes past it > bsr genwid # generate for first word > and.w %d4,%d0 > L485: > cmp.w %d7,&2; ble.b L490 # inner dbr always falls through > btst &FSTORE,%d6; beq.b L490 # no conflict "mov field" vs. %d6 > short MOVWI+05300 # init inner count > mov.w %a4,%d6 > L490: > mov.l %a5,%a4 # top of inner loop > asr.w &1,%d7 # check inner word count > blt.b L540 # single .l does it all > bcc.b L500 # even > beq.b L520 # 1 > short MOVWI+05300 > br.b L500 # jump into middle of inner loop > add.l &1,%a4 # remember to fixup "br.b" > add.w &1,%d7 # middle entry ==> no dbr offset > L500: > beq.b L530 # no inner words at all > mov.l &4,%d1 # use %d1 in > bsr.b genwid # even half of inner loop > short 0 > L510: > mov.w %a4,%d0; neg.w %d0 > bclr &0,%d0; beq.b L520 > add.w %a5,%d0; mov.b %d0,(%a4)+ # fixup "br.b" into middle > L520: > mov.l &0,%d1 # use %d0 in > bsr.b genwid # odd half of inner loop > short 0 > sub.w &1,%d7 # offset for inner dbr loop > ble.b L530 # dbr always falls through > mov.w &DBR+6,(%a5)+ > sub.l %a5,%a4; mov.w %a4,(%a5)+ # dbr displacement > L530: > > btst &LDFRAG,%d6; beq.b L540 # omit "and" for full last word > mov.l &4,%d1 > bsr.b genwid > and.w %d5,%d0 > L540: > > tst.w %d7; ble.b L545 # no inner loop > btst &FSTORE,%d6; bne.b L545 # possible conflict "mov field" vs. 
%d6 > short MOVWI+05300 # init inner count > mov.w %a4,%d6 > L545: > swap.w %d3; tst.w %d3; beq.b L546 # wdst is full width of bitmap > mov.w %d3,%a1 # dst_dW > short MOVWI+05300 > add.w %a1,%a0 > L546: > swap.w %d2; tst.w %d2; beq.b L547 # wsrc is full width of bitmap > mov.w %d2,%a3 # src_dW > short MOVWI+05300 > add.w %a3,%a2 > L547: > mov.w &DBR+7,(%a5)+ > mov.l %sp,%a4 # top of outer loop > cmp.b (%a4),&0x60; bne.b L548 # not br.b > mov.b 1(%a4),%d0; ext.w %d0; lea.l 2(%a4,%d0.w),%a4 # collapse branches > L548: > sub.l %a5,%a4; mov.w %a4,(%a5)+ # dbr displacement > short MOVWI+05300 > jmp (%a5) > > # generation finished: load the run-time registers (see "During execution" below), leave > # %a5 -> retgen for the generated code's final jmp (%a5), and enter that code at %sp > mov.w %d7,%a4 # init inner count > mov.w %d7,%d6 # init inner count, 2nd case > swap.w %d7 # h-1 > lea.l $retgen(%pc),%a5 > jmp (%sp) > > # genwid: generate the code for one destination word. Called with bsr; the word after the > # bsr is an inline parameter (0 for an inner-loop word, else the fragment "and"), and > # return is to 2 bytes past it. %d1 on entry (0 or 4) indexes genget. > genwid: > mov.l (%sp)+,%a1 # -> inline parameter > mov.l $genget(%pc,%d1.w),%d0 > tst.w %d1; beq.b L550; mov.w &01001,%d1; swap.w %d1 # parity bits > L550: > clr.w %d2; clr.w %d3 # .[wl] bits default to .w > tst.l %d6; bpl.b L560; add.w &010,%d0 # RtoL > L560: > tst.w %d6; bne.b L569 # not NOSHIFT > bclr &9,%d0 # NOSHIFT always %d0 > mov.w (%a1),%d1; bne.b L564 # not inner loop > btst &FSTORE,%d6; beq.b L562 # not "mov" > mov.l &070,%d1; and.w %d0,%d1 > lsl.w &3,%d1; or.w %d1,%d0 # copy RtoL mode > add.w &-0x1000,%d0 # .w ==> .l > mov.w %d0,(%a5)+ > L561: > jmp 2(%a1) > # genget: read as data by genwid (instruction templates fetching via %d0 or via %d1) > genget: > swap.w %d0; mov.w (%a2)+,%d0 > swap.w %d1; mov.w (%a2)+,%d1 > > L562: > mov.w &-0x1000,%d2; mov.w &0100,%d3 # .w +=> .l > add.w %d2,%d0 > L563: > mov.l &0,%d1 # NOSHIFT always %d0 > br L698 # assemble the fetch, then do the op > L564: > lsr.w &1,%d1; bcs.b L562 # NOSHIFT always LD2D > btst &FD2D,%d6; bne.b L562 > br.b L563 # alas, .w > L569: > mov.w (%a1),%d1; beq.b L630 # inner loop > L570: > lsr.w &1,%d1; bcs.b L580 # last word > add.w &-0x1000,%d0 # force fetch .l > mov.w %d0,(%a5)+ # the fetch .l > short MOVLI+00000 > mov.l %d0,%d1 > swap.w %d0 > clr.w %d1; eor.l %d1,%d0 # parity for mov.l %d[01],%d[10] > tst.l %d1; sne.b %d1; sub.b %d1,%d0 # parity 
for swap.w %d[01] > mov.l %d0,(%a5) # ran out of registers > mov.l &0x4c80ec,%d0 # microcoded bits > tst.l %d6; bpl.b L572; ror.l &1,%d0 # RtoL > L572: > tst.w %d6; bpl.b L574; ror.l &2,%d0 # rol > L574: > btst &FD2D,%d6; beq.b L576; ror.l &4,%d0 # first op .l > mov.w &-0x1000,%d2; mov.w &0100,%d3 # .w +=> .l corrections > L576: > ror.l &1,%d0; bpl.b L578 # "swap" not needed > add.l &2,%a5 > ror.l &8,%d0; bpl.b L577 # existing "swap" parity OK > eor.w &1,(%a5) > L577: > ror.l &8,%d0; bpl.b L578 # existing order OK > sub.l &2,%a5 > mov.l (%a5),%d0; swap.w %d0; mov.l %d0,(%a5) > add.l &2,%a5 > L578: > add.l &2,%a5 > swap.w %d1 # junk,,parity > br.b L690 > L580: > btst &LD2D,%d6; beq.b L630 # operator .w > mov.w &-0x1000,%d2 # mov.w +=> mov.l > mov.w &0100,%d3 # xxx.w +=> xxx.l > L630: > tst.l %d6; smi.b %d1 > eor.b %d6,%d1; bpl.b L650 # rotation in same direction as scan > swap.w %d0 # interchange "swap" and "mov" > L650: > mov.l %d0,(%a5)+ > > swap.w %d1 # junk,,parity > mov.w (%a1),%d0; lsr.w &1,%d0; bcs.b L660 # last word > short MOVWI+000 > mov.l %d0,%d1 > eor.w %d1,%d0 > mov.w %d0,(%a5)+ > br.b L690 > L660: > tst.l %d6; bmi.b L690 # RtoL > btst &LD2D,%d6; beq.b L690 # not .l > tst.w %d6; bpl.b L670 # ror > sub.l &2,%a5; br.b L690 # no "swap" > L670: > mov.w -4(%a5),(%a5)+ # extra "swap" > L690: > mov.w %a3,%d0 > eor.b %d1,%d0 > L698: > mov.w %d0,(%a5)+ # the rotate instruction > L700: > # from here: emit the fragment "and" if one is needed, then the operation itself > > mov.w (%a1),%d0; beq.b L730 # inner loop > btst &0,%d0; bne.b L705 # last word > btst &FDFRAG,%d6; beq.b L730 # no "and" > L705: > add.w %d3,%d0; add.w %d1,%d0; sub.b %d1,%d0 # and.[wl] %d[45],%d[01] > btst &FSTORE,%d6; beq.b L720 > # "mov" partial word > swap.w %d0 # save the "and" > short MOVWI+00000 # ,%d0 > mov.w (%a0),%d6 > add.w %d2,%d0 # mov.[wl] > tst.l %d6; bpl.b L710; add.w &020,%d0 # RtoL; "(%a0)" ==> "-(%a0)" > L710: > mov.w %d0,(%a5)+ # instr to fetch memory part of word > short MOVWI+00000 # ,%d0 > eor.w %d6,%d0 > add.w %d3,%d0; add.b %d1,%d0 # eor.[wl] %d6,%d[01] 
> swap.w %d0; mov.l %d0,(%a5)+; swap.w %d0; mov.w %d0,(%a5)+ > mov.w %d2,%d0; add.b %d1,%d0 # mov.[wl] %d[01], > mov.l &-0100,%d1 # RtoL correction, if necessary > br.b L770 > L720: > mov.w %d0,(%a5)+ # "and" for non-mov operators > L730: > mov.w 2(%a6),%d0; beq.b L740 # not F_CLR > add.w %d3,%d0; add.b %d1,%d0 # not.[wl] %d[01] > mov.w %d0,(%a5)+ > L740: > btst &FSTORE,%d6; beq.b L790 # non-"mov" > mov.w %d2,%d0; add.b %d1,%d0 # mov.[wl] %d[01], > mov.l &0100,%d1 # RtoL correction, if necessary > L770: > add.w (%a6),%d0 > tst.l %d6; bpl.b L780 > add.w %d1,%d0 # RtoL correction > L780: > mov.w %d0,(%a5)+ > jmp 2(%a1) # continue 2 bytes past the inline parameter that %a1 points at > > L790: > mov.w %d1,%d0; clr.b %d0; add.w %d3,%d0 # xxx.[wl] %d[01] > mov.l &010,%d1 # RtoL correction, if necessary > br.b L770 > > # > # During execution > # %d[01] == rotator > # %d2 [reserved for texture bits] > # %d3 [reserved for texture index] > # %d4 == mask1 > # %d5 == mask2 > # %d6.w == inner count > # %d7.w == outer count > # %a0 -> dst > # %a1 == dst_dW > # %a2 -> src > # %a3 == src_dW > # %a4.w == inner count init > # %a5 -> retgen > # %a6 [reserved for -> texture] > # >