Re: [TUHS] A Reiser tour de force - Paul Ruizendaal via TUHS

The Unix Heritage Society mailing list
 help / color / mirror / Atom feed

From: Paul Ruizendaal via TUHS <tuhs@minnie.tuhs.org>
To: TUHS main list <tuhs@minnie.tuhs.org>
Subject: Re: [TUHS] A Reiser tour de force
Date: Sun, 3 Apr 2022 13:22:17 +0200	[thread overview]
Message-ID: <7521B7CB-E1C7-44C2-BBF4-A97F63082545@planet.nl> (raw)

[-- Attachment #1: Type: text/plain, Size: 12603 bytes --]

>> A not-very-thorough search at tuhs turned up V9/jerq/src/lib/j/bitblt.c
>> It appears to be a pre-Reiser bitblt, not what was asked for.
> 
> 
> The Reiser code is in the V8 jerq tarball that Dan Cross donated:
> v8jerq.tar.bz2 <https://www.tuhs.org/Archive/Distributions/Research/Dan_Cross_v8/v8jerq.tar.bz2>
> 
> It is in file blit/src/libj/bitblt.s (attached below for convenience). It is 750 lines of 68K assembler. It does not appear to have been ported to the Bellmac 32 CPU. Maybe it did not make sense in that context.
> 
> Paul

There also is a file “bitblt.C” in that same directory (dated May 82, versus Aug 82 for the assembler file) that seems to have a similar approach coded up in C, with a big switch where each case has a lot of “asm()” statements. It is about 400 lines long. No author is mentioned.

I have not investigated deeply, but at first glance it is possible that the approach was first coded up as a kind of threaded code in C (with inline asm) and later that summer redone as fully hand-coded assembler. Just a guess.

Paul

====

#include <jerq.h>
/*
 * Flag bits OR'd into the function code fc inside bitblt() to select a
 * specialised copy loop.
 */
/* set when a copy within one Bitmap has r.origin.x < p.x: words are then processed right to left */
#define LEFTDIR	8
/* set on the wide path when the source-to-destination shift count is zero */
#define NOSHIFT 4
/*
 * set on the narrow path when the rotate count comes out negative.
 * Same numeric value as NOSHIFT; the two flags are tested in different
 * switch statements.
 */
#define DAMMIT	4		/* you'll see why */
/* bitblt() declares a local named sw; presumably jerq.h defines a macro of that name -- TODO confirm */
#undef	sw

/*
 * bitblt -- combine the bits of rectangle r in Bitmap sm into Bitmap dm,
 * with r.origin landing on point p of dm, under function code fc.
 *
 *	sm, dm	source and destination Bitmaps (may be the same Bitmap)
 *	r	source rectangle; the by-value copy is adjusted while clipping
 *	p	destination point; the by-value copy is adjusted while clipping
 *	fc	F_STORE (dest = src), F_OR (dest |= src),
 *		F_CLR (dest &= ~src) or F_XOR (dest ^= src)
 *
 * The rectangle is clipped against dm->rect only; the source is not
 * clipped.  Nothing is done if the clipped rectangle is empty.
 * Rectangles more than 16 bits wide take the word-at-a-time "wide" path;
 * rectangles of 16 bits or fewer jump to `narrow', which does one 32-bit
 * read-modify-write per row.
 *
 * On the wide path an fc that matches no case falls into the default
 * (left-to-right F_XOR) code; the narrow-path switch has no default.
 *
 * No value is returned.
 *
 * NOTE: the asm() statements name registers directly.  They depend on the
 * compiler placing the register variables in the registers listed beside
 * their declarations, so do not reorder or add register declarations.
 * NOTE(review): the shifting cases also appear to rely on Word/UWord
 * being 16 bits and on `m = *source' compiling to a 16-bit move that
 * leaves the upper half of %d2 untouched -- confirm against jerq.h and
 * the compiler output.
 */
bitblt(sm,r,dm,p,fc)
Bitmap *sm,*dm;
Rectangle r;
Point p;
int fc;
{
	register Word *source,*dest,*_source,*_dest;	/* %a2-%a5 */
	register UWord m,mask1,mask2;		/* %d2,%d3,%d4 */
	register int a,b,i;			/* %d5,%d6,%d7 */

	/*
	 * h = row count, w = count of whole destination words between the
	 * two partial edge words, dx1 = width - 1, sw/dw = byte distance
	 * between rows of the source/destination.  j is never used.
	 */
	int j,h,w,dx1,sw,dw;

	/* clip to the destination Bitmap only */
	/*
	 * rp and pp borrow the register pointers dest and source to walk
	 * dm->rect and p as arrays of int.
	 * NOTE(review): assumes Rectangle is laid out as origin.x, origin.y,
	 * corner.x, corner.y and Point as x, y, each the size of an int --
	 * confirm against jerq.h.
	 */
#define	rp	dest
#define pp	source
	rp = (int *) &(dm->rect);
	pp = (int *) &p;
	/* p left of dm's left edge: move p.x and r.origin.x right by the overshoot */
	if ((a = *rp++ - *pp++) > 0) {
		*(pp-1) += a;
		r.origin.x += a;
	}
	/* p above dm's top edge: move p.y and r.origin.y down by the overshoot */
	if ((a = *rp++ - *pp) > 0) {
		*pp += a;
		r.origin.y += a;
	}
	/* pull r.corner in so the copy ends at dm's right and bottom edges */
	if ((a = r.origin.x + *rp++ - *(pp-1)) < r.corner.x)
		r.corner.x = a;
	if ((a = r.origin.y + *rp - *pp) < r.corner.y)
		r.corner.y = a;
	i = r.corner.y - r.origin.y;	/* going to be h */
	a = r.corner.x - r.origin.x - 1;	/* going to be dx1 */
	if (i <= 0 || a < 0)
		return;
	/* width - 1 < 16, i.e. at most 16 bits wide; narrow: expects i and a as set here */
	if (a < 16)
		goto narrow;
	h = i; 
	dx1 = a;		/* i and b are regs, avoid work! */
	sw = sm->width << 1;	/* sleazy hack to avoid shift */
	dw = dm->width << 1;	/* in outer, inner loops */
	w = ((p.x+dx1) >> 4) - (p.x >> 4) - 1;	/* inner loop */
	/*
	 * mask1 selects the bits written in the first (leftmost) destination
	 * word, mask2 those written in the last (rightmost) one.
	 * NOTE(review): presumably topbits[n] has the n high-order bits
	 * set -- topbits is defined elsewhere; confirm.
	 */
	mask1 = ~topbits[p.x & 15];
	mask2 = topbits[((p.x+dx1) & 15) + 1];
	if (sm == dm) {		/* may have to mess with loop order */
		if (r.origin.y < p.y) {		/* swap top with bottom */
			/* start at the last row and step backwards through both */
			r.origin.y += h-1;
			p.y += h-1;
			sw = -sw;
			dw = -dw;
		}
		if (r.origin.x < p.x) {	/* swap left with right */
			/* start at the rightmost bit; the LEFTDIR cases walk leftwards */
			fc |= LEFTDIR;
			r.origin.x += dx1;
			p.x += dx1;
		}
	}
	_dest = addr(dm,p);
	_source = addr(sm,r.origin);
	/* a = how far source bits move right to line up with the destination, taken mod 16 */
	a = (p.x&15) - (r.origin.x&15);
	if (a < 0)
		a += 16;
	else	/* a == 0 means no shift, remember that */
		_source--;	/* else grab long and shift right */
	b = 16 - a;	/* complementary rotate count: a + b == 16 */
	if (a == 0)
		fc |= NOSHIFT;
	source = _source;
	dest = _dest;
	/*
	 * Every case below runs one pass of its outer loop per row: first
	 * partial word under a mask, w whole words, last partial word under
	 * the other mask, then _source/_dest advance by sw/dw bytes.
	 *
	 * In the shifting cases %d2 (m) is used as a 32-bit window: rotating
	 * by a leaves the aligned destination word in its low half, and the
	 * following rotate by b (16 in total) parks the newest source word
	 * in the high half before the next word is loaded into the low half.
	 */
	switch (fc) {
	/* aligned store, left to right */
	case F_STORE | NOSHIFT:
		b = w;
		_source++;	/* undo the _source-- done above for a >= 0 */
		source = _source;
		a = h;		/* a is free => use it */
		do {
			*dest++ = (~mask1 & *dest) | (mask1 & *source++);
			/* whole words: four at a time as two long moves ... */
			if ((i = b>>2) > 0) do {
				*((long *)dest)++ = *((long *)source)++;
				*((long *)dest)++ = *((long *)source)++;
			} while (--i > 0);
			/* ... then the remaining 0-3 words singly */
			if ((i = b&3) > 0) do {
				*dest++ = *source++;
			} while (--i > 0);
			*dest = (~mask2 & *dest) | (mask2 & *source);
			(char *) _source += sw;
			source = _source;
			(char *) _dest += dw;
			dest = _dest;
		} while (--a > 0);
		break;
	/* shifted store, left to right */
	case F_STORE:
		do {
asm("			mov.l	(%a2)+,%d2	# (long) m = *source++");
asm("			ror.l	%d5,%d2		# rotate m right by a");
			*dest++ = (~mask1 & *dest) | (mask1 & m);
asm("			ror.l	%d6,%d2		# rotate m right by b");
			if ((i = w) > 0) do {
				m = *source++;
asm("				ror.l	%d5,%d2 	# m >> a");
				*dest++ = m;
asm("				ror.l	%d6,%d2		# m >> b");
			} while (--i > 0);
			m = *source;
asm("			ror.l	%d5,%d2		# m >> a");
			*dest = (~mask2 & *dest) | (mask2 & m);
			(char *) _source += sw;
			source = _source;
			(char *) _dest += dw;
			dest = _dest;
		} while (--h > 0);
		break;
	/* aligned store, right to left: last word first, pointers pre-decremented */
	case F_STORE | NOSHIFT | LEFTDIR:
		b = w;
		_source++;	/* undo the _source-- done above for a >= 0 */
		source = _source;
		a = h;
		do {
			*dest = (~mask2 & *dest) | (mask2 & *source);
			if ((i = b>>2) > 0) do {
				*--((long *)dest) = *--((long *)source);
				*--((long *)dest) = *--((long *)source);
			} while (--i > 0);
			if ((i = b&3) > 0) do {
				*(--dest) = *(--source);
			} while (--i > 0);
			dest--;
			*dest = (~mask1 & *dest) | (mask1 & *(--source));
			(char *) _source += sw;
			source = _source;
			(char *) _dest += dw;
			dest = _dest;
		} while (--a > 0);
		break;
	/*
	 * shifted store, right to left: the window is rotated left instead,
	 * by b to align each word and then by a to complete the 16
	 */
	case F_STORE | LEFTDIR:
		do {
asm("			mov.l	(%a2),%d2	# (long) m = *source");
asm("			ror.l	%d5,%d2		# m >> a");
			*dest = (~mask2 & *dest) | (mask2 & m);
asm("			rol.l	%d5,%d2		# m << a");
			if ((i = w) > 0) do {
				m = *(--source);
asm("				rol.l	%d6,%d2		# m << b");
				*(--dest) = m;
asm("				rol.l	%d5,%d2		# m << a");
			} while (--i > 0);
			m = *(--source);
asm("			rol.l	%d6,%d2		# m << b");
			dest--;
			*dest = (~mask1 & *dest) | (mask1 & m);
			(char *) _source += sw;
			source = _source;
			(char *) _dest += dw;
			dest = _dest;
		} while (--h > 0);
		break;
	/* OR, left to right; the aligned case shares the shifting code with a == 0 */
	case F_OR:
	case F_OR | NOSHIFT:
		do {
asm("			mov.l	(%a2)+,%d2	# (long) m = *source++");
asm("			ror.l	%d5,%d2		# rotate m right by a");
			*dest++ |= (mask1 & m);
asm("			ror.l	%d6,%d2		# rotate m right by b");
			if ((i = w) > 0) do {
				m = *source++;
asm("				ror.l	%d5,%d2 	# m >> a");
				*dest++ |= m;
asm("				ror.l	%d6,%d2		# m >> b");
			} while (--i > 0);
			m = *source;
asm("			ror.l	%d5,%d2		# m >> a");
			*dest |= (mask2 & m);
			(char *) _source += sw;
			source = _source;
			(char *) _dest += dw;
			dest = _dest;
		} while (--h > 0);
		break;
	/* OR, right to left */
	case F_OR | LEFTDIR:
	case F_OR | NOSHIFT | LEFTDIR:
		do {
asm("			mov.l	(%a2),%d2	# (long) m = *source");
asm("			ror.l	%d5,%d2		# m >> a");
			*dest |= (mask2 & m);
asm("			rol.l	%d5,%d2		# m << a");
			if ((i = w) > 0) do {
				m = *(--source);
asm("				rol.l	%d6,%d2		# m << b");
				*(--dest) |= m;
asm("				rol.l	%d5,%d2		# m << a");
			} while (--i > 0);
			m = *(--source);
asm("			rol.l	%d6,%d2		# m << b");
			dest--;
			*dest |= (mask1 & m);
			(char *) _source += sw;
			source = _source;
			(char *) _dest += dw;
			dest = _dest;
		} while (--h > 0);
		break;
	/*
	 * CLR (dest &= ~src), left to right.  In the inner loop not.w
	 * complements only the low 16 bits of the window, i.e. the word
	 * about to be stored.
	 */
	case F_CLR:
	case F_CLR | NOSHIFT:
		do {
asm("			mov.l	(%a2)+,%d2	# (long) m = *source++");
asm("			ror.l	%d5,%d2		# rotate m right by a");
			*dest++ &= ~(mask1 & m);
asm("			ror.l	%d6,%d2		# rotate m right by b");
			if ((i = w) > 0) do {
				m = *source++;
asm("				ror.l	%d5,%d2 	# m >> a");
asm("				not.w	%d2		# m = ~m");
				*dest++ &= m;
asm("				ror.l	%d6,%d2		# m >> b");
			} while (--i > 0);
			m = *source;
asm("			ror.l	%d5,%d2		# m >> a");
			*dest &= ~(mask2 & m);
			(char *) _source += sw;
			source = _source;
			(char *) _dest += dw;
			dest = _dest;
		} while (--h > 0);
		break;
	/* CLR, right to left */
	case F_CLR | LEFTDIR:
	case F_CLR | NOSHIFT | LEFTDIR:
		do {
asm("			mov.l	(%a2),%d2	# (long) m = *source");
asm("			ror.l	%d5,%d2		# m >> a");
			*dest &= ~(mask2 & m);
asm("			rol.l	%d5,%d2		# m << a");
			if ((i = w) > 0) do {
				m = *(--source);
asm("				rol.l	%d6,%d2		# m << b");
asm("				not.w	%d2		# m = ~m");
				*(--dest) &= m;
asm("				rol.l	%d5,%d2		# m << a");
			} while (--i > 0);
			m = *(--source);
asm("			rol.l	%d6,%d2		# m << b");
			dest--;
			*dest &= ~(mask1 & m);
			(char *) _source += sw;
			source = _source;
			(char *) _dest += dw;
			dest = _dest;
		} while (--h > 0);
		break;
	/* aligned XOR, left to right; same loop structure as F_STORE | NOSHIFT */
	case F_XOR | NOSHIFT:
		b = w;
		_source++;	/* undo the _source-- done above for a >= 0 */
		source = _source;
		a = h;
		do {
			*dest++ ^= (mask1 & *source++);
			if ((i = b>>2) > 0) do {
				*((long *)dest)++ ^= *((long *)source)++;
				*((long *)dest)++ ^= *((long *)source)++;
			} while (--i > 0);
			if ((i = b&3) > 0) do {
				*dest++ ^= *source++;
			} while (--i > 0);
			*dest ^= (mask2 & *source);
			(char *) _source += sw;
			source = _source;
			(char *) _dest += dw;
			dest = _dest;
		} while (--a > 0);
		break;
	/* shifted XOR, left to right; also the fallback for any unmatched fc */
	case F_XOR:
	default:
		do {
asm("			mov.l	(%a2)+,%d2	# (long) m = *source++");
asm("			ror.l	%d5,%d2		# rotate m right by a");
			*dest++ ^= (mask1 & m);
asm("			ror.l	%d6,%d2		# rotate m right by b");
			if ((i = w) > 0) do {
				m = *source++;
asm("				ror.l	%d5,%d2 	# m >> a");
				*dest++ ^= m;
asm("				ror.l	%d6,%d2		# m >> b");
			} while (--i > 0);
			m = *source;
asm("			ror.l	%d5,%d2		# m >> a");
			*dest ^= (mask2 & m);
			(char *) _source += sw;
			source = _source;
			(char *) _dest += dw;
			dest = _dest;
		} while (--h > 0);
		break;
	/* aligned XOR, right to left */
	case F_XOR | NOSHIFT | LEFTDIR:
		b = w;
		_source++;	/* undo the _source-- done above for a >= 0 */
		source = _source;
		a = h;
		do {
			*dest ^= (mask2 & *source);
			if ((i = b>>2) > 0) do {
				*--((long *)dest) ^= *--((long *)source);
				*--((long *)dest) ^= *--((long *)source);
			} while (--i > 0);
			if ((i = b&3) > 0) do {
				*(--dest) ^= *(--source);
			} while (--i > 0);
			dest--;
			*dest ^= (mask1 & *(--source));
			(char *) _source += sw;
			source = _source;
			(char *) _dest += dw;
			dest = _dest;
		} while (--a > 0);
		break;
	/* shifted XOR, right to left */
	case F_XOR | LEFTDIR:
		do {
asm("			mov.l	(%a2),%d2	# (long) m = *source");
asm("			ror.l	%d5,%d2		# m >> a");
			*dest ^= (mask2 & m);
asm("			rol.l	%d5,%d2		# m << a");
			if ((i = w) > 0) do {
				m = *(--source);
asm("				rol.l	%d6,%d2		# m << b");
				*(--dest) ^= m;
asm("				rol.l	%d5,%d2		# m << a");
			} while (--i > 0);
			m = *(--source);
asm("			rol.l	%d6,%d2		# m << b");
			dest--;
			*dest ^= (mask1 & m);
			(char *) _source += sw;
			source = _source;
			(char *) _dest += dw;
			dest = _dest;
		} while (--h > 0);
		break;
	}
	return;
narrow:
	/*
	 * width is 16 bits or less, so we can do it by reading and
	 * writing 32 bits at a time
	 */
	/*
	 * On entry i = row count and a = width - 1.  The register variables
	 * are reused here: _source/_dest hold the Bitmap pointers, mask1 and
	 * mask2 hold the byte distance between rows of source and
	 * destination, b becomes a 32-bit mask of the bits to write, and a
	 * becomes the rotate count.
	 */
	_source = (Word *) sm;
	_dest = (Word *) dm;
	mask1 = ((Bitmap *) _source)->width;	/* source increment */
	mask1 <<= 1;		/* hack to add to an address register */
	mask2 = ((Bitmap *) _dest)->width;	/* dest increment */
	mask2 <<= 1;
	if (_source == _dest && r.origin.y < p.y) {	/* swap top with bottom */
		r.origin.y += i-1;
		p.y += i-1;
		mask1 = -mask1;
		mask2 = -mask2;
	}
	/*
	 * Build the mask: clear all of %d6, load topbits[width] into b,
	 * then rotate it left by 16 - (p.x & 15) to position it within the
	 * 32 bits that start at the destination word.
	 */
asm("	mov	&0,%d6		# (long) b = 0");
	b = topbits[a+1];
	a = (16 - (p.x & 15));	/* hocus pocus to get long mask */
asm("	rol.l	%d5,%d6		# (long) b <<= a");
	/* a is now (p.x & 15) - (r.origin.x & 15) */
	a = 16 - a - (r.origin.x & 15);	/* shift constant */
	if (a < 0) {		/* guess what! -1 == 63 to the 68000!!! */
		fc |= DAMMIT;	/* not fatal, just slow */
	}
	source = addr(_source,r.origin);
	dest = addr(_dest,p);
	/*
	 * One 32-bit read-modify-write per row.  Only F_XOR has separate
	 * code for a negative count (it negates a and rotates left); the
	 * other functions run the same ror code whether or not DAMMIT is set.
	 */
	switch (fc) {
	case F_STORE:
	case F_STORE | DAMMIT:
		/* %d1 = ~b, computed once; %d0 is scratch inside the loop */
asm("		mov.l	%d6,%d1		# prepare inverse mask");
asm("		not.l	%d1		");
		do {
asm("			mov.l	(%a2),%d2	# m = *source");
asm("			ror.l	%d5,%d2		# rotate m right by a");
asm("			and.l	%d6,%d2		# m &= b");
asm("			mov.l	(%a3),%d0	# m |= *dest&~b");
asm("			and.l	%d1,%d0		");
asm("			or.l	%d0,%d2		");
asm("			mov.l	%d2,(%a3)	# *dest = m");
			(char *) source += (int) mask1;
			(char *) dest += (int) mask2;
		} while (--i > 0);
		break;
	case F_OR:
	case F_OR | DAMMIT:
		do {
asm("			mov.l	(%a2),%d2	# m = *source");
asm("			ror.l	%d5,%d2		# rotate m right by a");
asm("			and.l	%d6,%d2		# m &= b");
asm("			or.l	%d2,(%a3)	# *dest |= m");
			(char *) source += (int) mask1;
			(char *) dest += (int) mask2;
		} while (--i > 0);
		break;
	case F_CLR:
	case F_CLR | DAMMIT:
		do {
asm("			mov.l	(%a2),%d2	# m = *source");
asm("			ror.l	%d5,%d2		# rotate m right by a");
asm("			and.l	%d6,%d2		# m &= b");
asm("			not.l	%d2		# m = ^m");
asm("			and.l	%d2,(%a3)	# *dest &= m");
			(char *) source += (int) mask1;
			(char *) dest += (int) mask2;
		} while (--i > 0);
		break;
	case F_XOR:
		do {
asm("			mov.l	(%a2),%d2	# m = *source");
asm("			ror.l	%d5,%d2		# rotate m right by a");
asm("			and.l	%d6,%d2		# m &= b");
asm("			eor.l	%d2,(%a3)	# *dest ^= m");
			(char *) source += (int) mask1;
			(char *) dest += (int) mask2;
		} while (--i > 0);
		break;
	case F_XOR | DAMMIT:
		/* negative count: make it positive and rotate left instead */
		a = -a;
		do {
asm("			mov.l	(%a2),%d2	# m = *source");
asm("			rol.l	%d5,%d2		# rotate m left by a");
asm("			and.l	%d6,%d2		# m &= b");
asm("			eor.l	%d2,(%a3)	# *dest ^= m");
			(char *) source += (int) mask1;
			(char *) dest += (int) mask2;
		} while (--i > 0);
		break;
	}
	return;
}


[-- Attachment #2: Type: text/html, Size: 58167 bytes --]

next             reply	other threads:[~2022-04-03 11:24 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-03 11:22 Paul Ruizendaal via TUHS [this message]
2022-04-03 12:24 ` Rob Pike
  -- strict thread matches above, loose matches on Subject: below --
2022-04-01 15:59 Douglas McIlroy
2022-04-01 17:15 ` David Barto
2022-04-01 17:26   ` Jon Steinhart
2022-04-01 19:41     ` Steffen Nurpmeso
2022-04-01 21:29       ` Rob Pike
2022-04-01 21:31         ` Rob Pike
2022-04-01 21:43       ` Jon Steinhart

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7521B7CB-E1C7-44C2-BBF4-A97F63082545@planet.nl \
    --to=tuhs@minnie.tuhs.org \
    --cc=pnr@planet.nl \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).