9front - general discussion about 9front
 help / color / mirror / Atom feed
* [9front] [second attempt] git/query: improve LCA results
@ 2021-09-04 18:13 ori
  2021-09-06 16:50 ` ori
  0 siblings, 1 reply; 2+ messages in thread
From: ori @ 2021-09-04 18:13 UTC (permalink / raw)
  To: 9front

This is an other attempt at solving the LCA problem with
git/query. It also refactors the object queue to use a
date-ordered priority queue, which in theory should be a
useful heuristic for branchy repos.

More testing requested.

diff 3e7a249f146ce7fb3b7f482adc6b1e7f3aba5616 uncommitted
--- a/sys/src/cmd/git/git.h
+++ b/sys/src/cmd/git/git.h
@@ -18,6 +18,8 @@
 typedef struct Objlist	Objlist;
 typedef struct Dtab	Dtab;
 typedef struct Dblock	Dblock;
+typedef struct Objq	Objq;
+typedef struct Qelt	Qelt;
 
 enum {
 	Pathmax		= 512,
@@ -151,6 +153,28 @@
 	int	sz;
 };
 
+struct Qelt {
+	Object	*o;
+	vlong	mtime;
+	int	color;
+	int	dist;
+};
+
+struct Objq {
+	Hash	*head;
+	Hash	*tail;
+	int	nhead;
+	int	ntail;
+
+	Qelt	*heap;
+	int	nheap;
+	int	heapsz;
+	int	nkeep;
+
+	Object	*best;
+	int	dist;
+};
+
 struct Dtab {
 	Object	*o;
 	uchar	*base;
@@ -301,3 +325,9 @@
 int	readphase(Conn *);
 int	writephase(Conn *);
 void	closeconn(Conn *);
+
+/* queues */
+void	qinit(Objq*);
+void	qclear(Objq*);
+void	qput(Objq*, Object*, int, int);
+int	qpop(Objq*, Qelt*);
--- a/sys/src/cmd/git/log.c
+++ b/sys/src/cmd/git/log.c
@@ -15,10 +15,8 @@
 char	*commitid;
 int	shortlog;
 
-Object	**heap;
-int	nheap;
-int	heapsz;
 Objset	done;
+Objq	objq;
 Pfilt	*pathfilt;
 
 void
@@ -192,64 +190,10 @@
 }
 
 static void
-qput(Object *o)
-{
-	Object *p;
-	int i;
-
-	if(oshas(&done, o->hash))
-		return;
-	osadd(&done, o);
-	if(nheap == heapsz){
-		heapsz *= 2;
-		heap = earealloc(heap, heapsz, sizeof(Object*));
-	}
-	heap[nheap++] = o;
-	for(i = nheap - 1; i > 0; i = (i-1)/2){
-		o = heap[i];
-		p = heap[(i-1)/2];
-		if(o->commit->mtime < p->commit->mtime)
-			break;
-		heap[i] = p;
-		heap[(i-1)/2] = o;
-	}
-}
-
-static Object*
-qpop(void)
-{
-	Object *o, *t;
-	int i, l, r, m;
-
-	if(nheap == 0)
-		return nil;
-
-	i = 0;
-	o = heap[0];
-	t = heap[--nheap];
-	heap[0] = t;
-	while(1){
-		m = i;
-		l = 2*i+1;
-		r = 2*i+2;
-		if(l < nheap && heap[m]->commit->mtime < heap[l]->commit->mtime)
-			m = l;
-		if(r < nheap && heap[m]->commit->mtime < heap[r]->commit->mtime)
-			m = r;
-		else
-			break;
-		t = heap[m];
-		heap[m] = heap[i];
-		heap[i] = t;
-		i = m;
-	}
-	return o;
-}
-
-static void
 showcommits(char *c)
 {
 	Object *o, *p;
+	Qelt e;
 	int i;
 	Hash h;
 
@@ -259,18 +203,20 @@
 		sysfatal("resolve %s: %r", c);
 	if((o = readobject(h)) == nil)
 		sysfatal("load %H: %r", h);
-	heapsz = 8;
-	heap = eamalloc(heapsz, sizeof(Object*));
+	qinit(&objq);
 	osinit(&done);
-	qput(o);
-	while((o = qpop()) != nil){
-		show(o);
-		for(i = 0; i < o->commit->nparent; i++){
-			if((p = readobject(o->commit->parent[i])) == nil)
+	qput(&objq, o, 0, 0);
+	while(qpop(&objq, &e)){
+		show(e.o);
+		for(i = 0; i < e.o->commit->nparent; i++){
+			if(oshas(&done, e.o->commit->parent[i]))
+				continue;
+			if((p = readobject(e.o->commit->parent[i])) == nil)
 				sysfatal("load %H: %r", o->commit->parent[i]);
-			qput(p);
+			osadd(&done, p);
+			qput(&objq, p, 0, 0);
 		}
-		unref(o);
+		unref(e.o);
 	}
 }
 
--- a/sys/src/cmd/git/query.c
+++ b/sys/src/cmd/git/query.c
@@ -158,6 +158,7 @@
 	char query[2048], repo[512];
 
 	ARGBEGIN{
+	case 'd':	chattygit++;	break;
 	case 'p':	fullpath++;	break;
 	case 'c':	changes++;	break;
 	case 'r':	reverse ^= 1;	break;
--- a/sys/src/cmd/git/ref.c
+++ b/sys/src/cmd/git/ref.c
@@ -5,8 +5,6 @@
 #include "git.h"
 
 typedef struct Eval	Eval;
-typedef struct XObject	XObject;
-typedef struct Objq	Objq;
 
 enum {
 	Blank,
@@ -22,19 +20,12 @@
 	int	stksz;
 };
 
-struct XObject {
-	Object	*obj;
-	Object	*mark;
-	XObject	*queue;
-	XObject	*next;
+static char *colors[] = {
+[Keep] "keep",
+[Drop] "drop",
+[Blank] "blank",
 };
 
-struct Objq {
-	Objq	*next;
-	Object	*o;
-	int	color;
-};
-
 static Object zcommit = {
 	.type=GCommit
 };
@@ -128,113 +119,53 @@
 	return 1;
 }
 
-XObject*
-hnode(XObject *ht[], Object *o)
+static int
+repaint(Objset *keep, Objset *drop, Object *o)
 {
-	XObject *h;
-	int	hh;
+	Objq objq;
+	Qelt e;
+	Object *p;
+	int i, r;
 
-	hh = o->hash.h[0] & 0xff;
-	for(h = ht[hh]; h; h = h->next)
-		if(hasheq(&o->hash, &h->obj->hash))
-			return h;
-
-	h = emalloc(sizeof(*h));
-	h->obj = o;
-	h->mark = nil;
-	h->queue = nil;
-	h->next = ht[hh];
-	ht[hh] = h;
-	return h;
-}
-
-Object*
-ancestor(Object *a, Object *b)
-{
-	Object *o, *p, *r;
-	XObject *ht[256];
-	XObject *h, *q, *q1, *q2;
-	int i;
-
-	if(a == b)
-		return a;
-	if(a == nil || b == nil)
-		return nil;
-	r = nil;
-	memset(ht, 0, sizeof(ht));
-	q1 = nil;
-
-	h = hnode(ht, a);
-	h->mark = a;
-	h->queue = q1;
-	q1 = h;
-
-	h = hnode(ht, b);
-	h->mark = b;
-	h->queue = q1;
-	q1 = h;
-
-	while(1){
-		q2 = nil;
-		while(q = q1){
-			q1 = q->queue;
-			q->queue = nil;
-			o = q->obj;
-			for(i = 0; i < o->commit->nparent; i++){
-				p = readobject(o->commit->parent[i]);
-				if(p == nil)
-					goto err;
-				h = hnode(ht, p);
-				if(h->mark != nil){
-					if(h->mark != q->mark){
-						r = h->obj;
-						goto done;
-					}
-				} else {
-					h->mark = q->mark;
-					h->queue = q2;
-					q2 = h;
-				}
+	r = -1;
+	qinit(&objq);
+	if((o = readobject(o->hash)) == nil)
+		goto out;
+	qput(&objq, o, 0, 0);
+	while(qpop(&objq, &e)){
+		o = e.o;
+		if(oshas(drop, o->hash))
+			continue;
+		if(!oshas(keep, o->hash)){
+			dprint(2, "repaint: blank => drop %H\n", o->hash);
+			osadd(drop, o);
+			continue;
+		}
+		for(i = 0; i < o->commit->nparent; i++){
+			if(oshas(drop, o->commit->parent[i]))
+				continue;
+			if((p = readobject(o->commit->parent[i])) == nil)
+				goto out;
+			if(p->type != GCommit){
+				fprint(2, "hash %H not commit\n", p->hash);
+				unref(p);
 			}
+			qput(&objq, p, 0, 0);
 		}
-		if(q2 == nil){
-err:
-			werrstr("no common ancestor");
-			break;
-		}
-		q1 = q2;
+		unref(e.o);
 	}
-done:
-	for(i=0; i<nelem(ht); i++){
-		while(h = ht[i]){
-			ht[i] = h->next;
-			free(h);
-		}
-	}
+	r = 0;
+out:
+	qclear(&objq);
 	return r;
 }
 
-int
-lca(Eval *ev)
-{
-	Object *a, *b, *o;
-
-	if(ev->nstk < 2){
-		werrstr("ancestor needs 2 objects");
-		return -1;
-	}
-	a = pop(ev);
-	b = pop(ev);
-	o = ancestor(a, b);
-	if(o == nil)
-		return -1;
-	push(ev, o);
-	return 0;
-}
-
+#ifdef NOPE
 static int
 repaint(Objset *keep, Objset *drop, Object *o)
 {
+	Objq objq;
+	Qelt e;
 	Object *p;
 	int i;
 
@@ -245,33 +176,75 @@
 	}
 	if(oshas(keep, o->hash))
 		dprint(2, "repaint: keep => drop %H\n", o->hash);
-	osadd(drop, o);
-	for(i = 0; i < o->commit->nparent; i++){
-		if((p = readobject(o->commit->parent[i])) == nil)
-			return -1;
-		if(repaint(keep, drop, p) == -1)
-			return -1;
-		unref(p);
+	qinit(&objq);
+	qput(&objq, o, 0, 0);
+	while(qpop(&objq, &e)){
+		osadd(drop, o);
+		for(i = 0; i < o->commit->nparent; i++){
+			if((p = readobject(o->commit->parent[i])) == nil)
+				return -1;
+			if(oshas(drop, p->hash))
+				goto next;
+			qput(&objq, p, 0, 0);
+next:
+			unref(p);
+		}
 	}
 	return 0;
 }
+#endif
 
-int
-findtwixt(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres)
+static int
+pickbest(Objq *q, Qelt *e, int color)
 {
-	Objq *q, *e, *n, **p;
+	int i, best, exact;
+
+	best = 0;
+	exact = 0;
+	if(color == Blank || e->color == color)
+		return 0;
+	if(e->dist < q->dist){
+		dprint(1, "found best (dist %d < %d): %H\n", e->dist, q->dist, e->o->hash);
+		best = 1;
+	}
+	for(i = 0; i < q->nhead; i++)
+		if(hasheq(&q->head[i], &e->o->hash)){
+			dprint(1, "found best (exact head): %H\n", e->o->hash);
+			best = 1;
+			exact = 1;
+		}
+	for(i = 0; i < q->ntail; i++)
+		if(hasheq(&q->tail[i], &e->o->hash)){
+			dprint(1, "found best (exact tail): %H\n", e->o->hash);
+			best = 1;
+			exact = 1;
+		}
+	if(best){
+		q->best = e->o;
+		q->dist = e->dist;
+	}
+	return exact;
+}
+
+static int
+paint(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres, int ancestor)
+{
+	Qelt e;
+	Objq objq;
 	Objset keep, drop;
 	Object *o, *c;
 	int i, ncolor;
 
-	e = nil;
-	q = nil;
-	p = &q;
 	osinit(&keep);
 	osinit(&drop);
+	qinit(&objq);
+	objq.head = head;
+	objq.nhead = nhead;
+	objq.tail = tail;
+	objq.ntail = ntail;
+	objq.dist = 1<<30;
+
 	for(i = 0; i < nhead; i++){
-		if(hasheq(&head[i], &Zhash))
-			continue;
 		if((o = readobject(head[i])) == nil){
 			fprint(2, "warning: %H does not point at commit\n", o->hash);
 			werrstr("read head %H: %r", head[i]);
@@ -282,17 +255,11 @@
 			unref(o);
 			continue;
 		}
-		dprint(1, "twixt init: keep %H\n", o->hash);
-		e = emalloc(sizeof(Objq));
-		e->o = o;
-		e->color = Keep;
-		*p = e;
-		p = &e->next;
+		dprint(1, "init: keep %H\n", o->hash);
+		qput(&objq, o, Keep, 0);
 		unref(o);
 	}		
 	for(i = 0; i < ntail; i++){
-		if(hasheq(&tail[i], &Zhash))
-			continue;
 		if((o = readobject(tail[i])) == nil){
 			werrstr("read tail %H: %r", tail[i]);
 			return -1;
@@ -303,35 +270,33 @@
 			continue;
 		}
 		dprint(1, "init: drop %H\n", o->hash);
-		e = emalloc(sizeof(Objq));
-		e->o = o;
-		e->color = Drop;
-		*p = e;
-		p = &e->next;
+		qput(&objq, o, Drop, 0);
 		unref(o);
 	}
 
 	dprint(1, "finding twixt commits\n");
-	while(q != nil){
-		if(oshas(&drop, q->o->hash))
+	while(qpop(&objq, &e)){
+		if(oshas(&drop, e.o->hash))
 			ncolor = Drop;
-		else if(oshas(&keep, q->o->hash))
+		else if(oshas(&keep, e.o->hash))
 			ncolor = Keep;
 		else
 			ncolor = Blank;
-		if(ncolor == Drop || ncolor == Keep && q->color == Keep)
-			goto next;
-		if(ncolor == Keep && q->color == Drop){
-			if(repaint(&keep, &drop, q->o) == -1)
+		if(ancestor && pickbest(&objq, &e, ncolor))
+			goto exactlca;
+		if(ncolor == Keep && e.color == Keep || ncolor == Drop)
+			continue;
+		if(ncolor == Keep && e.color == Drop){
+			if(repaint(&keep, &drop, e.o))
 				goto error;
 		}else if (ncolor == Blank) {
-			dprint(2, "visit: %s %H\n", q->color == Keep ? "keep" : "drop", q->o->hash);
-			if(q->color == Keep)
-				osadd(&keep, q->o);
+			if(e.color == Keep)
+				osadd(&keep, e.o);
 			else
-				osadd(&drop, q->o);
-			for(i = 0; i < q->o->commit->nparent; i++){
-				if((c = readobject(q->o->commit->parent[i])) == nil)
+				osadd(&drop, e.o);
+			o = readobject(e.o->hash);
+			for(i = 0; i < o->commit->nparent; i++){
+				if((c = readobject(e.o->commit->parent[i])) == nil)
 					goto error;
 				if(c->type != GCommit){
 					fprint(2, "warning: %H does not point at commit\n", c->hash);
@@ -338,40 +303,79 @@
 					unref(c);
 					continue;
 				}
-				dprint(2, "enqueue: %s %H\n", q->color == Keep ? "keep" : "drop", c->hash);
-				n = emalloc(sizeof(Objq));
-				n->color = q->color;
-				n->next = nil;
-				n->o = c;
-				e->next = n;
-				e = n;
+				dprint(2, "\tenqueue: %s %H\n", colors[e.color], c->hash);
+				qput(&objq, c, e.color, e.dist+1);
 				unref(c);
 			}
-		}else{
-			sysfatal("oops");
+			unref(o);
 		}
-next:
-		n = q->next;
-		free(q);
-		q = n;
 	}
-	*res = eamalloc(keep.nobj, sizeof(Object*));
-	*nres = 0;
-	for(i = 0; i < keep.sz; i++){
-		if(keep.obj[i] != nil && !oshas(&drop, keep.obj[i]->hash)){
-			(*res)[*nres] = keep.obj[i];
-			(*nres)++;
+	if(ancestor){
+exactlca:
+		if(objq.best == nil){
+			*nres = 0;
+			*res = nil;
+		}else{
+			*nres = 1;
+			*res = eamalloc(1, sizeof(Object*));
+			(*res)[0] = objq.best;
 		}
+	}else{
+		*res = eamalloc(keep.nobj, sizeof(Object*));
+		*nres = 0;
+		for(i = 0; i < keep.sz; i++){
+			if(keep.obj[i] != nil && !oshas(&drop, keep.obj[i]->hash)){
+				(*res)[*nres] = keep.obj[i];
+				(*nres)++;
+			}
+		}
 	}
 	osclear(&keep);
 	osclear(&drop);
 	return 0;
 error:
-	for(; q != nil; q = n) {
-		n = q->next;
-		free(q);
-	}
+	free(objq.heap);
 	return -1;
+}
+
+int
+findtwixt(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres)
+{
+	return paint(head, nhead, tail, ntail, res, nres, 0);
+}
+
+Object*
+ancestor(Object *a, Object *b)
+{
+	Object **o, *r;
+	int n;
+
+	if(paint(&a->hash, 1, &b->hash, 1, &o, &n, 1) == -1 || n == 0)
+		return nil;
+	r = o[0];
+	free(o);
+	return r;
+}
+
+int
+lca(Eval *ev)
+{
+	Object *a, *b, **o;
+	int n;
+
+	if(ev->nstk < 2){
+		werrstr("ancestor needs 2 objects");
+		return -1;
+	}
+	n = 0;
+	b = pop(ev);
+	a = pop(ev);
+	paint(&a->hash, 1, &b->hash, 1, &o, &n, 1);
+	if(n == 0)
+		return -1;
+	push(ev, *o);
+	free(o);
+	return 0;
 }
 
 static int
--- a/sys/src/cmd/git/util.c
+++ b/sys/src/cmd/git/util.c
@@ -321,3 +321,75 @@
 	}
 	return pct;
 }
+
+void
+qinit(Objq *q)
+{
+	memset(q, 0, sizeof(Objq));
+	q->nheap = 0;
+	q->heapsz = 8;
+	q->heap = eamalloc(q->heapsz, sizeof(Qelt));
+}
+
+void
+qclear(Objq *q)
+{
+	free(q->heap);
+}
+
+void
+qput(Objq *q, Object *o, int color, int dist)
+{
+	Qelt a, b;
+	int i;
+
+	if(q->nheap == q->heapsz){
+		q->heapsz *= 2;
+		q->heap = earealloc(q->heap, q->heapsz, sizeof(Qelt));
+	}
+	q->heap[q->nheap].o = o;
+	q->heap[q->nheap].color = color;
+	q->heap[q->nheap].dist = dist;
+	q->heap[q->nheap].mtime = o->commit->mtime;
+	q->nheap++;
+	for(i = q->nheap - 1; i > 0; i = (i-1)/2){
+		a = q->heap[i];
+		b = q->heap[(i-1)/2];
+		if(a.mtime < b.mtime)
+			break;
+		q->heap[(i-1)/2] = a;
+		q->heap[i] = b;
+	}
+}
+
+int
+qpop(Objq *q, Qelt *e)
+{
+	int i, l, r, m;
+	Qelt t;
+
+	if(q->nheap == 0)
+		return 0;
+	*e = q->heap[0];
+	if(--q->nheap > 0){
+		i = 0;
+		t = q->heap[q->nheap];
+		q->heap[0] = t;
+		while(1){
+			m = i;
+			l = 2*i+1;
+			r = 2*i+2;
+			if(l < q->nheap && q->heap[m].mtime < q->heap[l].mtime)
+				m = l;
+			if(r < q->nheap && q->heap[m].mtime < q->heap[r].mtime)
+				m = r;
+			else
+				break;
+			t = q->heap[m];
+			q->heap[m] = q->heap[i];
+			q->heap[i] = t;
+			i = m;
+		}
+	}
+	return 1;
+}


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-09-06 16:56 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-04 18:13 [9front] [second attempt] git/query: improve LCA results ori
2021-09-06 16:50 ` ori

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).