From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on inbox.vuxu.org X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=none autolearn=ham autolearn_force=no version=3.4.4 Received: (qmail 26730 invoked from network); 6 Sep 2021 16:56:26 -0000 Received: from 4ess.inri.net (216.126.196.42) by inbox.vuxu.org with ESMTPUTF8; 6 Sep 2021 16:56:26 -0000 Received: from mimir.eigenstate.org ([206.124.132.107]) by 4ess; Mon Sep 6 12:51:13 -0400 2021 Received: from abbatoir.myfiosgateway.com (pool-74-108-56-225.nycmny.fios.verizon.net [74.108.56.225]) by mimir.eigenstate.org (OpenSMTPD) with ESMTPSA id d46e0b26 (TLSv1.2:ECDHE-RSA-AES256-SHA:256:NO) for <9front@9front.org>; Mon, 6 Sep 2021 09:50:57 -0700 (PDT) Message-ID: <64D8C191930456E4CFA2972AF78976E2@eigenstate.org> To: 9front@9front.org Date: Mon, 06 Sep 2021 12:50:55 -0400 From: ori@eigenstate.org In-Reply-To: <6EE1AB2AE36BD707BA1C199CD4D1548C@eigenstate.org> MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit List-ID: <9front.9front.org> List-Help: X-Glyph: ➈ X-Bullshit: open virtualized cloud CSS ACPI extension shader deep-learning framework Subject: Re: [9front] [second attempt] git/query: improve LCA results Reply-To: 9front@9front.org Precedence: bulk Quoth ori@eigenstate.org: > This is an other attempt at solving the LCA problem with > git/query. It also refactors the object queue to use a > date-ordered priority queue, which in theory should be a > useful heuristic for branchy repos. > > More testing requested. One more revision, to add a missing ref() call. Running this on shithub right now, so if you see issues, let me know. diff d116b2d47ca4d0e5895c360f4cbcb51d6a19e0da uncommitted --- a/sys/src/cmd/git/git.h +++ b/sys/src/cmd/git/git.h @@ -18,6 +18,8 @@ typedef struct Objlist Objlist; typedef struct Dtab Dtab; typedef struct Dblock Dblock; +typedef struct Objq Objq; +typedef struct Qelt Qelt; enum { Pathmax = 512, @@ -151,6 +153,28 @@ int sz; }; +struct Qelt { + Object *o; + vlong mtime; + int color; + int dist; +}; + +struct Objq { + Hash *head; + Hash *tail; + int nhead; + int ntail; + + Qelt *heap; + int nheap; + int heapsz; + int nkeep; + + Object *best; + int dist; +}; + struct Dtab { Object *o; uchar *base; @@ -301,3 +325,9 @@ int readphase(Conn *); int writephase(Conn *); void closeconn(Conn *); + +/* queues */ +void qinit(Objq*); +void qclear(Objq*); +void qput(Objq*, Object*, int, int); +int qpop(Objq*, Qelt*); --- a/sys/src/cmd/git/log.c +++ b/sys/src/cmd/git/log.c @@ -15,10 +15,8 @@ char *commitid; int shortlog; -Object **heap; -int nheap; -int heapsz; Objset done; +Objq objq; Pfilt *pathfilt; void @@ -192,64 +190,10 @@ } static void -qput(Object *o) -{ - Object *p; - int i; - - if(oshas(&done, o->hash)) - return; - osadd(&done, o); - if(nheap == heapsz){ - heapsz *= 2; - heap = earealloc(heap, heapsz, sizeof(Object*)); - } - heap[nheap++] = o; - for(i = nheap - 1; i > 0; i = (i-1)/2){ - o = heap[i]; - p = heap[(i-1)/2]; - if(o->commit->mtime < p->commit->mtime) - break; - heap[i] = p; - heap[(i-1)/2] = o; - } -} - -static Object* -qpop(void) -{ - Object *o, *t; - int i, l, r, m; - - if(nheap == 0) - return nil; - - i = 0; - o = heap[0]; - t = heap[--nheap]; - heap[0] = t; - while(1){ - m = i; - l = 2*i+1; - r = 2*i+2; - if(l < nheap && heap[m]->commit->mtime < heap[l]->commit->mtime) - m = l; - if(r < nheap && heap[m]->commit->mtime < heap[r]->commit->mtime) - m = r; - else - break; - t = heap[m]; - heap[m] = heap[i]; - heap[i] = t; - i = m; - } - return o; -} - -static void showcommits(char *c) { Object *o, *p; + Qelt e; int i; Hash h; @@ -259,18 +203,20 @@ sysfatal("resolve %s: %r", c); if((o = readobject(h)) == nil) sysfatal("load %H: %r", h); - heapsz = 8; - heap = eamalloc(heapsz, sizeof(Object*)); + qinit(&objq); osinit(&done); - qput(o); - while((o = qpop()) != nil){ - show(o); - for(i = 0; i < o->commit->nparent; i++){ - if((p = readobject(o->commit->parent[i])) == nil) + qput(&objq, o, 0, 0); + while(qpop(&objq, &e)){ + show(e.o); + for(i = 0; i < e.o->commit->nparent; i++){ + if(oshas(&done, e.o->commit->parent[i])) + continue; + if((p = readobject(e.o->commit->parent[i])) == nil) sysfatal("load %H: %r", o->commit->parent[i]); - qput(p); + osadd(&done, p); + qput(&objq, p, 0, 0); } - unref(o); + unref(e.o); } } --- a/sys/src/cmd/git/query.c +++ b/sys/src/cmd/git/query.c @@ -158,6 +158,7 @@ char query[2048], repo[512]; ARGBEGIN{ + case 'd': chattygit++; break; case 'p': fullpath++; break; case 'c': changes++; break; case 'r': reverse ^= 1; break; --- a/sys/src/cmd/git/ref.c +++ b/sys/src/cmd/git/ref.c @@ -5,8 +5,6 @@ #include "git.h" typedef struct Eval Eval; -typedef struct XObject XObject; -typedef struct Objq Objq; enum { Blank, @@ -22,19 +20,12 @@ int stksz; }; -struct XObject { - Object *obj; - Object *mark; - XObject *queue; - XObject *next; +static char *colors[] = { +[Keep] "keep", +[Drop] "drop", +[Blank] "blank", }; -struct Objq { - Objq *next; - Object *o; - int color; -}; - static Object zcommit = { .type=GCommit }; @@ -128,113 +119,53 @@ return 1; } -XObject* -hnode(XObject *ht[], Object *o) +static int +repaint(Objset *keep, Objset *drop, Object *o) { - XObject *h; - int hh; + Objq objq; + Qelt e; + Object *p; + int i, r; - hh = o->hash.h[0] & 0xff; - for(h = ht[hh]; h; h = h->next) - if(hasheq(&o->hash, &h->obj->hash)) - return h; - - h = emalloc(sizeof(*h)); - h->obj = o; - h->mark = nil; - h->queue = nil; - h->next = ht[hh]; - ht[hh] = h; - return h; -} - -Object* -ancestor(Object *a, Object *b) -{ - Object *o, *p, *r; - XObject *ht[256]; - XObject *h, *q, *q1, *q2; - int i; - - if(a == b) - return a; - if(a == nil || b == nil) - return nil; - r = nil; - memset(ht, 0, sizeof(ht)); - q1 = nil; - - h = hnode(ht, a); - h->mark = a; - h->queue = q1; - q1 = h; - - h = hnode(ht, b); - h->mark = b; - h->queue = q1; - q1 = h; - - while(1){ - q2 = nil; - while(q = q1){ - q1 = q->queue; - q->queue = nil; - o = q->obj; - for(i = 0; i < o->commit->nparent; i++){ - p = readobject(o->commit->parent[i]); - if(p == nil) - goto err; - h = hnode(ht, p); - if(h->mark != nil){ - if(h->mark != q->mark){ - r = h->obj; - goto done; - } - } else { - h->mark = q->mark; - h->queue = q2; - q2 = h; - } + r = -1; + qinit(&objq); + if((o = readobject(o->hash)) == nil) + goto out; + qput(&objq, o, 0, 0); + while(qpop(&objq, &e)){ + o = e.o; + if(oshas(drop, o->hash)) + continue; + if(!oshas(keep, o->hash)){ + dprint(2, "repaint: blank => drop %H\n", o->hash); + osadd(drop, o); + continue; + } + for(i = 0; i < o->commit->nparent; i++){ + if(oshas(drop, o->commit->parent[i])) + continue; + if((p = readobject(o->commit->parent[i])) == nil) + goto out; + if(p->type != GCommit){ + fprint(2, "hash %H not commit\n", p->hash); + unref(p); } + qput(&objq, p, 0, 0); } - if(q2 == nil){ -err: - werrstr("no common ancestor"); - break; - } - q1 = q2; + unref(e.o); } -done: - for(i=0; inext; - free(h); - } - } + r = 0; +out: + qclear(&objq); return r; } -int -lca(Eval *ev) -{ - Object *a, *b, *o; - - if(ev->nstk < 2){ - werrstr("ancestor needs 2 objects"); - return -1; - } - a = pop(ev); - b = pop(ev); - o = ancestor(a, b); - if(o == nil) - return -1; - push(ev, o); - return 0; -} - +#ifdef NOPE static int repaint(Objset *keep, Objset *drop, Object *o) { + Objq objq; + Qelt e; Object *p; int i; @@ -245,33 +176,75 @@ } if(oshas(keep, o->hash)) dprint(2, "repaint: keep => drop %H\n", o->hash); - osadd(drop, o); - for(i = 0; i < o->commit->nparent; i++){ - if((p = readobject(o->commit->parent[i])) == nil) - return -1; - if(repaint(keep, drop, p) == -1) - return -1; - unref(p); + qinit(&objq); + qput(&objq, o, 0, 0); + while(qpop(&objq, &e)){ + osadd(drop, o); + for(i = 0; i < o->commit->nparent; i++){ + if((p = readobject(o->commit->parent[i])) == nil) + return -1; + if(oshas(drop, p->hash)) + goto next; + qput(&objq, p, 0, 0); +next: + unref(p); + } } return 0; } +#endif -int -findtwixt(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres) +static int +pickbest(Objq *q, Qelt *e, int color) { - Objq *q, *e, *n, **p; + int i, best, exact; + + best = 0; + exact = 0; + if(color == Blank || e->color == color) + return 0; + if(e->dist < q->dist){ + dprint(1, "found best (dist %d < %d): %H\n", e->dist, q->dist, e->o->hash); + best = 1; + } + for(i = 0; i < q->nhead; i++) + if(hasheq(&q->head[i], &e->o->hash)){ + dprint(1, "found best (exact head): %H\n", e->o->hash); + best = 1; + exact = 1; + } + for(i = 0; i < q->ntail; i++) + if(hasheq(&q->tail[i], &e->o->hash)){ + dprint(1, "found best (exact tail): %H\n", e->o->hash); + best = 1; + exact = 1; + } + if(best){ + q->best = e->o; + q->dist = e->dist; + } + return exact; +} + +static int +paint(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres, int ancestor) +{ + Qelt e; + Objq objq; Objset keep, drop; Object *o, *c; int i, ncolor; - e = nil; - q = nil; - p = &q; osinit(&keep); osinit(&drop); + qinit(&objq); + objq.head = head; + objq.nhead = nhead; + objq.tail = tail; + objq.ntail = ntail; + objq.dist = 1<<30; + for(i = 0; i < nhead; i++){ - if(hasheq(&head[i], &Zhash)) - continue; if((o = readobject(head[i])) == nil){ fprint(2, "warning: %H does not point at commit\n", o->hash); werrstr("read head %H: %r", head[i]); @@ -282,17 +255,11 @@ unref(o); continue; } - dprint(1, "twixt init: keep %H\n", o->hash); - e = emalloc(sizeof(Objq)); - e->o = o; - e->color = Keep; - *p = e; - p = &e->next; + dprint(1, "init: keep %H\n", o->hash); + qput(&objq, o, Keep, 0); unref(o); } for(i = 0; i < ntail; i++){ - if(hasheq(&tail[i], &Zhash)) - continue; if((o = readobject(tail[i])) == nil){ werrstr("read tail %H: %r", tail[i]); return -1; @@ -303,35 +270,33 @@ continue; } dprint(1, "init: drop %H\n", o->hash); - e = emalloc(sizeof(Objq)); - e->o = o; - e->color = Drop; - *p = e; - p = &e->next; + qput(&objq, o, Drop, 0); unref(o); } dprint(1, "finding twixt commits\n"); - while(q != nil){ - if(oshas(&drop, q->o->hash)) + while(qpop(&objq, &e)){ + if(oshas(&drop, e.o->hash)) ncolor = Drop; - else if(oshas(&keep, q->o->hash)) + else if(oshas(&keep, e.o->hash)) ncolor = Keep; else ncolor = Blank; - if(ncolor == Drop || ncolor == Keep && q->color == Keep) - goto next; - if(ncolor == Keep && q->color == Drop){ - if(repaint(&keep, &drop, q->o) == -1) + if(ancestor && pickbest(&objq, &e, ncolor)) + goto exactlca; + if(ncolor == Keep && e.color == Keep || ncolor == Drop) + continue; + if(ncolor == Keep && e.color == Drop){ + if(repaint(&keep, &drop, e.o)) goto error; }else if (ncolor == Blank) { - dprint(2, "visit: %s %H\n", q->color == Keep ? "keep" : "drop", q->o->hash); - if(q->color == Keep) - osadd(&keep, q->o); + if(e.color == Keep) + osadd(&keep, e.o); else - osadd(&drop, q->o); - for(i = 0; i < q->o->commit->nparent; i++){ - if((c = readobject(q->o->commit->parent[i])) == nil) + osadd(&drop, e.o); + o = readobject(e.o->hash); + for(i = 0; i < o->commit->nparent; i++){ + if((c = readobject(e.o->commit->parent[i])) == nil) goto error; if(c->type != GCommit){ fprint(2, "warning: %H does not point at commit\n", c->hash); @@ -338,40 +303,79 @@ unref(c); continue; } - dprint(2, "enqueue: %s %H\n", q->color == Keep ? "keep" : "drop", c->hash); - n = emalloc(sizeof(Objq)); - n->color = q->color; - n->next = nil; - n->o = c; - e->next = n; - e = n; + dprint(2, "\tenqueue: %s %H\n", colors[e.color], c->hash); + qput(&objq, c, e.color, e.dist+1); unref(c); } - }else{ - sysfatal("oops"); + unref(o); } -next: - n = q->next; - free(q); - q = n; } - *res = eamalloc(keep.nobj, sizeof(Object*)); - *nres = 0; - for(i = 0; i < keep.sz; i++){ - if(keep.obj[i] != nil && !oshas(&drop, keep.obj[i]->hash)){ - (*res)[*nres] = keep.obj[i]; - (*nres)++; + if(ancestor){ +exactlca: + if(objq.best == nil){ + *nres = 0; + *res = nil; + }else{ + *nres = 1; + *res = eamalloc(1, sizeof(Object*)); + (*res)[0] = objq.best; } + }else{ + *res = eamalloc(keep.nobj, sizeof(Object*)); + *nres = 0; + for(i = 0; i < keep.sz; i++){ + if(keep.obj[i] != nil && !oshas(&drop, keep.obj[i]->hash)){ + (*res)[*nres] = keep.obj[i]; + (*nres)++; + } + } } osclear(&keep); osclear(&drop); return 0; error: - for(; q != nil; q = n) { - n = q->next; - free(q); - } + free(objq.heap); return -1; +} + +int +findtwixt(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres) +{ + return paint(head, nhead, tail, ntail, res, nres, 0); +} + +Object* +ancestor(Object *a, Object *b) +{ + Object **o, *r; + int n; + + if(paint(&a->hash, 1, &b->hash, 1, &o, &n, 1) == -1 || n == 0) + return nil; + r = o[0]; + free(o); + return ref(r); +} + +int +lca(Eval *ev) +{ + Object *a, *b, **o; + int n; + + if(ev->nstk < 2){ + werrstr("ancestor needs 2 objects"); + return -1; + } + n = 0; + b = pop(ev); + a = pop(ev); + paint(&a->hash, 1, &b->hash, 1, &o, &n, 1); + if(n == 0) + return -1; + push(ev, *o); + free(o); + return 0; } static int --- a/sys/src/cmd/git/util.c +++ b/sys/src/cmd/git/util.c @@ -321,3 +321,75 @@ } return pct; } + +void +qinit(Objq *q) +{ + memset(q, 0, sizeof(Objq)); + q->nheap = 0; + q->heapsz = 8; + q->heap = eamalloc(q->heapsz, sizeof(Qelt)); +} + +void +qclear(Objq *q) +{ + free(q->heap); +} + +void +qput(Objq *q, Object *o, int color, int dist) +{ + Qelt a, b; + int i; + + if(q->nheap == q->heapsz){ + q->heapsz *= 2; + q->heap = earealloc(q->heap, q->heapsz, sizeof(Qelt)); + } + q->heap[q->nheap].o = o; + q->heap[q->nheap].color = color; + q->heap[q->nheap].dist = dist; + q->heap[q->nheap].mtime = o->commit->mtime; + q->nheap++; + for(i = q->nheap - 1; i > 0; i = (i-1)/2){ + a = q->heap[i]; + b = q->heap[(i-1)/2]; + if(a.mtime < b.mtime) + break; + q->heap[(i-1)/2] = a; + q->heap[i] = b; + } +} + +int +qpop(Objq *q, Qelt *e) +{ + int i, l, r, m; + Qelt t; + + if(q->nheap == 0) + return 0; + *e = q->heap[0]; + if(--q->nheap > 0){ + i = 0; + t = q->heap[q->nheap]; + q->heap[0] = t; + while(1){ + m = i; + l = 2*i+1; + r = 2*i+2; + if(l < q->nheap && q->heap[m].mtime < q->heap[l].mtime) + m = l; + if(r < q->nheap && q->heap[m].mtime < q->heap[r].mtime) + m = r; + else + break; + t = q->heap[m]; + q->heap[m] = q->heap[i]; + q->heap[i] = t; + i = m; + } + } + return 1; +}