From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on inbox.vuxu.org X-Spam-Level: * X-Spam-Status: No, score=1.3 required=5.0 tests=RDNS_NONE autolearn=no autolearn_force=no version=3.4.4 Received: (qmail 8517 invoked from network); 29 Aug 2021 02:11:54 -0000 Received: from unknown (HELO 4ess.inri.net) (216.126.196.42) by inbox.vuxu.org with ESMTPUTF8; 29 Aug 2021 02:11:54 -0000 Received: from mimir.eigenstate.org ([206.124.132.107]) by 4ess; Sat Aug 28 22:04:31 -0400 2021 Received: from abbatoir.myfiosgateway.com (pool-74-108-56-225.nycmny.fios.verizon.net [74.108.56.225]) by mimir.eigenstate.org (OpenSMTPD) with ESMTPSA id d8613f35 (TLSv1.2:ECDHE-RSA-AES256-SHA:256:NO) for <9front@9front.org>; Sat, 28 Aug 2021 19:04:15 -0700 (PDT) Message-ID: To: 9front@9front.org Date: Sat, 28 Aug 2021 22:04:14 -0400 From: ori@eigenstate.org MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit List-ID: <9front.9front.org> List-Help: X-Glyph: ➈ X-Bullshit: object-oriented hypervisor Subject: [9front] git/query: improve LCA results Reply-To: 9front@9front.org Precedence: bulk Currently, git/pull can request you to do spurious merges. We actually ran into this once in the 9front repo: commit 185fe31de404b67441248d91e6321645278ba92a will request a supurious merge if you pull from it. This is because when faced with a commit graph like this: ---a---b---c---d---e \ / +-----f----+ and we want to find the least common ancestor of commits 'b' and 'e', we will return commit 'a'. This is technically the correct answer: we can get to 'a' with 1 step from 'e', and 2 from 'e', for a total of 3 steps, which is shorter than the number of steps to get from 'e' to 'b' directly However, for every practical purpose of the LCA algorithm, if a node is an ancestor of another, we want to return that ancestor, rather than the node which can be reached from all parents in the smallest number of steps. This means that we need to keep scanning until we've visited all potential ancestors between the two nodes, and picking the shortest distance between nodes reachable from 'e' and the nodes reachable from 'b'. We already have code that almost does this, in our findtwixt() code, painting backwards from a set of 'head' and 'tail' nodes in parallel until there are no more nodes reachable from 'head' without crossing over 'tail'. Since we paint until all nodes reachable from head are painted, we're guaranted to encounter the ancestor we want to return if it's present. This change takes findtwixt() and refactors it into a paint function which, when passed 'ancestor=1', will return the lca rather than the full set of covered nodes. Along with these changes are a few refactors to prevent exponential blowups in time when repainting a graph of mostly-done nodes, by bailing out early: this made pulling git (commits a5fa49ff0 => c4203212e36) take tens of seconnds instead of more than tens of minutes. As a bonus, we delete some code by doing this refactor. Since bugs here can lead to issues sysupdating, I'd like some extra eyeballs, testing, and review on this before I commit. diff e6f0abc25e4e237fff97ef9bdd1dc98ee519def8 uncommitted --- a/sys/src/cmd/git/ref.c +++ b/sys/src/cmd/git/ref.c @@ -22,17 +22,11 @@ int stksz; }; -struct XObject { - Object *obj; - Object *mark; - XObject *queue; - XObject *next; -}; - struct Objq { Objq *next; Object *o; int color; + int dist; }; static Object zcommit = { @@ -128,110 +122,6 @@ return 1; } -XObject* -hnode(XObject *ht[], Object *o) -{ - XObject *h; - int hh; - - hh = o->hash.h[0] & 0xff; - for(h = ht[hh]; h; h = h->next) - if(hasheq(&o->hash, &h->obj->hash)) - return h; - - h = emalloc(sizeof(*h)); - h->obj = o; - h->mark = nil; - h->queue = nil; - h->next = ht[hh]; - ht[hh] = h; - return h; -} - -Object* -ancestor(Object *a, Object *b) -{ - Object *o, *p, *r; - XObject *ht[256]; - XObject *h, *q, *q1, *q2; - int i; - - if(a == b) - return a; - if(a == nil || b == nil) - return nil; - r = nil; - memset(ht, 0, sizeof(ht)); - q1 = nil; - - h = hnode(ht, a); - h->mark = a; - h->queue = q1; - q1 = h; - - h = hnode(ht, b); - h->mark = b; - h->queue = q1; - q1 = h; - - while(1){ - q2 = nil; - while(q = q1){ - q1 = q->queue; - q->queue = nil; - o = q->obj; - for(i = 0; i < o->commit->nparent; i++){ - p = readobject(o->commit->parent[i]); - if(p == nil) - goto err; - h = hnode(ht, p); - if(h->mark != nil){ - if(h->mark != q->mark){ - r = h->obj; - goto done; - } - } else { - h->mark = q->mark; - h->queue = q2; - q2 = h; - } - } - } - if(q2 == nil){ -err: - werrstr("no common ancestor"); - break; - } - q1 = q2; - } -done: - for(i=0; inext; - free(h); - } - } - return r; -} - -int -lca(Eval *ev) -{ - Object *a, *b, *o; - - if(ev->nstk < 2){ - werrstr("ancestor needs 2 objects"); - return -1; - } - a = pop(ev); - b = pop(ev); - o = ancestor(a, b); - if(o == nil) - return -1; - push(ev, o); - return 0; -} - static int repaint(Objset *keep, Objset *drop, Object *o) { @@ -249,29 +139,57 @@ for(i = 0; i < o->commit->nparent; i++){ if((p = readobject(o->commit->parent[i])) == nil) return -1; + if(oshas(drop, p->hash)) + goto next; if(repaint(keep, drop, p) == -1) return -1; +next: unref(p); } return 0; } -int -findtwixt(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres) +static int +haskeep(Objq *q) { + for(; q != nil; q = q->next) + if(q->color == Keep) + return 1; + return 0; +} + +static int +bestpick(Hash *head, int nhead, Hash *tail, int ntail, Objq *q, int dist) +{ + int i; + + if(q->dist < dist) + return 1; + for(i = 0; i < nhead; i++) + if(hasheq(&head[i], &q->o->hash)) + return 1; + for(i = 0; i < ntail; i++) + if(hasheq(&tail[i], &q->o->hash)) + return 1; + return 0; +} + +static int +paint(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres, int ancestor) +{ Objq *q, *e, *n, **p; Objset keep, drop; - Object *o, *c; - int i, ncolor; + Object *o, *c, *best; + int i, dist, ncolor; e = nil; q = nil; p = &q; + best = nil; + dist = 1<<30; osinit(&keep); osinit(&drop); for(i = 0; i < nhead; i++){ - if(hasheq(&head[i], &Zhash)) - continue; if((o = readobject(head[i])) == nil){ fprint(2, "warning: %H does not point at commit\n", o->hash); werrstr("read head %H: %r", head[i]); @@ -286,13 +204,12 @@ e = emalloc(sizeof(Objq)); e->o = o; e->color = Keep; + e->dist = 0; *p = e; p = &e->next; unref(o); } for(i = 0; i < ntail; i++){ - if(hasheq(&tail[i], &Zhash)) - continue; if((o = readobject(tail[i])) == nil){ werrstr("read tail %H: %r", tail[i]); return -1; @@ -306,6 +223,7 @@ e = emalloc(sizeof(Objq)); e->o = o; e->color = Drop; + e->dist = 0; *p = e; p = &e->next; unref(o); @@ -312,7 +230,7 @@ } dprint(1, "finding twixt commits\n"); - while(q != nil){ + while(haskeep(q)){ if(oshas(&drop, q->o->hash)) ncolor = Drop; else if(oshas(&keep, q->o->hash)) @@ -319,18 +237,36 @@ ncolor = Keep; else ncolor = Blank; - if(ncolor == Drop || ncolor == Keep && q->color == Keep) + dprint(2, "visit: ncolor=%s, qcolor=%s %H\n", + ncolor == Keep ? "keep" : "drop", + q->color == Keep ? "keep" : "drop", + q->o->hash); + if(ncolor == Keep && q->color == Keep){ goto next; - if(ncolor == Keep && q->color == Drop){ + }else if(ncolor == Drop){ + dprint(2, "consider: best %H => %H (dist: %d => %d)\n", best ? best->hash : Zhash, q->o->hash, dist, q->dist); + if(q->color == Keep && bestpick(head, nhead, tail, ntail, q, dist)){ + dprint(2, "ancestor: best %H => %H (dist: %d => %d)\n", best ? best->hash : Zhash, q->o->hash, dist, q->dist); + best = q->o; + dist = q->dist; + } + }else if(ncolor == Keep && q->color == Drop){ + dprint(2, "consider: best %H => %H (dist: %d => %d)\n", best ? best->hash : Zhash, q->o->hash, dist, q->dist); + if(bestpick(head, nhead, tail, ntail, q, dist)){ + //q->dist < dist || hasheq(head, &q->o->hash)){ + dprint(2, "ancestor: best %H => %H (dist: %d => %d)\n", best->hash, q->o->hash, dist, q->dist); + best = q->o; + dist = q->dist; + } if(repaint(&keep, &drop, q->o) == -1) goto error; }else if (ncolor == Blank) { - dprint(2, "visit: %s %H\n", q->color == Keep ? "keep" : "drop", q->o->hash); if(q->color == Keep) osadd(&keep, q->o); else osadd(&drop, q->o); - for(i = 0; i < q->o->commit->nparent; i++){ + o = readobject(q->o->hash); + for(i = 0; i < o->commit->nparent; i++){ if((c = readobject(q->o->commit->parent[i])) == nil) goto error; if(c->type != GCommit){ @@ -341,6 +277,7 @@ dprint(2, "enqueue: %s %H\n", q->color == Keep ? "keep" : "drop", c->hash); n = emalloc(sizeof(Objq)); n->color = q->color; + n->dist = q->dist + 1; n->next = nil; n->o = c; e->next = n; @@ -347,8 +284,9 @@ e = n; unref(c); } + unref(o); }else{ - sysfatal("oops"); + sysfatal("oops: ncolor==%d && qcolor==%d", ncolor, q->color); } next: n = q->next; @@ -355,13 +293,24 @@ free(q); q = n; } - *res = eamalloc(keep.nobj, sizeof(Object*)); - *nres = 0; - for(i = 0; i < keep.sz; i++){ - if(keep.obj[i] != nil && !oshas(&drop, keep.obj[i]->hash)){ - (*res)[*nres] = keep.obj[i]; - (*nres)++; + if(ancestor){ + if(best == nil){ + *nres = 0; + *res = nil; + }else{ + *nres = 1; + *res = eamalloc(1, sizeof(Object*)); + (*res)[0] = best; } + }else{ + *res = eamalloc(keep.nobj, sizeof(Object*)); + *nres = 0; + for(i = 0; i < keep.sz; i++){ + if(keep.obj[i] != nil && !oshas(&drop, keep.obj[i]->hash)){ + (*res)[*nres] = keep.obj[i]; + (*nres)++; + } + } } osclear(&keep); osclear(&drop); @@ -372,6 +321,46 @@ free(q); } return -1; +} + +int +findtwixt(Hash *head, int nhead, Hash *tail, int ntail, Object ***res, int *nres) +{ + return paint(head, nhead, tail, ntail, res, nres, 0); +} + +Object* +ancestor(Object *a, Object *b) +{ + Object **o, *r; + int n; + + if(paint(&a->hash, 1, &b->hash, 1, &o, &n, 1) == -1 || n == 0) + return nil; + r = o[0]; + free(o); + return r; +} + +int +lca(Eval *ev) +{ + Object *a, *b, **o; + int n; + + if(ev->nstk < 2){ + werrstr("ancestor needs 2 objects"); + return -1; + } + n = 0; + a = pop(ev); + b = pop(ev); + paint(&a->hash, 1, &b->hash, 1, &o, &n, 1); + if(n == 0) + return -1; + push(ev, *o); + free(o); + return 0; } static int