there was a diff that went in a while ago to improve this, but it got backed out because it encounters a bug in upstream git -- the spec says that a single ACK should be sent when not using multi-ack modes, but they send back multiple ones. I've got an apparent workaround to fix that, but it's not yet tested enough to commit. Here's the work in progress -- if you can apply it @{cd / && ape/patch -p1} < $patch and let me know if that improves things, that'd be appreciated. There's a chance I may just need to put in a heuristic for when the pack starts, or bite the bullet and implement multi-ack. diff f63d1d3ced81702e0eadf56228a54a467278b0d4 uncommitted --- a/sys/src/cmd/git/fetch.c +++ b/sys/src/cmd/git/fetch.c @@ -186,6 +186,7 @@ int nref, refsz, first; int i, n, req, pfd; vlong packsz; + Objset hadobj; Object *o; nref = 0; @@ -246,13 +247,19 @@ req = 1; } flushpkt(c); + osinit(&hadobj); for(i = 0; i < nref; i++){ - if(hasheq(&have[i], &Zhash)) + if(hasheq(&have[i], &Zhash) || oshas(&hadobj, have[i])) continue; + if((o = readobject(have[i])) == nil) + sysfatal("missing object we should have: %H", have[i]); + osadd(&hadobj, o); + unref(o); n = snprint(buf, sizeof(buf), "have %H\n", have[i]); if(writepkt(c, buf, n + 1) == -1) sysfatal("could not send have for %H", have[i]); } + osclear(&hadobj); if(!req) flushpkt(c); @@ -260,7 +267,7 @@ if(writepkt(c, buf, n) == -1) sysfatal("write: %r"); if(!req) - return 0; + goto showrefs; if(readphase(c) == -1) sysfatal("read: %r"); if((n = readpkt(c, buf, sizeof(buf))) == -1) --- a/sys/src/cmd/git/pull +++ b/sys/src/cmd/git/pull @@ -7,13 +7,10 @@ upstream=$2 url=$3 dir=$4 - bflag=() dflag=() - if(! ~ $#branch 0) - bflag=(-b $branch) if(! ~ $#debug 0) dflag='-d' - {git/fetch $dflag $bflag -u $upstream $url >[2=3] || die $status} | awk ' + {git/fetch $dflag -u $upstream $url >[2=3] || die $status} | awk ' /^remote/{ if($2=="HEAD") next
Quoth ori@eigenstate.org:
> Here's the work in progress
And here's an updated work in progress that works around
the bug more directly:
diff 1d9d4ffef89e883949261ec9c56c57e0344726d7 uncommitted
--- a/sys/src/cmd/git/fetch.c
+++ b/sys/src/cmd/git/fetch.c
@@ -186,6 +186,7 @@
int nref, refsz, first;
int i, n, req, pfd;
vlong packsz;
+ Objset hadobj;
Object *o;
nref = 0;
@@ -246,13 +247,19 @@
req = 1;
}
flushpkt(c);
+ osinit(&hadobj);
for(i = 0; i < nref; i++){
- if(hasheq(&have[i], &Zhash))
+ if(hasheq(&have[i], &Zhash) || oshas(&hadobj, have[i]))
continue;
+ if((o = readobject(have[i])) == nil)
+ sysfatal("missing object we should have: %H", have[i]);
+ osadd(&hadobj, o);
+ unref(o);
n = snprint(buf, sizeof(buf), "have %H\n", have[i]);
if(writepkt(c, buf, n + 1) == -1)
sysfatal("could not send have for %H", have[i]);
}
+ osclear(&hadobj);
if(!req)
flushpkt(c);
@@ -260,7 +267,7 @@
if(writepkt(c, buf, n) == -1)
sysfatal("write: %r");
if(!req)
- return 0;
+ goto showrefs;
if(readphase(c) == -1)
sysfatal("read: %r");
if((n = readpkt(c, buf, sizeof(buf))) == -1)
@@ -277,7 +284,24 @@
sysfatal("could not create %s: %r", packtmp);
fprint(2, "fetching...\n");
+ /*
+ * Work around torvalds git bug: we get duplicate have lines
+ * somtimes, even though the protocol is supposed to start the
+ * pack file immediately.
+ *
+ * Skip ahead until we read 'PACK' off the wire
+ */
+ if(readn(c->rfd, buf, 4) != 4)
+ sysfatal("fetch packfile: short read");
+ while(1){
+ if(strncmp(buf, "PACK", 4) == 0)
+ break;
+ memmove(buf, buf + 1, 3);
+ if(read(c->rfd, buf+3, 1) != 1)
+ sysfatal("fetch packfile: could not scan for pack start");
+ }
packsz = 0;
+ write(pfd, "PACK", 4);
while(1){
n = read(c->rfd, buf, sizeof buf);
if(n == 0)
--- a/sys/src/cmd/git/pull
+++ b/sys/src/cmd/git/pull
@@ -7,13 +7,10 @@
upstream=$2
url=$3
dir=$4
- bflag=()
dflag=()
- if(! ~ $#branch 0)
- bflag=(-b $branch)
if(! ~ $#debug 0)
dflag='-d'
- {git/fetch $dflag $bflag -u $upstream $url >[2=3] || die $status} | awk '
+ {git/fetch $dflag -u $upstream $url >[2=3] || die $status} | awk '
/^remote/{
if($2=="HEAD")
next
Quoth ori@eigenstate.org:
> Quoth ori@eigenstate.org:
> > Here's the work in progress
>
> And here's an updated work in progress that works around
> the bug more directly:
And one more iteration that doesn't shit up the packfile,
and will handle things right even if you name a branch
PACK.
diff 1d9d4ffef89e883949261ec9c56c57e0344726d7 uncommitted
--- a/sys/src/cmd/git/fetch.c
+++ b/sys/src/cmd/git/fetch.c
@@ -180,12 +180,13 @@
int
fetchpack(Conn *c)
{
- char buf[Pktmax], *sp[3];
+ char buf[Pktmax], *sp[3], *ep;
char *packtmp, *idxtmp, **ref;
Hash h, *have, *want;
int nref, refsz, first;
- int i, n, req, pfd;
+ int i, n, l, req, pfd;
vlong packsz;
+ Objset hadobj;
Object *o;
nref = 0;
@@ -246,13 +247,19 @@
req = 1;
}
flushpkt(c);
+ osinit(&hadobj);
for(i = 0; i < nref; i++){
- if(hasheq(&have[i], &Zhash))
+ if(hasheq(&have[i], &Zhash) || oshas(&hadobj, have[i]))
continue;
+ if((o = readobject(have[i])) == nil)
+ sysfatal("missing object we should have: %H", have[i]);
+ osadd(&hadobj, o);
+ unref(o);
n = snprint(buf, sizeof(buf), "have %H\n", have[i]);
if(writepkt(c, buf, n + 1) == -1)
sysfatal("could not send have for %H", have[i]);
}
+ osclear(&hadobj);
if(!req)
flushpkt(c);
@@ -260,7 +267,7 @@
if(writepkt(c, buf, n) == -1)
sysfatal("write: %r");
if(!req)
- return 0;
+ goto showrefs;
if(readphase(c) == -1)
sysfatal("read: %r");
if((n = readpkt(c, buf, sizeof(buf))) == -1)
@@ -277,7 +284,28 @@
sysfatal("could not create %s: %r", packtmp);
fprint(2, "fetching...\n");
- packsz = 0;
+ /*
+ * Work around torvalds git bug: we get duplicate have lines
+ * somtimes, even though the protocol is supposed to start the
+ * pack file immediately.
+ *
+ * Skip ahead until we read 'PACK' off the wire
+ */
+ while(1){
+ if(readn(c->rfd, buf, 4) != 4)
+ sysfatal("fetch packfile: short read");
+ buf[4] = 0;
+ if(strncmp(buf, "PACK", 4) == 0)
+ break;
+ l = strtol(buf, &ep, 16);
+ if(l == 0 || ep != buf + 4)
+ sysfatal("fetch packfile: junk pktline");
+ if(readn(c->rfd, buf, l) != l)
+ sysfatal("fetch packfile: short read");
+ }
+ if(write(pfd, "PACK", 4) != 4)
+ sysfatal("write pack header: %r");
+ packsz = 4;
while(1){
n = read(c->rfd, buf, sizeof buf);
if(n == 0)
--- a/sys/src/cmd/git/pull
+++ b/sys/src/cmd/git/pull
@@ -7,13 +7,10 @@
upstream=$2
url=$3
dir=$4
- bflag=()
dflag=()
- if(! ~ $#branch 0)
- bflag=(-b $branch)
if(! ~ $#debug 0)
dflag='-d'
- {git/fetch $dflag $bflag -u $upstream $url >[2=3] || die $status} | awk '
+ {git/fetch $dflag -u $upstream $url >[2=3] || die $status} | awk '
/^remote/{
if($2=="HEAD")
next
On 2022-04-11, ori@eigenstate.org <ori@eigenstate.org> wrote: > diff 1d9d4ffef89e883949261ec9c56c57e0344726d7 uncommitted > --- a/sys/src/cmd/git/fetch.c > +++ b/sys/src/cmd/git/fetch.c > @@ -180,12 +180,13 @@ > int > fetchpack(Conn *c) > { > - char buf[Pktmax], *sp[3]; > + char buf[Pktmax], *sp[3], *ep; > char *packtmp, *idxtmp, **ref; > Hash h, *have, *want; > int nref, refsz, first; > - int i, n, req, pfd; > + int i, n, l, req, pfd; > vlong packsz; > + Objset hadobj; > Object *o; > > nref = 0; > @@ -246,13 +247,19 @@ > req = 1; > } > flushpkt(c); > + osinit(&hadobj); > for(i = 0; i < nref; i++){ > - if(hasheq(&have[i], &Zhash)) > + if(hasheq(&have[i], &Zhash) || oshas(&hadobj, have[i])) > continue; > + if((o = readobject(have[i])) == nil) > + sysfatal("missing object we should have: %H", have[i]); > + osadd(&hadobj, o); > + unref(o); > n = snprint(buf, sizeof(buf), "have %H\n", have[i]); > if(writepkt(c, buf, n + 1) == -1) > sysfatal("could not send have for %H", have[i]); > } > + osclear(&hadobj); I've been looking over the http-protocol docs, and it describes an algorithm involving a priority queue and commit graph traversal that we don't seem to do. How does this work when our refs have several local commits that the server doesn't have? > if(!req) > flushpkt(c); > > @@ -260,7 +267,7 @@ > if(writepkt(c, buf, n) == -1) > sysfatal("write: %r"); > if(!req) > - return 0; > + goto showrefs; > if(readphase(c) == -1) > sysfatal("read: %r"); > if((n = readpkt(c, buf, sizeof(buf))) == -1) > @@ -277,7 +284,28 @@ > sysfatal("could not create %s: %r", packtmp); > > fprint(2, "fetching...\n"); > - packsz = 0; > + /* > + * Work around torvalds git bug: we get duplicate have lines Do you mean duplicate ACK lines? From my reading of the protocol docs, "have" lines are only sent by us (the client). In fact, I don't really see much about ACK lines at all. Do you know if this is described anywhere in the git docs? > + * somtimes, even though the protocol is supposed to start the Typo here. > + * pack file immediately. > + * > + * Skip ahead until we read 'PACK' off the wire > + */ > + while(1){ > + if(readn(c->rfd, buf, 4) != 4) > + sysfatal("fetch packfile: short read"); > + buf[4] = 0; > + if(strncmp(buf, "PACK", 4) == 0) > + break; > + l = strtol(buf, &ep, 16); > + if(l == 0 || ep != buf + 4) > + sysfatal("fetch packfile: junk pktline"); > + if(readn(c->rfd, buf, l) != l) > + sysfatal("fetch packfile: short read"); > + } > + if(write(pfd, "PACK", 4) != 4) > + sysfatal("write pack header: %r"); > + packsz = 4; > while(1){ > n = read(c->rfd, buf, sizeof buf); > if(n == 0) > --- a/sys/src/cmd/git/pull > +++ b/sys/src/cmd/git/pull > @@ -7,13 +7,10 @@ > upstream=$2 > url=$3 > dir=$4 > - bflag=() > dflag=() > - if(! ~ $#branch 0) > - bflag=(-b $branch) > if(! ~ $#debug 0) > dflag='-d' > - {git/fetch $dflag $bflag -u $upstream $url >[2=3] || die $status} | awk ' > + {git/fetch $dflag -u $upstream $url >[2=3] || die $status} | awk ' > /^remote/{ > if($2=="HEAD") > next Now that $branch is unused, I think you can remove the corresponding positional parameter in the update function, as well as the git/pull -a and -b options.
Quoth Michael Forney <mforney@mforney.org>: > On 2022-04-11, ori@eigenstate.org <ori@eigenstate.org> wrote: > I've been looking over the http-protocol docs, and it describes an > algorithm involving a priority queue and commit graph traversal that > we don't seem to do. > > How does this work when our refs have several local commits that the > server doesn't have? Yeah. We currently fetch too much. In practice it seems to not come up too often, but I would like to implement that queue/ack/nak algorithm (and implement multi-ack while I'm there). It'd be a separate commit. > > + * Work around torvalds git bug: we get duplicate have lines > > Do you mean duplicate ACK lines? From my reading of the protocol docs, > "have" lines are only sent by us (the client). I do. Good catch. > In fact, I don't really see much about ACK lines at all. Do you know > if this is described anywhere in the git docs? It's described in the pack protocol, as well as the extensions. git/Documentation/technical/pack-protocol.txt git/Documentation/technical/protocol-capabilities.txt > Now that $branch is unused, I think you can remove the corresponding > positional parameter in the update function, as well as the git/pull > -a and -b options. Yes. thanks.