9front - general discussion about 9front
 help / color / mirror / Atom feed
* [9front] git/pull: fetch all branches (please test)
@ 2022-04-11  3:19 ori
  2022-04-11 21:32 ` ori
  0 siblings, 1 reply; 5+ messages in thread
From: ori @ 2022-04-11  3:19 UTC (permalink / raw)
  To: 9front

there was a diff that went in a while ago to improve
this, but it got backed out because it encounters a
bug in upstream git -- the spec says that a single
ACK should be sent when not using multi-ack modes,
but they send back multiple ones.  I've got an
apparent workaround to fix that, but it's not yet
tested enough to commit.

Here's the work in progress -- if you can apply it

	@{cd / && ape/patch -p1} < $patch

and let me know if that improves things, that'd be
appreciated. There's a chance I may just need to put
in a heuristic for when the pack starts, or bite the
bullet and implement multi-ack.

diff f63d1d3ced81702e0eadf56228a54a467278b0d4 uncommitted
--- a/sys/src/cmd/git/fetch.c
+++ b/sys/src/cmd/git/fetch.c
@@ -186,6 +186,7 @@
 	int nref, refsz, first;
 	int i, n, req, pfd;
 	vlong packsz;
+	Objset hadobj;
 	Object *o;
 
 	nref = 0;
@@ -246,13 +247,19 @@
 		req = 1;
 	}
 	flushpkt(c);
+	osinit(&hadobj);
 	for(i = 0; i < nref; i++){
-		if(hasheq(&have[i], &Zhash))
+		if(hasheq(&have[i], &Zhash) || oshas(&hadobj, have[i]))
 			continue;
+		if((o = readobject(have[i])) == nil)
+			sysfatal("missing object we should have: %H", have[i]);
+		osadd(&hadobj, o);
+		unref(o);	
 		n = snprint(buf, sizeof(buf), "have %H\n", have[i]);
 		if(writepkt(c, buf, n + 1) == -1)
 			sysfatal("could not send have for %H", have[i]);
 	}
+	osclear(&hadobj);
 	if(!req)
 		flushpkt(c);
 
@@ -260,7 +267,7 @@
 	if(writepkt(c, buf, n) == -1)
 		sysfatal("write: %r");
 	if(!req)
-		return 0;
+		goto showrefs;
 	if(readphase(c) == -1)
 		sysfatal("read: %r");
 	if((n = readpkt(c, buf, sizeof(buf))) == -1)
--- a/sys/src/cmd/git/pull
+++ b/sys/src/cmd/git/pull
@@ -7,13 +7,10 @@
 	upstream=$2
 	url=$3
 	dir=$4
-	bflag=()
 	dflag=()
-	if(! ~ $#branch 0)
-		bflag=(-b $branch)
 	if(! ~ $#debug 0)
 		dflag='-d'
-	{git/fetch $dflag $bflag -u $upstream $url >[2=3] || die $status} | awk '
+	{git/fetch $dflag -u $upstream $url >[2=3] || die $status} | awk '
 	/^remote/{
 		if($2=="HEAD")
 			next



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [9front] git/pull: fetch all branches (please test)
  2022-04-11  3:19 [9front] git/pull: fetch all branches (please test) ori
@ 2022-04-11 21:32 ` ori
  2022-04-12  2:55   ` ori
  0 siblings, 1 reply; 5+ messages in thread
From: ori @ 2022-04-11 21:32 UTC (permalink / raw)
  To: 9front

Quoth ori@eigenstate.org:
> Here's the work in progress

And here's an updated work in progress that works around
the bug more directly:

diff 1d9d4ffef89e883949261ec9c56c57e0344726d7 uncommitted
--- a/sys/src/cmd/git/fetch.c
+++ b/sys/src/cmd/git/fetch.c
@@ -186,6 +186,7 @@
 	int nref, refsz, first;
 	int i, n, req, pfd;
 	vlong packsz;
+	Objset hadobj;
 	Object *o;
 
 	nref = 0;
@@ -246,13 +247,19 @@
 		req = 1;
 	}
 	flushpkt(c);
+	osinit(&hadobj);
 	for(i = 0; i < nref; i++){
-		if(hasheq(&have[i], &Zhash))
+		if(hasheq(&have[i], &Zhash) || oshas(&hadobj, have[i]))
 			continue;
+		if((o = readobject(have[i])) == nil)
+			sysfatal("missing object we should have: %H", have[i]);
+		osadd(&hadobj, o);
+		unref(o);	
 		n = snprint(buf, sizeof(buf), "have %H\n", have[i]);
 		if(writepkt(c, buf, n + 1) == -1)
 			sysfatal("could not send have for %H", have[i]);
 	}
+	osclear(&hadobj);
 	if(!req)
 		flushpkt(c);
 
@@ -260,7 +267,7 @@
 	if(writepkt(c, buf, n) == -1)
 		sysfatal("write: %r");
 	if(!req)
-		return 0;
+		goto showrefs;
 	if(readphase(c) == -1)
 		sysfatal("read: %r");
 	if((n = readpkt(c, buf, sizeof(buf))) == -1)
@@ -277,7 +284,24 @@
 		sysfatal("could not create %s: %r", packtmp);
 
 	fprint(2, "fetching...\n");
+	/*
+	 * Work around torvalds git bug: we get duplicate have lines
+	 * somtimes, even though the protocol is supposed to start the
+	 * pack file immediately.
+	 *
+	 * Skip ahead until we read 'PACK' off the wire
+	 */
+	if(readn(c->rfd, buf, 4) != 4)
+		sysfatal("fetch packfile: short read");
+	while(1){
+		if(strncmp(buf, "PACK", 4) == 0)
+			break;
+		memmove(buf, buf + 1, 3);
+		if(read(c->rfd, buf+3, 1) != 1)
+			sysfatal("fetch packfile: could not scan for pack start");
+	}
 	packsz = 0;
+	write(pfd, "PACK", 4);
 	while(1){
 		n = read(c->rfd, buf, sizeof buf);
 		if(n == 0)
--- a/sys/src/cmd/git/pull
+++ b/sys/src/cmd/git/pull
@@ -7,13 +7,10 @@
 	upstream=$2
 	url=$3
 	dir=$4
-	bflag=()
 	dflag=()
-	if(! ~ $#branch 0)
-		bflag=(-b $branch)
 	if(! ~ $#debug 0)
 		dflag='-d'
-	{git/fetch $dflag $bflag -u $upstream $url >[2=3] || die $status} | awk '
+	{git/fetch $dflag -u $upstream $url >[2=3] || die $status} | awk '
 	/^remote/{
 		if($2=="HEAD")
 			next


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [9front] git/pull: fetch all branches (please test)
  2022-04-11 21:32 ` ori
@ 2022-04-12  2:55   ` ori
  2022-04-19 19:31     ` Michael Forney
  0 siblings, 1 reply; 5+ messages in thread
From: ori @ 2022-04-12  2:55 UTC (permalink / raw)
  To: 9front

Quoth ori@eigenstate.org:
> Quoth ori@eigenstate.org:
> > Here's the work in progress
> 
> And here's an updated work in progress that works around
> the bug more directly:

And one more iteration that doesn't shit up the packfile,
and will handle things right even if you name a branch
PACK.

diff 1d9d4ffef89e883949261ec9c56c57e0344726d7 uncommitted
--- a/sys/src/cmd/git/fetch.c
+++ b/sys/src/cmd/git/fetch.c
@@ -180,12 +180,13 @@
 int
 fetchpack(Conn *c)
 {
-	char buf[Pktmax], *sp[3];
+	char buf[Pktmax], *sp[3], *ep;
 	char *packtmp, *idxtmp, **ref;
 	Hash h, *have, *want;
 	int nref, refsz, first;
-	int i, n, req, pfd;
+	int i, n, l, req, pfd;
 	vlong packsz;
+	Objset hadobj;
 	Object *o;
 
 	nref = 0;
@@ -246,13 +247,19 @@
 		req = 1;
 	}
 	flushpkt(c);
+	osinit(&hadobj);
 	for(i = 0; i < nref; i++){
-		if(hasheq(&have[i], &Zhash))
+		if(hasheq(&have[i], &Zhash) || oshas(&hadobj, have[i]))
 			continue;
+		if((o = readobject(have[i])) == nil)
+			sysfatal("missing object we should have: %H", have[i]);
+		osadd(&hadobj, o);
+		unref(o);	
 		n = snprint(buf, sizeof(buf), "have %H\n", have[i]);
 		if(writepkt(c, buf, n + 1) == -1)
 			sysfatal("could not send have for %H", have[i]);
 	}
+	osclear(&hadobj);
 	if(!req)
 		flushpkt(c);
 
@@ -260,7 +267,7 @@
 	if(writepkt(c, buf, n) == -1)
 		sysfatal("write: %r");
 	if(!req)
-		return 0;
+		goto showrefs;
 	if(readphase(c) == -1)
 		sysfatal("read: %r");
 	if((n = readpkt(c, buf, sizeof(buf))) == -1)
@@ -277,7 +284,28 @@
 		sysfatal("could not create %s: %r", packtmp);
 
 	fprint(2, "fetching...\n");
-	packsz = 0;
+	/*
+	 * Work around torvalds git bug: we get duplicate have lines
+	 * somtimes, even though the protocol is supposed to start the
+	 * pack file immediately.
+	 *
+	 * Skip ahead until we read 'PACK' off the wire
+	 */
+	while(1){
+		if(readn(c->rfd, buf, 4) != 4)
+			sysfatal("fetch packfile: short read");
+		buf[4] = 0;
+		if(strncmp(buf, "PACK", 4) == 0)
+			break;
+		l = strtol(buf, &ep, 16);
+		if(l == 0 || ep != buf + 4)
+			sysfatal("fetch packfile: junk pktline");
+		if(readn(c->rfd, buf, l) != l)
+			sysfatal("fetch packfile: short read");
+	}
+	if(write(pfd, "PACK", 4) != 4)
+		sysfatal("write pack header: %r");
+	packsz = 4;
 	while(1){
 		n = read(c->rfd, buf, sizeof buf);
 		if(n == 0)
--- a/sys/src/cmd/git/pull
+++ b/sys/src/cmd/git/pull
@@ -7,13 +7,10 @@
 	upstream=$2
 	url=$3
 	dir=$4
-	bflag=()
 	dflag=()
-	if(! ~ $#branch 0)
-		bflag=(-b $branch)
 	if(! ~ $#debug 0)
 		dflag='-d'
-	{git/fetch $dflag $bflag -u $upstream $url >[2=3] || die $status} | awk '
+	{git/fetch $dflag -u $upstream $url >[2=3] || die $status} | awk '
 	/^remote/{
 		if($2=="HEAD")
 			next


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [9front] git/pull: fetch all branches (please test)
  2022-04-12  2:55   ` ori
@ 2022-04-19 19:31     ` Michael Forney
  2022-04-20  3:42       ` ori
  0 siblings, 1 reply; 5+ messages in thread
From: Michael Forney @ 2022-04-19 19:31 UTC (permalink / raw)
  To: 9front

On 2022-04-11, ori@eigenstate.org <ori@eigenstate.org> wrote:
> diff 1d9d4ffef89e883949261ec9c56c57e0344726d7 uncommitted
> --- a/sys/src/cmd/git/fetch.c
> +++ b/sys/src/cmd/git/fetch.c
> @@ -180,12 +180,13 @@
>  int
>  fetchpack(Conn *c)
>  {
> -	char buf[Pktmax], *sp[3];
> +	char buf[Pktmax], *sp[3], *ep;
>  	char *packtmp, *idxtmp, **ref;
>  	Hash h, *have, *want;
>  	int nref, refsz, first;
> -	int i, n, req, pfd;
> +	int i, n, l, req, pfd;
>  	vlong packsz;
> +	Objset hadobj;
>  	Object *o;
>
>  	nref = 0;
> @@ -246,13 +247,19 @@
>  		req = 1;
>  	}
>  	flushpkt(c);
> +	osinit(&hadobj);
>  	for(i = 0; i < nref; i++){
> -		if(hasheq(&have[i], &Zhash))
> +		if(hasheq(&have[i], &Zhash) || oshas(&hadobj, have[i]))
>  			continue;
> +		if((o = readobject(have[i])) == nil)
> +			sysfatal("missing object we should have: %H", have[i]);
> +		osadd(&hadobj, o);
> +		unref(o);	
>  		n = snprint(buf, sizeof(buf), "have %H\n", have[i]);
>  		if(writepkt(c, buf, n + 1) == -1)
>  			sysfatal("could not send have for %H", have[i]);
>  	}
> +	osclear(&hadobj);

I've been looking over the http-protocol docs, and it describes an
algorithm involving a priority queue and commit graph traversal that
we don't seem to do.

How does this work when our refs have several local commits that the
server doesn't have?

>  	if(!req)
>  		flushpkt(c);
>
> @@ -260,7 +267,7 @@
>  	if(writepkt(c, buf, n) == -1)
>  		sysfatal("write: %r");
>  	if(!req)
> -		return 0;
> +		goto showrefs;
>  	if(readphase(c) == -1)
>  		sysfatal("read: %r");
>  	if((n = readpkt(c, buf, sizeof(buf))) == -1)
> @@ -277,7 +284,28 @@
>  		sysfatal("could not create %s: %r", packtmp);
>
>  	fprint(2, "fetching...\n");
> -	packsz = 0;
> +	/*
> +	 * Work around torvalds git bug: we get duplicate have lines

Do you mean duplicate ACK lines? From my reading of the protocol docs,
"have" lines are only sent by us (the client).

In fact, I don't really see much about ACK lines at all. Do you know
if this is described anywhere in the git docs?

> +	 * somtimes, even though the protocol is supposed to start the

Typo here.

> +	 * pack file immediately.
> +	 *
> +	 * Skip ahead until we read 'PACK' off the wire
> +	 */
> +	while(1){
> +		if(readn(c->rfd, buf, 4) != 4)
> +			sysfatal("fetch packfile: short read");
> +		buf[4] = 0;
> +		if(strncmp(buf, "PACK", 4) == 0)
> +			break;
> +		l = strtol(buf, &ep, 16);
> +		if(l == 0 || ep != buf + 4)
> +			sysfatal("fetch packfile: junk pktline");
> +		if(readn(c->rfd, buf, l) != l)
> +			sysfatal("fetch packfile: short read");
> +	}
> +	if(write(pfd, "PACK", 4) != 4)
> +		sysfatal("write pack header: %r");
> +	packsz = 4;
>  	while(1){
>  		n = read(c->rfd, buf, sizeof buf);
>  		if(n == 0)
> --- a/sys/src/cmd/git/pull
> +++ b/sys/src/cmd/git/pull
> @@ -7,13 +7,10 @@
>  	upstream=$2
>  	url=$3
>  	dir=$4
> -	bflag=()
>  	dflag=()
> -	if(! ~ $#branch 0)
> -		bflag=(-b $branch)
>  	if(! ~ $#debug 0)
>  		dflag='-d'
> -	{git/fetch $dflag $bflag -u $upstream $url >[2=3] || die $status} | awk '
> +	{git/fetch $dflag -u $upstream $url >[2=3] || die $status} | awk '
>  	/^remote/{
>  		if($2=="HEAD")
>  			next

Now that $branch is unused, I think you can remove the corresponding
positional parameter in the update function, as well as the git/pull
-a and -b options.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [9front] git/pull: fetch all branches (please test)
  2022-04-19 19:31     ` Michael Forney
@ 2022-04-20  3:42       ` ori
  0 siblings, 0 replies; 5+ messages in thread
From: ori @ 2022-04-20  3:42 UTC (permalink / raw)
  To: 9front

Quoth Michael Forney <mforney@mforney.org>:
> On 2022-04-11, ori@eigenstate.org <ori@eigenstate.org> wrote:
> I've been looking over the http-protocol docs, and it describes an
> algorithm involving a priority queue and commit graph traversal that
> we don't seem to do.
> 
> How does this work when our refs have several local commits that the
> server doesn't have?

Yeah. We currently fetch too much. In practice it seems to
not come up too often, but I would like to implement that
queue/ack/nak algorithm (and implement multi-ack while
I'm there). It'd be a separate commit.


> > +	 * Work around torvalds git bug: we get duplicate have lines
> 
> Do you mean duplicate ACK lines? From my reading of the protocol docs,
> "have" lines are only sent by us (the client).

I do. Good catch.

> In fact, I don't really see much about ACK lines at all. Do you know
> if this is described anywhere in the git docs?

It's described in the pack protocol, as well as the
extensions.

	git/Documentation/technical/pack-protocol.txt
	git/Documentation/technical/protocol-capabilities.txt
 
> Now that $branch is unused, I think you can remove the corresponding
> positional parameter in the update function, as well as the git/pull
> -a and -b options.

Yes. thanks.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-04-20  3:43 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-11  3:19 [9front] git/pull: fetch all branches (please test) ori
2022-04-11 21:32 ` ori
2022-04-12  2:55   ` ori
2022-04-19 19:31     ` Michael Forney
2022-04-20  3:42       ` ori

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).