9fans - fans of the OS Plan 9 from Bell Labs
 help / color / mirror / Atom feed
From: erik quanstrom <quanstro@quanstro.net>
To: 9fans@9fans.net
Subject: Re: [9fans] fossil+venti performance question
Date: Sat,  9 May 2015 09:23:22 -0700	[thread overview]
Message-ID: <c4cac4ae3086ef46e7cdcd173378c504@brasstown.quanstro.net> (raw)
In-Reply-To: <a5c5415571f488ee6f1dbcd336d37a2a@felloff.net>

On Fri May  8 20:12:57 PDT 2015, cinap_lenrek@felloff.net wrote:
> do we really need to initialize tcb->mss to tcpmtu() in procsyn()?
> as i see it, procsyn() is called only when tcb->state is Syn_sent,
> which only should happen for client connections doing a connect, in
> which case tcpsndsyn() would have initialized tcb->mss already no?

i think there was a subtile reason for this, but i don't recall.  a real
reason for setting it here is because it makes the code easier to reason
about, imo.

there are a couple problems with the patch as it stands.  they are
inherited from previous mistakes.

* the setting of tpriv->stats[Mss] is bogus.  it's not shared between connections.
it is also v4 only.

* so, mss should be added to each tcp connection's status file.

* the setting of tcb->mss in tcpincoming is not correct, tcp->mss is
set by SYN, not by ACK, and may not be reset.  (see snoopy below.)

* the SYN-ACK needs to send the local mss, not echo the remote mss.
asymmetry is "fine" in the other side, even if ip/tcp.c isn't smart enough to
keep tx and rx mss seperate.  (scare quotes = untested, there may be
some performance niggles if the sender is sending legal packets larger than
tcb->mss.)

my patch to nix is below.  i haven't submitted it yet.

- erik

---
005319 ms
	ether(s=a0369f1c3af7 d=0cc47a328da4 pr=0800 ln=62)
	ip(s=10.1.1.8 d=10.1.1.9 id=ee54 frag=0000 ttl=255 pr=6 ln=48)
	tcp(s=38903 d=17766 seq=3552109414 ack=0 fl=S win=65535 ck=d68e ln=0 opt4=(mss 1460) opt3=(wscale 4) opt=NOOP)
005320 ms
	ether(s=0cc47a328da4 d=a0369f1c3af7 pr=0800 ln=62)
	ip(s=10.1.1.9 d=10.1.1.8 id=54d3 frag=0000 ttl=255 pr=6 ln=48)
	tcp(s=17766 d=38903 seq=441373010 ack=3552109415 fl=AS win=65535 ck=eadc ln=0 opt4=(mss 1460) opt3=(wscale 4) opt=NOOP)

---

/n/dump/2015/0509/sys/src/nix/ip/tcp.c:491,501 - /sys/src/nix/ip/tcp.c:491,502
  	s = (Tcpctl*)(c->ptcl);

  	return snprint(state, n,
- 		"%s qin %d qout %d rq %d.%d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+ 		"%s qin %d qout %d rq %d.%d mss %d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
  		tcpstates[s->state],
  		c->rq ? qlen(c->rq) : 0,
  		c->wq ? qlen(c->wq) : 0,
  		s->nreseq, s->reseqlen,
+ 		s->mss,
  		s->srtt, s->mdev, s->ssthresh,
  		s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
  		s->qscale,
/n/dump/2015/0509/sys/src/nix/ip/tcp.c:843,854 - /sys/src/nix/ip/tcp.c:844,857

  /* mtu (- TCP + IP hdr len) of 1st hop */
  static int
- tcpmtu(Proto *tcp, uchar *addr, int version, uint *scale)
+ tcpmtu(Proto *tcp, uchar *addr, int version, uint reqmss, uint *scale)
  {
+ 	Tcppriv *tpriv;
  	Ipifc *ifc;
  	int mtu;

  	ifc = findipifc(tcp->f, addr, 0);
+ 	tpriv = tcp->priv;
  	switch(version){
  	default:
  	case V4:
/n/dump/2015/0509/sys/src/nix/ip/tcp.c:855,865 - /sys/src/nix/ip/tcp.c:858,870
  		mtu = DEF_MSS;
  		if(ifc != nil)
  			mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
+ 		tpriv->stats[Mss] = mtu;
  		break;
  	case V6:
  		mtu = DEF_MSS6;
  		if(ifc != nil)
  			mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
+ 		tpriv->stats[Mss] = mtu + (TCP6_PKT + TCP6_HDRSIZE) - (TCP4_PKT + TCP4_HDRSIZE);
  		break;
  	}
  	/*
/n/dump/2015/0509/sys/src/nix/ip/tcp.c:868,873 - /sys/src/nix/ip/tcp.c:873,882
  	 */
  	*scale = Defadvscale;

+ 	/* our sending max segment size cannot be bigger than what he asked for */
+ 	if(reqmss != 0 && reqmss < mtu)
+ 		mtu = reqmss;
+
  	return mtu;
  }

/n/dump/2015/0509/sys/src/nix/ip/tcp.c:1300,1307 - /sys/src/nix/ip/tcp.c:1309,1314
  static void
  tcpsndsyn(Conv *s, Tcpctl *tcb)
  {
- 	Tcppriv *tpriv;
-
  	tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
  	tcb->rttseq = tcb->iss;
  	tcb->snd.wl2 = tcb->iss;
/n/dump/2015/0509/sys/src/nix/ip/tcp.c:1314,1322 - /sys/src/nix/ip/tcp.c:1321,1327
  	tcb->sndsyntime = NOW;

  	/* set desired mss and scale */
- 	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
- 	tpriv = s->p->priv;
- 	tpriv->stats[Mss] = tcb->mss;
+ 	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, 0, &tcb->scale);
  }

  void
/n/dump/2015/0509/sys/src/nix/ip/tcp.c:1492,1498 - /sys/src/nix/ip/tcp.c:1497,1503
  	seg.ack = lp->irs+1;
  	seg.flags = SYN|ACK;
  	seg.urg = 0;
- 	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+ 	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, 0, &scale);	/* send our mss, not lp->mss */
  	seg.wnd = QMAX;

  	/* if the other side set scale, we should too */
/n/dump/2015/0509/sys/src/nix/ip/tcp.c:1767,1777 - /sys/src/nix/ip/tcp.c:1772,1779
  	tcb->flgcnt = 0;
  	tcb->flags |= SYNACK;

- 	/* our sending max segment size cannot be bigger than what he asked for */
- 	if(lp->mss != 0 && lp->mss < tcb->mss) {
- 		tcb->mss = lp->mss;
- 		tpriv->stats[Mss] = tcb->mss;
- 	}
+ 	/* per rfc, we can't set the mss any more */
+ //	tcb->mss = tcpmtu(s->p, lp->laddr, lp->version, lp->mss, &tcb->scale);

  	/* window scaling */
  	tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
/n/dump/2015/0509/sys/src/nix/ip/tcp.c:3014,3020 - /sys/src/nix/ip/tcp.c:3016,3021
  procsyn(Conv *s, Tcp *seg)
  {
  	Tcpctl *tcb;
- 	Tcppriv *tpriv;

  	tcb = (Tcpctl*)s->ptcl;
  	tcb->flags |= FORCE;
/n/dump/2015/0509/sys/src/nix/ip/tcp.c:3026,3036 - /sys/src/nix/ip/tcp.c:3027,3033
  	tcb->irs = seg->seq;

  	/* our sending max segment size cannot be bigger than what he asked for */
- 	if(seg->mss != 0 && seg->mss < tcb->mss) {
- 		tcb->mss = seg->mss;
- 		tpriv = s->p->priv;
- 		tpriv->stats[Mss] = tcb->mss;
- 	}
+ 	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, seg->mss, &tcb->scale);

  	tcb->snd.wnd = seg->wnd;
  	initialwindow(tcb);



  parent reply	other threads:[~2015-05-09 16:23 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-04  9:32 KADOTA Kyohei
2015-05-04 16:10 ` Anthony Sorace
2015-05-04 18:11   ` Aram Hăvărneanu
2015-05-04 18:51     ` David du Colombier
2015-05-05 14:29       ` Sergey Zhilkin
2015-05-05 15:05       ` Charles Forsyth
2015-05-05 15:38         ` David du Colombier
2015-05-05 22:23           ` Charles Forsyth
2015-05-05 22:29             ` cinap_lenrek
2015-05-05 22:33             ` David du Colombier
2015-05-05 22:53               ` Aram Hăvărneanu
2015-05-06 20:55                 ` David du Colombier
2015-05-06 21:17                   ` Charles Forsyth
2015-05-06 21:26                     ` David du Colombier
2015-05-06 21:28                       ` David du Colombier
2015-05-06 22:28                         ` Charles Forsyth
2015-05-07  3:35                           ` erik quanstrom
2015-05-07  6:15                             ` David du Colombier
2015-05-07 13:17                               ` erik quanstrom
2015-05-08 16:13                                 ` David du Colombier
2015-05-08 16:39                                   ` Charles Forsyth
2015-05-08 17:16                                     ` David du Colombier
2015-05-08 19:24                                       ` David du Colombier
2015-05-08 20:03                                         ` Steve Simon
2015-05-08 21:19                                         ` Bakul Shah
2015-05-09 14:43                                           ` erik quanstrom
2015-05-09 17:25                                             ` Lyndon Nerenberg
2015-05-09 17:30                                               ` Devon H. O'Dell
2015-05-09 17:35                                                 ` Lyndon Nerenberg
2015-05-09 21:54                                                   ` Devon H. O'Dell
2015-05-09 18:20                                               ` Bakul Shah
2015-05-09  3:11                                         ` cinap_lenrek
2015-05-09  5:59                                           ` lucio
2015-05-09 16:26                                             ` cinap_lenrek
2015-05-09 16:23                                           ` erik quanstrom [this message]
2015-05-10  4:55                                             ` erik quanstrom
2015-05-10  5:07                                               ` erik quanstrom
2015-05-10 17:57                                                 ` David du Colombier
2015-05-10 20:18                                                   ` erik quanstrom
2015-05-10 20:19                                             ` cinap_lenrek
2015-05-10 20:51                                               ` erik quanstrom
2015-05-10 21:34                                                 ` cinap_lenrek
2015-05-11  1:23                                                   ` erik quanstrom
2015-05-09 16:59                                           ` erik quanstrom
2015-05-06 22:35                         ` Steven Stallion
2015-05-06 23:47                           ` Charles Forsyth
2015-05-07  3:38                       ` erik quanstrom
2015-05-07  3:43                 ` erik quanstrom
2015-05-05 15:07     ` KADOTA Kyohei
2015-05-05 14:47   ` KADOTA Kyohei
2015-05-05 15:46     ` steve
2015-05-05 15:54       ` David du Colombier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c4cac4ae3086ef46e7cdcd173378c504@brasstown.quanstro.net \
    --to=quanstro@quanstro.net \
    --cc=9fans@9fans.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).