From mboxrd@z Thu Jan 1 00:00:00 1970 From: erik quanstrom Date: Sat, 9 May 2015 09:23:22 -0700 To: 9fans@9fans.net Message-ID: In-Reply-To: References: MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit Subject: Re: [9fans] fossil+venti performance question Topicbox-Message-UUID: 50705562-ead9-11e9-9d60-3106f5b1d025 On Fri May 8 20:12:57 PDT 2015, cinap_lenrek@felloff.net wrote: > do we really need to initialize tcb->mss to tcpmtu() in procsyn()? > as i see it, procsyn() is called only when tcb->state is Syn_sent, > which only should happen for client connections doing a connect, in > which case tcpsndsyn() would have initialized tcb->mss already no? i think there was a subtile reason for this, but i don't recall. a real reason for setting it here is because it makes the code easier to reason about, imo. there are a couple problems with the patch as it stands. they are inherited from previous mistakes. * the setting of tpriv->stats[Mss] is bogus. it's not shared between connections. it is also v4 only. * so, mss should be added to each tcp connection's status file. * the setting of tcb->mss in tcpincoming is not correct, tcp->mss is set by SYN, not by ACK, and may not be reset. (see snoopy below.) * the SYN-ACK needs to send the local mss, not echo the remote mss. asymmetry is "fine" in the other side, even if ip/tcp.c isn't smart enough to keep tx and rx mss seperate. (scare quotes = untested, there may be some performance niggles if the sender is sending legal packets larger than tcb->mss.) my patch to nix is below. i haven't submitted it yet. - erik --- 005319 ms ether(s=a0369f1c3af7 d=0cc47a328da4 pr=0800 ln=62) ip(s=10.1.1.8 d=10.1.1.9 id=ee54 frag=0000 ttl=255 pr=6 ln=48) tcp(s=38903 d=17766 seq=3552109414 ack=0 fl=S win=65535 ck=d68e ln=0 opt4=(mss 1460) opt3=(wscale 4) opt=NOOP) 005320 ms ether(s=0cc47a328da4 d=a0369f1c3af7 pr=0800 ln=62) ip(s=10.1.1.9 d=10.1.1.8 id=54d3 frag=0000 ttl=255 pr=6 ln=48) tcp(s=17766 d=38903 seq=441373010 ack=3552109415 fl=AS win=65535 ck=eadc ln=0 opt4=(mss 1460) opt3=(wscale 4) opt=NOOP) --- /n/dump/2015/0509/sys/src/nix/ip/tcp.c:491,501 - /sys/src/nix/ip/tcp.c:491,502 s = (Tcpctl*)(c->ptcl); return snprint(state, n, - "%s qin %d qout %d rq %d.%d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n", + "%s qin %d qout %d rq %d.%d mss %d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n", tcpstates[s->state], c->rq ? qlen(c->rq) : 0, c->wq ? qlen(c->wq) : 0, s->nreseq, s->reseqlen, + s->mss, s->srtt, s->mdev, s->ssthresh, s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale, s->qscale, /n/dump/2015/0509/sys/src/nix/ip/tcp.c:843,854 - /sys/src/nix/ip/tcp.c:844,857 /* mtu (- TCP + IP hdr len) of 1st hop */ static int - tcpmtu(Proto *tcp, uchar *addr, int version, uint *scale) + tcpmtu(Proto *tcp, uchar *addr, int version, uint reqmss, uint *scale) { + Tcppriv *tpriv; Ipifc *ifc; int mtu; ifc = findipifc(tcp->f, addr, 0); + tpriv = tcp->priv; switch(version){ default: case V4: /n/dump/2015/0509/sys/src/nix/ip/tcp.c:855,865 - /sys/src/nix/ip/tcp.c:858,870 mtu = DEF_MSS; if(ifc != nil) mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE); + tpriv->stats[Mss] = mtu; break; case V6: mtu = DEF_MSS6; if(ifc != nil) mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE); + tpriv->stats[Mss] = mtu + (TCP6_PKT + TCP6_HDRSIZE) - (TCP4_PKT + TCP4_HDRSIZE); break; } /* /n/dump/2015/0509/sys/src/nix/ip/tcp.c:868,873 - /sys/src/nix/ip/tcp.c:873,882 */ *scale = Defadvscale; + /* our sending max segment size cannot be bigger than what he asked for */ + if(reqmss != 0 && reqmss < mtu) + mtu = reqmss; + return mtu; } /n/dump/2015/0509/sys/src/nix/ip/tcp.c:1300,1307 - /sys/src/nix/ip/tcp.c:1309,1314 static void tcpsndsyn(Conv *s, Tcpctl *tcb) { - Tcppriv *tpriv; - tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16); tcb->rttseq = tcb->iss; tcb->snd.wl2 = tcb->iss; /n/dump/2015/0509/sys/src/nix/ip/tcp.c:1314,1322 - /sys/src/nix/ip/tcp.c:1321,1327 tcb->sndsyntime = NOW; /* set desired mss and scale */ - tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale); - tpriv = s->p->priv; - tpriv->stats[Mss] = tcb->mss; + tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, 0, &tcb->scale); } void /n/dump/2015/0509/sys/src/nix/ip/tcp.c:1492,1498 - /sys/src/nix/ip/tcp.c:1497,1503 seg.ack = lp->irs+1; seg.flags = SYN|ACK; seg.urg = 0; - seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale); + seg.mss = tcpmtu(tcp, lp->laddr, lp->version, 0, &scale); /* send our mss, not lp->mss */ seg.wnd = QMAX; /* if the other side set scale, we should too */ /n/dump/2015/0509/sys/src/nix/ip/tcp.c:1767,1777 - /sys/src/nix/ip/tcp.c:1772,1779 tcb->flgcnt = 0; tcb->flags |= SYNACK; - /* our sending max segment size cannot be bigger than what he asked for */ - if(lp->mss != 0 && lp->mss < tcb->mss) { - tcb->mss = lp->mss; - tpriv->stats[Mss] = tcb->mss; - } + /* per rfc, we can't set the mss any more */ + // tcb->mss = tcpmtu(s->p, lp->laddr, lp->version, lp->mss, &tcb->scale); /* window scaling */ tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale); /n/dump/2015/0509/sys/src/nix/ip/tcp.c:3014,3020 - /sys/src/nix/ip/tcp.c:3016,3021 procsyn(Conv *s, Tcp *seg) { Tcpctl *tcb; - Tcppriv *tpriv; tcb = (Tcpctl*)s->ptcl; tcb->flags |= FORCE; /n/dump/2015/0509/sys/src/nix/ip/tcp.c:3026,3036 - /sys/src/nix/ip/tcp.c:3027,3033 tcb->irs = seg->seq; /* our sending max segment size cannot be bigger than what he asked for */ - if(seg->mss != 0 && seg->mss < tcb->mss) { - tcb->mss = seg->mss; - tpriv = s->p->priv; - tpriv->stats[Mss] = tcb->mss; - } + tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, seg->mss, &tcb->scale); tcb->snd.wnd = seg->wnd; initialwindow(tcb);