On Sun, Feb 16, 2014 at 11:25:16AM +0000, Ethan Grammatikidis wrote:
> mischief reports (and i've confirmed) the following patch works. i'm
> almost scared to ask why.
>
> diff -r 709e18f21cad sys/src/cmd/mothra/rdhtml.c
> --- a/sys/src/cmd/mothra/rdhtml.c	Sat Feb 15 17:18:58 2014 -0500
> +++ b/sys/src/cmd/mothra/rdhtml.c	Sat Feb 15 03:13:05 2014 -0800
> @@ -166,7 +166,7 @@
>  		g->hbufp=g->hbuf;
>  		g->ehbuf=g->hbuf+n;
>  	}
> -	c=*g->hbufp++&255;
> +	c=*g->hbufp++;
>  	if(c=='\n') g->lineno++;
>  	return c;
>  }

i'm not sure why i said that was a good idea. maybe this one isn't
either. here we actually read full runes from the fd. this seems to fix
visiting https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-demo.txt in
mothra.

diff -r 709e18f21cad sys/src/cmd/mothra/rdhtml.c
--- a/sys/src/cmd/mothra/rdhtml.c	Sat Feb 15 17:18:58 2014 -0500
+++ b/sys/src/cmd/mothra/rdhtml.c	Sat Feb 15 04:10:34 2014 -0800
@@ -154,6 +154,7 @@
 	int n, c;
 	char err[1024];
 	if(g->hbufp==g->ehbuf){
+doread:
 		n=read(g->hfd, g->hbuf, NHBUF);
 		if(n<=0){
 			if(n<0){
@@ -166,7 +167,11 @@
 		g->hbufp=g->hbuf;
 		g->ehbuf=g->hbuf+n;
 	}
-	c=*g->hbufp++&255;
+	if(!fullrune(g->hbufp, g->ehbuf - g->hbufp)) {
+		goto doread;
+	}
+
+	g->hbufp += chartorune((Rune*)&c, g->hbufp);
 	if(c=='\n') g->lineno++;
 	return c;
 }