From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on inbox.vuxu.org X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=none autolearn=ham autolearn_force=no version=3.4.4 Received: (qmail 16156 invoked from network); 24 Jan 2021 05:53:58 -0000 Received: from 1ess.inri.net (216.126.196.35) by inbox.vuxu.org with ESMTPUTF8; 24 Jan 2021 05:53:58 -0000 Received: from asquith.prosimetrum.com ([125.236.209.157]) by 1ess; Sun Jan 24 00:44:54 -0500 2021 Message-ID: <48DB1AA516DB965474E6B2487192B8F8@prosimetrum.com> Date: Sun, 24 Jan 2021 18:46:07 +1300 From: umbraticus@prosimetrum.com To: 9front@9front.org In-Reply-To: MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="upas-nroiaokaftqhraplaxsxtgrlot" List-ID: <9front.9front.org> List-Help: X-Glyph: ➈ X-Bullshit: lossless converged WEB2.0 persistence-scale API blockchain scripting manager Subject: Re: [9front] htmlfmt anchor corner cases Reply-To: 9front@9front.org Precedence: bulk This is a multi-part message in MIME format. --upas-nroiaokaftqhraplaxsxtgrlot Content-Disposition: inline Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: 8bit Thanks for having a look and for your comments, Ori. First attachment: another whack at libhtml. • full href/src urls where possible • point directly to Item's Anchor, rather than have to look up by id • no more AnchorDest: just use Anchor • add anchor to Area struct, link to master anchor list • sort anchor/image/form/table/map lists into order they appear in doc • better table caption and drop-down form handling • update documentation & programs using lib (htmlfmt & abaco) More tinkering could be done… though not sure what the point is for just two programs, one of which throws most stuff away. Any interest in an updated (html5) and better-integrated libhtml that could be used by other programs that currently roll their own parser (mothra, html2ms, maybe even the netsurf port)? Second attachment: suggested htmlfmt replacement. • print document title • # h1 # & ## h2 ## headings • *italics*, **bold**, and ~~strikethrough~~ • footnotes for hyperlink¹ and {image}² urls • better table and form presentation • print frame info (not iframes...) and image maps umbraticus --upas-nroiaokaftqhraplaxsxtgrlot Content-Disposition: attachment; filename=libhtml.diff Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: 8bit diff -r dde79cf2874d sys/include/html.h --- a/sys/include/html.h Sat Jan 23 19:47:12 2021 -0800 +++ b/sys/include/html.h Sun Jan 24 18:33:21 2021 +1300 @@ -103,7 +103,6 @@ typedef struct Align Align; typedef struct Dimen Dimen; typedef struct Anchor Anchor; -typedef struct DestAnchor DestAnchor; typedef struct Map Map; typedef struct Area Area; typedef struct Background Background; @@ -174,7 +173,7 @@ int width; /* width in pixels (0 for floating items) */ int height; /* height in pixels */ int ascent; /* ascent (from top to baseline) in pixels */ - int anchorid; /* if nonzero, which anchor we're in */ + Anchor* anchor; /* if non-nil, which anchor we're in */ int state; /* flags and values (see below) */ Genattr*genattr; /* generic attributes and events */ int tag; /* variant discriminator: Itexttag, etc. */ @@ -405,7 +404,7 @@ int cellspacing; /* cellspacing attr */ int cellpadding; /* cellpadding attr */ Background background; /* table background */ - Item* caption; /* linked list of Items, giving caption */ + Rune* caption; /* caption text */ uchar caption_place; /* ALtop or ALbottom */ Lay* caption_lay; /* layout of caption */ int totw; /* total width */ @@ -466,27 +465,15 @@ Point pos; /* nw corner of cell contents, in cell */ }; -/* Anchor is for info about hyperlinks that go somewhere */ struct Anchor { Anchor* next; /* next in list of document's anchors */ - int index; /* serial no. of anchor within its doc */ Rune* name; /* name attr */ Rune* href; /* href attr */ int target; /* target attr as targetid */ }; -/* DestAnchor is for info about hyperlinks that are destinations */ -struct DestAnchor -{ - DestAnchor*next; /* next in list of document's destanchors */ - int index; /* serial no. of anchor within its doc */ - Rune* name; /* name attr */ - Item* item; /* the destination */ -}; - - /* Maps (client side) */ struct Map { @@ -500,10 +487,9 @@ { Area* next; /* next in list of a map's areas */ int shape; /* SHrect, etc. */ - Rune* href; /* associated hypertext link */ - int target; /* associated target frame */ Dimen* coords; /* array of coords for shape */ int ncoords; /* size of coords array */ + Anchor* anchor; /* associated hypertext link */ }; /* Area shapes */ @@ -600,7 +586,6 @@ /* info needed to respond to user actions */ Anchor* anchors; /* list of href anchors */ - DestAnchor*dests; /* list of destination anchors */ Form* forms; /* list of forms */ Table* tables; /* list of tables */ Map* maps; /* list of maps */ diff -r dde79cf2874d sys/man/2/html --- a/sys/man/2/html Sat Jan 23 19:47:12 2021 -0800 +++ b/sys/man/2/html Sun Jan 24 18:33:21 2021 +1300 @@ -194,7 +194,7 @@ int width; int height; int ascent; - int anchorid; + Anchor* anchor; int state; Genattr* genattr; int tag; @@ -209,8 +209,8 @@ and .B ascent are intended for use by the caller as part of the layout process. -.BR Anchorid , -if non-zero, gives the integer id assigned by the parser to the anchor that +.BR Anchor , +if non-nil, points to the anchor that this item is in (see section .IR Anchors ). .B State @@ -713,7 +713,7 @@ Global information about an HTML page is stored in the following structure: .PP .EX -.ta 6n +\w'DestAnchor* 'u +.ta 6n +\w'Background 'u typedef struct Docinfo Docinfo; struct Docinfo { @@ -738,7 +738,6 @@ // info needed to respond to user actions Anchor* anchors; - DestAnchor* dests; Form* forms; Table* tables; Map* maps; @@ -797,28 +796,26 @@ .B Frameid is this document's frame id. .PP -.B Anchors -is a list of hyperlinks contained in the document, -and -.B dests -is a list of hyperlink destinations within the page (see the following section for details). -.BR Forms , +.BR Anchors , +.BR forms , .B tables and .B maps -are lists of the various forms, tables and client-side maps contained +are lists of the various hyperlinks, forms, tables and client-side maps contained in the document, as described in subsequent sections. .B Images is a list of all the image items in the document. .SS Anchors .PP -The library builds two lists for all of the +The library builds a list of all .B elements (anchors) in a document. -Each anchor is assigned a unique anchor id within the document. -For anchors which are hyperlinks (the -.B href -attribute was supplied), the following structure is defined: +The +.B anchor +field of an +.B Item +structure points to one of these if the item is inside an anchor. +The elements of this list are as follows: .PP .EX .ta 6n +\w'Anchor* 'u @@ -826,7 +823,6 @@ struct Anchor { Anchor* next; - int index; Rune* name; Rune* href; int target; @@ -836,11 +832,6 @@ .B Next points to the next anchor in the list (the head of this list is .BR Docinfo.anchors ). -.B Index -is the anchor id; each item within this hyperlink is tagged with this value -in its -.B anchorid -field. .B Name and .B href @@ -848,33 +839,6 @@ (in particular, href is the URL to go to). .B Target is the value of the target attribute (if provided) converted to a frame id. -.PP -Destinations within the document (anchors with the name attribute set) -are held in the -.B Docinfo.dests -list, using the following structure: -.PP -.EX -.ta 6n +\w'DestAnchor* 'u -typedef struct DestAnchor DestAnchor; -struct DestAnchor -{ - DestAnchor* next; - int index; - Rune* name; - Item* item; -}; -.EE -.PP -.B Next -is the next element of the list, -.B index -is the anchor id, -.B name -is the value of the name attribute, and -.B item -is points to the item within the parsed document that should be considered -to be the destination. .SS Forms .PP Any forms within a document are kept in a list, headed by @@ -936,7 +900,7 @@ int cols; uchar flags; Option* options; - Item* image; + Iimage* image; int ctlid; SEvent* events; }; @@ -1041,7 +1005,7 @@ int cellspacing; int cellpadding; Background background; - Item* caption; + Rune* caption; uchar caption_place; Lay* caption_lay; int totw; @@ -1102,7 +1066,7 @@ .B background gives the requested background for the table. .B Caption -is a linked list of items to be displayed as the caption of the +is the text to be displayed as the caption of the table, either above or below depending on whether .B caption_place is @@ -1293,10 +1257,9 @@ { Area* next; int shape; - Rune* href; - int target; Dimen* coords; int ncoords; + Anchor* anchor; }; .EE .PP @@ -1308,15 +1271,15 @@ .B SHcircle or .BR SHpoly . -.B Href -is the URL associated with this area in its role as -a hypertext link, and -.B target -is the target frame it should be loaded in. .B Coords is an array of coordinates for the shape, and .B ncoords is the size of this array (number of elements). +.B Anchor +is the URL associated with this area in its role as +a hypertext link, with +.B anchor->name +set to any alternative text. .SS Frames .PP If the diff -r dde79cf2874d sys/src/cmd/abaco/fns.h --- a/sys/src/cmd/abaco/fns.h Sat Jan 23 19:47:12 2021 -0800 +++ b/sys/src/cmd/abaco/fns.h Sun Jan 24 18:33:21 2021 +1300 @@ -59,7 +59,6 @@ void colarray(Image **, Image *, Image *, Image *, int); void rect3d(Image *, Rectangle, int, Image **, Point); void ellipse3d(Image *, Point, int, int, Image **, Point); -void reverseimages(Iimage **); void setstatus(Window *, char *, ...); int istextfield(Item *); int forceitem(Item *); @@ -69,7 +68,6 @@ int pipeline(int fd, char *cmd, ...); void getimage(Cimage *, Rune *); Point getpt(Page *p, Point); -Rune *urlcombine(Rune *, Rune *); void fixtext(Page *); void addrefresh(Page *, char *, ...); void flushrefresh(void); diff -r dde79cf2874d sys/src/cmd/abaco/html.c --- a/sys/src/cmd/abaco/html.c Sat Jan 23 19:47:12 2021 -0800 +++ b/sys/src/cmd/abaco/html.c Sun Jan 24 18:33:21 2021 +1300 @@ -282,7 +282,7 @@ draw(im, r, ci->i, nil, ci->i->r.min); if(i->border){ - if(i->anchorid >= 0) + if(i->anchor != nil) c = getcolor(p->doc->link); else c = display->black; @@ -459,27 +459,16 @@ mouselink(Box *b, Page *p, int but) { Runestr rs; - Anchor *a; /* eat mouse */ while(mousectl->buttons) readmouse(mousectl); - if(b->i->anchorid < 0) + if(b->i->anchor == nil || b->i->anchor->href == nil) return; - /* binary search would be better */ - for(a=p->doc->anchors; a!=nil; a=a->next) - if(a->index == b->i->anchorid) - break; - - if(a==nil || a->href==nil) - return; - - p = whichtarget(p, a->target); - rs.r = urlcombine(getbase(p), a->href); - if(rs.r == nil) - return; + p = whichtarget(p, b->i->anchor->target); + rs.r = runestrdup(b->i->anchor->href); rs.nr = runestrlen(rs.r); if(but == 1) @@ -521,7 +510,7 @@ x = y; } p = whichtarget(p, form->target); - y = urlcombine(getbase(p), form->action); + y = form->action; memset(&src, 0, sizeof(Runestr)); memset(&post, 0, sizeof(Runestr)); @@ -532,9 +521,8 @@ sep = L"?"; src.r = runesmprint("%S%S%S",y, sep, x); free(x); - free(y); }else{ - src.r = y; + src.r = runestrdup(y); post.r = x; post.nr = runestrlen(x); if(post.nr == 0){ @@ -684,7 +672,7 @@ void boxinit(Box *b) { - if(b->i->anchorid) + if(b->i->anchor != nil) b->mouse = mouselink; /* override mouselink for forms */ if(b->i->tag == Iformfieldtag){ diff -r dde79cf2874d sys/src/cmd/abaco/page.c --- a/sys/src/cmd/abaco/page.c Sat Jan 23 19:47:12 2021 -0800 +++ b/sys/src/cmd/abaco/page.c Sun Jan 24 18:33:21 2021 +1300 @@ -54,10 +54,9 @@ c->kidinfo = t; /* this check shouldn't be necessary, but... */ if(t->src){ - rs.r = urlcombine(p->url->act.r, t->src); + rs.r = t->src; rs.nr = runestrlen(rs.r); pageload1(c, urlalloc(&rs, nil, HGet), FALSE); - closerunestr(&rs); } } } @@ -205,23 +204,19 @@ { Cimage *ci; Iimage *i; - Rune *src; addrefresh(p, "loading images..."); - reverseimages(&p->doc->images); for(i=p->doc->images; i!=nil; i=i->nextimage){ if(p->aborting) break; - src = urlcombine(getbase(p), i->imsrc); - ci = findimg(src); + ci = findimg(i->imsrc); if(ci == nil){ - ci = loadimg(src, i->imwidth, i->imheight); + ci = loadimg(i->imsrc, i->imwidth, i->imheight); qlock(&cimagelock); ci->next = cimages; cimages = ci; qunlock(&cimagelock); } - free(src); incref(ci); i->aux = ci; p->cimage = erealloc(p->cimage, ++p->ncimage*sizeof(Cimage *)); @@ -752,8 +747,7 @@ void pagesetrefresh(Page *p) { - Runestr rs; - Rune *s, *q, *t; + Rune *s, *q; char *v; int n; @@ -765,8 +759,6 @@ if(q == nil) return; q++; - if(!q) - return; n = runestrlen(q); if(*q == L'''){ q++; @@ -774,12 +766,9 @@ } if(n <= 0) return; - t = runesmprint("%.*S", n, q); - rs.r = urlcombine(getbase(p), t); - rs.nr = runestrlen(rs.r); - copyrunestr(&p->refresh.rs, &rs); - closerunestr(&rs); - free(t); + /* broken for relative urls; don't think this ever worked anyway... */ + p->refresh.rs.nr = n; + p->refresh.rs.r = runesmprint("%.*S", n, q); /* now the time */ q = runestrchr(s, L';'); diff -r dde79cf2874d sys/src/cmd/abaco/tabs.c --- a/sys/src/cmd/abaco/tabs.c Sat Jan 23 19:47:12 2021 -0800 +++ b/sys/src/cmd/abaco/tabs.c Sun Jan 24 18:33:21 2021 +1300 @@ -64,14 +64,9 @@ settables(Page *p) { Table *t; - Item *i; if(p->doc==nil) return; - for(i=p->items; i!=nil; i=i->next) - if(i->tag == Itabletag) - ((Itable *)i)->table->flags |= Ttoplevel; - for(t=p->doc->tables; t!=nil; t=t->next) settable(t); } diff -r dde79cf2874d sys/src/cmd/abaco/urls.c --- a/sys/src/cmd/abaco/urls.c Sat Jan 23 19:47:12 2021 -0800 +++ b/sys/src/cmd/abaco/urls.c Sun Jan 24 18:33:21 2021 +1300 @@ -121,130 +121,3 @@ close(cfd); return fd; } - -void -urlcanon(Rune *name) -{ - Rune *s, *e, *tail, tailr; - Rune **comp, **p, **q; - int n; - - name = runestrstr(name, L"://"); - if(name == nil) - return; - name = runestrchr(name+3, '/'); - if(name == nil) - return; - if(*name == L'/') - name++; - - n = 0; - for(e = name; *e != 0; e++) - if(*e == L'/') - n++; - comp = emalloc((n+2)*sizeof *comp); - - /* - * Break the name into a list of components - */ - p = comp; - *p++ = name; - tail = nil; - tailr = L'☺'; /* silence compiler */ - for(s = name; *s != 0; s++){ - if(*s == '?' || *s == '#'){ - tail = s+1; - tailr = *s; - *s = 0; - break; - } - else if(*s == L'/'){ - *p++ = s+1; - *s = 0; - } - } - - /* - * go through the component list, deleting components that are empty (except - * the last component) or ., and any .. and its predecessor. - */ - for(p = q = comp; *p != nil; p++){ - if(runestrcmp(*p, L"") == 0 && p[1] != nil - || runestrcmp(*p, L".") == 0) - continue; - else if(q>comp && runestrcmp(*p, L"..") == 0 && runestrcmp(q[-1], L"..") != 0) - q--; - else - *q++ = *p; - } - *q = nil; - - /* - * rebuild the path name - */ - s = name; - for(p = comp; pnextimage; - c->nextimage = r; - r = c; - } - *head = r; -} - char urlexpr[] = "^(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|" "prospero)://[^/]+"; Reprog *urlprog; @@ -907,7 +893,7 @@ ntext = emalloc(sizeof(Itext)); ntext->s = s2; ntext->ascent = text->ascent; - ntext->anchorid = text->anchorid; + ntext->anchor = text->anchor; ntext->state = text->state&~(IFbrk|IFbrksp|IFnobrk|IFcleft|IFcright); ntext->tag = text->tag; ntext->fnt = text->fnt; diff -r dde79cf2874d sys/src/cmd/htmlfmt/html.c --- a/sys/src/cmd/htmlfmt/html.c Sat Jan 23 19:47:12 2021 -0800 +++ b/sys/src/cmd/htmlfmt/html.c Sun Jan 24 18:33:21 2021 +1300 @@ -7,11 +7,6 @@ #include #include "dat.h" -char urlexpr[] = - "^(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero)" - "://([a-zA-Z0-9_@\\-]+([.:][a-zA-Z0-9_@\\-]+)*)"; -Reprog *urlprog; - int inword = 0; int col = 0; int wordi = 0; @@ -40,19 +35,6 @@ return nil; } -char* -runetobyte(Rune *r, int n) -{ - char *s; - - if(n == 0) - return emalloc(1); - s = smprint("%.*S", n, r); - if(s == nil) - error("malloc failed"); - return s; -} - int closingpunct(char c) { @@ -129,73 +111,16 @@ free(r); } -char* -baseurl(char *url) -{ - char *base, *slash; - Resub rs[10]; - - if(url == nil) - return nil; - if(urlprog == nil){ - urlprog = regcomp(urlexpr); - if(urlprog == nil) - error("can't compile URL regexp"); - } - memset(rs, 0, sizeof rs); - if(regexec(urlprog, url, rs, nelem(rs)) == 0) - return nil; - base = estrdup(url); - slash = strrchr(base, '/'); - if(slash!=nil && slash>=&base[rs[0].ep-rs[0].sp]) - *slash = '\0'; - else - base[rs[0].ep-rs[0].sp] = '\0'; - return base; -} - -char* -fullurl(URLwin *u, Rune *rhref) -{ - char *base, *href, *hrefbase; - char *result; - - if(rhref == nil) - return estrdup("NULL URL"); - href = runetobyte(rhref, runestrlen(rhref)); - hrefbase = baseurl(href); - result = nil; - if(hrefbase==nil && (base = baseurl(u->url))!=nil){ - result = estrdup(base); - if(base[strlen(base)-1]!='/' && (href==nil || href[0]!='/')) - result = eappend(result, "/", ""); - free(base); - } - if(href){ - if(result) - result = eappend(result, "", href); - else - result = estrdup(href); - } - free(hrefbase); - if(result == nil) - return estrdup("***unknown***"); - return result; -} - void -render(URLwin *u, Bytes *t, Item *items, int curanchor) +render(URLwin *u, Bytes *t, Item *items, Anchor *curanchor) { Item *il; Itext *it; Ifloat *ifl; Ispacer *is; Itable *ita; - Iimage *im; - Anchor *a; Table *tab; Tablecell *cell; - char *href; inword = 0; col = 0; @@ -221,14 +146,8 @@ renderbytes(t, "=======\n"); break; case Iimagetag: - if(!aflag) - break; - im = (Iimage*)il; - if(im->imsrc){ - href = fullurl(u, im->imsrc); - renderbytes(t, "[image %s]", href); - free(href); - } + if(aflag) + renderbytes(t, "[image %S]", ((Iimage*)il)->imsrc); break; case Iformfieldtag: if(aflag) @@ -255,15 +174,10 @@ default: error("unknown item tag %d\n", il->tag); } - if(il->anchorid != 0 && il->anchorid!=curanchor){ - for(a=u->docinfo->anchors; a!=nil; a=a->next) - if(aflag && a->index == il->anchorid){ - href = fullurl(u, a->href); - renderbytes(t, "[%s]", href); - free(href); - break; - } - curanchor = il->anchorid; + if(aflag && il->anchor!=curanchor){ + if(curanchor != nil) + renderbytes(t, "[%S]", curanchor->href); + curanchor = il->anchor; } } if(t->n>0 && t->b[t->n-1]!='\n') diff -r dde79cf2874d sys/src/libhtml/build.c --- a/sys/src/libhtml/build.c Sat Jan 23 19:47:12 2021 -0800 +++ b/sys/src/libhtml/build.c Sun Jan 24 18:33:21 2021 +1300 @@ -27,7 +27,6 @@ int curvoff; // current baseline offset uchar curul; // current underline/strike state uchar curjust; // current justify state - int curanchor; // current (href) anchor id (if in one), or 0 int curstate; // current value of item state int literal; // current literal state int inpar; // true when in a paragraph-like construct @@ -52,11 +51,8 @@ Pstate* psstk; int nforms; int ntables; - int nanchors; int nframes; - Formfield* curfield; Form* curform; - Map* curmap; Table* tabstk; Kidinfo* kidstk; }; @@ -255,7 +251,6 @@ static Pstate* finishcell(Table* curtab, Pstate* psstk); static void finish_table(Table* t); static void freeanchor(Anchor* a); -static void freedestanchor(DestAnchor* da); static void freeform(Form* f); static void freeformfield(Formfield* ff); static void freeitem(Item* it); @@ -271,9 +266,6 @@ static Align makealign(int halign, int valign); static Background makebackground(Rune* imgurl, int color); static Dimen makedimen(int kind, int spec); -static Anchor* newanchor(int index, Rune* name, Rune* href, int target, Anchor* link); -static Area* newarea(int shape, Rune* href, int target, Area* link); -static DestAnchor* newdestanchor(int index, Rune* name, Item* item, DestAnchor* link); static Docinfo* newdocinfo(void); static Genattr* newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, SEvent* events); static Form* newform(int formid, Rune* name, Rune* action, @@ -357,18 +349,15 @@ is->psstk = ps; is->nforms = 0; is->ntables = 0; - is->nanchors = 0; is->nframes = 0; - is->curfield = nil; is->curform = nil; - is->curmap = nil; is->tabstk = nil; is->kidstk = nil; return is; } static void -linkitems(Docinfo *di, Item *it) +linkitems(Docinfo *di, Item *it, Iimage** imgtail) { Formfield *ff; Tablecell *c; @@ -378,8 +367,8 @@ switch(it->tag) { case Iimagetag: /* link image to docinfo */ - ((Iimage*)it)->nextimage = di->images; - di->images = (Iimage*)it; + *imgtail = (Iimage*)it; + imgtail = &(*imgtail)->nextimage; break; case Iformfieldtag: /* link formfield to form */ @@ -399,7 +388,7 @@ ff = ((Iformfield*)it)->formfield; ff->form->fields = ff; } - linkitems(di, ff->image); + linkitems(di, ff->image, imgtail); } break; case Itabletag: @@ -410,12 +399,11 @@ tt->tabletok = nil; tt->next = di->tables; di->tables = tt; - linkitems(di, tt->caption); for(c = tt->cells; c != nil; c = c->next) - linkitems(di, c->content); + linkitems(di, c->content, imgtail); break; case Ifloattag: - linkitems(di, ((Ifloat*)it)->item); + linkitems(di, ((Ifloat*)it)->item, imgtail); break; } Next: @@ -474,8 +462,6 @@ int sty; int nosh; int color; - int oldcuranchor; - int dfltbd; int v; int hang; int isempty; @@ -489,7 +475,6 @@ uchar ty; uchar ty2; Pstate* ps; - Pstate* nextps; Pstate* outerps; Table* curtab; Token* tok; @@ -521,12 +506,13 @@ Tablerow* tr; Formfield* field; Formfield* ff; - Rune* href; Rune* src; Rune* scriptsrc; Rune* bgurl; Rune* action; Background bg; + Anchor** atail; + Area *area; if(!buildinited) buildinit(); @@ -534,6 +520,8 @@ ps = is->psstk; curtab = is->tabstk; di = is->doc; + atail = &di->anchors; + map = nil; toks = _gettoks(data, datalen, di->chset, di->mediatype, &tokslen); toki = 0; for(; toki < tokslen; toki++) { @@ -628,36 +616,36 @@ // Anchors are not supposed to be nested, but you sometimes see // href anchors inside destination anchors. case Ta: - if(ps->curanchor != 0) { + if(*atail != nil) { if(warn) fprint(2, "warning: nested or missing \n"); - ps->curanchor = 0; - } - name = aval(tok, Aname); - href = aurlval(tok, Ahref, nil, di->base); - // ignore rel, rev, and title attrs - if(href != nil) { - target = atargval(tok, di->target); - di->anchors = newanchor(++is->nanchors, name, href, target, di->anchors); - if(name != nil) - name = _Strdup(name); // for DestAnchor construction, below - ps->curanchor = is->nanchors; - ps->curfg = push(&ps->fgstk, di->link); - ps->curul = push(&ps->ulstk, ULunder); - } - if(name != nil) { - // add a null item to be destination - additem(ps, newispacer(ISPnull), tok); - di->dests = newdestanchor(++is->nanchors, name, ps->lastit, di->dests); - } - break; - - case Ta+RBRA : - if(ps->curanchor != 0) { ps->curfg = popretnewtop(&ps->fgstk, di->text); ps->curul = popretnewtop(&ps->ulstk, ULnone); - ps->curanchor = 0; - } + additem(ps, newispacer(ISPnull), tok); + ps->lastit->anchor = nil; + atail = &(*atail)->next; + } + *atail = (Anchor*)emalloc(sizeof(Anchor)); + (*atail)->name = aval(tok, Aname); + (*atail)->href = aurlval(tok, Ahref, nil, di->base); + (*atail)->target = atargval(tok, di->target); + additem(ps, newispacer(ISPnull), tok); + ps->lastit->anchor = *atail; + ps->curfg = push(&ps->fgstk, di->link); + ps->curul = push(&ps->ulstk, ULunder); + break; + + case Ta+RBRA : + if(*atail == nil) { + if(warn) + fprint(2, "warning: unexpected \n"); + continue; + } + ps->curfg = popretnewtop(&ps->fgstk, di->text); + ps->curul = popretnewtop(&ps->ulstk, ULnone); + additem(ps, newispacer(ISPnull), tok); + ps->lastit->anchor = nil; + atail = &(*atail)->next; break; // @@ -671,17 +659,31 @@ // case Tarea: - map = di->maps; if(map == nil) { if(warn) fprint(2, "warning: not inside \n"); continue; } - map->areas = newarea(atabval(tok, Ashape, shape_tab, NSHAPETAB, SHrect), - aurlval(tok, Ahref, nil, di->base), - atargval(tok, di->target), - map->areas); - setdimarray(tok, Acoords, &map->areas->coords, &map->areas->ncoords); + area = (Area*)emalloc(sizeof(Area)); + area->next = map->areas; + map->areas = area; + area->shape = atabval(tok, Ashape, shape_tab, NSHAPETAB, SHrect); + setdimarray(tok, Acoords, &area->coords, &area->ncoords); + if(*atail != nil) { + if(warn) + fprint(2, "warning: inside \n"); + ps->curfg = popretnewtop(&ps->fgstk, di->text); + ps->curul = popretnewtop(&ps->ulstk, ULnone); + additem(ps, newispacer(ISPnull), tok); + ps->lastit->anchor = nil; + atail = &(*atail)->next; + } + area->anchor = (Anchor*)emalloc(sizeof(Anchor)); + area->anchor->name = aval(tok, Aalt); + area->anchor->href = aurlval(tok, Ahref, nil, di->base); + area->anchor->target = atargval(tok, di->target); + *atail = area->anchor; + atail = &(*atail)->next; break; // @@ -789,25 +791,12 @@ fprint(2, "warning: more than one in \n"); continue; } - ps = newpstate(ps); + curtab->caption = getpcdata(toks, tokslen, &toki); + if(warn && toki < tokslen - 1 && toks[toki + 1].tag != Tcaption + RBRA) + fprint(2, "warning: \n"); - continue; - } - if(curtab->caption != nil) - freeitems(curtab->caption); - curtab->caption = ps->items->next; - ps->items->next = nil; - freepstate(ps); - ps = nextps; - break; - case Tcenter: case Tdiv: if(tag == Tcenter) @@ -1092,7 +1081,6 @@ // case Timg: map = nil; - oldcuranchor = ps->curanchor; if(_tokaval(tok, Ausemap, &usemap, 0)) { if(!_prefix(L"#", usemap)) { if(warn) @@ -1100,21 +1088,13 @@ } else { map = getmap(di, usemap+1); - if(ps->curanchor == 0) { - di->anchors = newanchor(++is->nanchors, nil, nil, di->target, di->anchors); - ps->curanchor = is->nanchors; - } } } align = atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom); - dfltbd = 0; - if(ps->curanchor != 0) - dfltbd = 2; src = aurlval(tok, Asrc, nil, di->base); if(src == nil) { if(warn) fprint(2, "warning: has no src attribute\n"); - ps->curanchor = oldcuranchor; continue; } img = newiimage(src, @@ -1124,7 +1104,7 @@ auintval(tok, Aheight, 0), auintval(tok, Ahspace, IMGHSPACE), auintval(tok, Avspace, IMGVSPACE), - auintval(tok, Aborder, dfltbd), + auintval(tok, Aborder, ps->lastit->anchor == nil && usemap == nil ? 0 : 2), aflagval(tok, Aismap), map); if(align == ALleft || align == ALright) { @@ -1137,7 +1117,6 @@ ps->skipwhite = 0; additem(ps, img, tok); } - ps->curanchor = oldcuranchor; break; // @@ -1148,7 +1127,7 @@ fprint(2, " not inside \n"); continue; } - field = newformfield( + field = is->curform->fields = newformfield( atabval(tok, Atype, input_tab, NINPUTTAB, Ftext), ++is->curform->nfields, is->curform, @@ -1156,7 +1135,7 @@ aval(tok, Avalue), auintval(tok, Asize, 0), auintval(tok, Amaxlength, 1000), - nil); + is->curform->fields); if(aflagval(tok, Achecked)) field->flags = FFchecked; @@ -1239,14 +1218,14 @@ HGet, di->forms); di->forms = frm; - ff = newformfield(Ftext, + frm->fields = ff = newformfield(Ftext, ++frm->nfields, frm, _Strdup(L"_ISINDEX_"), nil, 50, 1000, - nil); + frm->fields); additem(ps, newiformfield(ff), tok); addbrk(ps, 1, 0); break; @@ -1279,17 +1258,16 @@ // case Tmap: if(_tokaval(tok, Aname, &name, 0)) - is->curmap = getmap(di, name); + map = getmap(di, name); break; case Tmap+RBRA: - map = is->curmap; if(map == nil) { if(warn) fprint(2, "warning: unexpected \n"); continue; } - map->areas = (Area*)_revlist((List*)map->areas); + map = nil; break; case Tmeta: @@ -1443,14 +1421,14 @@ fprint(2, "\n"); continue; } - field = is->curfield; + field = is->curform->fields; if(field->ftype != Fselect) continue; // put options back in input order field->options = (Option*)_revlist((List*)field->options); - is->curfield = nil; break; // @@ -1676,14 +1653,14 @@ fprint(2, "
data ended by %T\n", &toks[toki + 1]); curtab->caption_place = atabval(tok, Aalign, align_tab, NALIGNTAB, ALtop); break; - case Tcaption+RBRA: - nextps = ps->next; - if(curtab == nil || nextps == nil) { - if(warn) - fprint(2, "warning: unexpected