* Re: [9front] htmlfmt anchor corner cases
2021-01-20 3:17 ` umbraticus
@ 2021-01-24 5:46 ` umbraticus
2021-01-24 23:51 ` ori
` (2 more replies)
0 siblings, 3 replies; 17+ messages in thread
From: umbraticus @ 2021-01-24 5:46 UTC (permalink / raw)
To: 9front
[-- Attachment #1: Type: text/plain, Size: 1118 bytes --]
Thanks for having a look and for your comments, Ori.
First attachment: another whack at libhtml.
• full href/src urls where possible
• point directly to an Item's Anchor, rather than having to look it up by id
• no more AnchorDest: just use Anchor
• add an anchor to the Area struct, linked into the document's master anchor list
• sort anchor/image/form/table/map lists into order they appear in doc
• better table caption and drop-down form handling
• update documentation & programs using lib (htmlfmt & abaco)
More tinkering could be done… though I'm not sure what the point is
for just two programs, one of which throws most stuff away. Any
interest in an updated (html5) and better-integrated libhtml that
could be used by other programs that currently roll their own
parser (mothra, html2ms, maybe even the netsurf port)?
Second attachment: suggested htmlfmt replacement.
• print document title
• # h1 # & ## h2 ## headings
• *italics*, **bold**, and ~~strikethrough~~
• footnotes for hyperlink¹ and {image}² urls
• better table and form presentation
• print frame info (not iframes...) and image maps
umbraticus
[-- Attachment #2: libhtml.diff --]
[-- Type: text/plain, Size: 36013 bytes --]
diff -r dde79cf2874d sys/include/html.h
--- a/sys/include/html.h Sat Jan 23 19:47:12 2021 -0800
+++ b/sys/include/html.h Sun Jan 24 18:33:21 2021 +1300
@@ -103,7 +103,6 @@
typedef struct Align Align;
typedef struct Dimen Dimen;
typedef struct Anchor Anchor;
-typedef struct DestAnchor DestAnchor;
typedef struct Map Map;
typedef struct Area Area;
typedef struct Background Background;
@@ -174,7 +173,7 @@
int width; /* width in pixels (0 for floating items) */
int height; /* height in pixels */
int ascent; /* ascent (from top to baseline) in pixels */
- int anchorid; /* if nonzero, which anchor we're in */
+ Anchor* anchor; /* if non-nil, which anchor we're in */
int state; /* flags and values (see below) */
Genattr*genattr; /* generic attributes and events */
int tag; /* variant discriminator: Itexttag, etc. */
@@ -405,7 +404,7 @@
int cellspacing; /* cellspacing attr */
int cellpadding; /* cellpadding attr */
Background background; /* table background */
- Item* caption; /* linked list of Items, giving caption */
+ Rune* caption; /* caption text */
uchar caption_place; /* ALtop or ALbottom */
Lay* caption_lay; /* layout of caption */
int totw; /* total width */
@@ -466,27 +465,15 @@
Point pos; /* nw corner of cell contents, in cell */
};
-/* Anchor is for info about hyperlinks that go somewhere */
struct Anchor
{
Anchor* next; /* next in list of document's anchors */
- int index; /* serial no. of anchor within its doc */
Rune* name; /* name attr */
Rune* href; /* href attr */
int target; /* target attr as targetid */
};
-/* DestAnchor is for info about hyperlinks that are destinations */
-struct DestAnchor
-{
- DestAnchor*next; /* next in list of document's destanchors */
- int index; /* serial no. of anchor within its doc */
- Rune* name; /* name attr */
- Item* item; /* the destination */
-};
-
-
/* Maps (client side) */
struct Map
{
@@ -500,10 +487,9 @@
{
Area* next; /* next in list of a map's areas */
int shape; /* SHrect, etc. */
- Rune* href; /* associated hypertext link */
- int target; /* associated target frame */
Dimen* coords; /* array of coords for shape */
int ncoords; /* size of coords array */
+ Anchor* anchor; /* associated hypertext link */
};
/* Area shapes */
@@ -600,7 +586,6 @@
/* info needed to respond to user actions */
Anchor* anchors; /* list of href anchors */
- DestAnchor*dests; /* list of destination anchors */
Form* forms; /* list of forms */
Table* tables; /* list of tables */
Map* maps; /* list of maps */
diff -r dde79cf2874d sys/man/2/html
--- a/sys/man/2/html Sat Jan 23 19:47:12 2021 -0800
+++ b/sys/man/2/html Sun Jan 24 18:33:21 2021 +1300
@@ -194,7 +194,7 @@
int width;
int height;
int ascent;
- int anchorid;
+ Anchor* anchor;
int state;
Genattr* genattr;
int tag;
@@ -209,8 +209,8 @@
and
.B ascent
are intended for use by the caller as part of the layout process.
-.BR Anchorid ,
-if non-zero, gives the integer id assigned by the parser to the anchor that
+.BR Anchor ,
+if non-nil, points to the anchor that
this item is in (see section
.IR Anchors ).
.B State
@@ -713,7 +713,7 @@
Global information about an HTML page is stored in the following structure:
.PP
.EX
-.ta 6n +\w'DestAnchor* 'u
+.ta 6n +\w'Background 'u
typedef struct Docinfo Docinfo;
struct Docinfo
{
@@ -738,7 +738,6 @@
// info needed to respond to user actions
Anchor* anchors;
- DestAnchor* dests;
Form* forms;
Table* tables;
Map* maps;
@@ -797,28 +796,26 @@
.B Frameid
is this document's frame id.
.PP
-.B Anchors
-is a list of hyperlinks contained in the document,
-and
-.B dests
-is a list of hyperlink destinations within the page (see the following section for details).
-.BR Forms ,
+.BR Anchors ,
+.BR forms ,
.B tables
and
.B maps
-are lists of the various forms, tables and client-side maps contained
+are lists of the various hyperlinks, forms, tables and client-side maps contained
in the document, as described in subsequent sections.
.B Images
is a list of all the image items in the document.
.SS Anchors
.PP
-The library builds two lists for all of the
+The library builds a list of all
.B <a>
elements (anchors) in a document.
-Each anchor is assigned a unique anchor id within the document.
-For anchors which are hyperlinks (the
-.B href
-attribute was supplied), the following structure is defined:
+The
+.B anchor
+field of an
+.B Item
+structure points to one of these if the item is inside an anchor.
+The elements of this list are as follows:
.PP
.EX
.ta 6n +\w'Anchor* 'u
@@ -826,7 +823,6 @@
struct Anchor
{
Anchor* next;
- int index;
Rune* name;
Rune* href;
int target;
@@ -836,11 +832,6 @@
.B Next
points to the next anchor in the list (the head of this list is
.BR Docinfo.anchors ).
-.B Index
-is the anchor id; each item within this hyperlink is tagged with this value
-in its
-.B anchorid
-field.
.B Name
and
.B href
@@ -848,33 +839,6 @@
(in particular, href is the URL to go to).
.B Target
is the value of the target attribute (if provided) converted to a frame id.
-.PP
-Destinations within the document (anchors with the name attribute set)
-are held in the
-.B Docinfo.dests
-list, using the following structure:
-.PP
-.EX
-.ta 6n +\w'DestAnchor* 'u
-typedef struct DestAnchor DestAnchor;
-struct DestAnchor
-{
- DestAnchor* next;
- int index;
- Rune* name;
- Item* item;
-};
-.EE
-.PP
-.B Next
-is the next element of the list,
-.B index
-is the anchor id,
-.B name
-is the value of the name attribute, and
-.B item
-is points to the item within the parsed document that should be considered
-to be the destination.
.SS Forms
.PP
Any forms within a document are kept in a list, headed by
@@ -936,7 +900,7 @@
int cols;
uchar flags;
Option* options;
- Item* image;
+ Iimage* image;
int ctlid;
SEvent* events;
};
@@ -1041,7 +1005,7 @@
int cellspacing;
int cellpadding;
Background background;
- Item* caption;
+ Rune* caption;
uchar caption_place;
Lay* caption_lay;
int totw;
@@ -1102,7 +1066,7 @@
.B background
gives the requested background for the table.
.B Caption
-is a linked list of items to be displayed as the caption of the
+is the text to be displayed as the caption of the
table, either above or below depending on whether
.B caption_place
is
@@ -1293,10 +1257,9 @@
{
Area* next;
int shape;
- Rune* href;
- int target;
Dimen* coords;
int ncoords;
+ Anchor* anchor;
};
.EE
.PP
@@ -1308,15 +1271,15 @@
.B SHcircle
or
.BR SHpoly .
-.B Href
-is the URL associated with this area in its role as
-a hypertext link, and
-.B target
-is the target frame it should be loaded in.
.B Coords
is an array of coordinates for the shape, and
.B ncoords
is the size of this array (number of elements).
+.B Anchor
+points to the anchor whose href is the URL associated with this area
+in its role as a hypertext link, with
+.B anchor->name
+set to any alternative text.
.SS Frames
.PP
If the
diff -r dde79cf2874d sys/src/cmd/abaco/fns.h
--- a/sys/src/cmd/abaco/fns.h Sat Jan 23 19:47:12 2021 -0800
+++ b/sys/src/cmd/abaco/fns.h Sun Jan 24 18:33:21 2021 +1300
@@ -59,7 +59,6 @@
void colarray(Image **, Image *, Image *, Image *, int);
void rect3d(Image *, Rectangle, int, Image **, Point);
void ellipse3d(Image *, Point, int, int, Image **, Point);
-void reverseimages(Iimage **);
void setstatus(Window *, char *, ...);
int istextfield(Item *);
int forceitem(Item *);
@@ -69,7 +68,6 @@
int pipeline(int fd, char *cmd, ...);
void getimage(Cimage *, Rune *);
Point getpt(Page *p, Point);
-Rune *urlcombine(Rune *, Rune *);
void fixtext(Page *);
void addrefresh(Page *, char *, ...);
void flushrefresh(void);
diff -r dde79cf2874d sys/src/cmd/abaco/html.c
--- a/sys/src/cmd/abaco/html.c Sat Jan 23 19:47:12 2021 -0800
+++ b/sys/src/cmd/abaco/html.c Sun Jan 24 18:33:21 2021 +1300
@@ -282,7 +282,7 @@
draw(im, r, ci->i, nil, ci->i->r.min);
if(i->border){
- if(i->anchorid >= 0)
+ if(i->anchor != nil)
c = getcolor(p->doc->link);
else
c = display->black;
@@ -459,27 +459,16 @@
mouselink(Box *b, Page *p, int but)
{
Runestr rs;
- Anchor *a;
/* eat mouse */
while(mousectl->buttons)
readmouse(mousectl);
- if(b->i->anchorid < 0)
+ if(b->i->anchor == nil || b->i->anchor->href == nil)
return;
- /* binary search would be better */
- for(a=p->doc->anchors; a!=nil; a=a->next)
- if(a->index == b->i->anchorid)
- break;
-
- if(a==nil || a->href==nil)
- return;
-
- p = whichtarget(p, a->target);
- rs.r = urlcombine(getbase(p), a->href);
- if(rs.r == nil)
- return;
+ p = whichtarget(p, b->i->anchor->target);
+ rs.r = runestrdup(b->i->anchor->href);
rs.nr = runestrlen(rs.r);
if(but == 1)
@@ -521,7 +510,7 @@
x = y;
}
p = whichtarget(p, form->target);
- y = urlcombine(getbase(p), form->action);
+ y = form->action;
memset(&src, 0, sizeof(Runestr));
memset(&post, 0, sizeof(Runestr));
@@ -532,9 +521,8 @@
sep = L"?";
src.r = runesmprint("%S%S%S",y, sep, x);
free(x);
- free(y);
}else{
- src.r = y;
+ src.r = runestrdup(y);
post.r = x;
post.nr = runestrlen(x);
if(post.nr == 0){
@@ -684,7 +672,7 @@
void
boxinit(Box *b)
{
- if(b->i->anchorid)
+ if(b->i->anchor != nil)
b->mouse = mouselink;
/* override mouselink for forms */
if(b->i->tag == Iformfieldtag){
diff -r dde79cf2874d sys/src/cmd/abaco/page.c
--- a/sys/src/cmd/abaco/page.c Sat Jan 23 19:47:12 2021 -0800
+++ b/sys/src/cmd/abaco/page.c Sun Jan 24 18:33:21 2021 +1300
@@ -54,10 +54,9 @@
c->kidinfo = t;
/* this check shouldn't be necessary, but... */
if(t->src){
- rs.r = urlcombine(p->url->act.r, t->src);
+ rs.r = t->src;
rs.nr = runestrlen(rs.r);
pageload1(c, urlalloc(&rs, nil, HGet), FALSE);
- closerunestr(&rs);
}
}
}
@@ -205,23 +204,19 @@
{
Cimage *ci;
Iimage *i;
- Rune *src;
addrefresh(p, "loading images...");
- reverseimages(&p->doc->images);
for(i=p->doc->images; i!=nil; i=i->nextimage){
if(p->aborting)
break;
- src = urlcombine(getbase(p), i->imsrc);
- ci = findimg(src);
+ ci = findimg(i->imsrc);
if(ci == nil){
- ci = loadimg(src, i->imwidth, i->imheight);
+ ci = loadimg(i->imsrc, i->imwidth, i->imheight);
qlock(&cimagelock);
ci->next = cimages;
cimages = ci;
qunlock(&cimagelock);
}
- free(src);
incref(ci);
i->aux = ci;
p->cimage = erealloc(p->cimage, ++p->ncimage*sizeof(Cimage *));
@@ -752,8 +747,7 @@
void
pagesetrefresh(Page *p)
{
- Runestr rs;
- Rune *s, *q, *t;
+ Rune *s, *q;
char *v;
int n;
@@ -765,8 +759,6 @@
if(q == nil)
return;
q++;
- if(!q)
- return;
n = runestrlen(q);
if(*q == L'''){
q++;
@@ -774,12 +766,9 @@
}
if(n <= 0)
return;
- t = runesmprint("%.*S", n, q);
- rs.r = urlcombine(getbase(p), t);
- rs.nr = runestrlen(rs.r);
- copyrunestr(&p->refresh.rs, &rs);
- closerunestr(&rs);
- free(t);
+ /* broken for relative urls; don't think this ever worked anyway... */
+ p->refresh.rs.nr = n;
+ p->refresh.rs.r = runesmprint("%.*S", n, q);
/* now the time */
q = runestrchr(s, L';');
diff -r dde79cf2874d sys/src/cmd/abaco/tabs.c
--- a/sys/src/cmd/abaco/tabs.c Sat Jan 23 19:47:12 2021 -0800
+++ b/sys/src/cmd/abaco/tabs.c Sun Jan 24 18:33:21 2021 +1300
@@ -64,14 +64,9 @@
settables(Page *p)
{
Table *t;
- Item *i;
if(p->doc==nil)
return;
- for(i=p->items; i!=nil; i=i->next)
- if(i->tag == Itabletag)
- ((Itable *)i)->table->flags |= Ttoplevel;
-
for(t=p->doc->tables; t!=nil; t=t->next)
settable(t);
}
diff -r dde79cf2874d sys/src/cmd/abaco/urls.c
--- a/sys/src/cmd/abaco/urls.c Sat Jan 23 19:47:12 2021 -0800
+++ b/sys/src/cmd/abaco/urls.c Sun Jan 24 18:33:21 2021 +1300
@@ -121,130 +121,3 @@
close(cfd);
return fd;
}
-
-void
-urlcanon(Rune *name)
-{
- Rune *s, *e, *tail, tailr;
- Rune **comp, **p, **q;
- int n;
-
- name = runestrstr(name, L"://");
- if(name == nil)
- return;
- name = runestrchr(name+3, '/');
- if(name == nil)
- return;
- if(*name == L'/')
- name++;
-
- n = 0;
- for(e = name; *e != 0; e++)
- if(*e == L'/')
- n++;
- comp = emalloc((n+2)*sizeof *comp);
-
- /*
- * Break the name into a list of components
- */
- p = comp;
- *p++ = name;
- tail = nil;
- tailr = L'☺'; /* silence compiler */
- for(s = name; *s != 0; s++){
- if(*s == '?' || *s == '#'){
- tail = s+1;
- tailr = *s;
- *s = 0;
- break;
- }
- else if(*s == L'/'){
- *p++ = s+1;
- *s = 0;
- }
- }
-
- /*
- * go through the component list, deleting components that are empty (except
- * the last component) or ., and any .. and its predecessor.
- */
- for(p = q = comp; *p != nil; p++){
- if(runestrcmp(*p, L"") == 0 && p[1] != nil
- || runestrcmp(*p, L".") == 0)
- continue;
- else if(q>comp && runestrcmp(*p, L"..") == 0 && runestrcmp(q[-1], L"..") != 0)
- q--;
- else
- *q++ = *p;
- }
- *q = nil;
-
- /*
- * rebuild the path name
- */
- s = name;
- for(p = comp; p<q; p++){
- n = runestrlen(*p);
- memmove(s, *p, sizeof(Rune)*n);
- s += n;
- if(p[1] != nil)
- *s++ = '/';
- }
- *s = 0;
- if(tail)
- runeseprint(s, e+1, "%C%S", tailr, tail);
- free(comp);
-}
-
-/* this is a HACK */
-Rune*
-urlcombine(Rune *b, Rune *u)
-{
- Rune *p, *q, *sep, *s;
- Rune endrune[] = { L'?', L'#' };
- int i, restore;
-
- if(u == nil)
- error("urlcombine: u == nil");
-
- if(validurl(u))
- return erunestrdup(u);
-
- if(b==nil || !validurl(b))
- error("urlcombine: b==nil || !validurl(b)");
-
- if(runestrncmp(u, L"//", 2) == 0){
- q = runestrchr(b, L':');
- return runesmprint("%.*S:%S", (int)(q-b), b, u);
- }
- p = runestrstr(b, L"://");
- if(p != nil)
- p += 3;
- sep = L"";
- q = nil;
- if(*u ==L'/')
- q = runestrchr(p, L'/');
- else if(*u==L'#' || *u==L'?'){
- for(i=0; i<nelem(endrune); i++)
- if(q = runestrchr(p, endrune[i]))
- break;
- }else if(p != nil){
- sep = L"/";
- restore = 0;
- s = runestrchr(p, L'?');
- if(s != nil){
- *s = '\0';
- restore = 1;
- }
- q = runestrrchr(p, L'/');
- if(restore)
- *s = L'?';
- }else
- sep = L"/";
- if(q == nil)
- p = runesmprint("%S%S%S", b, sep, u);
- else
- p = runesmprint("%.*S%S%S", (int)(q-b), b, sep, u);
- urlcanon(p);
- return p;
-}
diff -r dde79cf2874d sys/src/cmd/abaco/util.c
--- a/sys/src/cmd/abaco/util.c Sat Jan 23 19:47:12 2021 -0800
+++ b/sys/src/cmd/abaco/util.c Sun Jan 24 18:33:21 2021 +1300
@@ -624,20 +624,6 @@
return u;
}
-void
-reverseimages(Iimage **head)
-{
- Iimage *r, *c, *n;
-
- r = nil;
- for(c=*head; c!=nil; c=n){
- n = c->nextimage;
- c->nextimage = r;
- r = c;
- }
- *head = r;
-}
-
char urlexpr[] = "^(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|"
"prospero)://[^/]+";
Reprog *urlprog;
@@ -907,7 +893,7 @@
ntext = emalloc(sizeof(Itext));
ntext->s = s2;
ntext->ascent = text->ascent;
- ntext->anchorid = text->anchorid;
+ ntext->anchor = text->anchor;
ntext->state = text->state&~(IFbrk|IFbrksp|IFnobrk|IFcleft|IFcright);
ntext->tag = text->tag;
ntext->fnt = text->fnt;
diff -r dde79cf2874d sys/src/cmd/htmlfmt/html.c
--- a/sys/src/cmd/htmlfmt/html.c Sat Jan 23 19:47:12 2021 -0800
+++ b/sys/src/cmd/htmlfmt/html.c Sun Jan 24 18:33:21 2021 +1300
@@ -7,11 +7,6 @@
#include <ctype.h>
#include "dat.h"
-char urlexpr[] =
- "^(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero)"
- "://([a-zA-Z0-9_@\\-]+([.:][a-zA-Z0-9_@\\-]+)*)";
-Reprog *urlprog;
-
int inword = 0;
int col = 0;
int wordi = 0;
@@ -40,19 +35,6 @@
return nil;
}
-char*
-runetobyte(Rune *r, int n)
-{
- char *s;
-
- if(n == 0)
- return emalloc(1);
- s = smprint("%.*S", n, r);
- if(s == nil)
- error("malloc failed");
- return s;
-}
-
int
closingpunct(char c)
{
@@ -129,73 +111,16 @@
free(r);
}
-char*
-baseurl(char *url)
-{
- char *base, *slash;
- Resub rs[10];
-
- if(url == nil)
- return nil;
- if(urlprog == nil){
- urlprog = regcomp(urlexpr);
- if(urlprog == nil)
- error("can't compile URL regexp");
- }
- memset(rs, 0, sizeof rs);
- if(regexec(urlprog, url, rs, nelem(rs)) == 0)
- return nil;
- base = estrdup(url);
- slash = strrchr(base, '/');
- if(slash!=nil && slash>=&base[rs[0].ep-rs[0].sp])
- *slash = '\0';
- else
- base[rs[0].ep-rs[0].sp] = '\0';
- return base;
-}
-
-char*
-fullurl(URLwin *u, Rune *rhref)
-{
- char *base, *href, *hrefbase;
- char *result;
-
- if(rhref == nil)
- return estrdup("NULL URL");
- href = runetobyte(rhref, runestrlen(rhref));
- hrefbase = baseurl(href);
- result = nil;
- if(hrefbase==nil && (base = baseurl(u->url))!=nil){
- result = estrdup(base);
- if(base[strlen(base)-1]!='/' && (href==nil || href[0]!='/'))
- result = eappend(result, "/", "");
- free(base);
- }
- if(href){
- if(result)
- result = eappend(result, "", href);
- else
- result = estrdup(href);
- }
- free(hrefbase);
- if(result == nil)
- return estrdup("***unknown***");
- return result;
-}
-
void
-render(URLwin *u, Bytes *t, Item *items, int curanchor)
+render(URLwin *u, Bytes *t, Item *items, Anchor *curanchor)
{
Item *il;
Itext *it;
Ifloat *ifl;
Ispacer *is;
Itable *ita;
- Iimage *im;
- Anchor *a;
Table *tab;
Tablecell *cell;
- char *href;
inword = 0;
col = 0;
@@ -221,14 +146,8 @@
renderbytes(t, "=======\n");
break;
case Iimagetag:
- if(!aflag)
- break;
- im = (Iimage*)il;
- if(im->imsrc){
- href = fullurl(u, im->imsrc);
- renderbytes(t, "[image %s]", href);
- free(href);
- }
+ if(aflag)
+ renderbytes(t, "[image %S]", ((Iimage*)il)->imsrc);
break;
case Iformfieldtag:
if(aflag)
@@ -255,15 +174,10 @@
default:
error("unknown item tag %d\n", il->tag);
}
- if(il->anchorid != 0 && il->anchorid!=curanchor){
- for(a=u->docinfo->anchors; a!=nil; a=a->next)
- if(aflag && a->index == il->anchorid){
- href = fullurl(u, a->href);
- renderbytes(t, "[%s]", href);
- free(href);
- break;
- }
- curanchor = il->anchorid;
+ if(aflag && il->anchor!=curanchor){
+ if(curanchor != nil)
+ renderbytes(t, "[%S]", curanchor->href);
+ curanchor = il->anchor;
}
}
if(t->n>0 && t->b[t->n-1]!='\n')
diff -r dde79cf2874d sys/src/libhtml/build.c
--- a/sys/src/libhtml/build.c Sat Jan 23 19:47:12 2021 -0800
+++ b/sys/src/libhtml/build.c Sun Jan 24 18:33:21 2021 +1300
@@ -27,7 +27,6 @@
int curvoff; // current baseline offset
uchar curul; // current underline/strike state
uchar curjust; // current justify state
- int curanchor; // current (href) anchor id (if in one), or 0
int curstate; // current value of item state
int literal; // current literal state
int inpar; // true when in a paragraph-like construct
@@ -52,11 +51,8 @@
Pstate* psstk;
int nforms;
int ntables;
- int nanchors;
int nframes;
- Formfield* curfield;
Form* curform;
- Map* curmap;
Table* tabstk;
Kidinfo* kidstk;
};
@@ -255,7 +251,6 @@
static Pstate* finishcell(Table* curtab, Pstate* psstk);
static void finish_table(Table* t);
static void freeanchor(Anchor* a);
-static void freedestanchor(DestAnchor* da);
static void freeform(Form* f);
static void freeformfield(Formfield* ff);
static void freeitem(Item* it);
@@ -271,9 +266,6 @@
static Align makealign(int halign, int valign);
static Background makebackground(Rune* imgurl, int color);
static Dimen makedimen(int kind, int spec);
-static Anchor* newanchor(int index, Rune* name, Rune* href, int target, Anchor* link);
-static Area* newarea(int shape, Rune* href, int target, Area* link);
-static DestAnchor* newdestanchor(int index, Rune* name, Item* item, DestAnchor* link);
static Docinfo* newdocinfo(void);
static Genattr* newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, SEvent* events);
static Form* newform(int formid, Rune* name, Rune* action,
@@ -357,18 +349,15 @@
is->psstk = ps;
is->nforms = 0;
is->ntables = 0;
- is->nanchors = 0;
is->nframes = 0;
- is->curfield = nil;
is->curform = nil;
- is->curmap = nil;
is->tabstk = nil;
is->kidstk = nil;
return is;
}
static void
-linkitems(Docinfo *di, Item *it)
+linkitems(Docinfo *di, Item *it, Iimage** imgtail)
{
Formfield *ff;
Tablecell *c;
@@ -378,8 +367,8 @@
switch(it->tag) {
case Iimagetag:
/* link image to docinfo */
- ((Iimage*)it)->nextimage = di->images;
- di->images = (Iimage*)it;
+ *imgtail = (Iimage*)it;
+ imgtail = &(*imgtail)->nextimage;
break;
case Iformfieldtag:
/* link formfield to form */
@@ -399,7 +388,7 @@
ff = ((Iformfield*)it)->formfield;
ff->form->fields = ff;
}
- linkitems(di, ff->image);
+ linkitems(di, ff->image, imgtail);
}
break;
case Itabletag:
@@ -410,12 +399,11 @@
tt->tabletok = nil;
tt->next = di->tables;
di->tables = tt;
- linkitems(di, tt->caption);
for(c = tt->cells; c != nil; c = c->next)
- linkitems(di, c->content);
+ linkitems(di, c->content, imgtail);
break;
case Ifloattag:
- linkitems(di, ((Ifloat*)it)->item);
+ linkitems(di, ((Ifloat*)it)->item, imgtail);
break;
}
Next:
@@ -474,8 +462,6 @@
int sty;
int nosh;
int color;
- int oldcuranchor;
- int dfltbd;
int v;
int hang;
int isempty;
@@ -489,7 +475,6 @@
uchar ty;
uchar ty2;
Pstate* ps;
- Pstate* nextps;
Pstate* outerps;
Table* curtab;
Token* tok;
@@ -521,12 +506,13 @@
Tablerow* tr;
Formfield* field;
Formfield* ff;
- Rune* href;
Rune* src;
Rune* scriptsrc;
Rune* bgurl;
Rune* action;
Background bg;
+ Anchor** atail;
+ Area *area;
if(!buildinited)
buildinit();
@@ -534,6 +520,8 @@
ps = is->psstk;
curtab = is->tabstk;
di = is->doc;
+ atail = &di->anchors;
+ map = nil;
toks = _gettoks(data, datalen, di->chset, di->mediatype, &tokslen);
toki = 0;
for(; toki < tokslen; toki++) {
@@ -628,36 +616,36 @@
// Anchors are not supposed to be nested, but you sometimes see
// href anchors inside destination anchors.
case Ta:
- if(ps->curanchor != 0) {
+ if(*atail != nil) {
if(warn)
fprint(2, "warning: nested <A> or missing </A>\n");
- ps->curanchor = 0;
- }
- name = aval(tok, Aname);
- href = aurlval(tok, Ahref, nil, di->base);
- // ignore rel, rev, and title attrs
- if(href != nil) {
- target = atargval(tok, di->target);
- di->anchors = newanchor(++is->nanchors, name, href, target, di->anchors);
- if(name != nil)
- name = _Strdup(name); // for DestAnchor construction, below
- ps->curanchor = is->nanchors;
- ps->curfg = push(&ps->fgstk, di->link);
- ps->curul = push(&ps->ulstk, ULunder);
- }
- if(name != nil) {
- // add a null item to be destination
- additem(ps, newispacer(ISPnull), tok);
- di->dests = newdestanchor(++is->nanchors, name, ps->lastit, di->dests);
- }
- break;
-
- case Ta+RBRA :
- if(ps->curanchor != 0) {
ps->curfg = popretnewtop(&ps->fgstk, di->text);
ps->curul = popretnewtop(&ps->ulstk, ULnone);
- ps->curanchor = 0;
- }
+ additem(ps, newispacer(ISPnull), tok);
+ ps->lastit->anchor = nil;
+ atail = &(*atail)->next;
+ }
+ *atail = (Anchor*)emalloc(sizeof(Anchor));
+ (*atail)->name = aval(tok, Aname);
+ (*atail)->href = aurlval(tok, Ahref, nil, di->base);
+ (*atail)->target = atargval(tok, di->target);
+ additem(ps, newispacer(ISPnull), tok);
+ ps->lastit->anchor = *atail;
+ ps->curfg = push(&ps->fgstk, di->link);
+ ps->curul = push(&ps->ulstk, ULunder);
+ break;
+
+ case Ta+RBRA :
+ if(*atail == nil) {
+ if(warn)
+ fprint(2, "warning: unexpected </A>\n");
+ continue;
+ }
+ ps->curfg = popretnewtop(&ps->fgstk, di->text);
+ ps->curul = popretnewtop(&ps->ulstk, ULnone);
+ additem(ps, newispacer(ISPnull), tok);
+ ps->lastit->anchor = nil;
+ atail = &(*atail)->next;
break;
// <!ELEMENT APPLET - - (PARAM | %text)* >
@@ -671,17 +659,31 @@
// <!ELEMENT AREA - O EMPTY>
case Tarea:
- map = di->maps;
if(map == nil) {
if(warn)
fprint(2, "warning: <AREA> not inside <MAP>\n");
continue;
}
- map->areas = newarea(atabval(tok, Ashape, shape_tab, NSHAPETAB, SHrect),
- aurlval(tok, Ahref, nil, di->base),
- atargval(tok, di->target),
- map->areas);
- setdimarray(tok, Acoords, &map->areas->coords, &map->areas->ncoords);
+ area = (Area*)emalloc(sizeof(Area));
+ area->next = map->areas;
+ map->areas = area;
+ area->shape = atabval(tok, Ashape, shape_tab, NSHAPETAB, SHrect);
+ setdimarray(tok, Acoords, &area->coords, &area->ncoords);
+ if(*atail != nil) {
+ if(warn)
+ fprint(2, "warning: <AREA> inside <A>\n");
+ ps->curfg = popretnewtop(&ps->fgstk, di->text);
+ ps->curul = popretnewtop(&ps->ulstk, ULnone);
+ additem(ps, newispacer(ISPnull), tok);
+ ps->lastit->anchor = nil;
+ atail = &(*atail)->next;
+ }
+ area->anchor = (Anchor*)emalloc(sizeof(Anchor));
+ area->anchor->name = aval(tok, Aalt);
+ area->anchor->href = aurlval(tok, Ahref, nil, di->base);
+ area->anchor->target = atargval(tok, di->target);
+ *atail = area->anchor;
+ atail = &(*atail)->next;
break;
// <!ELEMENT (B|STRONG) - - (%text)*>
@@ -789,25 +791,12 @@
fprint(2, "warning: more than one <CAPTION> in <TABLE>\n");
continue;
}
- ps = newpstate(ps);
+ curtab->caption = getpcdata(toks, tokslen, &toki);
+ if(warn && toki < tokslen - 1 && toks[toki + 1].tag != Tcaption + RBRA)
+ fprint(2, "warning: <CAPTION> data ended by %T\n", &toks[toki + 1]);
curtab->caption_place = atabval(tok, Aalign, align_tab, NALIGNTAB, ALtop);
break;
- case Tcaption+RBRA:
- nextps = ps->next;
- if(curtab == nil || nextps == nil) {
- if(warn)
- fprint(2, "warning: unexpected </CAPTION>\n");
- continue;
- }
- if(curtab->caption != nil)
- freeitems(curtab->caption);
- curtab->caption = ps->items->next;
- ps->items->next = nil;
- freepstate(ps);
- ps = nextps;
- break;
-
case Tcenter:
case Tdiv:
if(tag == Tcenter)
@@ -1092,7 +1081,6 @@
// <!ELEMENT IMG - O EMPTY>
case Timg:
map = nil;
- oldcuranchor = ps->curanchor;
if(_tokaval(tok, Ausemap, &usemap, 0)) {
if(!_prefix(L"#", usemap)) {
if(warn)
@@ -1100,21 +1088,13 @@
}
else {
map = getmap(di, usemap+1);
- if(ps->curanchor == 0) {
- di->anchors = newanchor(++is->nanchors, nil, nil, di->target, di->anchors);
- ps->curanchor = is->nanchors;
- }
}
}
align = atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom);
- dfltbd = 0;
- if(ps->curanchor != 0)
- dfltbd = 2;
src = aurlval(tok, Asrc, nil, di->base);
if(src == nil) {
if(warn)
fprint(2, "warning: <img> has no src attribute\n");
- ps->curanchor = oldcuranchor;
continue;
}
img = newiimage(src,
@@ -1124,7 +1104,7 @@
auintval(tok, Aheight, 0),
auintval(tok, Ahspace, IMGHSPACE),
auintval(tok, Avspace, IMGVSPACE),
- auintval(tok, Aborder, dfltbd),
+ auintval(tok, Aborder, ps->lastit->anchor == nil && usemap == nil ? 0 : 2),
aflagval(tok, Aismap),
map);
if(align == ALleft || align == ALright) {
@@ -1137,7 +1117,6 @@
ps->skipwhite = 0;
additem(ps, img, tok);
}
- ps->curanchor = oldcuranchor;
break;
// <!ELEMENT INPUT - O EMPTY>
@@ -1148,7 +1127,7 @@
fprint(2, "<INPUT> not inside <FORM>\n");
continue;
}
- field = newformfield(
+ field = is->curform->fields = newformfield(
atabval(tok, Atype, input_tab, NINPUTTAB, Ftext),
++is->curform->nfields,
is->curform,
@@ -1156,7 +1135,7 @@
aval(tok, Avalue),
auintval(tok, Asize, 0),
auintval(tok, Amaxlength, 1000),
- nil);
+ is->curform->fields);
if(aflagval(tok, Achecked))
field->flags = FFchecked;
@@ -1239,14 +1218,14 @@
HGet,
di->forms);
di->forms = frm;
- ff = newformfield(Ftext,
+ frm->fields = ff = newformfield(Ftext,
++frm->nfields,
frm,
_Strdup(L"_ISINDEX_"),
nil,
50,
1000,
- nil);
+ frm->fields);
additem(ps, newiformfield(ff), tok);
addbrk(ps, 1, 0);
break;
@@ -1279,17 +1258,16 @@
// <!ELEMENT MAP - - (AREA)+>
case Tmap:
if(_tokaval(tok, Aname, &name, 0))
- is->curmap = getmap(di, name);
+ map = getmap(di, name);
break;
case Tmap+RBRA:
- map = is->curmap;
if(map == nil) {
if(warn)
fprint(2, "warning: unexpected </MAP>\n");
continue;
}
- map->areas = (Area*)_revlist((List*)map->areas);
+ map = nil;
break;
case Tmeta:
@@ -1443,14 +1421,14 @@
fprint(2, "<SELECT> not inside <FORM>\n");
continue;
}
- is->curfield = field = newformfield(Fselect,
+ field = is->curform->fields = newformfield(Fselect,
++is->curform->nfields,
is->curform,
aval(tok, Aname),
nil,
auintval(tok, Asize, 0),
0,
- nil);
+ is->curform->fields);
if(aflagval(tok, Amultiple))
field->flags = FFmultiple;
ffit = newiformfield(field);
@@ -1464,17 +1442,16 @@
break;
case Tselect+RBRA:
- if(is->curform == nil || is->curfield == nil) {
+ if(is->curform == nil || is->curform->fields == nil) {
if(warn)
fprint(2, "warning: unexpected </SELECT>\n");
continue;
}
- field = is->curfield;
+ field = is->curform->fields;
if(field->ftype != Fselect)
continue;
// put options back in input order
field->options = (Option*)_revlist((List*)field->options);
- is->curfield = nil;
break;
// <!ELEMENT (STRIKE|U) - - (%text)*>
@@ -1676,14 +1653,14 @@
fprint(2, "<TEXTAREA> not inside <FORM>\n");
continue;
}
- field = newformfield(Ftextarea,
+ field = is->curform->fields = newformfield(Ftextarea,
++is->curform->nfields,
is->curform,
aval(tok, Aname),
nil,
0,
0,
- nil);
+ is->curform->fields);
field->rows = auintval(tok, Arows, 3);
field->cols = auintval(tok, Acols, 50);
field->value = getpcdata(toks, tokslen, &toki);
@@ -1759,6 +1736,7 @@
case Tbase+RBRA:
case Tbasefont+RBRA:
case Tbr+RBRA:
+ case Tcaption+RBRA:
case Tdd+RBRA:
case Tdt+RBRA:
case Tframe+RBRA:
@@ -1859,7 +1837,12 @@
else
printitems(ans, "getitems returning:");
}
- linkitems(di, ans);
+ linkitems(di, ans, &di->images);
+ di->forms = (Form*)_revlist((List*)di->forms);
+ di->tables = (Table*)_revlist((List*)di->tables);
+ di->maps = (Map*)_revlist((List*)di->maps);
+ for(map = di->maps; map != nil; map = map->next)
+ map->areas = (Area*)_revlist((List*)map->areas);
_freetokens(toks, tokslen);
return ans;
}
@@ -1959,7 +1942,6 @@
ps = newpstate(oldps);
ps->skipwhite = 1;
- ps->curanchor = oldps->curanchor;
copystack(&ps->fntstylestk, &oldps->fntstylestk);
copystack(&ps->fntsizestk, &oldps->fntsizestk);
ps->curfont = oldps->curfont;
@@ -2027,7 +2009,7 @@
freeitem(it);
return;
}
- it->anchorid = ps->curanchor;
+ it->anchor = ps->lastit->anchor;
it->state |= ps->curstate;
if(tok != nil) {
any = 0;
@@ -2745,20 +2727,6 @@
return m;
}
-// Transfers ownership of href to Area
-static Area*
-newarea(int shape, Rune* href, int target, Area* link)
-{
- Area* a;
-
- a = (Area*)emalloc(sizeof(Area));
- a->shape = shape;
- a->href = href;
- a->target = target;
- a->next = link;
- return a;
-}
-
// Return string value associated with attid in tok, nil if none.
// Caller must free the result (eventually).
static Rune*
@@ -2924,21 +2892,51 @@
}
// Attribute value when value is a URL, possibly relative to base.
-// FOR NOW: leave the url relative.
+// A full URL is returned where possible.
// Caller must free the result (eventually).
static Rune*
aurlval(Token* tok, int attid, Rune* dflt, Rune* base)
{
- Rune* ans;
Rune* url;
-
- USED(base);
- ans = nil;
- if(_tokaval(tok, attid, &url, 0) && url != nil)
- ans = removeallwhite(url);
- if(ans == nil)
- ans = _Strdup(dflt);
- return ans;
+ Rune* r;
+
+ _tokaval(tok, attid, &url, 0);
+ if(url == nil)
+ return _Strdup(dflt);
+
+ // Check whether base need even be considered
+ if(base == nil)
+ return _Strdup(url);
+ for(r = url; isalpha(*r); r++)
+ ;
+ if(*url == '#' || r > url && !runestrncmp(r, L"://", 3))
+ return _Strdup(url);
+
+ // Check base starts with a proto://
+ for(r = base; isalpha(*r); r++)
+ ;
+ if(r == base || runestrncmp(r, L"://", 3))
+ return _Strdup(url);
+
+ // Combine “protocol relative” url
+ if(!runestrncmp(url, L"//", 2))
+ return runesmprint("%.*S%S", (int)(r - base + 1), base, url);
+ r += 3;
+
+ // Check base looks like it has a hostname
+ while(isalnum(*r) || runestrchr(L"_@-:.", *r))
+ r++;
+ if(r[-1] == '/' || *r && *r != '/')
+ return _Strdup(url);
+
+ // Combine rooted url
+ if(*url == '/')
+ return runesmprint("%.*S%S", (int)(r - base), base, url);
+
+ // Find final slash if not already at end, combine and return
+ if(*r)
+ r = runestrrchr(r, '/');
+ return runesmprint("%.*S/%S", (int)(r - base), base, url);
}
// Return copy of s but with all whitespace (even internal) removed.
@@ -3334,7 +3332,7 @@
}
free(t->rows);
free(t->cols);
- freeitems(t->caption);
+ free(t->caption);
free(t);
}
@@ -3386,34 +3384,12 @@
}
static void
-freedestanchor(DestAnchor* da)
-{
- if(da == nil)
- return;
-
- free(da->name);
- free(da);
-}
-
-static void
-freedestanchors(DestAnchor* dahead)
-{
- DestAnchor* da;
- DestAnchor* danext;
-
- for(da = dahead; da != nil; da = danext) {
- danext = da->next;
- freedestanchor(da);
- }
-}
-
-static void
freearea(Area* a)
{
if(a == nil)
return;
- free(a->href);
free(a->coords);
+ // anchor will be freed from docinfo->anchors
}
static void freekidinfos(Kidinfo* khead);
@@ -3486,7 +3462,6 @@
free(d->refresh);
freekidinfos(d->kidinfo);
freeanchors(d->anchors);
- freedestanchors(d->dests);
freeforms(d->forms);
freemaps(d->maps);
// tables, images, and formfields are freed when
@@ -3640,8 +3615,8 @@
bi += snprint(buf+bi, nbuf-bi, "Spacer %s ", p);
break;
}
- bi += snprint(buf+bi, nbuf-bi, " w=%d, h=%d, a=%d, anchor=%d\n",
- it->width, it->height, it->ascent, it->anchorid);
+ bi += snprint(buf+bi, nbuf-bi, " w=%d, h=%d, a=%d, anchor=%S\n",
+ it->width, it->height, it->ascent, it->anchor == nil ? nil : it->anchor->href);
buf[bi] = 0;
return fmtstrcpy(f, buf);
}
@@ -3807,33 +3782,6 @@
return c;
}
-static Anchor*
-newanchor(int index, Rune* name, Rune* href, int target, Anchor* link)
-{
- Anchor* a;
-
- a = (Anchor*)emalloc(sizeof(Anchor));
- a->index = index;
- a->name = name;
- a->href = href;
- a->target = target;
- a->next = link;
- return a;
-}
-
-static DestAnchor*
-newdestanchor(int index, Rune* name, Item* item, DestAnchor* link)
-{
- DestAnchor* d;
-
- d = (DestAnchor*)emalloc(sizeof(DestAnchor));
- d->index = index;
- d->name = name;
- d->item = item;
- d->next = link;
- return d;
-}
-
static SEvent*
newscriptevent(int type, Rune* script, SEvent* link)
{
@@ -4027,10 +3975,10 @@
(i->width >= 0 && i->width < HUGEPIX) &&
(i->height >= 0 && i->height < HUGEPIX) &&
(i->ascent > -HUGEPIX && i->ascent < HUGEPIX) &&
- (i->anchorid >= 0) &&
+ (i->anchor == nil || validptr(i->anchor)) &&
(i->genattr == nil || validptr(i->genattr));
// also, could check state for ridiculous combinations
- // also, could check anchorid for within-doc-range
+ // also, could check anchor points into docinfo->anchors
if(ok)
switch(i->tag) {
case Itexttag:
@@ -4144,7 +4092,7 @@
(t->border >= 0 && t->border < HUGEPIX) &&
(t->cellspacing >= 0 && t->cellspacing < HUGEPIX) &&
(t->cellpadding >= 0 && t->cellpadding < HUGEPIX) &&
- validitems(t->caption) &&
+ (t->caption == nil || validptr(t->caption)) &&
(t->caption_place == ALtop || t->caption_place == ALbottom) &&
(t->totw >= 0 && t->totw < HUGEPIX) &&
(t->toth >= 0 && t->toth < HUGEPIX) &&
[-- Attachment #3: htmlfmt.c --]
[-- Type: text/plain, Size: 7106 bytes --]
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <draw.h>
#include <html.h>
/*
 * br: pending break flags (IFbrk, IFbrksp) consumed by spew.
 * wrap: nonzero while the current item state asks for word wrap.
 * nfn, nallocfn: used and allocated slots in the footnote table fn.
 * width: output line width in runes (set by -l/-w).
 */
int br, wrap, nfn, nallocfn = 64, width = 70;
/* defcharset: -c argument passed on to uhtml; fn: footnote strings printed by main */
char *defcharset, **fn;
/* url: -u argument passed to parsehtml; l: line buffer filled by spew */
Rune *url, *l;
/* buffered output on file descriptor 1 */
Biobuf out;
/* anchor of the items currently being rendered, nil when outside any anchor */
Anchor *curanchor;
/*
 * Allocate n zeroed bytes; exits via sysfatal if malloc fails.
 */
void*
emalloc(ulong n){
	void *mem;

	if((mem = malloc(n)) == nil)
		sysfatal("malloc: %r");
	return memset(mem, 0, n);
}
/*
 * Resize p to n bytes; exits via sysfatal if realloc fails.
 * Passing nil for p allocates a fresh (uninitialised) block.
 */
void*
erealloc(void *p, ulong n){
	void *grown;

	grown = realloc(p, n);
	if(grown == nil)
		sysfatal("realloc: %r");
	return grown;
}
/*
 * Write s to the output, honouring pending breaks and word wrap.
 *
 * If br has IFbrk set, the partial line held in l is flushed first
 * (followed by a blank line when IFbrksp is also set) and br is cleared;
 * spew(nil) therefore just performs a pending break.
 * With wrap off, s is printed as is.  With wrap on, text is collected
 * in the width-rune buffer l and a line is emitted, broken at its last
 * space, each time the buffer fills.
 */
void
spew(Rune *s){
	static int off;	/* number of runes currently held in l */
	Rune *sp;
	int n;

	if(br & IFbrk){
		Bprint(&out, "%.*S\n", off, l);
		if(br & IFbrksp) Bputc(&out, '\n');
		br = off = 0;
	}
	if(s == nil || *s == 0)
		return;
	if(!wrap){
		Bprint(&out, "%S", s);
		return;
	}
	for(;;){
		n = runestrlen(s);
		if(off + n <= width){
			/* everything fits: keep it for later */
			runestrncpy(l + off, s, n);
			off += n;
			return;
		}
		/* fill the buffer completely with the head of s */
		n = width - off;
		runestrncpy(l + off, s, n);
		s += n;
		/*
		 * Look for the last space inside l[0..width).  The buffer is
		 * full here and carries no terminator of its own, so the scan
		 * is bounded by width instead of relying on runestrrchr.
		 */
		for(n = width - 1; n >= 0 && l[n] != ' '; n--)
			;
		if(n < 0){ /* long line */
			off = 0;
			Bprint(&out, "%.*S", width, l);
			sp = runestrchr(s, ' ');
			if(sp == nil){
				Bprint(&out, "%S\n", s);
				return;
			}
			Bprint(&out, "%.*S\n", (int)(sp - s), s);
			s = sp + 1;
			continue;
		}
		/* break at the space and carry the tail over to the next line */
		Bprint(&out, "%.*S\n", n, l);
		off = width - n - 1;
		/* source and destination overlap inside l */
		memmove(l, l + n + 1, off * sizeof(Rune));
	}
}
/*
 * Record ref as footnote number nfn and print the footnote mark.
 * The mark is the decimal number written with superscript digits;
 * the "mark ref" line is stored in fn[] for main to print later.
 */
void
addfn(Rune *ref){
	static Rune *super = L"⁰¹²³⁴⁵⁶⁷⁸⁹";
	Rune *mark;
	int i;

	mark = runesmprint("%d", nfn);
	if(mark == nil)
		sysfatal("smprint: %r");
	/* swap each ASCII digit for its superscript form */
	for(i = 0; mark[i] != 0; i++)
		mark[i] = super[mark[i] - '0'];
	/* double the table when it is full */
	if(nfn >= nallocfn){
		nallocfn <<= 1;
		fn = erealloc(fn, nallocfn * sizeof(char*));
	}
	fn[nfn] = smprint("%S %S\n", mark, ref);
	if(fn[nfn] == nil)
		sysfatal("smprint: %r");
	spew(mark);
	free(mark);
	nfn++;
}
/*
 * Render one Item to the output through spew/Bprint.
 * Tables and floats recurse into their nested items.
 * Reads and updates the globals br, wrap and curanchor.
 */
void
render(Item *item){
/* label printed before a form field, indexed by field->ftype */
static Rune *ftype[] = {
L"§ input: ", L"§ password: ", L"□ ", L"○ ",
L"[submit] ", L"§ hidden: ", L"§ image: ", L"§ reset: ",
L"§ file: ", L"§ button: ", L"§ dropdown: ", L"§ textarea: "
};
Itext *txt; Iimage *img; Area *area;
Formfield *field; Option *opt;
Table *table; Tablecell *cell;
Item *subitem;
/* accumulate the item's state bits; spew looks at IFbrk and IFbrksp */
br |= item->state;
/* a change of wrap mode forces a line break */
if(wrap ^ item->state & IFwrap){
br |= IFbrk;
wrap = item->state & IFwrap;
}
/*
 * Anchor changed: footnote the href of the anchor just left and
 * remember the new one.
 * NOTE(review): this returns without rendering item itself; presumably
 * libhtml delivers a content-free item at each anchor boundary — confirm.
 */
if(item->anchor != curanchor){
if(curanchor != nil)
addfn(curanchor->href);
curanchor = item->anchor;
return;
}
switch(item->tag){
case Itexttag:
txt = (Itext*)item;
/* decorate by font size first, then strike-through, italic, bold */
if(txt->fnt % NumSize == Verylarge){
spew(L"# "); spew(txt->s); spew(L" #");
}else if(txt->fnt % NumSize == Large){
spew(L"## "); spew(txt->s); spew(L" ##");
}else if(txt->ul == ULmid){
spew(L"~~"); spew(txt->s); spew(L"~~");
}else if(txt->fnt / NumSize == FntI){
spew(L"*"); spew(txt->s); spew(L"*");
}else if(txt->fnt / NumSize == FntB){
spew(L"**"); spew(txt->s); spew(L"**");
}else
spew(txt->s);
if(item->state & IFhangmask) /* list marker */
spew(L" ");
break;
case Iruletag:
/* flush the current line, then draw the rule as a row of zeros */
br = IFbrk | IFbrksp;
spew(nil);
Bprint(&out, " %0*d ", width - 2, 0); /* hahaha */
br = IFbrk | IFbrksp;
break;
case Iimagetag:
img = (Iimage*)item;
if(img->map != nil){
/* close any open anchor before listing the image map */
if(curanchor != nil){
addfn(curanchor->href);
curanchor = nil;
}
br = IFbrk | IFbrksp;
spew(L"§imagemap:");
br = IFbrk;
}
/* also entered from the Iformfieldtag case for image fields */
fimage:
/* {alt text} followed by a footnote mark for the image source */
spew(L"{");
spew(img->altrep);
addfn(img->imsrc);
spew(L"}");
if(img->map == nil)
break;
/* one line per map area: anchor name plus a footnote for its href */
/* NOTE(review): assumes every area has a non-nil anchor — confirm */
for(area = img->map->areas; area != nil; area = area->next){
br = IFbrk;
spew(area->anchor->name);
addfn(area->anchor->href);
}
br = IFbrk | IFbrksp;
break;
case Iformfieldtag:
br |= IFbrk;
field = ((Iformfield*)item)->formfield;
/* "<label><name>: <value>" */
spew(ftype[field->ftype]);
spew(field->name);
spew(L": ");
spew(field->value);
if(field->ftype == Fimage){
/* NOTE(review): assumes field->image is set for Fimage fields — confirm */
img = (Iimage*)field->image;
goto fimage;
}
br = IFbrk;
if(field->ftype == Fselect){
/* flush the field line, then list each option */
spew(nil);
for(opt = field->options; opt != nil; opt = opt->next)
Bprint(&out, "→ %S (%S)\n", opt->display, opt->value);
}
break;
case Itabletag:
table = ((Itable*)item)->table;
if(table->caption_place == ALtop)
spew(table->caption);
br |= IFbrk;
/*
 * Walk the cells: follow nextinrow within a row, separating
 * cells with a tab, and move on through next (with a line
 * break) when a cell has no nextinrow.
 */
cell = table->cells;
while(cell != nil){
for(subitem = cell->content; subitem != nil; subitem = subitem->next)
render(subitem);
if(cell->nextinrow == nil){
br |= IFbrk;
cell = cell->next;
continue;
}
spew(L"\t");
cell = cell->nextinrow;
}
br |= IFbrk;
if(table->caption_place == ALbottom)
spew(table->caption);
break;
case Ifloattag:
/* render the floated item list in place */
for(subitem = ((Ifloat*)item)->item; subitem != nil; subitem = subitem->next)
render(subitem);
break;
case Ispacertag:
switch(((Ispacer*)item)->spkind){
case ISPvline:
br = IFbrk | IFbrksp;
break;
case ISPhspace: case ISPgeneral: default:
/* a single space, unless a break is already pending */
if(!(br & IFbrk))
spew(L" ");
/* falls through to the break below */
case ISPnull:
break;
}
break;
default:
fprint(2, "unknown item tag %d\n", item->tag);
}
}
/*
 * Print one line per entry of the Kidinfo list k: framesets by name,
 * followed recursively by their kidinfos, and plain frames as
 * "name: src".
 */
void
frames(Kidinfo *k){
	for(; k != nil; k = k->next){
		if(!k->isframeset){
			Bprint(&out, "\nFrame: %S: %S", k->name, k->src);
			continue;
		}
		Bprint(&out, "\nFrameset: %S", k->name);
		frames(k->kidinfos);
	}
}
/*
 * Read an HTML document from fd, parse it and render it to out.
 *
 * The input is first piped through uhtml (with -c defcharset when
 * given), falling back to cat if uhtml cannot be executed.  The
 * converted text is collected in memory, handed to parsehtml as UTF-8,
 * and each resulting item is passed to render.  fd is closed before
 * returning.  An empty document produces no output.
 */
void
loadhtml(int fd){
	Docinfo *d;
	Item *items, *i;
	uchar *buf;
	long n, nbuf, nalloc;
	int p[2];

	if(pipe(p) < 0)
		sysfatal("pipe: %r");
	switch(fork()){
	case -1:
		sysfatal("fork: %r");
	case 0:
		/* child: fd becomes stdin, the pipe becomes stdout */
		dup(fd, 0);
		dup(p[1], 1);
		close(p[1]);
		close(p[0]);
		if(defcharset)
			execl("/bin/uhtml", "uhtml", "-c", defcharset, nil);
		execl("/bin/uhtml", "uhtml", nil);
		execl("/bin/cat", "cat", nil);
		sysfatal("execl: %r");
	}
	close(p[1]);
	buf = erealloc(nil, nalloc = 8192);
	nbuf = 0;
	/*
	 * Stop on end of file (0) and on error (-1); a negative count
	 * must never be added to nbuf.  The buffer is doubled as soon
	 * as it is more than half full, so there is always room to read.
	 */
	while((n = read(p[0], buf + nbuf, nalloc - nbuf)) > 0)
		if((nbuf += n) > nalloc / 2)
			buf = erealloc(buf, nalloc <<= 1);
	if(n < 0)
		fprint(2, "read: %r\n");	/* carry on with what was read */
	close(p[0]);
	close(fd);
	if(nbuf == 0){
		free(buf);
		return;
	}
	buf = erealloc(buf, nbuf);
	items = parsehtml(buf, nbuf, url, TextHtml, UTF_8, &d);
	free(buf);
	/* a document without a title has nothing to print here */
	if(d->doctitle != nil)
		Bprint(&out, "%S", d->doctitle);
	frames(d->kidinfo);
	br = IFbrk | IFbrksp;
	for(i = items; i != nil; i = i->next)
		render(i);
	if(curanchor != nil){
		addfn(curanchor->href);
		/*
		 * The anchors are released together with the docinfo below;
		 * forget this one so that the next document does not compare
		 * against, or dereference, freed memory.
		 */
		curanchor = nil;
	}
	br = IFbrk;
	spew(nil);
	freeitems(items);
	freedocinfo(d);
}
/*
 * Print the command synopsis on standard error and exit with
 * status "usage".
 */
void
usage(void){
	static char synopsis[] = "usage: %s [-c charset] [-u URL] [-l length] [file ...]\n";

	fprint(2, synopsis, argv0);
	exits("usage");
}
/*
 * htmlfmt: format HTML files (or standard input) as plain text.
 *
 *	-c charset	passed on to uhtml for character set conversion
 *	-l n, -w n	output line width in runes (at least 1)
 *	-u url		base URL handed to parsehtml
 *
 * After all documents have been rendered, the collected footnotes are
 * printed under a "References:" heading.  The exit status is
 * "open error" if any file could not be opened.
 */
void
main(int argc, char *argv[]){
	int i, fd;
	char *s;

	ARGBEGIN{
	case 'c':
		defcharset = EARGF(usage());
		break;
	case 'l': case 'w':
		s = EARGF(usage());
		width = atoi(s);
		if(width < 1)
			usage();
		break;
	case 'u':
		s = EARGF(usage());
		free(url);
		/* convert the UTF-8 argument to a zero-terminated rune string */
		url = emalloc((utflen(s) + 1) * sizeof(Rune));
		for(i = 0; *s; i++)
			s += chartorune(url + i, s);
		break;
	default:
		usage();
	}ARGEND
	Binit(&out, 1, OWRITE);
	/*
	 * One rune more than the line width: spew only ever writes the
	 * first width runes, so the last one stays zero and terminates
	 * the buffer for the rune string routines used on it.
	 */
	l = emalloc((width + 1) * sizeof(Rune));
	fn = erealloc(nil, nallocfn * sizeof(char*));
	s = nil;
	if(argc == 0)
		loadhtml(0);
	else
		for(i = 0; i < argc; i++){
			fd = open(argv[i], OREAD);
			if(fd < 0){
				fprint(2, "skipping %s: open: %r\n", argv[i]);
				s = "open error";
				continue;
			}
			if(i) Bputc(&out, '\n');
			if(argc > 1) Bprint(&out, "→ %s: ", argv[i]);
			loadhtml(fd);
		}
	free(l);
	if(nfn){
		Bwrite(&out, "\nReferences:\n", 13);
		for(i = 0; i < nfn; i++){
			Bwrite(&out, fn[i], strlen(fn[i]));
			free(fn[i]);
		}
	}
	free(fn);
	exits(s);
}
^ permalink raw reply [flat|nested] 17+ messages in thread