source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mandoc: The .UR and .MT blocks in man(7) are represented by <a> elements
@ 2019-01-18 14:36 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2019-01-18 14:36 UTC (permalink / raw)
  To: source

Log Message:
-----------
The .UR and .MT blocks in man(7) are represented by <a> elements
which establish phrasing context, but they can contain paragraph
breaks (which is relevant for terminal formatting, so we can't just
change the structure of the syntax tree), which are represented
by <p> elements and cannot occur inside <a>.

Fix this by prematurely closing the <a> element in the HTML formatter.  
This means that the clickable text in HTML output is shorter than
what is represented as the link text in terminal output, but in
HTML, it is frankly impossible to have the clickable area of a
hyperlink extend across a paragraph break.  The difference in
presentation is not a major problem, and besides, paragraph breaks
inside .UR are rather poor style in the first place.

The implementation is quite tricky.  Naively closing out the <a>
prematurely would result in accessing a stale pointer when later
reaching the physical end of the .UR block.  So this commit separates
visual and structural closing of "struct tag" stack items.  Visual
closing means that the HTML element is closed but the "struct tag"
remains on the stack, to avoid later access to a stale pointer and
to avoid closing the same HTML element a second time later.

This also needs reference counting of pointers to "struct tag" stack 
items because often more than one child holds a pointer to the same
parent item, and only the outermost child can safely do the physical
closing.

In the whole corpus of nearly half a million manual pages on
man.openbsd.org, this problem occurs in exactly one page: the
groff(1) version 1.20.1 manual contained in DragonFly-3.8.2, which 
contains a formatting error triggering the bug.

Modified Files:
--------------
    mandoc:
        html.c
        html.h
        man_html.c
        mdoc_html.c

Revision Data
-------------
Index: html.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/html.c,v
retrieving revision 1.251
retrieving revision 1.252
diff -Lhtml.c -Lhtml.c -u -p -r1.251 -r1.252
--- html.c
+++ html.c
@@ -271,11 +271,19 @@ html_close_paragraph(struct html *h)
 {
 	struct tag	*t;
 
-	for (t = h->tag; t != NULL; t = t->next) {
-		if (t->tag == TAG_P || t->tag == TAG_PRE) {
+	for (t = h->tag; t != NULL && t->closed == 0; t = t->next) {
+		switch(t->tag) {
+		case TAG_P:
+		case TAG_PRE:
 			print_tagq(h, t);
 			break;
+		case TAG_A:
+			print_tagq(h, t);
+			continue;
+		default:
+			continue;
 		}
+		break;
 	}
 }
 
@@ -579,6 +587,8 @@ print_otag(struct html *h, enum htmltag 
 		t = mandoc_malloc(sizeof(struct tag));
 		t->tag = tag;
 		t->next = h->tag;
+		t->refcnt = 0;
+		t->closed = 0;
 		h->tag = t;
 	} else
 		t = NULL;
@@ -711,33 +721,32 @@ print_ctag(struct html *h, struct tag *t
 {
 	int	 tflags;
 
-	/*
-	 * Remember to close out and nullify the current
-	 * meta-font and table, if applicable.
-	 */
-	if (tag == h->metaf)
-		h->metaf = NULL;
-	if (tag == h->tblt)
-		h->tblt = NULL;
-
-	tflags = htmltags[tag->tag].flags;
-
-	if (tflags & HTML_INDENT)
-		h->indent--;
-	if (tflags & HTML_NOINDENT)
-		h->noindent--;
-	if (tflags & HTML_NLEND)
-		print_endline(h);
-	print_indent(h);
-	print_byte(h, '<');
-	print_byte(h, '/');
-	print_word(h, htmltags[tag->tag].name);
-	print_byte(h, '>');
-	if (tflags & HTML_NLAFTER)
-		print_endline(h);
-
-	h->tag = tag->next;
-	free(tag);
+	if (tag->closed == 0) {
+		tag->closed = 1;
+		if (tag == h->metaf)
+			h->metaf = NULL;
+		if (tag == h->tblt)
+			h->tblt = NULL;
+
+		tflags = htmltags[tag->tag].flags;
+		if (tflags & HTML_INDENT)
+			h->indent--;
+		if (tflags & HTML_NOINDENT)
+			h->noindent--;
+		if (tflags & HTML_NLEND)
+			print_endline(h);
+		print_indent(h);
+		print_byte(h, '<');
+		print_byte(h, '/');
+		print_word(h, htmltags[tag->tag].name);
+		print_byte(h, '>');
+		if (tflags & HTML_NLAFTER)
+			print_endline(h);
+	}
+	if (tag->refcnt == 0) {
+		h->tag = tag->next;
+		free(tag);
+	}
 }
 
 void
@@ -824,12 +833,11 @@ print_text(struct html *h, const char *w
 void
 print_tagq(struct html *h, const struct tag *until)
 {
-	struct tag	*tag;
+	struct tag	*this, *next;
 
-	while ((tag = h->tag) != NULL) {
-		print_ctag(h, tag);
-		if (tag == until)
-			return;
+	for (this = h->tag; this != NULL; this = next) {
+		next = this == until ? NULL : this->next;
+		print_ctag(h, this);
 	}
 }
 
@@ -841,14 +849,14 @@ print_tagq(struct html *h, const struct 
 void
 print_stagq(struct html *h, const struct tag *suntil)
 {
-	struct tag	*tag;
+	struct tag	*this, *next;
 
-	while ((tag = h->tag) != NULL) {
-		if (tag == suntil ||
-		    (tag->next == suntil &&
-		     (tag->tag == TAG_P || tag->tag == TAG_PRE)))
-			return;
-		print_ctag(h, tag);
+	for (this = h->tag; this != NULL; this = next) {
+		next = this->next;
+		if (this == suntil || (next == suntil &&
+		    (this->tag == TAG_P || this->tag == TAG_PRE)))
+			break;
+		print_ctag(h, this);
 	}
 }
 
Index: man_html.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/man_html.c,v
retrieving revision 1.169
retrieving revision 1.170
diff -Lman_html.c -Lman_html.c -u -p -r1.169 -r1.170
--- man_html.c
+++ man_html.c
@@ -168,10 +168,6 @@ print_man_node(MAN_ARGS)
 	html_fillmode(h, n->flags & NODE_NOFILL ? ROFF_nf : ROFF_fi);
 
 	child = 1;
-	t = h->tag;
-	if (t->tag == TAG_P || t->tag == TAG_PRE)
-		t = t->next;
-
 	switch (n->type) {
 	case ROFFT_TEXT:
 		if (*n->string == '\0') {
@@ -183,9 +179,13 @@ print_man_node(MAN_ARGS)
 			print_endline(h);
 		else if (n->flags & NODE_DELIMC)
 			h->flags |= HTML_NOSPACE;
+		t = h->tag;
+		t->refcnt++;
 		print_text(h, n->string);
 		break;
 	case ROFFT_EQN:
+		t = h->tag;
+		t->refcnt++;
 		print_eqn(h, n->eqn);
 		break;
 	case ROFFT_TBL:
@@ -211,12 +211,13 @@ print_man_node(MAN_ARGS)
 		 * the "meta" table state.  This will be reopened on the
 		 * next table element.
 		 */
-		if (h->tblt != NULL) {
+		if (h->tblt != NULL)
 			print_tblclose(h);
-			t = h->tag;
-		}
+		t = h->tag;
+		t->refcnt++;
 		if (n->tok < ROFF_MAX) {
 			roff_html_pre(h, n);
+			t->refcnt--;
 			print_stagq(h, t);
 			return;
 		}
@@ -231,6 +232,7 @@ print_man_node(MAN_ARGS)
 		print_man_nodelist(man, n->child, h);
 
 	/* This will automatically close out any font scope. */
+	t->refcnt--;
 	print_stagq(h, t);
 
 	if (n->flags & NODE_NOFILL && n->tok != MAN_YS &&
Index: html.h
===================================================================
RCS file: /home/cvs/mandoc/mandoc/html.h,v
retrieving revision 1.100
retrieving revision 1.101
diff -Lhtml.h -Lhtml.h -u -p -r1.100 -r1.101
--- html.h
+++ html.h
@@ -79,6 +79,8 @@ enum	htmlfont {
 
 struct	tag {
 	struct tag	 *next;
+	int		  refcnt;
+	int		  closed;
 	enum htmltag	  tag;
 };
 
Index: mdoc_html.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/mdoc_html.c,v
retrieving revision 1.326
retrieving revision 1.327
diff -Lmdoc_html.c -Lmdoc_html.c -u -p -r1.326 -r1.327
--- mdoc_html.c
+++ mdoc_html.c
@@ -354,13 +354,12 @@ print_mdoc_node(MDOC_ARGS)
 	html_fillmode(h, n->flags & NODE_NOFILL ? ROFF_nf : ROFF_fi);
 
 	child = 1;
-	t = h->tag;
-	if (t->tag == TAG_P || t->tag == TAG_PRE)
-		t = t->next;
-
 	n->flags &= ~NODE_ENDED;
 	switch (n->type) {
 	case ROFFT_TEXT:
+		t = h->tag;
+		t->refcnt++;
+
 		/* No tables in this mode... */
 		assert(NULL == h->tblt);
 
@@ -379,6 +378,8 @@ print_mdoc_node(MDOC_ARGS)
 			h->flags |= HTML_NOSPACE;
 		break;
 	case ROFFT_EQN:
+		t = h->tag;
+		t->refcnt++;
 		print_eqn(h, n->eqn);
 		break;
 	case ROFFT_TBL:
@@ -395,13 +396,14 @@ print_mdoc_node(MDOC_ARGS)
 		 * the "meta" table state.  This will be reopened on the
 		 * next table element.
 		 */
-		if (h->tblt != NULL) {
+		if (h->tblt != NULL)
 			print_tblclose(h);
-			t = h->tag;
-		}
 		assert(h->tblt == NULL);
+		t = h->tag;
+		t->refcnt++;
 		if (n->tok < ROFF_MAX) {
 			roff_html_pre(h, n);
+			t->refcnt--;
 			print_stagq(h, t);
 			return;
 		}
@@ -421,6 +423,7 @@ print_mdoc_node(MDOC_ARGS)
 	if (child && n->child != NULL)
 		print_mdoc_nodelist(meta, n->child, h);
 
+	t->refcnt--;
 	print_stagq(h, t);
 
 	switch (n->type) {
--
 To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2019-01-18 14:36 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-18 14:36 mandoc: The .UR and .MT blocks in man(7) are represented by <a> elements schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).