source@mandoc.bsd.lv
 help / color / Atom feed
* mandoc: In the HTML formatter, assert(3) that no HTML nesting violation occurs
@ 2019-08-29 17:58 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2019-08-29 17:58 UTC (permalink / raw)
  To: source

Log Message:
-----------
In the HTML formatter, assert(3) that no HTML nesting violation occurs.  
Tested on the complete manual page trees of Version 7 AT&T UNIX,
4.4BSD-Lite2, POSIX-2013, OpenBSD 2.2 to 6.5 and -current, 
FreeBSD 10.0 to 12.0, NetBSD 6.1.5 to 8.1, DragonFly 3.8.2 to 5.6.1, 
and Linux 4.05 to 5.02.

Modified Files:
--------------
    mandoc:
        html.c
        html.h

Revision Data
-------------
Index: html.h
===================================================================
RCS file: /home/cvs/mandoc/mandoc/html.h,v
retrieving revision 1.103
retrieving revision 1.104
diff -Lhtml.h -Lhtml.h -u -p -r1.103 -r1.104
--- html.h
+++ html.h
@@ -19,18 +19,14 @@
 enum	htmltag {
 	TAG_HTML,
 	TAG_HEAD,
-	TAG_BODY,
 	TAG_META,
+	TAG_LINK,
+	TAG_STYLE,
 	TAG_TITLE,
+	TAG_BODY,
 	TAG_DIV,
 	TAG_IDIV,
 	TAG_SECTION,
-	TAG_H1,
-	TAG_H2,
-	TAG_SPAN,
-	TAG_LINK,
-	TAG_BR,
-	TAG_A,
 	TAG_TABLE,
 	TAG_TR,
 	TAG_TD,
@@ -40,15 +36,19 @@ enum	htmltag {
 	TAG_DL,
 	TAG_DT,
 	TAG_DD,
+	TAG_H1,
+	TAG_H2,
 	TAG_P,
 	TAG_PRE,
-	TAG_VAR,
-	TAG_CITE,
+	TAG_A,
 	TAG_B,
-	TAG_I,
+	TAG_CITE,
 	TAG_CODE,
+	TAG_I,
 	TAG_SMALL,
-	TAG_STYLE,
+	TAG_SPAN,
+	TAG_VAR,
+	TAG_BR,
 	TAG_MATH,
 	TAG_MROW,
 	TAG_MI,
Index: html.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/html.c,v
retrieving revision 1.256
retrieving revision 1.257
diff -Lhtml.c -Lhtml.c -u -p -r1.256 -r1.257
--- html.c
+++ html.c
@@ -42,34 +42,31 @@
 struct	htmldata {
 	const char	 *name;
 	int		  flags;
-#define	HTML_NOSTACK	 (1 << 0)
-#define	HTML_AUTOCLOSE	 (1 << 1)
-#define	HTML_NLBEFORE	 (1 << 2)
-#define	HTML_NLBEGIN	 (1 << 3)
-#define	HTML_NLEND	 (1 << 4)
-#define	HTML_NLAFTER	 (1 << 5)
+#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
+#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
+#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
+#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
+#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
+#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
+#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
 #define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
 #define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
 #define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
-#define	HTML_INDENT	 (1 << 6)
-#define	HTML_NOINDENT	 (1 << 7)
+#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
+#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */
 };
 
 static	const struct htmldata htmltags[TAG_MAX] = {
 	{"html",	HTML_NLALL},
 	{"head",	HTML_NLALL | HTML_INDENT},
-	{"body",	HTML_NLALL},
-	{"meta",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
+	{"meta",	HTML_NOSTACK | HTML_NLALL},
+	{"link",	HTML_NOSTACK | HTML_NLALL},
+	{"style",	HTML_NLALL | HTML_INDENT},
 	{"title",	HTML_NLAROUND},
+	{"body",	HTML_NLALL},
 	{"div",		HTML_NLAROUND},
 	{"div",		0},
 	{"section",	HTML_NLALL},
-	{"h1",		HTML_NLAROUND},
-	{"h2",		HTML_NLAROUND},
-	{"span",	0},
-	{"link",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
-	{"br",		HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
-	{"a",		0},
 	{"table",	HTML_NLALL | HTML_INDENT},
 	{"tr",		HTML_NLALL | HTML_INDENT},
 	{"td",		HTML_NLAROUND},
@@ -79,16 +76,20 @@ static	const struct htmldata htmltags[TA
 	{"dl",		HTML_NLALL | HTML_INDENT},
 	{"dt",		HTML_NLAROUND},
 	{"dd",		HTML_NLAROUND | HTML_INDENT},
-	{"p",		HTML_NLAROUND | HTML_INDENT},
-	{"pre",		HTML_NLALL | HTML_NOINDENT},
-	{"var",		0},
-	{"cite",	0},
-	{"b",		0},
-	{"i",		0},
-	{"code",	0},
-	{"small",	0},
-	{"style",	HTML_NLALL | HTML_INDENT},
-	{"math",	HTML_NLALL | HTML_INDENT},
+	{"h1",		HTML_TOPHRASE | HTML_NLAROUND},
+	{"h2",		HTML_TOPHRASE | HTML_NLAROUND},
+	{"p",		HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
+	{"pre",		HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT},
+	{"a",		HTML_INPHRASE | HTML_TOPHRASE},
+	{"b",		HTML_INPHRASE | HTML_TOPHRASE},
+	{"cite",	HTML_INPHRASE | HTML_TOPHRASE},
+	{"code",	HTML_INPHRASE | HTML_TOPHRASE},
+	{"i",		HTML_INPHRASE | HTML_TOPHRASE},
+	{"small",	HTML_INPHRASE | HTML_TOPHRASE},
+	{"span",	HTML_INPHRASE | HTML_TOPHRASE},
+	{"var",		HTML_INPHRASE | HTML_TOPHRASE},
+	{"br",		HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
+	{"math",	HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
 	{"mrow",	0},
 	{"mi",		0},
 	{"mn",		0},
@@ -584,6 +585,17 @@ print_otag(struct html *h, enum htmltag 
 
 	tflags = htmltags[tag].flags;
 
+	/* Flow content is not allowed in phrasing context. */
+
+	if ((tflags & HTML_INPHRASE) == 0) {
+		for (t = h->tag; t != NULL; t = t->next) {
+			if (t->closed)
+				continue;
+			assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
+			break;
+		}
+	}
+
 	/* Push this tag onto the stack of open scopes. */
 
 	if ((tflags & HTML_NOSTACK) == 0) {
@@ -701,7 +713,7 @@ print_otag(struct html *h, enum htmltag 
 
 	/* Accommodate for "well-formed" singleton escaping. */
 
-	if (HTML_AUTOCLOSE & htmltags[tag].flags)
+	if (htmltags[tag].flags & HTML_NOSTACK)
 		print_byte(h, '/');
 
 	print_byte(h, '>');
--
 To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, back to index

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-29 17:58 mandoc: In the HTML formatter, assert(3) that no HTML nesting violation occurs schwarze

source@mandoc.bsd.lv

Archives are clonable: git clone --mirror http://inbox.vuxu.org/mandoc-source

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://inbox.vuxu.org/vuxu.archive.mandoc.source


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git