source@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: schwarze@mandoc.bsd.lv
To: source@mandoc.bsd.lv
Subject: mandoc: In the HTML formatter, assert(3) that no HTML nesting violation
Date: Thu, 29 Aug 2019 12:58:00 -0500 (EST)	[thread overview]
Message-ID: <862a7c8bc0acf18b@mandoc.bsd.lv> (raw)

Log Message:
-----------
In the HTML formatter, assert(3) that no HTML nesting violation occurs.  
Tested on the complete manual page trees of Version 7 AT&T UNIX,
4.4BSD-Lite2, POSIX-2013, OpenBSD 2.2 to 6.5 and -current, 
FreeBSD 10.0 to 12.0, NetBSD 6.1.5 to 8.1, DragonFly 3.8.2 to 5.6.1, 
and Linux 4.05 to 5.02.

Modified Files:
--------------
    mandoc:
        html.c
        html.h

Revision Data
-------------
Index: html.h
===================================================================
RCS file: /home/cvs/mandoc/mandoc/html.h,v
retrieving revision 1.103
retrieving revision 1.104
diff -Lhtml.h -Lhtml.h -u -p -r1.103 -r1.104
--- html.h
+++ html.h
@@ -19,18 +19,14 @@
 enum	htmltag {
 	TAG_HTML,
 	TAG_HEAD,
-	TAG_BODY,
 	TAG_META,
+	TAG_LINK,
+	TAG_STYLE,
 	TAG_TITLE,
+	TAG_BODY,
 	TAG_DIV,
 	TAG_IDIV,
 	TAG_SECTION,
-	TAG_H1,
-	TAG_H2,
-	TAG_SPAN,
-	TAG_LINK,
-	TAG_BR,
-	TAG_A,
 	TAG_TABLE,
 	TAG_TR,
 	TAG_TD,
@@ -40,15 +36,19 @@ enum	htmltag {
 	TAG_DL,
 	TAG_DT,
 	TAG_DD,
+	TAG_H1,
+	TAG_H2,
 	TAG_P,
 	TAG_PRE,
-	TAG_VAR,
-	TAG_CITE,
+	TAG_A,
 	TAG_B,
-	TAG_I,
+	TAG_CITE,
 	TAG_CODE,
+	TAG_I,
 	TAG_SMALL,
-	TAG_STYLE,
+	TAG_SPAN,
+	TAG_VAR,
+	TAG_BR,
 	TAG_MATH,
 	TAG_MROW,
 	TAG_MI,
Index: html.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/html.c,v
retrieving revision 1.256
retrieving revision 1.257
diff -Lhtml.c -Lhtml.c -u -p -r1.256 -r1.257
--- html.c
+++ html.c
@@ -42,34 +42,31 @@
 struct	htmldata {
 	const char	 *name;
 	int		  flags;
-#define	HTML_NOSTACK	 (1 << 0)
-#define	HTML_AUTOCLOSE	 (1 << 1)
-#define	HTML_NLBEFORE	 (1 << 2)
-#define	HTML_NLBEGIN	 (1 << 3)
-#define	HTML_NLEND	 (1 << 4)
-#define	HTML_NLAFTER	 (1 << 5)
+#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
+#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
+#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
+#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
+#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
+#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
+#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
 #define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
 #define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
 #define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
-#define	HTML_INDENT	 (1 << 6)
-#define	HTML_NOINDENT	 (1 << 7)
+#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
+#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */
 };
 
 static	const struct htmldata htmltags[TAG_MAX] = {
 	{"html",	HTML_NLALL},
 	{"head",	HTML_NLALL | HTML_INDENT},
-	{"body",	HTML_NLALL},
-	{"meta",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
+	{"meta",	HTML_NOSTACK | HTML_NLALL},
+	{"link",	HTML_NOSTACK | HTML_NLALL},
+	{"style",	HTML_NLALL | HTML_INDENT},
 	{"title",	HTML_NLAROUND},
+	{"body",	HTML_NLALL},
 	{"div",		HTML_NLAROUND},
 	{"div",		0},
 	{"section",	HTML_NLALL},
-	{"h1",		HTML_NLAROUND},
-	{"h2",		HTML_NLAROUND},
-	{"span",	0},
-	{"link",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
-	{"br",		HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
-	{"a",		0},
 	{"table",	HTML_NLALL | HTML_INDENT},
 	{"tr",		HTML_NLALL | HTML_INDENT},
 	{"td",		HTML_NLAROUND},
@@ -79,16 +76,20 @@ static	const struct htmldata htmltags[TA
 	{"dl",		HTML_NLALL | HTML_INDENT},
 	{"dt",		HTML_NLAROUND},
 	{"dd",		HTML_NLAROUND | HTML_INDENT},
-	{"p",		HTML_NLAROUND | HTML_INDENT},
-	{"pre",		HTML_NLALL | HTML_NOINDENT},
-	{"var",		0},
-	{"cite",	0},
-	{"b",		0},
-	{"i",		0},
-	{"code",	0},
-	{"small",	0},
-	{"style",	HTML_NLALL | HTML_INDENT},
-	{"math",	HTML_NLALL | HTML_INDENT},
+	{"h1",		HTML_TOPHRASE | HTML_NLAROUND},
+	{"h2",		HTML_TOPHRASE | HTML_NLAROUND},
+	{"p",		HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
+	{"pre",		HTML_TOPHRASE | HTML_NLALL | HTML_NOINDENT},
+	{"a",		HTML_INPHRASE | HTML_TOPHRASE},
+	{"b",		HTML_INPHRASE | HTML_TOPHRASE},
+	{"cite",	HTML_INPHRASE | HTML_TOPHRASE},
+	{"code",	HTML_INPHRASE | HTML_TOPHRASE},
+	{"i",		HTML_INPHRASE | HTML_TOPHRASE},
+	{"small",	HTML_INPHRASE | HTML_TOPHRASE},
+	{"span",	HTML_INPHRASE | HTML_TOPHRASE},
+	{"var",		HTML_INPHRASE | HTML_TOPHRASE},
+	{"br",		HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
+	{"math",	HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
 	{"mrow",	0},
 	{"mi",		0},
 	{"mn",		0},
@@ -584,6 +585,17 @@ print_otag(struct html *h, enum htmltag 
 
 	tflags = htmltags[tag].flags;
 
+	/* Flow content is not allowed in phrasing context. */
+
+	if ((tflags & HTML_INPHRASE) == 0) {
+		for (t = h->tag; t != NULL; t = t->next) {
+			if (t->closed)
+				continue;
+			assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
+			break;
+		}
+	}
+
 	/* Push this tag onto the stack of open scopes. */
 
 	if ((tflags & HTML_NOSTACK) == 0) {
@@ -701,7 +713,7 @@ print_otag(struct html *h, enum htmltag 
 
 	/* Accommodate for "well-formed" singleton escaping. */
 
-	if (HTML_AUTOCLOSE & htmltags[tag].flags)
+	if (htmltags[tag].flags & HTML_NOSTACK)
 		print_byte(h, '/');
 
 	print_byte(h, '>');
--
 To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv

                 reply	other threads:[~2019-08-29 17:58 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=862a7c8bc0acf18b@mandoc.bsd.lv \
    --to=schwarze@mandoc.bsd.lv \
    --cc=source@mandoc.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).