source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: Improve error handling in the eqn(7) parser.
@ 2014-10-12 19:31 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2014-10-12 19:31 UTC (permalink / raw)
  To: source

Log Message:
-----------
Improve error handling in the eqn(7) parser.
Get rid of the first fatal error, MANDOCERR_EQNSYNT.
In eqn(7), there is no need to be bug-compatible with groff, so there
is no need to abandon the whole equation in case of a syntax error.

In particular:
* Skip "back", "delim", "down", "fwd", "gfont", "gsize", "left",
  "right", "size", and "up" without arguments.
* Skip "gsize" and "size" with a non-numeric argument.
* Skip closing delimiters that are not open.
* Skip "above" outside piles.
* For diacritic marks and binary operators without a left operand,
  default to an empty box.
* Let piles and matrices take one argument rather than insisting
  on a braced list.  Let HTML output handle that, too.
* When rewinding, if the root box is guaranteed to match
  the termination condition, no error handling is needed.

Modified Files:
--------------
    mdocml:
        eqn.c
        eqn_html.c
        mandoc.1
        mandoc.h
        read.c

Revision Data
-------------
Index: eqn.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/eqn.c,v
retrieving revision 1.51
retrieving revision 1.52
diff -Leqn.c -Leqn.c -u -p -r1.51 -r1.52
--- eqn.c
+++ eqn.c
@@ -656,7 +656,7 @@ static int
 eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
 {
 	char		*p;
-	enum eqn_tok	 tok;
+	enum eqn_tok	 tok, subtok;
 	enum eqn_post	 pos;
 	struct eqn_box	*cur;
 	int		 rc, size;
@@ -665,9 +665,12 @@ eqn_parse(struct eqn_node *ep, struct eq
 	const char	*start;
 
 	assert(NULL != parent);
-again:
 
-	switch ((tok = eqn_tok_parse(ep, &p))) {
+next_tok:
+	tok = eqn_tok_parse(ep, &p);
+
+this_tok:
+	switch (tok) {
 	case (EQN_TOK_UNDEF):
 		if ((rc = eqn_undef(ep)) <= 0)
 			return(rc);
@@ -686,10 +689,9 @@ again:
 		break;
 	case (EQN_TOK_DELIM):
 	case (EQN_TOK_GFONT):
-		if (NULL == eqn_nextrawtok(ep, NULL)) {
-			EQN_MSG(MANDOCERR_EQNSYNT, ep);
-			return(-1);
-		}
+		if (eqn_nextrawtok(ep, NULL) == NULL)
+			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
+			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
 		break;
 	case (EQN_TOK_MARK):
 	case (EQN_TOK_LINEUP):
@@ -703,9 +705,12 @@ again:
 	case (EQN_TOK_HAT):
 	case (EQN_TOK_DOT):
 	case (EQN_TOK_DOTDOT):
-		if (NULL == parent->last) {
-			EQN_MSG(MANDOCERR_EQNSYNT, ep);
-			return(-1);
+		if (parent->last == NULL) {
+			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
+			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
+			cur = eqn_box_alloc(ep, parent);
+			cur->type = EQN_TEXT;
+			cur->text = mandoc_strdup("");
 		}
 		parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent);
 		parent->type = EQN_LISTONE;
@@ -761,10 +766,12 @@ again:
 	case (EQN_TOK_BACK):
 	case (EQN_TOK_DOWN):
 	case (EQN_TOK_UP):
-		tok = eqn_tok_parse(ep, NULL);
-		if (EQN_TOK__MAX != tok) {
-			EQN_MSG(MANDOCERR_EQNSYNT, ep);
-			return(-1);
+		subtok = eqn_tok_parse(ep, NULL);
+		if (subtok != EQN_TOK__MAX) {
+			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
+			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
+			tok = subtok;
+			goto this_tok;
 		}
 		break;
 	case (EQN_TOK_FAT):
@@ -772,10 +779,7 @@ again:
 	case (EQN_TOK_ITALIC):
 	case (EQN_TOK_BOLD):
 		while (parent->args == parent->expectargs)
-			if (NULL == (parent = parent->parent)) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				return(-1);
-			}
+			parent = parent->parent;
 		/*
 		 * These values apply to the next word or sequence of
 		 * words; thus, we mark that we'll have a child with
@@ -805,13 +809,15 @@ again:
 	case (EQN_TOK_GSIZE):
 		/* Accept two values: integral size and a single. */
 		if (NULL == (start = eqn_nexttok(ep, &sz))) {
-			EQN_MSG(MANDOCERR_EQNSYNT, ep);
-			return(-1);
+			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
+			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
+			break;
 		}
 		size = mandoc_strntoi(start, sz, 10);
 		if (-1 == size) {
-			EQN_MSG(MANDOCERR_EQNSYNT, ep);
-			return(-1);
+			mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
+			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
+			break;
 		}
 		if (EQN_TOK_GSIZE == tok) {
 			ep->gsize = size;
@@ -831,9 +837,12 @@ again:
 		 * Repivot under a positional node, open a child scope
 		 * and keep on reading.
 		 */
-		if (NULL == parent->last) {
-			EQN_MSG(MANDOCERR_EQNSYNT, ep);
-			return(-1);
+		if (parent->last == NULL) {
+			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
+			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
+			cur = eqn_box_alloc(ep, parent);
+			cur->type = EQN_TEXT;
+			cur->text = mandoc_strdup("");
 		}
 		/* Handle the "subsup" and "fromto" positions. */
 		if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
@@ -866,10 +875,7 @@ again:
 		break;
 	case (EQN_TOK_SQRT):
 		while (parent->args == parent->expectargs)
-			if (NULL == (parent = parent->parent)) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				return(-1);
-			}
+			parent = parent->parent;
 		/*
 		 * Accept a left-right-associative set of arguments just
 		 * like sub and sup and friends but without rebalancing
@@ -886,15 +892,15 @@ again:
 		 * Close out anything that's currently open, then
 		 * rebalance and continue reading.
 		 */
-		if (NULL == parent->last) {
-			EQN_MSG(MANDOCERR_EQNSYNT, ep);
-			return(-1);
+		if (parent->last == NULL) {
+			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
+			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
+			cur = eqn_box_alloc(ep, parent);
+			cur->type = EQN_TEXT;
+			cur->text = mandoc_strdup("");
 		}
 		while (EQN_SUBEXPR == parent->type)
-			if (NULL == (parent = parent->parent)) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				return(-1);
-			}
+			parent = parent->parent;
 		parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
 		break;
 	case (EQN_TOK_RIGHT):
@@ -904,19 +910,23 @@ again:
 		 * FIXME: this is a shitty sentinel: we should really
 		 * have a native EQN_BRACE type or whatnot.
 		 */
-		while (parent->type != EQN_LIST)
-			if (NULL == (parent = parent->parent)) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				return(-1);
-			}
+		for (cur = parent; cur != NULL; cur = cur->parent)
+			if (cur->type == EQN_LIST &&
+			    (tok == EQN_TOK_BRACE_CLOSE ||
+			     cur->left != NULL))
+				break;
+		if (cur == NULL) {
+			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
+			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
+			break;
+		}
+		parent = cur;
 		if (EQN_TOK_RIGHT == tok) {
-			if (NULL == parent->left) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				return(-1);
-			}
 			if (NULL == (start = eqn_nexttok(ep, &sz))) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				return(-1);
+				mandoc_msg(MANDOCERR_REQ_EMPTY,
+				    ep->parse, ep->eqn.ln,
+				    ep->eqn.pos, eqn_toks[tok]);
+				break;
 			}
 			/* Handling depends on right/left. */
 			if (STRNEQ(start, sz, "ceiling", 7)) {
@@ -928,10 +938,7 @@ again:
 			} else
 				parent->right = mandoc_strndup(start, sz);
 		}
-		if (NULL == (parent = parent->parent)) {
-			EQN_MSG(MANDOCERR_EQNSYNT, ep);
-			return(-1);
-		}
+		parent = parent->parent;
 		if (EQN_TOK_BRACE_CLOSE == tok && parent &&
 		    (parent->type == EQN_PILE ||
 		     parent->type == EQN_MATRIX))
@@ -939,10 +946,7 @@ again:
 		/* Close out any "singleton" lists. */
 		while (parent->type == EQN_LISTONE &&
 		    parent->args == parent->expectargs)
-			if (NULL == (parent = parent->parent)) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				return(-1);
-			}
+			parent = parent->parent;
 		break;
 	case (EQN_TOK_BRACE_OPEN):
 	case (EQN_TOK_LEFT):
@@ -952,18 +956,16 @@ again:
 		 * (just like with the text node).
 		 */
 		while (parent->args == parent->expectargs)
-			if (NULL == (parent = parent->parent)) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				return(-1);
-			}
+			parent = parent->parent;
+		if (EQN_TOK_LEFT == tok &&
+		    (start = eqn_nexttok(ep, &sz)) == NULL) {
+			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
+			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
+			break;
+		}
 		parent = eqn_box_alloc(ep, parent);
 		parent->type = EQN_LIST;
 		if (EQN_TOK_LEFT == tok) {
-			if (NULL == (start = eqn_nexttok(ep, &sz))) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				return(-1);
-			}
-			/* Handling depends on right/left. */
 			if (STRNEQ(start, sz, "ceiling", 7)) {
 				strlcpy(sym, "\\[lc]", sizeof(sym));
 				parent->left = mandoc_strdup(sym);
@@ -982,42 +984,29 @@ again:
 	case (EQN_TOK_LCOL):
 	case (EQN_TOK_RCOL):
 		while (parent->args == parent->expectargs)
-			if (NULL == (parent = parent->parent)) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				return(-1);
-			}
-		if (EQN_TOK_BRACE_OPEN != eqn_tok_parse(ep, NULL)) {
-			EQN_MSG(MANDOCERR_EQNSYNT, ep);
-			return(-1);
-		}
+			parent = parent->parent;
 		parent = eqn_box_alloc(ep, parent);
 		parent->type = EQN_PILE;
-		parent = eqn_box_alloc(ep, parent);
-		parent->type = EQN_LIST;
+		parent->expectargs = 1;
 		break;
 	case (EQN_TOK_ABOVE):
-		while (parent->type != EQN_PILE)
-			if (NULL == (parent = parent->parent)) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				return(-1);
-			}
-		parent = eqn_box_alloc(ep, parent);
+		for (cur = parent; cur != NULL; cur = cur->parent)
+			if (cur->type == EQN_PILE)
+				break;
+		if (cur == NULL) {
+			mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
+			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
+			break;
+		}
+		parent = eqn_box_alloc(ep, cur);
 		parent->type = EQN_LIST;
 		break;
 	case (EQN_TOK_MATRIX):
 		while (parent->args == parent->expectargs)
-			if (NULL == (parent = parent->parent)) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				return(-1);
-			}
-		if (EQN_TOK_BRACE_OPEN != eqn_tok_parse(ep, NULL)) {
-			EQN_MSG(MANDOCERR_EQNSYNT, ep);
-			return(-1);
-		}
+			parent = parent->parent;
 		parent = eqn_box_alloc(ep, parent);
 		parent->type = EQN_MATRIX;
-		parent = eqn_box_alloc(ep, parent);
-		parent->type = EQN_LIST;
+		parent->expectargs = 1;
 		break;
 	case (EQN_TOK_EOF):
 		/*
@@ -1033,11 +1022,7 @@ again:
 		 * in an expression, then rewind til we're not any more.
 		 */
 		while (parent->args == parent->expectargs)
-			if (NULL == (parent = parent->parent)) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				free(p);
-				return(-1);
-			}
+			parent = parent->parent;
 		cur = eqn_box_alloc(ep, parent);
 		cur->type = EQN_TEXT;
 		for (i = 0; i < EQNSYM__MAX; i++)
@@ -1055,14 +1040,11 @@ again:
 		 * Post-process list status.
 		 */
 		while (parent->type == EQN_LISTONE &&
-			parent->args == parent->expectargs)
-			if (NULL == (parent = parent->parent)) {
-				EQN_MSG(MANDOCERR_EQNSYNT, ep);
-				return(-1);
-			}
+		    parent->args == parent->expectargs)
+			parent = parent->parent;
 		break;
 	}
-	goto again;
+	goto next_tok;
 }
 
 enum rofferr
Index: mandoc.1
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.1,v
retrieving revision 1.117
retrieving revision 1.118
diff -Lmandoc.1 -Lmandoc.1 -u -p -r1.117 -r1.118
--- mandoc.1
+++ mandoc.1
@@ -911,8 +911,11 @@ The previous, interrupted macro is delet
 .Ss "Warnings related to missing arguments"
 .Bl -ohang
 .It Sy "skipping empty request"
-.Pq roff
-The macro name is missing from a macro definition request.
+.Pq roff , eqn
+The macro name is missing from a macro definition request,
+or an
+.Xr eqn 7
+control statement or operation keyword lacks its required argument.
 .It Sy "conditional request controls empty scope"
 .Pq roff
 A conditional request is only useful if any of the following
@@ -1046,6 +1049,11 @@ The
 utility assumes
 .Fl std
 even when it is not specified, but other implementations may not.
+.It Sy "missing eqn box, using \(dq\(dq"
+.Pq eqn
+A diacritic mark or a binary operator is found,
+but there is nothing to the left of it.
+An empty box is inserted.
 .El
 .Ss "Warnings related to bad macro arguments"
 .Bl -ohang
@@ -1218,7 +1226,6 @@ keeps the code more readable.
 .It "equation scope open on exit"
 .It "overlapping equation scopes"
 .It "unexpected end of equation"
-.It "equation syntax error"
 .El
 .Ss "Errors related to tables"
 .Bl -inset -compact
@@ -1272,12 +1279,15 @@ macro.
 It may be mistyped or unsupported.
 The request or macro is discarded including its arguments.
 .It Sy "skipping item outside list"
-.Pq mdoc
+.Pq mdoc , eqn
 An
 .Ic \&It
 macro occurs outside any
 .Ic \&Bl
-list.
+list, or an
+.Xr eqn 7
+.Ic above
+delimiter occurs outside any pile.
 It is discarded including its arguments.
 .It Sy "skipping column outside column list"
 .Pq mdoc
@@ -1298,7 +1308,9 @@ block closing macro, a
 .Ic \&RE
 or
 .Ic \&UE
-macro, or the end of an equation, table, or
+macro, an
+.Xr eqn 7
+right delimiter or closing brace, or the end of an equation, table, or
 .Xr roff 7
 conditional request is encountered but no matching block is open.
 The offending request or macro is discarded.
@@ -1395,11 +1407,16 @@ An
 .Ic \&St
 macro has an unknown argument and is discarded.
 .It Sy "skipping request without numeric argument"
-.Pq roff
+.Pq roff , eqn
 An
 .Ic \&it
-request has a non-numeric or negative argument or no argument at all.
-The invalid request is ignored.
+request or an
+.Xr eqn 7
+.Ic \&size
+or
+.Ic \&gsize
+statement has a non-numeric or negative argument or no argument at all.
+The invalid request or statement is ignored.
 .It Sy "skipping all arguments"
 .Pq mdoc , man , eqn , roff
 An
Index: mandoc.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.h,v
retrieving revision 1.161
retrieving revision 1.162
diff -Lmandoc.h -Lmandoc.h -u -p -r1.161 -r1.162
--- mandoc.h
+++ mandoc.h
@@ -104,6 +104,7 @@ enum	mandocerr {
 	MANDOCERR_BF_NOFONT, /* missing font type, using \fR: Bf */
 	MANDOCERR_BF_BADFONT, /* unknown font type, using \fR: Bf font */
 	MANDOCERR_ARG_STD, /* missing -std argument, adding it: macro */
+	MANDOCERR_EQN_NOBOX, /* missing eqn box, using "": op */
 
 	/* related to bad arguments */
 	MANDOCERR_ARG_QUOTE, /* unterminated quoted argument */
@@ -134,7 +135,6 @@ enum	mandocerr {
 	MANDOCERR_EQNSCOPE, /* equation scope open on exit */
 	MANDOCERR_EQNBADSCOPE, /* overlapping equation scopes */
 	MANDOCERR_EQNEOF, /* unexpected end of equation */
-	MANDOCERR_EQNSYNT, /* equation syntax error */
 
 	/* related to tables */
 	MANDOCERR_TBL, /* bad table syntax */
Index: eqn_html.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/eqn_html.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -Leqn_html.c -Leqn_html.c -u -p -r1.9 -r1.10
--- eqn_html.c
+++ eqn_html.c
@@ -47,10 +47,12 @@ eqn_box(struct html *p, const struct eqn
 	if (EQN_MATRIX == bp->type) {
 		if (NULL == bp->first)
 			goto out;
-		assert(EQN_LIST == bp->first->type);
+		if (EQN_LIST != bp->first->type) {
+			eqn_box(p, bp->first);
+			goto out;
+		}
 		if (NULL == (parent = bp->first->first))
 			goto out;
-		assert(EQN_PILE == parent->type);
 		/* Estimate the number of rows, first. */
 		if (NULL == (child = parent->first))
 			goto out;
@@ -126,8 +128,10 @@ eqn_box(struct html *p, const struct eqn
 
 	if (EQN_PILE == bp->type) {
 		assert(NULL == post);
-		post = print_otag(p, TAG_MTABLE, 0, NULL);
-	} else if (bp->parent && EQN_PILE == bp->parent->type) {
+		if (bp->first != NULL && bp->first->type == EQN_LIST)
+			post = print_otag(p, TAG_MTABLE, 0, NULL);
+	} else if (bp->type == EQN_LIST &&
+	    bp->parent && bp->parent->type == EQN_PILE) {
 		assert(NULL == post);
 		post = print_otag(p, TAG_MTR, 0, NULL);
 		print_otag(p, TAG_MTD, 0, NULL);
Index: read.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/read.c,v
retrieving revision 1.89
retrieving revision 1.90
diff -Lread.c -Lread.c -u -p -r1.89 -r1.90
--- read.c
+++ read.c
@@ -149,6 +149,7 @@ static	const char * const	mandocerrs[MAN
 	"missing font type, using \\fR",
 	"unknown font type, using \\fR",
 	"missing -std argument, adding it",
+	"missing eqn box, using \"\"",
 
 	/* related to bad macro arguments */
 	"unterminated quoted argument",
@@ -179,7 +180,6 @@ static	const char * const	mandocerrs[MAN
 	"equation scope open on exit",
 	"overlapping equation scopes",
 	"unexpected end of equation",
-	"equation syntax error",
 
 	/* related to tables */
 	"bad table syntax",
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2014-10-12 19:31 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-12 19:31 mdocml: Improve error handling in the eqn(7) parser schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).