From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from krisdoz.my.domain (schwarze@localhost [127.0.0.1]) by krisdoz.my.domain (8.14.5/8.14.5) with ESMTP id s9CJVfsE024037 for ; Sun, 12 Oct 2014 15:31:41 -0400 (EDT) Received: (from schwarze@localhost) by krisdoz.my.domain (8.14.5/8.14.3/Submit) id s9CJVfLs017685; Sun, 12 Oct 2014 15:31:41 -0400 (EDT) Date: Sun, 12 Oct 2014 15:31:41 -0400 (EDT) Message-Id: <201410121931.s9CJVfLs017685@krisdoz.my.domain> X-Mailinglist: mdocml-source Reply-To: source@mdocml.bsd.lv MIME-Version: 1.0 From: schwarze@mdocml.bsd.lv To: source@mdocml.bsd.lv Subject: mdocml: Improve error handling in the eqn(7) parser. X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Log Message: ----------- Improve error handling in the eqn(7) parser. Get rid of the first fatal error, MANDOCERR_EQNSYNT. In eqn(7), there is no need to be bug-compatible with groff, so there is no need to abondon the whole equation in case of a syntax error. In particular: * Skip "back", "delim", "down", "fwd", "gfont", "gsize", "left", "right", "size", and "up" without arguments. * Skip "gsize" and "size" with a non-numeric argument. * Skip closing delimiters that are not open. * Skip "above" outside piles. * For diacritic marks and binary operators without a left operand, default to an empty box. * Let piles and matrices take one argument rather than insisting on a braced list. Let HTML output handle that, too. * When rewinding, if the root box is guaranteed to match the termination condition, no error handling is needed. Modified Files: -------------- mdocml: eqn.c eqn_html.c mandoc.1 mandoc.h read.c Revision Data ------------- Index: eqn.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/eqn.c,v retrieving revision 1.51 retrieving revision 1.52 diff -Leqn.c -Leqn.c -u -p -r1.51 -r1.52 --- eqn.c +++ eqn.c @@ -656,7 +656,7 @@ static int eqn_parse(struct eqn_node *ep, struct eqn_box *parent) { char *p; - enum eqn_tok tok; + enum eqn_tok tok, subtok; enum eqn_post pos; struct eqn_box *cur; int rc, size; @@ -665,9 +665,12 @@ eqn_parse(struct eqn_node *ep, struct eq const char *start; assert(NULL != parent); -again: - switch ((tok = eqn_tok_parse(ep, &p))) { +next_tok: + tok = eqn_tok_parse(ep, &p); + +this_tok: + switch (tok) { case (EQN_TOK_UNDEF): if ((rc = eqn_undef(ep)) <= 0) return(rc); @@ -686,10 +689,9 @@ again: break; case (EQN_TOK_DELIM): case (EQN_TOK_GFONT): - if (NULL == eqn_nextrawtok(ep, NULL)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + if (eqn_nextrawtok(ep, NULL) == NULL) + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); break; case (EQN_TOK_MARK): case (EQN_TOK_LINEUP): @@ -703,9 +705,12 @@ again: case (EQN_TOK_HAT): case (EQN_TOK_DOT): case (EQN_TOK_DOTDOT): - if (NULL == parent->last) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + if (parent->last == NULL) { + mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + cur->text = mandoc_strdup(""); } parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent); parent->type = EQN_LISTONE; @@ -761,10 +766,12 @@ again: case (EQN_TOK_BACK): case (EQN_TOK_DOWN): case (EQN_TOK_UP): - tok = eqn_tok_parse(ep, NULL); - if (EQN_TOK__MAX != tok) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + subtok = eqn_tok_parse(ep, NULL); + if (subtok != EQN_TOK__MAX) { + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + tok = subtok; + goto this_tok; } break; case (EQN_TOK_FAT): @@ -772,10 +779,7 @@ again: case (EQN_TOK_ITALIC): case (EQN_TOK_BOLD): while (parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; /* * These values apply to the next word or sequence of * words; thus, we mark that we'll have a child with @@ -805,13 +809,15 @@ again: case (EQN_TOK_GSIZE): /* Accept two values: integral size and a single. */ if (NULL == (start = eqn_nexttok(ep, &sz))) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; } size = mandoc_strntoi(start, sz, 10); if (-1 == size) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + mandoc_msg(MANDOCERR_IT_NONUM, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; } if (EQN_TOK_GSIZE == tok) { ep->gsize = size; @@ -831,9 +837,12 @@ again: * Repivot under a positional node, open a child scope * and keep on reading. */ - if (NULL == parent->last) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + if (parent->last == NULL) { + mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + cur->text = mandoc_strdup(""); } /* Handle the "subsup" and "fromto" positions. */ if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) { @@ -866,10 +875,7 @@ again: break; case (EQN_TOK_SQRT): while (parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; /* * Accept a left-right-associative set of arguments just * like sub and sup and friends but without rebalancing @@ -886,15 +892,15 @@ again: * Close out anything that's currently open, then * rebalance and continue reading. */ - if (NULL == parent->last) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + if (parent->last == NULL) { + mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + cur->text = mandoc_strdup(""); } while (EQN_SUBEXPR == parent->type) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent); break; case (EQN_TOK_RIGHT): @@ -904,19 +910,23 @@ again: * FIXME: this is a shitty sentinel: we should really * have a native EQN_BRACE type or whatnot. */ - while (parent->type != EQN_LIST) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + for (cur = parent; cur != NULL; cur = cur->parent) + if (cur->type == EQN_LIST && + (tok == EQN_TOK_BRACE_CLOSE || + cur->left != NULL)) + break; + if (cur == NULL) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; + } + parent = cur; if (EQN_TOK_RIGHT == tok) { - if (NULL == parent->left) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } if (NULL == (start = eqn_nexttok(ep, &sz))) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); + mandoc_msg(MANDOCERR_REQ_EMPTY, + ep->parse, ep->eqn.ln, + ep->eqn.pos, eqn_toks[tok]); + break; } /* Handling depends on right/left. */ if (STRNEQ(start, sz, "ceiling", 7)) { @@ -928,10 +938,7 @@ again: } else parent->right = mandoc_strndup(start, sz); } - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; if (EQN_TOK_BRACE_CLOSE == tok && parent && (parent->type == EQN_PILE || parent->type == EQN_MATRIX)) @@ -939,10 +946,7 @@ again: /* Close out any "singleton" lists. */ while (parent->type == EQN_LISTONE && parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; break; case (EQN_TOK_BRACE_OPEN): case (EQN_TOK_LEFT): @@ -952,18 +956,16 @@ again: * (just like with the text node). */ while (parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; + if (EQN_TOK_LEFT == tok && + (start = eqn_nexttok(ep, &sz)) == NULL) { + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; + } parent = eqn_box_alloc(ep, parent); parent->type = EQN_LIST; if (EQN_TOK_LEFT == tok) { - if (NULL == (start = eqn_nexttok(ep, &sz))) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } - /* Handling depends on right/left. */ if (STRNEQ(start, sz, "ceiling", 7)) { strlcpy(sym, "\\[lc]", sizeof(sym)); parent->left = mandoc_strdup(sym); @@ -982,42 +984,29 @@ again: case (EQN_TOK_LCOL): case (EQN_TOK_RCOL): while (parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } - if (EQN_TOK_BRACE_OPEN != eqn_tok_parse(ep, NULL)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; parent = eqn_box_alloc(ep, parent); parent->type = EQN_PILE; - parent = eqn_box_alloc(ep, parent); - parent->type = EQN_LIST; + parent->expectargs = 1; break; case (EQN_TOK_ABOVE): - while (parent->type != EQN_PILE) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } - parent = eqn_box_alloc(ep, parent); + for (cur = parent; cur != NULL; cur = cur->parent) + if (cur->type == EQN_PILE) + break; + if (cur == NULL) { + mandoc_msg(MANDOCERR_IT_STRAY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; + } + parent = eqn_box_alloc(ep, cur); parent->type = EQN_LIST; break; case (EQN_TOK_MATRIX): while (parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } - if (EQN_TOK_BRACE_OPEN != eqn_tok_parse(ep, NULL)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent = parent->parent; parent = eqn_box_alloc(ep, parent); parent->type = EQN_MATRIX; - parent = eqn_box_alloc(ep, parent); - parent->type = EQN_LIST; + parent->expectargs = 1; break; case (EQN_TOK_EOF): /* @@ -1033,11 +1022,7 @@ again: * in an expression, then rewind til we're not any more. */ while (parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - free(p); - return(-1); - } + parent = parent->parent; cur = eqn_box_alloc(ep, parent); cur->type = EQN_TEXT; for (i = 0; i < EQNSYM__MAX; i++) @@ -1055,14 +1040,11 @@ again: * Post-process list status. */ while (parent->type == EQN_LISTONE && - parent->args == parent->expectargs) - if (NULL == (parent = parent->parent)) { - EQN_MSG(MANDOCERR_EQNSYNT, ep); - return(-1); - } + parent->args == parent->expectargs) + parent = parent->parent; break; } - goto again; + goto next_tok; } enum rofferr Index: mandoc.1 =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.1,v retrieving revision 1.117 retrieving revision 1.118 diff -Lmandoc.1 -Lmandoc.1 -u -p -r1.117 -r1.118 --- mandoc.1 +++ mandoc.1 @@ -911,8 +911,11 @@ The previous, interrupted macro is delet .Ss "Warnings related to missing arguments" .Bl -ohang .It Sy "skipping empty request" -.Pq roff -The macro name is missing from a macro definition request. +.Pq roff , eqn +The macro name is missing from a macro definition request, +or an +.Xr eqn 7 +control statement or operation keyword lacks its required argument. .It Sy "conditional request controls empty scope" .Pq roff A conditional request is only useful if any of the following @@ -1046,6 +1049,11 @@ The utility assumes .Fl std even when it is not specified, but other implementations may not. +.It Sy "missing eqn box, using \(dq\(dq" +.Pq eqn +A diacritic mark or a binary operator is found, +but there is nothing to the left of it. +An empty box is inserted. .El .Ss "Warnings related to bad macro arguments" .Bl -ohang @@ -1218,7 +1226,6 @@ keeps the code more readable. .It "equation scope open on exit" .It "overlapping equation scopes" .It "unexpected end of equation" -.It "equation syntax error" .El .Ss "Errors related to tables" .Bl -inset -compact @@ -1272,12 +1279,15 @@ macro. It may be mistyped or unsupported. The request or macro is discarded including its arguments. .It Sy "skipping item outside list" -.Pq mdoc +.Pq mdoc , eqn An .Ic \&It macro occurs outside any .Ic \&Bl -list. +list, or an +.Xr eqn 7 +.Ic above +delimiter occurs outside any pile. It is discarded including its arguments. .It Sy "skipping column outside column list" .Pq mdoc @@ -1298,7 +1308,9 @@ block closing macro, a .Ic \&RE or .Ic \&UE -macro, or the end of an equation, table, or +macro, an +.Xr eqn 7 +right delimiter or closing brace, or the end of an equation, table, or .Xr roff 7 conditional request is encountered but no matching block is open. The offending request or macro is discarded. @@ -1395,11 +1407,16 @@ An .Ic \&St macro has an unknown argument and is discarded. .It Sy "skipping request without numeric argument" -.Pq roff +.Pq roff , eqn An .Ic \&it -request has a non-numeric or negative argument or no argument at all. -The invalid request is ignored. +request or an +.Xr eqn 7 +.Ic \&size +or +.Ic \&gsize +statement has a non-numeric or negative argument or no argument at all. +The invalid request or statement is ignored. .It Sy "skipping all arguments" .Pq mdoc , man , eqn , roff An Index: mandoc.h =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.h,v retrieving revision 1.161 retrieving revision 1.162 diff -Lmandoc.h -Lmandoc.h -u -p -r1.161 -r1.162 --- mandoc.h +++ mandoc.h @@ -104,6 +104,7 @@ enum mandocerr { MANDOCERR_BF_NOFONT, /* missing font type, using \fR: Bf */ MANDOCERR_BF_BADFONT, /* unknown font type, using \fR: Bf font */ MANDOCERR_ARG_STD, /* missing -std argument, adding it: macro */ + MANDOCERR_EQN_NOBOX, /* missing eqn box, using "": op */ /* related to bad arguments */ MANDOCERR_ARG_QUOTE, /* unterminated quoted argument */ @@ -134,7 +135,6 @@ enum mandocerr { MANDOCERR_EQNSCOPE, /* equation scope open on exit */ MANDOCERR_EQNBADSCOPE, /* overlapping equation scopes */ MANDOCERR_EQNEOF, /* unexpected end of equation */ - MANDOCERR_EQNSYNT, /* equation syntax error */ /* related to tables */ MANDOCERR_TBL, /* bad table syntax */ Index: eqn_html.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/eqn_html.c,v retrieving revision 1.9 retrieving revision 1.10 diff -Leqn_html.c -Leqn_html.c -u -p -r1.9 -r1.10 --- eqn_html.c +++ eqn_html.c @@ -47,10 +47,12 @@ eqn_box(struct html *p, const struct eqn if (EQN_MATRIX == bp->type) { if (NULL == bp->first) goto out; - assert(EQN_LIST == bp->first->type); + if (EQN_LIST != bp->first->type) { + eqn_box(p, bp->first); + goto out; + } if (NULL == (parent = bp->first->first)) goto out; - assert(EQN_PILE == parent->type); /* Estimate the number of rows, first. */ if (NULL == (child = parent->first)) goto out; @@ -126,8 +128,10 @@ eqn_box(struct html *p, const struct eqn if (EQN_PILE == bp->type) { assert(NULL == post); - post = print_otag(p, TAG_MTABLE, 0, NULL); - } else if (bp->parent && EQN_PILE == bp->parent->type) { + if (bp->first != NULL && bp->first->type == EQN_LIST) + post = print_otag(p, TAG_MTABLE, 0, NULL); + } else if (bp->type == EQN_LIST && + bp->parent && bp->parent->type == EQN_PILE) { assert(NULL == post); post = print_otag(p, TAG_MTR, 0, NULL); print_otag(p, TAG_MTD, 0, NULL); Index: read.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/read.c,v retrieving revision 1.89 retrieving revision 1.90 diff -Lread.c -Lread.c -u -p -r1.89 -r1.90 --- read.c +++ read.c @@ -149,6 +149,7 @@ static const char * const mandocerrs[MAN "missing font type, using \\fR", "unknown font type, using \\fR", "missing -std argument, adding it", + "missing eqn box, using \"\"", /* related to bad macro arguments */ "unterminated quoted argument", @@ -179,7 +180,6 @@ static const char * const mandocerrs[MAN "equation scope open on exit", "overlapping equation scopes", "unexpected end of equation", - "equation syntax error", /* related to tables */ "bad table syntax", -- To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv