tech@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: Ingo Schwarze <schwarze@usta.de>
To: tech@mdocml.bsd.lv
Subject: rew_dohalt reorg and bad block-nesting bug
Date: Tue, 29 Jun 2010 07:06:26 +0200	[thread overview]
Message-ID: <20100629050626.GH31959@iris.usta.de> (raw)
In-Reply-To: <4C27E306.6070605@bsd.lv>

Hi,

just to let you know what happened here...

After brushing up the badly nested blocks patch according to Kristaps'
comments, i went over to the .Nm stuff, SYNOPSIS indentation.
As you know, this is still somewhat broken because there are still
bugs in the rewind rules.  Trying to fix these, i lost patience
with rew_dohalt and rew_dobreak and decided to clean them up.

So i wrote a +46 -171 patch not changing functionality, but making
this a lot easier.  That patch was just ready when i realized that
it conflicts really badly with my badly nested blocks patch.

So i ported the rew_dohalt reorg over to apply on top of the
badly nested blocks patch, where it turns out to be +66 -200.
But while doing that, i found a really nasty bug in the badly
nested block patch.  It fails miserably when one block is broken
twice by two blocks of the same type, e.g. ".Ao Ao Bo Ac Ac Bc".
In this situation, the first Ac breaks Bo, sets the second Ao
pending on Bo, and adds an Ao end-marker.  The second Ac
finds the end marker and doesn't close itself out at all,
thinking that it were already pending.  Finally, the Bc will
only close Bo and the second Ao, but not the first one;
the first one will stay open and make the parser fail.

Now this is terribly hard to fix because rew_sub only gets
type and tok arguments, but it has no idea which block it is
trying to rewind.  It cannot distinguish whether or not the
Ao end marker belongs to itself or to another open block.

This is also relevant because ".Op Op Oo\n.Oc" triggers the same
bug and does occur in practice.

Currently, i'm not quite sure in which order to proceed.  Maybe
 1) sync
 2) tag
 3) merge badly nested blocks without the bugfix and reorg
 4) merge the reorg without the bugfix
 5) finish the .Nm/SYNOPSIS indentation stuff
 6) look at the double-breaking bug

I guess i will have to look at this once again tomorrow in the
morning.  I should better sleep now.

Right now, i'm just sending the reorg such that you can have
a look.  It must be applied on top of the latest badly nested
block patch, otherwise it won't apply at all.

Yours,
  Ingo


diff -Naurp mandoc-block-nest/mdoc_macro.c mandoc-bn-rew/mdoc_macro.c
--- mandoc-block-nest/mdoc_macro.c	Mon Jun 28 10:57:36 2010
+++ mandoc-bn-rew/mdoc_macro.c	Mon Jun 28 22:35:02 2010
@@ -25,10 +25,12 @@
 #include "libmdoc.h"
 #include "libmandoc.h"
 
-enum	rew {
-	REWIND_REWIND,
-	REWIND_NOHALT,
-	REWIND_HALT
+enum	rew {	/* see rew_dohalt() */
+	REWIND_NONE,
+	REWIND_THIS,
+	REWIND_MORE,
+	REWIND_LATER,
+	REWIND_ERROR,
 };
 
 static	int	  	blk_full(MACRO_PROT_ARGS);
@@ -50,8 +52,6 @@ static	int		make_pending(struct mdoc_node *, enum mdoc
 				struct mdoc *, int, int);
 static	int	  	phrase(struct mdoc *, int, int, char *);
 static	enum mdoct 	rew_alt(enum mdoct);
-static	int	  	rew_dobreak(enum mdoct, 
-				const struct mdoc_node *);
 static	enum rew  	rew_dohalt(enum mdoct, enum mdoc_type, 
 				const struct mdoc_node *);
 static	int	  	rew_elem(struct mdoc *, enum mdoct);
@@ -271,8 +271,8 @@ rew_last(struct mdoc *mdoc, const struct mdoc_node *to
 
 
 /*
- * Return the opening macro of a closing one, e.g., `Ec' has `Eo' as its
- * matching pair.
+ * For a block closing macro, return the corresponding opening one.
+ * Otherwise, return the macro itself.
  */
 static enum mdoct
 rew_alt(enum mdoct tok)
@@ -311,18 +311,20 @@ rew_alt(enum mdoct tok)
 	case (MDOC_Xc):
 		return(MDOC_Xo);
 	default:
-		break;
+		return(tok);
 	}
-	abort();
 	/* NOTREACHED */
 }
 
 
-/* 
- * Rewind rules.  This indicates whether to stop rewinding
- * (REWIND_HALT) without touching our current scope, stop rewinding and
- * close our current scope (REWIND_REWIND), or continue (REWIND_NOHALT).
- * The scope-closing and so on occurs in the various rew_* routines.
+/*
+ * Rewinding to tok, how do we have to handle *p?
+ * REWIND_NONE: *p would delimit tok, but no tok scope is open
+ *   inside *p, so there is no need to rewind anything at all.
+ * REWIND_THIS: *p matches tok, so rewind *p and nothing else.
+ * REWIND_MORE: *p is implicit, rewind it and keep searching for tok.
+ * REWIND_LATER: *p is explicit and still open, postpone rewinding.
+ * REWIND_ERROR: No tok block is open at all.
  */
 static enum rew
 rew_dohalt(enum mdoct tok, enum mdoc_type type, 
@@ -330,197 +332,57 @@ rew_dohalt(enum mdoct tok, enum mdoc_type type, 
 {
 
 	if (MDOC_ROOT == p->type)
-		return(REWIND_HALT);
-	if (MDOC_VALID & p->flags)
-		return(REWIND_NOHALT);
+		return(MDOC_BLOCK == type &&
+		    MDOC_EXPLICIT & mdoc_macros[tok].flags ?
+		    REWIND_ERROR : REWIND_NONE);
+	if (MDOC_TEXT == p->type || MDOC_VALID & p->flags)
+		return(REWIND_MORE);
 
+	tok = rew_alt(tok);
+	if (tok == p->tok)
+		return(p->end ? REWIND_NONE :
+		    type == p->type ? REWIND_THIS : REWIND_MORE);
+
+	if (MDOC_ELEM == p->type)
+		return(REWIND_MORE);
+
 	switch (tok) {
-	case (MDOC_Aq):
-		/* FALLTHROUGH */
-	case (MDOC_Bq):
-		/* FALLTHROUGH */
-	case (MDOC_Brq):
-		/* FALLTHROUGH */
-	case (MDOC_D1):
-		/* FALLTHROUGH */
-	case (MDOC_Dl):
-		/* FALLTHROUGH */
-	case (MDOC_Dq):
-		/* FALLTHROUGH */
-	case (MDOC_Op):
-		/* FALLTHROUGH */
-	case (MDOC_Pq):
-		/* FALLTHROUGH */
-	case (MDOC_Ql):
-		/* FALLTHROUGH */
-	case (MDOC_Qq):
-		/* FALLTHROUGH */
-	case (MDOC_Sq):
-		/* FALLTHROUGH */
-	case (MDOC_Vt):
-		assert(MDOC_TAIL != type);
-		if (tok != p->tok)
-			break;
-		if (p->end)
-			return(REWIND_HALT);
-		if (type == p->type)
-			return(REWIND_REWIND);
+	case (MDOC_Bl):
+		if (MDOC_It == p->tok)
+			return(REWIND_MORE);
 		break;
 	case (MDOC_It):
-		assert(MDOC_TAIL != type);
-		if (type == p->type && tok == p->tok)
-			return(REWIND_REWIND);
 		if (MDOC_BODY == p->type && MDOC_Bl == p->tok)
-			return(REWIND_HALT);
+			return(REWIND_NONE);
 		break;
-	case (MDOC_Sh):
-		if (type == p->type && tok == p->tok)
-			return(REWIND_REWIND);
+	/*
+	 * XXX Badly nested block handling still fails badly
+	 * when one block is breaking two blocks of the same type.
+	 * This is an uncomplete and extremely ugly workaround,
+	 * required to let the OpenBSD tree build.
+	 */
+	case (MDOC_Oo):
+		if (MDOC_Op == p->tok)
+			return(REWIND_MORE);
 		break;
 	case (MDOC_Nd):
 		/* FALLTHROUGH */
 	case (MDOC_Ss):
-		assert(MDOC_TAIL != type);
-		if (type == p->type && tok == p->tok)
-			return(REWIND_REWIND);
 		if (MDOC_BODY == p->type && MDOC_Sh == p->tok)
-			return(REWIND_HALT);
-		break;
-	case (MDOC_Ao):
+			return(REWIND_NONE);
 		/* FALLTHROUGH */
-	case (MDOC_Bd):
-		/* FALLTHROUGH */
-	case (MDOC_Bf):
-		/* FALLTHROUGH */
-	case (MDOC_Bk):
-		/* FALLTHROUGH */
-	case (MDOC_Bl):
-		/* FALLTHROUGH */
-	case (MDOC_Bo):
-		/* FALLTHROUGH */
-	case (MDOC_Bro):
-		/* FALLTHROUGH */
-	case (MDOC_Do):
-		/* FALLTHROUGH */
-	case (MDOC_Eo):
-		/* FALLTHROUGH */
-	case (MDOC_Fo):
-		/* FALLTHROUGH */
-	case (MDOC_Oo):
-		/* FALLTHROUGH */
-	case (MDOC_Po):
-		/* FALLTHROUGH */
-	case (MDOC_Qo):
-		/* FALLTHROUGH */
-	case (MDOC_Rs):
-		/* FALLTHROUGH */
-	case (MDOC_So):
-		/* FALLTHROUGH */
-	case (MDOC_Xo):
-		if (tok != p->tok)
-			break;
-		if (p->end)
-			return(REWIND_HALT);
-		if (type == p->type)
-			return(REWIND_REWIND);
-		break;
-	/* Multi-line explicit scope close. */
-	case (MDOC_Ac):
-		/* FALLTHROUGH */
-	case (MDOC_Bc):
-		/* FALLTHROUGH */
-	case (MDOC_Brc):
-		/* FALLTHROUGH */
-	case (MDOC_Dc):
-		/* FALLTHROUGH */
-	case (MDOC_Ec):
-		/* FALLTHROUGH */
-	case (MDOC_Ed):
-		/* FALLTHROUGH */
-	case (MDOC_Ek):
-		/* FALLTHROUGH */
-	case (MDOC_El):
-		/* FALLTHROUGH */
-	case (MDOC_Fc):
-		/* FALLTHROUGH */
-	case (MDOC_Ef):
-		/* FALLTHROUGH */
-	case (MDOC_Oc):
-		/* FALLTHROUGH */
-	case (MDOC_Pc):
-		/* FALLTHROUGH */
-	case (MDOC_Qc):
-		/* FALLTHROUGH */
-	case (MDOC_Re):
-		/* FALLTHROUGH */
-	case (MDOC_Sc):
-		/* FALLTHROUGH */
-	case (MDOC_Xc):
-		if (rew_alt(tok) != p->tok)
-			break;
-		if (p->end)
-			return(REWIND_HALT);
-		if (type == p->type)
-			return(REWIND_REWIND);
-		break;
-	default:
-		abort();
-		/* NOTREACHED */
-	}
-
-	return(REWIND_NOHALT);
-}
-
-
-/*
- * See if we can break an encountered scope (the rew_dohalt has returned
- * REWIND_NOHALT). 
- */
-static int
-rew_dobreak(enum mdoct tok, const struct mdoc_node *p)
-{
-
-	assert(MDOC_ROOT != p->type);
-	if (MDOC_ELEM == p->type)
-		return(1);
-	if (MDOC_TEXT == p->type)
-		return(1);
-	if (MDOC_VALID & p->flags)
-		return(1);
-	if (MDOC_BODY == p->type && p->end)
-		return(1);
-
-	switch (tok) {
-	case (MDOC_It):
-		return(MDOC_It == p->tok);
-	case (MDOC_Nd):
-		return(MDOC_Nd == p->tok);
-	case (MDOC_Ss):
-		return(MDOC_Ss == p->tok);
 	case (MDOC_Sh):
-		if (MDOC_Nd == p->tok)
-			return(1);
-		if (MDOC_Ss == p->tok)
-			return(1);
-		return(MDOC_Sh == p->tok);
-	case (MDOC_El):
-		if (MDOC_It == p->tok)
-			return(1);
+		if (MDOC_Nd == p->tok || MDOC_Ss == p->tok ||
+		    MDOC_Sh == p->tok)
+			return(REWIND_MORE);
 		break;
-	case (MDOC_Oc):
-		if (MDOC_Op == p->tok)
-			return(1);
-		break;
 	default:
 		break;
 	}
 
-	if (MDOC_EXPLICIT & mdoc_macros[tok].flags) 
-		return(p->tok == rew_alt(tok));
-	else if (MDOC_BLOCK == p->type)
-		return(1);
-
-	return(tok == p->tok);
+	return(p->end || MDOC_BLOCK == p->type &&
+	    ! (MDOC_EXPLICIT & mdoc_macros[tok].flags) ?
+	    REWIND_MORE : REWIND_LATER);
 }
 
 
@@ -570,7 +432,7 @@ make_pending(struct mdoc_node *broken, enum mdoct tok,
 			continue;
 		}
 
-		if (REWIND_REWIND != rew_dohalt(tok, MDOC_BLOCK, breaker))
+		if (REWIND_THIS != rew_dohalt(tok, MDOC_BLOCK, breaker))
 			continue;
 		if (MDOC_BODY == broken->type)
 			broken = broken->parent;
@@ -610,36 +472,37 @@ make_pending(struct mdoc_node *broken, enum mdoct tok,
 	/*
 	 * Found no matching block for tok.
 	 * Are you trying to close a block that is not open?
-	 * Report failure and abort the parser.
+	 * XXX Make this non-fatal.
 	 */
 	mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTNOSCOPE);
 	return(0);
 }
 
+
 static int
 rew_sub(enum mdoc_type t, struct mdoc *m, 
 		enum mdoct tok, int line, int ppos)
 {
 	struct mdoc_node *n;
-	enum rew	  c;
 
-	/* LINTED */
-	for (n = m->last; n; n = n->parent) {
-		c = rew_dohalt(tok, t, n);
-		if (REWIND_HALT == c) {
-			if (n->end || MDOC_BLOCK != t)
-				return(1);
-			if ( ! (MDOC_EXPLICIT & mdoc_macros[tok].flags))
-				return(1);
-			/* FIXME: shouldn't raise an error */
-			mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTNOSCOPE);
-			return(0);
-		}
-		if (REWIND_REWIND == c)
+	n = m->last;
+	while (n) {
+		switch (rew_dohalt(tok, t, n)) {
+		case (REWIND_NONE):
+			return(1);
+		case (REWIND_THIS):
 			break;
-		else if (rew_dobreak(tok, n))
+		case (REWIND_MORE):
+			n = n->parent;
 			continue;
-		return(make_pending(n, tok, m, line, ppos));
+		case (REWIND_LATER):
+			return(make_pending(n, tok, m, line, ppos));
+		case (REWIND_ERROR):
+			/* XXX Make this non-fatal. */
+			mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTNOSCOPE);
+			return 0;
+		}
+		break;
 	}
 
 	assert(n);
--
 To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv

  parent reply	other threads:[~2010-06-29  5:06 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20100627001247.GB2850@iris.usta.de>
2010-06-27 23:47 ` Back-block nesting patch Kristaps Dzonsons
2010-06-28 19:02   ` Ingo Schwarze
2010-06-28 19:26     ` Kristaps Dzonsons
2010-06-29  5:06   ` Ingo Schwarze [this message]
2010-06-29  9:18     ` rew_dohalt reorg and bad block-nesting bug Kristaps Dzonsons
2010-06-29 18:33       ` Ingo Schwarze

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100629050626.GH31959@iris.usta.de \
    --to=schwarze@usta.de \
    --cc=tech@mdocml.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).