From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from krisdoz.my.domain (schwarze@localhost [127.0.0.1]) by krisdoz.my.domain (8.14.3/8.14.3) with ESMTP id o5TJKc7c019716 for ; Tue, 29 Jun 2010 15:20:38 -0400 (EDT) Received: (from schwarze@localhost) by krisdoz.my.domain (8.14.3/8.14.3/Submit) id o5TJKcig011228; Tue, 29 Jun 2010 15:20:38 -0400 (EDT) Date: Tue, 29 Jun 2010 15:20:38 -0400 (EDT) Message-Id: <201006291920.o5TJKcig011228@krisdoz.my.domain> X-Mailinglist: mdocml-source Reply-To: source@mdocml.bsd.lv MIME-Version: 1.0 From: schwarze@mdocml.bsd.lv To: source@mdocml.bsd.lv Subject: mdocml: Support for badly nested blocks, written around the time of the X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Log Message: ----------- Support for badly nested blocks, written around the time of the Rostock mandoc hackathon and tested and polished since, supporting constructs like: .Ao Bo Ac Bc (exp breaking exp) .Aq Bo eol Bc (imp breaking exp) .Ao Bq Ac eol (exp breaking imp) .Ao Bo So Bc Ac Sc (double break, inner before outer) .Ao Bo So Ac Bc Sc (double break, outer before inner) .Ao Bo Ac So Bc Sc (broken breaker) .Ao Bo So Bc Do Ac Sc Dc (broken double breaker) There are still two known issues which are tricky: 1) Breaking two identical explicit blocks (Ao Bo Bo Ac or Aq Bo Bo eol) fails outright, triggering a bogus syntax error. 2) Breaking a block by two identical explicit blocks (Ao Ao Bo Ac Ac Bc or Ao Ao Bq Ac Ac eol) still has a minor rendering error left: " ac1> bc]>" should not have the final ">". We can fix these later in the tree, let's not grow this diff too large. 
"get it in" kristaps@ Modified Files: -------------- mdocml: libmdoc.h mdoc.3 mdoc.c mdoc.h mdoc_html.c mdoc_macro.c mdoc_term.c tree.c Revision Data ------------- Index: mdoc_macro.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mdoc_macro.c,v retrieving revision 1.82 retrieving revision 1.83 diff -Lmdoc_macro.c -Lmdoc_macro.c -u -p -r1.82 -r1.83 --- mdoc_macro.c +++ mdoc_macro.c @@ -50,6 +50,8 @@ static int append_delims(struct mdoc int, int *, char *); static enum mdoct lookup(enum mdoct, const char *); static enum mdoct lookup_raw(const char *); +static int make_pending(struct mdoc_node *, enum mdoc_type, + struct mdoc *, int, int); static int phrase(struct mdoc *, int, int, char *); static enum mdoct rew_alt(enum mdoct); static int rew_dobreak(enum mdoct, @@ -61,8 +63,6 @@ static int rew_last(struct mdoc *, const struct mdoc_node *); static int rew_sub(enum mdoc_type, struct mdoc *, enum mdoct, int, int); -static int swarn(struct mdoc *, enum mdoc_type, int, - int, const struct mdoc_node *); const struct mdoc_macro __mdoc_macros[MDOC_MAX] = { { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ap */ @@ -192,53 +192,6 @@ const struct mdoc_macro __mdoc_macros[MD const struct mdoc_macro * const mdoc_macros = __mdoc_macros; -static int -swarn(struct mdoc *mdoc, enum mdoc_type type, - int line, int pos, const struct mdoc_node *p) -{ - const char *n, *t, *tt; - enum mandocerr ec; - - n = t = ""; - tt = "block"; - - switch (type) { - case (MDOC_BODY): - tt = "multi-line"; - break; - case (MDOC_HEAD): - tt = "line"; - break; - default: - break; - } - - switch (p->type) { - case (MDOC_BLOCK): - n = mdoc_macronames[p->tok]; - t = "block"; - break; - case (MDOC_BODY): - n = mdoc_macronames[p->tok]; - t = "multi-line"; - break; - case (MDOC_HEAD): - n = mdoc_macronames[p->tok]; - t = "line"; - break; - default: - break; - } - - ec = (MDOC_IGN_SCOPE & mdoc->pflags) ? 
- MANDOCERR_SCOPE : MANDOCERR_SYNTSCOPE; - - return(mdoc_vmsg(mdoc, ec, line, pos, - "%s scope breaks %s of %s", - tt, t, n)); -} - - /* * This is called at the end of parsing. It must traverse up the tree, * closing out open [implicit] scopes. Obviously, open explicit scopes @@ -410,7 +363,11 @@ rew_dohalt(enum mdoct tok, enum mdoc_typ /* FALLTHROUGH */ case (MDOC_Vt): assert(MDOC_TAIL != type); - if (type == p->type && tok == p->tok) + if (tok != p->tok) + break; + if (p->end) + return(REWIND_HALT); + if (type == p->type) return(REWIND_REWIND); break; case (MDOC_It): @@ -464,7 +421,11 @@ rew_dohalt(enum mdoct tok, enum mdoc_typ case (MDOC_So): /* FALLTHROUGH */ case (MDOC_Xo): - if (type == p->type && tok == p->tok) + if (tok != p->tok) + break; + if (p->end) + return(REWIND_HALT); + if (type == p->type) return(REWIND_REWIND); break; /* Multi-line explicit scope close. */ @@ -499,7 +460,11 @@ rew_dohalt(enum mdoct tok, enum mdoc_typ case (MDOC_Sc): /* FALLTHROUGH */ case (MDOC_Xc): - if (type == p->type && rew_alt(tok) == p->tok) + if (rew_alt(tok) != p->tok) + break; + if (p->end) + return(REWIND_HALT); + if (type == p->type) return(REWIND_REWIND); break; default: @@ -526,6 +491,8 @@ rew_dobreak(enum mdoct tok, const struct return(1); if (MDOC_VALID & p->flags) return(1); + if (MDOC_BODY == p->type && p->end) + return(1); switch (tok) { case (MDOC_It): @@ -576,6 +543,83 @@ rew_elem(struct mdoc *mdoc, enum mdoct t } +/* + * We are trying to close a block identified by tok, + * but the child block *broken is still open. + * Thus, postpone closing the tok block + * until the rew_sub call closing *broken. + */ +static int +make_pending(struct mdoc_node *broken, enum mdoct tok, + struct mdoc *m, int line, int ppos) +{ + struct mdoc_node *breaker; + + /* + * Iterate backwards, searching for the block matching tok, + * that is, the block breaking the *broken block. 
+ */ + for (breaker = broken->parent; breaker; breaker = breaker->parent) { + + /* + * If the *broken block had already been broken before + * and we encounter its breaker, make the tok block + * pending on the inner breaker. + * Graphically, "[A breaker=[B broken=[C->B B] tok=A] C]" + * becomes "[A broken=[B [C->B B] tok=A] C]" + * and finally "[A [B->A [C->B B] A] C]". + */ + if (breaker == broken->pending) { + broken = breaker; + continue; + } + + if (REWIND_REWIND != rew_dohalt(tok, MDOC_BLOCK, breaker)) + continue; + if (MDOC_BODY == broken->type) + broken = broken->parent; + + /* + * Found the breaker. + * If another, outer breaker is already pending on + * the *broken block, we must not clobber the link + * to the outer breaker, but make it pending on the + * new, now inner breaker. + * Graphically, "[A breaker=[B broken=[C->A A] tok=B] C]" + * becomes "[A breaker=[B->A broken=[C A] tok=B] C]" + * and finally "[A [B->A [C->B A] B] C]". + */ + if (broken->pending) { + struct mdoc_node *taker; + + /* + * If the breaker had also been broken before, + * it cannot take on the outer breaker itself, + * but must hand it on to its own breakers. + * Graphically, this is the following situation: + * "[A [B breaker=[C->B B] broken=[D->A A] tok=C] D]" + * "[A taker=[B->A breaker=[C->B B] [D->C A] C] D]" + */ + taker = breaker; + while (taker->pending) + taker = taker->pending; + taker->pending = broken->pending; + } + broken->pending = breaker; + mdoc_vmsg(m, MANDOCERR_SCOPE, line, ppos, "%s breaks %s", + mdoc_macronames[tok], mdoc_macronames[broken->tok]); + return(1); + } + + /* + * Found no matching block for tok. + * Are you trying to close a block that is not open? + * Report failure and abort the parser. 
+ */ + mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTNOSCOPE); + return(0); +} + static int rew_sub(enum mdoc_type t, struct mdoc *m, enum mdoct tok, int line, int ppos) @@ -587,7 +631,7 @@ rew_sub(enum mdoc_type t, struct mdoc *m for (n = m->last; n; n = n->parent) { c = rew_dohalt(tok, t, n); if (REWIND_HALT == c) { - if (MDOC_BLOCK != t) + if (n->end || MDOC_BLOCK != t) return(1); if ( ! (MDOC_EXPLICIT & mdoc_macros[tok].flags)) return(1); @@ -599,8 +643,7 @@ rew_sub(enum mdoc_type t, struct mdoc *m break; else if (rew_dobreak(tok, n)) continue; - if ( ! swarn(m, t, line, ppos, n)) - return(0); + return(make_pending(n, tok, m, line, ppos)); } assert(n); @@ -608,15 +651,14 @@ rew_sub(enum mdoc_type t, struct mdoc *m return(0); /* - * The current block extends an enclosing block beyond a line - * break. Now that the current block ends, close the enclosing - * block, too. + * The current block extends an enclosing block. + * Now that the current block ends, close the enclosing block, too. */ - if (NULL != (n = n->pending)) { - assert(MDOC_HEAD == n->type); + while (NULL != (n = n->pending)) { if ( ! rew_last(m, n)) return(0); - if ( ! mdoc_body_alloc(m, n->line, n->pos, n->tok)) + if (MDOC_HEAD == n->type && + ! mdoc_body_alloc(m, n->line, n->pos, n->tok)) return(0); } @@ -672,9 +714,13 @@ append_delims(struct mdoc *m, int line, static int blk_exp_close(MACRO_PROT_ARGS) { + struct mdoc_node *body; /* Our own body. */ + struct mdoc_node *later; /* A sub-block starting later. */ + struct mdoc_node *n; /* For searching backwards. */ + int j, lastarg, maxargs, flushed, nl; enum margserr ac; - enum mdoct ntok; + enum mdoct atok, ntok; char *p; nl = MDOC_NEWLINE & m->flags; @@ -688,6 +734,68 @@ blk_exp_close(MACRO_PROT_ARGS) break; } + /* + * Search backwards for beginnings of blocks, + * both of our own and of pending sub-blocks. 
+ */ + atok = rew_alt(tok); + body = later = NULL; + for (n = m->last; n; n = n->parent) { + if (MDOC_VALID & n->flags) + continue; + + /* Remember the start of our own body. */ + if (MDOC_BODY == n->type && atok == n->tok) { + if ( ! n->end) + body = n; + continue; + } + + if (MDOC_BLOCK != n->type) + continue; + if (atok == n->tok) { + assert(body); + + /* + * Found the start of our own block. + * When there is no pending sub block, + * just proceed to closing out. + */ + if (NULL == later) + break; + + /* + * When there is a pending sub block, + * postpone closing out the current block + * until the rew_sub() closing out the sub-block. + */ + if ( ! make_pending(later, tok, m, line, ppos)) + return(0); + + /* + * Mark the place where the formatting - but not + * the scope - of the current block ends. + */ + if ( ! mdoc_endbody_alloc(m, line, ppos, + atok, body, ENDBODY_SPACE)) + return(0); + break; + } + + /* + * When finding an open sub block, remember the last + * open explicit block, or, in case there are only + * implicit ones, the first open implicit block. + */ + if (later && + MDOC_EXPLICIT & mdoc_macros[later->tok].flags) + continue; + if (MDOC_CALLABLE & mdoc_macros[n->tok].flags) { + assert( ! (MDOC_ACTED & n->flags)); + later = n; + } + } + if ( ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) { /* FIXME: do this in validate */ if (buf[*pos]) @@ -702,7 +810,7 @@ blk_exp_close(MACRO_PROT_ARGS) if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) return(0); - if (maxargs > 0) + if (NULL == later && maxargs > 0) if ( ! mdoc_tail_alloc(m, line, ppos, rew_alt(tok))) return(0); @@ -1255,22 +1363,36 @@ blk_part_imp(MACRO_PROT_ARGS) body->parent->flags |= MDOC_EOS; } + /* + * If there is an open sub-block requiring explicit close-out, + * postpone closing out the current block + * until the rew_sub() call closing out the sub-block. 
+ */ + for (n = m->last; n && n != body && n != blk->parent; n = n->parent) { + if (MDOC_BLOCK == n->type && + MDOC_EXPLICIT & mdoc_macros[n->tok].flags && + ! (MDOC_VALID & n->flags)) { + assert( ! (MDOC_ACTED & n->flags)); + if ( ! make_pending(n, tok, m, line, ppos)) + return(0); + if ( ! mdoc_endbody_alloc(m, line, ppos, + tok, body, ENDBODY_NOSPACE)) + return(0); + return(1); + } + } + /* * If we can't rewind to our body, then our scope has already * been closed by another macro (like `Oc' closing `Op'). This * is ugly behaviour nodding its head to OpenBSD's overwhelming * crufty use of `Op' breakage. - * - * FIXME - this should be ifdef'd OpenBSD? */ - for (n = m->last; n; n = n->parent) - if (body == n) - break; - - if (NULL == n && ! mdoc_nmsg(m, body, MANDOCERR_SCOPE)) + if (n != body && ! mdoc_vmsg(m, MANDOCERR_SCOPE, line, ppos, + "%s broken", mdoc_macronames[tok])) return(0); - if (n && ! rew_last(m, body)) + if (n && ! rew_sub(MDOC_BODY, m, tok, line, ppos)) return(0); /* Standard appending of delimiters. */ @@ -1280,7 +1402,7 @@ blk_part_imp(MACRO_PROT_ARGS) /* Rewind scope, if applicable. */ - if (n && ! rew_last(m, blk)) + if (n && ! 
rew_sub(MDOC_BLOCK, m, tok, line, ppos)) return(0); return(1); Index: mdoc_html.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mdoc_html.c,v retrieving revision 1.87 retrieving revision 1.88 diff -Lmdoc_html.c -Lmdoc_html.c -u -p -r1.87 -r1.88 --- mdoc_html.c +++ mdoc_html.c @@ -437,7 +437,7 @@ print_mdoc_node(MDOC_ARGS) print_text(h, n->string); return; default: - if (mdocs[n->tok].pre) + if (mdocs[n->tok].pre && !n->end) child = (*mdocs[n->tok].pre)(m, n, h); break; } @@ -453,7 +453,7 @@ print_mdoc_node(MDOC_ARGS) mdoc_root_post(m, n, h); break; default: - if (mdocs[n->tok].post) + if (mdocs[n->tok].post && !n->end) (*mdocs[n->tok].post)(m, n, h); break; } Index: libmdoc.h =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/libmdoc.h,v retrieving revision 1.57 retrieving revision 1.58 diff -Llibmdoc.h -Llibmdoc.h -u -p -r1.57 -r1.58 --- libmdoc.h +++ libmdoc.h @@ -109,6 +109,9 @@ int mdoc_block_alloc(struct mdoc *, i int mdoc_head_alloc(struct mdoc *, int, int, enum mdoct); int mdoc_tail_alloc(struct mdoc *, int, int, enum mdoct); int mdoc_body_alloc(struct mdoc *, int, int, enum mdoct); +int mdoc_endbody_alloc(struct mdoc *m, int line, int pos, + enum mdoct tok, struct mdoc_node *body, + enum mdoc_endbody end); void mdoc_node_delete(struct mdoc *, struct mdoc_node *); void mdoc_hash_init(void); enum mdoct mdoc_hash_find(const char *); Index: mdoc_term.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mdoc_term.c,v retrieving revision 1.161 retrieving revision 1.162 diff -Lmdoc_term.c -Lmdoc_term.c -u -p -r1.161 -r1.162 --- mdoc_term.c +++ mdoc_term.c @@ -325,20 +325,37 @@ print_mdoc_node(DECL_ARGS) memset(&npair, 0, sizeof(struct termpair)); npair.ppair = pair; - if (MDOC_TEXT != n->type) { - if (termacts[n->tok].pre) - chld = (*termacts[n->tok].pre)(p, &npair, m, 
n); - } else + if (MDOC_TEXT == n->type) term_word(p, n->string); + else if (termacts[n->tok].pre && !n->end) + chld = (*termacts[n->tok].pre)(p, &npair, m, n); if (chld && n->child) print_mdoc_nodelist(p, &npair, m, n->child); term_fontpopq(p, font); - if (MDOC_TEXT != n->type) - if (termacts[n->tok].post) - (*termacts[n->tok].post)(p, &npair, m, n); + if (MDOC_TEXT != n->type && + termacts[n->tok].post && + ! (MDOC_ENDED & n->flags)) { + (*termacts[n->tok].post)(p, &npair, m, n); + + /* + * Explicit end tokens not only call the post + * handler, but also tell the respective block + * that it must not call the post handler again. + */ + if (n->end) + n->pending->flags |= MDOC_ENDED; + + /* + * End of line terminating an implicit block + * while an explicit block is still open. + * Continue the explicit block without spacing. + */ + if (ENDBODY_NOSPACE == n->end) + p->flags |= TERMP_NOSPACE; + } if (MDOC_EOS & n->flags) p->flags |= TERMP_SENTENCE; Index: mdoc.h =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mdoc.h,v retrieving revision 1.94 retrieving revision 1.95 diff -Lmdoc.h -Lmdoc.h -u -p -r1.94 -r1.95 --- mdoc.h +++ mdoc.h @@ -249,6 +249,12 @@ struct mdoc_arg { unsigned int refcnt; }; +enum mdoc_endbody { + ENDBODY_NOT = 0, + ENDBODY_SPACE, + ENDBODY_NOSPACE, +}; + enum mdoc_list { LIST__NONE = 0, LIST_bullet, @@ -302,6 +308,7 @@ struct mdoc_node { #define MDOC_EOS (1 << 2) /* at sentence boundary */ #define MDOC_LINE (1 << 3) /* first macro/text on line */ #define MDOC_SYNPRETTY (1 << 4) /* SYNOPSIS-style formatting */ +#define MDOC_ENDED (1 << 5) /* rendering has been ended */ enum mdoc_type type; /* AST node type */ enum mdoc_sec sec; /* current named section */ /* FIXME: these can be union'd to shave a few bytes. 
*/ @@ -311,6 +318,7 @@ struct mdoc_node { struct mdoc_node *body; /* BLOCK */ struct mdoc_node *tail; /* BLOCK */ char *string; /* TEXT */ + enum mdoc_endbody end; /* BODY */ union { struct mdoc_bl Bl; Index: mdoc.3 =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mdoc.3,v retrieving revision 1.44 retrieving revision 1.45 diff -Lmdoc.3 -Lmdoc.3 -u -p -r1.44 -r1.45 --- mdoc.3 +++ mdoc.3 @@ -217,10 +217,14 @@ and fields), its position in the tree (the .Va parent , .Va child , +.Va nchild , .Va next and .Va prev -fields) and some type-specific data. +fields) and some type-specific data, in particular, for nodes generated +from macros, the generating macro in the +.Va tok +field. .Pp The tree itself is arranged according to the following normal form, where capitalised non-terminals represent nodes. @@ -235,11 +239,11 @@ where capitalised non-terminals represen .It ELEMENT \(<- TEXT* .It HEAD -\(<- mnode+ +\(<- mnode* .It BODY -\(<- mnode+ +\(<- mnode* [ENDBODY mnode*] .It TAIL -\(<- mnode+ +\(<- mnode* .It TEXT \(<- [[:printable:],0x1e]* .El @@ -253,6 +257,65 @@ an empty line will produce a zero-length Multiple body parts are only found in invocations of .Sq \&Bl \-column , where a new body introduces a new phrase. +.Ss Badly nested blocks +A special kind of node is available to end the formatting +associated with a given block before the physical end of that block. +Such an ENDBODY node has a non-null +.Va end +field, is of the BODY +.Va type , +has the same +.Va tok +as the BLOCK it is ending, and has a +.Va pending +field pointing to that BLOCK's BODY node. +It is an indirect child of that BODY node +and has no children of its own. 
+.Pp +An ENDBODY node is generated when a block ends while one of its child +blocks is still open, like in the following example: +.Bd -literal -offset indent +\&.Ao ao +\&.Bo bo ac +\&.Ac bc +\&.Bc end +.Ed +.Pp +This example results in the following block structure: +.Bd -literal -offset indent +BLOCK Ao + HEAD Ao + BODY Ao + TEXT ao + BLOCK Bo, pending -> Ao + HEAD Bo + BODY Bo + TEXT bo + TEXT ac + ENDBODY Ao, pending -> Ao + TEXT bc +TEXT end +.Ed +.Pp +Here, the formatting of the Ao block extends from TEXT ao to TEXT ac, +while the formatting of the Bo block extends from TEXT bo to TEXT bc, +rendering like this in +.Fl T Ns Cm ascii +mode: +.Dl <ao [bo ac> bc] end +Support for badly nested blocks is only provided for backward +compatibility with some older +.Xr mdoc 7 +implementations. +Using them in new code is strongly discouraged: +Some frontends, in particular +.Fl T Ns Cm html , +are unable to render them in any meaningful way, +many other +.Xr mdoc 7 +implementations do not support them, and even for those that do, +the behaviour is not well-defined, in particular when using multiple +levels of badly nested blocks. .Sh EXAMPLES The following example reads lines from stdin and parses them, operating on the finished parse tree with Index: mdoc.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mdoc.c,v retrieving revision 1.151 retrieving revision 1.152 diff -Lmdoc.c -Lmdoc.c -u -p -r1.151 -r1.152 --- mdoc.c +++ mdoc.c @@ -332,6 +332,8 @@ node_append(struct mdoc *mdoc, struct md p->parent->tail = p; break; case (MDOC_BODY): + if (p->end) + break; assert(MDOC_BLOCK == p->parent->type); p->parent->body = p; break; @@ -431,6 +433,22 @@ mdoc_body_alloc(struct mdoc *m, int line if ( ! 
node_append(m, p)) return(0); m->next = MDOC_NEXT_CHILD; + return(1); +} + + +int +mdoc_endbody_alloc(struct mdoc *m, int line, int pos, enum mdoct tok, + struct mdoc_node *body, enum mdoc_endbody end) +{ + struct mdoc_node *p; + + p = node_alloc(m, line, pos, tok, MDOC_BODY); + p->pending = body; + p->end = end; + if ( ! node_append(m, p)) + return(0); + m->next = MDOC_NEXT_SIBLING; return(1); } Index: tree.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/tree.c,v retrieving revision 1.22 retrieving revision 1.23 diff -Ltree.c -Ltree.c -u -p -r1.22 -r1.23 --- tree.c +++ tree.c @@ -75,7 +75,10 @@ print_mdoc(const struct mdoc_node *n, in t = "block-head"; break; case (MDOC_BODY): - t = "block-body"; + if (n->end) + t = "body-end"; + else + t = "block-body"; break; case (MDOC_TAIL): t = "block-tail"; -- To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv