From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from localhost (fantadrom.bsd.lv [local]); by fantadrom.bsd.lv (OpenSMTPD) with ESMTPA id ce39d05e; for ; Thu, 12 Feb 2015 07:25:04 -0500 (EST) Date: Thu, 12 Feb 2015 07:25:04 -0500 (EST) Message-Id: <6124707707451945917.enqueue@fantadrom.bsd.lv> X-Mailinglist: mdocml-source Reply-To: source@mdocml.bsd.lv MIME-Version: 1.0 From: schwarze@mdocml.bsd.lv To: source@mdocml.bsd.lv Subject: mdocml: Delete the mdoc_node.pending pointer and the function X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Log Message: ----------- Delete the mdoc_node.pending pointer and the function calculating it, make_pending(), which was the most difficult function of the whole mdoc(7) parser. After almost five years of maintaining this hellhole, I just noticed the pointer isn't needed after all. Blocks are always rewound in the reverse order they were opened; that even holds for broken blocks. Consequently, it is sufficient to just mark broken blocks with the flag MDOC_BROKEN and breaking blocks with the flag MDOC_ENDED. When rewinding, instead of iterating the pending pointers, just iterate from each broken block to its parents, rewinding all that are MDOC_ENDED and stopping after processing the first ancestor that is not MDOC_BROKEN. For ENDBODY markers, use the mdoc_node.body pointer in place of the former mdoc_node.pending. This also fixes an assertion failure found by jsg@ with afl, test case #467 (Bo Bl It Bd Bc It), where (surprise surprise) the pending pointer got corrupted. Improved functionality, minus one function, minus one struct field, minus 50 lines of code. 
Modified Files: -------------- mdocml: mdoc.c mdoc.h mdoc_html.c mdoc_macro.c mdoc_man.c mdoc_term.c mdoc_validate.c Revision Data ------------- Index: mdoc.h =================================================================== RCS file: /home/cvs/mdocml/mdocml/mdoc.h,v retrieving revision 1.135 retrieving revision 1.136 diff -Lmdoc.h -Lmdoc.h -u -p -r1.135 -r1.136 --- mdoc.h +++ mdoc.h @@ -1,6 +1,7 @@ /* $Id$ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons + * Copyright (c) 2014, 2015 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -355,11 +356,11 @@ struct mdoc_node { enum mdoct tok; /* tok or MDOC__MAX if none */ int flags; #define MDOC_VALID (1 << 0) /* has been validated */ -#define MDOC_BREAK (1 << 1) /* has broken another block */ +#define MDOC_ENDED (1 << 1) /* gone past body end mark */ #define MDOC_EOS (1 << 2) /* at sentence boundary */ #define MDOC_LINE (1 << 3) /* first macro/text on line */ #define MDOC_SYNPRETTY (1 << 4) /* SYNOPSIS-style formatting */ -#define MDOC_ENDED (1 << 5) /* rendering has been ended */ +#define MDOC_BROKEN (1 << 5) /* must validate parent when ending */ #define MDOC_DELIMO (1 << 6) #define MDOC_DELIMC (1 << 7) enum mdoc_type type; /* AST node type */ @@ -368,9 +369,8 @@ struct mdoc_node { int prev_font; /* before entering this node */ /* FIXME: these can be union'd to shave a few bytes. 
*/ struct mdoc_arg *args; /* BLOCK/ELEM */ - struct mdoc_node *pending; /* BLOCK */ struct mdoc_node *head; /* BLOCK */ - struct mdoc_node *body; /* BLOCK */ + struct mdoc_node *body; /* BLOCK/ENDBODY */ struct mdoc_node *tail; /* BLOCK */ char *string; /* TEXT */ const struct tbl_span *span; /* TBL */ Index: mdoc.c =================================================================== RCS file: /home/cvs/mdocml/mdocml/mdoc.c,v retrieving revision 1.236 retrieving revision 1.237 diff -Lmdoc.c -Lmdoc.c -u -p -r1.236 -r1.237 --- mdoc.c +++ mdoc.c @@ -1,7 +1,7 @@ /* $Id$ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze + * Copyright (c) 2010, 2012-2015 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -419,8 +419,10 @@ mdoc_endbody_alloc(struct mdoc *mdoc, in { struct mdoc_node *p; + body->flags |= MDOC_ENDED; + body->parent->flags |= MDOC_ENDED; p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); - p->pending = body; + p->body = body; p->norm = body->norm; p->end = end; node_append(mdoc, p); Index: mdoc_validate.c =================================================================== RCS file: /home/cvs/mdocml/mdocml/mdoc_validate.c,v retrieving revision 1.277 retrieving revision 1.278 diff -Lmdoc_validate.c -Lmdoc_validate.c -u -p -r1.277 -r1.278 --- mdoc_validate.c +++ mdoc_validate.c @@ -325,7 +325,7 @@ mdoc_valid_post(struct mdoc *mdoc) n = mdoc->last; if (n->flags & MDOC_VALID) return; - n->flags |= MDOC_VALID; + n->flags |= MDOC_VALID | MDOC_ENDED; switch (n->type) { case MDOC_TEXT: @@ -416,24 +416,13 @@ pre_display(PRE_ARGS) static void pre_bl(PRE_ARGS) { - struct mdoc_node *np; struct mdoc_argv *argv, *wa; int i; enum mdocargt mdoclt; enum mdoc_list lt; - if (MDOC_BLOCK != n->type) { - if (ENDBODY_NOT != n->end) { - assert(n->pending); - np = n->pending->parent; - } else - np = n->parent; 
- - assert(np); - assert(MDOC_BLOCK == np->type); - assert(MDOC_Bl == np->tok); + if (n->type != MDOC_BLOCK) return; - } /* * First figure out which kind of list to use: bind ourselves to @@ -609,25 +598,14 @@ pre_bl(PRE_ARGS) static void pre_bd(PRE_ARGS) { - struct mdoc_node *np; struct mdoc_argv *argv; int i; enum mdoc_disp dt; pre_literal(mdoc, n); - if (MDOC_BLOCK != n->type) { - if (ENDBODY_NOT != n->end) { - assert(n->pending); - np = n->pending->parent; - } else - np = n->parent; - - assert(np); - assert(MDOC_BLOCK == np->type); - assert(MDOC_Bd == np->tok); + if (n->type != MDOC_BLOCK) return; - } for (i = 0; n->args && i < (int)n->args->argc; i++) { argv = n->args->argv + i; @@ -797,22 +775,10 @@ post_bf(POST_ARGS) * element, which contains the goods. */ - if (MDOC_HEAD != mdoc->last->type) { - if (ENDBODY_NOT != mdoc->last->end) { - assert(mdoc->last->pending); - np = mdoc->last->pending->parent->head; - } else if (MDOC_BLOCK != mdoc->last->type) { - np = mdoc->last->parent->head; - } else - np = mdoc->last->head; - - assert(np); - assert(MDOC_HEAD == np->type); - assert(MDOC_Bf == np->tok); + np = mdoc->last; + if (MDOC_HEAD != np->type) return; - } - np = mdoc->last; assert(MDOC_BLOCK == np->parent->type); assert(MDOC_Bf == np->parent->tok); Index: mdoc_term.c =================================================================== RCS file: /home/cvs/mdocml/mdocml/mdoc_term.c,v retrieving revision 1.309 retrieving revision 1.310 diff -Lmdoc_term.c -Lmdoc_term.c -u -p -r1.309 -r1.310 --- mdoc_term.c +++ mdoc_term.c @@ -309,6 +309,7 @@ print_mdoc_node(DECL_ARGS) chld = 1; offset = p->offset; rmargin = p->rmargin; + n->flags &= ~MDOC_ENDED; n->prev_font = p->fonti; memset(&npair, 0, sizeof(struct termpair)); @@ -361,7 +362,7 @@ print_mdoc_node(DECL_ARGS) print_mdoc_nodelist(p, &npair, meta, n->child); term_fontpopq(p, - (ENDBODY_NOT == n->end ? n : n->pending)->prev_font); + (ENDBODY_NOT == n->end ? 
n : n->body)->prev_font); switch (n->type) { case MDOC_TEXT: @@ -381,7 +382,7 @@ print_mdoc_node(DECL_ARGS) * that it must not call the post handler again. */ if (ENDBODY_NOT != n->end) - n->pending->flags |= MDOC_ENDED; + n->body->flags |= MDOC_ENDED; /* * End of line terminating an implicit block Index: mdoc_man.c =================================================================== RCS file: /home/cvs/mdocml/mdocml/mdoc_man.c,v retrieving revision 1.86 retrieving revision 1.87 diff -Lmdoc_man.c -Lmdoc_man.c -u -p -r1.86 -r1.87 --- mdoc_man.c +++ mdoc_man.c @@ -29,8 +29,7 @@ #include "mdoc.h" #include "main.h" -#define DECL_ARGS const struct mdoc_meta *meta, \ - const struct mdoc_node *n +#define DECL_ARGS const struct mdoc_meta *meta, struct mdoc_node *n struct manact { int (*cond)(DECL_ARGS); /* DON'T run actions */ @@ -548,10 +547,10 @@ void man_mdoc(void *arg, const struct mdoc *mdoc) { const struct mdoc_meta *meta; - const struct mdoc_node *n; + struct mdoc_node *n; meta = mdoc_meta(mdoc); - n = mdoc_node(mdoc); + n = mdoc_node(mdoc)->child; printf(".TH \"%s\" \"%s\" \"%s\" \"%s\" \"%s\"\n", meta->title, @@ -567,15 +566,18 @@ man_mdoc(void *arg, const struct mdoc *m fontqueue.head = fontqueue.tail = mandoc_malloc(8); *fontqueue.tail = 'R'; } - print_node(meta, n); + while (n != NULL) { + print_node(meta, n); + n = n->next; + } putchar('\n'); } static void print_node(DECL_ARGS) { - const struct mdoc_node *sub; const struct manact *act; + struct mdoc_node *sub; int cond, do_sub; /* @@ -588,6 +590,7 @@ print_node(DECL_ARGS) act = NULL; cond = 0; do_sub = 1; + n->flags &= ~MDOC_ENDED; if (MDOC_TEXT == n->type) { /* @@ -635,7 +638,7 @@ print_node(DECL_ARGS) (*act->post)(meta, n); if (ENDBODY_NOT != n->end) - n->pending->flags |= MDOC_ENDED; + n->body->flags |= MDOC_ENDED; if (ENDBODY_NOSPACE == n->end) outflags &= ~(MMAN_spc | MMAN_nl); Index: mdoc_macro.c =================================================================== RCS file: 
/home/cvs/mdocml/mdocml/mdoc_macro.c,v retrieving revision 1.182 retrieving revision 1.183 diff -Lmdoc_macro.c -Lmdoc_macro.c -u -p -r1.182 -r1.183 --- mdoc_macro.c +++ mdoc_macro.c @@ -47,8 +47,6 @@ static void append_delims(struct mdoc * static enum mdoct lookup(struct mdoc *, enum mdoct, int, int, const char *); static int macro_or_word(MACRO_PROT_ARGS, int); -static void make_pending(struct mdoc *, struct mdoc_node *, - struct mdoc_node *, int, int); static int parse_rest(struct mdoc *, enum mdoct, int, int *, char *); static enum mdoct rew_alt(enum mdoct); @@ -285,15 +283,34 @@ static void rew_pending(struct mdoc *mdoc, const struct mdoc_node *n) { - rew_last(mdoc, n); - - if (n->type != MDOC_BLOCK) - return; - - while ((n = n->pending) != NULL) { + for (;;) { rew_last(mdoc, n); - if (n->type == MDOC_HEAD) + + switch (n->type) { + case MDOC_HEAD: mdoc_body_alloc(mdoc, n->line, n->pos, n->tok); + return; + case MDOC_BLOCK: + break; + default: + return; + } + + if ( ! (n->flags & MDOC_BROKEN)) + return; + + for (;;) { + if ((n = n->parent) == NULL) + return; + + if (n->type == MDOC_BLOCK || + n->type == MDOC_HEAD) { + if (n->flags & MDOC_ENDED) + break; + else + return; + } + } } } @@ -357,78 +374,6 @@ rew_elem(struct mdoc *mdoc, enum mdoct t } /* - * We are trying to close the block *breaker, - * but the child block *broken is still open. - * Thus, postpone closing the *breaker - * until the rew_pending() call closing *broken. - */ -static void -make_pending(struct mdoc *mdoc, struct mdoc_node *breaker, - struct mdoc_node *broken, int line, int ppos) -{ - struct mdoc_node *n; - - mandoc_vmsg(MANDOCERR_BLK_NEST, mdoc->parse, line, ppos, - "%s breaks %s", mdoc_macronames[breaker->tok], - mdoc_macronames[broken->tok]); - - /* - * If the *broken block (Z) is already broken by a block (B) - * contained in the breaker (A), make the breaker pending - * on that inner breaker (B). Graphically, - * - * breaker=[A! 
broken=n=[B!->A (old broken=)[Z->B B] A] Z] - * - * In these graphics, "->" indicates the "pending" pointer and - * "!" indicates the MDOC_BREAK flag. Each of the cases gets - * one additional pointer (B->A) and one additional flag (A!). - */ - - for (n = broken->parent; ; n = n->parent) - if (n == broken->pending) - broken = n; - else if (n == breaker) - break; - - /* - * Found the breaker. - * - * If another, outer breaker (X) is already pending on - * the *broken block (B), we must not clobber the link - * to the outer breaker, but make it pending on the new, - * now inner breaker (A). Graphically, - * - * [X! n=breaker=[A!->X broken=[B(->X)->A X] A] B]. - */ - - if (broken->pending != NULL) { - n = breaker; - - /* - * If the inner breaker (A) is already broken, too, - * it cannot take on the outer breaker (X) but must - * hand it on to its own breakers (Y). Graphically, - * - * [X! n=[Y!->X breaker=[A!->Y Y] broken=[B(->X)->A X] A] B] - */ - - while (n->pending) - n = n->pending; - n->pending = broken->pending; - } - - /* - * Now we have reduced the situation to the simplest case: - * breaker=[A! broken=[B->A A] B]. - */ - - broken->pending = breaker; - breaker->flags |= MDOC_BREAK; - if (breaker->body != NULL) - breaker->body->flags |= MDOC_BREAK; -} - -/* * Allocate a word and check whether it's punctuation or not. * Punctuation consists of those tokens found in mdoc_isdelim(). */ @@ -569,8 +514,11 @@ blk_exp_close(MACRO_PROT_ARGS) atok = rew_alt(tok); body = endbody = itblk = later = NULL; for (n = mdoc->last; n; n = n->parent) { - if (n->flags & (MDOC_VALID | MDOC_BREAK)) + if (n->flags & MDOC_ENDED) { + if ( ! (n->flags & MDOC_VALID)) + n->flags |= MDOC_BROKEN; continue; + } /* Remember the start of our own body. */ @@ -605,20 +553,21 @@ blk_exp_close(MACRO_PROT_ARGS) * When there is a pending sub block, postpone * closing out the current block until the * rew_pending() closing out the sub-block. 
- */ - - make_pending(mdoc, n, later, line, ppos); - if (tok == MDOC_El) - itblk->flags |= MDOC_BREAK; - - /* * Mark the place where the formatting - but not * the scope - of the current block ends. */ + mandoc_vmsg(MANDOCERR_BLK_NEST, mdoc->parse, + line, ppos, "%s breaks %s", + mdoc_macronames[atok], + mdoc_macronames[later->tok]); + endbody = mdoc_endbody_alloc(mdoc, line, ppos, atok, body, ENDBODY_SPACE); + if (tok == MDOC_El) + itblk->flags |= MDOC_ENDED | MDOC_BROKEN; + /* * If a block closing macro taking arguments * breaks another block, put the arguments @@ -637,14 +586,10 @@ blk_exp_close(MACRO_PROT_ARGS) continue; } - /* - * When finding an open sub block, remember the last - * open explicit block, or, in case there are only - * implicit ones, the first open implicit block. - */ + /* Breaking an open sub block. */ - if (later == NULL || - ! (mdoc_macros[later->tok].flags & MDOC_EXPLICIT)) + n->flags |= MDOC_BROKEN; + if (later == NULL) later = n; } @@ -923,9 +868,14 @@ blk_full(MACRO_PROT_ARGS) blk = NULL; for (n = mdoc->last; n != NULL; n = n->parent) { - if (n->flags & (MDOC_VALID | MDOC_BREAK) || - n->type != MDOC_BLOCK) + if (n->flags & MDOC_ENDED) { + if ( ! (n->flags & MDOC_VALID)) + n->flags |= MDOC_BROKEN; + continue; + } + if (n->type != MDOC_BLOCK) continue; + if (tok == MDOC_It && n->tok == MDOC_Bl) { if (blk != NULL) { mandoc_vmsg(MANDOCERR_BLK_BROKEN, @@ -1131,13 +1081,19 @@ blk_full(MACRO_PROT_ARGS) * sub-block. */ for (n = mdoc->last; n && n != head; n = n->parent) { + if (n->flags & MDOC_ENDED) { + if ( ! (n->flags & MDOC_VALID)) + n->flags |= MDOC_BROKEN; + continue; + } if (n->type == MDOC_BLOCK && - mdoc_macros[n->tok].flags & MDOC_EXPLICIT && - ! (n->flags & MDOC_VALID)) { - n->pending = head; - return; + mdoc_macros[n->tok].flags & MDOC_EXPLICIT) { + n->flags = MDOC_BROKEN; + head->flags = MDOC_ENDED; } } + if (head->flags & MDOC_ENDED) + return; /* Close out scopes to remain in a consistent state. 
*/ @@ -1210,16 +1166,28 @@ blk_part_imp(MACRO_PROT_ARGS) for (n = mdoc->last; n && n != body && n != blk->parent; n = n->parent) { + if (n->flags & MDOC_ENDED) { + if ( ! (n->flags & MDOC_VALID)) + n->flags |= MDOC_BROKEN; + continue; + } if (n->type == MDOC_BLOCK && - mdoc_macros[n->tok].flags & MDOC_EXPLICIT && - ! (n->flags & MDOC_VALID)) { - make_pending(mdoc, blk, n, line, ppos); - mdoc_endbody_alloc(mdoc, line, ppos, - tok, body, ENDBODY_NOSPACE); - return; + mdoc_macros[n->tok].flags & MDOC_EXPLICIT) { + n->flags |= MDOC_BROKEN; + if ( ! (body->flags & MDOC_ENDED)) { + mandoc_vmsg(MANDOCERR_BLK_NEST, + mdoc->parse, line, ppos, + "%s breaks %s", mdoc_macronames[tok], + mdoc_macronames[n->tok]); + mdoc_endbody_alloc(mdoc, line, ppos, + tok, body, ENDBODY_NOSPACE); + } } } assert(n == body); + if (body->flags & MDOC_ENDED) + return; + rew_last(mdoc, body); if (nl) append_delims(mdoc, line, pos, buf); @@ -1482,7 +1450,7 @@ phrase_ta(MACRO_PROT_ARGS) body = NULL; for (n = mdoc->last; n != NULL; n = n->parent) { - if (n->flags & (MDOC_VALID | MDOC_BREAK)) + if (n->flags & MDOC_ENDED) continue; if (n->tok == MDOC_It && n->type == MDOC_BODY) body = n; Index: mdoc_html.c =================================================================== RCS file: /home/cvs/mdocml/mdocml/mdoc_html.c,v retrieving revision 1.224 retrieving revision 1.225 diff -Lmdoc_html.c -Lmdoc_html.c -u -p -r1.224 -r1.225 --- mdoc_html.c +++ mdoc_html.c @@ -35,7 +35,7 @@ #define INDENT 5 #define MDOC_ARGS const struct mdoc_meta *meta, \ - const struct mdoc_node *n, \ + struct mdoc_node *n, \ struct html *h #ifndef MIN @@ -267,7 +267,7 @@ void html_mdoc(void *arg, const struct mdoc *mdoc) { - print_mdoc(mdoc_meta(mdoc), mdoc_node(mdoc), + print_mdoc(mdoc_meta(mdoc), mdoc_node(mdoc)->child, (struct html *)arg); putchar('\n'); } @@ -387,6 +387,7 @@ print_mdoc_node(MDOC_ARGS) child = 1; t = h->tags.head; + n->flags &= ~MDOC_ENDED; switch (n->type) { case MDOC_ROOT: @@ -457,7 +458,7 @@ 
print_mdoc_node(MDOC_ARGS) break; (*mdocs[n->tok].post)(meta, n, h); if (n->end != ENDBODY_NOT) - n->pending->flags |= MDOC_ENDED; + n->body->flags |= MDOC_ENDED; if (n->end == ENDBODY_NOSPACE) h->flags |= HTML_NOSPACE; break; @@ -1122,7 +1123,7 @@ mdoc_bd_pre(MDOC_ARGS) { struct htmlpair tag[2]; int comp, sv; - const struct mdoc_node *nn; + struct mdoc_node *nn; struct roffsu su; if (MDOC_HEAD == n->type) -- To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv