From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from krisdoz.my.domain (schwarze@localhost [127.0.0.1]) by krisdoz.my.domain (8.14.5/8.14.5) with ESMTP id s86Mdbam017727 for ; Sat, 6 Sep 2014 18:39:37 -0400 (EDT) Received: (from schwarze@localhost) by krisdoz.my.domain (8.14.5/8.14.3/Submit) id s86Mdbds018702; Sat, 6 Sep 2014 18:39:37 -0400 (EDT) Date: Sat, 6 Sep 2014 18:39:37 -0400 (EDT) Message-Id: <201409062239.s86Mdbds018702@krisdoz.my.domain> X-Mailinglist: mdocml-source Reply-To: source@mdocml.bsd.lv MIME-Version: 1.0 From: schwarze@mdocml.bsd.lv To: source@mdocml.bsd.lv Subject: mdocml: Move main format autodetection from the parser dispatcher to the X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Log Message: ----------- Move main format autodetection from the parser dispatcher to the roff parser where .Dd and .TH are already detected, anyway. This improves robustness because it correctly handles whitespace or an alternate control character before Dd. In the parser dispatcher, provide a fallback looking ahead in the input buffer instead of always assuming man(7). This corrects autodetection when Dd is preceded by other macros or macro-like handled requests like .ll. Triggered by reports from Daniel Levai about issues on Slackware Linux. 
Modified Files: -------------- mdocml: libmandoc.h read.c roff.c Revision Data ------------- Index: roff.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/roff.c,v retrieving revision 1.226 retrieving revision 1.227 diff -Lroff.c -Lroff.c -u -p -r1.226 -r1.227 --- roff.c +++ roff.c @@ -122,6 +122,7 @@ struct roff { int options; /* parse options */ int rstacksz; /* current size limit of rstack */ int rstackpos; /* position in rstack */ + int format; /* current file in mdoc or man format */ char control; /* control character */ }; @@ -456,6 +457,7 @@ roff_reset(struct roff *r) { roff_free1(r); + r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); r->control = 0; } @@ -475,6 +477,7 @@ roff_alloc(struct mparse *parse, int opt r = mandoc_calloc(1, sizeof(struct roff)); r->parse = parse; r->options = options; + r->format = options & (MPARSE_MDOC | MPARSE_MAN); r->rstackpos = -1; roffhash_init(); @@ -1776,10 +1779,13 @@ roff_Dd(ROFF_ARGS) { const char *const *cp; - if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options)) + if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0) for (cp = __mdoc_reserved; *cp; cp++) roff_setstr(r, *cp, NULL, 0); + if (r->format == 0) + r->format = MPARSE_MDOC; + return(ROFF_CONT); } @@ -1788,10 +1794,13 @@ roff_TH(ROFF_ARGS) { const char *const *cp; - if (0 == (MPARSE_QUICK & r->options)) + if ((r->options & MPARSE_QUICK) == 0) for (cp = __man_reserved; *cp; cp++) roff_setstr(r, *cp, NULL, 0); + if (r->format == 0) + r->format = MPARSE_MAN; + return(ROFF_CONT); } @@ -2305,6 +2314,13 @@ roff_strdup(const struct roff *r, const res[(int)ssz] = '\0'; return(res); +} + +int +roff_getformat(const struct roff *r) +{ + + return(r->format); } /* Index: read.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/read.c,v retrieving revision 1.82 retrieving revision 1.83 diff -Lread.c -Lread.c -u -p -r1.82 -r1.83 --- read.c +++ 
read.c @@ -51,21 +51,22 @@ struct buf { }; struct mparse { - enum mandoclevel file_status; /* status of current parse */ - enum mandoclevel wlevel; /* ignore messages below this */ - int line; /* line number in the file */ - int options; /* parser options */ struct man *pman; /* persistent man parser */ struct mdoc *pmdoc; /* persistent mdoc parser */ struct man *man; /* man parser */ struct mdoc *mdoc; /* mdoc parser */ struct roff *roff; /* roff parser (!NULL) */ char *sodest; /* filename pointed to by .so */ - int reparse_count; /* finite interp. stack */ - mandocmsg mmsg; /* warning/error message handler */ - const char *file; - struct buf *secondary; + const char *file; /* filename of current input file */ + struct buf *primary; /* buffer currently being parsed */ + struct buf *secondary; /* preprocessed copy of input */ const char *defos; /* default operating system */ + mandocmsg mmsg; /* warning/error message handler */ + enum mandoclevel file_status; /* status of current parse */ + enum mandoclevel wlevel; /* ignore messages below this */ + int options; /* parser options */ + int reparse_count; /* finite interp. stack */ + int line; /* line number in the file */ }; static void resize_buf(struct buf *, size_t); @@ -248,19 +249,10 @@ resize_buf(struct buf *buf, size_t initi static void pset(const char *buf, int pos, struct mparse *curp) { + char *cp, *ep; + int format; int i; - /* - * Try to intuit which kind of manual parser should be used. If - * passed in by command-line (-man, -mdoc), then use that - * explicitly. If passed as -mandoc, then try to guess from the - * line: either skip dot-lines, use -mdoc when finding `.Dt', or - * default to -man, which is more lenient. - * - * Separate out pmdoc/pman from mdoc/man: the first persists - * through all parsers, while the latter is used per-parse. - */ - if ('.' 
== buf[0] || '\'' == buf[0]) { for (i = 1; buf[i]; i++) if (' ' != buf[i] && '\t' != buf[i]) @@ -269,15 +261,35 @@ pset(const char *buf, int pos, struct mp return; } - if (MPARSE_MDOC & curp->options) { - curp->mdoc = curp->pmdoc; - return; - } else if (MPARSE_MAN & curp->options) { - curp->man = curp->pman; - return; + /* + * If neither command line arguments -mdoc or -man select + * a parser nor the roff parser found a .Dd or .TH macro + * yet, look ahead in the main input buffer. + */ + + if ((format = roff_getformat(curp->roff)) == 0) { + cp = curp->primary->buf; + ep = cp + curp->primary->sz; + while (cp < ep) { + if (*cp == '.' || *cp == '\'') { + cp++; + if (cp[0] == 'D' && cp[1] == 'd') { + format = MPARSE_MDOC; + break; + } + if (cp[0] == 'T' && cp[1] == 'H') { + format = MPARSE_MAN; + break; + } + } + cp = memchr(cp, '\n', ep - cp); + if (cp == NULL) + break; + cp++; + } } - if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) { + if (format == MPARSE_MDOC) { if (NULL == curp->pmdoc) curp->pmdoc = mdoc_alloc( curp->roff, curp, curp->defos, @@ -287,6 +299,8 @@ pset(const char *buf, int pos, struct mp return; } + /* Fall back to man(7) as a last resort. */ + if (NULL == curp->pman) curp->pman = man_alloc(curp->roff, curp, MPARSE_QUICK & curp->options ? 1 : 0); @@ -720,6 +734,7 @@ mparse_parse_buffer(struct mparse *curp, /* Line number is per-file. 
*/ svfile = curp->file; curp->file = file; + curp->primary = &blk; curp->line = 1; recursion_depth++; Index: libmandoc.h =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/libmandoc.h,v retrieving revision 1.42 retrieving revision 1.43 diff -Llibmandoc.h -Llibmandoc.h -u -p -r1.42 -r1.43 --- libmandoc.h +++ libmandoc.h @@ -77,6 +77,7 @@ int roff_getreg(const struct roff *, c char *roff_strdup(const struct roff *, const char *); int roff_getcontrol(const struct roff *, const char *, int *); +int roff_getformat(const struct roff *); #if 0 char roff_eqndelim(const struct roff *); void roff_openeqn(struct roff *, const char *, -- To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv