From: schwarze@mdocml.bsd.lv
To: source@mdocml.bsd.lv
Subject: mdocml: Move main format autodetection from the parser dispatcher to the
Date: Sat, 6 Sep 2014 18:39:37 -0400 (EDT) [thread overview]
Message-ID: <201409062239.s86Mdbds018702@krisdoz.my.domain> (raw)
Log Message:
-----------
Move main format autodetection from the parser dispatcher to the
roff parser where .Dd and .TH are already detected, anyway. This
improves robustness because it correctly handles whitespace or an
alternate control character before Dd. In the parser dispatcher,
provide a fallback looking ahead in the input buffer instead of
always assuming man(7). This corrects autodetection when Dd is
preceded by other macros or macro-like handled requests like .ll.
Triggered by reports from Daniel Levai about issues on Slackware Linux.
Modified Files:
--------------
mdocml:
libmandoc.h
read.c
roff.c
Revision Data
-------------
Index: roff.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/roff.c,v
retrieving revision 1.226
retrieving revision 1.227
diff -Lroff.c -Lroff.c -u -p -r1.226 -r1.227
--- roff.c
+++ roff.c
@@ -122,6 +122,7 @@ struct roff {
int options; /* parse options */
int rstacksz; /* current size limit of rstack */
int rstackpos; /* position in rstack */
+ int format; /* current file in mdoc or man format */
char control; /* control character */
};
@@ -456,6 +457,7 @@ roff_reset(struct roff *r)
{
roff_free1(r);
+ r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
r->control = 0;
}
@@ -475,6 +477,7 @@ roff_alloc(struct mparse *parse, int opt
r = mandoc_calloc(1, sizeof(struct roff));
r->parse = parse;
r->options = options;
+ r->format = options & (MPARSE_MDOC | MPARSE_MAN);
r->rstackpos = -1;
roffhash_init();
@@ -1776,10 +1779,13 @@ roff_Dd(ROFF_ARGS)
{
const char *const *cp;
- if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
+ if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
for (cp = __mdoc_reserved; *cp; cp++)
roff_setstr(r, *cp, NULL, 0);
+ if (r->format == 0)
+ r->format = MPARSE_MDOC;
+
return(ROFF_CONT);
}
@@ -1788,10 +1794,13 @@ roff_TH(ROFF_ARGS)
{
const char *const *cp;
- if (0 == (MPARSE_QUICK & r->options))
+ if ((r->options & MPARSE_QUICK) == 0)
for (cp = __man_reserved; *cp; cp++)
roff_setstr(r, *cp, NULL, 0);
+ if (r->format == 0)
+ r->format = MPARSE_MAN;
+
return(ROFF_CONT);
}
@@ -2305,6 +2314,13 @@ roff_strdup(const struct roff *r, const
res[(int)ssz] = '\0';
return(res);
+}
+
+int
+roff_getformat(const struct roff *r)
+{
+
+ return(r->format);
}
/*
Index: read.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/read.c,v
retrieving revision 1.82
retrieving revision 1.83
diff -Lread.c -Lread.c -u -p -r1.82 -r1.83
--- read.c
+++ read.c
@@ -51,21 +51,22 @@ struct buf {
};
struct mparse {
- enum mandoclevel file_status; /* status of current parse */
- enum mandoclevel wlevel; /* ignore messages below this */
- int line; /* line number in the file */
- int options; /* parser options */
struct man *pman; /* persistent man parser */
struct mdoc *pmdoc; /* persistent mdoc parser */
struct man *man; /* man parser */
struct mdoc *mdoc; /* mdoc parser */
struct roff *roff; /* roff parser (!NULL) */
char *sodest; /* filename pointed to by .so */
- int reparse_count; /* finite interp. stack */
- mandocmsg mmsg; /* warning/error message handler */
- const char *file;
- struct buf *secondary;
+ const char *file; /* filename of current input file */
+ struct buf *primary; /* buffer currently being parsed */
+ struct buf *secondary; /* preprocessed copy of input */
const char *defos; /* default operating system */
+ mandocmsg mmsg; /* warning/error message handler */
+ enum mandoclevel file_status; /* status of current parse */
+ enum mandoclevel wlevel; /* ignore messages below this */
+ int options; /* parser options */
+ int reparse_count; /* finite interp. stack */
+ int line; /* line number in the file */
};
static void resize_buf(struct buf *, size_t);
@@ -248,19 +249,10 @@ resize_buf(struct buf *buf, size_t initi
static void
pset(const char *buf, int pos, struct mparse *curp)
{
+ char *cp, *ep;
+ int format;
int i;
- /*
- * Try to intuit which kind of manual parser should be used. If
- * passed in by command-line (-man, -mdoc), then use that
- * explicitly. If passed as -mandoc, then try to guess from the
- * line: either skip dot-lines, use -mdoc when finding `.Dt', or
- * default to -man, which is more lenient.
- *
- * Separate out pmdoc/pman from mdoc/man: the first persists
- * through all parsers, while the latter is used per-parse.
- */
-
if ('.' == buf[0] || '\'' == buf[0]) {
for (i = 1; buf[i]; i++)
if (' ' != buf[i] && '\t' != buf[i])
@@ -269,15 +261,35 @@ pset(const char *buf, int pos, struct mp
return;
}
- if (MPARSE_MDOC & curp->options) {
- curp->mdoc = curp->pmdoc;
- return;
- } else if (MPARSE_MAN & curp->options) {
- curp->man = curp->pman;
- return;
+ /*
+ * If neither command line arguments -mdoc or -man select
+ * a parser nor the roff parser found a .Dd or .TH macro
+ * yet, look ahead in the main input buffer.
+ */
+
+ if ((format = roff_getformat(curp->roff)) == 0) {
+ cp = curp->primary->buf;
+ ep = cp + curp->primary->sz;
+ while (cp < ep) {
+ if (*cp == '.' || *cp == '\'') {
+ cp++;
+ if (cp[0] == 'D' && cp[1] == 'd') {
+ format = MPARSE_MDOC;
+ break;
+ }
+ if (cp[0] == 'T' && cp[1] == 'H') {
+ format = MPARSE_MAN;
+ break;
+ }
+ }
+ cp = memchr(cp, '\n', ep - cp);
+ if (cp == NULL)
+ break;
+ cp++;
+ }
}
- if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
+ if (format == MPARSE_MDOC) {
if (NULL == curp->pmdoc)
curp->pmdoc = mdoc_alloc(
curp->roff, curp, curp->defos,
@@ -287,6 +299,8 @@ pset(const char *buf, int pos, struct mp
return;
}
+ /* Fall back to man(7) as a last resort. */
+
if (NULL == curp->pman)
curp->pman = man_alloc(curp->roff, curp,
MPARSE_QUICK & curp->options ? 1 : 0);
@@ -720,6 +734,7 @@ mparse_parse_buffer(struct mparse *curp,
/* Line number is per-file. */
svfile = curp->file;
curp->file = file;
+ curp->primary = &blk;
curp->line = 1;
recursion_depth++;
Index: libmandoc.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/libmandoc.h,v
retrieving revision 1.42
retrieving revision 1.43
diff -Llibmandoc.h -Llibmandoc.h -u -p -r1.42 -r1.43
--- libmandoc.h
+++ libmandoc.h
@@ -77,6 +77,7 @@ int roff_getreg(const struct roff *, c
char *roff_strdup(const struct roff *, const char *);
int roff_getcontrol(const struct roff *,
const char *, int *);
+int roff_getformat(const struct roff *);
#if 0
char roff_eqndelim(const struct roff *);
void roff_openeqn(struct roff *, const char *,
--
To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv
reply other threads:[~2014-09-06 22:39 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=201409062239.s86Mdbds018702@krisdoz.my.domain \
--to=schwarze@mdocml.bsd.lv \
--cc=source@mdocml.bsd.lv \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).