source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: Move main format autodetection from the parser dispatcher to the
@ 2014-09-06 22:39 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2014-09-06 22:39 UTC (permalink / raw)
  To: source

Log Message:
-----------
Move main format autodetection from the parser dispatcher to the
roff parser where .Dd and .TH are already detected, anyway.  This
improves robustness because it correctly handles whitespace or an
alternate control character before Dd.  In the parser dispatcher,
provide a fallback looking ahead in the input buffer instead of
always assuming man(7).  This corrects autodetection when Dd is
preceded by other macros or macro-like handled requests like .ll.

Triggered by reports from Daniel Levai about issues on Slackware Linux.

Modified Files:
--------------
    mdocml:
        libmandoc.h
        read.c
        roff.c

Revision Data
-------------
Index: roff.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/roff.c,v
retrieving revision 1.226
retrieving revision 1.227
diff -Lroff.c -Lroff.c -u -p -r1.226 -r1.227
--- roff.c
+++ roff.c
@@ -122,6 +122,7 @@ struct	roff {
 	int		 options; /* parse options */
 	int		 rstacksz; /* current size limit of rstack */
 	int		 rstackpos; /* position in rstack */
+	int		 format; /* current file in mdoc or man format */
 	char		 control; /* control character */
 };
 
@@ -456,6 +457,7 @@ roff_reset(struct roff *r)
 {
 
 	roff_free1(r);
+	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
 	r->control = 0;
 }
 
@@ -475,6 +477,7 @@ roff_alloc(struct mparse *parse, int opt
 	r = mandoc_calloc(1, sizeof(struct roff));
 	r->parse = parse;
 	r->options = options;
+	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
 	r->rstackpos = -1;
 
 	roffhash_init();
@@ -1776,10 +1779,13 @@ roff_Dd(ROFF_ARGS)
 {
 	const char *const	*cp;
 
-	if (0 == ((MPARSE_MDOC | MPARSE_QUICK) & r->options))
+	if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
 		for (cp = __mdoc_reserved; *cp; cp++)
 			roff_setstr(r, *cp, NULL, 0);
 
+	if (r->format == 0)
+		r->format = MPARSE_MDOC;
+
 	return(ROFF_CONT);
 }
 
@@ -1788,10 +1794,13 @@ roff_TH(ROFF_ARGS)
 {
 	const char *const	*cp;
 
-	if (0 == (MPARSE_QUICK & r->options))
+	if ((r->options & MPARSE_QUICK) == 0)
 		for (cp = __man_reserved; *cp; cp++)
 			roff_setstr(r, *cp, NULL, 0);
 
+	if (r->format == 0)
+		r->format = MPARSE_MAN;
+
 	return(ROFF_CONT);
 }
 
@@ -2305,6 +2314,13 @@ roff_strdup(const struct roff *r, const 
 
 	res[(int)ssz] = '\0';
 	return(res);
+}
+
+int
+roff_getformat(const struct roff *r)
+{
+
+	return(r->format);
 }
 
 /*
Index: read.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/read.c,v
retrieving revision 1.82
retrieving revision 1.83
diff -Lread.c -Lread.c -u -p -r1.82 -r1.83
--- read.c
+++ read.c
@@ -51,21 +51,22 @@ struct	buf {
 };
 
 struct	mparse {
-	enum mandoclevel  file_status; /* status of current parse */
-	enum mandoclevel  wlevel; /* ignore messages below this */
-	int		  line; /* line number in the file */
-	int		  options; /* parser options */
 	struct man	 *pman; /* persistent man parser */
 	struct mdoc	 *pmdoc; /* persistent mdoc parser */
 	struct man	 *man; /* man parser */
 	struct mdoc	 *mdoc; /* mdoc parser */
 	struct roff	 *roff; /* roff parser (!NULL) */
 	char		 *sodest; /* filename pointed to by .so */
-	int		  reparse_count; /* finite interp. stack */
-	mandocmsg	  mmsg; /* warning/error message handler */
-	const char	 *file;
-	struct buf	 *secondary;
+	const char	 *file; /* filename of current input file */
+	struct buf	 *primary; /* buffer currently being parsed */
+	struct buf	 *secondary; /* preprocessed copy of input */
 	const char	 *defos; /* default operating system */
+	mandocmsg	  mmsg; /* warning/error message handler */
+	enum mandoclevel  file_status; /* status of current parse */
+	enum mandoclevel  wlevel; /* ignore messages below this */
+	int		  options; /* parser options */
+	int		  reparse_count; /* finite interp. stack */
+	int		  line; /* line number in the file */
 };
 
 static	void	  resize_buf(struct buf *, size_t);
@@ -248,19 +249,10 @@ resize_buf(struct buf *buf, size_t initi
 static void
 pset(const char *buf, int pos, struct mparse *curp)
 {
+	char		*cp, *ep;
+	int		 format;
 	int		 i;
 
-	/*
-	 * Try to intuit which kind of manual parser should be used.  If
-	 * passed in by command-line (-man, -mdoc), then use that
-	 * explicitly.  If passed as -mandoc, then try to guess from the
-	 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
-	 * default to -man, which is more lenient.
-	 *
-	 * Separate out pmdoc/pman from mdoc/man: the first persists
-	 * through all parsers, while the latter is used per-parse.
-	 */
-
 	if ('.' == buf[0] || '\'' == buf[0]) {
 		for (i = 1; buf[i]; i++)
 			if (' ' != buf[i] && '\t' != buf[i])
@@ -269,15 +261,35 @@ pset(const char *buf, int pos, struct mp
 			return;
 	}
 
-	if (MPARSE_MDOC & curp->options) {
-		curp->mdoc = curp->pmdoc;
-		return;
-	} else if (MPARSE_MAN & curp->options) {
-		curp->man = curp->pman;
-		return;
+	/*
+	 * If neither command line arguments -mdoc or -man select
+	 * a parser nor the roff parser found a .Dd or .TH macro
+	 * yet, look ahead in the main input buffer.
+	 */
+
+	if ((format = roff_getformat(curp->roff)) == 0) {
+		cp = curp->primary->buf;
+		ep = cp + curp->primary->sz;
+		while (cp < ep) {
+			if (*cp == '.' || *cp == '\'') {
+				cp++;
+				if (cp[0] == 'D' && cp[1] == 'd') {
+					format = MPARSE_MDOC;
+					break;
+				}
+				if (cp[0] == 'T' && cp[1] == 'H') {
+					format = MPARSE_MAN;
+					break;
+				}
+			}
+			cp = memchr(cp, '\n', ep - cp);
+			if (cp == NULL)
+				break;
+			cp++;
+		}
 	}
 
-	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
+	if (format == MPARSE_MDOC) {
 		if (NULL == curp->pmdoc)
 			curp->pmdoc = mdoc_alloc(
 			    curp->roff, curp, curp->defos,
@@ -287,6 +299,8 @@ pset(const char *buf, int pos, struct mp
 		return;
 	}
 
+	/* Fall back to man(7) as a last resort. */
+
 	if (NULL == curp->pman)
 		curp->pman = man_alloc(curp->roff, curp,
 		    MPARSE_QUICK & curp->options ? 1 : 0);
@@ -720,6 +734,7 @@ mparse_parse_buffer(struct mparse *curp,
 	/* Line number is per-file. */
 	svfile = curp->file;
 	curp->file = file;
+	curp->primary = &blk;
 	curp->line = 1;
 	recursion_depth++;
 
Index: libmandoc.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/libmandoc.h,v
retrieving revision 1.42
retrieving revision 1.43
diff -Llibmandoc.h -Llibmandoc.h -u -p -r1.42 -r1.43
--- libmandoc.h
+++ libmandoc.h
@@ -77,6 +77,7 @@ int		 roff_getreg(const struct roff *, c
 char		*roff_strdup(const struct roff *, const char *);
 int		 roff_getcontrol(const struct roff *,
 			const char *, int *);
+int		 roff_getformat(const struct roff *);
 #if 0
 char		 roff_eqndelim(const struct roff *);
 void		 roff_openeqn(struct roff *, const char *,
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2014-09-06 22:39 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-06 22:39 mdocml: Move main format autodetection from the parser dispatcher to the schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).