source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: Pure preprocessor implementation of the roff(7) .ec and .eo
@ 2017-06-04  0:13 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2017-06-04  0:13 UTC (permalink / raw)
  To: source

Log Message:
-----------
Pure preprocessor implementation of the roff(7) .ec and .eo requests
(escape character control), touching nothing after the preprocessing 
stage and keeping even the state variable local to the preprocessor.
Since the escape character is also used for line continuation, this
requires pulling the implementation of line continuation from the
input reader to the preprocessor, which also considerably shortens 
the code required for that.

When the escape character is changed, simply let the preprocessor
replace bare by escaped backslashes and instances of the non-standard
escape character with bare backslashes - that's all we need.

Oh, and if anybody dares to use these requests in OpenBSD manuals, 
sending a medium-sized pack of axe-murderers after them might be a 
worthwhile part of the punishment, but probably insufficient on its own.

Modified Files:
--------------
    mdocml:
        read.c
        roff.7
        roff.c

Revision Data
-------------
Index: roff.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/roff.c,v
retrieving revision 1.302
retrieving revision 1.303
diff -Lroff.c -Lroff.c -u -p -r1.302 -r1.303
--- roff.c
+++ roff.c
@@ -99,6 +99,7 @@ struct	roff {
 	int		 format; /* current file in mdoc or man format */
 	int		 argc; /* number of args of the last macro */
 	char		 control; /* control character */
+	char		 escape; /* escape character */
 };
 
 struct	roffnode {
@@ -155,6 +156,8 @@ static	enum rofferr	 roff_cond(ROFF_ARGS
 static	enum rofferr	 roff_cond_text(ROFF_ARGS);
 static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
 static	enum rofferr	 roff_ds(ROFF_ARGS);
+static	enum rofferr	 roff_ec(ROFF_ARGS);
+static	enum rofferr	 roff_eo(ROFF_ARGS);
 static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
 static	int		 roff_evalcond(struct roff *r, int, char *, int *);
 static	int		 roff_evalnum(struct roff *, int,
@@ -385,13 +388,13 @@ static	struct roffmac	 roffs[TOKEN_NONE]
 	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
 	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
 	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
-	{ roff_unsupp, NULL, NULL, 0 },  /* ec */
+	{ roff_ec, NULL, NULL, 0 },  /* ec */
 	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
 	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
 	{ roff_unsupp, NULL, NULL, 0 },  /* em */
 	{ roff_EN, NULL, NULL, 0 },  /* EN */
-	{ roff_unsupp, NULL, NULL, 0 },  /* eo */
+	{ roff_eo, NULL, NULL, 0 },  /* eo */
 	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
 	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
 	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
@@ -751,7 +754,8 @@ roff_reset(struct roff *r)
 {
 	roff_free1(r);
 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
-	r->control = 0;
+	r->control = '\0';
+	r->escape = '\\';
 }
 
 void
@@ -773,6 +777,7 @@ roff_alloc(struct mparse *parse, int opt
 	r->options = options;
 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
 	r->rstackpos = -1;
+	r->escape = '\\';
 	return r;
 }
 
@@ -1149,27 +1154,80 @@ roff_res(struct roff *r, struct buf *buf
 	int		 expand_count;	/* to avoid infinite loops */
 	int		 npos;	/* position in numeric expression */
 	int		 arg_complete; /* argument not interrupted by eol */
+	int		 done;	/* no more input available */
 	char		 term;	/* character terminating the escape */
 
-	expand_count = 0;
+	/* Search forward for comments. */
+
+	done = 0;
 	start = buf->buf + pos;
-	stesc = strchr(start, '\0') - 1;
-	while (stesc-- > start) {
+	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
+		if (stesc[0] != r->escape || stesc[1] == '\0')
+			continue;
+		stesc++;
+		if (*stesc != '"' && *stesc != '#')
+			continue;
+		cp = strchr(stesc--, '\0') - 1;
+		if (*cp == '\n') {
+			done = 1;
+			cp--;
+		}
+		if (*cp == ' ' || *cp == '\t')
+			mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
+			    ln, cp - buf->buf, NULL);
+		while (stesc > start && stesc[-1] == ' ')
+			stesc--;
+		*stesc = '\0';
+		break;
+	}
+	if (stesc == start)
+		return ROFF_CONT;
+	stesc--;
+
+	/* Notice the end of the input. */
+
+	if (*stesc == '\n') {
+		*stesc-- = '\0';
+		done = 1;
+	}
+
+	expand_count = 0;
+	while (stesc >= start) {
 
 		/* Search backwards for the next backslash. */
 
-		if (*stesc != '\\')
+		if (*stesc != r->escape) {
+			if (*stesc == '\\') {
+				*stesc = '\0';
+				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
+				    buf->buf, stesc + 1) + 1;
+				start = nbuf + pos;
+				stesc = nbuf + (stesc - buf->buf);
+				free(buf->buf);
+				buf->buf = nbuf;
+			}
+			stesc--;
 			continue;
+		}
 
 		/* If it is escaped, skip it. */
 
 		for (cp = stesc - 1; cp >= start; cp--)
-			if (*cp != '\\')
+			if (*cp != r->escape)
 				break;
 
 		if ((stesc - cp) % 2 == 0) {
-			stesc = (char *)cp;
+			while (stesc > cp)
+				*stesc-- = '\\';
 			continue;
+		} else if (stesc[1] != '\0') {
+			*stesc = '\\';
+		} else {
+			*stesc-- = '\0';
+			if (done)
+				continue;
+			else
+				return ROFF_APPEND;
 		}
 
 		/* Decide whether to expand or to check only. */
@@ -1195,6 +1253,7 @@ roff_res(struct roff *r, struct buf *buf
 				mandoc_vmsg(MANDOCERR_ESC_BAD,
 				    r->parse, ln, (int)(stesc - buf->buf),
 				    "%.*s", (int)(cp - stesc), stesc);
+			stesc--;
 			continue;
 		}
 
@@ -1409,7 +1468,7 @@ roff_parseln(struct roff *r, int ln, str
 	/* Expand some escape sequences. */
 
 	e = roff_res(r, buf, ln, pos);
-	if (e == ROFF_IGN)
+	if (e == ROFF_IGN || e == ROFF_APPEND)
 		return e;
 	assert(e == ROFF_CONT);
 
@@ -2849,7 +2908,7 @@ roff_cc(ROFF_ARGS)
 	p = buf->buf + pos;
 
 	if (*p == '\0' || (r->control = *p++) == '.')
-		r->control = 0;
+		r->control = '\0';
 
 	if (*p != '\0')
 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
@@ -2859,6 +2918,33 @@ roff_cc(ROFF_ARGS)
 }
 
 static enum rofferr
+roff_ec(ROFF_ARGS)
+{
+	const char	*p;
+
+	p = buf->buf + pos;
+	if (*p == '\0')
+		r->escape = '\\';
+	else {
+		r->escape = *p;
+		if (*++p != '\0')
+			mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
+			    ln, p - buf->buf, "ec ... %s", p);
+	}
+	return ROFF_IGN;
+}
+
+static enum rofferr
+roff_eo(ROFF_ARGS)
+{
+	r->escape = '\0';
+	if (buf->buf[pos] != '\0')
+		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
+		    ln, pos, "eo %s", buf->buf + pos);
+	return ROFF_IGN;
+}
+
+static enum rofferr
 roff_tr(ROFF_ARGS)
 {
 	const char	*p, *first, *second;
@@ -3385,9 +3471,9 @@ roff_getcontrol(const struct roff *r, co
 
 	pos = *ppos;
 
-	if (0 != r->control && cp[pos] == r->control)
+	if (r->control != '\0' && cp[pos] == r->control)
 		pos++;
-	else if (0 != r->control)
+	else if (r->control != '\0')
 		return 0;
 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
 		pos += 2;
Index: read.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/read.c,v
retrieving revision 1.169
retrieving revision 1.170
diff -Lread.c -Lread.c -u -p -r1.169 -r1.170
--- read.c
+++ read.c
@@ -326,7 +326,6 @@ mparse_buf_r(struct mparse *curp, struct
 	const char	*save_file;
 	char		*cp;
 	size_t		 pos; /* byte number in the ln buffer */
-	size_t		 j;  /* auxiliary byte number in the blk buffer */
 	enum rofferr	 rr;
 	int		 of;
 	int		 lnn; /* line number in the real file */
@@ -408,79 +407,14 @@ mparse_buf_r(struct mparse *curp, struct
 				continue;
 			}
 
-			/* Trailing backslash = a plain char. */
-
-			if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
-				ln.buf[pos++] = blk.buf[i++];
-				continue;
-			}
-
-			/*
-			 * Found escape and at least one other character.
-			 * When it's a newline character, skip it.
-			 * When there is a carriage return in between,
-			 * skip that one as well.
-			 */
-
-			if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
-			    '\n' == blk.buf[i + 2])
-				++i;
-			if ('\n' == blk.buf[i + 1]) {
-				i += 2;
-				++lnn;
-				continue;
-			}
-
-			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
-				j = i;
-				i += 2;
-				/* Comment, skip to end of line */
-				for (; i < blk.sz; ++i) {
-					if (blk.buf[i] != '\n')
-						continue;
-					if (blk.buf[i - 1] == ' ' ||
-					    blk.buf[i - 1] == '\t')
-						mandoc_msg(
-						    MANDOCERR_SPACE_EOL,
-						    curp, curp->line,
-						    pos + i-1 - j, NULL);
-					++i;
-					++lnn;
-					break;
-				}
-
-				/* Backout trailing whitespaces */
-				for (; pos > 0; --pos) {
-					if (ln.buf[pos - 1] != ' ')
-						break;
-					if (pos > 2 && ln.buf[pos - 2] == '\\')
-						break;
-				}
-				break;
-			}
-
-			/* Catch escaped bogus characters. */
-
-			c = (unsigned char) blk.buf[i+1];
-
-			if ( ! (isascii(c) &&
-			    (isgraph(c) || isblank(c)))) {
-				mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
-				    curp->line, pos, "0x%x", c);
-				i += 2;
-				ln.buf[pos++] = '?';
-				continue;
-			}
-
-			/* Some other escape sequence, copy & cont. */
-
-			ln.buf[pos++] = blk.buf[i++];
 			ln.buf[pos++] = blk.buf[i++];
 		}
 
-		if (pos >= ln.sz)
+		if (pos + 1 >= ln.sz)
 			resize_buf(&ln, 256);
 
+		if (i == blk.sz || blk.buf[i] == '\0')
+			ln.buf[pos++] = '\n';
 		ln.buf[pos] = '\0';
 
 		/*
Index: roff.7
===================================================================
RCS file: /home/cvs/mdocml/mdocml/roff.7,v
retrieving revision 1.82
retrieving revision 1.83
diff -Lroff.7 -Lroff.7 -u -p -r1.82 -r1.83
--- roff.7
+++ roff.7
@@ -808,8 +808,11 @@ This is a Heirloom extension and current
 Set a trap within a diversion.
 Currently unsupported.
 .It Ic \&ec Op Ar char
-Change the escape character.
-Currently unsupported.
+Enable the escape mechanism and change the escape character.
+The
+.Ar char
+argument defaults to the backslash
+.Pq Sq \e .
 .It Ic \&ecr
 Restore the escape character.
 Currently unsupported.
@@ -839,7 +842,6 @@ See
 .Ic \&EQ .
 .It Ic \&eo
 Disable the escape mechanism completely.
-Currently unsupported.
 .It Ic \&EP
 End a picture started by
 .Ic \&BP .
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2017-06-04  0:13 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-04  0:13 mdocml: Pure preprocessor implementation of the roff(7) .ec and .eo schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).