source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mandoc: Split a new function roff_parse_comment() out of roff_expand()
@ 2022-05-01 16:22 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2022-05-01 16:22 UTC (permalink / raw)
  To: source

Log Message:
-----------
Split a new function roff_parse_comment() out of roff_expand() because this
functionality is not needed when called from roff_getarg().  This makes the
long and complicated function roff_expand() significantly shorter, and also
simpler in so far as it no longer needs to return ROFF_APPEND.
No functional change intended.

Modified Files:
--------------
    mandoc:
        roff.c
    mandoc/regress/roff/esc:
        Makefile

Added Files:
-----------
    mandoc/regress/roff/esc:
        comment.in
        comment.out_ascii
        comment.out_lint

Revision Data
-------------
Index: roff.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/roff.c,v
retrieving revision 1.386
retrieving revision 1.387
diff -Lroff.c -Lroff.c -u -p -r1.386 -r1.387
--- roff.c
+++ roff.c
@@ -234,6 +234,8 @@ static	int		 roff_nr(ROFF_ARGS);
 static	int		 roff_onearg(ROFF_ARGS);
 static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
 				int, int);
+static	int		 roff_parse_comment(struct roff *, struct buf *,
+				int, int, char);
 static	int		 roff_parsetext(struct roff *, struct buf *,
 				int, int *);
 static	int		 roff_renamed(ROFF_ARGS);
@@ -1231,6 +1233,98 @@ deroff(char **dest, const struct roff_no
 
 /* --- main functions of the roff parser ---------------------------------- */
 
+static int
+roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos,
+    char newesc)
+{
+	struct roff_node *n;	/* used for header comments */
+	const char	*start;	/* start of the string to process */
+	const char	*cp;	/* for RCS id parsing */
+	char		*stesc;	/* start of an escape sequence ('\\') */
+	char		*ep;	/* end of comment string */
+	int		 rcsid;	/* kind of RCS id seen */
+
+	for (start = stesc = buf->buf + pos;; stesc++) {
+		/* The line ends without continuation or comment. */
+		if (stesc[0] == '\0')
+			return ROFF_CONT;
+
+		/* Unescaped byte: skip it. */
+		if (stesc[0] != newesc)
+			continue;
+
+		/* Backslash at end of line requests line continuation. */
+		if (stesc[1] == '\0') {
+			stesc[0] = '\0';
+			return ROFF_IGN | ROFF_APPEND;
+		}
+
+		/* Found a comment: process it. */
+		if (stesc[1] == '"' || stesc[1] == '#')
+			break;
+
+		/* Escaped escape character: skip them both. */
+		if (stesc[1] == newesc)
+			stesc++;
+	}
+
+	/* Look for an RCS id in the comment. */
+
+	rcsid = 0;
+	if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
+		rcsid = 1 << MANDOC_OS_OPENBSD;
+		cp += 8;
+	} else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
+		rcsid = 1 << MANDOC_OS_NETBSD;
+		cp += 7;
+	}
+	if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
+	    strchr(cp, '$') != NULL) {
+		if (r->man->meta.rcsids & rcsid)
+			mandoc_msg(MANDOCERR_RCS_REP, ln,
+			    (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
+		r->man->meta.rcsids |= rcsid;
+	}
+
+	/* Warn about trailing whitespace at the end of the comment. */
+
+	ep = strchr(stesc + 2, '\0') - 1;
+	if (*ep == '\n')
+		*ep-- = '\0';
+	if (*ep == ' ' || *ep == '\t')
+		mandoc_msg(MANDOCERR_SPACE_EOL,
+		    ln, (int)(ep - buf->buf), NULL);
+
+	/* Save comments preceding the title macro in the syntax tree. */
+
+	if (r->options & MPARSE_COMMENT) {
+		while (*ep == ' ' || *ep == '\t')
+			ep--;
+		ep[1] = '\0';
+		n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
+		    ROFFT_COMMENT, TOKEN_NONE);
+		n->string = mandoc_strdup(stesc + 2);
+		roff_node_append(r->man, n);
+		n->flags |= NODE_VALID | NODE_ENDED;
+		r->man->next = ROFF_NEXT_SIBLING;
+	}
+
+	/* The comment requests line continuation. */
+
+	if (stesc[1] == '#') {
+		*stesc = '\0';
+		return ROFF_IGN | ROFF_APPEND;
+	}
+
+	/* Discard the comment including preceding whitespace. */
+
+	while (stesc > start && stesc[-1] == ' ' &&
+	    (stesc == start + 1 || stesc[-2] != '\\'))
+		stesc--;
+	*stesc = '\0';
+	return ROFF_CONT;
+}
+
 /*
  * In the current line, expand escape sequences that produce parsable
  * input text.  Also check the syntax of the remaining escape sequences,
@@ -1241,11 +1335,9 @@ roff_expand(struct roff *r, struct buf *
 {
 	struct mctx	*ctx;	/* current macro call context */
 	char		 ubuf[24]; /* buffer to print the number */
-	struct roff_node *n;	/* used for header comments */
 	const char	*start;	/* start of the string to process */
 	char		*stesc;	/* start of an escape sequence ('\\') */
 	const char	*esct;	/* type of esccape sequence */
-	char		*ep;	/* end of comment string */
 	const char	*stnam;	/* start of the name, after "[(*" */
 	const char	*cp;	/* end of the name, e.g. before ']' */
 	const char	*res;	/* the string to be substituted */
@@ -1259,98 +1351,15 @@ roff_expand(struct roff *r, struct buf *
 	int		 npos;	/* position in numeric expression */
 	int		 arg_complete; /* argument not interrupted by eol */
 	int		 quote_args; /* true for \\$@, false for \\$* */
-	int		 done;	/* no more input available */
 	int		 deftype; /* type of definition to paste */
-	int		 rcsid;	/* kind of RCS id seen */
 	enum mandocerr	 err;	/* for escape sequence problems */
 	char		 sign;	/* increment number register */
 	char		 term;	/* character terminating the escape */
 
-	/* Search forward for comments. */
-
-	done = 0;
 	start = buf->buf + pos;
-	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
-		if (stesc[0] != newesc || stesc[1] == '\0')
-			continue;
-		stesc++;
-		if (*stesc != '"' && *stesc != '#')
-			continue;
-
-		/* Comment found, look for RCS id. */
-
-		rcsid = 0;
-		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
-			rcsid = 1 << MANDOC_OS_OPENBSD;
-			cp += 8;
-		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
-			rcsid = 1 << MANDOC_OS_NETBSD;
-			cp += 7;
-		}
-		if (cp != NULL &&
-		    isalnum((unsigned char)*cp) == 0 &&
-		    strchr(cp, '$') != NULL) {
-			if (r->man->meta.rcsids & rcsid)
-				mandoc_msg(MANDOCERR_RCS_REP, ln,
-				    (int)(stesc - buf->buf) + 1,
-				    "%s", stesc + 1);
-			r->man->meta.rcsids |= rcsid;
-		}
-
-		/* Handle trailing whitespace. */
-
-		ep = strchr(stesc--, '\0') - 1;
-		if (*ep == '\n') {
-			done = 1;
-			ep--;
-		}
-		if (*ep == ' ' || *ep == '\t')
-			mandoc_msg(MANDOCERR_SPACE_EOL,
-			    ln, (int)(ep - buf->buf), NULL);
-
-		/*
-		 * Save comments preceding the title macro
-		 * in the syntax tree.
-		 */
-
-		if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
-			while (*ep == ' ' || *ep == '\t')
-				ep--;
-			ep[1] = '\0';
-			n = roff_node_alloc(r->man,
-			    ln, stesc + 1 - buf->buf,
-			    ROFFT_COMMENT, TOKEN_NONE);
-			n->string = mandoc_strdup(stesc + 2);
-			roff_node_append(r->man, n);
-			n->flags |= NODE_VALID | NODE_ENDED;
-			r->man->next = ROFF_NEXT_SIBLING;
-		}
-
-		/* Line continuation with comment. */
-
-		if (stesc[1] == '#') {
-			*stesc = '\0';
-			return ROFF_IGN | ROFF_APPEND;
-		}
-
-		/* Discard normal comments. */
-
-		while (stesc > start && stesc[-1] == ' ' &&
-		    (stesc == start + 1 || stesc[-2] != '\\'))
-			stesc--;
-		*stesc = '\0';
-		break;
-	}
-	if (stesc == start)
-		return ROFF_CONT;
-	stesc--;
-
-	/* Notice the end of the input. */
-
-	if (*stesc == '\n') {
+	stesc = strchr(start, '\0') - 1;
+	if (stesc >= start && *stesc == '\n')
 		*stesc-- = '\0';
-		done = 1;
-	}
 
 	expand_count = 0;
 	while (stesc >= start) {
@@ -1389,15 +1398,11 @@ roff_expand(struct roff *r, struct buf *
 			while (stesc > cp)
 				*stesc-- = '\\';
 			continue;
-		} else if (stesc[1] != '\0') {
-			*stesc = '\\';
-		} else {
+		} else if (stesc[1] == '\0') {
 			*stesc-- = '\0';
-			if (done)
-				continue;
-			else
-				return ROFF_IGN | ROFF_APPEND;
-		}
+			continue;
+		} else
+			*stesc = '\\';
 
 		/* Decide whether to expand or to check only. */
 
@@ -1856,7 +1861,12 @@ roff_parseln(struct roff *r, int ln, str
 		assert(e == ROFF_CONT);
 	}
 
-	/* Expand some escape sequences. */
+	/* Handle comments and escape sequences. */
+
+	e = roff_parse_comment(r, buf, ln, pos, r->escape);
+	if ((e & ROFF_MASK) == ROFF_IGN)
+		return e;
+	assert(e == ROFF_CONT);
 
 	e = roff_expand(r, buf, ln, pos, r->escape);
 	if ((e & ROFF_MASK) == ROFF_IGN)
--- /dev/null
+++ regress/roff/esc/comment.out_ascii
@@ -0,0 +1,19 @@
+ROFF-ESC-COMMENT(1)         General Commands Manual        ROFF-ESC-COMMENT(1)
+
+N\bNA\bAM\bME\bE
+     r\bro\bof\bff\bf-\b-e\bes\bsc\bc-\b-c\bco\bom\bmm\bme\ben\bnt\bt - roff(7) comments
+
+D\bDE\bES\bSC\bCR\bRI\bIP\bPT\bTI\bIO\bON\bN
+     text line continuation
+
+     macro line continuation: [-\b-f\bf _\bf_\bi_\bl_\be]
+
+     whitespace   at the end of an input line
+
+     text line with comment
+
+     continuation  requested by a comment
+
+     Surpisingly, the sequence \" does not start a comment.
+
+OpenBSD                           May 1, 2022                          OpenBSD
--- /dev/null
+++ regress/roff/esc/comment.out_lint
@@ -0,0 +1 @@
+mandoc: comment.in:22:29: STYLE: whitespace at end of input line
Index: Makefile
===================================================================
RCS file: /home/cvs/mandoc/mandoc/regress/roff/esc/Makefile,v
retrieving revision 1.9
retrieving revision 1.10
diff -Lregress/roff/esc/Makefile -Lregress/roff/esc/Makefile -u -p -r1.9 -r1.10
--- regress/roff/esc/Makefile
+++ regress/roff/esc/Makefile
@@ -1,10 +1,10 @@
-# $OpenBSD: Makefile,v 1.19 2022/04/27 13:30:19 schwarze Exp $
+# $OpenBSD: Makefile,v 1.20 2022/05/01 16:18:59 schwarze Exp $
 
-REGRESS_TARGETS	 = one two multi
+REGRESS_TARGETS	 = one two multi comment
 REGRESS_TARGETS	+= B bs_man bs_mdoc c c_man E1 e f h hneg l O1 o p w z
 REGRESS_TARGETS	+= ignore invalid unsupp
 HTML_TARGETS	 = f
-LINT_TARGETS	 = B h l O1 w ignore invalid unsupp
+LINT_TARGETS	 = comment B h l O1 w ignore invalid unsupp
 
 # mandoc defect:
 # - \h with a negative argument replaces output characters
--- /dev/null
+++ regress/roff/esc/comment.in
@@ -0,0 +1,25 @@
+.\" $OpenBSD: comment.in,v 1.1 2022/05/01 16:18:59 schwarze Exp $
+.Dd $Mdocdate: May 1 2022 $
+.Dt ROFF-ESC-COMMENT 1
+.Os
+.Sh NAME
+.Nm roff-esc-comment
+.Nd roff(7) comments
+.Sh DESCRIPTION
+text line cont\
+inuation
+.Pp
+macro line continuation:
+.Op Fl f A\
+r file
+.Pp
+whitespace  \&
+at the end of an input line
+.Pp
+text line with  \"not printed\
+comment
+.Pp
+continuation  \#not printed  
+requested by a comment
+.Pp
+Surpisingly, the sequence \\" does not start a comment.
--
 To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-05-01 16:22 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-01 16:22 mandoc: Split a new function roff_parse_comment() out of roff_expand() schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).