* mandoc: Split a new function roff_parse_comment() out of roff_expand()
@ 2022-05-01 16:22 schwarze
0 siblings, 0 replies; only message in thread
From: schwarze @ 2022-05-01 16:22 UTC (permalink / raw)
To: source
Log Message:
-----------
Split a new function roff_parse_comment() out of roff_expand() because this
functionality is not needed when called from roff_getarg(). This makes the
long and complicated function roff_expand() significantly shorter, and also
simpler in so far as it no longer needs to return ROFF_APPEND.
No functional change intended.
Modified Files:
--------------
mandoc:
roff.c
mandoc/regress/roff/esc:
Makefile
Added Files:
-----------
mandoc/regress/roff/esc:
comment.in
comment.out_ascii
comment.out_lint
Revision Data
-------------
Index: roff.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/roff.c,v
retrieving revision 1.386
retrieving revision 1.387
diff -Lroff.c -Lroff.c -u -p -r1.386 -r1.387
--- roff.c
+++ roff.c
@@ -234,6 +234,8 @@ static int roff_nr(ROFF_ARGS);
static int roff_onearg(ROFF_ARGS);
static enum roff_tok roff_parse(struct roff *, char *, int *,
int, int);
+static int roff_parse_comment(struct roff *, struct buf *,
+ int, int, char);
static int roff_parsetext(struct roff *, struct buf *,
int, int *);
static int roff_renamed(ROFF_ARGS);
@@ -1231,6 +1233,98 @@ deroff(char **dest, const struct roff_no
/* --- main functions of the roff parser ---------------------------------- */
+static int
+roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos,
+ char newesc)
+{
+ struct roff_node *n; /* used for header comments */
+ const char *start; /* start of the string to process */
+ const char *cp; /* for RCS id parsing */
+ char *stesc; /* start of an escape sequence ('\\') */
+ char *ep; /* end of comment string */
+ int rcsid; /* kind of RCS id seen */
+
+ for (start = stesc = buf->buf + pos;; stesc++) {
+ /* The line ends without continuation or comment. */
+ if (stesc[0] == '\0')
+ return ROFF_CONT;
+
+ /* Unescaped byte: skip it. */
+ if (stesc[0] != newesc)
+ continue;
+
+ /* Backslash at end of line requests line continuation. */
+ if (stesc[1] == '\0') {
+ stesc[0] = '\0';
+ return ROFF_IGN | ROFF_APPEND;
+ }
+
+ /* Found a comment: process it. */
+ if (stesc[1] == '"' || stesc[1] == '#')
+ break;
+
+ /* Escaped escape character: skip them both. */
+ if (stesc[1] == newesc)
+ stesc++;
+ }
+
+ /* Look for an RCS id in the comment. */
+
+ rcsid = 0;
+ if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
+ rcsid = 1 << MANDOC_OS_OPENBSD;
+ cp += 8;
+ } else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
+ rcsid = 1 << MANDOC_OS_NETBSD;
+ cp += 7;
+ }
+ if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
+ strchr(cp, '$') != NULL) {
+ if (r->man->meta.rcsids & rcsid)
+ mandoc_msg(MANDOCERR_RCS_REP, ln,
+ (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
+ r->man->meta.rcsids |= rcsid;
+ }
+
+ /* Warn about trailing whitespace at the end of the comment. */
+
+ ep = strchr(stesc + 2, '\0') - 1;
+ if (*ep == '\n')
+ *ep-- = '\0';
+ if (*ep == ' ' || *ep == '\t')
+ mandoc_msg(MANDOCERR_SPACE_EOL,
+ ln, (int)(ep - buf->buf), NULL);
+
+ /* Save comments preceding the title macro in the syntax tree. */
+
+ if (r->options & MPARSE_COMMENT) {
+ while (*ep == ' ' || *ep == '\t')
+ ep--;
+ ep[1] = '\0';
+ n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
+ ROFFT_COMMENT, TOKEN_NONE);
+ n->string = mandoc_strdup(stesc + 2);
+ roff_node_append(r->man, n);
+ n->flags |= NODE_VALID | NODE_ENDED;
+ r->man->next = ROFF_NEXT_SIBLING;
+ }
+
+ /* The comment requests line continuation. */
+
+ if (stesc[1] == '#') {
+ *stesc = '\0';
+ return ROFF_IGN | ROFF_APPEND;
+ }
+
+ /* Discard the comment including preceding whitespace. */
+
+ while (stesc > start && stesc[-1] == ' ' &&
+ (stesc == start + 1 || stesc[-2] != '\\'))
+ stesc--;
+ *stesc = '\0';
+ return ROFF_CONT;
+}
+
/*
* In the current line, expand escape sequences that produce parsable
* input text. Also check the syntax of the remaining escape sequences,
@@ -1241,11 +1335,9 @@ roff_expand(struct roff *r, struct buf *
{
struct mctx *ctx; /* current macro call context */
char ubuf[24]; /* buffer to print the number */
- struct roff_node *n; /* used for header comments */
const char *start; /* start of the string to process */
char *stesc; /* start of an escape sequence ('\\') */
const char *esct; /* type of esccape sequence */
- char *ep; /* end of comment string */
const char *stnam; /* start of the name, after "[(*" */
const char *cp; /* end of the name, e.g. before ']' */
const char *res; /* the string to be substituted */
@@ -1259,98 +1351,15 @@ roff_expand(struct roff *r, struct buf *
int npos; /* position in numeric expression */
int arg_complete; /* argument not interrupted by eol */
int quote_args; /* true for \\$@, false for \\$* */
- int done; /* no more input available */
int deftype; /* type of definition to paste */
- int rcsid; /* kind of RCS id seen */
enum mandocerr err; /* for escape sequence problems */
char sign; /* increment number register */
char term; /* character terminating the escape */
- /* Search forward for comments. */
-
- done = 0;
start = buf->buf + pos;
- for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
- if (stesc[0] != newesc || stesc[1] == '\0')
- continue;
- stesc++;
- if (*stesc != '"' && *stesc != '#')
- continue;
-
- /* Comment found, look for RCS id. */
-
- rcsid = 0;
- if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
- rcsid = 1 << MANDOC_OS_OPENBSD;
- cp += 8;
- } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
- rcsid = 1 << MANDOC_OS_NETBSD;
- cp += 7;
- }
- if (cp != NULL &&
- isalnum((unsigned char)*cp) == 0 &&
- strchr(cp, '$') != NULL) {
- if (r->man->meta.rcsids & rcsid)
- mandoc_msg(MANDOCERR_RCS_REP, ln,
- (int)(stesc - buf->buf) + 1,
- "%s", stesc + 1);
- r->man->meta.rcsids |= rcsid;
- }
-
- /* Handle trailing whitespace. */
-
- ep = strchr(stesc--, '\0') - 1;
- if (*ep == '\n') {
- done = 1;
- ep--;
- }
- if (*ep == ' ' || *ep == '\t')
- mandoc_msg(MANDOCERR_SPACE_EOL,
- ln, (int)(ep - buf->buf), NULL);
-
- /*
- * Save comments preceding the title macro
- * in the syntax tree.
- */
-
- if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
- while (*ep == ' ' || *ep == '\t')
- ep--;
- ep[1] = '\0';
- n = roff_node_alloc(r->man,
- ln, stesc + 1 - buf->buf,
- ROFFT_COMMENT, TOKEN_NONE);
- n->string = mandoc_strdup(stesc + 2);
- roff_node_append(r->man, n);
- n->flags |= NODE_VALID | NODE_ENDED;
- r->man->next = ROFF_NEXT_SIBLING;
- }
-
- /* Line continuation with comment. */
-
- if (stesc[1] == '#') {
- *stesc = '\0';
- return ROFF_IGN | ROFF_APPEND;
- }
-
- /* Discard normal comments. */
-
- while (stesc > start && stesc[-1] == ' ' &&
- (stesc == start + 1 || stesc[-2] != '\\'))
- stesc--;
- *stesc = '\0';
- break;
- }
- if (stesc == start)
- return ROFF_CONT;
- stesc--;
-
- /* Notice the end of the input. */
-
- if (*stesc == '\n') {
+ stesc = strchr(start, '\0') - 1;
+ if (stesc >= start && *stesc == '\n')
*stesc-- = '\0';
- done = 1;
- }
expand_count = 0;
while (stesc >= start) {
@@ -1389,15 +1398,11 @@ roff_expand(struct roff *r, struct buf *
while (stesc > cp)
*stesc-- = '\\';
continue;
- } else if (stesc[1] != '\0') {
- *stesc = '\\';
- } else {
+ } else if (stesc[1] == '\0') {
*stesc-- = '\0';
- if (done)
- continue;
- else
- return ROFF_IGN | ROFF_APPEND;
- }
+ continue;
+ } else
+ *stesc = '\\';
/* Decide whether to expand or to check only. */
@@ -1856,7 +1861,12 @@ roff_parseln(struct roff *r, int ln, str
assert(e == ROFF_CONT);
}
- /* Expand some escape sequences. */
+ /* Handle comments and escape sequences. */
+
+ e = roff_parse_comment(r, buf, ln, pos, r->escape);
+ if ((e & ROFF_MASK) == ROFF_IGN)
+ return e;
+ assert(e == ROFF_CONT);
e = roff_expand(r, buf, ln, pos, r->escape);
if ((e & ROFF_MASK) == ROFF_IGN)
--- /dev/null
+++ regress/roff/esc/comment.out_ascii
@@ -0,0 +1,19 @@
+ROFF-ESC-COMMENT(1) General Commands Manual ROFF-ESC-COMMENT(1)
+
+N\bNA\bAM\bME\bE
+ r\bro\bof\bff\bf-\b-e\bes\bsc\bc-\b-c\bco\bom\bmm\bme\ben\bnt\bt - roff(7) comments
+
+D\bDE\bES\bSC\bCR\bRI\bIP\bPT\bTI\bIO\bON\bN
+ text line continuation
+
+ macro line continuation: [-\b-f\bf _\bf_\bi_\bl_\be]
+
+ whitespace at the end of an input line
+
+ text line with comment
+
+ continuation requested by a comment
+
+ Surpisingly, the sequence \" does not start a comment.
+
+OpenBSD May 1, 2022 OpenBSD
--- /dev/null
+++ regress/roff/esc/comment.out_lint
@@ -0,0 +1 @@
+mandoc: comment.in:22:29: STYLE: whitespace at end of input line
Index: Makefile
===================================================================
RCS file: /home/cvs/mandoc/mandoc/regress/roff/esc/Makefile,v
retrieving revision 1.9
retrieving revision 1.10
diff -Lregress/roff/esc/Makefile -Lregress/roff/esc/Makefile -u -p -r1.9 -r1.10
--- regress/roff/esc/Makefile
+++ regress/roff/esc/Makefile
@@ -1,10 +1,10 @@
-# $OpenBSD: Makefile,v 1.19 2022/04/27 13:30:19 schwarze Exp $
+# $OpenBSD: Makefile,v 1.20 2022/05/01 16:18:59 schwarze Exp $
-REGRESS_TARGETS = one two multi
+REGRESS_TARGETS = one two multi comment
REGRESS_TARGETS += B bs_man bs_mdoc c c_man E1 e f h hneg l O1 o p w z
REGRESS_TARGETS += ignore invalid unsupp
HTML_TARGETS = f
-LINT_TARGETS = B h l O1 w ignore invalid unsupp
+LINT_TARGETS = comment B h l O1 w ignore invalid unsupp
# mandoc defect:
# - \h with a negative argument replaces output characters
--- /dev/null
+++ regress/roff/esc/comment.in
@@ -0,0 +1,25 @@
+.\" $OpenBSD: comment.in,v 1.1 2022/05/01 16:18:59 schwarze Exp $
+.Dd $Mdocdate: May 1 2022 $
+.Dt ROFF-ESC-COMMENT 1
+.Os
+.Sh NAME
+.Nm roff-esc-comment
+.Nd roff(7) comments
+.Sh DESCRIPTION
+text line cont\
+inuation
+.Pp
+macro line continuation:
+.Op Fl f A\
+r file
+.Pp
+whitespace \&
+at the end of an input line
+.Pp
+text line with \"not printed\
+comment
+.Pp
+continuation \#not printed
+requested by a comment
+.Pp
+Surpisingly, the sequence \\" does not start a comment.
--
To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-05-01 16:22 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-01 16:22 mandoc: Split a new function roff_parse_comment() out of roff_expand() schwarze
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).