source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mandoc: Style message about bad input encoding of em-dashes as --
@ 2018-03-16 15:06 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2018-03-16 15:06 UTC (permalink / raw)
  To: source

Log Message:
-----------
Style message about bad input encoding of em-dashes as -- instead of \(em.
Suggested by Thomas Klausner <wiz at NetBSD>; discussed with jmc@.

Modified Files:
--------------
    mandoc:
        mandoc.1
        mandoc.h
        mdoc_validate.c
        read.c

Revision Data
-------------
Index: read.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/read.c,v
retrieving revision 1.194
retrieving revision 1.195
diff -Lread.c -Lread.c -u -p -r1.194 -r1.195
--- read.c
+++ read.c
@@ -106,6 +106,7 @@ static	const char * const	mandocerrs[MAN
 	"no blank before trailing delimiter",
 	"fill mode already enabled, skipping",
 	"fill mode already disabled, skipping",
+	"verbatim \"--\", maybe consider using \\(em",
 	"function name without markup",
 	"whitespace at end of input line",
 	"bad comment style",
Index: mdoc_validate.c
===================================================================
RCS file: /home/cvs/mandoc/mandoc/mdoc_validate.c,v
retrieving revision 1.354
retrieving revision 1.355
diff -Lmdoc_validate.c -Lmdoc_validate.c -u -p -r1.354 -r1.355
--- mdoc_validate.c
+++ mdoc_validate.c
@@ -1,7 +1,7 @@
 /*	$Id$ */
 /*
  * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org>
  * Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -53,10 +53,11 @@ enum	check_ineq {
 typedef	void	(*v_post)(POST_ARGS);
 
 static	int	 build_list(struct roff_man *, int);
-static	void	 check_text(struct roff_man *, int, int, char *);
 static	void	 check_argv(struct roff_man *,
 			struct roff_node *, struct mdoc_argv *);
 static	void	 check_args(struct roff_man *, struct roff_node *);
+static	void	 check_text(struct roff_man *, int, int, char *);
+static	void	 check_text_em(struct roff_man *, int, int, char *);
 static	void	 check_toptext(struct roff_man *, int, int, const char *);
 static	int	 child_an(const struct roff_node *);
 static	size_t		macro2len(enum roff_tok);
@@ -288,7 +289,7 @@ static	const char * const secnames[SEC__
 void
 mdoc_node_validate(struct roff_man *mdoc)
 {
-	struct roff_node *n;
+	struct roff_node *n, *np;
 	const v_post *p;
 
 	n = mdoc->last;
@@ -305,13 +306,18 @@ mdoc_node_validate(struct roff_man *mdoc
 	mdoc->next = ROFF_NEXT_SIBLING;
 	switch (n->type) {
 	case ROFFT_TEXT:
+		np = n->parent;
 		if (n->sec != SEC_SYNOPSIS ||
-		    (n->parent->tok != MDOC_Cd && n->parent->tok != MDOC_Fd))
+		    (np->tok != MDOC_Cd && np->tok != MDOC_Fd))
 			check_text(mdoc, n->line, n->pos, n->string);
-		if (n->parent->tok == MDOC_It ||
-		    (n->parent->type == ROFFT_BODY &&
-		     (n->parent->tok == MDOC_Sh ||
-		      n->parent->tok == MDOC_Ss)))
+		if (np->tok != MDOC_Ql && np->tok != MDOC_Dl &&
+		    (np->tok != MDOC_Bd ||
+		     (mdoc->flags & MDOC_LITERAL) == 0) &&
+		    (np->tok != MDOC_It || np->type != ROFFT_HEAD ||
+		     np->parent->parent->norm->Bl.type != LIST_diag))
+			check_text_em(mdoc, n->line, n->pos, n->string);
+		if (np->tok == MDOC_It || (np->type == ROFFT_BODY &&
+		    (np->tok == MDOC_Sh || np->tok == MDOC_Ss)))
 			check_toptext(mdoc, n->line, n->pos, n->string);
 		break;
 	case ROFFT_EQN:
@@ -392,6 +398,57 @@ check_text(struct roff_man *mdoc, int ln
 	for (cp = p; NULL != (p = strchr(p, '\t')); p++)
 		mandoc_msg(MANDOCERR_FI_TAB, mdoc->parse,
 		    ln, pos + (int)(p - cp), NULL);
+}
+
+/* Report MANDOCERR_DASHDASH for the first blank-delimited "--" in the
+ * text p that has a letter next to it, in p or in a sibling text node. */
+static void
+check_text_em(struct roff_man *mdoc, int ln, int pos, char *p)
+{
+	const struct roff_node	*np, *nn;
+	char			*cp;
+
+	np = mdoc->last->prev;
+	nn = mdoc->last->next;
+
+	/* Look for em-dashes wrongly encoded as "--". */
+	for (cp = p; *cp != '\0'; cp++) {
+		/* Peek at cp[1]; advancing first could step past the NUL. */
+		if (cp[0] != '-' || cp[1] != '-')
+			continue;
+		cp++;
+
+		/* Skip input sequences of more than two '-'. */
+		if (cp[1] == '-') {
+			while (cp[1] == '-')
+				cp++;
+			continue;
+		}
+		/* Skip "--" directly attached to something else. */
+		if ((cp - p > 1 && cp[-2] != ' ') ||
+		    (cp[1] != '\0' && cp[1] != ' '))
+			continue;
+
+		/* Require a letter right before or right afterwards;
+		 * never index an empty sibling string or beyond the NUL. */
+		if ((cp - p > 2 ?
+		     isalpha((unsigned char)cp[-3]) :
+		     np != NULL &&
+		     np->type == ROFFT_TEXT &&
+		     *np->string != '\0' &&
+		     isalpha((unsigned char)np->string[
+		       strlen(np->string) - 1])) ||
+		    (cp[1] != '\0' && cp[2] != '\0' ?
+		     isalpha((unsigned char)cp[2]) :
+		     nn != NULL &&
+		     nn->type == ROFFT_TEXT &&
+		     *nn->string != '\0' &&
+		     isalpha((unsigned char)*nn->string))) {
+			mandoc_msg(MANDOCERR_DASHDASH, mdoc->parse,
+			    ln, pos + (int)(cp - p) - 1, NULL);
+			break;
+		}
+	}
 }
 
 static void
Index: mandoc.1
===================================================================
RCS file: /home/cvs/mandoc/mandoc/mandoc.1,v
retrieving revision 1.221
retrieving revision 1.222
diff -Lmandoc.1 -Lmandoc.1 -u -p -r1.221 -r1.222
--- mandoc.1
+++ mandoc.1
@@ -1,7 +1,7 @@
 .\"	$Id$
 .\"
 .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
-.\" Copyright (c) 2012, 2014-2017 Ingo Schwarze <schwarze@openbsd.org>
+.\" Copyright (c) 2012, 2014-2018 Ingo Schwarze <schwarze@openbsd.org>
 .\"
 .\" Permission to use, copy, modify, and distribute this software for any
 .\" purpose with or without fee is hereby granted, provided that the above
@@ -952,6 +952,12 @@ An
 request occurs even though the document already switched to no-fill mode
 and did not switch back to fill mode yet.
 It has no effect.
+.It Sy "verbatim \(dq--\(dq, maybe consider using \e(em"
+.Pq mdoc
+Even though the ASCII output device renders an em-dash as
+.Qq \-\- ,
+that is not a good way to write it in an input file
+because it renders poorly on all other output devices.
 .It Sy "function name without markup"
 .Pq mdoc
 A word followed by an empty pair of parentheses occurs on a text line.
Index: mandoc.h
===================================================================
RCS file: /home/cvs/mandoc/mandoc/mandoc.h,v
retrieving revision 1.246
retrieving revision 1.247
diff -Lmandoc.h -Lmandoc.h -u -p -r1.246 -r1.247
--- mandoc.h
+++ mandoc.h
@@ -68,6 +68,7 @@ enum	mandocerr {
 	MANDOCERR_DELIM_NB, /* no blank before trailing delimiter: macro ... */
 	MANDOCERR_FI_SKIP, /* fill mode already enabled, skipping: fi */
 	MANDOCERR_NF_SKIP, /* fill mode already disabled, skipping: nf */
+	MANDOCERR_DASHDASH, /* verbatim "--", maybe consider using \(em */
 	MANDOCERR_FUNC, /* function name without markup: name() */
 	MANDOCERR_SPACE_EOL, /* whitespace at end of input line */
 	MANDOCERR_COMMENT_BAD, /* bad comment style */
--
 To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2018-03-16 15:06 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-16 15:06 mandoc: Style message about bad input encoding of em-dashes as -- schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).