From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from localhost (fantadrom.bsd.lv [local]) by fantadrom.bsd.lv (OpenSMTPD) with ESMTPA id c37bc9b5 for ; Fri, 16 Mar 2018 10:06:14 -0500 (EST) Date: Fri, 16 Mar 2018 10:06:14 -0500 (EST) X-Mailinglist: mandoc-source Reply-To: source@mandoc.bsd.lv MIME-Version: 1.0 From: schwarze@mandoc.bsd.lv To: source@mandoc.bsd.lv Subject: mandoc: Style message about bad input encoding of em-dashes as -- X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Message-Id: <84f50dc2a476f549@fantadrom.bsd.lv> Log Message: ----------- Style message about bad input encoding of em-dashes as -- instead of \(em. Suggested by Thomas Klausner ; discussed with jmc@. Modified Files: -------------- mandoc: mandoc.1 mandoc.h mdoc_validate.c read.c Revision Data ------------- Index: read.c =================================================================== RCS file: /home/cvs/mandoc/mandoc/read.c,v retrieving revision 1.194 retrieving revision 1.195 diff -Lread.c -Lread.c -u -p -r1.194 -r1.195 --- read.c +++ read.c @@ -106,6 +106,7 @@ static const char * const mandocerrs[MAN "no blank before trailing delimiter", "fill mode already enabled, skipping", "fill mode already disabled, skipping", + "verbatim \"--\", maybe consider using \\(em", "function name without markup", "whitespace at end of input line", "bad comment style", Index: mdoc_validate.c =================================================================== RCS file: /home/cvs/mandoc/mandoc/mdoc_validate.c,v retrieving revision 1.354 retrieving revision 1.355 diff -Lmdoc_validate.c -Lmdoc_validate.c -u -p -r1.354 -r1.355 --- mdoc_validate.c +++ mdoc_validate.c @@ -1,7 +1,7 @@ /* $Id$ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons - * Copyright (c) 2010-2017 Ingo Schwarze + * Copyright (c) 2010-2018 Ingo Schwarze * Copyright (c) 2010 Joerg Sonnenberger * * Permission to use, copy, modify, and distribute this software for any @@ -53,10 +53,11 @@ enum check_ineq { typedef void (*v_post)(POST_ARGS); static int build_list(struct roff_man *, int); -static void check_text(struct roff_man *, int, int, char *); static void check_argv(struct roff_man *, struct roff_node *, struct mdoc_argv *); static void check_args(struct roff_man *, struct roff_node *); +static void check_text(struct roff_man *, int, int, char *); +static void check_text_em(struct roff_man *, int, int, char *); static void check_toptext(struct roff_man *, int, int, const char *); static int child_an(const struct roff_node *); static size_t macro2len(enum roff_tok); @@ -288,7 +289,7 @@ static const char * const secnames[SEC__ void mdoc_node_validate(struct roff_man *mdoc) { - struct roff_node *n; + struct roff_node *n, *np; const v_post *p; n = mdoc->last; @@ -305,13 +306,18 @@ mdoc_node_validate(struct roff_man *mdoc mdoc->next = ROFF_NEXT_SIBLING; switch (n->type) { case ROFFT_TEXT: + np = n->parent; if (n->sec != SEC_SYNOPSIS || - (n->parent->tok != MDOC_Cd && n->parent->tok != MDOC_Fd)) + (np->tok != MDOC_Cd && np->tok != MDOC_Fd)) check_text(mdoc, n->line, n->pos, n->string); - if (n->parent->tok == MDOC_It || - (n->parent->type == ROFFT_BODY && - (n->parent->tok == MDOC_Sh || - n->parent->tok == MDOC_Ss))) + if (np->tok != MDOC_Ql && np->tok != MDOC_Dl && + (np->tok != MDOC_Bd || + (mdoc->flags & MDOC_LITERAL) == 0) && + (np->tok != MDOC_It || np->type != ROFFT_HEAD || + np->parent->parent->norm->Bl.type != LIST_diag)) + check_text_em(mdoc, n->line, n->pos, n->string); + if (np->tok == MDOC_It || (np->type == ROFFT_BODY && + (np->tok == MDOC_Sh || np->tok == MDOC_Ss))) check_toptext(mdoc, n->line, n->pos, n->string); break; case ROFFT_EQN: @@ -392,6 +398,57 @@ check_text(struct roff_man *mdoc, int ln for (cp = p; NULL != (p = strchr(p, '\t')); p++) mandoc_msg(MANDOCERR_FI_TAB, mdoc->parse, ln, pos + (int)(p - cp), NULL); +} + +static void +check_text_em(struct roff_man *mdoc, int ln, int pos, char *p) +{ + const struct roff_node *np, *nn; + char *cp; + + np = mdoc->last->prev; + nn = mdoc->last->next; + + /* Look for em-dashes wrongly encoded as "--". */ + + for (cp = p; *cp != '\0'; cp++) { + if (*cp != '-' || *++cp != '-') + continue; + + /* Skip input sequences of more than two '-'. */ + + if (cp[1] == '-') { + while (cp[1] == '-') + cp++; + continue; + } + + /* Skip "--" directly attached to something else. */ + + if ((cp - p > 1 && cp[-2] != ' ') || + (cp[1] != '\0' && cp[1] != ' ')) + continue; + + /* Require a letter right before or right afterwards. */ + + if ((cp - p > 2 ? + isalpha((unsigned char)cp[-3]) : + np != NULL && + np->type == ROFFT_TEXT && + np->string != '\0' && + isalpha((unsigned char)np->string[ + strlen(np->string) - 1])) || + (cp[2] != '\0' ? + isalpha((unsigned char)cp[2]) : + nn != NULL && + nn->type == ROFFT_TEXT && + nn->string != '\0' && + isalpha((unsigned char)*nn->string))) { + mandoc_msg(MANDOCERR_DASHDASH, mdoc->parse, + ln, pos + (int)(cp - p) - 1, NULL); + break; + } + } } static void Index: mandoc.1 =================================================================== RCS file: /home/cvs/mandoc/mandoc/mandoc.1,v retrieving revision 1.221 retrieving revision 1.222 diff -Lmandoc.1 -Lmandoc.1 -u -p -r1.221 -r1.222 --- mandoc.1 +++ mandoc.1 @@ -1,7 +1,7 @@ .\" $Id$ .\" .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons -.\" Copyright (c) 2012, 2014-2017 Ingo Schwarze +.\" Copyright (c) 2012, 2014-2018 Ingo Schwarze .\" .\" Permission to use, copy, modify, and distribute this software for any .\" purpose with or without fee is hereby granted, provided that the above @@ -952,6 +952,12 @@ An request occurs even though the document already switched to no-fill mode and did not switch back to fill mode yet. It has no effect. +.It Sy "verbatim \(dq--\(dq, maybe consider using \e(em" +.Pq mdoc +Even though the ASCII output device renders an em-dash as +.Qq \-\- , +that is not a good way to write it in an input file +because it renders poorly on all other output devices. .It Sy "function name without markup" .Pq mdoc A word followed by an empty pair of parentheses occurs on a text line. Index: mandoc.h =================================================================== RCS file: /home/cvs/mandoc/mandoc/mandoc.h,v retrieving revision 1.246 retrieving revision 1.247 diff -Lmandoc.h -Lmandoc.h -u -p -r1.246 -r1.247 --- mandoc.h +++ mandoc.h @@ -68,6 +68,7 @@ enum mandocerr { MANDOCERR_DELIM_NB, /* no blank before trailing delimiter: macro ... */ MANDOCERR_FI_SKIP, /* fill mode already enabled, skipping: fi */ MANDOCERR_NF_SKIP, /* fill mode already disabled, skipping: nf */ + MANDOCERR_DASHDASH, /* verbatim "--", maybe consider using \(em */ MANDOCERR_FUNC, /* function name without markup: name() */ MANDOCERR_SPACE_EOL, /* whitespace at end of input line */ MANDOCERR_COMMENT_BAD, /* bad comment style */ -- To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv