source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: Reduce false positives for the "no blank before trailing delimiter" message
@ 2017-06-10 16:54 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2017-06-10 16:54 UTC (permalink / raw)
  To: source

Log Message:
-----------
Reduce false positives for the "no blank before trailing delimiter" message.
This brings us down to about one false positive for every 18 pages.

Modified Files:
--------------
    mdocml:
        mdoc_validate.c

Revision Data
-------------
Index: mdoc_validate.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/mdoc_validate.c,v
retrieving revision 1.333
retrieving revision 1.334
diff -Lmdoc_validate.c -Lmdoc_validate.c -u -p -r1.333 -r1.334
--- mdoc_validate.c
+++ mdoc_validate.c
@@ -412,9 +412,17 @@ static void
 post_delim(POST_ARGS)
 {
 	const struct roff_node	*nch;
-	const char		*lc;
+	const char		*lc, *cp;
+	int			 nw;
 	enum mdelim		 delim;
+	enum roff_tok		 tok;
 
+	/*
+	 * Find candidates: at least two bytes,
+	 * the last one a closing or middle delimiter.
+	 */
+
+	tok = mdoc->last->tok;
 	nch = mdoc->last->last;
 	if (nch == NULL || nch->type != ROFFT_TEXT)
 		return;
@@ -424,9 +432,74 @@ post_delim(POST_ARGS)
 	delim = mdoc_isdelim(lc);
 	if (delim == DELIM_NONE || delim == DELIM_OPEN)
 		return;
+
+	/*
+	 * Reduce false positives by allowing various cases.
+	 */
+
+	/* Escaped delimiters. */
+	if (lc > nch->string + 1 && lc[-2] == '\\' &&
+	    (lc[-1] == '&' || lc[-1] == 'e'))
+		return;
+
+	/* Specific byte sequences. */
+	switch (*lc) {
+	case ')':
+		for (cp = lc; cp >= nch->string; cp--)
+			if (*cp == '(')
+				return;
+		break;
+	case '.':
+		if (lc > nch->string + 1 && lc[-2] == '.' && lc[-1] == '.')
+			return;
+		if (lc[-1] == '.')
+			return;
+		break;
+	case ';':
+		if (tok == MDOC_Vt)
+			return;
+		break;
+	case '?':
+		if (lc[-1] == '?')
+			return;
+		break;
+	case ']':
+		for (cp = lc; cp >= nch->string; cp--)
+			if (*cp == '[')
+				return;
+		break;
+	case '|':
+		if (lc == nch->string + 1 && lc[-1] == '|')
+			return;
+	default:
+		break;
+	}
+
+	/* Exactly two non-alphanumeric bytes. */
+	if (lc == nch->string + 1 && !isalnum((unsigned char)lc[-1]))
+		return;
+
+	/* At least three alphabetic words with a sentence ending. */
+	if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em ||
+	    tok == MDOC_Li || tok == MDOC_No || tok == MDOC_Po ||
+	    tok == MDOC_Pq || tok == MDOC_Sy)) {
+		nw = 0;
+		for (cp = lc - 1; cp >= nch->string; cp--) {
+			if (*cp == ' ') {
+				nw++;
+				if (cp > nch->string && cp[-1] == ',')
+					cp--;
+			} else if (isalpha((unsigned int)*cp)) {
+				if (nw > 1)
+					return;
+			} else
+				break;
+		}
+	}
+
 	mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse,
 	    nch->line, nch->pos + (lc - nch->string),
-	    "%s%s %s", roff_name[mdoc->last->tok],
+	    "%s%s %s", roff_name[tok],
 	    nch == mdoc->last->child ? "" : " ...", nch->string);
 }
 
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2017-06-10 16:54 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-10 16:54 mdocml: Reduce false positives for the "no blank before trailing delimiter" message schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).