source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: Strip non-graphable input characters from input.
@ 2010-07-20 14:56 kristaps
  0 siblings, 0 replies; only message in thread
From: kristaps @ 2010-07-20 14:56 UTC (permalink / raw)
  To: source

Log Message:
-----------
Strip non-graphable characters from input.  The manuals specifically
say that such characters are not allowed; were they allowed, output
would be inconsistent across output media (-Tps will puke, terminals
using a different character set will puke, etc.).

With this done, simplify check_text() to check only for escapes and
tabs.  Add a new tab warning, too.

Modified Files:
--------------
    mdocml:
        main.c
        man_validate.c
        mandoc.h
        mdoc_validate.c

Revision Data
-------------
Index: mdoc_validate.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mdoc_validate.c,v
retrieving revision 1.111
retrieving revision 1.112
diff -Lmdoc_validate.c -Lmdoc_validate.c -u -p -r1.111 -r1.112
--- mdoc_validate.c
+++ mdoc_validate.c
@@ -453,26 +453,29 @@ check_argv(struct mdoc *m, struct mdoc_n
 
 
 static int
-check_text(struct mdoc *mdoc, int line, int pos, char *p)
+check_text(struct mdoc *m, int ln, int pos, char *p)
 {
 	int		 c;
-
-	/* 
-	 * FIXME: we absolutely cannot let \b get through or it will
-	 * destroy some assumptions in terms of format.
-	 */
+	size_t		 sz;
 
 	for ( ; *p; p++, pos++) {
+		sz = strcspn(p, "\t\\");
+		p += (int)sz;
+
+		if ('\0' == *p)
+			break;
+
+		pos += (int)sz;
+
 		if ('\t' == *p) {
-			if ( ! (MDOC_LITERAL & mdoc->flags))
-				if ( ! mdoc_pmsg(mdoc, line, pos, MANDOCERR_BADCHAR))
-					return(0);
-		} else if ( ! isprint((u_char)*p) && ASCII_HYPH != *p)
-			if ( ! mdoc_pmsg(mdoc, line, pos, MANDOCERR_BADCHAR))
-				return(0);
+			if (MDOC_LITERAL & m->flags)
+				continue;
+			if (mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB))
+				continue;
+			return(0);
+		}
 
-		if ('\\' != *p)
-			continue;
+		/* Check the special character. */
 
 		c = mandoc_special(p);
 		if (c) {
@@ -481,8 +484,8 @@ check_text(struct mdoc *mdoc, int line, 
 			continue;
 		}
 
-		c = mdoc_pmsg(mdoc, line, pos, MANDOCERR_BADESCAPE);
-		if ( ! (MDOC_IGN_ESCAPE & mdoc->pflags) && ! c)
+		c = mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE);
+		if ( ! (MDOC_IGN_ESCAPE & m->pflags) && ! c)
 			return(c);
 	}
 
@@ -490,8 +493,6 @@ check_text(struct mdoc *mdoc, int line, 
 }
 
 
-
-
 static int
 check_parent(PRE_ARGS, enum mdoct tok, enum mdoc_type t)
 {
@@ -507,7 +508,6 @@ check_parent(PRE_ARGS, enum mdoct tok, e
 					mdoc_macronames[tok]);
 	return(0);
 }
-
 
 
 static int
Index: mandoc.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.h,v
retrieving revision 1.16
retrieving revision 1.17
diff -Lmandoc.h -Lmandoc.h -u -p -r1.16 -r1.17
--- mandoc.h
+++ mandoc.h
@@ -39,6 +39,7 @@ enum	mandocerr {
 	MANDOCERR_LISTFIRST, /* list type must come first */
 	MANDOCERR_BADSTANDARD, /* bad standard */
 	MANDOCERR_BADLIB, /* bad library */
+	MANDOCERR_BADTAB, /* tab in non-literal context */
 	MANDOCERR_BADESCAPE, /* bad escape sequence */
 	MANDOCERR_BADQUOTE, /* unterminated quoted string */
 	MANDOCERR_NOWIDTHARG, /* argument requires the width argument */
Index: man_validate.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/man_validate.c,v
retrieving revision 1.45
retrieving revision 1.46
diff -Lman_validate.c -Lman_validate.c -u -p -r1.45 -r1.46
--- man_validate.c
+++ man_validate.c
@@ -26,6 +26,7 @@
 #include <limits.h>
 #include <stdarg.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "mandoc.h"
 #include "libman.h"
@@ -206,32 +207,37 @@ check_text(CHKARGS) 
 {
 	char		*p;
 	int		 pos, c;
-
-	assert(n->string);
+	size_t		 sz;
 
 	for (p = n->string, pos = n->pos + 1; *p; p++, pos++) {
-		if ('\\' == *p) {
-			c = mandoc_special(p);
-			if (c) {
-				p += c - 1;
-				pos += c - 1;
-				continue;
-			}
+		sz = strcspn(p, "\t\\");
+		p += (int)sz;
+
+		if ('\0' == *p)
+			break;
 
-			c = man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE);
-			if ( ! (MAN_IGN_ESCAPE & m->pflags) && ! c)
-				return(c);
+		pos += (int)sz;
+
+		if ('\t' == *p) {
+			if (MAN_LITERAL & m->flags)
+				continue;
+			if (man_pmsg(m, n->line, pos, MANDOCERR_BADTAB))
+				continue;
+			return(0);
 		}
 
-		/* 
-		 * FIXME: we absolutely cannot let \b get through or it
-		 * will destroy some assumptions in terms of format.
-	 	 */
+		/* Check the special character. */
 
-		if ('\t' == *p || isprint((u_char)*p) || ASCII_HYPH == *p) 
+		c = mandoc_special(p);
+		if (c) {
+			p += c - 1;
+			pos += c - 1;
 			continue;
-		if ( ! man_pmsg(m, n->line, pos, MANDOCERR_BADCHAR))
-			return(0);
+		}
+
+		c = man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE);
+		if ( ! (MAN_IGN_ESCAPE & m->pflags) && ! c)
+			return(c);
 	}
 
 	return(1);
Index: main.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/main.c,v
retrieving revision 1.98
retrieving revision 1.99
diff -Lmain.c -Lmain.c -u -p -r1.98 -r1.99
--- main.c
+++ main.c
@@ -23,6 +23,7 @@
 #include <sys/stat.h>
 
 #include <assert.h>
+#include <ctype.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -110,6 +111,7 @@ static	const char * const	mandocerrs[MAN
 	"list type must come first",
 	"bad standard",
 	"bad library",
+	"tab in non-literal context",
 	"bad escape sequence",
 	"unterminated quoted string",
 	"argument requires the width argument",
@@ -491,6 +493,26 @@ fdesc(struct curparse *curp)
 				++lnn;
 				break;
 			}
+
+			/* 
+			 * Warn about bogus characters.  If you're using
+			 * non-ASCII encoding, you're screwing your
+			 * readers.  Since I'd rather this not happen,
+			 * I'll be helpful and drop these characters so
+			 * we don't display gibberish.  Note to manual
+			 * writers: use special characters.
+			 */
+
+			if ( ! isgraph((u_char)blk.buf[i]) &&
+					! isblank((u_char)blk.buf[i])) {
+				if ( ! mmsg(MANDOCERR_BADCHAR, curp, 
+						lnn_start, pos, 
+						"ignoring byte"))
+					goto bailout;
+				i++;
+				continue;
+			}
+
 			/* Trailing backslash is like a plain character. */
 			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
 				if (pos >= (int)ln.sz)
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2010-07-20 14:56 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-07-20 14:56 mdocml: Strip non-graphable input characters from input kristaps

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).