source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: While i already got my fingers dirty on mandoc_escape(), profit
@ 2012-05-31 22:38 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2012-05-31 22:38 UTC (permalink / raw)
  To: source

Log Message:
-----------
While I already had my fingers dirty on mandoc_escape(),
take the opportunity to pull out some spaghetti, that is,
three confusing variables and fourteen pointless assignments
among them; instead, always operate on the official pointers
**start, **end, and *sz, each of which conveys an obvious meaning.

No functional change intended, and the new tests confirm that
everything still (err...) "works", as far as that word can be
applied to the kind of roff(7) mock-up code I'm polishing here.

"just commit" kristaps@

Modified Files:
--------------
    mdocml:
        mandoc.c

Revision Data
-------------
Index: mandoc.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.c,v
retrieving revision 1.64
retrieving revision 1.65
diff -Lmandoc.c -Lmandoc.c -u -p -r1.64 -r1.65
--- mandoc.c
+++ mandoc.c
@@ -42,20 +42,33 @@ static	char	*time2a(time_t);
 enum mandoc_esc
 mandoc_escape(const char **end, const char **start, int *sz)
 {
-	char		 c, term;
-	int		 i, rlim;
-	const char	*cp, *rstart;
+	const char	*local_start;
+	int		 local_sz;
+	char		 term;
 	enum mandoc_esc	 gly; 
 
-	cp = *end;
-	rstart = cp;
-	if (start)
-		*start = rstart;
-	i = rlim = 0;
+	/*
+	 * When the caller doesn't provide return storage,
+	 * use local storage.
+	 */
+
+	if (NULL == start)
+		start = &local_start;
+	if (NULL == sz)
+		sz = &local_sz;
+
+	/*
+	 * Beyond the backslash, at least one input character
+	 * is part of the escape sequence.  With one exception
+	 * (see below), that character won't be returned.
+	 */
+
 	gly = ESCAPE_ERROR;
+	*start = ++*end;
+	*sz = 0;
 	term = '\0';
 
-	switch ((c = cp[i++])) {
+	switch ((*start)[-1]) {
 	/*
 	 * First the glyphs.  There are several different forms of
 	 * these, but each eventually returns a substring of the glyph
@@ -63,7 +76,7 @@ mandoc_escape(const char **end, const ch
 	 */
 	case ('('):
 		gly = ESCAPE_SPECIAL;
-		rlim = 2;
+		*sz = 2;
 		break;
 	case ('['):
 		gly = ESCAPE_SPECIAL;
@@ -73,14 +86,15 @@ mandoc_escape(const char **end, const ch
 		 * Unicode codepoint.  Here, however, only check whether
 		 * it's not a zero-width escape.
 		 */
-		if ('u' == cp[i] && ']' != cp[i + 1])
+		if ('u' == (*start)[0] && ']' != (*start)[1])
 			gly = ESCAPE_UNICODE;
 		term = ']';
 		break;
 	case ('C'):
-		if ('\'' != cp[i])
+		if ('\'' != **start)
 			return(ESCAPE_ERROR);
 		gly = ESCAPE_SPECIAL;
+		*start = ++*end;
 		term = '\'';
 		break;
 
@@ -91,7 +105,6 @@ mandoc_escape(const char **end, const ch
 	 * let us just skip the next character.
 	 */
 	case ('z'):
-		(*end)++;
 		return(ESCAPE_SKIPCHAR);
 
 	/*
@@ -118,21 +131,17 @@ mandoc_escape(const char **end, const ch
 	case ('f'):
 		if (ESCAPE_ERROR == gly)
 			gly = ESCAPE_FONT;
-
-		rstart= &cp[i];
-		if (start) 
-			*start = rstart;
-
-		switch (cp[i++]) {
+		switch (**start) {
 		case ('('):
-			rlim = 2;
+			*start = ++*end;
+			*sz = 2;
 			break;
 		case ('['):
+			*start = ++*end;
 			term = ']';
 			break;
 		default:
-			rlim = 1;
-			i--;
+			*sz = 1;
 			break;
 		}
 		break;
@@ -154,9 +163,10 @@ mandoc_escape(const char **end, const ch
 	case ('X'):
 		/* FALLTHROUGH */
 	case ('Z'):
-		if ('\'' != cp[i++])
+		if ('\'' != **start)
 			return(ESCAPE_ERROR);
 		gly = ESCAPE_IGNORE;
+		*start = ++*end;
 		term = '\'';
 		break;
 
@@ -182,10 +192,11 @@ mandoc_escape(const char **end, const ch
 	case ('w'):
 		/* FALLTHROUGH */
 	case ('x'):
+		if ('\'' != **start)
+			return(ESCAPE_ERROR);
 		if (ESCAPE_ERROR == gly)
 			gly = ESCAPE_IGNORE;
-		if ('\'' != cp[i++])
-			return(ESCAPE_ERROR);
+		*start = ++*end;
 		term = '\'';
 		break;
 
@@ -194,17 +205,17 @@ mandoc_escape(const char **end, const ch
 	 * XXX Do any other escapes need similar handling?
 	 */
 	case ('N'):
-		if ('\0' == cp[i])
+		if ('\0' == **start)
 			return(ESCAPE_ERROR);
-		*end = &cp[++i];
-		if (isdigit((unsigned char)cp[i-1]))
+		(*end)++;
+		if (isdigit((unsigned char)**start)) {
+			*sz = 1;
 			return(ESCAPE_IGNORE);
+		}
+		(*start)++;
 		while (isdigit((unsigned char)**end))
 			(*end)++;
-		if (start)
-			*start = &cp[i];
-		if (sz)
-			*sz = *end - &cp[i];
+		*sz = *end - *start;
 		if ('\0' != **end)
 			(*end)++;
 		return(ESCAPE_NUMBERED);
@@ -215,54 +226,43 @@ mandoc_escape(const char **end, const ch
 	case ('s'):
 		gly = ESCAPE_IGNORE;
 
-		rstart = &cp[i];
-		if (start) 
-			*start = rstart;
-
 		/* See +/- counts as a sign. */
-		c = cp[i];
-		if ('+' == c || '-' == c || ASCII_HYPH == c)
-			++i;
+		if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
+			(*end)++;
 
-		switch (cp[i++]) {
+		switch (**end) {
 		case ('('):
-			rlim = 2;
+			*start = ++*end;
+			*sz = 2;
 			break;
 		case ('['):
+			*start = ++*end;
 			term = ']';
 			break;
 		case ('\''):
+			*start = ++*end;
 			term = '\'';
 			break;
 		default:
-			rlim = 1;
-			i--;
+			*sz = 1;
 			break;
 		}
 
-		/* See +/- counts as a sign. */
-		c = cp[i];
-		if ('+' == c || '-' == c || ASCII_HYPH == c)
-			++i;
-
 		break;
 
 	/*
 	 * Anything else is assumed to be a glyph.
+	 * In this case, pass back the character after the backslash.
 	 */
 	default:
 		gly = ESCAPE_SPECIAL;
-		rlim = 1;
-		i--;
+		*start = --*end;
+		*sz = 1;
 		break;
 	}
 
 	assert(ESCAPE_ERROR != gly);
 
-	*end = rstart = &cp[i];
-	if (start)
-		*start = rstart;
-
 	/*
 	 * Read up to the terminating character,
 	 * paying attention to nested escapes.
@@ -284,15 +284,13 @@ mandoc_escape(const char **end, const ch
 				break;
 			}
 		}
-		rlim = (*end)++ - rstart;
+		*sz = (*end)++ - *start;
 	} else {
-		assert(rlim > 0);
-		if ((size_t)rlim > strlen(rstart))
+		assert(*sz > 0);
+		if ((size_t)*sz > strlen(*start))
 			return(ESCAPE_ERROR);
-		*end += rlim;
+		*end += *sz;
 	}
-	if (sz)
-		*sz = rlim;
 
 	/* Run post-processors. */
 
@@ -302,12 +300,13 @@ mandoc_escape(const char **end, const ch
 		 * Pretend that the constant-width font modes are the
 		 * same as the regular font modes.
 		 */
-		if (2 == rlim && 'C' == *rstart)
-			rstart++;
-		else if (1 != rlim)
+		if (2 == *sz && 'C' == **start) {
+			(*start)++;
+			(*sz)--;
+		} else if (1 != *sz)
 			break;
 
-		switch (*rstart) {
+		switch (**start) {
 		case ('3'):
 			/* FALLTHROUGH */
 		case ('B'):
@@ -329,9 +328,7 @@ mandoc_escape(const char **end, const ch
 		}
 		break;
 	case (ESCAPE_SPECIAL):
-		if (1 != rlim)
-			break;
-		if ('c' == *rstart)
+		if (1 == *sz && 'c' == **start)
 			gly = ESCAPE_NOSPACE;
 		break;
 	default:
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2012-05-31 22:38 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-31 22:38 mdocml: While i already got my fingers dirty on mandoc_escape(), profit schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).