From mboxrd@z Thu Jan  1 00:00:00 1970
Received: from mailout.scc.kit.edu (mailout.scc.kit.edu [129.13.185.202])
	by krisdoz.my.domain (8.14.5/8.14.5) with ESMTP id q4SD9C1L020142
	for <tech@mdocml.bsd.lv>; Mon, 28 May 2012 09:09:19 -0400 (EDT)
Received: from hekate.usta.de (asta-nat.asta.uni-karlsruhe.de [172.22.63.82])
	by scc-mailout-02.scc.kit.edu with esmtp (Exim 4.72 #1)
	id 1SYzh8-00072a-TI; Mon, 28 May 2012 15:09:10 +0200
Received: from donnerwolke.usta.de ([172.24.96.3])
	by hekate.usta.de with esmtp (Exim 4.77)
	(envelope-from <schwarze@usta.de>)
	id 1SYzh9-00050o-0P
	for tech@mdocml.bsd.lv; Mon, 28 May 2012 15:09:11 +0200
Received: from iris.usta.de ([172.24.96.5] helo=usta.de)
	by donnerwolke.usta.de with esmtp (Exim 4.72)
	(envelope-from <schwarze@usta.de>)
	id 1SYzh8-00072t-TT
	for tech@mdocml.bsd.lv; Mon, 28 May 2012 15:09:10 +0200
Received: from schwarze by usta.de with local (Exim 4.77)
	(envelope-from <schwarze@usta.de>)
	id 1SYzh8-0006V7-Ld
	for tech@mdocml.bsd.lv; Mon, 28 May 2012 15:09:10 +0200
Date: Mon, 28 May 2012 15:09:10 +0200
From: Ingo Schwarze <schwarze@usta.de>
To: tech@mdocml.bsd.lv
Subject: implement the roff \z escape sequence
Message-ID: <20120528130910.GA26820@iris.usta.de>
X-Mailinglist: mdocml-tech
Reply-To: tech@mdocml.bsd.lv
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
User-Agent: Mutt/1.5.21 (2010-09-15)

----- Forwarded message from Ingo Schwarze <schwarze@cvs.openbsd.org> -----

From: Ingo Schwarze <schwarze@cvs.openbsd.org>
Date: Mon, 28 May 2012 07:00:51 -0600 (MDT)
To: source-changes@cvs.openbsd.org

CVSROOT:	/cvs
Module name:	src
Changes by:	schwarze@cvs.openbsd.org	2012/05/28 07:00:51

Modified files:
	usr.bin/mandoc : html.c html.h mandoc.c mandoc.h term.c term.h 
	regress/usr.bin/mandoc/roff: Makefile 
Added files:
	regress/usr.bin/mandoc/roff/esc: Makefile z.in z.out_ascii 

Log message:
Implement the roff \z escape sequence, intended to output the next
character without advancing the cursor position; implement it to
simply skip the next character, as it will usually be overwritten.

With this change, the pod2man(1) preamble user-defined string \*:,
intended to render as a diaeresis or umlaut diacritic above the
preceding character, is rendered in a slightly less ugly way,
though still not correctly.  It was rendered as "z.." and is now
rendered as ".".

Given that the definition of \*: uses elaborate manual \h positioning,
there is little chance for mandoc(1) to ever render it correctly,
but at least we can refrain from printing out a spurious "z", and
we can make the \z do something semi-reasonable for easier cases.

----- End forwarded message -----

Index: html.c
===================================================================
RCS file: /cvs/src/usr.bin/mandoc/html.c,v
retrieving revision 1.29
diff -u -p -r1.29 html.c
--- html.c	9 Oct 2011 17:59:56 -0000	1.29
+++ html.c	28 May 2012 12:33:53 -0000
@@ -1,7 +1,7 @@
 /*	$Id: html.c,v 1.29 2011/10/09 17:59:56 schwarze Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -258,8 +258,8 @@ print_metaf(struct html *h, enum mandoc_
 int
 html_strlen(const char *cp)
 {
-	int		 ssz, sz;
-	const char	*seq, *p;
+	size_t		 rsz;
+	int		 skip, sz;
 
 	/*
 	 * Account for escaped sequences within string length
@@ -270,10 +270,21 @@ html_strlen(const char *cp)
 	 */
 
 	sz = 0;
-	while (NULL != (p = strchr(cp, '\\'))) {
-		sz += (int)(p - cp);
-		++cp;
-		switch (mandoc_escape(&cp, &seq, &ssz)) {
+	skip = 0;
+	while (1) {
+		rsz = strcspn(cp, "\\");
+		if (rsz) {
+			cp += rsz;
+			if (skip) {
+				skip = 0;
+				rsz--;
+			}
+			sz += rsz;
+		}
+		if ('\0' == *cp)
+			break;
+		cp++;
+		switch (mandoc_escape(&cp, NULL, NULL)) {
 		case (ESCAPE_ERROR):
 			return(sz);
 		case (ESCAPE_UNICODE):
@@ -281,15 +292,19 @@ html_strlen(const char *cp)
 		case (ESCAPE_NUMBERED):
 			/* FALLTHROUGH */
 		case (ESCAPE_SPECIAL):
-			sz++;
+			if (skip)
+				skip = 0;
+			else
+				sz++;
+			break;
+		case (ESCAPE_SKIPCHAR):
+			skip = 1;
 			break;
 		default:
 			break;
 		}
 	}
-
-	assert(sz >= 0);
-	return(sz + strlen(cp));
+	return(sz);
 }
 
 static int
@@ -304,6 +319,12 @@ print_encode(struct html *h, const char 
 	nospace = 0;
 
 	while ('\0' != *p) {
+		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
+			h->flags &= ~HTML_SKIPCHAR;
+			p++;
+			continue;
+		}
+
 		sz = strcspn(p, rejs);
 
 		fwrite(p, 1, sz, stdout);
@@ -334,6 +355,31 @@ print_encode(struct html *h, const char 
 			break;
 
 		switch (esc) {
+		case (ESCAPE_FONT):
+			/* FALLTHROUGH */
+		case (ESCAPE_FONTPREV):
+			/* FALLTHROUGH */
+		case (ESCAPE_FONTBOLD):
+			/* FALLTHROUGH */
+		case (ESCAPE_FONTITALIC):
+			/* FALLTHROUGH */
+		case (ESCAPE_FONTROMAN):
+			if (0 == norecurse)
+				print_metaf(h, esc);
+			continue;
+		case (ESCAPE_SKIPCHAR):
+			h->flags |= HTML_SKIPCHAR;
+			continue;
+		default:
+			break;
+		}
+
+		if (h->flags & HTML_SKIPCHAR) {
+			h->flags &= ~HTML_SKIPCHAR;
+			continue;
+		}
+
+		switch (esc) {
 		case (ESCAPE_UNICODE):
 			/* Skip passed "u" header. */
 			c = mchars_num2uc(seq + 1, len - 1);
@@ -351,19 +397,6 @@ print_encode(struct html *h, const char 
 				printf("&#%d;", c);
 			else if (-1 == c && 1 == len)
 				putchar((int)*seq);
-			break;
-		case (ESCAPE_FONT):
-			/* FALLTHROUGH */
-		case (ESCAPE_FONTPREV):
-			/* FALLTHROUGH */
-		case (ESCAPE_FONTBOLD):
-			/* FALLTHROUGH */
-		case (ESCAPE_FONTITALIC):
-			/* FALLTHROUGH */
-		case (ESCAPE_FONTROMAN):
-			if (norecurse)
-				break;
-			print_metaf(h, esc);
 			break;
 		case (ESCAPE_NOSPACE):
 			if ('\0' == *p)
Index: html.h
===================================================================
RCS file: /cvs/src/usr.bin/mandoc/html.h,v
retrieving revision 1.18
diff -u -p -r1.18 html.h
--- html.h	9 Oct 2011 17:59:56 -0000	1.18
+++ html.h	28 May 2012 12:33:53 -0000
@@ -117,6 +117,7 @@ struct	html {
 #define	HTML_PREKEEP	 (1 << 3)
 #define	HTML_NONOSPACE	 (1 << 4) /* never add spaces */
 #define	HTML_LITERAL	 (1 << 5) /* literal (e.g., <PRE>) context */
+#define	HTML_SKIPCHAR	 (1 << 6) /* skip the next character */
 	struct tagq	  tags; /* stack of open tags */
 	struct rofftbl	  tbl; /* current table */
 	struct tag	 *tblt; /* current open table scope */
Index: mandoc.c
===================================================================
RCS file: /cvs/src/usr.bin/mandoc/mandoc.c,v
retrieving revision 1.31
diff -u -p -r1.31 mandoc.c
--- mandoc.c	17 Nov 2011 11:58:11 -0000	1.31
+++ mandoc.c	28 May 2012 12:33:54 -0000
@@ -1,7 +1,7 @@
 /*	$Id: mandoc.c,v 1.31 2011/11/17 11:58:11 schwarze Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -137,6 +137,16 @@ mandoc_escape(const char **end, const ch
 		gly = ESCAPE_SPECIAL;
 		term = '\'';
 		break;
+
+	/*
+	 * The \z escape is supposed to output the following
+	 * character without advancing the cursor position.
+	 * Since we are mostly dealing with terminal mode,
+	 * let us just skip the next character.
+	 */
+	case ('z'):
+		(*end)++;
+		return(ESCAPE_SKIPCHAR);
 
 	/*
 	 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
Index: mandoc.h
===================================================================
RCS file: /cvs/src/usr.bin/mandoc/mandoc.h,v
retrieving revision 1.45
diff -u -p -r1.45 mandoc.h
--- mandoc.h	26 May 2012 20:03:34 -0000	1.45
+++ mandoc.h	28 May 2012 12:33:54 -0000
@@ -379,7 +379,8 @@ enum	mandoc_esc {
 	ESCAPE_FONTPREV, /* previous font mode */
 	ESCAPE_NUMBERED, /* a numbered glyph */
 	ESCAPE_UNICODE, /* a unicode codepoint */
-	ESCAPE_NOSPACE /* suppress space if the last on a line */
+	ESCAPE_NOSPACE, /* suppress space if the last on a line */
+	ESCAPE_SKIPCHAR /* skip the next character */
 };
 
 typedef	void	(*mandocmsg)(enum mandocerr, enum mandoclevel,
Index: term.c
===================================================================
RCS file: /cvs/src/usr.bin/mandoc/term.c,v
retrieving revision 1.63
diff -u -p -r1.63 term.c
--- term.c	27 May 2012 01:01:24 -0000	1.63
+++ term.c	28 May 2012 12:33:54 -0000
@@ -29,6 +29,7 @@
 #include "term.h"
 #include "main.h"
 
+static	size_t		 cond_width(const struct termp *, int, int *);
 static	void		 adjbuf(struct termp *p, int);
 static	void		 bufferc(struct termp *, char);
 static	void		 encode(struct termp *, const char *, size_t);
@@ -415,12 +416,17 @@ term_word(struct termp *p, const char *w
 	p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);
 
 	while ('\0' != *word) {
-		if ((ssz = strcspn(word, "\\")) > 0)
+		if ('\\' != *word) {
+			if (TERMP_SKIPCHAR & p->flags) {
+				p->flags &= ~TERMP_SKIPCHAR;
+				word++;
+				continue;
+			}
+			ssz = strcspn(word, "\\");
 			encode(p, word, ssz);
-
-		word += (int)ssz;
-		if ('\\' != *word)
+			word += (int)ssz;
 			continue;
+		}
 
 		word++;
 		esc = mandoc_escape(&word, &seq, &sz);
@@ -476,9 +482,14 @@ term_word(struct termp *p, const char *w
 			term_fontlast(p);
 			break;
 		case (ESCAPE_NOSPACE):
-			if ('\0' == *word)
+			if (TERMP_SKIPCHAR & p->flags)
+				p->flags &= ~TERMP_SKIPCHAR;
+			else if ('\0' == *word)
 				p->flags |= TERMP_NOSPACE;
 			break;
+		case (ESCAPE_SKIPCHAR):
+			p->flags |= TERMP_SKIPCHAR;
+			break;
 		default:
 			break;
 		}
@@ -518,6 +529,11 @@ encode1(struct termp *p, int c)
 {
 	enum termfont	  f;
 
+	if (TERMP_SKIPCHAR & p->flags) {
+		p->flags &= ~TERMP_SKIPCHAR;
+		return;
+	}
+
 	if (p->col + 4 >= p->maxcols)
 		adjbuf(p, p->col + 4);
 
@@ -541,6 +557,11 @@ encode(struct termp *p, const char *word
 	enum termfont	  f;
 	int		  i, len;
 
+	if (TERMP_SKIPCHAR & p->flags) {
+		p->flags &= ~TERMP_SKIPCHAR;
+		return;
+	}
+
 	/* LINTED */
 	len = sz;
 
@@ -589,12 +610,22 @@ term_len(const struct termp *p, size_t s
 	return((*p->width)(p, ' ') * sz);
 }
 
+static size_t
+cond_width(const struct termp *p, int c, int *skip)
+{
+
+	if (*skip) {
+		(*skip) = 0;
+		return(0);
+	} else
+		return((*p->width)(p, c));
+}
 
 size_t
 term_strlen(const struct termp *p, const char *cp)
 {
 	size_t		 sz, rsz, i;
-	int		 ssz, c;
+	int		 ssz, skip, c;
 	const char	*seq, *rhs;
 	enum mandoc_esc	 esc;
 	static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };
@@ -606,10 +637,11 @@ term_strlen(const struct termp *p, const
 	 */
 
 	sz = 0;
+	skip = 0;
 	while ('\0' != *cp) {
 		rsz = strcspn(cp, rej);
 		for (i = 0; i < rsz; i++)
-			sz += (*p->width)(p, *cp++);
+			sz += cond_width(p, *cp++, &skip);
 
 		c = 0;
 		switch (*cp) {
@@ -626,14 +658,14 @@ term_strlen(const struct termp *p, const
 						(seq + 1, ssz - 1);
 					if ('\0' == c)
 						break;
-					sz += (*p->width)(p, c);
+					sz += cond_width(p, c, &skip);
 					continue;
 				case (ESCAPE_SPECIAL):
 					c = mchars_spec2cp
 						(p->symtab, seq, ssz);
 					if (c <= 0)
 						break;
-					sz += (*p->width)(p, c);
+					sz += cond_width(p, c, &skip);
 					continue;
 				default:
 					break;
@@ -643,12 +675,12 @@ term_strlen(const struct termp *p, const
 
 			switch (esc) {
 			case (ESCAPE_UNICODE):
-				sz += (*p->width)(p, '?');
+				sz += cond_width(p, '?', &skip);
 				break;
 			case (ESCAPE_NUMBERED):
 				c = mchars_num2char(seq, ssz);
 				if ('\0' != c)
-					sz += (*p->width)(p, c);
+					sz += cond_width(p, c, &skip);
 				break;
 			case (ESCAPE_SPECIAL):
 				rhs = mchars_spec2str
@@ -660,6 +692,9 @@ term_strlen(const struct termp *p, const
 				rhs = seq;
 				rsz = ssz;
 				break;
+			case (ESCAPE_SKIPCHAR):
+				skip = 1;
+				break;
 			default:
 				break;
 			}
@@ -667,15 +702,20 @@ term_strlen(const struct termp *p, const
 			if (NULL == rhs)
 				break;
 
+			if (skip) {
+				skip = 0;
+				break;
+			}
+
 			for (i = 0; i < rsz; i++)
 				sz += (*p->width)(p, *rhs++);
 			break;
 		case (ASCII_NBRSP):
-			sz += (*p->width)(p, ' ');
+			sz += cond_width(p, ' ', &skip);
 			cp++;
 			break;
 		case (ASCII_HYPH):
-			sz += (*p->width)(p, '-');
+			sz += cond_width(p, '-', &skip);
 			cp++;
 			break;
 		default:
Index: term.h
===================================================================
RCS file: /cvs/src/usr.bin/mandoc/term.h,v
retrieving revision 1.35
diff -u -p -r1.35 term.h
--- term.h	27 May 2012 01:01:24 -0000	1.35
+++ term.h	28 May 2012 12:33:54 -0000
@@ -78,6 +78,7 @@ struct	termp {
 #define	TERMP_ANPREC	 (1 << 13)	/* See termp_an_pre(). */
 #define	TERMP_KEEP	 (1 << 14)	/* Keep words together. */
 #define	TERMP_PREKEEP	 (1 << 15)	/* ...starting with the next one. */
+#define	TERMP_SKIPCHAR	 (1 << 16)	/* Skip the next character. */
 	int		 *buf;		/* Output buffer. */
 	enum termenc	  enc;		/* Type of encoding. */
 	struct mchars	 *symtab;	/* Encoded-symbol table. */

--
 To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv