tech@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: Paul Eggert <eggert@cs.ucla.edu>
To: tech@mandoc.bsd.lv
Cc: g.branden.robinson@gmail.com, Ingo Schwarze <schwarze@usta.de>,
	Alejandro Colomar <alx.manpages@gmail.com>
Subject: mandoc mishandles tzfile(5)'s .IP \(bu "\w'\(bu 'u"
Date: Sun, 22 Oct 2023 17:41:28 -0700	[thread overview]
Message-ID: <160e850e-c5eb-45a9-9011-b766ade9dd10@cs.ucla.edu> (raw)
In-Reply-To: <ZTWO6/0aqdxtS6Vg@asta-kit.de>

[-- Attachment #1: Type: text/plain, Size: 781 bytes --]

On 2023-10-22 14:06, Ingo Schwarze wrote:
> mandoc only supports 
> ASCII strings as arguments to \w, not escape sequences or formatting 
> instructions.

For the TZDB man pages mandoc need not support all that, just \(bu.

Just to make sure we're on the same page, I reproduced the problem by 
running the command "mandoc -man -Tascii t.5", where t.5 contains the 
following lines:

         .TH tzfile 5
         .SH NAME
         .IP \(bu "\w'\(bu  'u"
         xxx
         .PP
         yyy

The output should contain two spaces between the bullet's "o" and the 
"x", but with current mandoc it contains five spaces.

Proposed mandoc patch attached. This isn't a perfect emulation of groff, 
nor have I tested with fancy constructs, but it should be good enough 
for tzfile(5).

[-- Attachment #2: mandoc-tzfile-fix.txt --]
[-- Type: text/plain, Size: 5541 bytes --]

Index: roff.c
===================================================================
RCS file: /cvs/mandoc/roff.c,v
retrieving revision 1.398
diff -u -r1.398 roff.c
--- roff.c	22 Oct 2023 16:02:01 -0000	1.398
+++ roff.c	22 Oct 2023 20:59:52 -0000
@@ -1367,6 +1367,7 @@
 	int		 iarg;		/* index beginning the argument */
 	int		 iendarg;	/* index right after the argument */
 	int		 iend;		/* index right after the sequence */
+	int		 icols;		/* output columns of sequence */
 	int		 isrc, idst;	/* to reduce \\ and \. in names */
 	int		 deftype;	/* type of definition to paste */
 	int		 argi;		/* macro argument index */
@@ -1404,7 +1405,7 @@
 		 */

 		if (roff_escape(buf->buf, ln, pos, &iesc, &inam,
-		    &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
+		    &iarg, &iendarg, &iend, &icols) != ESCAPE_EXPAND) {
 			while (pos < iend) {
 				if (buf->buf[pos] == ec) {
 					buf->buf[pos] = '\\';
@@ -1552,7 +1553,7 @@
 			break;
 		case 'w':
 			(void)snprintf(ubuf, sizeof(ubuf),
-			    "%d", (iendarg - iarg) * 24);
+			    "%d", icols * 24);
 			res = ubuf;
 			break;
 		default:
@@ -4030,7 +4031,7 @@
 		if (cp[1] == '{' || cp[1] == '}')
 			break;
 		if (roff_escape(cp, 0, 0, NULL, &inam,
-		    NULL, NULL, &iend) != ESCAPE_UNDEF) {
+		    NULL, NULL, &iend, NULL) != ESCAPE_UNDEF) {
 			mandoc_msg(MANDOCERR_NAMESC, ln, pos,
 			    "%.*s%.*s", namesz, name, iend, cp);
 			cp += iend;
Index: roff_escape.c
===================================================================
RCS file: /cvs/mandoc/roff_escape.c,v
retrieving revision 1.14
diff -u -r1.14 roff_escape.c
--- roff_escape.c	8 Jun 2022 13:23:57 -0000	1.14
+++ roff_escape.c	22 Oct 2023 20:59:52 -0000
@@ -42,7 +42,7 @@
         enum mandoc_esc  rval;

         rval = roff_escape(--*rendarg, 0, 0,
-	    NULL, NULL, &iarg, &iendarg, &iend);
+	    NULL, NULL, &iarg, &iendarg, &iend, NULL);
         assert(rval != ESCAPE_EXPAND);
         if (rarg != NULL)
 	       *rarg = *rendarg + iarg;
@@ -64,14 +64,16 @@
  */
 enum mandoc_esc
 roff_escape(const char *buf, const int ln, const int aesc,
-    int *resc, int *rnam, int *rarg, int *rendarg, int *rend)
+    int *resc, int *rnam, int *rarg, int *rendarg, int *rend, int *rcols)
 {
 	int		 iesc;		/* index of leading escape char */
 	int		 inam;		/* index of escape name */
 	int		 iarg;		/* index beginning the argument */
 	int		 iendarg;	/* index right after the argument */
 	int		 iend;		/* index right after the sequence */
-	int		 sesc, snam, sarg, sendarg, send; /* for sub-escape */
+	int		 icols;		/* column width of sequence */
+	int		 sesc, snam, sarg, sendarg, send, scols;
+					/* for sub-escape */
 	int		 escterm;	/* whether term is escaped */
 	int		 maxl;		/* expected length of the argument */
 	int		 argl;		/* actual length of the argument */
@@ -98,6 +100,7 @@
 	 */

 	iarg = iendarg = iend = inam + 1;
+	icols = 0;
 	maxl = INT_MAX;
 	term = '\0';
 	err = MANDOCERR_OK;
@@ -141,11 +144,13 @@
 	case '\'':
 	case '-':
 	case '0':
-	case ':':
 	case '_':
 	case '`':
 	case 'e':
 	case '~':
+		icols++;
+		/* FALLTHROUGH */
+	case ':':
 		iarg--;
 		argl = 1;
 		rval = ESCAPE_SPECIAL;
@@ -179,6 +184,7 @@
 		break;
 	case '(':
 	case '[':
+		icols++;
 		rval = ESCAPE_SPECIAL;
 		iendarg = iend = --iarg;
 		break;
@@ -208,6 +214,7 @@
 		term = '\b';
 		break;
 	case 'C':
+		icols++;
 		rval = ESCAPE_SPECIAL;
 		term = '\b';
 		break;
@@ -224,6 +231,7 @@
 		term = '\b';
 		break;
 	case 'o':
+		icols++;
 		rval = ESCAPE_OVERSTRIKE;
 		term = '\b';
 		break;
@@ -271,7 +279,7 @@
 	if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) &&
 	    buf[iarg] == buf[iesc]) {
 		stype = roff_escape(buf, ln, iendarg,
-		    &sesc, &snam, &sarg, &sendarg, &send);
+		    &sesc, &snam, &sarg, &sendarg, &send, &scols);
 		if (stype == ESCAPE_EXPAND)
 			goto out_sub;
 	}
@@ -285,11 +293,13 @@
 			    buf[snam]) != NULL) {
 				err = MANDOCERR_ESC_DELIM;
 				iend = send;
+				icols += scols;
 				iarg = iendarg = sesc;
 				goto out;
 			}
 			escterm = 1;
 			iarg = send;
+			icols += scols;
 			term = buf[snam];
 		} else if (strchr("BDHLRSvxNhl", buf[inam]) != NULL &&
 		    strchr(" %&()*+-./0123456789:<=>", buf[iarg]) != NULL) {
@@ -347,10 +357,11 @@
 		}
 		if (buf[iendarg] == buf[iesc]) {
 			stype = roff_escape(buf, ln, iendarg,
-			    &sesc, &snam, &sarg, &sendarg, &send);
+			    &sesc, &snam, &sarg, &sendarg, &send, &scols);
 			if (stype == ESCAPE_EXPAND)
 				goto out_sub;
 			iend = send;
+			icols += scols;
 			if (escterm == 1 &&
 			    (buf[snam] == term || buf[inam] == 'N'))
 				break;
@@ -366,6 +377,8 @@
 				valid_A = 0;
 			if (maxl != INT_MAX)
 				maxl--;
+			if (term == '\'')
+				icols++;
 			iend = ++iendarg;
 		}
 	}
@@ -502,6 +515,7 @@
 	iarg = sarg;
 	iendarg = sendarg;
 	iend = send;
+	icols = scols;
 	rval = ESCAPE_EXPAND;

 out:
@@ -515,6 +529,8 @@
 		*rendarg = iendarg;
 	if (rend != NULL)
 		*rend = iend;
+	if (rcols != NULL)
+		*rcols = icols;
 	if (ln == 0)
 		return rval;

Index: roff_int.h
===================================================================
RCS file: /cvs/mandoc/roff_int.h,v
retrieving revision 1.20
diff -u -r1.20 roff_int.h
--- roff_int.h	2 Jun 2022 11:29:07 -0000	1.20
+++ roff_int.h	22 Oct 2023 20:59:52 -0000
@@ -83,7 +83,7 @@
 void		  roffhash_free(struct ohash *);

 enum mandoc_esc	  roff_escape(const char *, const int, const int,
-			int *, int *, int *, int *, int *);
+			int *, int *, int *, int *, int *, int *);
 void		  roff_state_reset(struct roff_man *);
 void		  roff_validate(struct roff_man *);

       reply	other threads:[~2023-10-23  0:41 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <884cb5d0-27ce-a5ca-b449-972021e62e92@gmail.com>
     [not found] ` <7c3294cf-e3d0-c716-d1c0-5b6c5c757d7e@cs.ucla.edu>
     [not found]   ` <7eb92df5-6c87-8384-c4a8-2a00eabf1c8e@gmail.com>
     [not found]     ` <66ef5b92-6e19-8bea-2840-6c2f0240d225@cs.ucla.edu>
     [not found]       ` <ZTWO6/0aqdxtS6Vg@asta-kit.de>
2023-10-23  0:41         ` Paul Eggert [this message]
     [not found]           ` <20231023083059.h43j6g2cse3e55en@illithid>
2023-10-23 10:33             ` Alejandro Colomar
2023-10-23 21:23           ` Ingo Schwarze
2023-10-24 19:36             ` Paul Eggert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=160e850e-c5eb-45a9-9011-b766ade9dd10@cs.ucla.edu \
    --to=eggert@cs.ucla.edu \
    --cc=alx.manpages@gmail.com \
    --cc=g.branden.robinson@gmail.com \
    --cc=schwarze@usta.de \
    --cc=tech@mandoc.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).