From: Paul Eggert <eggert@cs.ucla.edu>
To: tech@mandoc.bsd.lv
Cc: g.branden.robinson@gmail.com, Ingo Schwarze <schwarze@usta.de>,
Alejandro Colomar <alx.manpages@gmail.com>
Subject: mandoc mishandles tzfile(5)'s .IP \(bu "\w'\(bu 'u"
Date: Sun, 22 Oct 2023 17:41:28 -0700 [thread overview]
Message-ID: <160e850e-c5eb-45a9-9011-b766ade9dd10@cs.ucla.edu> (raw)
In-Reply-To: <ZTWO6/0aqdxtS6Vg@asta-kit.de>
[-- Attachment #1: Type: text/plain, Size: 781 bytes --]
On 2023-10-22 14:06, Ingo Schwarze wrote:
> mandoc only supports
> ASCII strings as arguments to \w, not escape sequences or formatting
> instructions.
For the TZDB man pages mandoc need not support all that, just \(bu.
Just to make sure we're on the same page, I reproduced the problem by
running the command "mandoc -man -Tascii t.5", where t.5 contains the
following lines:
.TH tzfile 5
.SH NAME
.IP \(bu "\w'\(bu 'u"
xxx
.PP
yyy
The output should contain two spaces between the bullet's "o" and the
"x", but with current mandoc it contains five spaces.
Proposed mandoc patch attached. This isn't a perfect emulation of groff,
nor have I tested with fancy constructs, but it should be good enough
for tzfile(5).
[-- Attachment #2: mandoc-tzfile-fix.txt --]
[-- Type: text/plain, Size: 5541 bytes --]
Index: roff.c
===================================================================
RCS file: /cvs/mandoc/roff.c,v
retrieving revision 1.398
diff -u -r1.398 roff.c
--- roff.c 22 Oct 2023 16:02:01 -0000 1.398
+++ roff.c 22 Oct 2023 20:59:52 -0000
@@ -1367,6 +1367,7 @@
int iarg; /* index beginning the argument */
int iendarg; /* index right after the argument */
int iend; /* index right after the sequence */
+ int icols; /* output columns of sequence */
int isrc, idst; /* to reduce \\ and \. in names */
int deftype; /* type of definition to paste */
int argi; /* macro argument index */
@@ -1404,7 +1405,7 @@
*/
if (roff_escape(buf->buf, ln, pos, &iesc, &inam,
- &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
+ &iarg, &iendarg, &iend, &icols) != ESCAPE_EXPAND) {
while (pos < iend) {
if (buf->buf[pos] == ec) {
buf->buf[pos] = '\\';
@@ -1552,7 +1553,7 @@
break;
case 'w':
(void)snprintf(ubuf, sizeof(ubuf),
- "%d", (iendarg - iarg) * 24);
+ "%d", icols * 24);
res = ubuf;
break;
default:
@@ -4030,7 +4031,7 @@
if (cp[1] == '{' || cp[1] == '}')
break;
if (roff_escape(cp, 0, 0, NULL, &inam,
- NULL, NULL, &iend) != ESCAPE_UNDEF) {
+ NULL, NULL, &iend, NULL) != ESCAPE_UNDEF) {
mandoc_msg(MANDOCERR_NAMESC, ln, pos,
"%.*s%.*s", namesz, name, iend, cp);
cp += iend;
Index: roff_escape.c
===================================================================
RCS file: /cvs/mandoc/roff_escape.c,v
retrieving revision 1.14
diff -u -r1.14 roff_escape.c
--- roff_escape.c 8 Jun 2022 13:23:57 -0000 1.14
+++ roff_escape.c 22 Oct 2023 20:59:52 -0000
@@ -42,7 +42,7 @@
enum mandoc_esc rval;
rval = roff_escape(--*rendarg, 0, 0,
- NULL, NULL, &iarg, &iendarg, &iend);
+ NULL, NULL, &iarg, &iendarg, &iend, NULL);
assert(rval != ESCAPE_EXPAND);
if (rarg != NULL)
*rarg = *rendarg + iarg;
@@ -64,14 +64,16 @@
*/
enum mandoc_esc
roff_escape(const char *buf, const int ln, const int aesc,
- int *resc, int *rnam, int *rarg, int *rendarg, int *rend)
+ int *resc, int *rnam, int *rarg, int *rendarg, int *rend, int *rcols)
{
int iesc; /* index of leading escape char */
int inam; /* index of escape name */
int iarg; /* index beginning the argument */
int iendarg; /* index right after the argument */
int iend; /* index right after the sequence */
- int sesc, snam, sarg, sendarg, send; /* for sub-escape */
+ int icols; /* column width of sequence */
+ int sesc, snam, sarg, sendarg, send, scols;
+ /* for sub-escape */
int escterm; /* whether term is escaped */
int maxl; /* expected length of the argument */
int argl; /* actual length of the argument */
@@ -98,6 +100,7 @@
*/
iarg = iendarg = iend = inam + 1;
+ icols = 0;
maxl = INT_MAX;
term = '\0';
err = MANDOCERR_OK;
@@ -141,11 +144,13 @@
case '\'':
case '-':
case '0':
- case ':':
case '_':
case '`':
case 'e':
case '~':
+ icols++;
+ /* FALLTHROUGH */
+ case ':':
iarg--;
argl = 1;
rval = ESCAPE_SPECIAL;
@@ -179,6 +184,7 @@
break;
case '(':
case '[':
+ icols++;
rval = ESCAPE_SPECIAL;
iendarg = iend = --iarg;
break;
@@ -208,6 +214,7 @@
term = '\b';
break;
case 'C':
+ icols++;
rval = ESCAPE_SPECIAL;
term = '\b';
break;
@@ -224,6 +231,7 @@
term = '\b';
break;
case 'o':
+ icols++;
rval = ESCAPE_OVERSTRIKE;
term = '\b';
break;
@@ -271,7 +279,7 @@
if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) &&
buf[iarg] == buf[iesc]) {
stype = roff_escape(buf, ln, iendarg,
- &sesc, &snam, &sarg, &sendarg, &send);
+ &sesc, &snam, &sarg, &sendarg, &send, &scols);
if (stype == ESCAPE_EXPAND)
goto out_sub;
}
@@ -285,11 +293,13 @@
buf[snam]) != NULL) {
err = MANDOCERR_ESC_DELIM;
iend = send;
+ icols += scols;
iarg = iendarg = sesc;
goto out;
}
escterm = 1;
iarg = send;
+ icols += scols;
term = buf[snam];
} else if (strchr("BDHLRSvxNhl", buf[inam]) != NULL &&
strchr(" %&()*+-./0123456789:<=>", buf[iarg]) != NULL) {
@@ -347,10 +357,11 @@
}
if (buf[iendarg] == buf[iesc]) {
stype = roff_escape(buf, ln, iendarg,
- &sesc, &snam, &sarg, &sendarg, &send);
+ &sesc, &snam, &sarg, &sendarg, &send, &scols);
if (stype == ESCAPE_EXPAND)
goto out_sub;
iend = send;
+ icols += scols;
if (escterm == 1 &&
(buf[snam] == term || buf[inam] == 'N'))
break;
@@ -366,6 +377,8 @@
valid_A = 0;
if (maxl != INT_MAX)
maxl--;
+ if (term == '\'')
+ icols++;
iend = ++iendarg;
}
}
@@ -502,6 +515,7 @@
iarg = sarg;
iendarg = sendarg;
iend = send;
+ icols = scols;
rval = ESCAPE_EXPAND;
out:
@@ -515,6 +529,8 @@
*rendarg = iendarg;
if (rend != NULL)
*rend = iend;
+ if (rcols != NULL)
+ *rcols = icols;
if (ln == 0)
return rval;
Index: roff_int.h
===================================================================
RCS file: /cvs/mandoc/roff_int.h,v
retrieving revision 1.20
diff -u -r1.20 roff_int.h
--- roff_int.h 2 Jun 2022 11:29:07 -0000 1.20
+++ roff_int.h 22 Oct 2023 20:59:52 -0000
@@ -83,7 +83,7 @@
void roffhash_free(struct ohash *);
enum mandoc_esc roff_escape(const char *, const int, const int,
- int *, int *, int *, int *, int *);
+ int *, int *, int *, int *, int *, int *);
void roff_state_reset(struct roff_man *);
void roff_validate(struct roff_man *);
next parent reply other threads:[~2023-10-23 0:41 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <884cb5d0-27ce-a5ca-b449-972021e62e92@gmail.com>
[not found] ` <7c3294cf-e3d0-c716-d1c0-5b6c5c757d7e@cs.ucla.edu>
[not found] ` <7eb92df5-6c87-8384-c4a8-2a00eabf1c8e@gmail.com>
[not found] ` <66ef5b92-6e19-8bea-2840-6c2f0240d225@cs.ucla.edu>
[not found] ` <ZTWO6/0aqdxtS6Vg@asta-kit.de>
2023-10-23 0:41 ` Paul Eggert [this message]
[not found] ` <20231023083059.h43j6g2cse3e55en@illithid>
2023-10-23 10:33 ` Alejandro Colomar
2023-10-23 21:23 ` Ingo Schwarze
2023-10-24 19:36 ` Paul Eggert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=160e850e-c5eb-45a9-9011-b766ade9dd10@cs.ucla.edu \
--to=eggert@cs.ucla.edu \
--cc=alx.manpages@gmail.com \
--cc=g.branden.robinson@gmail.com \
--cc=schwarze@usta.de \
--cc=tech@mandoc.bsd.lv \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).