Log Message: ----------- Distinguish between escape sequences that produce no output whatsoever (for example \fR) and escape sequences that produce invisible zero-width output (for example \&). No, I'm not joking: groff does make that distinction, and it has consequences in some situations, for example for vertical spacing in no-fill mode. Heirloom and Plan 9 behaviour is subtly different, but in case of doubt, we want to follow groff. While this fixes the behaviour for the majority of escape sequences, in particular for those most likely to occur in practice, it is not perfect yet because some of the more exotic ESCAPE_IGNORE sequences are actually of the "no output whatsoever" type but are treated as "invisible zero-width" for now. With the new ASCII_NBRZW mechanism in place, switching them over one by one when the need arises will no longer be very difficult. Modified Files: -------------- mandoc: mandoc.h term.c term_ascii.c mandoc/regress/mdoc/Bd: blank.in blank.out_ascii blank.out_lint blank.out_markdown Revision Data ------------- Index: mandoc.h =================================================================== RCS file: /home/cvs/mandoc/mandoc/mandoc.h,v retrieving revision 1.279 retrieving revision 1.280 diff -Lmandoc.h -Lmandoc.h -u -p -r1.279 -r1.280 --- mandoc.h +++ mandoc.h @@ -20,8 +20,9 @@ */ #define ASCII_NBRSP 31 /* non-breaking space */ -#define ASCII_HYPH 30 /* breakable hyphen */ -#define ASCII_BREAK 29 /* breakable zero-width space */ +#define ASCII_NBRZW 30 /* non-breaking zero-width space */ +#define ASCII_BREAK 29 /* breakable zero-width space */ +#define ASCII_HYPH 28 /* breakable hyphen */ /* * Status level. 
This refers to both internal status (i.e., whilst Index: term_ascii.c =================================================================== RCS file: /home/cvs/mandoc/mandoc/term_ascii.c,v retrieving revision 1.66 retrieving revision 1.67 diff -Lterm_ascii.c -Lterm_ascii.c -u -p -r1.66 -r1.67 --- term_ascii.c +++ term_ascii.c @@ -196,7 +196,7 @@ terminal_sepline(void *arg) static size_t ascii_width(const struct termp *p, int c) { - return c != ASCII_BREAK; + return c != ASCII_BREAK && c != ASCII_NBRZW; } void Index: term.c =================================================================== RCS file: /home/cvs/mandoc/mandoc/term.c,v retrieving revision 1.288 retrieving revision 1.289 diff -Lterm.c -Lterm.c -u -p -r1.288 -r1.289 --- term.c +++ term.c @@ -208,7 +208,6 @@ term_flushln(struct termp *p) return; endline(p); - p->viscol = 0; /* * Normally, start the next line at the same indentation @@ -314,6 +313,8 @@ term_fill(struct termp *p, size_t *nbr, vis = term_tab_next(vis); vis -= p->tcol->taboff; break; + case ASCII_NBRZW: /* Non-breakable zero-width. */ + break; case ASCII_NBRSP: /* Non-breakable space. 
*/ p->tcol->buf[ic] = ' '; /* FALLTHROUGH */ @@ -365,6 +366,7 @@ term_field(struct termp *p, size_t vbl, switch (p->tcol->buf[ic]) { case '\n': case ASCII_BREAK: + case ASCII_NBRZW: continue; case '\t': case ' ': @@ -571,18 +573,23 @@ term_word(struct termp *p, const char *w break; case ESCAPE_NUMBERED: uc = mchars_num2char(seq, sz); - if (uc < 0) - continue; - break; + if (uc >= 0) + break; + bufferc(p, ASCII_NBRZW); + continue; case ESCAPE_SPECIAL: if (p->enc == TERMENC_ASCII) { cp = mchars_spec2str(seq, sz, &ssz); if (cp != NULL) encode(p, cp, ssz); + else + bufferc(p, ASCII_NBRZW); } else { uc = mchars_spec2cp(seq, sz); if (uc > 0) encode1(p, uc); + else + bufferc(p, ASCII_NBRZW); } continue; case ESCAPE_UNDEF: @@ -744,6 +751,9 @@ term_word(struct termp *p, const char *w if (p->col > p->tcol->lastcol) p->col = p->tcol->lastcol; continue; + case ESCAPE_IGNORE: + bufferc(p, ASCII_NBRZW); + continue; default: continue; } @@ -935,8 +945,8 @@ term_strlen(const struct termp *p, const int ssz, skip, uc; const char *seq, *rhs; enum mandoc_esc esc; - static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH, - ASCII_BREAK, '\0' }; + static const char rej[] = { '\\', ASCII_NBRSP, ASCII_NBRZW, + ASCII_BREAK, ASCII_HYPH, '\0' }; /* * Account for escaped sequences within string length Index: blank.out_markdown =================================================================== RCS file: /home/cvs/mandoc/mandoc/regress/mdoc/Bd/blank.out_markdown,v retrieving revision 1.2 retrieving revision 1.3 diff -Lregress/mdoc/Bd/blank.out_markdown -Lregress/mdoc/Bd/blank.out_markdown -u -p -r1.2 -r1.3 --- regress/mdoc/Bd/blank.out_markdown +++ regress/mdoc/Bd/blank.out_markdown @@ -14,6 +14,14 @@ BD-BLANK(1) - General Commands Manual line containing space tab space: + line containing a zero-width space: + + line containing an invalid numbered character escape: + <?> + line containing an invalid named character escape: + <?> + line containing a font escape: + line starting with a blank 
character: x line starting with two blank characters: @@ -30,4 +38,4 @@ An empty one-line literal display: end of test document -OpenBSD - July 4, 2017 +OpenBSD - August 15, 2022 Index: blank.in =================================================================== RCS file: /home/cvs/mandoc/mandoc/regress/mdoc/Bd/blank.in,v retrieving revision 1.2 retrieving revision 1.3 diff -Lregress/mdoc/Bd/blank.in -Lregress/mdoc/Bd/blank.in -u -p -r1.2 -r1.3 --- regress/mdoc/Bd/blank.in +++ regress/mdoc/Bd/blank.in @@ -1,4 +1,4 @@ -.\" $OpenBSD: blank.in,v 1.6 2017/07/04 14:53:24 schwarze Exp $ +.\" $OpenBSD: blank.in,v 1.7 2022/08/15 17:59:00 schwarze Exp $ .Dd $Mdocdate$ .Dt BD-BLANK 1 .Os @@ -15,6 +15,14 @@ line containing two blank characters: line containing space tab space: +line containing a zero-width space: +\& +line containing an invalid numbered character escape: +\N'257' +line containing an invalid named character escape: +\[foobar] +line containing a font escape: +\fR line starting with a blank character: x line starting with two blank characters: Index: blank.out_lint =================================================================== RCS file: /home/cvs/mandoc/mandoc/regress/mdoc/Bd/blank.out_lint,v retrieving revision 1.6 retrieving revision 1.7 diff -Lregress/mdoc/Bd/blank.out_lint -Lregress/mdoc/Bd/blank.out_lint -u -p -r1.6 -r1.7 --- regress/mdoc/Bd/blank.out_lint +++ regress/mdoc/Bd/blank.out_lint @@ -1,8 +1,9 @@ mandoc: blank.in:13:1: STYLE: whitespace at end of input line mandoc: blank.in:15:1: STYLE: whitespace at end of input line mandoc: blank.in:17:1: STYLE: whitespace at end of input line -mandoc: blank.in:22:36: STYLE: whitespace at end of input line -mandoc: blank.in:23:37: STYLE: whitespace at end of input line -mandoc: blank.in:24:32: STYLE: whitespace at end of input line -mandoc: blank.in:31:8: STYLE: whitespace at end of input line -mandoc: blank.in:31:2: WARNING: empty block: Dl +mandoc: blank.in:23:1: ERROR: unknown special character: 
\[foobar] +mandoc: blank.in:30:36: STYLE: whitespace at end of input line +mandoc: blank.in:31:37: STYLE: whitespace at end of input line +mandoc: blank.in:32:32: STYLE: whitespace at end of input line +mandoc: blank.in:39:8: STYLE: whitespace at end of input line +mandoc: blank.in:39:2: WARNING: empty block: Dl Index: blank.out_ascii =================================================================== RCS file: /home/cvs/mandoc/mandoc/regress/mdoc/Bd/blank.out_ascii,v retrieving revision 1.2 retrieving revision 1.3 diff -Lregress/mdoc/Bd/blank.out_ascii -Lregress/mdoc/Bd/blank.out_ascii -u -p -r1.2 -r1.3 --- regress/mdoc/Bd/blank.out_ascii +++ regress/mdoc/Bd/blank.out_ascii @@ -12,6 +12,13 @@ D\bDE\bES\bSC\bCR\bRI\bIP\bPT\bTI\bIO\bON\bN line containing space tab space: + line containing a zero-width space: + + line containing an invalid numbered character escape: + + line containing an invalid named character escape: + + line containing a font escape: line starting with a blank character: x line starting with two blank characters: @@ -26,4 +33,4 @@ D\bDE\bES\bSC\bCR\bRI\bIP\bPT\bTI\bIO\bON\bN An empty one-line literal display: end of test document -OpenBSD July 4, 2017 OpenBSD +OpenBSD August 15, 2022 OpenBSD -- To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv