From: schwarze@mdocml.bsd.lv
To: source@mdocml.bsd.lv
Subject: mdocml: In terminal output, unify handling of Unicode and numbered
Date: Tue, 28 Oct 2014 20:17:43 -0400 (EDT) [thread overview]
Message-ID: <201410290017.s9T0Hhia030024@krisdoz.my.domain> (raw)
Log Message:
-----------
In terminal output, unify handling of Unicode and numbered character
escape sequences just like it was earlier implemented for -Thtml.
Do not let control characters other than ASCII 9 (horizontal tab)
propagate to the output. Although groff allows them, that really
doesn't look like a great idea.
Let mchars_num2char() return int so that we can distinguish invalid \N
syntax from \N'0'. This also reduces the danger of signed char issues
popping up.
Modified Files:
--------------
mdocml:
chars.c
html.c
mandoc.h
term.c
Revision Data
-------------
Index: term.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/term.c,v
retrieving revision 1.232
retrieving revision 1.233
diff -Lterm.c -Lterm.c -u -p -r1.232 -r1.233
--- term.c
+++ term.c
@@ -391,7 +391,6 @@ term_word(struct termp *p, const char *w
{
const char nbrsp[2] = { ASCII_NBRSP, 0 };
const char *seq, *cp;
- char c;
int sz, uc;
size_t ssz;
enum mandoc_esc esc;
@@ -443,16 +442,11 @@ term_word(struct termp *p, const char *w
switch (esc) {
case ESCAPE_UNICODE:
uc = mchars_num2uc(seq + 1, sz - 1);
- if (p->enc == TERMENC_ASCII) {
- cp = ascii_uc2str(uc);
- encode(p, cp, strlen(cp));
- } else
- encode1(p, uc);
break;
case ESCAPE_NUMBERED:
- c = mchars_num2char(seq, sz);
- if ('\0' != c)
- encode(p, &c, 1);
+ uc = mchars_num2char(seq, sz);
+ if (uc < 0)
+ continue;
break;
case ESCAPE_SPECIAL:
if (p->enc == TERMENC_ASCII) {
@@ -465,35 +459,50 @@ term_word(struct termp *p, const char *w
if (uc > 0)
encode1(p, uc);
}
- break;
+ continue;
case ESCAPE_FONTBOLD:
term_fontrepl(p, TERMFONT_BOLD);
- break;
+ continue;
case ESCAPE_FONTITALIC:
term_fontrepl(p, TERMFONT_UNDER);
- break;
+ continue;
case ESCAPE_FONTBI:
term_fontrepl(p, TERMFONT_BI);
- break;
+ continue;
case ESCAPE_FONT:
/* FALLTHROUGH */
case ESCAPE_FONTROMAN:
term_fontrepl(p, TERMFONT_NONE);
- break;
+ continue;
case ESCAPE_FONTPREV:
term_fontlast(p);
- break;
+ continue;
case ESCAPE_NOSPACE:
if (TERMP_SKIPCHAR & p->flags)
p->flags &= ~TERMP_SKIPCHAR;
else if ('\0' == *word)
p->flags |= TERMP_NOSPACE;
- break;
+ continue;
case ESCAPE_SKIPCHAR:
p->flags |= TERMP_SKIPCHAR;
- break;
+ continue;
default:
- break;
+ continue;
+ }
+
+ /*
+ * Common handling for Unicode and numbered
+ * character escape sequences.
+ */
+
+ if (p->enc == TERMENC_ASCII) {
+ cp = ascii_uc2str(uc);
+ encode(p, cp, strlen(cp));
+ } else {
+ if ((uc < 0x20 && uc != 0x09) ||
+ (uc > 0x7E && uc < 0xA0))
+ uc = 0xFFFD;
+ encode1(p, uc);
}
}
p->flags &= ~TERMP_NBRWORD;
@@ -645,7 +654,7 @@ size_t
term_strlen(const struct termp *p, const char *cp)
{
size_t sz, rsz, i;
- int ssz, skip, c;
+ int ssz, skip, uc;
const char *seq, *rhs;
enum mandoc_esc esc;
static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
@@ -675,43 +684,60 @@ term_strlen(const struct termp *p, const
switch (esc) {
case ESCAPE_UNICODE:
- c = mchars_num2uc(seq + 1, sz - 1);
- if (p->enc == TERMENC_ASCII) {
- rhs = ascii_uc2str(c);
- rsz = strlen(rhs);
- } else
- sz += cond_width(p, c, &skip);
+ uc = mchars_num2uc(seq + 1, sz - 1);
break;
case ESCAPE_NUMBERED:
- c = mchars_num2char(seq, ssz);
- if ('\0' != c)
- sz += cond_width(p, c, &skip);
+ uc = mchars_num2char(seq, ssz);
+ if (uc < 0)
+ continue;
break;
case ESCAPE_SPECIAL:
- if (p->enc == TERMENC_ASCII)
+ if (p->enc == TERMENC_ASCII) {
rhs = mchars_spec2str(p->symtab,
seq, ssz, &rsz);
- else {
- c = mchars_spec2cp(p->symtab,
+ if (rhs != NULL)
+ break;
+ } else {
+ uc = mchars_spec2cp(p->symtab,
seq, ssz);
- if (c > 0)
- sz += cond_width(p, c, &skip);
+ if (uc > 0)
+ sz += cond_width(p, uc, &skip);
}
- break;
+ continue;
case ESCAPE_SKIPCHAR:
skip = 1;
- break;
+ continue;
default:
- break;
+ continue;
}
- if (NULL == rhs)
- break;
+ /*
+ * Common handling for Unicode and numbered
+ * character escape sequences.
+ */
+
+ if (rhs == NULL) {
+ if (p->enc == TERMENC_ASCII) {
+ rhs = ascii_uc2str(uc);
+ rsz = strlen(rhs);
+ } else {
+ if ((uc < 0x20 && uc != 0x09) ||
+ (uc > 0x7E && uc < 0xA0))
+ uc = 0xFFFD;
+ sz += cond_width(p, uc, &skip);
+ continue;
+ }
+ }
if (skip) {
skip = 0;
break;
}
+
+ /*
+ * Common handling for all escape sequences
+ * printing more than one character.
+ */
for (i = 0; i < rsz; i++)
sz += (*p->width)(p, *rhs++);
Index: chars.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/chars.c,v
retrieving revision 1.64
retrieving revision 1.65
diff -Lchars.c -Lchars.c -u -p -r1.64 -r1.65
--- chars.c
+++ chars.c
@@ -107,15 +107,13 @@ mchars_spec2cp(const struct mchars *arg,
return(ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1);
}
-char
+int
mchars_num2char(const char *p, size_t sz)
{
int i;
- if ((i = mandoc_strntoi(p, sz, 10)) < 0)
- return('\0');
-
- return(i > 0 && i < 256 && isprint(i) ? i : '\0');
+ i = mandoc_strntoi(p, sz, 10);
+ return(i >= 0 && i < 256 ? i : -1);
}
int
Index: html.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/html.c,v
retrieving revision 1.180
retrieving revision 1.181
diff -Lhtml.c -Lhtml.c -u -p -r1.180 -r1.181
--- html.c
+++ html.c
@@ -422,9 +422,13 @@ print_encode(struct html *h, const char
break;
case ESCAPE_NUMBERED:
c = mchars_num2char(seq, len);
+ if (c < 0)
+ continue;
break;
case ESCAPE_SPECIAL:
c = mchars_spec2cp(h->symtab, seq, len);
+ if (c <= 0)
+ continue;
break;
case ESCAPE_NOSPACE:
if ('\0' == *p)
@@ -433,9 +437,8 @@ print_encode(struct html *h, const char
default:
continue;
}
- if (c <= 0)
- continue;
- if (c < 0x20 || (c > 0x7E && c < 0xA0))
+ if ((c < 0x20 && c != 0x09) ||
+ (c > 0x7E && c < 0xA0))
c = 0xFFFD;
if (c > 0x7E)
printf("&#%d;", c);
Index: mandoc.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.h,v
retrieving revision 1.167
retrieving revision 1.168
diff -Lmandoc.h -Lmandoc.h -u -p -r1.167 -r1.168
--- mandoc.h
+++ mandoc.h
@@ -425,7 +425,7 @@ __BEGIN_DECLS
enum mandoc_esc mandoc_escape(const char **, const char **, int *);
struct mchars *mchars_alloc(void);
void mchars_free(struct mchars *);
-char mchars_num2char(const char *, size_t);
+int mchars_num2char(const char *, size_t);
const char *mchars_uc2str(int);
int mchars_num2uc(const char *, size_t);
int mchars_spec2cp(const struct mchars *,
--
To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv
reply other threads:[~2014-10-29 0:17 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=201410290017.s9T0Hhia030024@krisdoz.my.domain \
--to=schwarze@mdocml.bsd.lv \
--cc=source@mdocml.bsd.lv \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).