source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: Flip on unicode output (via \[uNNNN]) in -T[x]html.
@ 2011-05-17 11:50 kristaps
  0 siblings, 0 replies; only message in thread
From: kristaps @ 2011-05-17 11:50 UTC (permalink / raw)
  To: source

Log Message:
-----------
Flip on unicode output (via \[uNNNN]) in -T[x]html.  Here we go!

Modified Files:
--------------
    mdocml:
        chars.c
        html.c
        mandoc.3
        mandoc.h

Revision Data
-------------
Index: chars.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/chars.c,v
retrieving revision 1.43
retrieving revision 1.44
diff -Lchars.c -Lchars.c -u -p -r1.43 -r1.44
--- chars.c
+++ chars.c
@@ -138,7 +138,7 @@ mchars_res2cp(struct mchars *arg, const 
 }
 
 /*
- * Numbered character to literal character.
+ * Numbered character string to ASCII codepoint.
  * This can only be a printable character (i.e., alnum, punct, space) so
  * prevent the character from ruining our state (backspace, newline, and
  * so on).
@@ -151,8 +151,22 @@ mchars_num2char(const char *p, size_t sz
 
 	if ((i = mandoc_strntou(p, sz, 10)) < 0)
 		return('\0');
-
 	return(isprint(i) ? i : '\0');
+}
+
+/*
+ * Hex character string to Unicode codepoint.
+ * If the character is illegal, returns '\0'.
+ */
+int
+mchars_num2uc(const char *p, size_t sz)
+{
+	int               i;
+
+	if ((i = mandoc_strntou(p, sz, 16)) < 0)
+		return('\0');
+	/* FIXME: make sure we're not in a bogus range. */
+	return(i > 0x80 && i <= 0x10FFFF ? i : '\0');
 }
 
 /* 
Index: mandoc.3
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.3,v
retrieving revision 1.6
retrieving revision 1.7
diff -Lmandoc.3 -Lmandoc.3 -u -p -r1.6 -r1.7
--- mandoc.3
+++ mandoc.3
@@ -26,6 +26,7 @@
 .Nm mchars_alloc ,
 .Nm mchars_free ,
 .Nm mchars_num2char ,
+.Nm mchars_num2uc ,
 .Nm mchars_res2cp ,
 .Nm mchars_res2str ,
 .Nm mchars_spec2cp ,
@@ -64,6 +65,8 @@
 .Fn mchars_free "struct mchars *p"
 .Ft char
 .Fn mchars_num2char "const char *cp" "size_t sz"
+.Ft int
+.Fn mchars_num2uc "const char *cp" "size_t sz"
 .Ft "const char *"
 .Fo mchars_res2str
 .Fa "struct mchars *p"
@@ -188,6 +191,9 @@ library also contains routines for trans
 .Pq see Fn mchars_alloc
 and parsing escape sequences from strings
 .Pq see Fn mandoc_escape .
+.Pp
+This library is
+.Ud
 .Sh REFERENCE
 This section documents the functions, types, and variables available
 via
@@ -247,8 +253,12 @@ The object must be freed with
 Free an object created with
 .Fn mchars_alloc .
 .It Fn mchars_num2char
-Convert a character index as found in \eN\(aq\(aq into a printable
-character.
+Convert a character index (e.g., the \eN\(aq\(aq escape) into a
+printable ASCII character.
+Returns \e0 (the nil character) if the input sequence is malformed.
+.It Fn mchars_num2uc
+Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
+a Unicode codepoint.
 Returns \e0 (the nil character) if the input sequence is malformed.
 .It Fn mchars_res2cp
 Convert a predefined character into a valid Unicode codepoint.
Index: html.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/html.c,v
retrieving revision 1.143
retrieving revision 1.144
diff -Lhtml.c -Lhtml.c -u -p -r1.143 -r1.144
--- html.c
+++ html.c
@@ -315,6 +315,8 @@ html_strlen(const char *cp)
 		switch (mandoc_escape(&cp, &seq, &ssz)) {
 		case (ESCAPE_ERROR):
 			return(sz);
+		case (ESCAPE_UNICODE):
+			/* FALLTHROUGH */
 		case (ESCAPE_NUMBERED):
 			/* FALLTHROUGH */
 		case (ESCAPE_PREDEF):
@@ -373,6 +375,12 @@ print_encode(struct html *h, const char 
 			break;
 
 		switch (esc) {
+		case (ESCAPE_UNICODE):
+			/* Skip past the "u" header. */
+			c = mchars_num2uc(seq + 1, len - 1);
+			if ('\0' != c)
+				printf("&#x%x;", c);
+			break;
 		case (ESCAPE_NUMBERED):
 			c = mchars_num2char(seq, len);
 			if ('\0' != c)
Index: mandoc.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.h,v
retrieving revision 1.75
retrieving revision 1.76
diff -Lmandoc.h -Lmandoc.h -u -p -r1.75 -r1.76
--- mandoc.h
+++ mandoc.h
@@ -330,6 +330,7 @@ enum mandoc_esc	  mandoc_escape(const ch
 
 struct mchars	 *mchars_alloc(void);
 char	 	  mchars_num2char(const char *, size_t);
+int		  mchars_num2uc(const char *, size_t);
 const char	 *mchars_spec2str(struct mchars *, const char *, size_t, size_t *);
 int		  mchars_spec2cp(struct mchars *, const char *, size_t);
 const char	 *mchars_res2str(struct mchars *, const char *, size_t, size_t *);
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2011-05-17 11:50 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-17 11:50 mdocml: Flip on unicode output (via \[uNNNN]) in -T[x]html kristaps

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).