mailing list of musl libc
 help / color / mirror / code / Atom feed
From: Rich Felker <dalias@aerifal.cx>
To: musl@lists.openwall.com
Subject: Re: multibyte performance findings
Date: Sat, 6 Apr 2013 02:08:52 -0400	[thread overview]
Message-ID: <20130406060852.GH20323@brightrain.aerifal.cx> (raw)
In-Reply-To: <20130406052121.GA20915@brightrain.aerifal.cx>

[-- Attachment #1: Type: text/plain, Size: 302 bytes --]

On Sat, Apr 06, 2013 at 01:21:21AM -0400, Rich Felker wrote:
> Hi all,
> 
> I've been examining performance in the multibyte conversion functions
> (as part of the POSIX locale controversy), and have some interesting
> findings so far:
> [...]

And here's a diff of the proposed changes so far.

Rich

[-- Attachment #2: mb.diff --]
[-- Type: text/plain, Size: 2006 bytes --]

diff --git a/src/multibyte/mbrtowc.c b/src/multibyte/mbrtowc.c
index cc49781..d552652 100644
--- a/src/multibyte/mbrtowc.c
+++ b/src/multibyte/mbrtowc.c
@@ -18,6 +18,7 @@ size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate
 	const unsigned char *s = (const void *)src;
 	const unsigned N = n;
 
+	if (!n) return -2;
 	if (!st) st = (void *)&internal_state;
 	c = *(unsigned *)st;
 	
@@ -27,9 +28,9 @@ size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate
 		n = 1;
 	} else if (!wc) wc = (void *)&wc;
 
-	if (!n) return -2;
+	/* This condition can only be true if *s<0x80 and c==0 */
+	if (*s + c < 0x80) return !!(*wc = *s);
 	if (!c) {
-		if (*s < 0x80) return !!(*wc = *s);
 		if (*s-SA > SB-SA) goto ilseq;
 		c = bittab[*s++-SA]; n--;
 	}
diff --git a/src/multibyte/mbtowc.c b/src/multibyte/mbtowc.c
index b5dd7e3..5ce9281 100644
--- a/src/multibyte/mbtowc.c
+++ b/src/multibyte/mbtowc.c
@@ -11,9 +11,43 @@
 
 #include "internal.h"
 
-int mbtowc(wchar_t *restrict wc, const char *restrict s, size_t n)
+int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n)
 {
-	mbstate_t st = { 0 };
-	n = mbrtowc(wc, s, n, &st);
-	return n+2 ? n : -1;
+	unsigned c;
+	const unsigned char *s = (const void *)src;
+
+	if (!s) return 0;
+	if (!n) goto ilseq;
+	if (!wc) wc = (void *)&wc;
+
+	if (*s < 0x80) return !!(*wc = *s);
+	if (*s-SA > SB-SA) goto ilseq;
+	c = bittab[*s++-SA];
+
+	/* Avoid excessive checks against n: If shifting the state n-1
+	 * times does not clear the high bit, then the value of n is
+	 * insufficient to read a character */
+	if (n<4 && ((c<<(6*n-6)) & (1U<<31))) goto ilseq;
+
+	if (OOB(c,*s)) goto ilseq;
+	c = c<<6 | *s++-0x80;
+	if (!(c&(1U<<31))) {
+		*wc = c;
+		return 2;
+	}
+
+	if (*s-0x80u >= 0x40) goto ilseq;
+	c = c<<6 | *s++-0x80;
+	if (!(c&(1U<<31))) {
+		*wc = c;
+		return 3;
+	}
+
+	if (*s-0x80u >= 0x40) goto ilseq;
+	*wc = c<<6 | *s++-0x80;
+	return 4;
+
+ilseq:
+	errno = EILSEQ;
+	return -1;
 }

  reply	other threads:[~2013-04-06  6:08 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-06  5:21 Rich Felker
2013-04-06  6:08 ` Rich Felker [this message]
2013-04-09  5:54   ` Rich Felker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130406060852.GH20323@brightrain.aerifal.cx \
    --to=dalias@aerifal.cx \
    --cc=musl@lists.openwall.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).