From mboxrd@z Thu Jan 1 00:00:00 1970 X-Msuck: nntp://news.gmane.org/gmane.linux.lib.musl.general/3048 Path: news.gmane.org!not-for-mail From: Rich Felker Newsgroups: gmane.linux.lib.musl.general Subject: Re: multibyte performance findings Date: Sat, 6 Apr 2013 02:08:52 -0400 Message-ID: <20130406060852.GH20323@brightrain.aerifal.cx> References: <20130406052121.GA20915@brightrain.aerifal.cx> Reply-To: musl@lists.openwall.com NNTP-Posting-Host: plane.gmane.org Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="/9ZOS6odDaRI+0hI" X-Trace: ger.gmane.org 1365270877 19326 80.91.229.3 (6 Apr 2013 17:54:37 GMT) X-Complaints-To: usenet@ger.gmane.org NNTP-Posting-Date: Sat, 6 Apr 2013 17:54:37 +0000 (UTC) To: musl@lists.openwall.com Original-X-From: musl-return-3053-gllmg-musl=m.gmane.org@lists.openwall.com Sat Apr 06 19:54:39 2013 Return-path: Envelope-to: gllmg-musl@plane.gmane.org Original-Received: from mother.openwall.net ([195.42.179.200]) by plane.gmane.org with smtp (Exim 4.69) (envelope-from ) id 1UOWiP-0005Sr-Q9 for gllmg-musl@plane.gmane.org; Sat, 06 Apr 2013 19:15:45 +0200 Original-Received: (qmail 6123 invoked by uid 550); 6 Apr 2013 06:09:04 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: Original-Received: (qmail 6114 invoked from network); 6 Apr 2013 06:09:04 -0000 Content-Disposition: inline In-Reply-To: <20130406052121.GA20915@brightrain.aerifal.cx> User-Agent: Mutt/1.5.21 (2010-09-15) Xref: news.gmane.org gmane.linux.lib.musl.general:3048 Archived-At: --/9ZOS6odDaRI+0hI Content-Type: text/plain; charset=us-ascii Content-Disposition: inline On Sat, Apr 06, 2013 at 01:21:21AM -0400, Rich Felker wrote: > Hi all, > > I've been examining performance in the multibyte conversion functions > (as part of the POSIX locale controversy), and have some interesting > findings so far: > [...] And here's a diff of the proposed changes so far.. Rich --/9ZOS6odDaRI+0hI Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="mb.diff" diff --git a/src/multibyte/mbrtowc.c b/src/multibyte/mbrtowc.c index cc49781..d552652 100644 --- a/src/multibyte/mbrtowc.c +++ b/src/multibyte/mbrtowc.c @@ -18,6 +18,7 @@ size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate const unsigned char *s = (const void *)src; const unsigned N = n; + if (!n) return -2; if (!st) st = (void *)&internal_state; c = *(unsigned *)st; @@ -27,9 +28,9 @@ size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate n = 1; } else if (!wc) wc = (void *)&wc; - if (!n) return -2; + /* This condition can only be true if *s<0x80 and c==0 */ + if (*s + c < 0x80) return !!(*wc = *s); if (!c) { - if (*s < 0x80) return !!(*wc = *s); if (*s-SA > SB-SA) goto ilseq; c = bittab[*s++-SA]; n--; } diff --git a/src/multibyte/mbtowc.c b/src/multibyte/mbtowc.c index b5dd7e3..5ce9281 100644 --- a/src/multibyte/mbtowc.c +++ b/src/multibyte/mbtowc.c @@ -11,9 +11,43 @@ #include "internal.h" -int mbtowc(wchar_t *restrict wc, const char *restrict s, size_t n) +int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n) { - mbstate_t st = { 0 }; - n = mbrtowc(wc, s, n, &st); - return n+2 ? n : -1; + unsigned c; + const unsigned char *s = (const void *)src; + + if (!s) return 0; + if (!n) goto ilseq; + if (!wc) wc = (void *)&wc; + + if (*s < 0x80) return !!(*wc = *s); + if (*s-SA > SB-SA) goto ilseq; + c = bittab[*s++-SA]; + + /* Avoid excessive checks against n: If shifting the state n-1 + * times does not clear the high bit, then the value of n is + * insufficient to read a character */ + if (n<4 && ((c<<(6*n-6)) & (1U<<31))) goto ilseq; + + if (OOB(c,*s)) goto ilseq; + c = c<<6 | *s++-0x80; + if (!(c&(1U<<31))) { + *wc = c; + return 2; + } + + if (*s-0x80u >= 0x40) goto ilseq; + c = c<<6 | *s++-0x80; + if (!(c&(1U<<31))) { + *wc = c; + return 3; + } + + if (*s-0x80u >= 0x40) goto ilseq; + *wc = c<<6 | *s++-0x80; + return 4; + +ilseq: + errno = EILSEQ; + return -1; } --/9ZOS6odDaRI+0hI--