From: kemal Date: Sun, 01 Aug 2021 19:36:34 +0000 Subject: [PATCH] gui-win32: add support for surrogate pairs in rune16 functions this was required to encode and decode characters between U+10000-U+10FFFF properly. --- diff c97fe4693f6112504d6f13fab46f7cc8b27685c1 23878a23b80c9004045aebe9ac7b71c26c60f550 --- a/gui-win32/r16.c Mon Jun 28 22:29:39 2021 +++ b/gui-win32/r16.c Sun Aug 1 22:36:34 2021 @@ -21,8 +21,9 @@ Maskx = (1<= Runeself) n = runelen(c); if(p + n >= ep) break; - rc = c; - if(c < Runeself) + if(c < Runeself){ *p++ = c; - else - p += runetochar(p, &rc); + continue; + } + rc = c; + if(c >= LoSurrogate && c <= SurrogateMax) + rc = Bad; + else if(c >= HiSurrogate && c <= 0xDBFF){ /* decode a surrogate pair properly */ + if(p + n+1 >= ep) + rc = Bad; + else if((c = *r) >= LoSurrogate && c <= SurrogateMax){ + rc = 0x10000 | (*(r-1) - HiSurrogate) << 10 | (c - LoSurrogate); + r++; + }else + rc = Bad; + } + p += runetochar(p, &rc); } *p = '\0'; return op; @@ -107,7 +120,12 @@ er = r + nc; while(*p != '\0' && r + 1 < er){ p += chartorune(&rc, p); - *r++ = rc; /* we'll ignore surrogate pairs */ + if(rc >= 0x10000){ /* got to encode it in a surrogate pair */ + rc -= 0x10000; + *r++ = (rc >> 10)+HiSurrogate; + *r++ = (rc & 0x3FF)+LoSurrogate; + }else + *r++ = rc; } *r = '\0'; return or; @@ -138,7 +156,7 @@ wchar_t *ws; n = utflen(s) + 1; - ws = malloc(n*sizeof(wchar_t)); + ws = calloc(n, sizeof(wchar_t)); utftorunes16(ws, s, n); return ws; } @@ -162,7 +180,7 @@ { int n = 0; - while (*ws) + while(*ws) n += runelen(*ws++); return n+1; }