sry, i uploaded the wront patch, it should be int cat, rather than struct msgcat :/ 2017-02-12 14:56 GMT+08:00 He X : > 1. cat is added to the keys, also do a validate > 2. so we what do we deal with the gettextdir() exactly? inline it or > construct a gettextpointer()? > 3. i added a extra locbuf array, and goto is replaced by a loop, memcpy is > replaced by snprintf, compiled, and working well with fcitx > 4. i just found that i forgot to store the keys to new buffer, it's ok to > just use normal expression? or we need atomic operations? > ``` > + p->cat = category; > + p->binding = q; > + p->lm = lm; > ``` > 5. I do want to rewrite all to .UTF8, but it's a bit annoying as your > words, then i changed the code to simply strip. > > > (safe for the user's terminal) > LANG is set by users who are using musl and it's modified to zh_CN at > setlocale(), app will use UTF8 directly, there's no such situation where > charset will cause troubles to users' terminal, except apps which get the > LANG manually by getenv(). I have not seen such strange applications so > far, and most apps only have the UTF8 translation files. > > For moving from glibc to musl, i think doing this way is good for now, we > could delete it later, or just keep it forever. And most people won't use > non-UTF8 at all, if they do use GBK, their app will even fallback to UTF8, > because no translation files for GBK. So, it's not so dagerous, i think > :). > > And for developers, they should not use setlocale to detect the charset, > this is wrong, nl_langinfo is the correct way. If they use, stripping will > let their app know something went wrong. > > Strip .GBK or .UTF-8, so users would be happy that their old settings are > working, developers will notice their mistakes that using setlocale() to > validate charset is wrong. We get a lot more than failing the setlocale() > and return C, the only bad thing is we need to care about a almost > impossible event: an app directly getenv(). > > 2017-02-12 10:34 GMT+08:00 Rich Felker : > >> On Sat, Feb 11, 2017 at 02:00:56PM +0800, He X wrote: >> > --- a/src/locale/dcngettext.c 2017-02-06 14:39:17.860482624 +0000 >> > +++ b/src/locale/dcngettext.c 2017-02-06 14:39:17.860482624 +0000 >> > @@ -100,7 +100,8 @@ >> > size_t map_size; >> > void *volatile plural_rule; >> > volatile int nplurals; >> > - char name[]; >> > + struct binding *binding; >> > + struct __locale_map *lm; >> > }; >> >> As stated in the reply to message body, I think you need the category >> in the keying too, because there can be different .mo files loaded >> depending on which category was requested. >> >> > static char *dummy_gettextdomain() >> > @@ -120,58 +122,87 @@ >> > struct msgcat *p; >> > struct __locale_struct *loc = CURRENT_LOCALE; >> > const struct __locale_map *lm; >> > - const char *dirname, *locname, *catname; >> > - size_t dirlen, loclen, catlen, domlen; >> > + size_t domlen; >> > + struct binding *q; >> > >> > if ((unsigned)category >= LC_ALL) goto notrans; >> > >> > if (!domainname) domainname = __gettextdomain(); >> > >> > domlen = strnlen(domainname, NAME_MAX+1); >> > if (domlen > NAME_MAX) goto notrans; >> > >> > - dirname = gettextdir(domainname, &dirlen); >> > - if (!dirname) goto notrans; >> > + for (q=bindings; q; q=q->next) >> > + if (!strcmp(q->domainname, domainname) && q->active) >> > + break; >> > + if (!q) goto notrans; >> >> Looks ok. I had said this should be a function but it really doesn't >> need to be; it's plenty simple inline. >> >> > lm = loc->cat[category]; >> > if (!lm) { >> > notrans: >> > return (char *) ((n == 1) ? msgid1 : msgid2); >> > } >> > - locname = lm->name; >> > - >> > - catname = catnames[category]; >> > - catlen = catlens[category]; >> > - loclen = strlen(locname); >> > - >> > - size_t namelen = dirlen+1 + loclen+1 + catlen+1 + domlen+3; >> > - char name[namelen+1], *s = name; >> > - >> > - memcpy(s, dirname, dirlen); >> > - s[dirlen] = '/'; >> > - s += dirlen + 1; >> > - memcpy(s, locname, loclen); >> > - s[loclen] = '/'; >> > - s += loclen + 1; >> > - memcpy(s, catname, catlen); >> > - s[catlen] = '/'; >> > - s += catlen + 1; >> > - memcpy(s, domainname, domlen); >> > - s[domlen] = '.'; >> > - s[domlen+1] = 'm'; >> > - s[domlen+2] = 'o'; >> > - s[domlen+3] = 0; >> > >> > for (p=cats; p; p=p->next) >> > - if (!strcmp(p->name, name)) >> > + if (p->binding == q && p->lm == lm) >> > break; >> >> && p->cat == category >> >> > if (!p) { >> > + const char *dirname, *locname, *catname; >> > + size_t dirlen, loclen, catlen; >> > void *old_cats; >> > size_t map_size; >> > + >> > + dirname = q->dirname; >> > + locname = lm->name; >> > + catname = catnames[category]; >> > + >> > + dirlen = q->dirlen; >> > + loclen = strlen(locname); >> > + catlen = catlens[category]; >> >> Now that these are only computed once rather than per-call, optimizing >> out strlen is probably not worthwhile anymore, but it doesn't really >> hurt either. Not something you need to change, just a comment. >> >> > + >> > + size_t namelen = dirlen+1 + loclen+1 + catlen+1 + >> domlen+3; >> > + char name[namelen+1], *s = name; >> > + char *str = name; >> > + >> > + memcpy(s, dirname, dirlen); >> > + s[dirlen] = '/'; >> > + s += dirlen + 1; >> > + memcpy(s, locname, loclen); >> > + s[loclen] = '/'; >> > + s += loclen + 1; >> > +skip_loc: >> > + memcpy(s, catname, catlen); >> > + s[catlen] = '/'; >> > + s += catlen + 1; >> > + memcpy(s, domainname, domlen); >> > + s[domlen] = '.'; >> > + s[domlen+1] = 'm'; >> > + s[domlen+2] = 'o'; >> > + s[domlen+3] = 0; >> >> Actually, now that this code is not a hot path, it should just be >> using snprintf to construct the pathname, I think. It would be a lot >> simpler and easier to ensure correctness. >> >> > + >> > const void *map = __map_file(name, &map_size); >> > - if (!map) goto notrans; >> > + if (!map) { >> > + if (s = strchr(name+dirlen+1, '@')) { >> > + *s++ = '/'; >> > + goto skip_loc;; >> > + } >> > + if ( str && (s = strchr(name+dirlen+1, '_')) && >> (s < strchr(name+dirlen+1, '/')) ) { >> > + if (str = strchr(locname, '@')) { >> > + loclen += locname - str; >> > + memcpy(s, str, loclen); >> > + s[loclen] = '/'; >> > + s += loclen + 1; >> > + str = 0; >> > + goto skip_loc; >> > + } else { >> > + *s++ = '/'; >> > + goto skip_loc; >> > + } >> > + } >> > + goto notrans; >> > + } >> >> Using snprintf should also make it easy to get rid of the goto/retry >> logic here, perhaps even with a 4-iteration loop and array of which >> format modifications happen on each iteration. >> >> > p = calloc(sizeof *p + namelen + 1, 1); >> > if (!p) { >> > __munmap((void *)map, map_size); >> > goto notrans; >> > @@ -209,7 +209,6 @@ >> > } >> > p->map = map; >> > p->map_size = map_size; >> > - memcpy(p->name, name, namelen+1); >> > do { >> > old_cats = cats; >> > p->next = old_cats; >> > --- a/src/locale/locale_map.c 2017-02-06 14:39:17.797148750 +0000 >> > +++ b/src/locale/locale_map.c 2017-02-06 14:39:17.797148750 +0000 >> > @@ -49,8 +49,8 @@ >> > } >> > >> > /* Limit name length and forbid leading dot or any slashes. */ >> > - for (n=0; n> > - if (val[0]=='.' || val[n]) val = "C.UTF-8"; >> > + for (n=0; n> val[n]!='.'; n++); >> > + if (val[0]=='.' || (val[n] && val[n]!='.')) val = "C.UTF-8"; >> > int builtin = (val[0]=='C' && !val[1]) >> > || !strcmp(val, "C.UTF-8") >> > || !strcmp(val, "POSIX"); >> >> This looks ok but might still need some tweaks. Should an input like >> "zh_CN.GBK" get treated as "zh_CN" (thus outputting UTF-8 that might >> appear as junk on the user's terminal) or as "C" (no localization) >> with only ASCII characters (safe for the user's terminal), or even >> cause setlocale to fail and return an error so that the application >> can decide what to do? These are not technical comments on your patch >> but policy matters the community should weigh in on. >> >> Rich >> > >