From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from euclid.skiles.gatech.edu (list@euclid.skiles.gatech.edu [130.207.146.50]) by melb.werple.net.au (8.7.5/8.7.3/2) with ESMTP id CAA09071 for ; Thu, 11 Jul 1996 02:01:19 +1000 (EST) Received: (from list@localhost) by euclid.skiles.gatech.edu (8.7.3/8.7.3) id LAA05203; Wed, 10 Jul 1996 11:47:37 -0400 (EDT) Resent-Date: Wed, 10 Jul 1996 11:47:37 -0400 (EDT) Date: Wed, 10 Jul 1996 13:27:32 +0400 (MOW) From: Andrej Borsenkow X-Sender: bor@itsrm1 Reply-To: borsenkow.msk@sni.de To: Zsh workers mailing list Subject: Support for locale defined sorting/character range Message-ID: MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Resent-Message-ID: <"HbwpY.0.DH1.P2zun"@euclid> Resent-From: zsh-workers@math.gatech.edu X-Mailing-List: archive/latest/1599 X-Loop: zsh-workers@math.gatech.edu Precedence: list Resent-Sender: zsh-workers-request@math.gatech.edu Hi! I have made small changes to zsh-3.0-pre2 to support locale-defined sort order. The changes affect: character range in [a-z] glob pattern (match if character is in range as determined by current locale setting). sorting of globbing results - now they are sorted in order of current locale. (Ooi) modifiers in parameter substitution - again, sort in order of current locale. I use strcoll(); I have added a test for strcoll to configure.in; I assume that config.h.in is easily recreated ;) Note: I put a check whether we allocate from permanent storage or from the heap in cstrpcmp and invcstrpcmp. Probably, the check is not needed - I couldn't trace it far enough. Of course, it all assumes single-byte character set - no multibytes or wide characters. It seems enough for now (well, for Europe/USSR anyway;) thanks for any feedback (I am not on this list - please Cc if replying). 
greetings ------------------------------------------------------------------------- Andrej Borsenkow Fax: +7 (095) 252 01 05 SNI ITS Moscow Tel: +7 (095) 252 13 88 NERV: borsenkow.msk E-Mail: borsenkow.msk@sni.de ------------------------------------------------------------------------- ==================== diffs ============================= --- Src/glob.c.org Fri Jun 28 17:46:02 1996 +++ Src/glob.c Tue Jul 9 17:01:18 1996 @@ -610,8 +610,13 @@ char *c = *b, *d = *a; int cmp; +#ifdef HAVE_STRCOLL + cmp = strcoll(c, d); +#endif for (; *c == *d && *c; c++, d++); +#ifndef HAVE_STRCOLL cmp = (int)STOUC(*c) - (int)STOUC(*d); +#endif if (isset(NUMERICGLOBSORT) && (idigit(*c) || idigit(*d))) { for (; c > *b && idigit(c[-1]); c--, d--); if (idigit(*c) && idigit(*d)) { @@ -1709,17 +1714,32 @@ #define PAT(X) (pat[X] == Meta ? pat[(X)+1] ^ 32 : untok(pat[X])) #define PPAT(X) (pat[(X)-1] == Meta ? pat[X] ^ 32 : untok(pat[X])) char ch; +#ifdef HAVE_STRCOLL + char l_buf[2], r_buf[2], ch_buf[2]; + + l_buf[1] = r_buf[1] = ch_buf[1] = '\0'; +#endif if (!*pptr) break; ch = *pptr == Meta ? pptr[1] ^ 32 : *pptr; +#ifdef HAVE_STRCOLL + ch_buf[0] = ch; +#endif if (pat[1] == Hat || pat[1] == '^' || pat[1] == '!') { /* group is negated */ pat[1] = Hat; for (pat += 2; *pat != Outbrack && *pat; *pat == Meta ? pat += 2 : pat++) if (*pat == '-' && pat[-1] != Hat && pat[1] != Outbrack) { +#ifdef HAVE_STRCOLL + l_buf[0] = PPAT(-1); + r_buf[0] = PAT(1); + if (strcoll(l_buf, ch_buf) <= 0 && + strcoll(ch_buf, r_buf) <= 0) +#else if (PPAT(-1) <= ch && PAT(1) >= ch) +#endif break; } else if (ch == PAT(0)) break; @@ -1740,7 +1760,14 @@ *pat == Meta ? 
pat += 2 : pat++) if (*pat == '-' && pat[-1] != Inbrack && pat[1] != Outbrack) { +#ifdef HAVE_STRCOLL + l_buf[0] = PPAT(-1); + r_buf[0] = PAT(1); + if (strcoll(l_buf, ch_buf) <= 0 && + strcoll(ch_buf, r_buf) <= 0) +#else if (PPAT(-1) <= ch && PAT(1) >= ch) +#endif break; } else if (ch == PAT(0)) break; --- Src/subst.c.org Fri Jun 28 18:46:24 1996 +++ Src/subst.c Wed Jul 10 13:01:47 1996 @@ -430,36 +430,80 @@ int strpcmp(const void *a, const void *b) { +#ifdef HAVE_STRCOLL + return strcoll(*(char **)a, *(char **)b); +#else return strcmp(*(char **)a, *(char **)b); +#endif } /**/ int invstrpcmp(const void *a, const void *b) { +#ifdef HAVE_STRCOLL + return -strcoll(*(char **)a, *(char **)b); +#else return -strcmp(*(char **)a, *(char **)b); +#endif } /**/ int cstrpcmp(const void *a, const void *b) { +#ifdef HAVE_STRCOLL + char *c = dupstring(*(char **)a), *d = dupstring(*(char **)b); + char *cc = c, *dd = d; + int cmp; + + while (*cc++) cc[-1] = tulower(cc[-1]); + while (*dd++) dd[-1] = tulower(dd[-1]); + + cmp = strcoll(c, d); + + if (!useheap) { + free(c); + free(d); + } + + return cmp; +#else char *c = *(char **)a, *d = *(char **)b; for (; *c && tulower(*c) == tulower(*d); c++, d++); return (int)STOUC(tulower(*c)) - (int)STOUC(tulower(*d)); +#endif } /**/ int invcstrpcmp(const void *a, const void *b) { +#ifdef HAVE_STRCOLL + char *c = dupstring(*(char **)a), *d = dupstring(*(char **)b); + char *cc = c, *dd = d; + int cmp; + + while (*cc++) cc[-1] = tulower(cc[-1]); + while (*dd++) dd[-1] = tulower(dd[-1]); + + cmp = strcoll(c, d); + + if (!useheap) { + free(c); + free(d); + } + + return -cmp; +#else char *c = *(char **)a, *d = *(char **)b; for (; *c && tulower(*c) == tulower(*d); c++, d++); return (int)STOUC(tulower(*d)) - (int)STOUC(tulower(*c)); +#endif } /**/ --- configure.in.org Fri Jul 5 00:06:46 1996 +++ configure.in Tue Jul 9 14:25:34 1996 @@ -409,7 +409,7 @@ AC_CHECK_FUNCS(strftime waitpid select tcsetpgrp tcgetattr strstr lstat \ getlogin setpgid 
gettimeofday gethostname mkfifo wait3 difftime \ sigblock sigsetmask sigrelse sighold killpg sigaction getrlimit \ - sigprocmask setuid seteuid setreuid setresuid strerror) + sigprocmask setuid seteuid setreuid setresuid strerror strcoll) dnl ------------- dnl CHECK SIGNALS