From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 7118 invoked from network); 20 Sep 1999 11:01:30 -0000 Received: from mail2.primenet.com.au (HELO primenet.com.au) (?S3F/0rdgEW6tWSCi49b6DTO1l3rxHyCz?@203.24.36.6) by ns1.primenet.com.au with SMTP; 20 Sep 1999 11:01:30 -0000 Received: (qmail 2372 invoked from network); 20 Sep 1999 10:54:46 -0000 Received: from sunsite.auc.dk (130.225.51.30) by mail2.primenet.com.au with SMTP; 20 Sep 1999 10:54:46 -0000 Received: (qmail 3460 invoked by alias); 20 Sep 1999 10:54:19 -0000 Mailing-List: contact zsh-workers-help@sunsite.auc.dk; run by ezmlm Precedence: bulk X-No-Archive: yes X-Seq: 7948 Received: (qmail 3451 invoked from network); 20 Sep 1999 10:54:17 -0000 Message-Id: <9909201018.AA20829@ibmth.df.unipi.it> To: zsh-workers@sunsite.auc.dk Subject: Re: Backreference problem In-Reply-To: "Tanaka Akira"'s message of "19 Sep 1999 19:26:38 DFT." Date: Mon, 20 Sep 1999 12:18:46 +0200 From: Peter Stephenson Tanaka Akira wrote: > Z:akr@is27e1u11% Src/zsh -f > is27e1u11% setopt extendedglob > is27e1u11% [[ $'\0' = (#b)($'\0') ]] && echo $mend > 2 > is27e1u11% > > I think it should be 1. Yes, indeed. Indexes of metafied string elements weren't counted properly. The nasty part is getting this right for offsets when using parameter substitutions, particularly global substitutions. The patch below seems to work, but be warned that this makes my brain hurt. --- Src/glob.c.moff Fri Sep 17 16:33:47 1999 +++ Src/glob.c Mon Sep 20 11:59:03 1999 @@ -2002,7 +2002,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr) { char *s = *sp, *t, *start, sav; - int i, l = strlen(*sp), matched = 1; + int i, l = strlen(*sp), ml = ztrlen(*sp), matched = 1; MUSTUSEHEAP("igetmatch"); /* presumably covered by prefork() test */ repllist = NULL; @@ -2053,8 +2053,8 @@ /* Smallest possible match at tail of string: * * move back down string until we get a match. * * There's no optimization here. 
*/ - for (t = s + l; t >= s; t--) { - patoffset = t - s; + patoffset = ml; + for (t = s + l; t >= s; t--, patoffset--) { if (pattry(p, t)) { *sp = get_match_ret(*sp, t - s, l, fl, replstr); patoffset = 0; @@ -2070,8 +2070,7 @@ /* Largest possible match at tail of string: * * move forward along string until we get a match. * * Again there's no optimisation. */ - for (i = 0, t = s; i < l; i++, t++) { - patoffset = i; + for (i = 0, t = s; i < l; i++, t++, patoffset++) { if (pattry(p, t)) { *sp = get_match_ret(*sp, i, l, fl, replstr); patoffset = 0; @@ -2091,15 +2090,14 @@ } /* fall through */ case (SUB_SUBSTR|SUB_LONG): /* longest or smallest at start with substrings */ - start = s; + t = s; if (fl & SUB_GLOBAL) repllist = newlinklist(); do { /* loop over all matches for global substitution */ matched = 0; - for (t = start; t < s + l; t++) { + for (; t < s + l; t++, patoffset++) { /* Find the longest match from this position. */ - patoffset = t - start; if (pattry(p, t) && patinput > t) { char *mpos = patinput; if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { @@ -2117,8 +2115,8 @@ } if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) { *sp = get_match_ret(*sp, t-s, mpos-s, fl, replstr); - if (mpos == start) - mpos++; + if (mpos == t) + METAINC(mpos); } if (!(fl & SUB_GLOBAL)) { if (n) { @@ -2139,7 +2137,9 @@ * which is already marked for replacement. */ matched = 1; - start = mpos; + for ( ; t < mpos; t++, patoffset++) + if (*t == Meta) + t++; break; } if (*t == Meta) @@ -2161,17 +2161,16 @@ case (SUB_END|SUB_SUBSTR): /* Shortest at end with substrings */ - patoffset = l; + patoffset = ml; if (pattry(p, s + l) && !--n) { *sp = get_match_ret(*sp, l, l, fl, replstr); patoffset = 0; return 1; } /* fall through */ - patoffset = 0; case (SUB_END|SUB_LONG|SUB_SUBSTR): /* Longest/shortest at end, matching substrings. 
*/ - for (t = s + l - 1; t >= s; t--) { - patoffset = t - s; + patoffset--; + for (t = s + l - 1; t >= s; t--, patoffset--) { if (t > s && t[-1] == Meta) t--; if (pattry(p, t) && patinput > t && !--n) { @@ -2195,7 +2194,7 @@ return 1; } } - patoffset = l; + patoffset = ml; if ((fl & SUB_LONG) && pattry(p, s + l) && !--n) { *sp = get_match_ret(*sp, l, l, fl, replstr); patoffset = 0; --- Src/pattern.c.moff Mon Sep 20 12:11:45 1999 +++ Src/pattern.c Mon Sep 20 12:04:48 1999 @@ -1335,15 +1335,15 @@ * in the pattern matching part. */ char *str; - int len = patinput - patinstart; + int mlen = ztrsub(patinput, patinstart); PERMALLOC { - str = dupstrpfx(patinstart, len); + str = dupstrpfx(patinstart, patinput - patinstart); } LASTALLOC; setsparam("MATCH", str); setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS))); setiparam("MEND", - (zlong)(len + patoffset + !isset(KSHARRAYS) - 1)); + (zlong)(mlen + patoffset + !isset(KSHARRAYS) - 1)); } if (prog->patnpar && !(patflags & PAT_FILE)) { /* @@ -1374,11 +1374,11 @@ * corresponds to indexing as ${foo[1,1]}. */ sprintf(numbuf, "%ld", - (long)((*sp - patinstart) + patoffset + + (long)(ztrsub(*sp, patinstart) + patoffset + !isset(KSHARRAYS))); mbeginarr[i] = ztrdup(numbuf); sprintf(numbuf, "%ld", - (long)((*ep - patinstart) + patoffset + + (long)(ztrsub(*ep, patinstart) + patoffset + !isset(KSHARRAYS) - 1)); mendarr[i] = ztrdup(numbuf); sp++; -- Peter Stephenson Tel: +39 050 844536 WWW: http://www.ifh.de/~pws/ Dipartimento di Fisica, Via Buonarroti 2, 56127 Pisa, Italy