From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 20661 invoked by alias); 21 Oct 2011 10:35:37 -0000 Mailing-List: contact zsh-workers-help@zsh.org; run by ezmlm Precedence: bulk X-No-Archive: yes List-Id: Zsh Workers List List-Post: List-Help: X-Seq: 29838 Received: (qmail 13623 invoked from network); 21 Oct 2011 10:35:35 -0000 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on f.primenet.com.au X-Spam-Level: X-Spam-Status: No, score=-4.3 required=5.0 tests=BAYES_00,DKIM_SIGNED, DKIM_VALID,DKIM_VALID_AU,RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received-SPF: none (ns1.primenet.com.au: domain at spodhuis.org does not designate permitted sender hosts) DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=spodhuis.org; s=d201107; h=In-Reply-To:Content-Type:MIME-Version:References:Message-ID:Subject:Cc:To:From:Date; bh=07pkKbtuyHPRYES5dDJNxm7piqcZS1oCRADLbM/IZrE=; b=ih4HWlFPe/je+4PpxWP0r5YiweqApFIsHLBJ24MNCU3KOLcMX05s1H9q56cmEyv1PJHCFedlHPJSN+4FFc8qgNy1GAF2QTrfIPM8Xp3GF9q/GHqRIIB0CnSKoE9X5dcOW/sDMKD+jFh1i4x7iBNZAG5PD3qoh2qFarrKJP2aOlw=; Date: Fri, 21 Oct 2011 06:35:30 -0400 From: Phil Pennock To: zsh-workers@zsh.org Cc: Peter Stephenson Subject: Re: UTF-8 and PCRE and metafy Message-ID: <20111021103530.GA78708@redoubt.spodhuis.org> Mail-Followup-To: zsh-workers@zsh.org, Peter Stephenson References: <20110308065216.GB79682@redoubt.spodhuis.org> <20110308095850.12843492@pwslap01u.europe.root.pri> <20111021095624.GA23272@redoubt.spodhuis.org> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="u3/rZRmxL6MmkK24" Content-Disposition: inline In-Reply-To: <20111021095624.GA23272@redoubt.spodhuis.org> --u3/rZRmxL6MmkK24 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline On 2011-10-21 at 05:56 -0400, Phil Pennock wrote: > Attached is what looks to me to be a correct patch. With bash_rematch Oops, for the change around line 292, I switched to a new variable but never called unmetafy(). One line added in the attached patch, which replaces the previous patch. Sorry, -Phil --u3/rZRmxL6MmkK24 Content-Type: text/x-diff; charset=us-ascii Content-Disposition: inline; filename="pcre-utf8.patch" Index: Src/Modules/pcre.c =================================================================== RCS file: /home/cvsroot/zsh/Src/Modules/pcre.c,v retrieving revision 1.18 diff -a -u -p -r1.18 pcre.c --- Src/Modules/pcre.c 20 Jan 2010 11:17:11 -0000 1.18 +++ Src/Modules/pcre.c 21 Oct 2011 10:29:14 -0000 @@ -77,6 +77,7 @@ bin_pcre_compile(char *nam, char **args, { int pcre_opts = 0, pcre_errptr; const char *pcre_error; + char *target; if(OPT_ISSET(ops,'a')) pcre_opts |= PCRE_ANCHORED; if(OPT_ISSET(ops,'i')) pcre_opts |= PCRE_CASELESS; @@ -92,8 +93,13 @@ bin_pcre_compile(char *nam, char **args, if (pcre_pattern) pcre_free(pcre_pattern); - pcre_pattern = pcre_compile(*args, pcre_opts, &pcre_error, &pcre_errptr, NULL); + target = ztrdup(*args); + unmetafy(target, NULL); + + pcre_pattern = pcre_compile(target, pcre_opts, &pcre_error, &pcre_errptr, NULL); + free(target); + if (pcre_pattern == NULL) { zwarnnam(nam, "error in regex: %s", pcre_error); @@ -161,7 +167,7 @@ zpcre_get_substrings(char *arg, int *ove sprintf(offset_all, "%d %d", ovec[0], ovec[1]); setsparam("ZPCRE_OP", ztrdup(offset_all)); } - match_all = ztrdup(captures[0]); + match_all = metafy(captures[0], -1, META_DUP); setsparam(matchvar, match_all); /* * If we're setting match, mbegin, mend we only do @@ -169,7 +175,15 @@ zpcre_get_substrings(char *arg, int *ove * (c.f. regex.c). */ if (!want_begin_end || nelem) { - matches = zarrdup(&captures[capture_start]); + char **x, **y; + y = &captures[capture_start]; + matches = x = (char **) zalloc(sizeof(char *) * (arrlen(y) + 1)); + do { + if (*y) + *x++ = metafy(*y, -1, META_DUP); + else + *x++ = NULL; + } while (*y++); setaparam(substravar, matches); } @@ -255,6 +269,7 @@ bin_pcre_match(char *nam, char **args, O { int ret, capcount, *ovec, ovecsize, c; char *matched_portion = NULL; + char *plaintext = NULL; char *receptacle = NULL; int return_value = 1; /* The subject length and offset start are both int values in pcre_exec */ @@ -278,7 +293,7 @@ bin_pcre_match(char *nam, char **args, O } /* For the entire match, 'Return' the offset byte positions instead of the matched string */ if(OPT_ISSET(ops,'b')) want_offset_pair = 1; - + if(!*args) { zwarnnam(nam, "not enough arguments"); } @@ -288,26 +303,28 @@ bin_pcre_match(char *nam, char **args, O zwarnnam(nam, "error %d in fullinfo", ret); return 1; } - + ovecsize = (capcount+1)*3; ovec = zalloc(ovecsize*sizeof(int)); - - subject_len = (int)strlen(*args); + + plaintext = ztrdup(*args); + unmetafy(plaintext, NULL); + subject_len = (int)strlen(plaintext); if (offset_start < 0 || offset_start >= subject_len) ret = PCRE_ERROR_NOMATCH; else - ret = pcre_exec(pcre_pattern, pcre_hints, *args, subject_len, offset_start, 0, ovec, ovecsize); + ret = pcre_exec(pcre_pattern, pcre_hints, plaintext, subject_len, offset_start, 0, ovec, ovecsize); if (ret==0) return_value = 0; else if (ret==PCRE_ERROR_NOMATCH) /* no match */; else if (ret>0) { - zpcre_get_substrings(*args, ovec, ret, matched_portion, receptacle, + zpcre_get_substrings(plaintext, ovec, ret, matched_portion, receptacle, want_offset_pair, 0, 0); return_value = 0; } else { - zwarnnam(nam, "error in pcre_exec"); + zwarnnam(nam, "error in pcre_exec [%d]", ret); } if (ovec) @@ -322,7 +339,8 @@ cond_pcre_match(char **a, int id) { pcre *pcre_pat; const char *pcre_err; - char *lhstr, *rhre, *avar=NULL; + char *lhstr, *rhre, *lhstr_plain, *rhre_plain, *avar=NULL; + char *p; int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize; int return_value = 0; @@ -331,6 +349,10 @@ cond_pcre_match(char **a, int id) lhstr = cond_str(a,0,0); rhre = cond_str(a,1,0); + lhstr_plain = ztrdup(lhstr); + rhre_plain = ztrdup(rhre); + unmetafy(lhstr_plain, NULL); + unmetafy(rhre_plain, NULL); pcre_pat = NULL; ov = NULL; @@ -339,7 +361,7 @@ cond_pcre_match(char **a, int id) switch(id) { case CPCRE_PLAIN: - pcre_pat = pcre_compile(rhre, pcre_opts, &pcre_err, &pcre_errptr, NULL); + pcre_pat = pcre_compile(rhre_plain, pcre_opts, &pcre_err, &pcre_errptr, NULL); if (pcre_pat == NULL) { zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err); break; @@ -347,7 +369,7 @@ cond_pcre_match(char **a, int id) pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt); ovsize = (capcnt+1)*3; ov = zalloc(ovsize*sizeof(int)); - r = pcre_exec(pcre_pat, NULL, lhstr, strlen(lhstr), 0, 0, ov, ovsize); + r = pcre_exec(pcre_pat, NULL, lhstr_plain, strlen(lhstr_plain), 0, 0, ov, ovsize); /* r < 0 => error; r==0 match but not enough size in ov * r > 0 => (r-1) substrings found; r==1 => no substrings */ @@ -356,13 +378,16 @@ cond_pcre_match(char **a, int id) return_value = 1; break; } - else if (r==PCRE_ERROR_NOMATCH) return 0; /* no match */ + else if (r==PCRE_ERROR_NOMATCH) { + return_value = 0; /* no match */ + break; + } else if (r<0) { - zwarn("pcre_exec() error: %d", r); + zwarn("pcre_exec() error [%d]", r); break; } else if (r>0) { - zpcre_get_substrings(lhstr, ov, r, NULL, avar, 0, + zpcre_get_substrings(lhstr_plain, ov, r, NULL, avar, 0, isset(BASHREMATCH), !isset(BASHREMATCH)); return_value = 1; @@ -371,6 +396,10 @@ cond_pcre_match(char **a, int id) break; } + if (lhstr_plain) + free(lhstr_plain); + if(rhre_plain) + free(rhre_plain); if (pcre_pat) pcre_free(pcre_pat); if (ov) --u3/rZRmxL6MmkK24--