From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 13402 invoked by alias); 16 Jun 2012 01:02:33 -0000 Mailing-List: contact zsh-workers-help@zsh.org; run by ezmlm Precedence: bulk X-No-Archive: yes List-Id: Zsh Workers List List-Post: List-Help: X-Seq: 30513 Received: (qmail 8025 invoked from network); 16 Jun 2012 01:02:31 -0000 X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on f.primenet.com.au X-Spam-Level: X-Spam-Status: No, score=-3.3 required=5.0 tests=BAYES_00,DKIM_ADSP_ALL, DKIM_SIGNED,RCVD_IN_DNSWL_MED,T_DKIM_INVALID,UNPARSEABLE_RELAY autolearn=ham version=3.3.2 Received-SPF: none (ns1.primenet.com.au: domain at spodhuis.org does not designate permitted sender hosts) DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=spodhuis.org; s=d201107; h=In-Reply-To:Content-Transfer-Encoding:Content-Type:MIME-Version:References:Message-ID:Subject:Cc:To:From:Date; bh=N+KXpmLXRL6Jb2DbotzZnFZ3lTpL4KVwO/vBLm9i4OQ=; b=wnr2BF6DglETv2OS+HA/3F8pu0iRcIeU4Z7UT8ix0uk6VWoggHI3J9EF6UFKllquvVsrkFpAsNjrzU5dqiqWhgzgF8hNsbLJmNNVl0cTi0g0x4cL/V0BmwCxRJQXruYMB2fX+bhXHBPrOKhWlFxbzYJL+il+MkWMZJJOL0Gxibo=; Date: Fri, 15 Jun 2012 20:44:11 -0400 From: Phil Pennock To: Peter Stephenson Cc: zsh-workers@zsh.org Subject: PATCH: zsh/regex meta fixes for widechar (Re: Possible 4.3.18?) Message-ID: <20120616004411.GA16035@redoubt.spodhuis.org> Mail-Followup-To: Peter Stephenson , zsh-workers@zsh.org References: <87zk89rg1d.fsf@ft.bewatermyfriend.org> <20120611172403.5f87177c@pwslap01u.europe.root.pri> <20120615194210.33ab9abc@pws-pc.ntlworld.com> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8; x-action=pgp-signed Content-Transfer-Encoding: 8bit In-Reply-To: <20120615194210.33ab9abc@pws-pc.ntlworld.com> -----BEGIN PGP SIGNED MESSAGE----- Hash: RIPEMD160 On 2012-06-15 at 19:42 +0100, Peter Stephenson wrote: > Phil was suggesting he might need to make a change before big release, > is that still the case? Oh right, sorry. 
The zsh/regex module needed to support wide characters without getting upset, so some unmeta/meta sprinkling was needed. Here's the patch; I'll commit shortly. AFAICT, I'm already using the correct character-length counting for $mbegin and $mend, so this patch is sufficient. It seems suspiciously simpler than the pcre.c revision 1.19 change I wrote. With patch: - ----------------------------8< cut here >8------------------------------ % unsetopt rematchpcre % [[ 'aei→bx' =~ ^([aeiou]+)(.)(.) ]] && print -l $match === $mbegin === $mend === $MATCH aei → b === 1 4 5 === 3 4 5 === aei→b - ----------------------------8< cut here >8------------------------------ Without patch: - ----------------------------8< cut here >8------------------------------ % [[ 'aei→bx' =~ ^([aeiou]+)(.)(.) ]] && print -l $match === $mbegin === $mend === $MATCH ae i ? === 1 3 4 === 2 3 4 === aei? - ----------------------------8< cut here >8------------------------------ Index: Src/Modules/regex.c =================================================================== RCS file: /home/cvsroot/remote-repos/zsh-repo/zsh/Src/Modules/regex.c,v retrieving revision 1.7 diff -a -u -p -r1.7 regex.c - --- Src/Modules/regex.c 20 Jan 2010 11:17:11 -0000 1.7 +++ Src/Modules/regex.c 16 Jun 2012 00:30:08 -0000 @@ -3,7 +3,7 @@ * * This file is part of zsh, the Z shell. * - - * Copyright (c) 2007 Phil Pennock + * Copyright (c) 2007,2012 Phil Pennock * All Rights Reserved. 
* * Permission is hereby granted, without written agreement and without @@ -56,14 +56,19 @@ zcond_regex_match(char **a, int id) regex_t re; regmatch_t *m, *matches = NULL; size_t matchessz = 0; - - char *lhstr, *rhre, *s, **arr, **x; + char *lhstr, *lhstr_zshmeta, *rhre, *rhre_zshmeta, *s, **arr, **x; int r, n, return_value, rcflags, reflags, nelem, start; - - lhstr = cond_str(a,0,0); - - rhre = cond_str(a,1,0); + lhstr_zshmeta = cond_str(a,0,0); + rhre_zshmeta = cond_str(a,1,0); rcflags = reflags = 0; return_value = 0; /* 1 => matched successfully */ + lhstr = ztrdup(lhstr_zshmeta); + unmetafy(lhstr, NULL); + rhre = ztrdup(rhre_zshmeta); + unmetafy(rhre, NULL); + switch(id) { case ZREGEX_EXTENDED: rcflags |= REG_EXTENDED; @@ -101,7 +106,7 @@ zcond_regex_match(char **a, int id) if (nelem) { arr = x = (char **) zalloc(sizeof(char *) * (nelem + 1)); for (m = matches + start, n = start; n <= (int)re.re_nsub; ++n, ++m, ++x) { - - *x = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so); + *x = metafy(lhstr + m->rm_so, m->rm_eo - m->rm_so, META_DUP); } *x = NULL; } @@ -112,7 +117,7 @@ zcond_regex_match(char **a, int id) char *ptr; m = matches; - - s = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so); + s = metafy(lhstr + m->rm_so, m->rm_eo - m->rm_so, META_DUP); setsparam("MATCH", s); /* * Count the characters before the match. @@ -174,12 +179,16 @@ zcond_regex_match(char **a, int id) break; default: DPUTS(1, "bad regex option"); - - return 0; /* nothing to cleanup, especially not "re". */ + return_value = 0; + goto CLEAN_BASEMETA; } if (matches) zfree(matches, matchessz); regfree(&re); +CLEAN_BASEMETA: + free(lhstr); + free(rhre); return return_value; } -----BEGIN PGP SIGNATURE----- iEYEAREDAAYFAk/b1tIACgkQQDBDFTkDY39n9gCeLnbvIM3fpndE0GaNOGdN338s u1cAn064nG9fOcKq80Zf2Dg5IwL/O5R7 =Q7/2 -----END PGP SIGNATURE-----