zsh-workers
 help / color / mirror / code / Atom feed
From: Phil Pennock <zsh-workers+phil.pennock@spodhuis.org>
To: zsh-workers@zsh.org
Subject: [PATCH] Repair BASH_REMATCH with no substrings
Date: Sun, 13 Aug 2017 18:18:06 -0400	[thread overview]
Message-ID: <20170813221806.GA19107@breadbox.private.spodhuis.org> (raw)
In-Reply-To: <20170813211225.GB98824@tower.spodhuis.org>

[-- Attachment #1: Type: text/plain, Size: 7018 bytes --]

On 2017-08-13 at 17:12 -0400, Phil Pennock wrote:
> Definitely; this is a regression from my NUL fixing and trying to
> correctly meta/unmeta all parameters going through.  Change 41308 in
> commit 825f84c77 exposed a bug introduced in 2011 in commit 2f3c16d40f.

From 41815a3b5e7324fa188d24f558ea9b0026a1a110 Mon Sep 17 00:00:00 2001
From: Phil Pennock <phil@pennock-tech.com>
Date: Sun, 13 Aug 2017 18:13:41 -0400
Subject: [PATCH] Repair BASH_REMATCH with no substrings

Change 41308 in commit 825f84c77 exposed a bug introduced in 2011 in
commit 2f3c16d40f.  (Both mine).

When we went off the end of the array but measured the length
implicitly, we got lucky before.  After 41308 we were looking up lengths
in stale memory.

Rename some variables, clean up the logic, be easier to understand.

Add tests.
---
 Src/Modules/pcre.c | 68 ++++++++++++++++++++++++++++++++++--------------------
 Test/V07pcre.ztst  | 24 +++++++++++++++++++
 2 files changed, 67 insertions(+), 25 deletions(-)

diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c
index 27191d709..659fd22d5 100644
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -148,7 +148,7 @@ bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int f
 
 /**/
 static int
-zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
+zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar,
 		     char *substravar, int want_offset_pair, int matchedinarr,
 		     int want_begin_end)
 {
@@ -156,15 +156,13 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
     char offset_all[50];
     int capture_start = 1;
 
-    if (matchedinarr)
+    if (matchedinarr) {
+	/* bash-style captures[0] entire-matched string in the array */
 	capture_start = 0;
-    if (matchvar == NULL)
-	matchvar = "MATCH";
-    if (substravar == NULL)
-	substravar = "match";
-    
+    }
+
     /* captures[0] will be entire matched string, [1] first substring */
-    if (!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) {
+    if (!pcre_get_substring_list(arg, ovec, captured_count, (const char ***)&captures)) {
 	int nelem = arrlen(captures)-1;
 	/* Set to the offsets of the complete match */
 	if (want_offset_pair) {
@@ -176,30 +174,43 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
 	 * difference between the two values in each paired entry in ovec.
 	 * ovec is length 2*(1+capture_list_length)
 	 */
-	match_all = metafy(captures[0], ovec[1] - ovec[0], META_DUP);
-	setsparam(matchvar, match_all);
+	if (matchvar) {
+	    match_all = metafy(captures[0], ovec[1] - ovec[0], META_DUP);
+	    setsparam(matchvar, match_all);
+	}
 	/*
 	 * If we're setting match, mbegin, mend we only do
 	 * so if there were parenthesised matches, for consistency
-	 * (c.f. regex.c).
+	 * (c.f. regex.c).  That's the next block after this one.
+	 * Here we handle the simpler case where we don't worry about
+	 * Unicode lengths, etc.
+	 * Either !want_begin_end (ie, this is bash) or nelem; if bash
+	 * then we're invoked always, even without nelem results, to
+	 * set the array variable with one element in it, the complete match.
 	 */
-	if (!want_begin_end || nelem) {
+	if (substravar &&
+	    (!want_begin_end || nelem)) {
 	    char **x, **y;
-	    int vec_off;
+	    int vec_off, i;
 	    y = &captures[capture_start];
-	    matches = x = (char **) zalloc(sizeof(char *) * (arrlen(y) + 1));
-	    vec_off = 2;
-	    do {
+	    matches = x = (char **) zalloc(sizeof(char *) * (captured_count+1-capture_start));
+	    for (i = capture_start; i < captured_count; i++, y++) {
+		vec_off = 2*i;
 		if (*y)
 		    *x++ = metafy(*y, ovec[vec_off+1]-ovec[vec_off], META_DUP);
 		else
 		    *x++ = NULL;
-		vec_off += 2;
-	    } while (*y++);
+	    }
+	    *x = NULL;
 	    setaparam(substravar, matches);
 	}
 
 	if (want_begin_end) {
+	    /*
+	     * cond-infix rather than builtin; also not bash; so we set a bunch
+	     * of variables and arrays to values which require handling Unicode
+	     * lengths
+	     */
 	    char *ptr = arg;
 	    zlong offs = 0;
 	    int clen, leftlen;
@@ -306,7 +317,9 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
 	zwarnnam(nam, "no pattern has been compiled");
 	return 1;
     }
-    
+
+    matched_portion = "MATCH";
+    receptacle = "match";
     if(OPT_HASARG(ops,c='a')) {
 	receptacle = OPT_ARG(ops,c);
     }
@@ -318,8 +331,8 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
 	    return 1;
     }
     /* For the entire match, 'Return' the offset byte positions instead of the matched string */
-    if(OPT_ISSET(ops,'b')) want_offset_pair = 1; 
-    
+    if(OPT_ISSET(ops,'b')) want_offset_pair = 1;
+
     if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount)))
     {
 	zwarnnam(nam, "error %d in fullinfo", ret);
@@ -360,7 +373,7 @@ cond_pcre_match(char **a, int id)
 {
     pcre *pcre_pat;
     const char *pcre_err;
-    char *lhstr, *rhre, *lhstr_plain, *rhre_plain, *avar=NULL;
+    char *lhstr, *rhre, *lhstr_plain, *rhre_plain, *avar, *svar;
     int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize;
     int lhstr_plain_len, rhre_plain_len;
     int return_value = 0;
@@ -380,8 +393,13 @@ cond_pcre_match(char **a, int id)
     ov = NULL;
     ovsize = 0;
 
-    if (isset(BASHREMATCH))
-	avar="BASH_REMATCH";
+    if (isset(BASHREMATCH)) {
+	svar = NULL;
+	avar = "BASH_REMATCH";
+    } else {
+	svar = "MATCH";
+	avar = "match";
+    }
 
     switch(id) {
 	 case CPCRE_PLAIN:
@@ -414,7 +432,7 @@ cond_pcre_match(char **a, int id)
 		    break;
 		}
                 else if (r>0) {
-		    zpcre_get_substrings(lhstr_plain, ov, r, NULL, avar, 0,
+		    zpcre_get_substrings(lhstr_plain, ov, r, svar, avar, 0,
 					 isset(BASHREMATCH),
 					 !isset(BASHREMATCH));
 		    return_value = 1;
diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst
index ab41d33dc..9feeb47fb 100644
--- a/Test/V07pcre.ztst
+++ b/Test/V07pcre.ztst
@@ -142,9 +142,33 @@
   print $?
   [[ foo -pcre-match ^g..$ ]]
   print $?
+  [[ ! foo -pcre-match ^g..$ ]]
+  print $?
 0:infix -pcre-match works
 >0
 >1
+>0
+
+# Bash mode; note zsh documents that variables not updated on match failure,
+# which remains different from bash
+  setopt bash_rematch
+  [[ "goo" -pcre-match ^f.+$ ]] ; print $?
+  [[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]}
+  [[ "foot" -pcre-match ^f([aeiou]+)(.)$ ]]; print -l $? _${^BASH_REMATCH[@]}
+  [[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]}
+  [[ ! "goo" -pcre-match ^f.+$ ]] ; print $?
+  unsetopt bash_rematch
+0:bash-compatibility works
+>1
+>0
+>_foo
+>0
+>_foot
+>_oo
+>_t
+>0
+>_foo
+>0
 
 # Subshell because crash on failure
   ( setopt re_match_pcre
-- 
2.14.1


[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 996 bytes --]

      reply	other threads:[~2017-08-13 22:18 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-13 20:49 5.4.1 regression: PCRE with bash_rematch Phil Pennock
2017-08-13 21:12 ` Phil Pennock
2017-08-13 22:18   ` Phil Pennock [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170813221806.GA19107@breadbox.private.spodhuis.org \
    --to=zsh-workers+phil.pennock@spodhuis.org \
    --cc=zsh-workers@zsh.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).