zsh-workers
 help / color / mirror / code / Atom feed
From: "Jun T." <takimoto-j@kba.biglobe.ne.jp>
To: zsh-workers@zsh.org
Subject: Re: Strange behavior of [[
Date: Fri, 8 Jan 2016 22:09:28 +0900	[thread overview]
Message-ID: <9BF380A3-CAEB-46FB-8598-4E80DF45E79D@kba.biglobe.ne.jp> (raw)
In-Reply-To: <5577AE8F.6060902@arthaud.me>

pcre.c has the same problem:

% setopt re_match_pcre
% [[ $'\ua0' =~ . ]] && echo OK
(zsh hangs; 100% CPU usage)

The following patch applies to pcre.c the same fix that workers/35448
made to regex.c. It also adds a simple test to V07pcre.ztst.

diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c
index 2393cd1..aa5c8ed 100644
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -190,18 +190,25 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
 	if (want_begin_end) {
 	    char *ptr = arg;
 	    zlong offs = 0;
+	    int clen, leftlen;
 
 	    /* Count the characters before the match */
-	    MB_METACHARINIT();
-	    while (ptr < arg + ovec[0]) {
+	    MB_CHARINIT();
+	    leftlen = ovec[0];
+	    while (leftlen) {
 		offs++;
-		ptr += MB_METACHARLEN(ptr);
+		clen = MB_CHARLEN(ptr, leftlen);
+		ptr += clen;
+		leftlen -= clen;
 	    }
 	    setiparam("MBEGIN", offs + !isset(KSHARRAYS));
 	    /* Add on the characters in the match */
-	    while (ptr < arg + ovec[1]) {
+	    leftlen = ovec[1] - ovec[0];
+	    while (leftlen) {
 		offs++;
-		ptr += MB_METACHARLEN(ptr);
+		clen = MB_CHARLEN(ptr, leftlen);
+		ptr += clen;
+		leftlen -= clen;
 	    }
 	    setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
 	    if (nelem) {
@@ -219,17 +226,23 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
 		    ptr = arg;
 		    offs = 0;
 		    /* Find the start offset */
-		    MB_METACHARINIT();
-		    while (ptr < arg + ipair[0]) {
+		    MB_CHARINIT();
+		    leftlen = ipair[0];
+		    while (leftlen) {
 			offs++;
-			ptr += MB_METACHARLEN(ptr);
+			clen = MB_CHARLEN(ptr, leftlen);
+			ptr += clen;
+			leftlen -= clen;
 		    }
 		    convbase(buf, offs + !isset(KSHARRAYS), 10);
 		    *bptr = ztrdup(buf);
 		    /* Continue to the end offset */
-		    while (ptr < arg + ipair[1]) {
+		    leftlen = ipair[1] - ipair[0];
+		    while (leftlen) {
 			offs++;
-			ptr += MB_METACHARLEN(ptr);
+			clen = MB_CHARLEN(ptr, leftlen);
+			ptr += clen;
+			leftlen -= clen;
 		    }
 		    convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
 		    *eptr = ztrdup(buf);
diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst
index ddfd3f5..3907756 100644
--- a/Test/V07pcre.ztst
+++ b/Test/V07pcre.ztst
@@ -37,6 +37,17 @@
 >o→b
 >→
 
+  unset match mend
+  s=$'\u00a0'
+  [[ $s =~ '^.$' ]] && print OK
+  [[ A${s}B =~ .(.). && $match[1] == $s ]] && print OK
+  [[ A${s}${s}B =~ A([^[:ascii:]]*)B && $mend[1] == 3 ]] && print OK
+  unset s
+0:Raw IMETA characters in input string
+>OK
+>OK
+>OK
+
   [[ foo =~ f.+ ]] ; print $?
   [[ foo =~ x.+ ]] ; print $?
   [[ ! foo =~ f.+ ]] ; print $?



  parent reply	other threads:[~2016-01-08 13:09 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-10  3:27 Maxime Arthaud
2015-06-10  5:31 ` Bart Schaefer
2015-06-10  8:55   ` Peter Stephenson
2015-06-11 16:59     ` Peter Stephenson
2015-06-22 15:56       ` m0viefreak
2015-06-22 16:29         ` Peter Stephenson
2016-01-08 13:09 ` Jun T. [this message]
2016-04-23  9:51   ` Segfault with PCRE (Re: Strange behavior of [[) Mikael Berthe
2016-04-23 21:22     ` Bart Schaefer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9BF380A3-CAEB-46FB-8598-4E80DF45E79D@kba.biglobe.ne.jp \
    --to=takimoto-j@kba.biglobe.ne.jp \
    --cc=zsh-workers@zsh.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).