From: Bart Schaefer <schaefer@brasslantern.com>
To: Zsh hackers list <zsh-workers@zsh.org>
Subject: Re: a='foo"'; echo ${a/foo"/"bar} outputs bar
Date: Tue, 3 Oct 2023 21:20:27 -0700 [thread overview]
Message-ID: <CAH+w=7a-_V27oJyxa9ZVQof8217xUCNOXXwPARqNxh4tiyM-1g@mail.gmail.com> (raw)
In-Reply-To: <CAH+w=7aoQsn5BeiWG3+RmsgoUU0aQxsDCJX6=JOubbexPDxYzg@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 2020 bytes --]
On Sun, Aug 6, 2023 at 7:36 PM Bart Schaefer <schaefer@brasslantern.com> wrote:
>
> Circling back to this ...
... and again ...
> With an increased understanding of lex.c gained by stepping through
> variations of ${|...} a thousand times, I think I have identified two
> possible ways to address this.
Turns out there's a fundamental problem with ${var/pat/repl}, namely
that upon reaching paramsubst() the only recognized quoting of the
slash character is to precede it with a backslash. Conversely,
gettokstr() recognizes '...', "...", and $'...' quoting, and may
return STRING tokens containing backslashes that paramsubst() won't be
able to deal with.
Other things that have never worked are e.g. ${a/${b/c/d}/e}, which
gives bad substitution, and ${a/"${b/c/d}"/e} which AFAICT always
substitutes ${a} with no replacements, and ${a/"$( /bin/echo d )"/e}
which is bad pattern: "$(. Fixing this properly would require doing a
full-fledged parse inside paramsubst(), rather than just a search for
unquoted backslashes.
Several of my attempts so far have caused at least Test/Y01 to fail in
comptesteval, so there is evidence for dependency on the existing
behavior.
> Something similar would have to be done with single quotes
I had an idea here that seems to work: while reading single-quoted
strings in gettokstr(), recognize when we're in a pattern and replace
all / with \/. That only fixes '...' and $'...' at best, though.
So, back to workers/51202, my original reply on this thread: handle
double-quotes in paramsubst(), leaving single quotes to gettokstr().
This still isn't perfect (see above regarding ${a/${b/c/d}/e}) but it
fixes the two Xfail tests in D04parameter and handles most of the
straightforward cases, including
a='/x/y/z' zsh -fc 'echo "${a/"/y/"/+}"'
and even the ${a/"${b/c/d}"/e} example, although it'll break on any
more double quotes inside other double quotes.
Other existing tests still pass. Let me know what you think.
[-- Attachment #2: subst-pat-quote.txt --]
[-- Type: text/plain, Size: 3337 bytes --]
diff --git a/Src/lex.c b/Src/lex.c
index 33b17cc95..31b130b07 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -938,7 +938,7 @@ gettokstr(int c, int sub)
{
int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0;
int intpos = 1, in_brace_param = 0, cmdsubst = 0;
- int inquote, unmatched = 0;
+ int inquote, unmatched = 0, in_pattern = 0;
enum lextok peek;
#ifdef DEBUG
int ocmdsp = cmdsp;
@@ -1160,7 +1160,7 @@ gettokstr(int c, int sub)
if (bct-- == in_brace_param) {
if (cmdsubst)
cmdpop();
- in_brace_param = cmdsubst = 0;
+ in_brace_param = cmdsubst = in_pattern = 0;
}
c = Outbrace;
break;
@@ -1309,7 +1309,8 @@ gettokstr(int c, int sub)
lexbuf.ptr--, lexbuf.len--;
else
break;
- }
+ } else if (in_pattern && c == '/')
+ add(Bnull);
add(c);
}
ALLOWHIST
@@ -1397,26 +1398,36 @@ gettokstr(int c, int sub)
*/
c = Dash;
break;
- case LX2_BANG:
- /*
- * Same logic as Dash, for ! to perform negation in range.
- */
- if (seen_brct)
- c = Bang;
- else
- c = '!';
- }
- add(c);
- c = hgetc();
- if (intpos)
+ case LX2_BANG:
+ /*
+ * Same logic as Dash, for ! to perform negation in range.
+ */
+ if (seen_brct)
+ c = Bang;
+ else
+ c = '!';
+ case LX2_OTHER:
+ if (in_brace_param) {
+ if (c == '/') {
+ if (in_pattern == 0)
+ in_pattern = 2;
+ else
+ --in_pattern;
+ }
+ }
+ }
+ add(c);
+ c = hgetc();
+ if (intpos)
intpos--;
- if (lexstop)
+ if (lexstop)
break;
- if (!cmdsubst && in_brace_param && act == LX2_STRING &&
- (c == '|' || c == Bar || inblank(c))) {
- cmdsubst = in_brace_param;
- cmdpush(CS_CURSH);
- }
+ if (!cmdsubst && in_brace_param && act == LX2_STRING &&
+ (c == '|' || c == Bar || inblank(c))) {
+ cmdsubst = in_brace_param;
+ cmdpush(CS_CURSH);
+ } else if (in_pattern == 2 && c != '/')
+ in_pattern = 1;
}
brk:
if (errflag) {
diff --git a/Src/subst.c b/Src/subst.c
index cdbfc138a..419069f5a 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -3088,6 +3088,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
chuck(ptr);
else
ptr++;
+ } else if (c == Dnull) {
+ chuck(ptr);
+ while (*ptr && *ptr != c)
+ ptr++;
+ if (*ptr == Dnull)
+ chuck(ptr);
+ ptr--; /* Outer loop is about to increment */
}
}
replstr = (*ptr && ptr[1]) ? ptr+1 : "";
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index c2008582c..69a4fd3ec 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -2762,13 +2762,13 @@ F:behavior, see http://austingroupbugs.net/view.php?id=888
slash='/'
print -r -- x${slash/'/'}y
--Df:(users/28784) substituting a single-quoted backslash, part #1: slash
+0:(users/28784) substituting a single-quoted backslash, part #1: slash
>xy
single_quote="'"
- print -r -- x${single_quote/'/'}y
--Df:(users/28784) substituting a single-quoted backslash, part #2: single quote
->x/y
+ print -r -- x${single_quote/$'/'}y
+0:(users/28784) substituting a single-quoted backslash, part #2: single quote
+>x'y
control="foobar"
print -r -- x${control/'bar'}y
next prev parent reply other threads:[~2023-10-04 4:21 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-12-11 17:50 Stephane Chazelas
2022-12-13 6:21 ` Bart Schaefer
2023-08-07 2:36 ` Bart Schaefer
2023-10-04 4:20 ` Bart Schaefer [this message]
2023-10-17 16:44 ` Bart Schaefer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAH+w=7a-_V27oJyxa9ZVQof8217xUCNOXXwPARqNxh4tiyM-1g@mail.gmail.com' \
--to=schaefer@brasslantern.com \
--cc=zsh-workers@zsh.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/zsh/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).