* support negative LEN in ${VAR:OFFSET:LEN}
@ 2011-03-10 11:10 Mikael Magnusson
2011-03-10 15:19 ` Bart Schaefer
0 siblings, 1 reply; 6+ messages in thread
From: Mikael Magnusson @ 2011-03-10 11:10 UTC (permalink / raw)
To: zsh workers
bash 4.2 adds support for this.
http://cgit.mika.l3ib.org/cgit/zsh-cvs/patch/?id=3950051089ce1c87679abf0335edc16fd9289878
From: Mikael Magnusson <mikachu@gmail.com>
Date: Thu, 10 Mar 2011 10:56:19 +0100
Subject: [PATCH] support LEN < 0 in ${VAR:OFFSET:LEN}, new in bash 4.2
---
Doc/Zsh/expn.yo | 12 ++++++++----
Src/subst.c | 36 +++++++++++++++++++++++++++---------
Test/D04parameter.ztst | 14 ++++++++++++++
3 files changed, 49 insertions(+), 13 deletions(-)
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index 9c44913..b48ddd9 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -588,7 +588,7 @@ remove the non-matched elements).
xitem(tt(${)var(name)tt(:)var(offset)tt(}))
item(tt(${)var(name)tt(:)var(offset)tt(:)var(length)tt(}))(
This syntax gives effects similar to parameter subscripting
-in the form tt($)var(name)tt({)var(start)tt(,)var(end)tt(}), but is
+in the form tt($)var(name)tt([)var(start)tt(,)var(end)tt(]), but is
compatible with other shells; note that both var(offset) and var(length)
are interpreted differently from the components of a subscript.
@@ -608,8 +608,12 @@ the option tt(KSH_ARRAYS).
A negative offset counts backwards from the end of the scalar or array,
so that -1 corresponds to the last character or element, and so on.
-var(length) is always treated directly as a length and hence may not be
-negative. The option tt(MULTIBYTE) is obeyed, i.e. the offset and length
+var(length) is treated directly as a length when it is positive.
+When it is negative, it works as an offset just like var(offset). If
+this results in a negative length, a diagnostic will be printed and
+nothing will be substituted.
+
+The option tt(MULTIBYTE) is obeyed, i.e. the offset and length
count multibyte characters where appropriate.
var(offset) and var(length) undergo the same set of shell substitutions
@@ -635,7 +639,7 @@ tt(${)var(name)tt(:-)var(word)tt(}) form of substitution. Instead, a space
may be inserted before the tt(-). Furthermore, neither var(offset) nor
var(length) may begin with an alphabetic character or tt(&) as these are
used to indicate history-style modifiers. To substitute a value from a
-variable, the recommended approach is to proceed it with a tt($) as this
+variable, the recommended approach is to precede it with a tt($) as this
signifies the intention (parameter substitution can easily be rendered
unreadable); however, as arithmetic substitution is performed, the
expression tt(${var: offs}) does work, retrieving the offset from
diff --git a/Src/subst.c b/Src/subst.c
index 377aba8..141a353 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -2799,7 +2799,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
char *check_offset = check_colon_subscript(s, &check_offset2);
if (check_offset) {
zlong offset = mathevali(check_offset);
- zlong length = (zlong)-1;
+ zlong length;
+ int length_set = 0;
int offset_hack_argzero = 0;
if (errflag)
return NULL;
@@ -2814,14 +2815,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
zerr("invalid length: %s", check_offset);
return NULL;
}
- if (check_offset) {
+ if (check_offset) {
length = mathevali(check_offset);
+ length_set = 1;
if (errflag)
return NULL;
- if (length < (zlong)0) {
- zerr("invalid length: %s", check_offset);
- return NULL;
- }
}
}
if (horrible_offset_hack) {
@@ -2849,8 +2847,15 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
}
if (offset_hack_argzero)
alen++;
- if (length < 0)
- length = alen;
+ if (length_set) {
+ if (length < 0)
+ length += alen - offset;
+ if (length < 0) {
+ zerr("substring expression < 0: %d", length);
+ return NULL;
+ }
+ } else
+ length = alen;
if (offset > alen)
offset = alen;
if (offset + length > alen)
@@ -2879,11 +2884,24 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
offset = 0;
}
MB_METACHARINIT();
+ if (length_set && length < 0)
+ length -= offset;
for (sptr = val; *sptr && offset; ) {
sptr += MB_METACHARLEN(sptr);
offset--;
}
- if (length >= 0) {
+ if (length_set) {
+ if (length < 0) {
+ MB_METACHARINIT();
+ for (eptr = val; *eptr; ) {
+ eptr += MB_METACHARLEN(eptr);
+ length++;
+ }
+ if (length < 0) {
+ zerr("substring expression < 0: %d", length);
+ return NULL;
+ }
+ }
for (eptr = sptr; *eptr && length; ) {
eptr += MB_METACHARLEN(eptr);
length--;
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index c0ad1d2..272fb87 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -1338,6 +1338,7 @@
print ${foo:$(echo 3 + 3):`echo 4 - 3`}
print ${foo: -1}
print ${foo: -10}
+ print ${foo:5:-2}
0:Bash-style offsets, scalar
>456789
>56789
@@ -1349,6 +1350,7 @@
>7
>9
>123456789
+>67
foo=(1 2 3 4 5 6 7 8 9)
print ${foo:3}
@@ -1361,6 +1363,7 @@
print ${foo:$(echo 3 + 3):`echo 4 - 3`}
print ${foo: -1}
print ${foo: -10}
+ print ${foo:5:-2}
0:Bash-style offsets, array
>4 5 6 7 8 9
>5 6 7 8 9
@@ -1372,6 +1375,7 @@
>7
>9
>1 2 3 4 5 6 7 8 9
+>6 7
testfn() {
emulate -L sh
@@ -1410,3 +1414,13 @@
print ${str:0:}
1:Regression test for missing length after offset
?(eval):2: unrecognized modifier
+
+ foo="123456789"
+ print ${foo:5:-6}
+1:Regression test for total length < 0 in string
+?(eval):2: substring expression < 0: -2
+
+ foo=(1 2 3 4 5 6 7 8 9)
+ print ${foo:5:-6}
+1:Regression test for total length < 0 in array
+?(eval):2: substring expression < 0: -2
--
1.7.4-rc1
--
Mikael Magnusson
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: support negative LEN in ${VAR:OFFSET:LEN}
2011-03-10 11:10 support negative LEN in ${VAR:OFFSET:LEN} Mikael Magnusson
@ 2011-03-10 15:19 ` Bart Schaefer
2011-03-10 15:31 ` Mikael Magnusson
0 siblings, 1 reply; 6+ messages in thread
From: Bart Schaefer @ 2011-03-10 15:19 UTC (permalink / raw)
To: zsh workers
On Mar 10, 12:10pm, Mikael Magnusson wrote:
} Subject: support negative LEN in ${VAR:OFFSET:LEN}
}
} +var(length) is treated directly as a length when it is positive.
} +When it is negative, it works as an offset just like var(offset). If
} +this results in a negative length, a diagnostic will be printed and
} +nothing will be substituted.
I don't object to adding the feature, but that documentation is a bit
confusing. Also, when writing documentation, it's almost always better
to avoid passive phrasing like "x will be y". Perhaps:
When positive, var(length) counts from the var(offset) position
toward the end of the scalar or array. When negative, var(length)
counts back from the end. If this results in a position smaller
than var(offset), a diagnostic is printed and nothing is substituted.
The equivalent $var[start,end] expression would not print a diagnostic.
Does bash really work that way? That is, you have to know the length
of the string in order to safely count backwards from the end of it?
} + foo="123456789"
} + print ${foo:5:-6}
} +1:Regression test for total length < 0 in string
} +?(eval):2: substring expression < 0: -2
Is that what bash's diagnostic looks like? If so we should borrow
consistently, but it'd be a lot clearer if it said
substring expression: 3 < 5
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: support negative LEN in ${VAR:OFFSET:LEN}
2011-03-10 15:19 ` Bart Schaefer
@ 2011-03-10 15:31 ` Mikael Magnusson
2011-03-16 15:04 ` Mikael Magnusson
0 siblings, 1 reply; 6+ messages in thread
From: Mikael Magnusson @ 2011-03-10 15:31 UTC (permalink / raw)
To: Bart Schaefer; +Cc: zsh workers
On 10 March 2011 16:19, Bart Schaefer <schaefer@brasslantern.com> wrote:
> On Mar 10, 12:10pm, Mikael Magnusson wrote:
> } Subject: support negative LEN in ${VAR:OFFSET:LEN}
> }
> } +var(length) is treated directly as a length when it is positive.
> } +When it is negative, it works as an offset just like var(offset). If
> } +this results in a negative length, a diagnostic will be printed and
> } +nothing will be substituted.
>
> I don't object to adding the feature, but that documentation is a bit
> confusing. Also, when writing documentation, it's almost always better
> to avoid passive phrasing like "x will be y". Perhaps:
>
> When positive, var(length) counts from the var(offset) position
> toward the end of the scalar or array. When negative, var(length)
> counts back from the end. If this results in a position smaller
> than var(offset), a diagnostic is printed and nothing is substituted.
That's fine by me; I didn't spend a lot of time thinking about that phrasing :).
> The equivalent $var[start,end] expression would not print a diagnostic.
> Does bash really work that way? That is, you have to know the length
> of the string in order to safely count backwards from the end of it?
>
> } + foo="123456789"
> } + print ${foo:5:-6}
> } +1:Regression test for total length < 0 in string
> } +?(eval):2: substring expression < 0: -2
>
> Is that what bash's diagnostic looks like? If so we should borrow
> consistently, but it'd be a lot clearer if it said
>
> substring expression: 3 < 5
It's almost the same,
$ echo ${PATH: -20:-30}
bash: -30: substring expression < 0
% echo ${PATH: -20:-30}
zsh: substring expression < 0: -10
Well, it seems bash simply prints the given length, while I print the
resulting length. I can change this if you want?
Interestingly (well, not really), the feature doesn't work on arrays in bash:
$ set a b c
$ echo ${*:0:2}
bash a
$ echo ${*:0:-2}
bash: -2: substring expression < 0
% echo ${*:0:-2}
zsh a
--
Mikael Magnusson
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: support negative LEN in ${VAR:OFFSET:LEN}
2011-03-10 15:31 ` Mikael Magnusson
@ 2011-03-16 15:04 ` Mikael Magnusson
2011-03-16 16:21 ` Bart Schaefer
0 siblings, 1 reply; 6+ messages in thread
From: Mikael Magnusson @ 2011-03-16 15:04 UTC (permalink / raw)
To: zsh workers
Is someone waiting for me to do something with this? If so, I'm not
exactly sure what.
--
Mikael Magnusson
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: support negative LEN in ${VAR:OFFSET:LEN}
2011-03-16 15:04 ` Mikael Magnusson
@ 2011-03-16 16:21 ` Bart Schaefer
2011-05-11 15:39 ` PATCH: " Mikael Magnusson
0 siblings, 1 reply; 6+ messages in thread
From: Bart Schaefer @ 2011-03-16 16:21 UTC (permalink / raw)
To: zsh workers
On Mar 16, 4:04pm, Mikael Magnusson wrote:
}
} Is someone waiting for me to do something with this? If so, I'm not
} exactly sure what.
Sorry, I was really sick all weekend and really busy the last couple
of days.
I'd prefer either a more helpful error message or zsh's usual behavior
of treating it as NOT an error (e.g. just returning an empty string)
when the end position is less than the start, but I'd also prefer if
someone else (hi, PWS?) were on record as [dis]agreeing with me before
a choice is made.
^ permalink raw reply [flat|nested] 6+ messages in thread
* PATCH: support negative LEN in ${VAR:OFFSET:LEN}
2011-03-16 16:21 ` Bart Schaefer
@ 2011-05-11 15:39 ` Mikael Magnusson
0 siblings, 0 replies; 6+ messages in thread
From: Mikael Magnusson @ 2011-05-11 15:39 UTC (permalink / raw)
To: zsh-workers
Updated with your docs and error messages.
% echo ${path:10:-5}
zsh: substring expression: 8 < 10
% echo ${PATH:150:-150}
zsh: substring expression: 85 < 150
---
Doc/Zsh/expn.yo | 12 ++++++++----
Src/subst.c | 40 +++++++++++++++++++++++++++++++---------
Test/D04parameter.ztst | 14 ++++++++++++++
3 files changed, 53 insertions(+), 13 deletions(-)
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index 3a1c372..5d69dbd 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -588,7 +588,7 @@ remove the non-matched elements).
xitem(tt(${)var(name)tt(:)var(offset)tt(}))
item(tt(${)var(name)tt(:)var(offset)tt(:)var(length)tt(}))(
This syntax gives effects similar to parameter subscripting
-in the form tt($)var(name)tt({)var(start)tt(,)var(end)tt(}), but is
+in the form tt($)var(name)tt([)var(start)tt(,)var(end)tt(]), but is
compatible with other shells; note that both var(offset) and var(length)
are interpreted differently from the components of a subscript.
@@ -608,8 +608,12 @@ the option tt(KSH_ARRAYS).
A negative offset counts backwards from the end of the scalar or array,
so that -1 corresponds to the last character or element, and so on.
-var(length) is always treated directly as a length and hence may not be
-negative. The option tt(MULTIBYTE) is obeyed, i.e. the offset and length
+When positive, var(length) counts from the var(offset) position
+toward the end of the scalar or array. When negative, var(length)
+counts back from the end. If this results in a position smaller
+than var(offset), a diagnostic is printed and nothing is substituted.
+
+The option tt(MULTIBYTE) is obeyed, i.e. the offset and length
count multibyte characters where appropriate.
var(offset) and var(length) undergo the same set of shell substitutions
@@ -635,7 +639,7 @@ tt(${)var(name)tt(:-)var(word)tt(}) form of substitution. Instead, a space
may be inserted before the tt(-). Furthermore, neither var(offset) nor
var(length) may begin with an alphabetic character or tt(&) as these are
used to indicate history-style modifiers. To substitute a value from a
-variable, the recommended approach is to proceed it with a tt($) as this
+variable, the recommended approach is to precede it with a tt($) as this
signifies the intention (parameter substitution can easily be rendered
unreadable); however, as arithmetic substitution is performed, the
expression tt(${var: offs}) does work, retrieving the offset from
diff --git a/Src/subst.c b/Src/subst.c
index 723bb25..ce4dbe6 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -2834,7 +2834,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
char *check_offset = check_colon_subscript(s, &check_offset2);
if (check_offset) {
zlong offset = mathevali(check_offset);
- zlong length = (zlong)-1;
+ zlong length;
+ int length_set = 0;
int offset_hack_argzero = 0;
if (errflag)
return NULL;
@@ -2849,14 +2850,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
zerr("invalid length: %s", check_offset);
return NULL;
}
- if (check_offset) {
+ if (check_offset) {
length = mathevali(check_offset);
+ length_set = 1;
if (errflag)
return NULL;
- if (length < (zlong)0) {
- zerr("invalid length: %s", check_offset);
- return NULL;
- }
}
}
if (horrible_offset_hack) {
@@ -2884,8 +2882,16 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
}
if (offset_hack_argzero)
alen++;
- if (length < 0)
- length = alen;
+ if (length_set) {
+ if (length < 0)
+ length += alen - offset;
+ if (length < 0) {
+ zerr("substring expression: %d < %d",
+ length + offset, offset);
+ return NULL;
+ }
+ } else
+ length = alen;
if (offset > alen)
offset = alen;
if (offset + length > alen)
@@ -2904,6 +2910,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
aval = newarr;
} else {
char *sptr, *eptr;
+ int given_offset;
if (offset < 0) {
MB_METACHARINIT();
for (sptr = val; *sptr; ) {
@@ -2913,12 +2920,27 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
if (offset < 0)
offset = 0;
}
+ given_offset = offset;
MB_METACHARINIT();
+ if (length_set && length < 0)
+ length -= offset;
for (sptr = val; *sptr && offset; ) {
sptr += MB_METACHARLEN(sptr);
offset--;
}
- if (length >= 0) {
+ if (length_set) {
+ if (length < 0) {
+ MB_METACHARINIT();
+ for (eptr = val; *eptr; ) {
+ eptr += MB_METACHARLEN(eptr);
+ length++;
+ }
+ if (length < 0) {
+ zerr("substring expression: %d < %d",
+ length + given_offset, given_offset);
+ return NULL;
+ }
+ }
for (eptr = sptr; *eptr && length; ) {
eptr += MB_METACHARLEN(eptr);
length--;
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index 3646245..b91caaa 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -1346,6 +1346,7 @@
print ${foo:$(echo 3 + 3):`echo 4 - 3`}
print ${foo: -1}
print ${foo: -10}
+ print ${foo:5:-2}
0:Bash-style offsets, scalar
>456789
>56789
@@ -1357,6 +1358,7 @@
>7
>9
>123456789
+>67
foo=(1 2 3 4 5 6 7 8 9)
print ${foo:3}
@@ -1369,6 +1371,7 @@
print ${foo:$(echo 3 + 3):`echo 4 - 3`}
print ${foo: -1}
print ${foo: -10}
+ print ${foo:5:-2}
0:Bash-style offsets, array
>4 5 6 7 8 9
>5 6 7 8 9
@@ -1380,6 +1383,7 @@
>7
>9
>1 2 3 4 5 6 7 8 9
+>6 7
testfn() {
emulate -L sh
@@ -1418,3 +1422,13 @@
print ${str:0:}
1:Regression test for missing length after offset
?(eval):2: unrecognized modifier
+
+ foo="123456789"
+ print ${foo:5:-6}
+1:Regression test for total length < 0 in string
+?(eval):2: substring expression: 3 < 5
+
+ foo=(1 2 3 4 5 6 7 8 9)
+ print ${foo:5:-6}
+1:Regression test for total length < 0 in array
+?(eval):2: substring expression: 3 < 5
--
1.7.4-rc1
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2011-05-11 15:39 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-10 11:10 support negative LEN in ${VAR:OFFSET:LEN} Mikael Magnusson
2011-03-10 15:19 ` Bart Schaefer
2011-03-10 15:31 ` Mikael Magnusson
2011-03-16 15:04 ` Mikael Magnusson
2011-03-16 16:21 ` Bart Schaefer
2011-05-11 15:39 ` PATCH: " Mikael Magnusson
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/zsh/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).