zsh-workers
 help / color / mirror / code / Atom feed
* support negative LEN in ${VAR:OFFSET:LEN}
@ 2011-03-10 11:10 Mikael Magnusson
  2011-03-10 15:19 ` Bart Schaefer
  0 siblings, 1 reply; 6+ messages in thread
From: Mikael Magnusson @ 2011-03-10 11:10 UTC (permalink / raw)
  To: zsh workers

bash 4.2 adds support for this.

http://cgit.mika.l3ib.org/cgit/zsh-cvs/patch/?id=3950051089ce1c87679abf0335edc16fd9289878

From: Mikael Magnusson <mikachu@gmail.com>
Date: Thu, 10 Mar 2011 10:56:19 +0100
Subject: [PATCH] support LEN < 0 in ${VAR:OFFSET:LEN}, new in bash 4.2

---
 Doc/Zsh/expn.yo        |   12 ++++++++----
 Src/subst.c            |   36 +++++++++++++++++++++++++++---------
 Test/D04parameter.ztst |   14 ++++++++++++++
 3 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index 9c44913..b48ddd9 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -588,7 +588,7 @@ remove the non-matched elements).
 xitem(tt(${)var(name)tt(:)var(offset)tt(}))
 item(tt(${)var(name)tt(:)var(offset)tt(:)var(length)tt(}))(
 This syntax gives effects similar to parameter subscripting
-in the form tt($)var(name)tt({)var(start)tt(,)var(end)tt(}), but is
+in the form tt($)var(name)tt([)var(start)tt(,)var(end)tt(]), but is
 compatible with other shells; note that both var(offset) and var(length)
 are interpreted differently from the components of a subscript.

@@ -608,8 +608,12 @@ the option tt(KSH_ARRAYS).
 A negative offset counts backwards from the end of the scalar or array,
 so that -1 corresponds to the last character or element, and so on.

-var(length) is always treated directly as a length and hence may not be
-negative.  The option tt(MULTIBYTE) is obeyed, i.e. the offset and length
+var(length) is treated directly as a length when it is positive.
+When it is negative, it works as an offset just like var(offset). If
+this results in a negative length, a diagnostic will be printed and
+nothing will be substituted.
+
+The option tt(MULTIBYTE) is obeyed, i.e. the offset and length
 count multibyte characters where appropriate.

 var(offset) and var(length) undergo the same set of shell substitutions
@@ -635,7 +639,7 @@ tt(${)var(name)tt(:-)var(word)tt(}) form of substitution.  Instead, a space
 may be inserted before the tt(-).  Furthermore, neither var(offset) nor
 var(length) may begin with an alphabetic character or tt(&) as these are
 used to indicate history-style modifiers.  To substitute a value from a
-variable, the recommended approach is to proceed it with a tt($) as this
+variable, the recommended approach is to precede it with a tt($) as this
 signifies the intention (parameter substitution can easily be rendered
 unreadable); however, as arithmetic substitution is performed, the
 expression tt(${var: offs}) does work, retrieving the offset from
diff --git a/Src/subst.c b/Src/subst.c
index 377aba8..141a353 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -2799,7 +2799,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    char *check_offset = check_colon_subscript(s, &check_offset2);
 	    if (check_offset) {
 		zlong offset = mathevali(check_offset);
-		zlong length = (zlong)-1;
+		zlong length;
+		int length_set = 0;
 		int offset_hack_argzero = 0;
 		if (errflag)
 		    return NULL;
@@ -2814,14 +2815,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 			zerr("invalid length: %s", check_offset);
 			return NULL;
 		    }
-                    if (check_offset) {
+		    if (check_offset) {
 			length = mathevali(check_offset);
+			length_set = 1;
 			if (errflag)
 			    return NULL;
-			if (length < (zlong)0) {
-			    zerr("invalid length: %s", check_offset);
-			    return NULL;
-			}
 		    }
 		}
 		if (horrible_offset_hack) {
@@ -2849,8 +2847,15 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		    }
 		    if (offset_hack_argzero)
 			alen++;
-		    if (length < 0)
-		      length = alen;
+		    if (length_set) {
+			if (length < 0)
+			    length += alen - offset;
+			if (length < 0) {
+			    zerr("substring expression < 0: %d", length);
+			    return NULL;
+			}
+		    } else
+			length = alen;
 		    if (offset > alen)
 			offset = alen;
 		    if (offset + length > alen)
@@ -2879,11 +2884,24 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 			    offset = 0;
 		    }
 		    MB_METACHARINIT();
+		    if (length_set && length < 0)
+			length -= offset;
 		    for (sptr = val; *sptr && offset; ) {
 			sptr += MB_METACHARLEN(sptr);
 			offset--;
 		    }
-		    if (length >= 0) {
+		    if (length_set) {
+			if (length < 0) {
+			    MB_METACHARINIT();
+			    for (eptr = val; *eptr; ) {
+				eptr += MB_METACHARLEN(eptr);
+				length++;
+			    }
+			    if (length < 0) {
+				zerr("substring expression < 0: %d", length);
+				return NULL;
+			    }
+			}
 			for (eptr = sptr; *eptr && length; ) {
 			    eptr += MB_METACHARLEN(eptr);
 			    length--;
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index c0ad1d2..272fb87 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -1338,6 +1338,7 @@
    print ${foo:$(echo 3 + 3):`echo 4 - 3`}
    print ${foo: -1}
    print ${foo: -10}
+   print ${foo:5:-2}
 0:Bash-style offsets, scalar
 >456789
 >56789
@@ -1349,6 +1350,7 @@
 >7
 >9
 >123456789
+>67

    foo=(1 2 3 4 5 6 7 8 9)
    print ${foo:3}
@@ -1361,6 +1363,7 @@
    print ${foo:$(echo 3 + 3):`echo 4 - 3`}
    print ${foo: -1}
    print ${foo: -10}
+   print ${foo:5:-2}
 0:Bash-style offsets, array
 >4 5 6 7 8 9
 >5 6 7 8 9
@@ -1372,6 +1375,7 @@
 >7
 >9
 >1 2 3 4 5 6 7 8 9
+>6 7

    testfn() {
      emulate -L sh
@@ -1410,3 +1414,13 @@
    print ${str:0:}
 1:Regression test for missing length after offset
 ?(eval):2: unrecognized modifier
+
+   foo="123456789"
+   print ${foo:5:-6}
+1:Regression test for total length < 0 in string
+?(eval):2: substring expression < 0: -2
+
+   foo=(1 2 3 4 5 6 7 8 9)
+   print ${foo:5:-6}
+1:Regression test for total length < 0 in array
+?(eval):2: substring expression < 0: -2
-- 
1.7.4-rc1

-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: support negative LEN in ${VAR:OFFSET:LEN}
  2011-03-10 11:10 support negative LEN in ${VAR:OFFSET:LEN} Mikael Magnusson
@ 2011-03-10 15:19 ` Bart Schaefer
  2011-03-10 15:31   ` Mikael Magnusson
  0 siblings, 1 reply; 6+ messages in thread
From: Bart Schaefer @ 2011-03-10 15:19 UTC (permalink / raw)
  To: zsh workers

On Mar 10, 12:10pm, Mikael Magnusson wrote:
} Subject: support negative LEN in ${VAR:OFFSET:LEN}
}
} +var(length) is treated directly as a length when it is positive.
} +When it is negative, it works as an offset just like var(offset). If
} +this results in a negative length, a diagnostic will be printed and
} +nothing will be substituted.

I don't object to adding the feature, but that documentation is a bit
confusing.  Also, when writing documentation, it's almost always better
to avoid passive phrasing like "x will be y".  Perhaps:

  When positive, var(length) counts from the var(offset) position
  toward the end of the scalar or array.  When negative, var(length)
  counts back from the end.  If this results in a position smaller
  than var(offset), a diagnostic is printed and nothing is substituted.

The equivalent $var[start,end] expression would not print a diagnostic.
Does bash really work that way?  That is, you have to know the length
of the string in order to safely count backwards from the end of it?

} +   foo="123456789"
} +   print ${foo:5:-6}
} +1:Regression test for total length < 0 in string
} +?(eval):2: substring expression < 0: -2

Is that what bash's diagnostic looks like?  If so we should borrow
consistently, but it'd be a lot clearer if it said

    substring expression: 3 < 5


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: support negative LEN in ${VAR:OFFSET:LEN}
  2011-03-10 15:19 ` Bart Schaefer
@ 2011-03-10 15:31   ` Mikael Magnusson
  2011-03-16 15:04     ` Mikael Magnusson
  0 siblings, 1 reply; 6+ messages in thread
From: Mikael Magnusson @ 2011-03-10 15:31 UTC (permalink / raw)
  To: Bart Schaefer; +Cc: zsh workers

On 10 March 2011 16:19, Bart Schaefer <schaefer@brasslantern.com> wrote:
> On Mar 10, 12:10pm, Mikael Magnusson wrote:
> } Subject: support negative LEN in ${VAR:OFFSET:LEN}
> }
> } +var(length) is treated directly as a length when it is positive.
> } +When it is negative, it works as an offset just like var(offset). If
> } +this results in a negative length, a diagnostic will be printed and
> } +nothing will be substituted.
>
> I don't object to adding the feature, but that documentation is a bit
> confusing.  Also, when writing documentation, it's almost always better
> to avoid passive phrasing like "x will be y".  Perhaps:
>
>  When positive, var(length) counts from the var(offset) position
>  toward the end of the scalar or array.  When negative, var(length)
>  counts back from the end.  If this results in a position smaller
>  than var(offset), a diagnostic is printed and nothing is substituted.

That's fine by me, I didn't spend a lot of time thinking about that phrasing :).

> The equivalent $var[start,end] expression would not print a diagnostic.
> Does bash really work that way?  That is, you have to know the length
> of the string in order to safely count backwards from the end of it?
>
> } +   foo="123456789"
> } +   print ${foo:5:-6}
> } +1:Regression test for total length < 0 in string
> } +?(eval):2: substring expression < 0: -2
>
> Is that what bash's diagnostic looks like?  If so we should borrow
> consistently, but it'd be a lot clearer if it said
>
>    substring expression: 3 < 5

It's almost the same,
$ echo ${PATH: -20:-30}
bash: -30: substring expression < 0
% echo ${PATH: -20:-30}
zsh: substring expression < 0: -10

Well, it seems bash simply prints the given length, while I print the
resulting length. I can change this if you want?

Interestingly (well, not really), the feature doesn't work on arrays in bash:
$ set a b c
$ echo ${*:0:2}
bash a
$ echo ${*:0:-2}
bash: -2: substring expression < 0

% echo ${*:0:-2}
zsh a

-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: support negative LEN in ${VAR:OFFSET:LEN}
  2011-03-10 15:31   ` Mikael Magnusson
@ 2011-03-16 15:04     ` Mikael Magnusson
  2011-03-16 16:21       ` Bart Schaefer
  0 siblings, 1 reply; 6+ messages in thread
From: Mikael Magnusson @ 2011-03-16 15:04 UTC (permalink / raw)
  To: zsh workers

Is someone waiting for me to do something with this? If so, I'm not
exactly sure what.

-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: support negative LEN in ${VAR:OFFSET:LEN}
  2011-03-16 15:04     ` Mikael Magnusson
@ 2011-03-16 16:21       ` Bart Schaefer
  2011-05-11 15:39         ` PATCH: " Mikael Magnusson
  0 siblings, 1 reply; 6+ messages in thread
From: Bart Schaefer @ 2011-03-16 16:21 UTC (permalink / raw)
  To: zsh workers

On Mar 16,  4:04pm, Mikael Magnusson wrote:
}
} Is someone waiting for me to do something with this? If so, I'm not
} exactly sure what.

Sorry, I was really sick all weekend and really busy the last couple
of days.

I'd prefer either a more helpful error message or zsh's usual behavior
of treating it as NOT an error (e.g. just returning an empty string)
when the end position is less than the start, but I'd also prefer if
someone else (hi, PWS?) were on record as [dis]agreeing with me before
a choice is made.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* PATCH: support negative LEN in ${VAR:OFFSET:LEN}
  2011-03-16 16:21       ` Bart Schaefer
@ 2011-05-11 15:39         ` Mikael Magnusson
  0 siblings, 0 replies; 6+ messages in thread
From: Mikael Magnusson @ 2011-05-11 15:39 UTC (permalink / raw)
  To: zsh-workers

Updated with your docs and error messages.

% echo ${path:10:-5}
zsh: substring expression: 8 < 10
% echo ${PATH:150:-150}
zsh: substring expression: 85 < 150

---
 Doc/Zsh/expn.yo        |   12 ++++++++----
 Src/subst.c            |   40 +++++++++++++++++++++++++++++++---------
 Test/D04parameter.ztst |   14 ++++++++++++++
 3 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index 3a1c372..5d69dbd 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -588,7 +588,7 @@ remove the non-matched elements).
 xitem(tt(${)var(name)tt(:)var(offset)tt(}))
 item(tt(${)var(name)tt(:)var(offset)tt(:)var(length)tt(}))(
 This syntax gives effects similar to parameter subscripting
-in the form tt($)var(name)tt({)var(start)tt(,)var(end)tt(}), but is
+in the form tt($)var(name)tt([)var(start)tt(,)var(end)tt(]), but is
 compatible with other shells; note that both var(offset) and var(length)
 are interpreted differently from the components of a subscript.
 
@@ -608,8 +608,12 @@ the option tt(KSH_ARRAYS).
 A negative offset counts backwards from the end of the scalar or array,
 so that -1 corresponds to the last character or element, and so on.
 
-var(length) is always treated directly as a length and hence may not be
-negative.  The option tt(MULTIBYTE) is obeyed, i.e. the offset and length
+When positive, var(length) counts from the var(offset) position
+toward the end of the scalar or array.  When negative, var(length)
+counts back from the end.  If this results in a position smaller
+than var(offset), a diagnostic is printed and nothing is substituted.
+
+The option tt(MULTIBYTE) is obeyed, i.e. the offset and length
 count multibyte characters where appropriate.
 
 var(offset) and var(length) undergo the same set of shell substitutions
@@ -635,7 +639,7 @@ tt(${)var(name)tt(:-)var(word)tt(}) form of substitution.  Instead, a space
 may be inserted before the tt(-).  Furthermore, neither var(offset) nor
 var(length) may begin with an alphabetic character or tt(&) as these are
 used to indicate history-style modifiers.  To substitute a value from a
-variable, the recommended approach is to proceed it with a tt($) as this
+variable, the recommended approach is to precede it with a tt($) as this
 signifies the intention (parameter substitution can easily be rendered
 unreadable); however, as arithmetic substitution is performed, the
 expression tt(${var: offs}) does work, retrieving the offset from
diff --git a/Src/subst.c b/Src/subst.c
index 723bb25..ce4dbe6 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -2834,7 +2834,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 	    char *check_offset = check_colon_subscript(s, &check_offset2);
 	    if (check_offset) {
 		zlong offset = mathevali(check_offset);
-		zlong length = (zlong)-1;
+		zlong length;
+		int length_set = 0;
 		int offset_hack_argzero = 0;
 		if (errflag)
 		    return NULL;
@@ -2849,14 +2850,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 			zerr("invalid length: %s", check_offset);
 			return NULL;
 		    }
-                    if (check_offset) {
+		    if (check_offset) {
 			length = mathevali(check_offset);
+			length_set = 1;
 			if (errflag)
 			    return NULL;
-			if (length < (zlong)0) {
-			    zerr("invalid length: %s", check_offset);
-			    return NULL;
-			}
 		    }
 		}
 		if (horrible_offset_hack) {
@@ -2884,8 +2882,16 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		    }
 		    if (offset_hack_argzero)
 			alen++;
-		    if (length < 0)
-		      length = alen;
+		    if (length_set) {
+			if (length < 0)
+			    length += alen - offset;
+			if (length < 0) {
+			    zerr("substring expression: %d < %d",
+			         length + offset, offset);
+			    return NULL;
+			}
+		    } else
+			length = alen;
 		    if (offset > alen)
 			offset = alen;
 		    if (offset + length > alen)
@@ -2904,6 +2910,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 		    aval = newarr;
 		} else {
 		    char *sptr, *eptr;
+		    int given_offset;
 		    if (offset < 0) {
 			MB_METACHARINIT();
 			for (sptr = val; *sptr; ) {
@@ -2913,12 +2920,27 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
 			if (offset < 0)
 			    offset = 0;
 		    }
+		    given_offset = offset;
 		    MB_METACHARINIT();
+		    if (length_set && length < 0)
+			length -= offset;
 		    for (sptr = val; *sptr && offset; ) {
 			sptr += MB_METACHARLEN(sptr);
 			offset--;
 		    }
-		    if (length >= 0) {
+		    if (length_set) {
+			if (length < 0) {
+			    MB_METACHARINIT();
+			    for (eptr = val; *eptr; ) {
+				eptr += MB_METACHARLEN(eptr);
+				length++;
+			    }
+			    if (length < 0) {
+				zerr("substring expression: %d < %d",
+				     length + given_offset, given_offset);
+				return NULL;
+			    }
+			}
 			for (eptr = sptr; *eptr && length; ) {
 			    eptr += MB_METACHARLEN(eptr);
 			    length--;
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index 3646245..b91caaa 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -1346,6 +1346,7 @@
    print ${foo:$(echo 3 + 3):`echo 4 - 3`}
    print ${foo: -1}
    print ${foo: -10}
+   print ${foo:5:-2}
 0:Bash-style offsets, scalar
 >456789
 >56789
@@ -1357,6 +1358,7 @@
 >7
 >9
 >123456789
+>67
 
    foo=(1 2 3 4 5 6 7 8 9)
    print ${foo:3}
@@ -1369,6 +1371,7 @@
    print ${foo:$(echo 3 + 3):`echo 4 - 3`}
    print ${foo: -1}
    print ${foo: -10}
+   print ${foo:5:-2}
 0:Bash-style offsets, array
 >4 5 6 7 8 9
 >5 6 7 8 9
@@ -1380,6 +1383,7 @@
 >7
 >9
 >1 2 3 4 5 6 7 8 9
+>6 7
 
    testfn() {
      emulate -L sh
@@ -1418,3 +1422,13 @@
    print ${str:0:}
 1:Regression test for missing length after offset
 ?(eval):2: unrecognized modifier
+
+   foo="123456789"
+   print ${foo:5:-6}
+1:Regression test for total length < 0 in string
+?(eval):2: substring expression: 3 < 5
+
+   foo=(1 2 3 4 5 6 7 8 9)
+   print ${foo:5:-6}
+1:Regression test for total length < 0 in array
+?(eval):2: substring expression: 3 < 5
-- 
1.7.4-rc1


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2011-05-11 15:39 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-10 11:10 support negative LEN in ${VAR:OFFSET:LEN} Mikael Magnusson
2011-03-10 15:19 ` Bart Schaefer
2011-03-10 15:31   ` Mikael Magnusson
2011-03-16 15:04     ` Mikael Magnusson
2011-03-16 16:21       ` Bart Schaefer
2011-05-11 15:39         ` PATCH: " Mikael Magnusson

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).