PATCH: parse from even deeper in hell

zsh-workers
 help / color / mirror / code / Atom feed

* PATCH: parse from even deeper in hell
@ 2015-02-19 10:13 Peter Stephenson
  2015-02-19 21:47 ` Mikael Magnusson
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Stephenson @ 2015-02-19 10:13 UTC (permalink / raw)
  To: Zsh Hackers' List

  And Tomlinson looked down and down, and saw beneath his feet
  The frontlet of a tortured star milk-white in Hell-Mouth heat.

% print $((echo one); (echo two))
zsh: bad math expression: operator expected at `one); (ech...'

At the point this goes wrong, we've actually already established this is
a command substitution, not a math expression.  However, we're now in
the substitution code and it doesn't have any marker that that's
happened.  Instead, it just looks to see if there are two parentheses at
the end, which there are.

Note that it's not a fix to count active parentheses in the middle at
that point: those aren't tokenized because we're parsing this as a
string for later expansion.  So the ones at the end are the first that
skipparens() picks up.  In any case re-counting when we've already
established what's supposed to happen is a pretty kludgy fix.

The fix here is to use different tokens for the first and last
parenthesis for math.  We then just look for the matching close marker
when we find the open marker.  We can't have nested math expansions so I
think this ought to be robust.

I've incremented the version as this changes the way strings are
tokenized.

The tests might more logically be with command substitution rather than
arithmetic, but I've left them in order to keep the tests for is / isn't
arithmetic in one place for easy comparison.

pws

diff --git a/Config/version.mk b/Config/version.mk
index eb51638..a8eafa5 100644
--- a/Config/version.mk
+++ b/Config/version.mk
@@ -27,5 +27,5 @@
 # This must also serve as a shell script, so do not add spaces around the
 # `=' signs.
 
-VERSION=5.0.7-dev-0
-VERSION_DATE='October 8, 2014'
+VERSION=5.0.7-dev-1
+VERSION_DATE='February 19, 2014'
diff --git a/Src/lex.c b/Src/lex.c
index 0068485..307b6e9 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -35,7 +35,7 @@
 /* tokens */
 
 /**/
-mod_export char ztokens[] = "#$^*()$=|{}[]`<>>?~`,'\"\\\\";
+mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,'\"\\\\";
 
 /* parts of the current token */
 
@@ -473,8 +473,14 @@ add(int c)
 	}							      \
     }
 
+enum {
+    CMD_OR_MATH_CMD,
+    CMD_OR_MATH_MATH,
+    CMD_OR_MATH_ERR
+};
+
 /*
- * Return 1 for math, 0 for a command, 2 for an error.  If it couldn't be
+ * Return one of the above.  If it couldn't be
  * parsed as math, but there was no gross error, it's a command.
  */
 
@@ -496,13 +502,13 @@ cmd_or_math(int cs_type)
 	/* Successfully parsed, see if it was math */
 	c = hgetc();
 	if (c == ')')
-	    return 1; /* yes */
+	    return CMD_OR_MATH_MATH; /* yes */
 	hungetc(c);
 	lexstop = 0;
 	c = ')';
     } else if (lexstop) {
 	/* we haven't got anything to unget */
-	return 2;
+	return CMD_OR_MATH_ERR;
     }
     /* else unsuccessful: unget the whole thing */
     hungetc(c);
@@ -513,15 +519,15 @@ cmd_or_math(int cs_type)
 		ztokens[*lexbuf.ptr - Pound] : *lexbuf.ptr);
     }
     if (errflag)
-	return 2;
+	return CMD_OR_MATH_ERR;
     hungetc('(');
-    return errflag ? 2 : 0;
+    return errflag ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
 }
 
 
 /*
  * Parse either a $(( ... )) or a $(...)
- * Return 0 on success, 1 on failure.
+ * Return the same as cmd_or_math().
  */
 static int
 cmd_or_math_sub(void)
@@ -529,21 +535,23 @@ cmd_or_math_sub(void)
     int c = hgetc(), ret;
 
     if (c == '(') {
+	int lexpos = (int)(lexbuf.ptr - tokstr);
 	add(Inpar);
 	add('(');
-	if ((ret = cmd_or_math(CS_MATHSUBST)) == 1) {
+	if ((ret = cmd_or_math(CS_MATHSUBST)) == CMD_OR_MATH_MATH) {
+	    tokstr[lexpos] = Inparmath;
 	    add(')');
-	    return 0;
+	    return CMD_OR_MATH_MATH;
 	}
-	if (ret == 2)
-	    return 1;
+	if (ret == CMD_OR_MATH_ERR)
+	    return CMD_OR_MATH_ERR;
 	lexbuf.ptr -= 2;
 	lexbuf.len -= 2;
     } else {
 	hungetc(c);
 	lexstop = 0;
     }
-    return skipcomm();
+    return skipcomm() ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
 }
 
 /* Check whether we're looking at valid numeric globbing syntax      *
@@ -764,10 +772,10 @@ gettok(void)
 		lexbuf.ptr = tokstr = (char *)
 		    hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
 		switch (cmd_or_math(CS_MATH)) {
-		case 1:
+		case CMD_OR_MATH_MATH:
 		    return DINPAR;
 
-		case 0:
+		case CMD_OR_MATH_CMD:
 		    /*
 		     * Not math, so we don't return the contents
 		     * as a string in this case.
@@ -987,12 +995,19 @@ gettokstr(int c, int sub)
 		c = Outbrack;
 	    } else if (e == '(') {
 		add(String);
-		c = cmd_or_math_sub();
-		if (c) {
+		switch (cmd_or_math_sub()) {
+		case CMD_OR_MATH_CMD:
+		    c = Outpar;
+		    break;
+
+		case CMD_OR_MATH_MATH:
+		    c = Outparmath;
+		    break;
+
+		default:
 		    peek = LEXERR;
 		    goto brk;
 		}
-		c = Outpar;
 	    } else {
 		if (e == '{') {
 		    add(c);
@@ -1400,8 +1415,19 @@ dquote_parse(char endchar, int sub)
 	    c = hgetc();
 	    if (c == '(') {
 		add(Qstring);
-		err = cmd_or_math_sub();
-		c = Outpar;
+		switch (cmd_or_math_sub()) {
+		case CMD_OR_MATH_CMD:
+		    c = Outpar;
+		    break;
+
+		case CMD_OR_MATH_MATH:
+		    c = Outparmath;
+		    break;
+
+		default:
+		    err = 1;
+		    break;
+		}
 	    } else if (c == '[') {
 		add(String);
 		add(Inbrack);
diff --git a/Src/subst.c b/Src/subst.c
index a2bb648..056b12b 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -195,7 +195,7 @@ stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub)
 
     while (!errflag && (c = *str)) {
 	if ((qt = c == Qstring) || c == String) {
-	    if ((c = str[1]) == Inpar) {
+	    if ((c = str[1]) == Inpar || c == Inparmath) {
 		if (!qt)
 		    list->list.flags |= LF_ARRAY;
 		str++;
@@ -258,6 +258,22 @@ stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub)
 		skipparens(Inpar, Outpar, &str);
 #endif
 		str--;
+	    } else if (c == Inparmath) {
+		/* Math substitution of the form $((...)) */
+		str[-1] = '\0';
+		while (*str != Outparmath && *str)
+		    str++;
+		if (*str != Outparmath) {
+		    zerr("Failed to find end of math substitution");
+		    return NULL;
+		}
+		str[-1] = '\0';
+		if (isset(EXECOPT))
+		    str = arithsubst(str2 + 2, &str3, str+1);
+		else
+		    strncpy(str3, str2, 1);
+		setdata(node, (void *) str3);
+		continue;
 	    } else {
 		endchar = c;
 		*str = '\0';
@@ -266,16 +282,6 @@ stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub)
 		    DPUTS(!*str, "BUG: parse error in command substitution");
 	    }
 	    *str++ = '\0';
-	    if (endchar == Outpar && str2[1] == '(' && str[-2] == ')') {
-		/* Math substitution of the form $((...)) */
-		str[-2] = '\0';
-		if (isset(EXECOPT))
-		    str = arithsubst(str2 + 2, &str3, str);
-		else
-		    strncpy(str3, str2, 1);
-		setdata(node, (void *) str3);
-		continue;
-	    }
 
 	    /* It is a command substitution, which will be parsed again   *
 	     * by the lexer, so we untokenize it first, but we cannot use *
diff --git a/Src/zsh.h b/Src/zsh.h
index dd946d2..9a97263 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -163,40 +163,42 @@ struct mathfunc {
 #define Hat		((char) 0x86)
 #define Star		((char) 0x87)
 #define Inpar		((char) 0x88)
-#define Outpar		((char) 0x89)
-#define Qstring	        ((char) 0x8a)
-#define Equals		((char) 0x8b)
-#define Bar	      	((char) 0x8c)
-#define Inbrace	        ((char) 0x8d)
-#define Outbrace	((char) 0x8e)
-#define Inbrack	        ((char) 0x8f)
-#define Outbrack	((char) 0x90)
-#define Tick		((char) 0x91)
-#define Inang		((char) 0x92)
-#define Outang		((char) 0x93)
-#define OutangProc	((char) 0x94)
-#define Quest		((char) 0x95)
-#define Tilde		((char) 0x96)
-#define Qtick		((char) 0x97)
-#define Comma		((char) 0x98)
+#define Inparmath	((char) 0x89)
+#define Outpar		((char) 0x8a)
+#define Outparmath	((char) 0x8b)
+#define Qstring	        ((char) 0x8c)
+#define Equals		((char) 0x8d)
+#define Bar	      	((char) 0x8e)
+#define Inbrace	        ((char) 0x8f)
+#define Outbrace	((char) 0x90)
+#define Inbrack	        ((char) 0x91)
+#define Outbrack	((char) 0x92)
+#define Tick		((char) 0x93)
+#define Inang		((char) 0x94)
+#define Outang		((char) 0x95)
+#define OutangProc	((char) 0x96)
+#define Quest		((char) 0x97)
+#define Tilde		((char) 0x98)
+#define Qtick		((char) 0x99)
+#define Comma		((char) 0x9a)
 /*
  * Null arguments: placeholders for single and double quotes
  * and backslashes.
  */
-#define Snull		((char) 0x99)
-#define Dnull		((char) 0x9a)
-#define Bnull		((char) 0x9b)
+#define Snull		((char) 0x9b)
+#define Dnull		((char) 0x9c)
+#define Bnull		((char) 0x9d)
 /*
  * Backslash which will be returned to "\" instead of being stripped
  * when we turn the string into a printable format.
  */
-#define Bnullkeep       ((char) 0x9c)
+#define Bnullkeep       ((char) 0x9e)
 /*
  * Null argument that does not correspond to any character.
  * This should be last as it does not appear in ztokens and
  * is used to initialise the IMETA type in inittyptab().
  */
-#define Nularg		((char) 0x9d)
+#define Nularg		((char) 0x9f)
 
 /*
  * Take care to update the use of IMETA appropriately when adding
diff --git a/Test/C01arith.ztst b/Test/C01arith.ztst
index 09c0822..67d78ee 100644
--- a/Test/C01arith.ztst
+++ b/Test/C01arith.ztst
@@ -353,3 +353,26 @@
   '
 0:Non-arithmetic subst with command subsitution parse from hell
 >yes, this one after case in subshell
+
+  print "a$((echo one subst)
+  (echo two subst))b"
+0:Another tricky case that is actually a command substitution
+>aone subst
+>two substb
+
+  print "x$((echo one frob); (echo two frob))y"
+0:Same on a single line
+>xone frob
+>two froby
+
+  # This case actually only works by accident: if it wasn't for the
+  # unbalanced parenthesis this would be a valid math substitution.
+  # Hence it's definitely not recommended code.  However, it does give
+  # the algorithm an extra check.
+  print $((case foo in
+  foo)
+  print Worked OK
+  ;;
+  esac))
+0:Would-be math expansion with extra parenthesis making it a cmd subst
+>Worked OK


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-19 10:13 PATCH: parse from even deeper in hell Peter Stephenson
@ 2015-02-19 21:47 ` Mikael Magnusson
  2015-02-19 22:03   ` Peter Stephenson
  0 siblings, 1 reply; 24+ messages in thread
From: Mikael Magnusson @ 2015-02-19 21:47 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh Hackers' List

On Thu, Feb 19, 2015 at 11:13 AM, Peter Stephenson
<p.stephenson@samsung.com> wrote:
>   And Tomlinson looked down and down, and saw beneath his feet
>   The frontlet of a tortured star milk-white in Hell-Mouth heat.
>
> % print $((echo one); (echo two))
> zsh: bad math expression: operator expected at `one); (ech...'
>
> At the point this goes wrong, we've actually already established this is
> a command substitution, not a math expression.  However, we're now in
> the substitution code and it doesn't have any marker that that's
> happened.  Instead, it just looks to see if there are two parentheses at
> the end, which there are.
>
> Note that it's not a fix to count active parentheses in the middle at
> that point: those aren't tokenized because we're parsing this as a
> string for later expansion.  So the ones at the end are the first that
> skipparens() picks up.  In any case re-counting when we've already
> established what's supposed to happen is a pretty kludgy fix.
>
> The fix here is to use different tokens for the first and last
> parenthesis for math.  We then just look for the matching close marker
> when we find the open marker.  We can't have nested math expansions so I
> think this ought to be robust.
>
> I've incremented the version as this changes the way strings are
> tokenized.
>
> The tests might more logically be with command substitution rather than
> arithmetic, but I've left them in order to keep the tests for is / isn't
> arithmetic in one place for easy comparison.

I get a crapton of "bad(2) wordsplit reading history:" with this
patch. It seems like all the failed lines have metafied characters in
them, if that's a hint. Most don't contain any syntax characters at
all, for example:
 hist.c:3499: bad(2) wordsplit reading history: mp3info 好きになり\M-c\M-^Aい.mp3
at: 好きになり\M-c\M-^Aい.mp3
word: 好きになり\M-c\M-^Aい.mp3
The (2) means it's the second of the two bad=1; assignments
triggering. (Left over from last time I had a problem in that area).
I'm also not sure why the utf8 is slightly mishandled in the output
there. It has at least been unmetafied, the raw string in the history
file is more or less:
mp3info 好ぃ�になゃ�たぃ�.mp3

-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-19 21:47 ` Mikael Magnusson
@ 2015-02-19 22:03   ` Peter Stephenson
  2015-02-20  3:16     ` Mikael Magnusson
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Stephenson @ 2015-02-19 22:03 UTC (permalink / raw)
  To: Zsh Hackers' List

On Thu, 19 Feb 2015 22:47:12 +0100
Mikael Magnusson <mikachu@gmail.com> wrote:
> I get a crapton of "bad(2) wordsplit reading history:" with this
> patch. It seems like all the failed lines have metafied characters in
> them, if that's a hint. Most don't contain any syntax characters at
> all, for example:
>  hist.c:3499: bad(2) wordsplit reading history: mp3info 好きになり\M-c\M-^Aい.mp3
> at: 好きになり\M-c\M-^Aい.mp3s
> word: 好きになり\M-c\M-^Aい.mp3

Unless I'm missing something, I don't think you've said what the real
characters you're expecting are.  The broken ones aren't much use for
testing.

> The (2) means it's the second of the two bad=1; assignments
> triggering.

At line 3490?

> I'm also not sure why the utf8 is slightly mishandled in the output
> there. It has at least been unmetafied, the raw string in the history
> file is more or less:
> mp3info 好ぃ�になゃ�たぃ�.mp3

So those aren't actually valid characters?  Does that mean metafied
characters are getting into the history?  I've made it necessary for two
more bytes to be metafied, so if the shell was expecting them to be
metafied in the history file they won't be.  The bytes are 0x9e and
0x9f.  I guess we could special case those, but do we really output
metafied characters to the history file?

pws

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-19 22:03   ` Peter Stephenson
@ 2015-02-20  3:16     ` Mikael Magnusson
  2015-02-20  3:22       ` Mikael Magnusson
  0 siblings, 1 reply; 24+ messages in thread
From: Mikael Magnusson @ 2015-02-20  3:16 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh Hackers' List

On Thu, Feb 19, 2015 at 11:03 PM, Peter Stephenson
<p.w.stephenson@ntlworld.com> wrote:
> On Thu, 19 Feb 2015 22:47:12 +0100
> Mikael Magnusson <mikachu@gmail.com> wrote:
>> I get a crapton of "bad(2) wordsplit reading history:" with this
>> patch. It seems like all the failed lines have metafied characters in
>> them, if that's a hint. Most don't contain any syntax characters at
>> all, for example:
>>  hist.c:3499: bad(2) wordsplit reading history: mp3info 好きになり\M-c\M-^Aい.mp3
>> at: 好きになり\M-c\M-^Aい.mp3s
>> word: 好きになり\M-c\M-^Aい.mp3
>
> Unless I'm missing something, I don't think you've said what the real
> characters you're expecting are.  The broken ones aren't much use for
> testing.
>
>> The (2) means it's the second of the two bad=1; assignments
>> triggering.
>
> At line 3490?

Yes.

>> I'm also not sure why the utf8 is slightly mishandled in the output
>> there. It has at least been unmetafied, the raw string in the history
>> file is more or less:
>> mp3info 好ぃ�になゃ�たぃ�.mp3
>
> So those aren't actually valid characters?  Does that mean metafied
> characters are getting into the history?  I've made it necessary for two
> more bytes to be metafied, so if the shell was expecting them to be
> metafied in the history file they won't be.  The bytes are 0x9e and
> 0x9f.  I guess we could special case those, but do we really output
> metafied characters to the history file?

The actual line in the history is
mp3info 好きになりたい.mp3
but in the history _file_, it's stored metafied, which is hard to
paste into an email. I'm not sure why pasting the original string
didn't occur to me. AFAIK, history files have always been metafied.
I'm not sure why the た is mangled in the error message is what I tried
to say originally. The final byte is 9f which I suppose is an esc with
the 8th bit set. Maybe something is trying to double unmetafy? Running
it through unmetafy() twice doesn't cause any problems though...

-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-20  3:16     ` Mikael Magnusson
@ 2015-02-20  3:22       ` Mikael Magnusson
  2015-02-20  3:33         ` Mikael Magnusson
  0 siblings, 1 reply; 24+ messages in thread
From: Mikael Magnusson @ 2015-02-20  3:22 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh Hackers' List

On Fri, Feb 20, 2015 at 4:16 AM, Mikael Magnusson <mikachu@gmail.com> wrote:
> On Thu, Feb 19, 2015 at 11:03 PM, Peter Stephenson
> <p.w.stephenson@ntlworld.com> wrote:
>> On Thu, 19 Feb 2015 22:47:12 +0100
>> Mikael Magnusson <mikachu@gmail.com> wrote:
>>> I get a crapton of "bad(2) wordsplit reading history:" with this
>>> patch. It seems like all the failed lines have metafied characters in
>>> them, if that's a hint. Most don't contain any syntax characters at
>>> all, for example:
>>>  hist.c:3499: bad(2) wordsplit reading history: mp3info 好きになり\M-c\M-^Aい.mp3
>>> at: 好きになり\M-c\M-^Aい.mp3s
>>> word: 好きになり\M-c\M-^Aい.mp3
>>
>> Unless I'm missing something, I don't think you've said what the real
>> characters you're expecting are.  The broken ones aren't much use for
>> testing.
>>
>>> The (2) means it's the second of the two bad=1; assignments
>>> triggering.
>>
>> At line 3490?
>
> Yes.
>
>>> I'm also not sure why the utf8 is slightly mishandled in the output
>>> there. It has at least been unmetafied, the raw string in the history
>>> file is more or less:
>>> mp3info 好ぃ�になゃ�たぃ�.mp3
>>
>> So those aren't actually valid characters?  Does that mean metafied
>> characters are getting into the history?  I've made it necessary for two
>> more bytes to be metafied, so if the shell was expecting them to be
>> metafied in the history file they won't be.  The bytes are 0x9e and
>> 0x9f.  I guess we could special case those, but do we really output
>> metafied characters to the history file?
>
> The actual line in the history is
> mp3info 好きになりたい.mp3
> but in the history _file_, it's stored metafied, which is hard to
> paste into an email. I'm not sure why pasting the original string
> didn't occur to me. AFAIK, history files have always been metafied.
> I'm not sure why the た is mangled in the error message is what I tried
> to say originally. The final byte is 9f which I suppose is an esc with
> the 8th bit set. Maybe something is trying to double unmetafy? Running
> it through unmetafy() twice doesn't cause any problems though...

Just looked at the debug code and found out about ZSH_DEBUG_LOG, turns
out there's also a 0x8A just before the \M-c\M-^

-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-20  3:22       ` Mikael Magnusson
@ 2015-02-20  3:33         ` Mikael Magnusson
  2015-02-20  3:43           ` Mikael Magnusson
  0 siblings, 1 reply; 24+ messages in thread
From: Mikael Magnusson @ 2015-02-20  3:33 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh Hackers' List

On Fri, Feb 20, 2015 at 4:22 AM, Mikael Magnusson <mikachu@gmail.com> wrote:
> On Fri, Feb 20, 2015 at 4:16 AM, Mikael Magnusson <mikachu@gmail.com> wrote:
>> On Thu, Feb 19, 2015 at 11:03 PM, Peter Stephenson
>> <p.w.stephenson@ntlworld.com> wrote:
>>> On Thu, 19 Feb 2015 22:47:12 +0100
>>> Mikael Magnusson <mikachu@gmail.com> wrote:
>>>> I get a crapton of "bad(2) wordsplit reading history:" with this
>>>> patch. It seems like all the failed lines have metafied characters in
>>>> them, if that's a hint. Most don't contain any syntax characters at
>>>> all, for example:
>>>>  hist.c:3499: bad(2) wordsplit reading history: mp3info 好きになり\M-c\M-^Aい.mp3
>>>> at: 好きになり\M-c\M-^Aい.mp3s
>>>> word: 好きになり\M-c\M-^Aい.mp3
>>>
>>> Unless I'm missing something, I don't think you've said what the real
>>> characters you're expecting are.  The broken ones aren't much use for
>>> testing.
>>>
>>>> The (2) means it's the second of the two bad=1; assignments
>>>> triggering.
>>>
>>> At line 3490?
>>
>> Yes.
>>
>>>> I'm also not sure why the utf8 is slightly mishandled in the output
>>>> there. It has at least been unmetafied, the raw string in the history
>>>> file is more or less:
>>>> mp3info 好ぃ�になゃ�たぃ�.mp3
>>>
>>> So those aren't actually valid characters?  Does that mean metafied
>>> characters are getting into the history?  I've made it necessary for two
>>> more bytes to be metafied, so if the shell was expecting them to be
>>> metafied in the history file they won't be.  The bytes are 0x9e and
>>> 0x9f.  I guess we could special case those, but do we really output
>>> metafied characters to the history file?
>>
>> The actual line in the history is
>> mp3info 好きになりたい.mp3
>> but in the history _file_, it's stored metafied, which is hard to
>> paste into an email. I'm not sure why pasting the original string
>> didn't occur to me. AFAIK, history files have always been metafied.
>> I'm not sure why the た is mangled in the error message is what I tried
>> to say originally. The final byte is 9f which I suppose is an esc with
>> the 8th bit set. Maybe something is trying to double unmetafy? Running
>> it through unmetafy() twice doesn't cause any problems though...
>
> Just looked at the debug code and found out about ZSH_DEBUG_LOG, turns
> out there's also a 0x8A just before the \M-c\M-^

Rerunning the original command seems to produce a different metafied
string than what was in the history before. What's weird is that it
does import correctly into the session from both lines... The line
from running it again also does not cause the wordsplit error.
grepping both of them into my unmetafy program also produces identical
utf8 strings.
This is the one causing a problem,
mp3info M-eM-%M-=M-cM-^AM-^CM--M-cM-^AM-+M-cM-^AM-*M-cM-^BM-^CM-*M-cM-^AM-^_M-cM-^AM-^CM-$.mp3
and this is what we store now which is fine,
mp3info M-eM-%M-=M-cM-^AM-^CM--M-cM-^AM-+M-cM-^AM-*M-cM-^BM-^CM-*M-cM-^AM-^CM-?M-cM-^AM-^CM-$.mp3

Any idea why I have a bunch of history entries stored differently,
that do unmetafy to the correct string, but are parsed weirdly with a
patch that changes how $(( is parsed? I don't quite see the connection
yet :).

-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-20  3:33         ` Mikael Magnusson
@ 2015-02-20  3:43           ` Mikael Magnusson
  2015-02-20  4:19             ` Ray Andrews
  2015-02-20 10:00             ` Peter Stephenson
  0 siblings, 2 replies; 24+ messages in thread
From: Mikael Magnusson @ 2015-02-20  3:43 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh Hackers' List

On Fri, Feb 20, 2015 at 4:33 AM, Mikael Magnusson <mikachu@gmail.com> wrote:
> On Fri, Feb 20, 2015 at 4:22 AM, Mikael Magnusson <mikachu@gmail.com> wrote:
>> On Fri, Feb 20, 2015 at 4:16 AM, Mikael Magnusson <mikachu@gmail.com> wrote:
>>> On Thu, Feb 19, 2015 at 11:03 PM, Peter Stephenson
>>> <p.w.stephenson@ntlworld.com> wrote:
>>>> On Thu, 19 Feb 2015 22:47:12 +0100
>>>> Mikael Magnusson <mikachu@gmail.com> wrote:
>>>>> I get a crapton of "bad(2) wordsplit reading history:" with this
>>>>> patch. It seems like all the failed lines have metafied characters in
>>>>> them, if that's a hint. Most don't contain any syntax characters at
>>>>> all, for example:
>>>>>  hist.c:3499: bad(2) wordsplit reading history: mp3info 好きになり\M-c\M-^Aい.mp3
>>>>> at: 好きになり\M-c\M-^Aい.mp3s
>>>>> word: 好きになり\M-c\M-^Aい.mp3
>>>>
>>>> Unless I'm missing something, I don't think you've said what the real
>>>> characters you're expecting are.  The broken ones aren't much use for
>>>> testing.
>>>>
>>>>> The (2) means it's the second of the two bad=1; assignments
>>>>> triggering.
>>>>
>>>> At line 3490?
>>>
>>> Yes.
>>>
>>>>> I'm also not sure why the utf8 is slightly mishandled in the output
>>>>> there. It has at least been unmetafied, the raw string in the history
>>>>> file is more or less:
>>>>> mp3info 好ぃ�になゃ�たぃ�.mp3
>>>>
>>>> So those aren't actually valid characters?  Does that mean metafied
>>>> characters are getting into the history?  I've made it necessary for two
>>>> more bytes to be metafied, so if the shell was expecting them to be
>>>> metafied in the history file they won't be.  The bytes are 0x9e and
>>>> 0x9f.  I guess we could special case those, but do we really output
>>>> metafied characters to the history file?
>>>
>>> The actual line in the history is
>>> mp3info 好きになりたい.mp3
>>> but in the history _file_, it's stored metafied, which is hard to
>>> paste into an email. I'm not sure why pasting the original string
>>> didn't occur to me. AFAIK, history files have always been metafied.
>>> I'm not sure why the た is mangled in the error message is what I tried
>>> to say originally. The final byte is 9f which I suppose is an esc with
>>> the 8th bit set. Maybe something is trying to double unmetafy? Running
>>> it through unmetafy() twice doesn't cause any problems though...
>>
>> Just looked at the debug code and found out about ZSH_DEBUG_LOG, turns
>> out there's also a 0x8A just before the \M-c\M-^
>
> Rerunning the original command seems to produce a different metafied
> string than what was in the history before. What's weird is that it
> does import correctly into the session from both lines... The line
> from running it again also does not cause the wordsplit error.
> grepping both of them into my unmetafy program also produces identical
> utf8 strings.
> This is the one causing a problem,
> mp3info M-eM-%M-=M-cM-^AM-^CM--M-cM-^AM-+M-cM-^AM-*M-cM-^BM-^CM-*M-cM-^AM-^_M-cM-^AM-^CM-$.mp3
> and this is what we store now which is fine,
> mp3info M-eM-%M-=M-cM-^AM-^CM--M-cM-^AM-+M-cM-^AM-*M-cM-^BM-^CM-*M-cM-^AM-^CM-?M-cM-^AM-^CM-$.mp3
>
> Any idea why I have a bunch of history entries stored differently,
> that do unmetafy to the correct string, but are parsed weirdly with a
> patch that changes how $(( is parsed? I don't quite see the connection
> yet :).

Oh I see, you renumbered a bunch of stuff in zsh.h, so text would be
metafied differently now. But only metafy uses the table, unmetafy
doesn't. That explains why the strings are different, but not why the
old string causes an error. Unless we are parsing it before
unmetafying it, which means any random bytes that weren't special
before but are now would be interpreted specially. Can we
unmetafy+metafy the string before lexing? I guess that might be slower
though. (Sorry for the 500 mails).

-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-20  3:43           ` Mikael Magnusson
@ 2015-02-20  4:19             ` Ray Andrews
  2015-02-20  9:54               ` Peter Stephenson
  2015-02-20 10:00             ` Peter Stephenson
  1 sibling, 1 reply; 24+ messages in thread
From: Ray Andrews @ 2015-02-20  4:19 UTC (permalink / raw)
  To: zsh-workers

 > Oh I see, you renumbered a bunch of stuff in zsh.h, so text would be 
metafied differently now. But only metafy uses the table, unmetafy 
doesn't. That explains why the strings are different, but not why the 
old string causes an error. Unless we are parsing it before unmetafying 
it, which means any random bytes that weren't special before but are now 
would be interpreted specially. Can we unmetafy+metafy the string before 
lexing? I guess that might be slower though. (Sorry for the 500 mails).

Would some kind soul explain (if an explanation is possible) why the 
original string is not just saved verbatim? (Or maybe after history 
expansion is handled).  Why would a line in history not just be what it is?


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-20  4:19             ` Ray Andrews
@ 2015-02-20  9:54               ` Peter Stephenson
  0 siblings, 0 replies; 24+ messages in thread
From: Peter Stephenson @ 2015-02-20  9:54 UTC (permalink / raw)
  To: zsh-workers

On Thu, 19 Feb 2015 20:19:15 -0800
Ray Andrews <rayandrews@eastlink.ca> wrote:
> Would some kind soul explain (if an explanation is possible) why the 
> original string is not just saved verbatim? (Or maybe after history 
> expansion is handled).  Why would a line in history not just be what it is?

That's a good question, and while we're probably stuck with the current
behaviour --- history files need to be shared between different
versions of the shell --- it would be good to know why.

The reason will probably be one of

- It just got forgotten about.  It works within any version of the shell
because the string is well-defined; it's only when we add a token,
as I did yesterday, that we get incompatibilities.  So no one noticed
it wasn't really pukka.

- It's so we can get embedded zero bytes.  Zsh attempts to treat these
internally as ordinary characters, rather than string terminators.
However, they're ordinary characters in file text, too, so it's not
clear this is a good reason.  Further, we already escape some characters
in an ad hoc fashion, notably newlines, so there's no reason we
shouldn't do that here, too.

The mailing list archive from the dim and distant past may cast light
upon this but I think current folk memory isn't up to it.

pws


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-20  3:43           ` Mikael Magnusson
  2015-02-20  4:19             ` Ray Andrews
@ 2015-02-20 10:00             ` Peter Stephenson
  2015-02-20 10:12               ` Mikael Magnusson
  1 sibling, 1 reply; 24+ messages in thread
From: Peter Stephenson @ 2015-02-20 10:00 UTC (permalink / raw)
  To: Zsh Hackers' List

On Fri, 20 Feb 2015 04:43:49 +0100
Mikael Magnusson <mikachu@gmail.com> wrote:
> Oh I see, you renumbered a bunch of stuff in zsh.h, so text would be
> metafied differently now. But only metafy uses the table, unmetafy
> doesn't. That explains why the strings are different, but not why the
> old string causes an error. Unless we are parsing it before
> unmetafying it, which means any random bytes that weren't special
> before but are now would be interpreted specially.

I haven't confirmed the behaviour in detail, but I think what's
happening is the old string read in from history now looks like
an already metafied byte and when we scan the line gets unmetafied into
an invalid 8-bit character which is where things are falling over.

> Can we
> unmetafy+metafy the string before lexing? I guess that might be slower
> though. (Sorry for the 500 mails).

I believe any character read in from the history file that appears as
"needs metafication" is illegal, because we don't output tokenised
text.  So we simply metafy it at that point.  We can pre-scan for this
so we don't treat lines we don't need to it.

The question is where to put this in on history read.  I think it's
going to affect non-lexical history, too, but the error on reading won't
be flagged up.

pws

-- 
Peter Stephenson | Principal Engineer Samsung Cambridge Solution Centre
Email: p.stephenson@samsung.com | Phone: +44 1223 434724 |
www.samsung.com

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-20 10:00             ` Peter Stephenson
@ 2015-02-20 10:12               ` Mikael Magnusson
  2015-02-22 18:26                 ` Peter Stephenson
  0 siblings, 1 reply; 24+ messages in thread
From: Mikael Magnusson @ 2015-02-20 10:12 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh Hackers' List

On Fri, Feb 20, 2015 at 11:00 AM, Peter Stephenson
<p.stephenson@samsung.com> wrote:
> On Fri, 20 Feb 2015 04:43:49 +0100
> Mikael Magnusson <mikachu@gmail.com> wrote:
>> Oh I see, you renumbered a bunch of stuff in zsh.h, so text would be
>> metafied differently now. But only metafy uses the table, unmetafy
>> doesn't. That explains why the strings are different, but not why the
>> old string causes an error. Unless we are parsing it before
>> unmetafying it, which means any random bytes that weren't special
>> before but are now would be interpreted specially.
>
> I haven't confirmed the behaviour in detail, but I think what's
> happening is the old string read in from history now looks like
> an already metafied byte and when we scan the line gets unmetafied into
> an invalid 8-bit character which is where things are falling over.
>
>> Can we
>> unmetafy+metafy the string before lexing? I guess that might be slower
>> though. (Sorry for the 500 mails).
>
> I believe any character read in from the history file that appears as
> "needs metafication" is illegal, because we don't output tokenised
> text.  So we simply metafy it at that point.  We can pre-scan for this
> so we don't treat lines we don't need to it.
>
> The question is where to put this in on history read.  I think it's
> going to affect non-lexical history, too, but the error on reading won't
> be flagged up.

I don't think so, unmetafy() doesn't care about the table. And as I
checked earlier, both the old and new version of the string in my
history file is unmetafied to the correct UTF-8 string. The 'only'
problem is that the lexer is looking at some bytes before it's
unmetafied and some stuff that should have been metafied to avoid
being parsed as tokens, isn't, because they weren't special in the old
version. That's why I think running unmetafy before lexing is
needed... And if the lexer wants metafied text then we'd just have to
metafy it again right away.

My theory doesn't fully explain the weird output from the error
message, but maybe that's because of dingbats in the wobblifier.

-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-20 10:12               ` Mikael Magnusson
@ 2015-02-22 18:26                 ` Peter Stephenson
  2015-02-23  9:54                   ` Peter Stephenson
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Stephenson @ 2015-02-22 18:26 UTC (permalink / raw)
  To: Zsh Hackers' List

On Fri, 20 Feb 2015 11:12:39 +0100
Mikael Magnusson <mikachu@gmail.com> wrote:
> > The question is where to put this in on history read.  I think it's
> > going to affect non-lexical history, too, but the error on reading won't
> > be flagged up.
> 
> I don't think so, unmetafy() doesn't care about the table. And as I
> checked earlier, both the old and new version of the string in my
> history file is unmetafied to the correct UTF-8 string. The 'only'
> problem is that the lexer is looking at some bytes before it's
> unmetafied and some stuff that should have been metafied to avoid
> being parsed as tokens, isn't, because they weren't special in the old
> version. That's why I think running unmetafy before lexing is
> needed... And if the lexer wants metafied text then we'd just have to
> metafy it again right away.

See if this fixes the problems, then.

Note we're almost out of meta characters with this limitation --- we
can't expand beyond the range of 32 we currently reserve if we need to
keep compatibility with history.  We're only just getting away with it
with 0xa0 because 0x80 isn't a meta character, as for historical reasons
they start at 0x83.

pws

diff --git a/Src/hist.c b/Src/hist.c
index 381c7e2..acc4259 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -3377,11 +3377,45 @@ histsplitwords(char *lineptr, short **wordsp, int *nwordsp, int *nwordposp,
     char *start = lineptr;
 
     if (uselex) {
-	LinkList wordlist = bufferwords(NULL, lineptr, NULL,
-					LEXFLAGS_COMMENTS_KEEP);
+	LinkList wordlist;
 	LinkNode wordnode;
-	int nwords_max;
+	int nwords_max, remeta = 0;
+	char *ptr;
+
+	/*
+	 * Handle the special case that we're reading from an
+	 * old shell with fewer meta characters, so we need to
+	 * metafy some more.  (It's not clear why the history
+	 * file is metafied at all; some would say this is plain
+	 * stupid.  But we're stuck with it now without some
+	 * hairy workarounds for compatibility).
+	 *
+	 * This is rare so doesn't need to be that efficient; just
+	 * allocate space off the heap.
+	 *
+	 * Note that our it's currently believed this all comes out in
+	 * the wash in the non-uselex case owing to where unmetafication
+	 * and metafication happen.
+	 */
+	for (ptr = lineptr; *ptr; ptr++) {
+	    if (*ptr != Meta && imeta(*ptr))
+		remeta++;
+	}
+	if (remeta) {
+	    char *ptr2, *line2;
+	    ptr2 = line2 = (char *)zhalloc((ptr - lineptr) + remeta + 1);
+	    for (ptr = lineptr; *ptr; ptr++) {
+		if (*ptr != Meta && imeta(*ptr)) {
+		    *ptr2++ = Meta;
+		    *ptr2++ = *ptr ^ 32;
+		} else
+		    *ptr2++ = *ptr;
+	    }
+	    lineptr = line2;
+	}
 
+	wordlist = bufferwords(NULL, lineptr, NULL,
+			       LEXFLAGS_COMMENTS_KEEP);
 	nwords_max = 2 * countlinknodes(wordlist);
 	if (nwords_max > nwords) {
 	    *nwordsp = nwords = nwords_max;


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-22 18:26                 ` Peter Stephenson
@ 2015-02-23  9:54                   ` Peter Stephenson
  2015-02-23 10:11                     ` Peter Stephenson
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Stephenson @ 2015-02-23  9:54 UTC (permalink / raw)
  To: Zsh Hackers' List

Apparently this message from Mikael didn't go to the list...

I've shortened the quotation.

I'll reply to it separately.

pws

On Sun, Feb 22, 2015 at 7:26 PM, Peter Stephenson
<p.w.stephenson@ntlworld.com> wrote:
> On Fri, 20 Feb 2015 11:12:39 +0100
> Mikael Magnusson <mikachu@gmail.com> wrote:
>> > The question is where to put this in on history read.  I think it's
>> > going to affect non-lexical history, too, but the error on reading won't
>> > be flagged up.
>>
>> I don't think so, unmetafy() doesn't care about the table. And as I
>> checked earlier, both the old and new version of the string in my
>> history file is unmetafied to the correct UTF-8 string. The 'only'
>> problem is that the lexer is looking at some bytes before it's
>> unmetafied and some stuff that should have been metafied to avoid
>> being parsed as tokens, isn't, because they weren't special in the old
>> version. That's why I think running unmetafy before lexing is
>> needed... And if the lexer wants metafied text then we'd just have to
>> metafy it again right away.
>
> See if this fixes the problems, then.

I actually thought the patch in 34587 had fixed it, turns out I just
lost my --enable-zsh-debug flag. This patch doesn't fix it either
though, and I get a different set of errors every time I open a
terminal now, so that's fun. Which is to say it's all the same
wordsplit error, but it's printed for a different set of lines. Some
of them are the same though.

I noticed another thing from these errors too, they're printed also
when I exit zsh. There's not much point in lexing the history at
write-out time, is there?

I tried playing around with the code a bit. The thing that looks
suspicious to me is
    if (*ptr != Meta && imeta(*ptr)) {
Shouldn't they check ptr[0] and ptr[1] or so? I tried this,
    if ((ptr == lineptr || ptr[-1] != Meta) && imeta(ptr[0])) {
(in both places) but it didn't improve matters.

I tried the following instead of the for+if:
    unmetafy(lineptr, NULL);
    lineptr = metafy(lineptr, -1, META_USEHEAP);
and it gets rid of the errors, but it also causes insert-last-word to
do nothing. So maybe my whole theory is wrong.

-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-23  9:54                   ` Peter Stephenson
@ 2015-02-23 10:11                     ` Peter Stephenson
  2015-02-23 11:35                       ` Mikael Magnusson
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Stephenson @ 2015-02-23 10:11 UTC (permalink / raw)
  To: Zsh Hackers' List

> > Mikael Magnusson <mikachu@gmail.com> wrote:
> I actually thought the patch in 34587 had fixed it, turns out I just
> lost my --enable-zsh-debug flag. This patch doesn't fix it either
> though, and I get a different set of errors every time I open a
> terminal now, so that's fun. Which is to say it's all the same
> wordsplit error, but it's printed for a different set of lines. Some
> of them are the same though.

Obviously, if you want to go down that route, you'll have to look at see
what's up with those lines.  It seemed to work on the line you originall
gave.

> I noticed another thing from these errors too, they're printed also
> when I exit zsh. There's not much point in lexing the history at
> write-out time, is there?

I can't remember what that is, but I've noticed it before and I have a
vague feeling I looked and decided I wasn't going to look any more.

> I tried playing around with the code a bit. The thing that looks
> suspicious to me is
>     if (*ptr != Meta && imeta(*ptr)) {
> Shouldn't they check ptr[0] and ptr[1] or so? I tried this,
>     if ((ptr == lineptr || ptr[-1] != Meta) && imeta(ptr[0])) {
> (in both places) but it didn't improve matters.

No, the problem here isn't a standard one with Tok -> Meta + NonTok.
It's that there's something that looks like Tok but isn't.  So we
need to turn it into Meta + NonTok.  That's the second part of the
test.  However imeta(*ptr) triggers if *ptr is Meta, which isn't what we
want because in that case it means we've hit a correct Meta + NonTok
sequence.

One thing I didn't think I needed to do, but may have got wrong,
is skip the byte after the Meta, i.e.

if (*ptr == Meta)
    ptr++;

(which acts together with the existing increment).

You could try that.

> I tried the following instead of the for+if:
>     unmetafy(lineptr, NULL);
>     lineptr = metafy(lineptr, -1, META_USEHEAP);
> and it gets rid of the errors, but it also causes insert-last-word to
> do nothing. So maybe my whole theory is wrong.

I'd be worried about doing that on every single line -- we already know
HIST_LEX_WORDS can be very slow, which is the only reason it's an option
(it's logically the right thing to do, modulo lex failures).

However, it should be possible to combine that with the "remeta" check,
i.e. see if the line needs it first, but not use the value of remeta,
just whether it's non-zero, and then it becomes easy and not too
intrusive --- and also safe about false positives.

I don't see why this would cause failures later on, particularly ones
apparently unrelated to the meta state of the string.

pws

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-23 10:11                     ` Peter Stephenson
@ 2015-02-23 11:35                       ` Mikael Magnusson
  2015-02-23 12:36                         ` Peter Stephenson
  0 siblings, 1 reply; 24+ messages in thread
From: Mikael Magnusson @ 2015-02-23 11:35 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh Hackers' List

> Apparently this message from Mikael didn't go to the list...

I made a draft last night before going to bed, and decided I was too
tired to send, apparently when I woke up I was still too tired to
check if I had set reply-to-all. :)

On Mon, Feb 23, 2015 at 11:11 AM, Peter Stephenson
<p.stephenson@samsung.com> wrote:
>> > Mikael Magnusson <mikachu@gmail.com> wrote:
>> I actually thought the patch in 34587 had fixed it, turns out I just
>> lost my --enable-zsh-debug flag. This patch doesn't fix it either
>> though, and I get a different set of errors every time I open a
>> terminal now, so that's fun. Which is to say it's all the same
>> wordsplit error, but it's printed for a different set of lines. Some
>> of them are the same though.
>
> Obviously, if you want to go down that route, you'll have to look at see
> what's up with those lines.  It seemed to work on the line you originall
> gave.
>
>> I noticed another thing from these errors too, they're printed also
>> when I exit zsh. There's not much point in lexing the history at
>> write-out time, is there?
>
> I can't remember what that is, but I've noticed it before and I have a
> vague feeling I looked and decided I wasn't going to look any more.

Okay, that sounds ominous enough for me to drop the idea.

>> I tried playing around with the code a bit. The thing that looks
>> suspicious to me is
>>     if (*ptr != Meta && imeta(*ptr)) {
>> Shouldn't they check ptr[0] and ptr[1] or so? I tried this,
>>     if ((ptr == lineptr || ptr[-1] != Meta) && imeta(ptr[0])) {
>> (in both places) but it didn't improve matters.
>
> No, the problem here isn't a standard one with Tok -> Meta + NonTok.
> It's that there's something that looks like Tok but isn't.  So we
> need to turn it into Meta + NonTok.  That's the second part of the
> test.  However imeta(*ptr) triggers if *ptr is Meta, which isn't what we
> want because in that case it means we've hit a correct Meta + NonTok
> sequence.
>
> One thing I didn't think I needed to do, but may have got wrong,
> is skip the byte after the Meta, i.e.
>
> if (*ptr == Meta)
>     ptr++;
>
> (which acts together with the existing increment).
>
> You could try that.
>
>> I tried the following instead of the for+if:
>>     unmetafy(lineptr, NULL);
>>     lineptr = metafy(lineptr, -1, META_USEHEAP);
>> and it gets rid of the errors, but it also causes insert-last-word to
>> do nothing. So maybe my whole theory is wrong.
>
> I'd be worried about doing that on every single line -- we already know
> HIST_LEX_WORDS can be very slow, which is the only reason it's an option
> (it's logically the right thing to do, modulo lex failures).
>
> However, it should be possible to combine that with the "remeta" check,
> i.e. see if the line needs it first, but not use the value of remeta,
> just whether it's non-zero, and then it becomes easy and not too
> intrusive --- and also safe about false positives.
>
> I don't see why this would cause failures later on, particularly ones
> apparently unrelated to the meta state of the string.

I figured out that when we assign lineptr after fiddling with it, we
also need to update start, it records the location of *lineptr on
entry to the function, and is used to calculate things later on. With
that addition, the unmetafy + metafy mostly works. insert-last-word
still gets "stuck" on words that came from the old metafication and
starts over from the end of history, leaving the old word in place.

Indeed fc -l lists this line differently still, even though it appears
correctly in zle.
10673* echo mp3info 好きになり\M-c\M-^Aい.mp3
10675* echo mp3info 好きになりたい.mp3

This is with the following code, which fixes(?) all the errors and
seems to make things work pretty well. However, even doing the remeta
unconditionally, I still get the above result with fc -l and
insert-last-word. It also happens, unsurprisingly, if I don't use
hist_lex_words. If I accept a line recalled in this way, the new
history entry is saved correctly and works fine in new shells.

    for (ptr = lineptr; *ptr; ptr++)
        if (*ptr != Meta && imeta(*ptr)) {
            remeta = 1;
            break;
        }
    if (remeta) {
        unmetafy(lineptr, &remeta);
        start = lineptr = metafy(lineptr, remeta, META_USEHEAP);
    }


-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-23 11:35                       ` Mikael Magnusson
@ 2015-02-23 12:36                         ` Peter Stephenson
  2015-02-23 12:57                           ` Peter Stephenson
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Stephenson @ 2015-02-23 12:36 UTC (permalink / raw)
  To: Zsh Hackers' List

On Mon, 23 Feb 2015 12:35:51 +0100
Mikael Magnusson <mikachu@gmail.com> wrote:
> I figured out that when we assign lineptr after fiddling with it, we
> also need to update start, it records the location of *lineptr on
> entry to the function, and is used to calculate things later on. With
> that addition, the unmetafy + metafy mostly works. insert-last-word
> still gets "stuck" on words that came from the old metafication and
> starts over from the end of history, leaving the old word in place.

What's the line this happens with?  Is it the same as the one that
originally failed?

I wonder if it's getting information by some other path, though it seems
strange the last word wouldn't come from the bit we've apparently fixed
up.

> Indeed fc -l lists this line differently still, even though it appears
> correctly in zle.
> 10673* echo mp3info 好きになり\M-c\M-^Aい.mp3
> 10675* echo mp3info 好きになりたい.mp3

That suggests this is indeed a different code path to the one that got
fixed --- it sounds like the same line entered in a new shell is OK so
it can only be that there's some bit of the history that's not fixed.
So we may need the equivalent earlier or elsewhere in addition; the
former would be better.

> This is with the following code, which fixes(?) all the errors and
> seems to make things work pretty well. However, even doing the remeta
> unconditionally, I still get the above result with fc -l and
> insert-last-word. It also happens, unsurprisingly, if I don't use
> hist_lex_words.

That's yet another pointer that the code needs to go elsewhere.

> If I accept a line recalled in this way, the new
> history entry is saved correctly and works fine in new shells.
> 
>     for (ptr = lineptr; *ptr; ptr++)
>         if (*ptr != Meta && imeta(*ptr)) {
>             remeta = 1;
>             break;
>         }
>     if (remeta) {
>         unmetafy(lineptr, &remeta);
>         start = lineptr = metafy(lineptr, remeta, META_USEHEAP);
>     }

That looks an entirely reasonable fix for the problem, if we can work
out what's still wrong and move things around.

At worst, ensuring the file gets completely written out from scratch
(turning off INC_APPEND_HISTORY) should fix things for subsequent
shells, whether new or old.

pws

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-23 12:36                         ` Peter Stephenson
@ 2015-02-23 12:57                           ` Peter Stephenson
  2015-02-23 13:38                             ` Mikael Magnusson
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Stephenson @ 2015-02-23 12:57 UTC (permalink / raw)
  To: Zsh Hackers' List

On Mon, 23 Feb 2015 12:36:07 +0000
Peter Stephenson <p.stephenson@samsung.com> wrote:
> On Mon, 23 Feb 2015 12:35:51 +0100
> Mikael Magnusson <mikachu@gmail.com> wrote:
> > I figured out that when we assign lineptr after fiddling with it, we
> > also need to update start, it records the location of *lineptr on
> > entry to the function, and is used to calculate things later on. With
> > that addition, the unmetafy + metafy mostly works. insert-last-word
> > still gets "stuck" on words that came from the old metafication and
> > starts over from the end of history, leaving the old word in place.
> 
> What's the line this happens with?  Is it the same as the one that
> originally failed?

Yes, I see it is.

> I wonder if it's getting information by some other path, though it seems
> strange the last word wouldn't come from the bit we've apparently fixed
> up.

Ah, simply moving the hunk up seems to help this problem, at least the
one I'm seeing.

I've also implemented the suggestion of skipping the character
after any Meta found, though we need to check that's not '\0' since
this is has come from the big wide world.

Anything still broken?

diff --git a/Src/hist.c b/Src/hist.c
index acc4259..da79771 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -3372,47 +3372,44 @@ mod_export void
 histsplitwords(char *lineptr, short **wordsp, int *nwordsp, int *nwordposp,
 	       int uselex)
 {
-    int nwords = *nwordsp, nwordpos = 0;
+    int nwords = *nwordsp, nwordpos = 0, remeta = 0;
     short *words = *wordsp;
-    char *start = lineptr;
+    char *start, *ptr;
+
+    /*
+     * Handle the special case that we're reading from an
+     * old shell with fewer meta characters, so we need to
+     * metafy some more.  (It's not clear why the history
+     * file is metafied at all; some would say this is plain
+     * stupid.  But we're stuck with it now without some
+     * hairy workarounds for compatibility).
+     *
+     * This is rare so doesn't need to be that efficient; just
+     * allocate space off the heap.
+     *
+     * Note that our it's currently believed this all comes out in
+     * the wash in the non-uselex case owing to where unmetafication
+     * and metafication happen.
+     */
+    for (ptr = lineptr; *ptr; ptr++) {
+	if (*ptr == Meta && ptr[1])
+	    ptr++;
+	else if (imeta(*ptr)) {
+	    remeta = 1;
+	    break;
+	}
+    }
+    if (remeta) {
+	unmetafy(lineptr, &remeta);
+	lineptr = metafy(lineptr, remeta, META_USEHEAP);
+    }
+
+    start = lineptr;
 
     if (uselex) {
 	LinkList wordlist;
 	LinkNode wordnode;
-	int nwords_max, remeta = 0;
-	char *ptr;
-
-	/*
-	 * Handle the special case that we're reading from an
-	 * old shell with fewer meta characters, so we need to
-	 * metafy some more.  (It's not clear why the history
-	 * file is metafied at all; some would say this is plain
-	 * stupid.  But we're stuck with it now without some
-	 * hairy workarounds for compatibility).
-	 *
-	 * This is rare so doesn't need to be that efficient; just
-	 * allocate space off the heap.
-	 *
-	 * Note that our it's currently believed this all comes out in
-	 * the wash in the non-uselex case owing to where unmetafication
-	 * and metafication happen.
-	 */
-	for (ptr = lineptr; *ptr; ptr++) {
-	    if (*ptr != Meta && imeta(*ptr))
-		remeta++;
-	}
-	if (remeta) {
-	    char *ptr2, *line2;
-	    ptr2 = line2 = (char *)zhalloc((ptr - lineptr) + remeta + 1);
-	    for (ptr = lineptr; *ptr; ptr++) {
-		if (*ptr != Meta && imeta(*ptr)) {
-		    *ptr2++ = Meta;
-		    *ptr2++ = *ptr ^ 32;
-		} else
-		    *ptr2++ = *ptr;
-	    }
-	    lineptr = line2;
-	}
+	int nwords_max;
 
 	wordlist = bufferwords(NULL, lineptr, NULL,
 			       LEXFLAGS_COMMENTS_KEEP);


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-23 12:57                           ` Peter Stephenson
@ 2015-02-23 13:38                             ` Mikael Magnusson
  2015-02-23 13:46                               ` Mikael Magnusson
  0 siblings, 1 reply; 24+ messages in thread
From: Mikael Magnusson @ 2015-02-23 13:38 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh Hackers' List

On Mon, Feb 23, 2015 at 1:57 PM, Peter Stephenson
<p.stephenson@samsung.com> wrote:
> On Mon, 23 Feb 2015 12:36:07 +0000
> Peter Stephenson <p.stephenson@samsung.com> wrote:
>> On Mon, 23 Feb 2015 12:35:51 +0100
>> Mikael Magnusson <mikachu@gmail.com> wrote:
>> > I figured out that when we assign lineptr after fiddling with it, we
>> > also need to update start, it records the location of *lineptr on
>> > entry to the function, and is used to calculate things later on. With
>> > that addition, the unmetafy + metafy mostly works. insert-last-word
>> > still gets "stuck" on words that came from the old metafication and
>> > starts over from the end of history, leaving the old word in place.
>>
>> What's the line this happens with?  Is it the same as the one that
>> originally failed?
>
> Yes, I see it is.

If you start dev-0 and run
echo た
then you'll end up with a trouble-causing entry in your history file.
Simply holding down alt-. in a new dev-1 shell then just inserts a
bunch of た forever for me.

>> I wonder if it's getting information by some other path, though it seems
>> strange the last word wouldn't come from the bit we've apparently fixed
>> up.

At least the function calling this function seems to store the raw
string in lasthist.text, but that's outside the while loop, so I think
only for the final history entry? I get my problem even when the bad
string is in an earlier line.

Oh, there's also this line,
he->node.nam = ztrdup(pt);
(pt is the history line)

Do we want to move the whole thing before that? I didn't look too
closely at this function yet.

> Ah, simply moving the hunk up seems to help this problem, at least the
> one I'm seeing.
>
> I've also implemented the suggestion of skipping the character
> after any Meta found, though we need to check that's not '\0' since
> this is has come from the big wide world.
>
> Anything still broken?

This patch works as well for me as my previous one.

-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: parse from even deeper in hell
  2015-02-23 13:38                             ` Mikael Magnusson
@ 2015-02-23 13:46                               ` Mikael Magnusson
  2015-02-23 13:51                                 ` PATCH: Remeta one frame earlier Mikael Magnusson
  0 siblings, 1 reply; 24+ messages in thread
From: Mikael Magnusson @ 2015-02-23 13:46 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh Hackers' List

On Mon, Feb 23, 2015 at 2:38 PM, Mikael Magnusson <mikachu@gmail.com> wrote:
> On Mon, Feb 23, 2015 at 1:57 PM, Peter Stephenson
> <p.stephenson@samsung.com> wrote:
>> On Mon, 23 Feb 2015 12:36:07 +0000
>> Peter Stephenson <p.stephenson@samsung.com> wrote:
>>> On Mon, 23 Feb 2015 12:35:51 +0100
>>> Mikael Magnusson <mikachu@gmail.com> wrote:
>>> > I figured out that when we assign lineptr after fiddling with it, we
>>> > also need to update start, it records the location of *lineptr on
>>> > entry to the function, and is used to calculate things later on. With
>>> > that addition, the unmetafy + metafy mostly works. insert-last-word
>>> > still gets "stuck" on words that came from the old metafication and
>>> > starts over from the end of history, leaving the old word in place.
>>>
>>> What's the line this happens with?  Is it the same as the one that
>>> originally failed?
>>
>> Yes, I see it is.
>
> If you start dev-0 and run
> echo た
> then you'll end up with a trouble-causing entry in your history file.
> Simply holding down alt-. in a new dev-1 shell then just inserts a
> bunch of た forever for me.
>
>>> I wonder if it's getting information by some other path, though it seems
>>> strange the last word wouldn't come from the bit we've apparently fixed
>>> up.
>
> At least the function calling this function seems to store the raw
> string in lasthist.text, but that's outside the while loop, so I think
> only for the final history entry? I get my problem even when the bad
> string is in an earlier line.
>
> Oh, there's also this line,
> he->node.nam = ztrdup(pt);
> (pt is the history line)
>
> Do we want to move the whole thing before that? I didn't look too
> closely at this function yet.

Doing so fixes the issue for me.

gmail-mangled patch follows, I also deleted the paragraph about
non-uselex since it's now outside that function.


diff --git i/Src/hist.c w/Src/hist.c
index ee55431..ae255d6 100644
--- i/Src/hist.c
+++ w/Src/hist.c
@@ -2502,11 +2502,42 @@ readhistfile(char *fn, int err, int readflags)
         newflags |= HIST_MAKEUNIQUE;
     while (fpos = ftell(in), (l = readhistline(0, &buf, &bufsiz, in))) {
         char *pt = buf;
+        char *ptr;
+        int remeta;

         if (l < 0) {
         zerr("corrupt history file %s", fn);
         break;
         }
+
+        /*
+         * Handle the special case that we're reading from an
+         * old shell with fewer meta characters, so we need to
+         * metafy some more.  (It's not clear why the history
+         * file is metafied at all; some would say this is plain
+         * stupid.  But we're stuck with it now without some
+         * hairy workarounds for compatibility).
+         *
+         * This is rare so doesn't need to be that efficient; just
+         * allocate space off the heap.
+         */
+        for (ptr = pt; *ptr; ptr++) {
+        if (*ptr == Meta && ptr[1])
+            ptr++;
+        else if (imeta(*ptr)) {
+            remeta = 1;
+            break;
+        }
+        }
+        if (remeta) {
+        unmetafy(pt, &remeta);
+        pt = metafy(pt, remeta, META_USEHEAP);
+        }
+
         if (*pt == ':') {
         pt++;
         stim = zstrtol(pt, NULL, 0);
@@ -3375,37 +3406,7 @@ histsplitwords(char *lineptr, short
 {
     int nwords = *nwordsp, nwordpos = 0, remeta = 0;
     short *words = *wordsp;
-    char *start, *ptr;
-
-    /*
-     * Handle the special case that we're reading from an
-     * old shell with fewer meta characters, so we need to
-     * metafy some more.  (It's not clear why the history
-     * file is metafied at all; some would say this is plain
-     * stupid.  But we're stuck with it now without some
-     * hairy workarounds for compatibility).
-     *
-     * This is rare so doesn't need to be that efficient; just
-     * allocate space off the heap.
-     *
-     * Note that our it's currently believed this all comes out in
-     * the wash in the non-uselex case owing to where unmetafication
-     * and metafication happen.
-     */
-    for (ptr = lineptr; *ptr; ptr++) {
-    if (*ptr == Meta && ptr[1])
-        ptr++;
-    else if (imeta(*ptr)) {
-        remeta = 1;
-        break;
-    }
-    }
-    if (remeta) {
-    unmetafy(lineptr, &remeta);
-    lineptr = metafy(lineptr, remeta, META_USEHEAP);
-    }
-
-    start = lineptr;
+    char *start = lineptr;

     if (uselex) {
     LinkList wordlist;


-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 24+ messages in thread

* PATCH: Remeta one frame earlier
  2015-02-23 13:46                               ` Mikael Magnusson
@ 2015-02-23 13:51                                 ` Mikael Magnusson
  2015-02-23 13:58                                   ` Peter Stephenson
  2015-02-23 14:05                                   ` Peter Stephenson
  0 siblings, 2 replies; 24+ messages in thread
From: Mikael Magnusson @ 2015-02-23 13:51 UTC (permalink / raw)
  To: zsh-workers

I realized I generated the last patch on top of yours since I applied it
with git-am, so here's a proper patch send from git send-mail instead,
against upstream git master.

---
 Src/hist.c | 62 ++++++++++++++++++++++++++++----------------------------------
 1 file changed, 28 insertions(+), 34 deletions(-)

diff --git a/Src/hist.c b/Src/hist.c
index 689a793..c5cf43e 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -2502,11 +2502,38 @@ readhistfile(char *fn, int err, int readflags)
 	    newflags |= HIST_MAKEUNIQUE;
 	while (fpos = ftell(in), (l = readhistline(0, &buf, &bufsiz, in))) {
 	    char *pt = buf;
+	    char *ptr;
+	    int remeta;
 
 	    if (l < 0) {
 		zerr("corrupt history file %s", fn);
 		break;
 	    }
+
+	    /*
+	     * Handle the special case that we're reading from an
+	     * old shell with fewer meta characters, so we need to
+	     * metafy some more.  (It's not clear why the history
+	     * file is metafied at all; some would say this is plain
+	     * stupid.  But we're stuck with it now without some
+	     * hairy workarounds for compatibility).
+	     *
+	     * This is rare so doesn't need to be that efficient; just
+	     * allocate space off the heap.
+	     */
+	    for (ptr = pt; *ptr; ptr++) {
+		if (*ptr == Meta && ptr[1])
+		    ptr++;
+		else if (imeta(*ptr)) {
+		    remeta = 1;
+		    break;
+		}
+	    }
+	    if (remeta) {
+		unmetafy(pt, &remeta);
+		pt = metafy(pt, remeta, META_USEHEAP);
+	    }
+
 	    if (*pt == ':') {
 		pt++;
 		stim = zstrtol(pt, NULL, 0);
@@ -3380,40 +3407,7 @@ histsplitwords(char *lineptr, short **wordsp, int *nwordsp, int *nwordposp,
     if (uselex) {
 	LinkList wordlist;
 	LinkNode wordnode;
-	int nwords_max, remeta = 0;
-	char *ptr;
-
-	/*
-	 * Handle the special case that we're reading from an
-	 * old shell with fewer meta characters, so we need to
-	 * metafy some more.  (It's not clear why the history
-	 * file is metafied at all; some would say this is plain
-	 * stupid.  But we're stuck with it now without some
-	 * hairy workarounds for compatibility).
-	 *
-	 * This is rare so doesn't need to be that efficient; just
-	 * allocate space off the heap.
-	 *
-	 * Note that our it's currently believed this all comes out in
-	 * the wash in the non-uselex case owing to where unmetafication
-	 * and metafication happen.
-	 */
-	for (ptr = lineptr; *ptr; ptr++) {
-	    if (*ptr != Meta && imeta(*ptr))
-		remeta++;
-	}
-	if (remeta) {
-	    char *ptr2, *line2;
-	    ptr2 = line2 = (char *)zhalloc((ptr - lineptr) + remeta + 1);
-	    for (ptr = lineptr; *ptr; ptr++) {
-		if (*ptr != Meta && imeta(*ptr)) {
-		    *ptr2++ = Meta;
-		    *ptr2++ = *ptr ^ 32;
-		} else
-		    *ptr2++ = *ptr;
-	    }
-	    lineptr = line2;
-	}
+	int nwords_max;
 
 	wordlist = bufferwords(NULL, lineptr, NULL,
 			       LEXFLAGS_COMMENTS_KEEP);
-- 
2.2.0.GIT


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: Remeta one frame earlier
  2015-02-23 13:51                                 ` PATCH: Remeta one frame earlier Mikael Magnusson
@ 2015-02-23 13:58                                   ` Peter Stephenson
  2015-02-23 14:05                                   ` Peter Stephenson
  1 sibling, 0 replies; 24+ messages in thread
From: Peter Stephenson @ 2015-02-23 13:58 UTC (permalink / raw)
  To: zsh-workers

On Mon, 23 Feb 2015 14:51:09 +0100
Mikael Magnusson <mikachu@gmail.com> wrote:
> I realized I generated the last patch on top of yours since I applied it
> with git-am, so here's a proper patch send from git send-mail instead,
> against upstream git master.

Yes, the zle stuff comes near the top, so this should, too.

Looks fine... I can think of alternative memory allocation strategies
(allocate from malloc, free the old buf, and assign the result to the
new buf), but I don't think that's critical for what we need.

pws


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: Remeta one frame earlier
  2015-02-23 13:51                                 ` PATCH: Remeta one frame earlier Mikael Magnusson
  2015-02-23 13:58                                   ` Peter Stephenson
@ 2015-02-23 14:05                                   ` Peter Stephenson
  2015-02-23 14:32                                     ` Mikael Magnusson
  1 sibling, 1 reply; 24+ messages in thread
From: Peter Stephenson @ 2015-02-23 14:05 UTC (permalink / raw)
  To: zsh-workers

On Mon, 23 Feb 2015 14:51:09 +0100
Mikael Magnusson <mikachu@gmail.com> wrote:
> +	    int remeta;
>  
>  	    if (l < 0) {
>  		zerr("corrupt history file %s", fn);
>  		break;
>  	    }
> +
> +	    /*
> +	     * Handle the special case that we're reading from an
> +	     * old shell with fewer meta characters, so we need to
> +	     * metafy some more.  (It's not clear why the history
> +	     * file is metafied at all; some would say this is plain
> +	     * stupid.  But we're stuck with it now without some
> +	     * hairy workarounds for compatibility).
> +	     *
> +	     * This is rare so doesn't need to be that efficient; just
> +	     * allocate space off the heap.
> +	     */
> +	    for (ptr = pt; *ptr; ptr++) {
> +		if (*ptr == Meta && ptr[1])
> +		    ptr++;
> +		else if (imeta(*ptr)) {
> +		    remeta = 1;
> +		    break;
> +		}
> +	    }
> +	    if (remeta) {
> +		unmetafy(pt, &remeta);
> +		pt = metafy(pt, remeta, META_USEHEAP);
> +	    }
> +

... except you haven't initialised remeta to 0.

If we do stick with the heap, I'm wondering if we need to free it more
often than we do.  Maybe the freeheap() below should be if (uselex ||
remeta), and maybe it should be right at the end of the loop for
safety.

Long discussion for moving a bit of code...

pws


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: Remeta one frame earlier
  2015-02-23 14:05                                   ` Peter Stephenson
@ 2015-02-23 14:32                                     ` Mikael Magnusson
  2015-02-23 17:32                                       ` Peter Stephenson
  0 siblings, 1 reply; 24+ messages in thread
From: Mikael Magnusson @ 2015-02-23 14:32 UTC (permalink / raw)
  To: zsh-workers

On Mon, Feb 23, 2015 at 3:05 PM, Peter Stephenson <p.stephenson@samsung.com> wrote:
> On Mon, 23 Feb 2015 14:51:09 +0100
> Mikael Magnusson <mikachu@gmail.com> wrote:
>> +         int remeta;
>
> ... except you haven't initialised remeta to 0.

Oops, I added it here after the compiler complained, and then removed
it from the other file after, so overlooked the initialization. I guess
I got lucky with my zero pages. Here's an incremental patch that fixes this,
and stops having two separate pt/ptr variables.

> If we do stick with the heap, I'm wondering if we need to free it more
> often than we do.  Maybe the freeheap() below should be if (uselex ||
> remeta), and maybe it should be right at the end of the loop for
> safety.

I'll leave this part to you... At least nothing after the freeheap()
uses pt, so it shouldn't be any less safe than before™.

> Long discussion for moving a bit of code...

But super fun~

---
 Src/hist.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/Src/hist.c b/Src/hist.c
index c5cf43e..5a8c75e 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -2501,9 +2501,8 @@ readhistfile(char *fn, int err, int readflags)
 	 || (hist_ignore_all_dups && newflags & hist_skip_flags))
 	    newflags |= HIST_MAKEUNIQUE;
 	while (fpos = ftell(in), (l = readhistline(0, &buf, &bufsiz, in))) {
-	    char *pt = buf;
-	    char *ptr;
-	    int remeta;
+	    char *pt;
+	    int remeta = 0;
 
 	    if (l < 0) {
 		zerr("corrupt history file %s", fn);
@@ -2521,17 +2520,19 @@ readhistfile(char *fn, int err, int readflags)
 	     * This is rare so doesn't need to be that efficient; just
 	     * allocate space off the heap.
 	     */
-	    for (ptr = pt; *ptr; ptr++) {
-		if (*ptr == Meta && ptr[1])
-		    ptr++;
-		else if (imeta(*ptr)) {
+	    for (pt = buf; *pt; pt++) {
+		if (*pt == Meta && pt[1])
+		    pt++;
+		else if (imeta(*pt)) {
 		    remeta = 1;
 		    break;
 		}
 	    }
 	    if (remeta) {
-		unmetafy(pt, &remeta);
-		pt = metafy(pt, remeta, META_USEHEAP);
+		unmetafy(buf, &remeta);
+		pt = metafy(buf, remeta, META_USEHEAP);
+	    } else {
+		pt = buf;
 	    }
 
 	    if (*pt == ':') {
-- 
2.2.0.GIT


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: PATCH: Remeta one frame earlier
  2015-02-23 14:32                                     ` Mikael Magnusson
@ 2015-02-23 17:32                                       ` Peter Stephenson
  0 siblings, 0 replies; 24+ messages in thread
From: Peter Stephenson @ 2015-02-23 17:32 UTC (permalink / raw)
  To: zsh-workers

On Mon, 23 Feb 2015 15:32:55 +0100
Mikael Magnusson <mikachu@gmail.com> wrote:
> > If we do stick with the heap, I'm wondering if we need to free it more
> > often than we do.  Maybe the freeheap() below should be if (uselex ||
> > remeta), and maybe it should be right at the end of the loop for
> > safety.
> 
> I'll leave this part to you... At least nothing after the freeheap()
> uses pt, so it shouldn't be any less safe than before™.

pws

diff --git a/Src/hist.c b/Src/hist.c
index c530e72..aa07ce8 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -2593,8 +2593,6 @@ readhistfile(char *fn, int err, int readflags)
 	    start = pt;
 	    uselex = isset(HISTLEXWORDS) && !(readflags & HFILE_FAST);
 	    histsplitwords(pt, &words, &nwords, &nwordpos, uselex);
-	    if (uselex)
-		freeheap();
 
 	    he->nwords = nwordpos/2;
 	    if (he->nwords) {
@@ -2607,6 +2605,12 @@ readhistfile(char *fn, int err, int readflags)
 		freehistnode(&he->node);
 		curhist--;
 	    }
+	    /*
+	     * Do this last out of paranoia in case use of
+	     * heap is disguised...
+	     */
+	    if (uselex || remeta)
+		freeheap();
 	}
 	if (start && readflags & HFILE_USE_OPTIONS) {
 	    zsfree(lasthist.text);


^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2015-02-23 17:32 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-19 10:13 PATCH: parse from even deeper in hell Peter Stephenson
2015-02-19 21:47 ` Mikael Magnusson
2015-02-19 22:03   ` Peter Stephenson
2015-02-20  3:16     ` Mikael Magnusson
2015-02-20  3:22       ` Mikael Magnusson
2015-02-20  3:33         ` Mikael Magnusson
2015-02-20  3:43           ` Mikael Magnusson
2015-02-20  4:19             ` Ray Andrews
2015-02-20  9:54               ` Peter Stephenson
2015-02-20 10:00             ` Peter Stephenson
2015-02-20 10:12               ` Mikael Magnusson
2015-02-22 18:26                 ` Peter Stephenson
2015-02-23  9:54                   ` Peter Stephenson
2015-02-23 10:11                     ` Peter Stephenson
2015-02-23 11:35                       ` Mikael Magnusson
2015-02-23 12:36                         ` Peter Stephenson
2015-02-23 12:57                           ` Peter Stephenson
2015-02-23 13:38                             ` Mikael Magnusson
2015-02-23 13:46                               ` Mikael Magnusson
2015-02-23 13:51                                 ` PATCH: Remeta one frame earlier Mikael Magnusson
2015-02-23 13:58                                   ` Peter Stephenson
2015-02-23 14:05                                   ` Peter Stephenson
2015-02-23 14:32                                     ` Mikael Magnusson
2015-02-23 17:32                                       ` Peter Stephenson

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).