zsh-workers
 help / color / mirror / code / Atom feed
* coredump on C-c
@ 2013-09-26 17:52 Eitan Adler
  2013-09-26 21:31 ` Bart Schaefer
  0 siblings, 1 reply; 10+ messages in thread
From: Eitan Adler @ 2013-09-26 17:52 UTC (permalink / raw)
  To: zsh-workers

Sometimes, when pressing Ctrl-c to exit a program I get a coredump.  I
have not been able to isolate the exact case when this happens.

The following is some debugging information.  I am happy to provide
more information as well as test patches.

[10005 eitan@gravity (99%) ~/badzsh ]%uname -rms
FreeBSD 10.0-CURRENT amd64
[10027 eitan@gravity (99%) ~/badzsh !127!]%gdb761 zsh zsh.core
GNU gdb (GDB) 7.6.1 [GDB v7.6.1 for FreeBSD]
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-portbld-freebsd10.0".
For bug reporting instructions, please see:
Core was generated by `zsh'.
Program terminated with signal 11, Segmentation fault.
#0  0x000000000046cbc2 in freeparamnode (hn=0x990140) at params.c:4756
4756            pm->gsu.s->unsetfn(pm, 1);
gdb$ bt
#0  0x000000000046cbc2 in freeparamnode (hn=0x990140) at params.c:4756
#1  0x0000000000435c43 in resizehashtable (ht=0x98d938, newsize=0x110)
at hashtable.c:491
#2  0x0000000000435bcb in emptyhashtable (ht=0x98d938) at hashtable.c:516
#3  0x0000000000434f05 in deletehashtable (ht=0x98d938) at hashtable.c:126
#4  0x000000000046d6ea in deleteparamtable (t=0x98d938) at params.c:516
#5  0x000000000046c931 in hashsetfn (pm=0x988ec0, x=0x0) at params.c:3197
#6  0x000000000046c483 in stdunsetfn (pm=0x988ec0, exp=0x0) at params.c:3076
#7  0x000000000047749c in unsetparam_pm (pm=0x988ec0, altflag=0x0,
exp=0x0) at params.c:2983
#8  0x0000000000479901 in scanendscope (hn=0x988ec0, flags=0x0) at params.c:4738
#9  0x0000000000435a9f in scanmatchtable (ht=0x8ca0d8, pprog=0x0,
sorted=0x0, flags1=0x0, flags2=0x0, scanfunc=0x479630 <scanendscope>,
scanflags=0x0) at hashtable.c:428
#10 0x0000000000435b9f in scanhashtable (ht=0x8ca0d8, sorted=0x0,
flags1=0x0, flags2=0x0, scanfunc=0x479630 <scanendscope>,
scanflags=0x0) at hashtable.c:444
#11 0x0000000000479624 in endparamscope () at params.c:4673
#12 0x000000000053ded6 in zleaftertrap (dummy=0x87ea50 <zshhooks+80>,
dat=0x0) at zle_main.c:1856
#13 0x0000000000462ed5 in runhookdef (h=0x87ea50 <zshhooks+80>, d=0x0)
at module.c:990
#14 0x00000000004959ff in dotrapargs (sig=0x2, sigtr=0x8b9778
<sigtrapped+8>, sigfn=0x984e88) at signals.c:1238
#15 0x0000000000495d23 in dotrap (sig=0x2) at signals.c:1312
#16 0x00000000004943d8 in handletrap (sig=0x2) at signals.c:1060
#17 0x000000000049389b in zhandler (sig=0x2) at signals.c:603
#18 0x000000000045ed67 in zfree (p=0x98fbc8, sz=0x0) at mem.c:1481
#19 0x000000000046cc10 in freeparamnode (hn=0x98fbc8) at params.c:4761
#20 0x0000000000435c43 in resizehashtable (ht=0x98d938, newsize=0x110)
at hashtable.c:491
#21 0x0000000000435bcb in emptyhashtable (ht=0x98d938) at hashtable.c:516
#22 0x0000000000434f05 in deletehashtable (ht=0x98d938) at hashtable.c:126
#23 0x000000000046d6ea in deleteparamtable (t=0x98d938) at params.c:516
#24 0x000000000046c931 in hashsetfn (pm=0x988ec0, x=0x0) at params.c:3197
#25 0x000000000046c483 in stdunsetfn (pm=0x988ec0, exp=0x0) at params.c:3076
#26 0x000000000047749c in unsetparam_pm (pm=0x988ec0, altflag=0x0,
exp=0x0) at params.c:2983
#27 0x0000000000479901 in scanendscope (hn=0x988ec0, flags=0x0) at params.c:4738
#28 0x0000000000435a9f in scanmatchtable (ht=0x8ca0d8, pprog=0x0,
sorted=0x0, flags1=0x0, flags2=0x0, scanfunc=0x479630 <scanendscope>,
scanflags=0x0) at hashtable.c:428
#29 0x0000000000435b9f in scanhashtable (ht=0x8ca0d8, sorted=0x0,
flags1=0x0, flags2=0x0, scanfunc=0x479630 <scanendscope>,
scanflags=0x0) at hashtable.c:444
#30 0x0000000000479624 in endparamscope () at params.c:4673
#31 0x000000000042186d in runshfunc (prog=0xaa0df0, wrap=0x0,
name=0x800930040 "_main_complete") at exec.c:4854
#32 0x00000000004efddb in comp_wrapper (prog=0xaa0df0, w=0x0,
name=0x800930040 "_main_complete") at complete.c:1455
#33 0x0000000000421791 in runshfunc (prog=0xaa0df0, wrap=0x8863b0
<wrapper>, name=0x800930040 "_main_complete") at exec.c:4834
#34 0x0000000000421438 in doshfunc (shfunc=0x9773b8, doshargs=0x0,
noreturnval=0x0) at exec.c:4742
#35 0x00000000004fe9b0 in callcompfunc (s=0x8008e5838 "al",
fn=0x97b320 "_main_complete") at compcore.c:839
#36 0x00000000004f31ff in makecomplist (s=0x8008e5838 "al", incmd=0x0,
lst=0x0) at compcore.c:990
#37 0x00000000004f2994 in do_completion (dummy=0x88c0a8 <zlehooks+40>,
dat=0x7fffffffcc00) at compcore.c:349
#38 0x0000000000462f55 in runhookdef (h=0x88c0a8 <zlehooks+40>,
d=0x7fffffffcc00) at module.c:996
#39 0x000000000055b463 in docompletion (s=0xad80b0 "al", lst=0x0,
incmd=0x0) at zle_tricky.c:2226
#40 0x00000000005529f4 in docomplete (lst=0x0) at zle_tricky.c:866
#41 0x0000000000552dc6 in expandorcomplete (args=0x8945e8 <zlenoargs>)
at zle_tricky.c:315
#42 0x00000000005516f5 in completecall (args=0x8945e8 <zlenoargs>) at
zle_tricky.c:208
#43 0x000000000053c16d in execzlefunc (func=0x887098 <thingies+1880>,
args=0x8945e8 <zlenoargs>, set_bindk=0x0) at zle_main.c:1320
#44 0x000000000053bcf3 in zlecore () at zle_main.c:1053
#45 0x000000000053cd78 in zleread (lp=0x8b8d90 <prompt>, rp=0x8b8cd8
<rprompt>, flags=0x3, context=0x0) at zle_main.c:1228
#46 0x000000000053dac5 in zle_main_entry (cmd=0x1, ap=0x7fffffffd350)
at zle_main.c:1883
#47 0x0000000000447143 in zleentry (cmd=0x1) at init.c:1462
#48 0x0000000000448722 in inputline () at input.c:281
#49 0x000000000044838b in ingetc () at input.c:217
#50 0x000000000043975d in ihgetc () at hist.c:279
#51 0x00000000004521b7 in gettok () at lex.c:714
#52 0x0000000000451ef8 in zshlex () at lex.c:395
#53 0x0000000000479e7e in parse_event () at parse.c:451
#54 0x0000000000442beb in loop (toplevel=0x1, justonce=0x0) at init.c:132
#55 0x000000000044778d in zsh_main (argc=0x1, argv=0x7fffffffd6c0) at
init.c:1617
#56 0x0000000000400412 in main (argc=0x1, argv=0x7fffffffd6c0) at ./main.c:93

gdb$ frame 0
Stack level 0, frame at 0x7fffffffbc50:
 rip = 0x46cbc2 in freeparamnode (params.c:4756); saved rip 0x435c43
 called by frame at 0x7fffffffbc90
 source language c.
 Arglist at 0x7fffffffbc40, args: hn=0x990140
 Locals at 0x7fffffffbc40, Previous frame's sp is 0x7fffffffbc50
 Saved registers:
  rbp at 0x7fffffffbc40, rip at 0x7fffffffbc48
hn = 0x990140
pm = 0x990140
gdb$ info registers
rax            0x0      0x0
rbx            0x0      0x0
rcx            0x808    0x808
rdx            0x0      0x0
rsi            0x1      0x1
rdi            0x990140 0x990140
rbp            0x7fffffffbc40   0x7fffffffbc40
rsp            0x7fffffffbc30   0x7fffffffbc30
r8             0xffffffff8142e028       0xffffffff8142e028
r9             0x0      0x0
r10            0x0      0x0
r11            0x246    0x246
r12            0x7fffffffd6c0   0x7fffffffd6c0
r13            0x0      0x0
r14            0x1      0x1
r15            0x7fffffffd6d0   0x7fffffffd6d0
rip            0x46cbc2 0x46cbc2 <freeparamnode+50>
eflags         0x10202  0x10202
cs             0x43     0x43
ss             0x3b     0x3b
ds             0x0      0x0
es             0x0      0x0
fs             0x0      0x0
gs             0x0      0x0
Current language:  auto; currently minimal

-- 
Eitan Adler


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: coredump on C-c
  2013-09-26 17:52 coredump on C-c Eitan Adler
@ 2013-09-26 21:31 ` Bart Schaefer
  2013-09-27  1:10   ` Eitan Adler
                     ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Bart Schaefer @ 2013-09-26 21:31 UTC (permalink / raw)
  To: Eitan Adler; +Cc: Zsh hackers list

[-- Attachment #1: Type: text/plain, Size: 2375 bytes --]

On Thu, Sep 26, 2013 at 10:52 AM, Eitan Adler <lists@eitanadler.com> wrote:

> Sometimes, when pressing Ctrl-c to exit a program I get a coredump.  I
> have not been able to isolate the exact case when this happens.
>
> #12 0x000000000053ded6 in zleaftertrap (dummy=0x87ea50 <zshhooks+80>,
> dat=0x0) at zle_main.c:1856

...

> name=0x800930040 "_main_complete") at exec.c:4854
>


If this is an accurate backtrace, and you've accurately described the
circumstances in which you pressed ctrl-c, then it indicates that you've
invoked completion, which then started an external program, which you then
killed with ctrl-c.  This is causing the hook function set by zle to
attempt to delete the scope for the special completion variables from
inside the signal handler, which fails because (if I'm not mistaken) that
parameter scope has already begun to be deleted before the signal handler
was called.

A first question is, is the first sentence of the previous paragraph
correct?  And if so, what are you doing invoking a program that you might
have to kill off with ctrl-c from inside a completion widget?  I'd say that
particular widget is in need of a rewrite.

However, from the "it still shouldn't crash" perspective, there's some kind
of a race condition, the gist of which is that  somewhere in this chain ...

#17 0x000000000049389b in zhandler (sig=0x2) at signals.c:603
#18 0x000000000045ed67 in zfree (p=0x98fbc8, sz=0x0) at mem.c:1481
#19 0x000000000046cc10 in freeparamnode (hn=0x98fbc8) at params.c:4761
#20 0x0000000000435c43 in resizehashtable (ht=0x98d938, newsize=0x110)
at hashtable.c:491
#21 0x0000000000435bcb in emptyhashtable (ht=0x98d938) at hashtable.c:516
#22 0x0000000000434f05 in deletehashtable (ht=0x98d938) at hashtable.c:126
#23 0x000000000046d6ea in deleteparamtable (t=0x98d938) at params.c:516
#24 0x000000000046c931 in hashsetfn (pm=0x988ec0, x=0x0) at params.c:3197
#25 0x000000000046c483 in stdunsetfn (pm=0x988ec0, exp=0x0) at params.c:3076
#26 0x000000000047749c in unsetparam_pm (pm=0x988ec0, altflag=0x0,
exp=0x0) at params.c:2983

... we should be queuing signals.  zfree() does it internally, but that's
not enough to stop corruption in freeparamnode() if the signal arrives
before all the parts of the node are cleaned p, and we probably ought to be
queuing signals around the entire "free all the nodes" loop in
resizehashtable().

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: coredump on C-c
  2013-09-26 21:31 ` Bart Schaefer
@ 2013-09-27  1:10   ` Eitan Adler
  2013-09-27  3:49   ` Bart Schaefer
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Eitan Adler @ 2013-09-27  1:10 UTC (permalink / raw)
  To: Bart Schaefer; +Cc: Zsh hackers list

On Thu, Sep 26, 2013 at 5:31 PM, Bart Schaefer
<schaefer@brasslantern.com> wrote:
> On Thu, Sep 26, 2013 at 10:52 AM, Eitan Adler <lists@eitanadler.com> wrote:
>>
>> Sometimes, when pressing Ctrl-c to exit a program I get a coredump.  I
>> have not been able to isolate the exact case when this happens.
>>
>> #12 0x000000000053ded6 in zleaftertrap (dummy=0x87ea50 <zshhooks+80>,
>> dat=0x0) at zle_main.c:1856
>
> ...
>>
>> name=0x800930040 "_main_complete") at exec.c:4854
>
>
>
> If this is an accurate backtrace, and you've accurately described the
> circumstances in which you pressed ctrl-c, then it indicates that you've
> invoked completion,

This sounds possible and even likely.

> A first question is, is the first sentence of the previous paragraph
> correct?  And if so, what are you doing invoking a program that you might
> have to kill off with ctrl-c from inside a completion widget?

Now that I think about it, it may be pressing ctrl-c to try and stop a
<tab> from completing due the time it takes.  I have not been able to
isolate the coredumps yet but I'll be on the lookout for this

>   I'd say that
> particular widget is in need of a rewrite.

This is my zshrc: https://code.google.com/p/config/source/browse/zsh/zshrc
and these are the files referenced by my zshrc:
https://code.google.com/p/config/source/browse/#hg%2Fzsh

I will try to press ctrl-c during completion to see if that causes the crash.

> ... we should be queuing signals.  zfree() does it internally, but that's
> not enough to stop corruption in freeparamnode() if the signal arrives
> before all the parts of the node are cleaned p, and we probably ought to be
> queuing signals around the entire "free all the nodes" loop in
> resizehashtable().

I'm happy to test patches, but I don't know zsh internals well enough
to comment on this.


-- 
Eitan Adler


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: coredump on C-c
  2013-09-26 21:31 ` Bart Schaefer
  2013-09-27  1:10   ` Eitan Adler
@ 2013-09-27  3:49   ` Bart Schaefer
  2013-09-27  4:20     ` Bart Schaefer
  2013-09-27  5:00   ` Eitan Adler
  2013-10-16 21:40   ` Eitan Adler
  3 siblings, 1 reply; 10+ messages in thread
From: Bart Schaefer @ 2013-09-27  3:49 UTC (permalink / raw)
  To: Zsh hackers list

On Sep 26,  2:31pm, Bart Schaefer wrote:
}
} ... we should be queuing signals.  zfree() does it internally, but that's
} not enough to stop corruption in freeparamnode() if the signal arrives
} before all the parts of the node are cleaned p, and we probably ought to be
} queuing signals around the entire "free all the nodes" loop in
} resizehashtable().

I'm a bit surprised we haven't run into this before.  Nearly everything
in hashtable.c would benefit from queuing signals around it.  The litte
bin_hashinfo() builtin that's enabled by ZSH_HASH_DEBUG already does,
and it probably needs it least of anything.

How carried away do we want to get with this?

For example, there may be a performance hit for queuing signals around
all the hash table traversals in the add* and scan* functions.  If we
assume restartable syscalls that's probably OK for scan, and only a
little dangerous for add, which is likely why this hasn't come up in
the past.

The free* functions definitely need it, though ... which probably means
similar functions in other modules do as well.  Unless we can be sure
they're only called from somewhere that already does queuing, that is.

It's a tradeoff between doing the queueing in the smallest area possible,
and toggling it as seldom as possible.

Here's a stab at it, but I won't commit this until there's been some
more discussion.

diff --git a/Src/hashtable.c b/Src/hashtable.c
index ef18792..5c16efb 100644
--- a/Src/hashtable.c
+++ b/Src/hashtable.c
@@ -96,6 +96,8 @@ newhashtable(int size, UNUSED(char const *name), UNUSED(PrintTableStats printinf
 {
     HashTable ht;
 
+    queue_signals();
+
     ht = (HashTable) zshcalloc(sizeof *ht);
 #ifdef ZSH_HASH_DEBUG
     ht->next = NULL;
@@ -113,6 +115,8 @@ newhashtable(int size, UNUSED(char const *name), UNUSED(PrintTableStats printinf
     ht->ct = 0;
     ht->scan = NULL;
     ht->scantab = NULL;
+
+    unqueue_signals();
     return ht;
 }
 
@@ -123,6 +127,8 @@ newhashtable(int size, UNUSED(char const *name), UNUSED(PrintTableStats printinf
 mod_export void
 deletehashtable(HashTable ht)
 {
+    queue_signals();
+
     ht->emptytable(ht);
 #ifdef ZSH_HASH_DEBUG
     if(ht->next)
@@ -137,6 +143,8 @@ deletehashtable(HashTable ht)
 #endif /* ZSH_HASH_DEBUG */
     zfree(ht->nodes, ht->hsize * sizeof(HashNode));
     zfree(ht, sizeof(*ht));
+
+    unqueue_signals();
 }
 
 /* Add a node to a hash table.                          *
@@ -279,6 +287,8 @@ removehashnode(HashTable ht, const char *nam)
     if (!hp)
 	return NULL;
 
+    queue_signals();
+
     /* else check if the key in the first one matches */
     if (ht->cmpnodes(hp->nam, nam) == 0) {
 	ht->nodes[hashval] = hp->next;
@@ -294,6 +304,7 @@ removehashnode(HashTable ht, const char *nam)
 	    } else if(ht->scan->u.u == hp)
 		ht->scan->u.u = hp->next;
 	}
+	unqueue_signals();
 	return hp;
     }
 
@@ -307,6 +318,8 @@ removehashnode(HashTable ht, const char *nam)
 	}
     }
 
+    unqueue_signals();
+
     /* else it is not in the list, so return NULL */
     return NULL;
 }
@@ -455,6 +468,8 @@ expandhashtable(HashTable ht)
     struct hashnode **onodes, **ha, *hn, *hp;
     int i, osize;
 
+    queue_signals();
+
     osize = ht->hsize;
     onodes = ht->nodes;
 
@@ -472,6 +487,8 @@ expandhashtable(HashTable ht)
 	}
     }
     zfree(onodes, osize * sizeof(HashNode));
+
+    unqueue_signals();
 }
 
 /* Empty the hash table and resize it if necessary */
@@ -483,6 +500,8 @@ resizehashtable(HashTable ht, int newsize)
     struct hashnode **ha, *hn, *hp;
     int i;
 
+    queue_signals();
+
     /* free all the hash nodes */
     ha = ht->nodes;
     for (i = 0; i < ht->hsize; i++, ha++) {
@@ -505,6 +524,8 @@ resizehashtable(HashTable ht, int newsize)
     }
 
     ht->ct = 0;
+
+    unqueue_signals();
 }
 
 /* Generic method to empty a hash table */
@@ -720,11 +741,15 @@ freecmdnamnode(HashNode hn)
 {
     Cmdnam cn = (Cmdnam) hn;
  
+    queue_signals();
+
     zsfree(cn->node.nam);
     if (cn->node.flags & HASHED)
 	zsfree(cn->u.cmd);
  
     zfree(cn, sizeof(struct cmdnam));
+
+    unqueue_signals();
 }
 
 /* Print an element of the cmdnamtab hash table (external command) */
@@ -884,6 +909,8 @@ freeshfuncnode(HashNode hn)
 {
     Shfunc shf = (Shfunc) hn;
 
+    queue_signals();
+
     zsfree(shf->node.nam);
     if (shf->funcdef)
 	freeeprog(shf->funcdef);
@@ -898,6 +925,8 @@ freeshfuncnode(HashNode hn)
 	zfree(shf->sticky, sizeof(*shf->sticky));
     }
     zfree(shf, sizeof(struct shfunc));
+
+    unqueue_signals();
 }
 
 /* Print a shell function */
@@ -1129,10 +1158,14 @@ static void
 freealiasnode(HashNode hn)
 {
     Alias al = (Alias) hn;
- 
+
+    queue_signals();
+
     zsfree(al->node.nam);
     zsfree(al->text);
     zfree(al, sizeof(struct alias));
+
+    unqueue_signals();
 }
 
 /* Print an alias */
@@ -1327,6 +1360,8 @@ freehistdata(Histent he, int unlink)
     if (!(he->node.flags & (HIST_DUP | HIST_TMPSTORE)))
 	removehashnode(histtab, he->node.nam);
 
+    queue_signals();
+
     zsfree(he->node.nam);
     if (he->nwords)
 	zfree(he->words, he->nwords*2*sizeof(short));
@@ -1341,4 +1376,6 @@ freehistdata(Histent he, int unlink)
 	    he->down->up = he->up;
 	}
     }
+
+    unqueue_signals();
 }


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: coredump on C-c
  2013-09-27  3:49   ` Bart Schaefer
@ 2013-09-27  4:20     ` Bart Schaefer
  2013-09-27 15:50       ` Peter Stephenson
  0 siblings, 1 reply; 10+ messages in thread
From: Bart Schaefer @ 2013-09-27  4:20 UTC (permalink / raw)
  To: Zsh hackers list

On Sep 26,  8:49pm, Bart Schaefer wrote:
}
} For example, there may be a performance hit for queuing signals around
} all the hash table traversals in the add* and scan* functions.  If we
} assume restartable syscalls that's probably OK for scan

By "that's" I mean "NOT queuing signals there is" in case that wasn't
clear.

It occurs to me, though, that queuing signals all over hashtable.c may
be an over-reaction.  Maybe the problem is just with re-entering the
endparamscope() routine, and that's where queue_signals() is needed.

Which is a much smaller change, and probably harmless even if it does
not help:

diff --git a/Src/params.c b/Src/params.c
index 8649178..d6711e4 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -4667,10 +4667,12 @@ startparamscope(void)
 mod_export void
 endparamscope(void)
 {
+    queue_signals();
     locallevel--;
     /* This pops anything from a higher locallevel */
     saveandpophiststack(0, HFILE_USE_OPTIONS);
     scanhashtable(paramtab, 0, 0, 0, scanendscope, 0);
+    unqueue_signals();
 }
 
 /**/


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: coredump on C-c
  2013-09-26 21:31 ` Bart Schaefer
  2013-09-27  1:10   ` Eitan Adler
  2013-09-27  3:49   ` Bart Schaefer
@ 2013-09-27  5:00   ` Eitan Adler
  2013-10-16 21:40   ` Eitan Adler
  3 siblings, 0 replies; 10+ messages in thread
From: Eitan Adler @ 2013-09-27  5:00 UTC (permalink / raw)
  To: Bart Schaefer; +Cc: Zsh hackers list

On Thu, Sep 26, 2013 at 5:31 PM, Bart Schaefer
<schaefer@brasslantern.com> wrote:

> If this is an accurate backtrace, and you've accurately described the
> circumstances in which you pressed ctrl-c, t....

OK, I've managed to figure out how to reproduce this crash:

git <tab> C-c.

so your analysis about this being in the completion function is 100% correct.

I previously thought it was on exit from a program because I have a
habit of typing my command/hitting tab before the programs fully exit.



-- 
Eitan Adler


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: coredump on C-c
  2013-09-27  4:20     ` Bart Schaefer
@ 2013-09-27 15:50       ` Peter Stephenson
  2013-09-27 19:50         ` Bart Schaefer
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Stephenson @ 2013-09-27 15:50 UTC (permalink / raw)
  To: Zsh hackers list

On Thu, 26 Sep 2013 21:20:36 -0700
Bart Schaefer <schaefer@brasslantern.com> wrote:
> It occurs to me, though, that queuing signals all over hashtable.c may
> be an over-reaction.  Maybe the problem is just with re-entering the
> endparamscope() routine, and that's where queue_signals() is needed.

Really, we should be doing as little from signals as possible.  The fact
that the trap (immediately down (numerically) the backtrace from the bit
you quoted) is running an arbitrary function while anything else can be
happening outside the handler is a bit worrying.  I have a feeling we
decided at one time that it would be a good strategy only to have trap
handlers run at certain safe points (i.e. the reverse strategy from
blocking them at dangerous points)?

endparamscope() certainly seems an unobjectionable place to queue
signals, though.  If it turns out to be doing good, maybe the next thing
is to look through the next higher function, runshfunc(), for other
similar points.

But presumably there's nothing to prevent the shell from doing any of
the hashtable things you looked at before, except caused by actions in
the function rather than change of scope, at the point where the SIGINT
arrives and the trap gets run.

pws


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: coredump on C-c
  2013-09-27 15:50       ` Peter Stephenson
@ 2013-09-27 19:50         ` Bart Schaefer
  0 siblings, 0 replies; 10+ messages in thread
From: Bart Schaefer @ 2013-09-27 19:50 UTC (permalink / raw)
  To: Zsh hackers list

On Fri, Sep 27, 2013 at 8:50 AM, Peter Stephenson
<p.w.stephenson@ntlworld.com> wrote:
>
> On Thu, 26 Sep 2013 21:20:36 -0700
> Bart Schaefer <schaefer@brasslantern.com> wrote:
> > It occurs to me, though, that queuing signals all over hashtable.c may
> > be an over-reaction.  Maybe the problem is just with re-entering the
> > endparamscope() routine, and that's where queue_signals() is needed.
>
> Really, we should be doing as little from signals as possible.  The fact
> that the trap (immediately down (numerically) the backtrace from the bit
> you quoted) is running an arbitrary function while anything else can be
> happening outside the handler is a bit worrying.

This particular instance is a special case, where the internal SIGINT
handler is calling the "after_hook" function installed by the ZLE
modules, and (if I'm interpreting things correctly) the result is that
the special scope for ZLE parameters is ended twice.  There may be a
different way to end the ZLE scope when the editor is INTerrupted,
which would also avoid this problem.

> I have a feeling we
> decided at one time that it would be a good strategy only to have trap
> handlers run at certain safe points (i.e. the reverse strategy from
> blocking them at dangerous points)?

Yes, we're using signal masks that way, it's the strategy we adopted
to fix some SIGWINCH bugs, but this case involves a place where we
need to be responsive to SIGINT (lest a runaway widget lock up the
shell).

> But presumably there's nothing to prevent the shell from doing any of
> the hashtable things you looked at before, except caused by actions in
> the function rather than change of scope, at the point where the SIGINT
> arrives and the trap gets run.

I think user-defined traps that might run arbitrary shell code are
deferred to "safe" places in most cases.  In this special case,
though, it's an internally-defined trap gone rogue, and given that
similar rogues could come in from other loadable modules, some
defensive programming in the core shell seems a good idea.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: coredump on C-c
  2013-09-26 21:31 ` Bart Schaefer
                     ` (2 preceding siblings ...)
  2013-09-27  5:00   ` Eitan Adler
@ 2013-10-16 21:40   ` Eitan Adler
  2013-10-17  0:04     ` Bart Schaefer
  3 siblings, 1 reply; 10+ messages in thread
From: Eitan Adler @ 2013-10-16 21:40 UTC (permalink / raw)
  To: Bart Schaefer; +Cc: Zsh hackers list

On Thu, Sep 26, 2013 at 5:31 PM, Bart Schaefer
<schaefer@brasslantern.com> wrote:
> On Thu, Sep 26, 2013 at 10:52 AM, Eitan Adler <lists@eitanadler.com> wrote:

> ... we should be queuing signals.  zfree() does it internally, but that's
> not enough to stop corruption in freeparamnode() if the signal arrives
> before all the parts of the node are cleaned p, and we probably ought to be
> queuing signals around the entire "free all the nodes" loop in
> resizehashtable().

I'm hoping to prod about this again, this time with a slightly
different backtrace.

gdb$ bt
#0  0x0000000000493e05 in wait_for_processes () at signals.c:474
#1  0x0000000000493859 in zhandler (sig=0x14) at signals.c:592
#2  0x000000000045ed67 in zfree (p=0x989208, sz=0x0) at mem.c:1481
#3  0x000000000046159a in free (p=0x989208) at mem.c:1487
#4  0x0000000000421bf0 in execrestore () at exec.c:5108
#5  0x0000000000495a28 in dotrapargs (sig=0x2, sigtr=0x8b9778
<sigtrapped+8>, sigfn=0x985ef8) at signals.c:1246
#6  0x0000000000495d23 in dotrap (sig=0x2) at signals.c:1312
#7  0x00000000004943d8 in handletrap (sig=0x2) at signals.c:1060
#8  0x000000000049389b in zhandler (sig=0x2) at signals.c:603
#9  0x000000000044c29c in waitforpid (pid=0xef7d, wait_cmd=0x0) at jobs.c:1291
#10 0x000000000041e9de in getoutput (cmd=0x8008e53fc "__vcs_dir",
qt=0x1) at exec.c:3741
#11 0x0000000000498372 in stringsubst (list=0x7fffffffcfb8,
node=0x7fffffffcfa0, pf_flags=0x4, asssub=0x0) at subst.c:293
#12 0x0000000000497483 in prefork (list=0x7fffffffcfb8, flags=0x6) at subst.c:77
#13 0x0000000000426756 in addvars (state=0x7fffffffd170, pc=0x930468,
addflags=0x0) at exec.c:2199
#14 0x000000000041cd63 in execsimple (state=0x7fffffffd170) at exec.c:1097
#15 0x000000000041c334 in execlist (state=0x7fffffffd170,
dont_change_job=0x1, exiting=0x0) at exec.c:1216
#16 0x000000000041bf07 in execode (p=0x8c6930, dont_change_job=0x1,
exiting=0x0, context=0x62546e "shfunc") at exec.c:1057
#17 0x0000000000421845 in runshfunc (prog=0x8c6930, wrap=0x0,
name=0x8008e50b0 "setCurrentPS1") at exec.c:4849
#18 0x0000000000421438 in doshfunc (shfunc=0x8c6968, doshargs=0x0,
noreturnval=0x1) at exec.c:4742
#19 0x00000000004aaffa in callhookfunc (name=0x62eb2a "precmd",
lnklst=0x0, arrayp=0x1, retval=0x0) at utils.c:1265
#20 0x00000000004ab34e in preprompt () at utils.c:1326
#21 0x0000000000442b97 in loop (toplevel=0x1, justonce=0x0) at init.c:121
#22 0x000000000044778d in zsh_main (argc=0x1, argv=0x7fffffffd6d8) at
init.c:1617
#23 0x0000000000400412 in main (argc=0x1, argv=0x7fffffffd6d8) at ./main.c:93

gdb$ info registers
rax            0x700    0x700
rbx            0x0      0x0
rcx            0x98a4a8 0x98a4a8
rdx            0xffffffffffffffff       0xffffffffffffffff
rsi            0x7fffffffc718   0x7fffffffc718
rdi            0xffffffff       0xffffffff
rbp            0x7fffffffc720   0x7fffffffc720
rsp            0x7fffffffc630   0x7fffffffc630
r8             0xffffffff8154e928       0xffffffff8154e928
r9             0xfffffe0231bc1868       0xfffffe0231bc1868
r10            0x7fffffffc650   0x7fffffffc650
r11            0x203    0x203
r12            0x7fffffffd6d8   0x7fffffffd6d8
r13            0x0      0x0
r14            0x1      0x1
r15            0x7fffffffd6e8   0x7fffffffd6e8
rip            0x493e05 0x493e05 <wait_for_processes+309>
eflags         0x10202  [ IF RF ]
cs             0x43     0x43
ss             0x3b     0x3b
ds             *value not available*
es             *value not available*
fs             *value not available*
gs             *value not available*


I don't know zsh internals quite well, but I'm willing to provide
debugging information as well as test patches.

-- 
Eitan Adler


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: coredump on C-c
  2013-10-16 21:40   ` Eitan Adler
@ 2013-10-17  0:04     ` Bart Schaefer
  0 siblings, 0 replies; 10+ messages in thread
From: Bart Schaefer @ 2013-10-17  0:04 UTC (permalink / raw)
  To: Zsh hackers list

On Wed, Oct 16, 2013 at 2:40 PM, Eitan Adler <lists@eitanadler.com> wrote:
>
> I'm hoping to prod about this again, this time with a slightly
> different backtrace.

"Slightly" is a bit of an understatement; that's an almost entirely
different backtrace.

I'm also scratching my head a bit about why you're able to land
interrupts in inconvenient locations so easily when nobody else has
reported this kind of issue in several years.

What follows is somewhat stream-of-consciousness, sorry.

In this new case you appear to have a trap set on the INT signal,
which was invoked because you interrupted your setCurrentPS1
precmd-hook, and then had a CHLD signal come in while the INT trap was
unwinding itself.

All of which appears to have been possible because __vcs_dir was
taking a long time?

Signals are being blocked or queued in most of the appropriate places
-- the zhandler at the top of the backtrace is being run when zfree is
already finished and releases the signal queue, and the zhandler in
the middle of the trace is being invoked while signals are unblocked
to wait for _vcs_dir to exit.

At first I thought execrestore() needs to queue signals just as
endparamscope() does.  Why you (Eitan) seem to be uniquely able to
deliver signals into the middle of functions that are little more than
a few assignment statements must have something to do with your
hardware environment.  I guess both of those functions are a bit more
than that because they free memory in addition to moving pointers
around.

On closer examination I'm not sure that will work, because all of this
is happening inside waitforpid() which prefaces it with an explicit
dont_queue_signals().  So it's not impossible that we're also open to
improper signaling inside zfree() here, but the stack trace makes that
look unlikely.  Still, it seems to be possible to reorder operations
in execrestore() to avoid the immediate problem of exstack pointing to
the wrong thing.  That's at least worthwhile, though the possibility
of an execsave/execrestore pair occurring in the middle of an
execrestore in progress makes my head hurt, so I think perhaps both
changes are in order.

execsave(), incidentally, already does things in a "safe" order, so it
probably doesn't need the signal queuing.

diff --git a/Src/exec.c b/Src/exec.c
index de1b484..8efbbd4 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -5078,29 +5078,33 @@ execsave(void)
 void
 execrestore(void)
 {
-    struct execstack *en;
+    struct execstack *en = exstack;

     DPUTS(!exstack, "BUG: execrestore() without execsave()");
-    list_pipe_pid = exstack->list_pipe_pid;
-    nowait = exstack->nowait;
-    pline_level = exstack->pline_level;
-    list_pipe_child = exstack->list_pipe_child;
-    list_pipe_job = exstack->list_pipe_job;
-    strcpy(list_pipe_text, exstack->list_pipe_text);
-    lastval = exstack->lastval;
-    noeval = exstack->noeval;
-    badcshglob = exstack->badcshglob;
-    cmdoutpid = exstack->cmdoutpid;
-    cmdoutval = exstack->cmdoutval;
-    use_cmdoutval = exstack->use_cmdoutval;
-    trap_return = exstack->trap_return;
-    trap_state = exstack->trap_state;
-    trapisfunc = exstack->trapisfunc;
-    traplocallevel = exstack->traplocallevel;
-    noerrs = exstack->noerrs;
-    setunderscore(exstack->underscore);
-    zsfree(exstack->underscore);
-    en = exstack->next;
-    free(exstack);
-    exstack = en;
+
+    queue_signals();
+    exstack = exstack->next;
+
+    list_pipe_pid = en->list_pipe_pid;
+    nowait = en->nowait;
+    pline_level = en->pline_level;
+    list_pipe_child = en->list_pipe_child;
+    list_pipe_job = en->list_pipe_job;
+    strcpy(list_pipe_text, en->list_pipe_text);
+    lastval = en->lastval;
+    noeval = en->noeval;
+    badcshglob = en->badcshglob;
+    cmdoutpid = en->cmdoutpid;
+    cmdoutval = en->cmdoutval;
+    use_cmdoutval = en->use_cmdoutval;
+    trap_return = en->trap_return;
+    trap_state = en->trap_state;
+    trapisfunc = en->trapisfunc;
+    traplocallevel = en->traplocallevel;
+    noerrs = en->noerrs;
+    setunderscore(en->underscore);
+    zsfree(en->underscore);
+    free(en);
+
+    unqueue_signals();
 }


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-10-17  0:04 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-09-26 17:52 coredump on C-c Eitan Adler
2013-09-26 21:31 ` Bart Schaefer
2013-09-27  1:10   ` Eitan Adler
2013-09-27  3:49   ` Bart Schaefer
2013-09-27  4:20     ` Bart Schaefer
2013-09-27 15:50       ` Peter Stephenson
2013-09-27 19:50         ` Bart Schaefer
2013-09-27  5:00   ` Eitan Adler
2013-10-16 21:40   ` Eitan Adler
2013-10-17  0:04     ` Bart Schaefer

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).