From mboxrd@z Thu Jan 1 00:00:00 1970 From: erik quanstrom Date: Fri, 21 Aug 2009 14:37:53 -0400 To: 9fans@9fans.net Message-ID: In-Reply-To: <16322979e064dd6e89b14d24c02a5ad2@coraid.com> References: <16322979e064dd6e89b14d24c02a5ad2@coraid.com> MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit Subject: Re: [9fans] ps hang Topicbox-Message-UUID: 50652c18-ead5-11e9-9d60-3106f5b1d025 with patch this time. whattya know. the process with the held lock broke. (but may not have made it to the broken state.) the process with the locked debug lock is 16302 > minooka# acid -k 55053 /n/dump/2009/0819/386/9pccpu > /n/dump/2009/0819/386/9pccpu:386 plan 9 boot image > /sys/lib/acid/port > /sys/lib/acid/386 > acid: stk() > sched()+0x12d /sys/src/9/port/proc.c:155 > qlock(q=0xf03d9244)+0x141 /sys/src/9/port/qlock.c:48 > procopen(omode=0x0,c=0xf6e52ab8)+0x193 /sys/src/9/port/devproc.c:369 > namec(aname=0xf709f318,amode=0x3,omode=0x0,perm=0x0)+0x6d9 /sys/src/9/port/chan.c:1491 > sysopen(arg=0xf041c1a0)+0xa9 /sys/src/9/port/sysfile.c:279 > syscall(ureg=0xf6810e6c)+0x1b5 /sys/src/9/pc/trap.c:712 > _syscallintr()+0x18 /sys/src/9/pc/plan9l.s:45 > 0xf6810e6c ?file?:0 Wed Aug 19 18:30:29: 16302 acme: checked 163 page table entries ========================================================= 0xf03d90f8 16302: acme sah pc 0xf01aab8a kproc (Wakeme) ut 203 st 347 qpc 0xf019ef57 gotolabel(label=0xf0016028)+0x0 /sys/src/9/pc/l.s:946 sleep(r=0xf6992914,arg=0xf69926e8,f=0xf0117a08)+0x263 /sys/src/9/port/proc.c:796 mountio(m=0xf6a19ec8,r=0xf69926e8)+0x285 /sys/src/9/port/devmnt.c:808 mountrpc(r=0xf69926e8,m=0xf6a19ec8)+0x27 /sys/src/9/port/devmnt.c:745 mntrdwr(c=0xf6aad8d8,buf=0xf6663418,type=0x76,off=0x0,n=0x43)+0xfb /sys/src/9/port/devmnt.c:713 mntwrite(c=0xf6aad8d8,buf=0xf6663418,n=0x43,off=0x0)+0x38 /sys/src/9/port/devmnt.c:681 pprint(fmt=0xf023857e)+0x157 /sys/src/9/port/devcons.c:335 notify(ureg=0xf666371c)+0x162 /sys/src/9/pc/trap.c:797 
trap(ureg=0xf666371c)+0x24d /sys/src/9/pc/trap.c:449 forkret()+0x0 /sys/src/9/pc/l.s:1010 0xf666371c ?file?:0 i believe the problem is that pprint() is not guaranteed not to block for a really long time. the debug lock should be dropped before pprint'ing. /n/dump/2009/0821/sys/src/9/pc/trap.c:765,771 - trap.c:765,772 int notify(Ureg* ureg) { - int l; + char buf[ERRMAX]; + int l, flag; ulong s, sp; Note *n; /n/dump/2009/0821/sys/src/9/pc/trap.c:792,801 - trap.c:793,804 } if(n->flag!=NUser && (up->notified || up->notify==0)){ - if(n->flag == NDebug) - pprint("suicide: %s\n", n->msg); + flag = n->flag; + strcpy(buf, n->msg); qunlock(&up->debug); - pexit(n->msg, n->flag!=NDebug); + if(flag == NDebug) + pprint("suicide: %s\n", buf); + pexit(buf, flag!=NDebug); } if(up->notified){ /n/dump/2009/0821/sys/src/9/pc/trap.c:816,823 - trap.c:819,826 if(!okaddr((ulong)up->notify, 1, 0) || !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){ - pprint("suicide: bad address in notify\n"); qunlock(&up->debug); + pprint("suicide: bad address in notify\n"); pexit("Suicide", 0); } /n/dump/2009/0821/sys/src/9/pc/trap.c:849,855 - trap.c:852,858 noted(Ureg* ureg, ulong arg0) { Ureg *nureg; - ulong oureg, sp; + ulong unk, oureg, sp; qlock(&up->debug); if(arg0!=NRSTR && !up->notified) { /n/dump/2009/0821/sys/src/9/pc/trap.c:866,873 - trap.c:869,876 /* sanity clause */ oureg = (ulong)nureg; if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){ - pprint("bad ureg in noted or call to noted when not notified\n"); qunlock(&up->debug); + pprint("bad ureg in noted or call to noted when not notified\n"); pexit("Suicide", 0); } /n/dump/2009/0821/sys/src/9/pc/trap.c:881,888 - trap.c:884,891 if((nureg->cs & 0xFFFF) != UESEL || (nureg->ss & 0xFFFF) != UDSEL || (nureg->ds & 0xFFFF) != UDSEL || (nureg->es & 0xFFFF) != UDSEL || (nureg->fs & 0xFFFF) != UDSEL || (nureg->gs & 0xFFFF) != UDSEL){ - pprint("bad segment selector in noted\n"); qunlock(&up->debug); + pprint("bad segment selector in 
noted\n"); pexit("Suicide", 0); } /n/dump/2009/0821/sys/src/9/pc/trap.c:891,896 - trap.c:894,900 memmove(ureg, nureg, sizeof(Ureg)); + unk = NCONT; switch(arg0){ case NCONT: case NRSTR: /n/dump/2009/0821/sys/src/9/pc/trap.c:921,927 - trap.c:925,931 break; default: - pprint("unknown noted arg 0x%lux\n", arg0); + unk = arg0; up->lastnote.flag = NDebug; /* fall through */ /n/dump/2009/0821/sys/src/9/pc/trap.c:928,933 - trap.c:932,939 case NDFLT: if(up->lastnote.flag == NDebug){ qunlock(&up->debug); + if(unk != NCONT) + pprint("unknown noted arg 0x%lux\n", arg0); pprint("suicide: %s\n", up->lastnote.msg); } else qunlock(&up->debug); - erik