9fans - fans of the OS Plan 9 from Bell Labs
 help / color / mirror / Atom feed
* [9fans] ps hang
@ 2009-08-21 16:20 erik quanstrom
  2009-08-21 18:37 ` erik quanstrom
  0 siblings, 1 reply; 2+ messages in thread
From: erik quanstrom @ 2009-08-21 16:20 UTC (permalink / raw)
  To: 9fans

i know this has been reported before.  and i've
guessed at exactly this problem, but it's happened
again here.  and this time i have a bit of evidence.

i haven't quite tracked everything down, or
what the original problem is (that is, who
owns the lock and more importantly, why)
but the hang is because of the qlock of
p->debug in procopen.

minooka# acid -k 55053 /n/dump/2009/0819/386/9pccpu
/n/dump/2009/0819/386/9pccpu:386 plan 9 boot image
/sys/lib/acid/port
/sys/lib/acid/386
acid: stk()
sched()+0x12d /sys/src/9/port/proc.c:155
qlock(q=0xf03d9244)+0x141 /sys/src/9/port/qlock.c:48
procopen(omode=0x0,c=0xf6e52ab8)+0x193 /sys/src/9/port/devproc.c:369
namec(aname=0xf709f318,amode=0x3,omode=0x0,perm=0x0)+0x6d9 /sys/src/9/port/chan.c:1491
sysopen(arg=0xf041c1a0)+0xa9 /sys/src/9/port/sysfile.c:279
syscall(ureg=0xf6810e6c)+0x1b5 /sys/src/9/pc/trap.c:712
_syscallintr()+0x18 /sys/src/9/pc/plan9l.s:45
0xf6810e6c ?file?:0

- erik



^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [9fans] ps hang
  2009-08-21 16:20 [9fans] ps hang erik quanstrom
@ 2009-08-21 18:37 ` erik quanstrom
  0 siblings, 0 replies; 2+ messages in thread
From: erik quanstrom @ 2009-08-21 18:37 UTC (permalink / raw)
  To: 9fans

with patch this time.

whattya know. the process with the held lock
broke.  (but may not have made it to the
broken state.)  the process with the locked
debug lock is 16302

> minooka# acid -k 55053 /n/dump/2009/0819/386/9pccpu
> /n/dump/2009/0819/386/9pccpu:386 plan 9 boot image
> /sys/lib/acid/port
> /sys/lib/acid/386
> acid: stk()
> sched()+0x12d /sys/src/9/port/proc.c:155
> qlock(q=0xf03d9244)+0x141 /sys/src/9/port/qlock.c:48
> procopen(omode=0x0,c=0xf6e52ab8)+0x193 /sys/src/9/port/devproc.c:369
> namec(aname=0xf709f318,amode=0x3,omode=0x0,perm=0x0)+0x6d9 /sys/src/9/port/chan.c:1491
> sysopen(arg=0xf041c1a0)+0xa9 /sys/src/9/port/sysfile.c:279
> syscall(ureg=0xf6810e6c)+0x1b5 /sys/src/9/pc/trap.c:712
> _syscallintr()+0x18 /sys/src/9/pc/plan9l.s:45
> 0xf6810e6c ?file?:0

Wed Aug 19 18:30:29: 16302 acme: checked 163 page table entries

=========================================================
0xf03d90f8 16302: acme sah pc 0xf01aab8a kproc (Wakeme) ut 203 st 347 qpc 0xf019ef57
gotolabel(label=0xf0016028)+0x0 /sys/src/9/pc/l.s:946
sleep(r=0xf6992914,arg=0xf69926e8,f=0xf0117a08)+0x263 /sys/src/9/port/proc.c:796
mountio(m=0xf6a19ec8,r=0xf69926e8)+0x285 /sys/src/9/port/devmnt.c:808
mountrpc(r=0xf69926e8,m=0xf6a19ec8)+0x27 /sys/src/9/port/devmnt.c:745
mntrdwr(c=0xf6aad8d8,buf=0xf6663418,type=0x76,off=0x0,n=0x43)+0xfb /sys/src/9/port/devmnt.c:713
mntwrite(c=0xf6aad8d8,buf=0xf6663418,n=0x43,off=0x0)+0x38 /sys/src/9/port/devmnt.c:681
pprint(fmt=0xf023857e)+0x157 /sys/src/9/port/devcons.c:335
notify(ureg=0xf666371c)+0x162 /sys/src/9/pc/trap.c:797
trap(ureg=0xf666371c)+0x24d /sys/src/9/pc/trap.c:449
forkret()+0x0 /sys/src/9/pc/l.s:1010
0xf666371c ?file?:0

i believe the problem is that pprint() is not guaranteed not to
block for a really long time.  the debug lock should be dropped
before pprint'ing.

/n/dump/2009/0821/sys/src/9/pc/trap.c:765,771 - trap.c:765,772
  int
  notify(Ureg* ureg)
  {
- 	int l;
+ 	char buf[ERRMAX];
+ 	int l, flag;
  	ulong s, sp;
  	Note *n;

/n/dump/2009/0821/sys/src/9/pc/trap.c:792,801 - trap.c:793,804
  	}

  	if(n->flag!=NUser && (up->notified || up->notify==0)){
- 		if(n->flag == NDebug)
- 			pprint("suicide: %s\n", n->msg);
+ 		flag = n->flag;
+ 		strcpy(buf, n->msg);
  		qunlock(&up->debug);
- 		pexit(n->msg, n->flag!=NDebug);
+ 		if(flag == NDebug)
+ 			pprint("suicide: %s\n", buf);
+ 		pexit(buf, flag!=NDebug);
  	}

  	if(up->notified){
/n/dump/2009/0821/sys/src/9/pc/trap.c:816,823 - trap.c:819,826

  	if(!okaddr((ulong)up->notify, 1, 0)
  	|| !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
- 		pprint("suicide: bad address in notify\n");
  		qunlock(&up->debug);
+ 		pprint("suicide: bad address in notify\n");
  		pexit("Suicide", 0);
  	}

/n/dump/2009/0821/sys/src/9/pc/trap.c:849,855 - trap.c:852,858
  noted(Ureg* ureg, ulong arg0)
  {
  	Ureg *nureg;
- 	ulong oureg, sp;
+ 	ulong unk, oureg, sp;

  	qlock(&up->debug);
  	if(arg0!=NRSTR && !up->notified) {
/n/dump/2009/0821/sys/src/9/pc/trap.c:866,873 - trap.c:869,876
  	/* sanity clause */
  	oureg = (ulong)nureg;
  	if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
- 		pprint("bad ureg in noted or call to noted when not notified\n");
  		qunlock(&up->debug);
+ 		pprint("bad ureg in noted or call to noted when not notified\n");
  		pexit("Suicide", 0);
  	}

/n/dump/2009/0821/sys/src/9/pc/trap.c:881,888 - trap.c:884,891
  	if((nureg->cs & 0xFFFF) != UESEL || (nureg->ss & 0xFFFF) != UDSEL
  	  || (nureg->ds & 0xFFFF) != UDSEL || (nureg->es & 0xFFFF) != UDSEL
  	  || (nureg->fs & 0xFFFF) != UDSEL || (nureg->gs & 0xFFFF) != UDSEL){
- 		pprint("bad segment selector in noted\n");
  		qunlock(&up->debug);
+ 		pprint("bad segment selector in noted\n");
  		pexit("Suicide", 0);
  	}

/n/dump/2009/0821/sys/src/9/pc/trap.c:891,896 - trap.c:894,900

  	memmove(ureg, nureg, sizeof(Ureg));

+ 	unk = NCONT;
  	switch(arg0){
  	case NCONT:
  	case NRSTR:
/n/dump/2009/0821/sys/src/9/pc/trap.c:921,927 - trap.c:925,931
  		break;

  	default:
- 		pprint("unknown noted arg 0x%lux\n", arg0);
+ 		unk = arg0;
  		up->lastnote.flag = NDebug;
  		/* fall through */

/n/dump/2009/0821/sys/src/9/pc/trap.c:928,933 - trap.c:932,939
  	case NDFLT:
  		if(up->lastnote.flag == NDebug){
  			qunlock(&up->debug);
+ 			if(unk != NCONT)
+ 				pprint("unknown noted arg 0x%lux\n", arg0);
  			pprint("suicide: %s\n", up->lastnote.msg);
  		} else
  			qunlock(&up->debug);


- erik



^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2009-08-21 18:37 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-08-21 16:20 [9fans] ps hang erik quanstrom
2009-08-21 18:37 ` erik quanstrom

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).