9fans - fans of the OS Plan 9 from Bell Labs
 help / color / mirror / Atom feed
From: Philippe Anel <xigh@bouyapop.org>
To: Fans of the OS Plan 9 from Bell Labs <9fans@9fans.net>
Subject: Re: [9fans] 9vx, kproc and *double sleep*
Date: Fri, 11 Jun 2010 19:31:58 +0200	[thread overview]
Message-ID: <4C12730E.8060601@bouyapop.org> (raw)
In-Reply-To: <20100611171151.2E04B5B8B@mail.bitblocks.com>


I only did my tests on 9vx. I have a version that I instrumented with
a circular log buffer, and I have some gdb macros which dumps the
buffer.

I can put the whole source somewhere and even a log with my comments
of the bug if you want to see it. But please note that I made several
changes (because I had to understand how it works) and I would rather
copy my changes to the latest 9vx source tree so that everyone can
read it. What do you think ?

At the end of this post I added some part of the instruments.

Please, I would like to insist on the fact I'm not saying the promela
model is wrong. And I realize that the fix I propose might not be the
good one. Maybe the problem is even elsewhere. All these is just
feelings, logs and headache.

Phil;


The logs look like this :

(gdb) k9log 10
 - kernel history size: 524288
          element size: 64
         element count: 8192
   History stack: 56300 elems:
    dbeb: m= 3 pc=44e235 sp=    51f02020 up=0 :0  xp=0 :0  r=
0 a=       0 # kproc: runproc: psleep
    dbea: m= 3 pc=44e235 sp=    51f02020 up=0 :0  xp=0 :0  r=
0 a=       0 # kproc: runproc: search
    dbe9: m= 3 pc=44e235 sp=    51f02020 up=0 :0  xp=0 :0  r=
0 a=       0 # kproc: runproc
    dbe8: m= 3 pc=44dfb2 sp=    51f02070 up=0 :0  xp=4 :8  r=
0 a=       0 # proc: sched: calling runproc
    dbe7: m= 3 pc=44dfb2 sp=    51f02070 up=0 :0  xp=4 :8  r=
0 a=       0 # proc: sched
    dbe6: m= 3 pc=40e41b sp=    51f020b0 up=4 :8  xp=0 :0  r=
0 a=       0 # proc: schedinit: disable up
    dbe5: m= 3 pc=40e41b sp=    51f020b0 up=4 :8  xp=0 :0  r=
983360 a=       0 # proc: sleep: unlock r
    dbe4: m= 3 pc=40e41b sp=    51f020b0 up=4 :8  xp=0 :0  r=
983360 a=       0 # proc: sleep: unlock up
    dbe3: m= 3 pc=40e41b sp=    51f020b0 up=4 :8  xp=0 :0  r=
0 a=       0 # proc: schedinit: up still active
    dbe2: m= 3 pc=40e41b sp=    51f020b0 up=4 :8  xp=0 :0  r=
0 a=       0 # proc: schedinit: start

where the first column is the serial of the logged operation, and 'a'
just a int.

-------------------------------------------------------------------------------------
void
sleep(Rendez *r, int (*ftest)(void*), void *arg)
{
    int s;
    void (*pt)(Proc*, int, vlong);
    void * pc;

    pc = getcallerpc(&r);
    k9log("proc: sleep", pc, 0, r, 0);

    s = splhi();

    if (up == nil)
        dbgbreak();

    k9log("proc: sleep: lock r", pc, 0, r, 0);

    lock(&r->xlk);

    if(r->xp){
        k9log("proc: sleep: ** double sleep **", pc, r->xp, r, 0);
        panic("cpu%d: up=%d *double sleep* r=%p r->p=%d caller=%p\n",
              m->machno, up ? up->pid : 0, r, r->xp ? r->xp->pid : 0, pc);
    }

    /*
     *  Wakeup only knows there may be something to do by testing
     *  r->p in order to get something to lock on.
     *  Flush that information out to memory in case the sleep is
     *  committed.
     */
    r->xp = up;
    r->xm = m;
    r->xpc = pc;

    k9log("proc: sleep: lock up", pc, 0, r, 0);

    lock(&up->rlock);

//    if (up->state != Running)
//        dbgbreak();

    k9log("proc: sleep: condition", pc, 0, r, up->nlocks.ref);

    if ((*ftest)(arg)) {
        k9log("proc: sleep: happened", pc, 0, r, 0);

    done:
        /*
         *  if condition happened or a note is pending
         *  never mind
         */
        r->xp = nil;

        k9log("proc: sleep: unlock up", pc, 0, r, 0);

        unlock(&up->rlock);

        k9log("proc: sleep: unlock r", pc, 0, r, 0);

        unlock(&r->xlk);
    }
        else if (up->notepending) {
        k9log("proc: sleep: note pending", pc, 0, r, 0);
        goto done;
    }
    else {
        /*
         *  now we are committed to
         *  change state and call scheduler
         */
        pt = proctrace;
        if(pt)
            pt(up, SSleep, 0);
        up->state = Wakeme;
        up->rx = r;

        /* statistics */
        m->cs++;

        k9log("proc: sleep: sleeping", pc, 0, r, 0);

        procsave(up);
        if(setlabel(&up->sched)) {
            /*
             *  here when the process is awakened
             */
            k9log("proc: sleep: awakened", pc, r->xp, r, 0);

            procrestore(up);
            spllo();
        } else {
            /*
             *  here to go to sleep (i.e. stop Running)
             */

//            up->rmu = 1;

//            k9log("proc: sleep: unlock up", pc, 0, r, 0);

//            unlock(&up->rlock);

//            k9log("proc: sleep: unlock r", pc, 0, r, 0);

//            unlock(&r->xlk);

            k9log("proc: sleep: going sched", pc, 0, r, 0);

            gotolabel(&m->sched);
        }
    }

    k9log("proc: sleep: done", pc, 0, r, 0);

    if(up->notepending) {
        k9log("proc: sleep: forward note", pc, 0, r, 0);

        up->notepending = 0;
        splx(s);
        if(up->procctl == Proc_exitme && up->closingfgrp)
            forceclosefgrp();
        error(Eintr);
    }

    splx(s);
}



struct {
    int32 count;
    struct {
        char *         op;
        void *         pc;
        void *        sp;
        int         p0;
        int        s0;
        int         p1;
        int        s1;
        int        xm;    // machno
        Rendez *     r;
        int32        a;
        int32         n;
    } hist[8192];
} k9logs;


void
k9log(char * op, void * pc, Proc * p, void * r, int32 a)
{
    static int32 serial = 0;
    int32 n, i;

    n = __sync_fetch_and_add_4(&serial, 1);
    i = __sync_fetch_and_add_4(&k9logs.count, 1) % nelem(k9logs.hist);

    k9logs.hist[i].n = n;
    k9logs.hist[i].r = r;
    k9logs.hist[i].a = a;
    k9logs.hist[i].xm = getmach()->machno;
    k9logs.hist[i].op = op;
    k9logs.hist[i].pc = pc;
    k9logs.hist[i].sp = &p;
    if (up) {
        k9logs.hist[i].p0 = up->pid;
        k9logs.hist[i].s0 = up->state;
    }
    else {
        k9logs.hist[i].p0 = 0;
        k9logs.hist[i].s0 = 0;
    }
    if (p) {
        k9logs.hist[i].p1 = p->pid;
        k9logs.hist[i].s1 = p->state;
    }
    else {
        k9logs.hist[i].p1 = 0;
        k9logs.hist[i].s1 = 0;
    }
}

Bakul Shah wrote:
> On Fri, 11 Jun 2010 16:59:42 +0200 Philippe Anel <xigh@bouyapop.org>  wrote:
>
>> Ooops I forgot to answer this :
>>
>>> - does changing spl* to manipulation of a per-cpu lock solve the problem?
>>> sometimes preventing anything else from running on your mach is
>>> exactly what you want.
>>>
>>>
>> No ... I don't think so. I think the problem comes from the fact the
>> process is no longer exclusively tied to the current Mach when going
>> (back) to schedinit() ... hence the change I did.
>>
>
> Were you able to verify your hypothesis by adding a bit of
> trapping code + assertion(s) in the original sources?  At the
> point of double sleep one can check state to see if the
> expected preconditions are true.  Alternatively one can check
> when the expected conditions become true, set a variable and
> test it where the double sleep print occurs.  Then one can sort
> of walk back to the earliest point where things go wrong.
>
>
>




  reply	other threads:[~2010-06-11 17:31 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-06-11 14:06 Philippe Anel
2010-06-11 14:40 ` ron minnich
2010-06-11 14:49   ` erik quanstrom
2010-06-11 14:54     ` Philippe Anel
2010-06-11 15:03       ` erik quanstrom
2010-06-11 15:22         ` Philippe Anel
2010-06-11 15:25           ` Philippe Anel
2010-06-11 14:59     ` Philippe Anel
2010-06-11 17:11       ` Bakul Shah
2010-06-11 17:31         ` Philippe Anel [this message]
2010-06-11 18:34           ` Bakul Shah
2010-06-11 18:36             ` erik quanstrom
2010-06-11 18:51             ` Philippe Anel
2010-06-12  7:02     ` Philippe Anel
2010-06-12  9:22       ` Philippe Anel
2010-06-12 11:51         ` erik quanstrom
2010-06-13 13:01       ` Richard Miller
2010-06-13 13:43         ` Philippe Anel
2010-06-13 14:26         ` Philippe Anel
2010-06-13 16:20           ` ron minnich
2010-06-13 16:34             ` Philippe Anel
2010-06-13 17:23               ` Philippe Anel
2010-06-13 18:03             ` Philippe Anel
2010-06-14 19:15               ` Charles Forsyth
2010-06-14 19:36                 ` Philippe Anel
2010-06-15  2:57                 ` ron minnich
2010-06-15  3:36               ` ron minnich
2010-06-12 20:15     ` Richard Miller
2010-06-12 20:30       ` ron minnich
2010-06-12 22:15         ` Charles Forsyth
2010-06-13  0:04           ` ron minnich
2010-06-13 13:32           ` erik quanstrom
2010-06-13 22:34             ` Charles Forsyth
2010-06-13  9:00         ` Richard Miller
2010-06-11 14:49   ` Philippe Anel
2010-06-11 14:59     ` ron minnich
2010-06-11 15:02 ` ron minnich
2010-06-11 15:04   ` erik quanstrom
2010-06-11 15:43     ` ron minnich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4C12730E.8060601@bouyapop.org \
    --to=xigh@bouyapop.org \
    --cc=9fans@9fans.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).