Hello, I've been experiencing a seemingly impossible kernel panic when messing with vgactl. Occasionally, writing multiple commands to /dev/vgactl rapidly may cause an assert to fail (/sys/src/9/port/devdraw.c:/assert). The panic is perfectly reproducible by sending `softscreen off' commands quickly (tested on x61t): ; for(i in `{seq 10}) echo softscreen off >/dev/vgactl The panic occurs within the first few iterations. Moving some locks out of individual functions to hold them for the entire step fixes this. The patch below works, but qunlock may be called without the drawlock being held, I need to recheck and clean up before commiting anything. Credit to moody for suggesting looking in this direction! Thoughts? Thanks, qwx diff 69352f66684cbc75e2cd68c80cd028324964c5aa uncommitted --- a//sys/src/9/pc/devvga.c +++ b//sys/src/9/pc/devvga.c @@ -296,7 +296,8 @@ error(Ebadarg); if(r.max.x > scr->width || r.max.y > scr->height) error("physical screen bigger than virtual"); - deletescreenimage(); + qlock(&drawlock); + _deletescreenimage(); setactualsize(scr, r); goto Resized; @@ -324,10 +325,11 @@ } if(scr->gscreen == nil) return; + qlock(&drawlock); r = actualscreensize(scr); chan = scr->gscreen->chan; z = scr->gscreen->depth; - deletescreenimage(); + _deletescreenimage(); setscreensize(scr, scr->width, scr->height, z, chan, tilt); setactualsize(scr, r); /* no break */ @@ -339,8 +341,9 @@ hwblank = scr->blank != nil; hwaccel = scr->fill != nil || scr->scroll != nil; Resized: - vgascreenwin(scr); - resetscreenimage(); + _vgascreenwin(scr); + _resetscreenimage(); + qunlock(&drawlock); return; case CMlinear: --- a//sys/src/9/pc/screen.c +++ b//sys/src/9/pc/screen.c @@ -64,17 +64,11 @@ void setactualsize(VGAscr *scr, Rectangle r) { - qlock(&drawlock); - r.min = ZP; r.max = tiltsize(scr->tilt, r.max); - if(rectclip(&r, scr->gscreen->r) == 0){ - qunlock(&drawlock); + if(rectclip(&r, scr->gscreen->r) == 0) return; - } scr->gscreen->clipr = r; - - qunlock(&drawlock); } static char* @@ -136,11 +130,8 @@ { char *err; - qlock(&drawlock); - if(waserror()){ - qunlock(&drawlock); + if(waserror()) nexterror(); - } if(memimageinit() < 0) error("memimageinit failed"); @@ -169,7 +160,6 @@ cursoron(); } - qunlock(&drawlock); poperror(); } --- a//sys/src/9/pc/screen.h +++ b//sys/src/9/pc/screen.h @@ -151,6 +151,8 @@ /* devdraw.c */ extern void deletescreenimage(void); extern void resetscreenimage(void); +extern void _deletescreenimage(void); +extern void _resetscreenimage(void); extern void setscreenimageclipr(Rectangle); extern void drawflush(void); extern QLock drawlock; @@ -157,6 +159,7 @@ /* vga.c */ extern void vgascreenwin(VGAscr*); +extern void _vgascreenwin(VGAscr*); extern void vgaimageinit(ulong); extern void vgalinearpci(VGAscr*); extern void vgalinearaddr(VGAscr*, uvlong, vlong); --- a//sys/src/9/pc/vga.c +++ b//sys/src/9/pc/vga.c @@ -176,7 +176,7 @@ } void -vgascreenwin(VGAscr* scr) +_vgascreenwin(VGAscr* scr) { Memimage *i; Rectangle r; @@ -183,8 +183,6 @@ Point p; int h; - qlock(&drawlock); - h = scr->memdefont->height; r = scr->gscreen->r; @@ -215,12 +213,16 @@ curpos = window.min; flushmemscreen(r); - - qunlock(&drawlock); - vgascreenputs(kmesg.buf, kmesg.n); - screenputs = vgascreenputs; +} + +void +vgascreenwin(VGAscr* scr) +{ + qlock(&drawlock); + _vgascreenwin(scr); + qunlock(&drawlock); } /* --- a//sys/src/9/port/devdraw.c +++ b//sys/src/9/port/devdraw.c @@ -947,9 +947,8 @@ } void -deletescreenimage(void) +_deletescreenimage(void) { - dlock(); if(screenimage){ /* will be freed via screendimage; disable */ screenimage->clipr = ZR; @@ -959,14 +958,27 @@ drawfreedimage(screendimage); screendimage = nil; } +} + +void +deletescreenimage(void) +{ + dlock(); + _deletescreenimage(); dunlock(); } void +_resetscreenimage(void) +{ + initscreenimage(); +} + +void resetscreenimage(void) { dlock(); - initscreenimage(); + _resetscreenimage(); dunlock(); }
On Tue Aug 16 08:12:41 +0200 2022, qwx@sciops.net wrote:
> Hello,
>
> I've been experiencing a seemingly impossible kernel panic when
> messing with vgactl. Occasionally, writing multiple commands to
> /dev/vgactl rapidly may cause an assert to fail
> (/sys/src/9/port/devdraw.c:/assert). The panic is perfectly
> reproducible by sending `softscreen off' commands quickly (tested on
> x61t):
>
> ; for(i in `{seq 10}) echo softscreen off >/dev/vgactl
>
> The panic occurs within the first few iterations. Moving some locks
> out of individual functions to hold them for the entire step fixes
> this. The patch below works, but qunlock may be called without the
> drawlock being held, I need to recheck and clean up before commiting
> anything. Credit to moody for suggesting looking in this direction!
Here's the final patch, mostly the same. I'll commit this later
unless there are any objections.
Thanks,
qwx
diff 3839ad4d3c9b26b8a5d564d1d8f09490799e4cd7 uncommitted
--- a//sys/src/9/pc/devvga.c
+++ b//sys/src/9/pc/devvga.c
@@ -296,7 +296,8 @@
error(Ebadarg);
if(r.max.x > scr->width || r.max.y > scr->height)
error("physical screen bigger than virtual");
- deletescreenimage();
+ qlock(&drawlock);
+ _deletescreenimage();
setactualsize(scr, r);
goto Resized;
@@ -324,14 +325,16 @@
}
if(scr->gscreen == nil)
return;
+ qlock(&drawlock);
r = actualscreensize(scr);
chan = scr->gscreen->chan;
z = scr->gscreen->depth;
- deletescreenimage();
+ _deletescreenimage();
setscreensize(scr, scr->width, scr->height, z, chan, tilt);
setactualsize(scr, r);
/* no break */
case CMdrawinit:
+ canqlock(&drawlock);
if(scr->gscreen == nil)
error(Enoscreen);
if(scr->dev && scr->dev->drawinit)
@@ -339,8 +342,9 @@
hwblank = scr->blank != nil;
hwaccel = scr->fill != nil || scr->scroll != nil;
Resized:
- vgascreenwin(scr);
- resetscreenimage();
+ _vgascreenwin(scr);
+ _resetscreenimage();
+ qunlock(&drawlock);
return;
case CMlinear:
--- a//sys/src/9/pc/screen.c
+++ b//sys/src/9/pc/screen.c
@@ -64,17 +64,11 @@
void
setactualsize(VGAscr *scr, Rectangle r)
{
- qlock(&drawlock);
-
r.min = ZP;
r.max = tiltsize(scr->tilt, r.max);
- if(rectclip(&r, scr->gscreen->r) == 0){
- qunlock(&drawlock);
+ if(rectclip(&r, scr->gscreen->r) == 0)
return;
- }
scr->gscreen->clipr = r;
-
- qunlock(&drawlock);
}
static char*
@@ -136,11 +130,8 @@
{
char *err;
- qlock(&drawlock);
- if(waserror()){
- qunlock(&drawlock);
+ if(waserror())
nexterror();
- }
if(memimageinit() < 0)
error("memimageinit failed");
@@ -169,7 +160,6 @@
cursoron();
}
- qunlock(&drawlock);
poperror();
}
--- a//sys/src/9/pc/screen.h
+++ b//sys/src/9/pc/screen.h
@@ -151,6 +151,8 @@
/* devdraw.c */
extern void deletescreenimage(void);
extern void resetscreenimage(void);
+extern void _deletescreenimage(void);
+extern void _resetscreenimage(void);
extern void setscreenimageclipr(Rectangle);
extern void drawflush(void);
extern QLock drawlock;
@@ -157,6 +159,7 @@
/* vga.c */
extern void vgascreenwin(VGAscr*);
+extern void _vgascreenwin(VGAscr*);
extern void vgaimageinit(ulong);
extern void vgalinearpci(VGAscr*);
extern void vgalinearaddr(VGAscr*, uvlong, vlong);
--- a//sys/src/9/pc/vga.c
+++ b//sys/src/9/pc/vga.c
@@ -176,7 +176,7 @@
}
void
-vgascreenwin(VGAscr* scr)
+_vgascreenwin(VGAscr* scr)
{
Memimage *i;
Rectangle r;
@@ -183,8 +183,6 @@
Point p;
int h;
- qlock(&drawlock);
-
h = scr->memdefont->height;
r = scr->gscreen->r;
@@ -215,12 +213,16 @@
curpos = window.min;
flushmemscreen(r);
-
- qunlock(&drawlock);
-
vgascreenputs(kmesg.buf, kmesg.n);
-
screenputs = vgascreenputs;
+}
+
+void
+vgascreenwin(VGAscr* scr)
+{
+ qlock(&drawlock);
+ _vgascreenwin(scr);
+ qunlock(&drawlock);
}
/*
--- a//sys/src/9/port/devdraw.c
+++ b//sys/src/9/port/devdraw.c
@@ -947,9 +947,8 @@
}
void
-deletescreenimage(void)
+_deletescreenimage(void)
{
- dlock();
if(screenimage){
/* will be freed via screendimage; disable */
screenimage->clipr = ZR;
@@ -959,14 +958,27 @@
drawfreedimage(screendimage);
screendimage = nil;
}
+}
+
+void
+deletescreenimage(void)
+{
+ dlock();
+ _deletescreenimage();
dunlock();
}
void
+_resetscreenimage(void)
+{
+ initscreenimage();
+}
+
+void
resetscreenimage(void)
{
dlock();
- initscreenimage();
+ _resetscreenimage();
dunlock();
}
On 8/16/22 14:44, qwx@sciops.net wrote:
> On Tue Aug 16 08:12:41 +0200 2022, qwx@sciops.net wrote:
>> Hello,
>>
>> I've been experiencing a seemingly impossible kernel panic when
>> messing with vgactl. Occasionally, writing multiple commands to
>> /dev/vgactl rapidly may cause an assert to fail
>> (/sys/src/9/port/devdraw.c:/assert). The panic is perfectly
>> reproducible by sending `softscreen off' commands quickly (tested on
>> x61t):
>>
>> ; for(i in `{seq 10}) echo softscreen off >/dev/vgactl
>>
>> The panic occurs within the first few iterations. Moving some locks
>> out of individual functions to hold them for the entire step fixes
>> this. The patch below works, but qunlock may be called without the
>> drawlock being held, I need to recheck and clean up before commiting
>> anything. Credit to moody for suggesting looking in this direction!
>
> Here's the final patch, mostly the same. I'll commit this later
> unless there are any objections.
>
> Thanks,
> qwx
>
Checks out to me.
qwx@sciops.net once said:
> Here's the final patch, mostly the same. I'll commit this later
> unless there are any objections.
There's only two callers of resetscreenimage, two callers of
vgascreenwin, and four callers of deletescreenimage.
I would adjust the call sites to include them within the locked
sections, remove the locking inside them and get rid of the
repetitive underscore functions.
As an aside, all the wet floors involving locks in the vga code
could use some mopping up. Best keep the mold away.
Cheers,
Anthony
On Wed Aug 17 01:02:08 +0200 2022, ality@pbrane.org wrote:
> qwx@sciops.net once said:
> > Here's the final patch, mostly the same. I'll commit this later
> > unless there are any objections.
>
> There's only two callers of resetscreenimage, two callers of
> vgascreenwin, and four callers of deletescreenimage.
>
> I would adjust the call sites to include them within the locked
> sections, remove the locking inside them and get rid of the
> repetitive underscore functions.
Makes sense, here's a revised patch.
Thanks!
qwx
diff d86a7ed412555192e2000a9a34b3372f380ec3d0 uncommitted
--- a//sys/src/9/pc/devvga.c
+++ b//sys/src/9/pc/devvga.c
@@ -280,8 +280,10 @@
error("bad channel");
if(chantodepth(chan) != z)
error("depth, channel do not match");
+ qlock(&drawlock);
deletescreenimage();
setscreensize(scr, r.max.x, r.max.y, z, chan, scr->tilt);
+ qunlock(&drawlock);
return;
case CMactualsize:
@@ -296,6 +298,7 @@
error(Ebadarg);
if(r.max.x > scr->width || r.max.y > scr->height)
error("physical screen bigger than virtual");
+ qlock(&drawlock);
deletescreenimage();
setactualsize(scr, r);
goto Resized;
@@ -324,6 +327,7 @@
}
if(scr->gscreen == nil)
return;
+ qlock(&drawlock);
r = actualscreensize(scr);
chan = scr->gscreen->chan;
z = scr->gscreen->depth;
@@ -332,8 +336,11 @@
setactualsize(scr, r);
/* no break */
case CMdrawinit:
- if(scr->gscreen == nil)
+ canqlock(&drawlock);
+ if(scr->gscreen == nil){
+ qunlock(&drawlock);
error(Enoscreen);
+ }
if(scr->dev && scr->dev->drawinit)
scr->dev->drawinit(scr);
hwblank = scr->blank != nil;
@@ -341,6 +348,7 @@
Resized:
vgascreenwin(scr);
resetscreenimage();
+ qunlock(&drawlock);
return;
case CMlinear:
--- a//sys/src/9/pc/screen.c
+++ b//sys/src/9/pc/screen.c
@@ -64,17 +64,11 @@
void
setactualsize(VGAscr *scr, Rectangle r)
{
- qlock(&drawlock);
-
r.min = ZP;
r.max = tiltsize(scr->tilt, r.max);
- if(rectclip(&r, scr->gscreen->r) == 0){
- qunlock(&drawlock);
+ if(rectclip(&r, scr->gscreen->r) == 0)
return;
- }
scr->gscreen->clipr = r;
-
- qunlock(&drawlock);
}
static char*
@@ -136,11 +130,8 @@
{
char *err;
- qlock(&drawlock);
- if(waserror()){
- qunlock(&drawlock);
+ if(waserror())
nexterror();
- }
if(memimageinit() < 0)
error("memimageinit failed");
@@ -169,7 +160,6 @@
cursoron();
}
- qunlock(&drawlock);
poperror();
}
--- a//sys/src/9/pc/vga.c
+++ b//sys/src/9/pc/vga.c
@@ -183,8 +183,6 @@
Point p;
int h;
- qlock(&drawlock);
-
h = scr->memdefont->height;
r = scr->gscreen->r;
@@ -215,11 +213,7 @@
curpos = window.min;
flushmemscreen(r);
-
- qunlock(&drawlock);
-
vgascreenputs(kmesg.buf, kmesg.n);
-
screenputs = vgascreenputs;
}
--- a//sys/src/9/port/devdraw.c
+++ b//sys/src/9/port/devdraw.c
@@ -949,7 +949,6 @@
void
deletescreenimage(void)
{
- dlock();
if(screenimage){
/* will be freed via screendimage; disable */
screenimage->clipr = ZR;
@@ -959,15 +958,12 @@
drawfreedimage(screendimage);
screendimage = nil;
}
- dunlock();
}
void
resetscreenimage(void)
{
- dlock();
initscreenimage();
- dunlock();
}
static Chan*
--- a//sys/src/9/zynq/screen.c
+++ b//sys/src/9/zynq/screen.c
@@ -299,8 +299,8 @@
if(chantodepth(chan) != z)
error("depth, channel do not match");
- deletescreenimage();
eqlock(&drawlock);
+ deletescreenimage();
if(memimageinit() < 0){
qunlock(&drawlock);
error("memimageinit failed");
@@ -310,13 +310,14 @@
gscreen = nil;
}
gscreen = allocmemimage(Rect(0,0,x,y), chan);
- qunlock(&drawlock);
/* wet floor */
case CMinit:
if(gscreen == nil)
error("no framebuffer");
+ canqlock(&drawlock);
resetscreenimage();
+ qunlock(&drawlock);
break;
}
free(cb);
Quoth qwx@sciops.net:
> case CMdrawinit:
> - if(scr->gscreen == nil)
> + canqlock(&drawlock);
> + if(scr->gscreen == nil){
> + qunlock(&drawlock);
> error(Enoscreen);
> + }
this canqlock without an if looks supsicious to me;
if someone else holds the lock, the canqlock fails,
but that means we unlock *someone elses* lock.
On Wed Aug 17 14:17:35 +0200 2022, ori@eigenstate.org wrote:
> Quoth qwx@sciops.net:
> > case CMdrawinit:
> > - if(scr->gscreen == nil)
> > + canqlock(&drawlock);
> > + if(scr->gscreen == nil){
> > + qunlock(&drawlock);
> > error(Enoscreen);
> > + }
>
> this canqlock without an if looks supsicious to me;
> if someone else holds the lock, the canqlock fails,
> but that means we unlock *someone elses* lock.
You're absolutely right. Here's another attempt; the code is a mess,
but I'm not up for refactoring it all right now.
What do you think?
Thanks,
qwx
diff d86a7ed412555192e2000a9a34b3372f380ec3d0 uncommitted
--- a//sys/src/9/pc/devvga.c
+++ b//sys/src/9/pc/devvga.c
@@ -280,8 +280,10 @@
error("bad channel");
if(chantodepth(chan) != z)
error("depth, channel do not match");
+ qlock(&drawlock);
deletescreenimage();
setscreensize(scr, r.max.x, r.max.y, z, chan, scr->tilt);
+ qunlock(&drawlock);
return;
case CMactualsize:
@@ -296,6 +298,7 @@
error(Ebadarg);
if(r.max.x > scr->width || r.max.y > scr->height)
error("physical screen bigger than virtual");
+ qlock(&drawlock);
deletescreenimage();
setactualsize(scr, r);
goto Resized;
@@ -324,6 +327,7 @@
}
if(scr->gscreen == nil)
return;
+ qlock(&drawlock);
r = actualscreensize(scr);
chan = scr->gscreen->chan;
z = scr->gscreen->depth;
@@ -331,9 +335,11 @@
setscreensize(scr, scr->width, scr->height, z, chan, tilt);
setactualsize(scr, r);
/* no break */
- case CMdrawinit:
- if(scr->gscreen == nil)
+ Init:
+ if(scr->gscreen == nil){
+ qunlock(&drawlock);
error(Enoscreen);
+ }
if(scr->dev && scr->dev->drawinit)
scr->dev->drawinit(scr);
hwblank = scr->blank != nil;
@@ -341,7 +347,12 @@
Resized:
vgascreenwin(scr);
resetscreenimage();
+ qunlock(&drawlock);
return;
+
+ case CMdrawinit:
+ qlock(&drawlock);
+ goto Init;
case CMlinear:
if(cb->nf!=2 && cb->nf!=3)
--- a//sys/src/9/pc/screen.c
+++ b//sys/src/9/pc/screen.c
@@ -64,17 +64,11 @@
void
setactualsize(VGAscr *scr, Rectangle r)
{
- qlock(&drawlock);
-
r.min = ZP;
r.max = tiltsize(scr->tilt, r.max);
- if(rectclip(&r, scr->gscreen->r) == 0){
- qunlock(&drawlock);
+ if(rectclip(&r, scr->gscreen->r) == 0)
return;
- }
scr->gscreen->clipr = r;
-
- qunlock(&drawlock);
}
static char*
@@ -136,11 +130,8 @@
{
char *err;
- qlock(&drawlock);
- if(waserror()){
- qunlock(&drawlock);
+ if(waserror())
nexterror();
- }
if(memimageinit() < 0)
error("memimageinit failed");
@@ -169,7 +160,6 @@
cursoron();
}
- qunlock(&drawlock);
poperror();
}
--- a//sys/src/9/pc/vga.c
+++ b//sys/src/9/pc/vga.c
@@ -183,8 +183,6 @@
Point p;
int h;
- qlock(&drawlock);
-
h = scr->memdefont->height;
r = scr->gscreen->r;
@@ -215,11 +213,7 @@
curpos = window.min;
flushmemscreen(r);
-
- qunlock(&drawlock);
-
vgascreenputs(kmesg.buf, kmesg.n);
-
screenputs = vgascreenputs;
}
--- a//sys/src/9/port/devdraw.c
+++ b//sys/src/9/port/devdraw.c
@@ -949,7 +949,6 @@
void
deletescreenimage(void)
{
- dlock();
if(screenimage){
/* will be freed via screendimage; disable */
screenimage->clipr = ZR;
@@ -959,15 +958,12 @@
drawfreedimage(screendimage);
screendimage = nil;
}
- dunlock();
}
void
resetscreenimage(void)
{
- dlock();
initscreenimage();
- dunlock();
}
static Chan*
--- a//sys/src/9/zynq/screen.c
+++ b//sys/src/9/zynq/screen.c
@@ -299,8 +299,8 @@
if(chantodepth(chan) != z)
error("depth, channel do not match");
- deletescreenimage();
eqlock(&drawlock);
+ deletescreenimage();
if(memimageinit() < 0){
qunlock(&drawlock);
error("memimageinit failed");
@@ -310,14 +310,18 @@
gscreen = nil;
}
gscreen = allocmemimage(Rect(0,0,x,y), chan);
- qunlock(&drawlock);
- /* wet floor */
-
- case CMinit:
- if(gscreen == nil)
+ Init:
+ if(gscreen == nil){
+ qunlock(&drawlock);
error("no framebuffer");
+ }
resetscreenimage();
+ qunlock(&drawlock);
break;
+
+ case CMinit:
+ qlock(&drawlock);
+ goto Init;
}
free(cb);
poperror();
Quoth qwx@sciops.net:
>
> You're absolutely right. Here's another attempt; the code is a mess,
> but I'm not up for refactoring it all right now.
>
> What do you think?
>
looks ok to me.
Though, I wonder if we can just simplify by taking the lock on
entry, and release it on return; I don't think that any of
these operations happen often, so we don't need to be fine
grained.
On Thu Aug 18 04:22:59 +0200 2022, ori@eigenstate.org wrote:
> Quoth qwx@sciops.net:
> >
> > You're absolutely right. Here's another attempt; the code is a mess,
> > but I'm not up for refactoring it all right now.
> >
> > What do you think?
> >
>
> looks ok to me.
>
> Though, I wonder if we can just simplify by taking the lock on
> entry, and release it on return; I don't think that any of
> these operations happen often, so we don't need to be fine
> grained.
Alright, pushed. I'll leave refactoring all of this for later.
Thanks!
Cheers,
qwx