mailing list of musl libc
* [musl] infinite loop in mallocng's try_avail
@ 2023-01-23  1:33 Dominique MARTINET
  2023-01-24  8:37 ` Rich Felker
  0 siblings, 1 reply; 6+ messages in thread
From: Dominique MARTINET @ 2023-01-23  1:33 UTC (permalink / raw)
  To: musl

Hi,

A coworker of mine has been observing hangs during shutdown on our ARM
alpine/openrc board, where the openrc shutdown process is stuck in
userspace after having stopped almost everything once in a blue moon
(running reboot in a loop, this is reproducible roughly one time every
three days)

It turns out it's stuck in malloc() as follows:
(gdb) bt
#0  try_avail (pm=0xb6f692e8 <__malloc_context+48>) at src/malloc/mallocng/malloc.c:156
#1  alloc_slot (sc=sc@entry=0, req=req@entry=8) at src/malloc/mallocng/malloc.c:288
#2  0xb6efdc94 in __libc_malloc_impl (n=8) at src/malloc/mallocng/malloc.c:369
#3  0xb6e9d532 in xmalloc (size=size@entry=8) at ../src/shared/helpers.h:64
#4  0xb6e9d598 in rc_stringlist_new () at ../src/librc/librc-stringlist.c:24
#5  0xb6e9b91e in get_provided (depinfo=depinfo@entry=0xb6f585d0, runlevel=runlevel@entry=0xbe940f99 "shutdown", options=options@entry=3)
    at ../src/librc/librc-depend.c:269
#6  0xb6e9bce8 in visit_service (options=3, runlevel=0xbe940f99 "shutdown", depinfo=0xb6f5ee30, visited=0xb6f57b00, sorted=0xb6f57af0, types=0xb6f575a0, 
    deptree=0xb6ed8ec0) at ../src/librc/librc-depend.c:419
#7  visit_service (deptree=0xb6ed8ec0, types=0xb6f575a0, sorted=0xb6f57af0, visited=0xb6f57b00, depinfo=0xb6f5ee30, runlevel=0xbe940f99 "shutdown", options=3)
    at ../src/librc/librc-depend.c:357
#8  0xb6e9bcb2 in visit_service (options=3, runlevel=0xbe940f99 "shutdown", depinfo=0xb6f62f50, visited=0xb6f57b00, sorted=0xb6f57af0, types=0xb6f575a0, 
    deptree=0xb6ed8ec0) at ../src/librc/librc-depend.c:405
#9  visit_service (deptree=0xb6ed8ec0, types=0xb6f575a0, sorted=0xb6f57af0, visited=0xb6f57b00, depinfo=0xb6f62f50, runlevel=0xbe940f99 "shutdown", options=3)
    at ../src/librc/librc-depend.c:357
#10 0xb6e9bcb2 in visit_service (options=3, runlevel=0xbe940f99 "shutdown", depinfo=0xb6ed85a0, visited=0xb6f57b00, sorted=0xb6f57af0, types=0xb6f575a0, 
    deptree=0xb6ed8ec0) at ../src/librc/librc-depend.c:405
#11 visit_service (deptree=0xb6ed8ec0, types=0xb6f575a0, sorted=0xb6f57af0, visited=0xb6f57b00, depinfo=0xb6ed85a0, runlevel=0xbe940f99 "shutdown", options=3)
    at ../src/librc/librc-depend.c:357
#12 0xb6e9bffa in rc_deptree_depends (deptree=deptree@entry=0xb6ed8ec0, types=types@entry=0xb6f575a0, services=services@entry=0xb6f57ab0, 
    runlevel=0xbe940f99 "shutdown", options=options@entry=3) at ../src/librc/librc-depend.c:482
#13 0x004226ea in do_stop_services (going_down=true, parallel=true, newlevel=0xbe940f99 "shutdown", deptree=0xb6ed8ec0, stop_services=<optimized out>, 
    start_services=0xb6f57d70, types_nw=0xb6f575a0) at ../src/openrc/rc.c:592
#14 main (argc=<optimized out>, argv=<optimized out>) at ../src/openrc/rc.c:1022

(this is musl 1.2.4 with a couple of patches, none around malloc:
https://gitlab.alpinelinux.org/alpine/aports/-/tree/3.17-stable/main/musl
)

For convenience, I've copied the offending loop here:
        int cnt = m->mem->active_idx + 2;
        int size = size_classes[m->sizeclass]*UNIT;
        int span = UNIT + size*cnt;
        // activate up to next 4k boundary
        while ((span^(span+size-1)) < 4096) {
                cnt++;
                span += size;
        }

Here, we have UNIT=16 (constant), and we're considering a malloc(8), so
sizeclass is 0 -> size_classes[0] = 1 -> size = 16, so we're looping
on this condition:
  while ((span ^ (span + 15)) < 4096)
with span being a multiple of 16.

I'm not good at maths, but I'm fairly confident this never becomes false:
0x0ff0 ^ 0x0fff = f
0x1000 ^ 0x100f = f
It could become true without the -1 (span+16); was that intended?
(It works for other sizes because of the UNIT offset in span, so
e.g. for size=32 we have 0x0ff0 ^ 0x100f = 8191, which exits the loop, but
that'd also work without the -1 as far as I can tell)
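
To double-check myself, here is a standalone snippet simulating just that
condition (this is not musl code; for illustration I reuse the same
starting cnt = 31, i.e. active_idx 29 + 2, for every class): size=16 never
exits the loop, while the larger classes in the snippet all do.
----
#include <stdio.h>

int main(void)
{
	int UNIT = 16;
	for (int size = 16; size <= 64; size += 16) {
		int cnt = 31;               /* active_idx 29 + 2, as in the stuck process */
		int span = UNIT + size*cnt;
		int steps = 0;
		while ((span^(span+size-1)) < 4096 && steps < 1000) {
			cnt++;
			span += size;
			steps++;
		}
		printf("size=%3d: %s (%d iterations)\n", size,
		       steps < 1000 ? "terminates" : "still looping at the cap", steps);
	}
	return 0;
}
----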


Anyway, I'm not ruling out a problem in openrc yet -- rc_parallel is
known to be buggy -- but looking at the mallocng ctx I can't see any
obvious problem, so I'm asking here first:
Is that supposed to work? Now that I've narrowed it down this far it might
be possible to build a reproducer; unfortunately I do not know what ctx
looked like when it entered try_avail (the function entered the if (!mask)
branch, so the first entry was dequeued), but here is its current state
(please ask if you need anything else)
------
(gdb) p __malloc_context            
$94 = {
  secret = 15756413639004407235,
  init_done = 1,
  mmap_counter = 135,
  free_meta_head = 0x0,
  avail_meta = 0x18a3f70,
  avail_meta_count = 6,
  avail_meta_area_count = 0,
  meta_alloc_shift = 0,
  meta_area_head = 0x18a3000,
  meta_area_tail = 0x18a3000,
  avail_meta_areas = 0x18a4000 <error: Cannot access memory at address 0x18a4000>,
  active = {0x18a3e98, 0x18a3eb0, 0x18a3208, 0x18a3280, 0x0, 0x0, 0x0, 0x18a31c0, 0x0, 0x0, 0x0, 0x18a3148, 0x0, 0x0, 0x0, 0x18a3dd8, 0x0, 0x0, 0x0, 0x18a3d90, 0x0, 
    0x18a31f0, 0x0, 0x18a3b68, 0x0, 0x18a3f28, 0x0, 0x0, 0x0, 0x18a3238, 0x0 <repeats 18 times>},
  usage_by_class = {2580, 600, 10, 7, 0 <repeats 11 times>, 96, 0, 0, 0, 20, 0, 3, 0, 8, 0, 3, 0, 0, 0, 3, 0 <repeats 18 times>},
  unmap_seq = '\000' <repeats 31 times>,
  bounces = '\000' <repeats 18 times>, "w", '\000' <repeats 12 times>,
  seq = 1 '\001',
  brk = 25837568
}
(gdb) p *__malloc_context->active[0]
$95 = {
  prev = 0x18a3f40,
  next = 0x18a3e80,
  mem = 0xb6f57b30,
  avail_mask = 1073741822,
  freed_mask = 0,
  last_idx = 29,
  freeable = 1,
  sizeclass = 0,
  maplen = 0
}
(gdb) p *__malloc_context->active[0]->mem
$97 = {
  meta = 0x18a3e98,
  active_idx = 29 '\035',
  pad = "\000\000\000\000\000\000\000\000\377\000",
  storage = 0xb6f57b40 ""
}
------


I'm leaving that process stuck in limbo for a few days so I can gather
more info.

Meanwhile we'll also be trying to see if the following patch helps --
given we do not have a good reproducer there won't be any feedback on
this side for a while.
----
From f46bbe3117674becb3c5e4ffb7c8f4e4776084db Mon Sep 17 00:00:00 2001
From: Dominique Martinet <dominique.martinet@atmark-techno.com>
Date: Mon, 23 Jan 2023 10:12:55 +0900
Subject: [PATCH] mallocng: try_avail: fix infinite loop

For sizeclass=0, span is a multiple of size = 16, so
"(span^(span+size-1)) < 4096" never becomes false.
Removing the -1 makes that exit condition reachable for all
size classes.
---
 src/malloc/mallocng/malloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/malloc/mallocng/malloc.c b/src/malloc/mallocng/malloc.c
index d695ab8ec982..9ef0b176c31e 100644
--- a/src/malloc/mallocng/malloc.c
+++ b/src/malloc/mallocng/malloc.c
@@ -151,7 +151,7 @@ static uint32_t try_avail(struct meta **pm)
 				int size = size_classes[m->sizeclass]*UNIT;
 				int span = UNIT + size*cnt;
 				// activate up to next 4k boundary
-				while ((span^(span+size-1)) < 4096) {
+				while ((span^(span+size)) < 4096) {
 					cnt++;
 					span += size;
 				}
----

Thank you,
-- 
Dominique Martinet

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [musl] infinite loop in mallocng's try_avail
  2023-01-23  1:33 [musl] infinite loop in mallocng's try_avail Dominique MARTINET
@ 2023-01-24  8:37 ` Rich Felker
  2023-01-25  0:33   ` Dominique MARTINET
  0 siblings, 1 reply; 6+ messages in thread
From: Rich Felker @ 2023-01-24  8:37 UTC (permalink / raw)
  To: Dominique MARTINET; +Cc: musl

On Mon, Jan 23, 2023 at 10:33:39AM +0900, Dominique MARTINET wrote:
> Hi,
> 
> A coworker of mine has been observing hangs during shutdown on our ARM
> alpine/openrc board, where the openrc shutdown process is stuck in
> userspace after having stopped almost everything once in a blue moon
> (running reboot in a loop, this is reproducible roughly one time every
> three days)
> 
> It turns out it's stuck in malloc() as follows:
> (gdb) bt
> #0  try_avail (pm=0xb6f692e8 <__malloc_context+48>) at src/malloc/mallocng/malloc.c:156
> #1  alloc_slot (sc=sc@entry=0, req=req@entry=8) at src/malloc/mallocng/malloc.c:288
> #2  0xb6efdc94 in __libc_malloc_impl (n=8) at src/malloc/mallocng/malloc.c:369
> #3  0xb6e9d532 in xmalloc (size=size@entry=8) at ../src/shared/helpers.h:64
> #4  0xb6e9d598 in rc_stringlist_new () at ../src/librc/librc-stringlist.c:24
> #5  0xb6e9b91e in get_provided (depinfo=depinfo@entry=0xb6f585d0, runlevel=runlevel@entry=0xbe940f99 "shutdown", options=options@entry=3)
>     at ../src/librc/librc-depend.c:269
> #6  0xb6e9bce8 in visit_service (options=3, runlevel=0xbe940f99 "shutdown", depinfo=0xb6f5ee30, visited=0xb6f57b00, sorted=0xb6f57af0, types=0xb6f575a0, 
>     deptree=0xb6ed8ec0) at ../src/librc/librc-depend.c:419
> #7  visit_service (deptree=0xb6ed8ec0, types=0xb6f575a0, sorted=0xb6f57af0, visited=0xb6f57b00, depinfo=0xb6f5ee30, runlevel=0xbe940f99 "shutdown", options=3)
>     at ../src/librc/librc-depend.c:357
> #8  0xb6e9bcb2 in visit_service (options=3, runlevel=0xbe940f99 "shutdown", depinfo=0xb6f62f50, visited=0xb6f57b00, sorted=0xb6f57af0, types=0xb6f575a0, 
>     deptree=0xb6ed8ec0) at ../src/librc/librc-depend.c:405
> #9  visit_service (deptree=0xb6ed8ec0, types=0xb6f575a0, sorted=0xb6f57af0, visited=0xb6f57b00, depinfo=0xb6f62f50, runlevel=0xbe940f99 "shutdown", options=3)
>     at ../src/librc/librc-depend.c:357
> #10 0xb6e9bcb2 in visit_service (options=3, runlevel=0xbe940f99 "shutdown", depinfo=0xb6ed85a0, visited=0xb6f57b00, sorted=0xb6f57af0, types=0xb6f575a0, 
>     deptree=0xb6ed8ec0) at ../src/librc/librc-depend.c:405
> #11 visit_service (deptree=0xb6ed8ec0, types=0xb6f575a0, sorted=0xb6f57af0, visited=0xb6f57b00, depinfo=0xb6ed85a0, runlevel=0xbe940f99 "shutdown", options=3)
>     at ../src/librc/librc-depend.c:357
> #12 0xb6e9bffa in rc_deptree_depends (deptree=deptree@entry=0xb6ed8ec0, types=types@entry=0xb6f575a0, services=services@entry=0xb6f57ab0, 
>     runlevel=0xbe940f99 "shutdown", options=options@entry=3) at ../src/librc/librc-depend.c:482
> #13 0x004226ea in do_stop_services (going_down=true, parallel=true, newlevel=0xbe940f99 "shutdown", deptree=0xb6ed8ec0, stop_services=<optimized out>, 
>     start_services=0xb6f57d70, types_nw=0xb6f575a0) at ../src/openrc/rc.c:592
> #14 main (argc=<optimized out>, argv=<optimized out>) at ../src/openrc/rc.c:1022
> 
> (this is musl 1.2.4 with a couple of patches, none around malloc:
> https://gitlab.alpinelinux.org/alpine/aports/-/tree/3.17-stable/main/musl
> )
> 
> For convenience, I've copied the offending loop here:
>         int cnt = m->mem->active_idx + 2;
>         int size = size_classes[m->sizeclass]*UNIT;
>         int span = UNIT + size*cnt;
>         // activate up to next 4k boundary
>         while ((span^(span+size-1)) < 4096) {
>                 cnt++;
>                 span += size;
>         }

This code should not be reachable for size class 0 or any size class
allocated inside a larger-size-class slot. That case has active_idx =
cnt-1 (set at line 272).

If this code is being reached, either the allocator state has been
corrupted by some UB in the application, or there's a logic bug in
mallocng. The sequence of events that seems to have to happen to get
there is:

1. Previously active group has no more available slots (line 120).

2. Freed mask of newly activating group (line 131 or 138) is either
   zero (line 145) or the active_idx (read from in-band memory
   susceptible to application buffer overflows etc) is wrong and
   produces zero when its bits are anded with the freed mask (line
   145).
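
Condensed into a predicate, the two conditions look like this (a rough
sketch only, not the actual malloc.c code, which also walks the active
list and skips intermediate groups; the active_idx mask expression is the
same one visible in the try_avail context of the debug patch further down
this thread):
----
#include <stdint.h>
#include <stdio.h>

static int reaches_activation_loop(uint32_t old_avail_mask,
                                   uint32_t new_freed_mask,
                                   int new_active_idx)
{
	if (old_avail_mask) return 0;               /* 1. old group must be exhausted */
	uint32_t act = (2u << new_active_idx) - 1;  /* slots 0..active_idx */
	return !(new_freed_mask & act);             /* 2. none of them freed */
}

int main(void)
{
	/* the state reported in this thread: exhausted old group, next
	 * group with freed_mask == 0 and active_idx == 29 */
	printf("%d\n", reaches_activation_loop(0, 0, 29)); /* prints 1 */
	return 0;
}
----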

> Here, we have UNIT=16 (constant), and we're considering a malloc(8), so
> sizeclass is 0 -> size_classes[0] = 1 -> size = 16, so we're looping
> on this condition:
>   while ((span ^ (span + 15)) < 4096)
> with span being a multiple of 16.
> 
> I'm not good at maths, but I'm fairly confident this never becomes false:
> 0x0ff0 ^ 0x0fff = f
> 0x1000 ^ 0x100f = f
> It could become true without the -1 (span+16); was that intended?
> (It works for other sizes because of the UNIT offset in span, so
> e.g. for size=32 we have 0x0ff0 ^ 0x100f = 8191, which exits the loop, but
> that'd also work without the -1 as far as I can tell)
> 
> 
> Anyway, I'm not ruling out a problem in openrc yet -- rc_parallel is
> known to be buggy -- but looking at the mallocng ctx I can't see any
> obvious problem, so I'm asking here first:
> Is that supposed to work? Now that I've narrowed it down this far it might
> be possible to build a reproducer; unfortunately I do not know what ctx
> looked like when it entered try_avail (the function entered the if (!mask)
> branch, so the first entry was dequeued), but here is its current state
> (please ask if you need anything else)
> ------
> (gdb) p __malloc_context            
> $94 = {
>   secret = 15756413639004407235,
>   init_done = 1,
>   mmap_counter = 135,
>   free_meta_head = 0x0,
>   avail_meta = 0x18a3f70,
>   avail_meta_count = 6,
>   avail_meta_area_count = 0,
>   meta_alloc_shift = 0,
>   meta_area_head = 0x18a3000,
>   meta_area_tail = 0x18a3000,
>   avail_meta_areas = 0x18a4000 <error: Cannot access memory at address 0x18a4000>,
>   active = {0x18a3e98, 0x18a3eb0, 0x18a3208, 0x18a3280, 0x0, 0x0, 0x0, 0x18a31c0, 0x0, 0x0, 0x0, 0x18a3148, 0x0, 0x0, 0x0, 0x18a3dd8, 0x0, 0x0, 0x0, 0x18a3d90, 0x0, 
>     0x18a31f0, 0x0, 0x18a3b68, 0x0, 0x18a3f28, 0x0, 0x0, 0x0, 0x18a3238, 0x0 <repeats 18 times>},
>   usage_by_class = {2580, 600, 10, 7, 0 <repeats 11 times>, 96, 0, 0, 0, 20, 0, 3, 0, 8, 0, 3, 0, 0, 0, 3, 0 <repeats 18 times>},
>   unmap_seq = '\000' <repeats 31 times>,
>   bounces = '\000' <repeats 18 times>, "w", '\000' <repeats 12 times>,
>   seq = 1 '\001',
>   brk = 25837568
> }
> (gdb) p *__malloc_context->active[0]
> $95 = {
>   prev = 0x18a3f40,
>   next = 0x18a3e80,
>   mem = 0xb6f57b30,
>   avail_mask = 1073741822,
>   freed_mask = 0,
>   last_idx = 29,
>   freeable = 1,
>   sizeclass = 0,
>   maplen = 0
> }
> (gdb) p *__malloc_context->active[0]->mem
> $97 = {
>   meta = 0x18a3e98,
>   active_idx = 29 '\035',
>   pad = "\000\000\000\000\000\000\000\000\377\000",
>   storage = 0xb6f57b40 ""
> }

This is really weird, because at the point of the infinite loop, the
new group should not yet be activated (line 163), so
__malloc_context->active[0] should still point to the old active
group. But its avail_mask has all bits set and active_idx is not
corrupted, so try_avail should just have obtained an available slot
from it without ever entering the block at line 120. So I'm confused
how it got to the loop.

One odd thing I noticed is that the backtrace pm=0xb6f692e8 does not
match the __malloc_context->active[0] address. Were these from
different runs?

> ------
> 
> 
> I'm leaving that process stuck in limbo for a few days so I can gather
> more info.
> 
> Meanwhile we'll also be trying to see if the following patch helps --
> given we do not have a good reproducer there won't be any feedback on
> this side for a while.
> ----
> From f46bbe3117674becb3c5e4ffb7c8f4e4776084db Mon Sep 17 00:00:00 2001
> From: Dominique Martinet <dominique.martinet@atmark-techno.com>
> Date: Mon, 23 Jan 2023 10:12:55 +0900
> Subject: [PATCH] mallocng: try_avail: fix infinite loop
> 
> For sizeclass=0, span is a multiple of size = 16, so
> "(span^(span+size-1)) < 4096" never becomes false.
> Removing the -1 makes that exit condition reachable for all
> size classes.
> ---
>  src/malloc/mallocng/malloc.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/malloc/mallocng/malloc.c b/src/malloc/mallocng/malloc.c
> index d695ab8ec982..9ef0b176c31e 100644
> --- a/src/malloc/mallocng/malloc.c
> +++ b/src/malloc/mallocng/malloc.c
> @@ -151,7 +151,7 @@ static uint32_t try_avail(struct meta **pm)
>  				int size = size_classes[m->sizeclass]*UNIT;
>  				int span = UNIT + size*cnt;
>  				// activate up to next 4k boundary
> -				while ((span^(span+size-1)) < 4096) {
> +				while ((span^(span+size)) < 4096) {
>  					cnt++;
>  					span += size;
>  				}
> ----
> 
> Thank you,
> -- 
> Dominique Martinet

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [musl] infinite loop in mallocng's try_avail
  2023-01-24  8:37 ` Rich Felker
@ 2023-01-25  0:33   ` Dominique MARTINET
  2023-01-25  5:53     ` Rich Felker
  0 siblings, 1 reply; 6+ messages in thread
From: Dominique MARTINET @ 2023-01-25  0:33 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

Thanks for the reply,

Rich Felker wrote on Tue, Jan 24, 2023 at 03:37:48AM -0500:
> > (this is musl 1.2.4 with a couple of patches, none around malloc:

(I had meant 1.2.3)

> > https://gitlab.alpinelinux.org/alpine/aports/-/tree/3.17-stable/main/musl
> > )
> > 
> > For convenience, I've copied the offending loop here:
> >         int cnt = m->mem->active_idx + 2;
> >         int size = size_classes[m->sizeclass]*UNIT;
> >         int span = UNIT + size*cnt;
> >         // activate up to next 4k boundary
> >         while ((span^(span+size-1)) < 4096) {
> >                 cnt++;
> >                 span += size;
> >         }
> 
> This code should not be reachable for size class 0 or any size class
> allocated inside a larger-size-class slot.
> That case has active_idx = cnt-1 (set at line 272).

I figured that it might be "normally" unreachable but did not see why,
thanks for confirming that intention.

> If this code is being reached, either the allocator state has been
> corrupted by some UB in the application, or there's a logic bug in
> mallocng. The sequence of events that seems to have to happen to get
> there is:
> 
> 1. Previously active group has no more available slots (line 120).

Right, that one has already likely been dequeued (or at least
traversed), so I do not see how to look at it but that sounds possible.

> 2. Freed mask of newly activating group (line 131 or 138) is either
>    zero (line 145) or the active_idx (read from in-band memory
>    susceptible to application buffer overflows etc) is wrong and
>    produces zero when its bits are anded with the freed mask (line
>    145).

m->freed_mask looks like it is zero from values below; I cannot tell if
that comes from a corruption outside of musl or not.

> > (gdb) p __malloc_context            
> > $94 = {
> >   secret = 15756413639004407235,
> >   init_done = 1,
> >   mmap_counter = 135,
> >   free_meta_head = 0x0,
> >   avail_meta = 0x18a3f70,
> >   avail_meta_count = 6,
> >   avail_meta_area_count = 0,
> >   meta_alloc_shift = 0,
> >   meta_area_head = 0x18a3000,
> >   meta_area_tail = 0x18a3000,
> >   avail_meta_areas = 0x18a4000 <error: Cannot access memory at address 0x18a4000>,
> >   active = {0x18a3e98, 0x18a3eb0, 0x18a3208, 0x18a3280, 0x0, 0x0, 0x0, 0x18a31c0, 0x0, 0x0, 0x0, 0x18a3148, 0x0, 0x0, 0x0, 0x18a3dd8, 0x0, 0x0, 0x0, 0x18a3d90, 0x0, 
> >     0x18a31f0, 0x0, 0x18a3b68, 0x0, 0x18a3f28, 0x0, 0x0, 0x0, 0x18a3238, 0x0 <repeats 18 times>},
> >   usage_by_class = {2580, 600, 10, 7, 0 <repeats 11 times>, 96, 0, 0, 0, 20, 0, 3, 0, 8, 0, 3, 0, 0, 0, 3, 0 <repeats 18 times>},
> >   unmap_seq = '\000' <repeats 31 times>,
> >   bounces = '\000' <repeats 18 times>, "w", '\000' <repeats 12 times>,
> >   seq = 1 '\001',
> >   brk = 25837568
> > }
> > (gdb) p *__malloc_context->active[0]
> > $95 = {
> >   prev = 0x18a3f40,
> >   next = 0x18a3e80,
> >   mem = 0xb6f57b30,
> >   avail_mask = 1073741822,
> >   freed_mask = 0,
> >   last_idx = 29,
> >   freeable = 1,
> >   sizeclass = 0,
> >   maplen = 0
> > }
> > (gdb) p *__malloc_context->active[0]->mem
> > $97 = {
> >   meta = 0x18a3e98,
> >   active_idx = 29 '\035',
> >   pad = "\000\000\000\000\000\000\000\000\377\000",
> >   storage = 0xb6f57b40 ""
> > }
> 
> This is really weird, because at the point of the infinite loop, the
> new group should not yet be activated (line 163), so
> __malloc_context->active[0] should still point to the old active
> group. But its avail_mask has all bits set and active_idx is not
> corrupted, so try_avail should just have obtained an available slot
> from it without ever entering the block at line 120. So I'm confused
> how it got to the loop.

try_avail's pm points to `__malloc_context->active[0]`, which is overwritten by
either dequeue(pm, m) or *pm = m (lines 123,128), so the original
m->avail_mask could have been zero, with the next element having a zero
freed mask?

I'm really not familiar with the slot management logic here; that might
not normally be possible without corruption, but the structures look
fairly sensible to me... Not that this proves there wasn't some sort of
outside corruption; I wish this were easier to reproduce so I could just
run it under valgrind or asan to detect overflows...

> One odd thing I noticed is that the backtrace pm=0xb6f692e8 does not
> match the __malloc_context->active[0] address. Were these from
> different runs?

These were from the same run; I've only observed this single occurrence
first-hand.

pm is &__malloc_context->active[0], so it's not 0x18a3e98 (first value
of active) but its address (e.g. __malloc_context+48 as per gdb symbol
resolution in the backtrace)
I didn't print __malloc_context but I don't see why gdb would have
gotten that wrong.


Cheers,
-- 
Dominique

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [musl] infinite loop in mallocng's try_avail
  2023-01-25  0:33   ` Dominique MARTINET
@ 2023-01-25  5:53     ` Rich Felker
  2023-01-25  6:48       ` Dominique MARTINET
  0 siblings, 1 reply; 6+ messages in thread
From: Rich Felker @ 2023-01-25  5:53 UTC (permalink / raw)
  To: Dominique MARTINET; +Cc: musl

On Wed, Jan 25, 2023 at 09:33:52AM +0900, Dominique MARTINET wrote:
> > If this code is being reached, either the allocator state has been
> > corrupted by some UB in the application, or there's a logic bug in
> > mallocng. The sequence of events that seems to have to happen to get
> > there is:
> > 
> > 1. Previously active group has no more available slots (line 120).
> 
> Right, that one has already likely been dequeued (or at least
> traversed), so I do not see how to look at it but that sounds possible.
> 
> > 2. Freed mask of newly activating group (line 131 or 138) is either
> >    zero (line 145) or the active_idx (read from in-band memory
> >    susceptible to application buffer overflows etc) is wrong and
> >    produces zero when its bits are anded with the freed mask (line
> >    145).
> 
> m->freed_mask looks like it is zero from values below; I cannot tell if
> that comes from a corruption outside of musl or not.
> 
> > > (gdb) p __malloc_context            
> > > $94 = {
> > >   secret = 15756413639004407235,
> > >   init_done = 1,
> > >   mmap_counter = 135,
> > >   free_meta_head = 0x0,
> > >   avail_meta = 0x18a3f70,
> > >   avail_meta_count = 6,
> > >   avail_meta_area_count = 0,
> > >   meta_alloc_shift = 0,
> > >   meta_area_head = 0x18a3000,
> > >   meta_area_tail = 0x18a3000,
> > >   avail_meta_areas = 0x18a4000 <error: Cannot access memory at address 0x18a4000>,
> > >   active = {0x18a3e98, 0x18a3eb0, 0x18a3208, 0x18a3280, 0x0, 0x0, 0x0, 0x18a31c0, 0x0, 0x0, 0x0, 0x18a3148, 0x0, 0x0, 0x0, 0x18a3dd8, 0x0, 0x0, 0x0, 0x18a3d90, 0x0, 
> > >     0x18a31f0, 0x0, 0x18a3b68, 0x0, 0x18a3f28, 0x0, 0x0, 0x0, 0x18a3238, 0x0 <repeats 18 times>},
> > >   usage_by_class = {2580, 600, 10, 7, 0 <repeats 11 times>, 96, 0, 0, 0, 20, 0, 3, 0, 8, 0, 3, 0, 0, 0, 3, 0 <repeats 18 times>},
> > >   unmap_seq = '\000' <repeats 31 times>,
> > >   bounces = '\000' <repeats 18 times>, "w", '\000' <repeats 12 times>,
> > >   seq = 1 '\001',
> > >   brk = 25837568
> > > }
> > > (gdb) p *__malloc_context->active[0]
> > > $95 = {
> > >   prev = 0x18a3f40,
> > >   next = 0x18a3e80,
> > >   mem = 0xb6f57b30,
> > >   avail_mask = 1073741822,
> > >   freed_mask = 0,
> > >   last_idx = 29,
> > >   freeable = 1,
> > >   sizeclass = 0,
> > >   maplen = 0
> > > }
> > > (gdb) p *__malloc_context->active[0]->mem
> > > $97 = {
> > >   meta = 0x18a3e98,
> > >   active_idx = 29 '\035',
> > >   pad = "\000\000\000\000\000\000\000\000\377\000",
> > >   storage = 0xb6f57b40 ""
> > > }
> > 
> > This is really weird, because at the point of the infinite loop, the
> > new group should not yet be activated (line 163), so
> > __malloc_context->active[0] should still point to the old active
> > group. But its avail_mask has all bits set and active_idx is not
> > corrupted, so try_avail should just have obtained an available slot
> > from it without ever entering the block at line 120. So I'm confused
> > how it got to the loop.
> 
> try_avail's pm points to `__malloc_context->active[0]`, which is overwritten by
> either dequeue(pm, m) or *pm = m (lines 123,128), so the original
> m->avail_mask could have been zero, with the next element having a zero
> freed mask?

No, avail_mask is only supposed to be able to be nonzero after
activate_group, which is only called on the head of an active list
(free.c:86 or malloc.c:163) and which atomically pulls bits off
freed_mask to move them to avail_mask. If we're observing avail_mask
nonzero at the point you saw it, some invariant seems to have been
violated.
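
In rough form, that transfer looks like this (a simplified sketch, not the
actual source -- the real activate_group uses a CAS retry loop and only
pulls bits up to active_idx):
----
#include <stdatomic.h>
#include <stdint.h>

struct masks {
	_Atomic uint32_t freed_mask;
	uint32_t avail_mask;
};

static uint32_t activate_sketch(struct masks *m)
{
	uint32_t freed = atomic_exchange(&m->freed_mask, 0); /* take the freed bits */
	return m->avail_mask |= freed;  /* only here can avail_mask become nonzero */
}
----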

> > One odd thing I noticed is that the backtrace pm=0xb6f692e8 does not
> > match the __malloc_context->active[0] address. Were these from
> > different runs?
> 
> These were from the same run; I've only observed this single occurrence
> first-hand.
> 
> pm is &__malloc_context->active[0], so it's not 0x18a3e98 (first value
> of active) but its address (e.g. __malloc_context+48 as per gdb symbol
> resolution in the backtrace)
> I didn't print __malloc_context but I don't see why gdb would have
> gotten that wrong.

Ah, I forgot I was looking at an additional level of indirection here.
It would be nice to know if m is the same active[0] as at entry; that
would help figure out where things went wrong...

Rich

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [musl] infinite loop in mallocng's try_avail
  2023-01-25  5:53     ` Rich Felker
@ 2023-01-25  6:48       ` Dominique MARTINET
  2023-01-27  6:20         ` Dominique MARTINET
  0 siblings, 1 reply; 6+ messages in thread
From: Dominique MARTINET @ 2023-01-25  6:48 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

Rich Felker wrote on Wed, Jan 25, 2023 at 12:53:23AM -0500:
> > > This is really weird, because at the point of the infinite loop, the
> > > new group should not yet be activated (line 163), so
> > > __malloc_context->active[0] should still point to the old active
> > > group. But its avail_mask has all bits set and active_idx is not
> > > corrupted, so try_avail should just have obtained an available slot
> > > from it without ever entering the block at line 120. So I'm confused
> > > how it got to the loop.
> > 
> > try_avail's pm points to `__malloc_context->active[0]`, which is overwritten by
> > either dequeue(pm, m) or *pm = m (lines 123,128), so the original
> > m->avail_mask could have been zero, with the next element having a zero
> > freed mask?
> 
> No, avail_mask is only supposed to be able to be nonzero after
> activate_group, which is only called on the head of an active list
> (free.c:86 or malloc.c:163) and which atomically pulls bits off
> freed_mask to move them to avail_mask. If we're observing avail_mask
> nonzero at the point you saw it, some invariant seems to have been
> violated.

Ok, so if I understood that correctly, the second item in the list
should not have avail_mask set, so it having a non-zero value after we
popped the first element is unexpected?

The avail_mask value is 1073741822, which from a naive interpretation
looks a lot like it went through alloc_group ((2<<29)-1, 29 being its
last_idx, which matches up with active_idx -- lines 272 and 280), then
perhaps alloc_slot (avail_mask--) to account for that extra -1... But
that does not explain how it ended up in second place in the active list,
and guessing in a code base I haven't even fully read will only get so far.
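
Spelling that interpretation out:
----
(2u << 29) - 1 = 0x3fffffff = 1073741823   /* all 30 slot bits set by alloc_group (last_idx = 29) */
1073741823 - 1 = 0x3ffffffe = 1073741822   /* bit 0 cleared by alloc_slot's avail_mask--, the observed value */
----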


Alright, it does not look like we can extract more information from the
currently pending process, so I will try to get more traces.

I'll add a circular buffer to log things like the active[0] at entry and
its mask values, then set my board up to reproduce again, which will
probably bring us to next Monday.

If there is anything else you'd like to see, please ask.

-- 
Dominique Martinet

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [musl] infinite loop in mallocng's try_avail
  2023-01-25  6:48       ` Dominique MARTINET
@ 2023-01-27  6:20         ` Dominique MARTINET
  0 siblings, 0 replies; 6+ messages in thread
From: Dominique MARTINET @ 2023-01-27  6:20 UTC (permalink / raw)
  To: Rich Felker; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 1496 bytes --]

Dominique MARTINET wrote on Wed, Jan 25, 2023 at 03:48:37PM +0900:
> I'll add a circular buffer to log things like the active[0] at entry and
> its mask values, then set my board up to reproduce again, which will
> probably bring us to next Monday.

I've reproduced with that; it seems to confirm that we entered
try_avail() with m->avail_mask == 0 and the next element had
freed_mask == 0...
(format: '__func__ (__LINE__): <text>', m is printed with %p,
masks with %x -- line numbers have shifted due to the debug statements;
I've attached both the patch and the full log to this mail for the
record, however ugly the code is)

In particular, m->next is logged as identical to m here, but looking in
gdb "almost immediately" afterwards we can see that m->next isn't
m anymore:
----
alloc_slot (324): 0x2436f40: avail 0, freed 0
try_avail (145): new m: 0x2436f88, avail 3ffffffe, freed 0
try_avail (171): mask 0, mem active_idx: 29, m/m->next 0x2436f88/0x2436f88
try_avail (178): BUGGED
(gdb) p (*pm)      
$6 = (struct meta *) 0x2436f88
(gdb) p (*pm)->next
$8 = (struct meta *) 0x2436ee0
----

This is on a single-core arm board (i.MX6 ULL), so there should be no
room for cache coherency problems, and there aren't any threads, but...
openrc handles SIGCHLD, and I just confirmed it calls free() in its
signal handler.....

Since malloc/free aren't async-signal-safe, that explains everything we've
seen, and it's a bug I can now fix in openrc (also quite reassuring to
confirm this isn't a musl bug)
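
For the record, the problematic pattern and the planned fix boil down to
something like this (hypothetical names, not the actual openrc code):
only note the event in the signal handler, and do the free() later from
the main loop.
----
#include <signal.h>
#include <stdlib.h>

static void *pending_entry;                 /* some bookkeeping allocation */
static volatile sig_atomic_t child_exited;

static void sigchld_unsafe(int sig)
{
	(void)sig;
	free(pending_entry);    /* BAD: free() is not async-signal-safe; if the
	                           signal lands while the main program is inside
	                           malloc(), the allocator state gets mutated
	                           mid-update, as observed above */
	pending_entry = NULL;
}

static void sigchld_safe(int sig)
{
	(void)sig;
	child_exited = 1;       /* OK: only record that a child exited... */
}

/* ...and do the waitpid()/free() bookkeeping later, from the main loop,
 * outside signal context. */
----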

Thank you for your help!
-- 
Dominique

[-- Attachment #2: 0001-mallocng-debug-statements.patch --]
[-- Type: text/x-diff, Size: 3416 bytes --]

From d95eba3d44b7e3154fc2a89755494f80d49e0e59 Mon Sep 17 00:00:00 2001
From: Dominique Martinet <dominique.martinet@atmark-techno.com>
Date: Wed, 25 Jan 2023 16:19:37 +0900
Subject: [PATCH] mallocng debug statements

---
 src/malloc/mallocng/malloc.c | 43 ++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/src/malloc/mallocng/malloc.c b/src/malloc/mallocng/malloc.c
index d695ab8ec982..99855e7e0bd9 100644
--- a/src/malloc/mallocng/malloc.c
+++ b/src/malloc/mallocng/malloc.c
@@ -39,6 +39,24 @@ static const uint8_t med_cnt_tab[4] = { 28, 24, 20, 32 };
 
 struct malloc_context ctx = { 0 };
 
+char dbg2_buf[1024*3] = { 0 };
+size_t dbg2_off = 0;
+
+#define dbg2(fmt, args...) do { \
+	if (dbg2_off > sizeof(dbg2_buf) - 100) dbg2_off = 0; \
+	dbg2_off += snprintf(dbg2_buf + dbg2_off, sizeof(dbg2_buf) - dbg2_off - 1, \
+		 "%s (%d): " fmt "\n", __func__, __LINE__, ##args); \
+} while (0)
+
+char dbg_buf[1024*100] = { 0 };
+size_t dbg_off = 0;
+
+#define dbg(fmt, args...) do { \
+	if (dbg_off > sizeof(dbg_buf) - 200) dbg_off = 0; \
+	dbg_off += snprintf(dbg_buf + dbg_off, sizeof(dbg_buf) - dbg_off - 1, \
+		 "%s (%d): " fmt "\n", __func__, __LINE__, ##args); \
+} while (0)
+
 struct meta *alloc_meta(void)
 {
 	struct meta *m;
@@ -123,9 +141,13 @@ static uint32_t try_avail(struct meta **pm)
 			dequeue(pm, m);
 			m = *pm;
 			if (!m) return 0;
+			if (m->sizeclass == 0)
+				dbg("new m: %p, avail %x, freed %x", m, m->avail_mask, m->freed_mask);
 		} else {
 			m = m->next;
 			*pm = m;
+			if (m->sizeclass == 0)
+				dbg("new m: %p, avail %x, freed %x", m, m->avail_mask, m->freed_mask);
 		}
 
 		mask = m->freed_mask;
@@ -136,6 +158,8 @@ static uint32_t try_avail(struct meta **pm)
 			m = m->next;
 			*pm = m;
 			mask = m->freed_mask;
+			if (m->sizeclass == 0)
+				dbg("new m: %p, avail %x, freed %x", m, m->avail_mask, m->freed_mask);
 		}
 
 		// activate more slots in a not-fully-active group
@@ -143,10 +167,19 @@ static uint32_t try_avail(struct meta **pm)
 		// any other group with free slots. this avoids
 		// touching & dirtying as-yet-unused pages.
 		if (!(mask & ((2u<<m->mem->active_idx)-1))) {
+			if (m->sizeclass == 0) {
+				dbg("mask %x, mem active_idx: %d, m/m->next %p/%p", mask, m->mem->active_idx, m, m->next);
+			}
 			if (m->next != m) {
 				m = m->next;
 				*pm = m;
 			} else {
+				if (m->sizeclass == 0) {
+					dbg("BUGGED");
+					char msg[] = "\n\nSHOULD NEVER GET HERE!\n\n\n";
+					write(2, msg, sizeof(msg));
+					while(1);
+				}
 				int cnt = m->mem->active_idx + 2;
 				int size = size_classes[m->sizeclass]*UNIT;
 				int span = UNIT + size*cnt;
@@ -280,11 +313,18 @@ static struct meta *alloc_group(int sc, size_t req)
 	m->last_idx = cnt-1;
 	m->freeable = 1;
 	m->sizeclass = sc;
+	dbg2("%p: sc %d idx %d", m, sc, active_idx);
+	dbg("%p: sc %d idx %d", m, sc, active_idx);
 	return m;
 }
 
 static int alloc_slot(int sc, size_t req)
 {
+	if (sc == 0) {
+		dbg("%p: avail %x, freed %x", ctx.active[sc],
+		    ctx.active[sc] ? ctx.active[sc]->avail_mask : 0,
+		    ctx.active[sc] ? ctx.active[sc]->freed_mask : 0);
+	}
 	uint32_t first = try_avail(&ctx.active[sc]);
 	if (first) return a_ctz_32(first);
 
@@ -293,6 +333,9 @@ static int alloc_slot(int sc, size_t req)
 
 	g->avail_mask--;
 	queue(&ctx.active[sc], g);
+	if (sc == 0) {
+		dbg("%p: avail %x", g, g->avail_mask);
+	}
 	return 0;
 }
 
-- 
2.39.0


[-- Attachment #3: dbg.log.xz --]
[-- Type: application/x-xz, Size: 3356 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-01-27  6:21 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-23  1:33 [musl] infinite loop in mallocng's try_avail Dominique MARTINET
2023-01-24  8:37 ` Rich Felker
2023-01-25  0:33   ` Dominique MARTINET
2023-01-25  5:53     ` Rich Felker
2023-01-25  6:48       ` Dominique MARTINET
2023-01-27  6:20         ` Dominique MARTINET

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/
