mailing list of musl libc
* [RFC PATCH] reduce severity of ldso reclaim_gaps hack
@ 2017-06-28 16:52 Alexander Monakov
  2018-04-11 20:10 ` Rich Felker
  0 siblings, 1 reply; 6+ messages in thread
From: Alexander Monakov @ 2017-06-28 16:52 UTC (permalink / raw)
  To: musl; +Cc: Timo Teräs

---
In part inspired by an earlier patch by Timo Teräs.

Without --gc-sections, __malloc_donate is dead weight in static linking. This
can be solved by moving it to a separate translation unit (but for that matter,
realloc is much heavier and can be factored out too).

Extra jumps from 'free' to 'bin_chunk' and their non-contiguous code layout are
admittedly a problem, but I believe the impact is minuscule. As a minor
consolation, entries to bin_chunk from malloc now require fewer branches, and
hopefully the new code is easier to follow.

Alexander

 ldso/dynlink.c      | 15 +++--------
 src/malloc/malloc.c | 71 ++++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 52 insertions(+), 34 deletions(-)

diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index d20dbd87..985592ce 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -475,23 +475,14 @@ static void redo_lazy_relocs()
 /* A huge hack: to make up for the wastefulness of shared libraries
  * needing at least a page of dirty memory even if they have no global
  * data, we reclaim the gaps at the beginning and end of writable maps
- * and "donate" them to the heap by setting up minimal malloc
- * structures and then freeing them. */
+ * and "donate" them to the heap. */
 
 static void reclaim(struct dso *dso, size_t start, size_t end)
 {
-	size_t *a, *z;
+	void __malloc_donate(char *, char *);
 	if (start >= dso->relro_start && start < dso->relro_end) start = dso->relro_end;
 	if (end   >= dso->relro_start && end   < dso->relro_end) end = dso->relro_start;
-	start = start + 6*sizeof(size_t)-1 & -4*sizeof(size_t);
-	end = (end & -4*sizeof(size_t)) - 2*sizeof(size_t);
-	if (start>end || end-start < 4*sizeof(size_t)) return;
-	a = laddr(dso, start);
-	z = laddr(dso, end);
-	a[-2] = 1;
-	a[-1] = z[0] = end-start + 2*sizeof(size_t) | 1;
-	z[1] = 1;
-	free(a);
+	__malloc_donate(laddr(dso, start), laddr(dso, end));
 }
 
 static void reclaim_gaps(struct dso *dso)
diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c
index ef4c7368..b56fdaa2 100644
--- a/src/malloc/malloc.c
+++ b/src/malloc/malloc.c
@@ -299,6 +299,8 @@ static int pretrim(struct chunk *self, size_t n, int i, int j)
 	return 1;
 }
 
+static void bin_chunk(struct chunk *);
+
 static void trim(struct chunk *self, size_t n)
 {
 	size_t n1 = CHUNK_SIZE(self);
@@ -314,7 +316,7 @@ static void trim(struct chunk *self, size_t n)
 	next->psize = n1-n | C_INUSE;
 	self->csize = n | C_INUSE;
 
-	free(CHUNK_TO_MEM(split));
+	bin_chunk(split);
 }
 
 void *malloc(size_t n)
@@ -410,10 +412,9 @@ void *realloc(void *p, size_t n)
 		size_t newlen = n + extra;
 		/* Crash on realloc of freed chunk */
 		if (extra & 1) a_crash();
-		if (newlen < PAGE_SIZE && (new = malloc(n))) {
-			memcpy(new, p, n-OVERHEAD);
-			free(p);
-			return new;
+		if (newlen < PAGE_SIZE && (new = malloc(n-OVERHEAD))) {
+			n0 = n;
+			goto copy_free_ret;
 		}
 		newlen = (newlen + PAGE_SIZE-1) & -PAGE_SIZE;
 		if (oldlen == newlen) return p;
@@ -456,34 +457,20 @@ copy_realloc:
 	/* As a last resort, allocate a new chunk and copy to it. */
 	new = malloc(n-OVERHEAD);
 	if (!new) return 0;
+copy_free_ret:
 	memcpy(new, p, n0-OVERHEAD);
 	free(CHUNK_TO_MEM(self));
 	return new;
 }
 
-void free(void *p)
+static void bin_chunk(struct chunk *self)
 {
-	struct chunk *self, *next;
+	struct chunk *next = NEXT_CHUNK(self);
 	size_t final_size, new_size, size;
 	int reclaim=0;
 	int i;
 
-	if (!p) return;
-
-	self = MEM_TO_CHUNK(p);
-
-	if (IS_MMAPPED(self)) {
-		size_t extra = self->psize;
-		char *base = (char *)self - extra;
-		size_t len = CHUNK_SIZE(self) + extra;
-		/* Crash on double free */
-		if (extra & 1) a_crash();
-		__munmap(base, len);
-		return;
-	}
-
 	final_size = new_size = CHUNK_SIZE(self);
-	next = NEXT_CHUNK(self);
 
 	/* Crash on corrupted footer (likely from buffer overflow) */
 	if (next->psize != self->csize) a_crash();
@@ -544,3 +531,43 @@ void free(void *p)
 
 	unlock_bin(i);
 }
+
+#if defined(__GNUC__)
+__attribute__((cold))
+#endif
+static void unmap_chunk(struct chunk *self)
+{
+	size_t extra = self->psize;
+	char *base = (char *)self - extra;
+	size_t len = CHUNK_SIZE(self) + extra;
+	/* Crash on double free */
+	if (extra & 1) a_crash();
+	__munmap(base, len);
+}
+
+void free(void *p)
+{
+	if (!p) return;
+
+	struct chunk *self = MEM_TO_CHUNK(p);
+
+	if (IS_MMAPPED(self))
+		unmap_chunk(self);
+	else
+		bin_chunk(self);
+}
+
+void __malloc_donate(char *start, char *end)
+{
+	if (end - start < 2*OVERHEAD + SIZE_ALIGN) return;
+	start += OVERHEAD + SIZE_ALIGN - 1;
+	start -= (uintptr_t)start & (SIZE_ALIGN - 1);
+	end -= (uintptr_t)end & (SIZE_ALIGN - 1);
+	if (end - start < OVERHEAD + SIZE_ALIGN) return;
+	if (end - start >= MMAP_THRESHOLD) return;
+
+	struct chunk *c = MEM_TO_CHUNK(start), *n = MEM_TO_CHUNK(end);
+	c->psize = n->csize = C_INUSE;
+	c->csize = n->psize = (end - start) | C_INUSE;
+	bin_chunk(c);
+}
-- 
2.11.0




* Re: [RFC PATCH] reduce severity of ldso reclaim_gaps hack
  2017-06-28 16:52 [RFC PATCH] reduce severity of ldso reclaim_gaps hack Alexander Monakov
@ 2018-04-11 20:10 ` Rich Felker
  2018-04-11 21:19   ` Alexander Monakov
  0 siblings, 1 reply; 6+ messages in thread
From: Rich Felker @ 2018-04-11 20:10 UTC (permalink / raw)
  To: musl

On Wed, Jun 28, 2017 at 07:52:43PM +0300, Alexander Monakov wrote:
> ---
> In part inspired by an earlier patch by Timo Teräs.
> 
> Without --gc-sections, __malloc_donate is dead weight in static linking. This
> can be solved by moving it to a separate translation unit (but for that matter,
> realloc is much heavier and can be factored out too).
> 
> Extra jumps from 'free' to 'bin_chunk' and their non-contiguous code layout are
> admittedly a problem, but I believe the impact is minuscule. As a minor
> consolation, entries to bin_chunk from malloc now require fewer branches, and
> hopefully the new code is easier to follow.

Sorry this lingered so long without review. I think it's good aside
from a few minor details, but I do have a few questions to make sure I
understand it, and that will hopefully reveal any problems if I'm
wrong.

>  ldso/dynlink.c      | 15 +++--------
>  src/malloc/malloc.c | 71 ++++++++++++++++++++++++++++++++++++-----------------
>  2 files changed, 52 insertions(+), 34 deletions(-)
> 
> diff --git a/ldso/dynlink.c b/ldso/dynlink.c
> index d20dbd87..985592ce 100644
> --- a/ldso/dynlink.c
> +++ b/ldso/dynlink.c
> @@ -475,23 +475,14 @@ static void redo_lazy_relocs()
>  /* A huge hack: to make up for the wastefulness of shared libraries
>   * needing at least a page of dirty memory even if they have no global
>   * data, we reclaim the gaps at the beginning and end of writable maps
> - * and "donate" them to the heap by setting up minimal malloc
> - * structures and then freeing them. */
> + * and "donate" them to the heap. */
>  
>  static void reclaim(struct dso *dso, size_t start, size_t end)
>  {
> -	size_t *a, *z;
> +	void __malloc_donate(char *, char *);
>  	if (start >= dso->relro_start && start < dso->relro_end) start = dso->relro_end;
>  	if (end   >= dso->relro_start && end   < dso->relro_end) end = dso->relro_start;
> -	start = start + 6*sizeof(size_t)-1 & -4*sizeof(size_t);
> -	end = (end & -4*sizeof(size_t)) - 2*sizeof(size_t);
> -	if (start>end || end-start < 4*sizeof(size_t)) return;
> -	a = laddr(dso, start);
> -	z = laddr(dso, end);
> -	a[-2] = 1;
> -	a[-1] = z[0] = end-start + 2*sizeof(size_t) | 1;
> -	z[1] = 1;
> -	free(a);
> +	__malloc_donate(laddr(dso, start), laddr(dso, end));
>  }

Looks ok.

>  static void reclaim_gaps(struct dso *dso)
> diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c
> index ef4c7368..b56fdaa2 100644
> --- a/src/malloc/malloc.c
> +++ b/src/malloc/malloc.c
> @@ -299,6 +299,8 @@ static int pretrim(struct chunk *self, size_t n, int i, int j)
>  	return 1;
>  }
>  
> +static void bin_chunk(struct chunk *);
> +
>  static void trim(struct chunk *self, size_t n)
>  {
>  	size_t n1 = CHUNK_SIZE(self);
> @@ -314,7 +316,7 @@ static void trim(struct chunk *self, size_t n)
>  	next->psize = n1-n | C_INUSE;
>  	self->csize = n | C_INUSE;
>  
> -	free(CHUNK_TO_MEM(split));
> +	bin_chunk(split);
>  }

Looks ok.

>  void *malloc(size_t n)
> @@ -410,10 +412,9 @@ void *realloc(void *p, size_t n)
>  		size_t newlen = n + extra;
>  		/* Crash on realloc of freed chunk */
>  		if (extra & 1) a_crash();
> -		if (newlen < PAGE_SIZE && (new = malloc(n))) {
> -			memcpy(new, p, n-OVERHEAD);
> -			free(p);
> -			return new;
> +		if (newlen < PAGE_SIZE && (new = malloc(n-OVERHEAD))) {
> +			n0 = n;
> +			goto copy_free_ret;
>  		}
>  		newlen = (newlen + PAGE_SIZE-1) & -PAGE_SIZE;
>  		if (oldlen == newlen) return p;
> @@ -456,34 +457,20 @@ copy_realloc:
>  	/* As a last resort, allocate a new chunk and copy to it. */
>  	new = malloc(n-OVERHEAD);
>  	if (!new) return 0;
> +copy_free_ret:
>  	memcpy(new, p, n0-OVERHEAD);
>  	free(CHUNK_TO_MEM(self));
>  	return new;
>  }

This looks like an independent change that fixes a separate
slight-overallocation bug. Is it related?

> -void free(void *p)
> +static void bin_chunk(struct chunk *self)
>  {
> -	struct chunk *self, *next;
> +	struct chunk *next = NEXT_CHUNK(self);
>  	size_t final_size, new_size, size;
>  	int reclaim=0;
>  	int i;
>  
> -	if (!p) return;
> -
> -	self = MEM_TO_CHUNK(p);
> -
> -	if (IS_MMAPPED(self)) {
> -		size_t extra = self->psize;
> -		char *base = (char *)self - extra;
> -		size_t len = CHUNK_SIZE(self) + extra;
> -		/* Crash on double free */
> -		if (extra & 1) a_crash();
> -		__munmap(base, len);
> -		return;
> -	}
> -
>  	final_size = new_size = CHUNK_SIZE(self);
> -	next = NEXT_CHUNK(self);
>  
>  	/* Crash on corrupted footer (likely from buffer overflow) */
>  	if (next->psize != self->csize) a_crash();
> @@ -544,3 +531,43 @@ void free(void *p)
>  
>  	unlock_bin(i);
>  }
> +
> +#if defined(__GNUC__)
> +__attribute__((cold))
> +#endif

This can't be used as-is. It would need a configure check (gcc version
dependent) and __cold__ if we want to, but unless there's a strong
reason to include it I'd just omit it.
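
For illustration, the guarded spelling would look roughly like this
(ATTR_COLD and the HAVE_ATTR_COLD configure knob are made-up names here,
just to show the shape):

  /* hypothetical macro; configure would define HAVE_ATTR_COLD after a
     successful compile test of __attribute__((__cold__)) */
  #ifdef HAVE_ATTR_COLD
  #define ATTR_COLD __attribute__((__cold__))
  #else
  #define ATTR_COLD
  #endif

  ATTR_COLD static void unmap_chunk(struct chunk *self);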

> +static void unmap_chunk(struct chunk *self)
> +{
> +	size_t extra = self->psize;
> +	char *base = (char *)self - extra;
> +	size_t len = CHUNK_SIZE(self) + extra;
> +	/* Crash on double free */
> +	if (extra & 1) a_crash();
> +	__munmap(base, len);
> +}
> +
> +void free(void *p)
> +{
> +	if (!p) return;
> +
> +	struct chunk *self = MEM_TO_CHUNK(p);
> +
> +	if (IS_MMAPPED(self))
> +		unmap_chunk(self);
> +	else
> +		bin_chunk(self);
> +}
> +
> +void __malloc_donate(char *start, char *end)
> +{

It's confusing to see that this is equivalent to what's being removed
from dynlink.c, but I think it may be correct.

> +	if (end - start < 2*OVERHEAD + SIZE_ALIGN) return;

This does not seem like a sufficient bound to ensure the block is
usable, but the next check after alignment may cover it.

> +	start += OVERHEAD + SIZE_ALIGN - 1;
> +	start -= (uintptr_t)start & (SIZE_ALIGN - 1);

This looks correct.

> +	end -= (uintptr_t)end & (SIZE_ALIGN - 1);

This does not subtract the OVERHEAD, but I think it's just a
notational difference; my "end" pointed to the end of the chunk to be
freed, and your "end" points to the beginning of the next
(nonexistent) chunk. The MEM_TO_CHUNK below should compensate.
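
For reference, the relevant malloc.c definitions are roughly as follows
(quoting from memory, so double-check against the tree):

  #define OVERHEAD (2*sizeof(size_t))
  #define SIZE_ALIGN (4*sizeof(size_t))
  #define MEM_TO_CHUNK(p) (struct chunk *)((char *)(p) - OVERHEAD)
  #define CHUNK_TO_MEM(c) (void *)((char *)(c) + OVERHEAD)

so MEM_TO_CHUNK(end) already places the terminating chunk header in the
last OVERHEAD bytes below the aligned end, which is why no explicit
subtraction is needed on that side.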

> +	if (end - start < OVERHEAD + SIZE_ALIGN) return;

At this point, start and end both point just past a chunk header,
meaning they have to differ by a multiple of SIZE_ALIGN. I don't see
why OVERHEAD is needed here too. The check should be against
SIZE_ALIGN I think (although by alignment they're equivalent).

> +	if (end - start >= MMAP_THRESHOLD) return;

This does not seem necessary. Free chunks in the last bin can be
larger than MMAP_THRESHOLD; they're just broken up to satisfy
allocations. Of course it's unlikely to happen anyway.

> +	struct chunk *c = MEM_TO_CHUNK(start), *n = MEM_TO_CHUNK(end);
> +	c->psize = n->csize = C_INUSE;
> +	c->csize = n->psize = (end - start) | C_INUSE;
> +	bin_chunk(c);
> +}

This looks ok, and the "end-start" vs my "end-start+2*sizeof(size_t)"
matches the above differences.

Rich



* Re: [RFC PATCH] reduce severity of ldso reclaim_gaps hack
  2018-04-11 20:10 ` Rich Felker
@ 2018-04-11 21:19   ` Alexander Monakov
  2018-04-12  4:04     ` Rich Felker
  0 siblings, 1 reply; 6+ messages in thread
From: Alexander Monakov @ 2018-04-11 21:19 UTC (permalink / raw)
  To: musl

> > @@ -410,10 +412,9 @@ void *realloc(void *p, size_t n)
> >  		size_t newlen = n + extra;
> >  		/* Crash on realloc of freed chunk */
> >  		if (extra & 1) a_crash();
> > -		if (newlen < PAGE_SIZE && (new = malloc(n))) {
> > -			memcpy(new, p, n-OVERHEAD);
> > -			free(p);
> > -			return new;
> > +		if (newlen < PAGE_SIZE && (new = malloc(n-OVERHEAD))) {
> > +			n0 = n;
> > +			goto copy_free_ret;
> >  		}
> >  		newlen = (newlen + PAGE_SIZE-1) & -PAGE_SIZE;
> >  		if (oldlen == newlen) return p;
> > @@ -456,34 +457,20 @@ copy_realloc:
> >  	/* As a last resort, allocate a new chunk and copy to it. */
> >  	new = malloc(n-OVERHEAD);
> >  	if (!new) return 0;
> > +copy_free_ret:
> >  	memcpy(new, p, n0-OVERHEAD);
> >  	free(CHUNK_TO_MEM(self));
> >  	return new;
> >  }
> 
> This looks like an independent change that fixes a separate
> slight-overallocation bug. Is it related?

No, it would be nicer to commit it separately.

> > +#if defined(__GNUC__)
> > +__attribute__((cold))
> > +#endif
> 
> This can't be used as-is. It would need a configure check (gcc version
> dependent) and __cold__ if we want to, but unless there's a strong
> reason to include it I'd just omit it.

malloc.c is compiled with -O3, causing 'free' to be inlined in all local
callers. This combats code growth and should also slightly improve code
layout in the new 'free' (not that it would help much, of course).

> > +static void unmap_chunk(struct chunk *self)
> > +{
> > +	size_t extra = self->psize;
> > +	char *base = (char *)self - extra;
> > +	size_t len = CHUNK_SIZE(self) + extra;
> > +	/* Crash on double free */
> > +	if (extra & 1) a_crash();
> > +	__munmap(base, len);
> > +}
> > +
> > +void free(void *p)
> > +{
> > +	if (!p) return;
> > +
> > +	struct chunk *self = MEM_TO_CHUNK(p);
> > +
> > +	if (IS_MMAPPED(self))
> > +		unmap_chunk(self);
> > +	else
> > +		bin_chunk(self);
> > +}
> > +
> > +void __malloc_donate(char *start, char *end)
> > +{
> 
> It's confusing to see that this is equivalent to what's being removed
> from dynlink.c, but I think it may be correct.
> 
> > +	if (end - start < 2*OVERHEAD + SIZE_ALIGN) return;
> 
> This does not seem like a sufficient bound to ensure the block is
> usable, but the next check after alignment may cover it.

Yes, this is to ensure that we don't create invalid pointers when aligning.

> > +	start += OVERHEAD + SIZE_ALIGN - 1;
> > +	start -= (uintptr_t)start & (SIZE_ALIGN - 1);
> 
> This looks correct.
> 
> > +	end -= (uintptr_t)end & (SIZE_ALIGN - 1);
> 
> This does not subtract the OVERHEAD, but I think it's just a
> notational difference; my "end" pointed to the end of the chunk to be
> freed, and your "end" points to the beginning of the next
> (nonexistent) chunk. The MEM_TO_CHUNK below should compensate.
> 
> > +	if (end - start < OVERHEAD + SIZE_ALIGN) return;
> 
> At this point, start and end both point just past a chunk header,
> meaning they have to differ by a multiple of SIZE_ALIGN. I don't see
> why OVERHEAD is needed here too. The check should be against
> SIZE_ALIGN I think (although by alignment they're equivalent).

I don't recall if I had a specific reason to spell it like that.
 
> > +	if (end - start >= MMAP_THRESHOLD) return;
> 
> This does not seem necessary. Free chunks in the last bin can be
> larger than MMAP_THRESHOLD; they're just broken up to satisfy
> allocations. Of course it's unlikely to happen anyway.

Do such oversized chunks appear in normal operation? This seems non-obvious,
so a comment pointing that out would probably be helpful.

Thanks.
Alexander



* Re: [RFC PATCH] reduce severity of ldso reclaim_gaps hack
  2018-04-11 21:19   ` Alexander Monakov
@ 2018-04-12  4:04     ` Rich Felker
  2018-04-12  6:40       ` Alexander Monakov
  0 siblings, 1 reply; 6+ messages in thread
From: Rich Felker @ 2018-04-12  4:04 UTC (permalink / raw)
  To: musl

On Thu, Apr 12, 2018 at 12:19:24AM +0300, Alexander Monakov wrote:
> > > @@ -410,10 +412,9 @@ void *realloc(void *p, size_t n)
> > >  		size_t newlen = n + extra;
> > >  		/* Crash on realloc of freed chunk */
> > >  		if (extra & 1) a_crash();
> > > -		if (newlen < PAGE_SIZE && (new = malloc(n))) {
> > > -			memcpy(new, p, n-OVERHEAD);
> > > -			free(p);
> > > -			return new;
> > > +		if (newlen < PAGE_SIZE && (new = malloc(n-OVERHEAD))) {
> > > +			n0 = n;
> > > +			goto copy_free_ret;
> > >  		}
> > >  		newlen = (newlen + PAGE_SIZE-1) & -PAGE_SIZE;
> > >  		if (oldlen == newlen) return p;
> > > @@ -456,34 +457,20 @@ copy_realloc:
> > >  	/* As a last resort, allocate a new chunk and copy to it. */
> > >  	new = malloc(n-OVERHEAD);
> > >  	if (!new) return 0;
> > > +copy_free_ret:
> > >  	memcpy(new, p, n0-OVERHEAD);
> > >  	free(CHUNK_TO_MEM(self));
> > >  	return new;
> > >  }
> > 
> > This looks like an independent change that fixes a separate
> > slight-overallocation bug. Is it related?
> 
> No, it would be nicer to commit it separately.

OK.

> > > +#if defined(__GNUC__)
> > > +__attribute__((cold))
> > > +#endif
> > 
> > This can't be used as-is. It would need a configure check (gcc version
> > dependent) and __cold__ if we want to, but unless there's a strong
> > reason to include it I'd just omit it.
> 
> malloc.c is compiled with -O3, causing 'free' to be inlined in all local
> callers. This combats code growth and should also slightly improve code
> layout in the new 'free' (not that it would help much, of course).

I'd like to actually do away with the -O3, so maybe now is a good time
to look at that, at least for malloc. I think the main reason it was
used is already covered by the always_inline, which was to avoid GOT
register reloads at each call frame level on i386 and similar archs.
I suspect it might turn out better to just use -O2 instead of -Os
globally (possibly with -falign-*=1) than doing the per-dir/glob stuff
we're doing now, especially on modern gcc versions, but -O3 might
still be preferable for string functions if we want to get
vectorization there.

Of course I don't want to hold your patch up on this issue, but I
think we can just omit the cold with a plan to investigate -O level
and inlining issues soon.

> > > +static void unmap_chunk(struct chunk *self)
> > > +{
> > > +	size_t extra = self->psize;
> > > +	char *base = (char *)self - extra;
> > > +	size_t len = CHUNK_SIZE(self) + extra;
> > > +	/* Crash on double free */
> > > +	if (extra & 1) a_crash();
> > > +	__munmap(base, len);
> > > +}
> > > +
> > > +void free(void *p)
> > > +{
> > > +	if (!p) return;
> > > +
> > > +	struct chunk *self = MEM_TO_CHUNK(p);
> > > +
> > > +	if (IS_MMAPPED(self))
> > > +		unmap_chunk(self);
> > > +	else
> > > +		bin_chunk(self);
> > > +}
> > > +
> > > +void __malloc_donate(char *start, char *end)
> > > +{
> > 
> > It's confusing to see that this is equivalent to what's being removed
> > from dynlink.c, but I think it may be correct.
> > 
> > > +	if (end - start < 2*OVERHEAD + SIZE_ALIGN) return;
> > 
> > This does not seem like a sufficient bound to ensure the block is
> > usable, but the next check after alignment may cover it.
> 
> Yes, this is to ensure that we don't create invalid pointers when aligning.

OK.

> > > +	start += OVERHEAD + SIZE_ALIGN - 1;
> > > +	start -= (uintptr_t)start & (SIZE_ALIGN - 1);
> > 
> > This looks correct.
> > 
> > > +	end -= (uintptr_t)end & (SIZE_ALIGN - 1);
> > 
> > This does not subtract the OVERHEAD, but I think it's just a
> > notational difference; my "end" pointed to the end of the chunk to be
> > freed, and your "end" points to the beginning of the next
> > (nonexistent) chunk. The MEM_TO_CHUNK below should compensate.
> > 
> > > +	if (end - start < OVERHEAD + SIZE_ALIGN) return;
> > 
> > At this point, start and end both point just past a chunk header,
> > meaning they have to differ by a multiple of SIZE_ALIGN. I don't see
> > why OVERHEAD is needed here too. The check should be against
> > SIZE_ALIGN I think (although by alignment they're equivalent).

I think I was wrong about them being equivalent, but...

> I don't recall if I had a specific reason to spell it like that.

...end-start==SIZE_ALIGN is certainly a valid free chunk size
(yielding SIZE_ALIGN-OVERHEAD == OVERHEAD bytes of storage), albeit
not terribly helpful.
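
Concretely, on a typical 64-bit arch SIZE_ALIGN is 32 and OVERHEAD is 16,
so end-start == 32 is the one case where the two spellings differ: a plain
"< SIZE_ALIGN" check keeps that 32-byte chunk (16 usable bytes), while the
"< OVERHEAD + SIZE_ALIGN" check in the patch (i.e. < 48) throws it away.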

> > > +	if (end - start >= MMAP_THRESHOLD) return;
> > 
> > This does not seem necessary. Free chunks in the last bin can be
> > larger than MMAP_THRESHOLD; they're just broken up to satisfy
> > allocations. Of course it's unlikely to happen anyway.
> 
> Do such oversized chunks appear in normal operation? This seems non-obvious,
> so a comment pointing that out would probably be helpful.

The only way I could see it happening is on an arch ABI that allows
very large pages (and has the ELF load segments aligned accordingly,
as x86_64 does). In this case if the kernel/hardware only supported
large (e.g. 2MB) pages, you'd pretty much always end up with >1.5MB of
reclaimed space per DSO. IMO this is an awful kernel/hardware
constraint to have, very wasteful, but it's exactly the situation
where you'd most care about the gaps getting reclaimed for something
useful.

Rich



* Re: [RFC PATCH] reduce severity of ldso reclaim_gaps hack
  2018-04-12  4:04     ` Rich Felker
@ 2018-04-12  6:40       ` Alexander Monakov
  2018-04-12 13:26         ` Rich Felker
  0 siblings, 1 reply; 6+ messages in thread
From: Alexander Monakov @ 2018-04-12  6:40 UTC (permalink / raw)
  To: musl

On Thu, 12 Apr 2018, Rich Felker wrote:
> > > This does not seem necessary. Free chunks in the last bin can be
> > > larger than MMAP_THRESHOLD; they're just broken up to satisfy
> > > allocations. Of course it's unlikely to happen anyway.
> > 
> > Do such oversized chunks appear in normal operation? This seems non-obvious,
> > so a comment pointing that out would probably be helpful.
> 
> The only way I could see it happening is on an arch ABI that allows
> very large pages (and has the ELF load segments aligned accordingly,
> as x86_64 does). In this case if the kernel/hardware only supported
> large (e.g. 2MB) pages, you'd pretty much always end up with >1.5MB of
> reclaimed space per DSO. IMO this is an awful kernel/hardware
> constraint to have, very wasteful, but it's exactly the situation
> where you'd most care about the gaps getting reclaimed for something
> useful.

What I meant to ask is: apart from chunks created via reclaim_gaps, can
such oversized chunks appear as a result of malloc-family calls invoked
by the program?

Alexander



* Re: [RFC PATCH] reduce severity of ldso reclaim_gaps hack
  2018-04-12  6:40       ` Alexander Monakov
@ 2018-04-12 13:26         ` Rich Felker
  0 siblings, 0 replies; 6+ messages in thread
From: Rich Felker @ 2018-04-12 13:26 UTC (permalink / raw)
  To: musl

On Thu, Apr 12, 2018 at 09:40:23AM +0300, Alexander Monakov wrote:
> On Thu, 12 Apr 2018, Rich Felker wrote:
> > > > This does not seem necessary. Free chunks in the last bin can be
> > > > larger than MMAP_THRESHOLD; they're just broken up to satisfy
> > > > allocations. Of course it's unlikely to happen anyway.
> > > 
> > > Do such oversized chunks appear in normal operation? This seems non-obvious,
> > > so a comment pointing that out would probably be helpful.
> > 
> > The only way I could see it happening is on an arch ABI that allows
> > very large pages (and has the ELF load segments aligned accordingly,
> > as x86_64 does). In this case if the kernel/hardware only supported
> > large (e.g. 2MB) pages, you'd pretty much always end up with >1.5MB of
> > reclaimed space per DSO. IMO this is an awful kernel/hardware
> > constraint to have, very wasteful, but it's exactly the situation
> > where you'd most care about the gaps getting reclaimed for something
> > useful.
> 
> What I meant to ask is: apart from chunks created via reclaim_gaps, can
> such oversized chunks appear as a result of malloc-family calls invoked
> by the program?

Yes, simple:

void *p[1000];
int i;
for (i=0; i<1000; i++) p[i]=malloc(1000);
for (i=0; i<1000; i++) free(p[i]);
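
(The point being that the frees coalesce the adjacent chunks into a single
free chunk of roughly a megabyte, well past MMAP_THRESHOLD.)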

Rich

