[PATCH 1/2] malloc: fix an over-allocation bug

mailing list of musl libc
 help / color / mirror / code / Atom feed

* [PATCH 1/2] malloc: fix an over-allocation bug
@ 2018-04-16 17:54 Alexander Monakov
  2018-04-16 17:54 ` [PATCH 2/2] ldso, malloc: implement reclaim_gaps via __malloc_donate Alexander Monakov
  2018-04-16 22:40 ` [PATCH 1/2] malloc: fix an over-allocation bug Rich Felker
  0 siblings, 2 replies; 10+ messages in thread
From: Alexander Monakov @ 2018-04-16 17:54 UTC (permalink / raw)
  To: musl

Fix an instance where realloc code would overallocate by OVERHEAD bytes
amount. Manually arrange for reuse of memcpy-free-return exit sequence.
---
 src/malloc/malloc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c
index 9e05e1d6..1af4ae5a 100644
--- a/src/malloc/malloc.c
+++ b/src/malloc/malloc.c
@@ -397,10 +397,9 @@ void *realloc(void *p, size_t n)
 		size_t newlen = n + extra;
 		/* Crash on realloc of freed chunk */
 		if (extra & 1) a_crash();
-		if (newlen < PAGE_SIZE && (new = malloc(n))) {
-			memcpy(new, p, n-OVERHEAD);
-			free(p);
-			return new;
+		if (newlen < PAGE_SIZE && (new = malloc(n-OVERHEAD))) {
+			n0 = n;
+			goto copy_free_ret;
 		}
 		newlen = (newlen + PAGE_SIZE-1) & -PAGE_SIZE;
 		if (oldlen == newlen) return p;
@@ -443,6 +442,7 @@ copy_realloc:
 	/* As a last resort, allocate a new chunk and copy to it. */
 	new = malloc(n-OVERHEAD);
 	if (!new) return 0;
+copy_free_ret:
 	memcpy(new, p, n0-OVERHEAD);
 	free(CHUNK_TO_MEM(self));
 	return new;
-- 
2.11.0



^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 2/2] ldso, malloc: implement reclaim_gaps via __malloc_donate
  2018-04-16 17:54 [PATCH 1/2] malloc: fix an over-allocation bug Alexander Monakov
@ 2018-04-16 17:54 ` Alexander Monakov
  2018-04-16 22:39   ` Rich Felker
  2018-04-16 22:40 ` [PATCH 1/2] malloc: fix an over-allocation bug Rich Felker
  1 sibling, 1 reply; 10+ messages in thread
From: Alexander Monakov @ 2018-04-16 17:54 UTC (permalink / raw)
  To: musl

Split 'free' into unmap_chunk and bin_chunk, use the latter to introduce
__malloc_donate and use it in reclaim_gaps instead of calling 'free'.
---
v2:
* drop attribute-cold annotation
* adjust __malloc_donate:
	 * drop comparison against MMAP_THRESHOLD
	 * check donated chunk size just once

 ldso/dynlink.c      | 15 +++-----------
 src/malloc/malloc.c | 60 +++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 45 insertions(+), 30 deletions(-)

diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index 9bf6924b..bdf901dd 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -476,23 +476,14 @@ static void redo_lazy_relocs()
 /* A huge hack: to make up for the wastefulness of shared libraries
  * needing at least a page of dirty memory even if they have no global
  * data, we reclaim the gaps at the beginning and end of writable maps
- * and "donate" them to the heap by setting up minimal malloc
- * structures and then freeing them. */
+ * and "donate" them to the heap. */
 
 static void reclaim(struct dso *dso, size_t start, size_t end)
 {
-	size_t *a, *z;
+	void __malloc_donate(char *, char *);
 	if (start >= dso->relro_start && start < dso->relro_end) start = dso->relro_end;
 	if (end   >= dso->relro_start && end   < dso->relro_end) end = dso->relro_start;
-	start = start + 6*sizeof(size_t)-1 & -4*sizeof(size_t);
-	end = (end & -4*sizeof(size_t)) - 2*sizeof(size_t);
-	if (start>end || end-start < 4*sizeof(size_t)) return;
-	a = laddr(dso, start);
-	z = laddr(dso, end);
-	a[-2] = 1;
-	a[-1] = z[0] = end-start + 2*sizeof(size_t) | 1;
-	z[1] = 1;
-	free(a);
+	__malloc_donate(laddr(dso, start), laddr(dso, end));
 }
 
 static void reclaim_gaps(struct dso *dso)
diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c
index 1af4ae5a..2666fe1b 100644
--- a/src/malloc/malloc.c
+++ b/src/malloc/malloc.c
@@ -299,6 +299,8 @@ static int pretrim(struct chunk *self, size_t n, int i, int j)
 	return 1;
 }
 
+static void bin_chunk(struct chunk *);
+
 static void trim(struct chunk *self, size_t n)
 {
 	size_t n1 = CHUNK_SIZE(self);
@@ -314,7 +316,7 @@ static void trim(struct chunk *self, size_t n)
 	next->psize = n1-n | C_INUSE;
 	self->csize = n | C_INUSE;
 
-	free(CHUNK_TO_MEM(split));
+	bin_chunk(split);
 }
 
 void *malloc(size_t n)
@@ -448,29 +450,14 @@ copy_free_ret:
 	return new;
 }
 
-void free(void *p)
+static void bin_chunk(struct chunk *self)
 {
-	struct chunk *self, *next;
+	struct chunk *next = NEXT_CHUNK(self);
 	size_t final_size, new_size, size;
 	int reclaim=0;
 	int i;
 
-	if (!p) return;
-
-	self = MEM_TO_CHUNK(p);
-
-	if (IS_MMAPPED(self)) {
-		size_t extra = self->psize;
-		char *base = (char *)self - extra;
-		size_t len = CHUNK_SIZE(self) + extra;
-		/* Crash on double free */
-		if (extra & 1) a_crash();
-		__munmap(base, len);
-		return;
-	}
-
 	final_size = new_size = CHUNK_SIZE(self);
-	next = NEXT_CHUNK(self);
 
 	/* Crash on corrupted footer (likely from buffer overflow) */
 	if (next->psize != self->csize) a_crash();
@@ -531,3 +518,40 @@ void free(void *p)
 
 	unlock_bin(i);
 }
+
+static void unmap_chunk(struct chunk *self)
+{
+	size_t extra = self->psize;
+	char *base = (char *)self - extra;
+	size_t len = CHUNK_SIZE(self) + extra;
+	/* Crash on double free */
+	if (extra & 1) a_crash();
+	__munmap(base, len);
+}
+
+void free(void *p)
+{
+	if (!p) return;
+
+	struct chunk *self = MEM_TO_CHUNK(p);
+
+	if (IS_MMAPPED(self))
+		unmap_chunk(self);
+	else
+		bin_chunk(self);
+}
+
+void __malloc_donate(char *start, char *end)
+{
+	ssize_t align_start_up = (SIZE_ALIGN - 1) & -(uintptr_t)start;
+	ssize_t align_end_down = (SIZE_ALIGN - 1) &  (uintptr_t)end;
+	ssize_t chunk_size = end - start - (OVERHEAD + align_start_up + align_end_down);
+	if (chunk_size < OVERHEAD + SIZE_ALIGN) return;
+	start += align_start_up;
+	end   -= align_end_down;
+
+	struct chunk *c = MEM_TO_CHUNK(start + OVERHEAD), *n = MEM_TO_CHUNK(end);
+	c->psize = n->csize = C_INUSE;
+	c->csize = n->psize = C_INUSE | chunk_size;
+	bin_chunk(c);
+}
-- 
2.11.0



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] ldso, malloc: implement reclaim_gaps via __malloc_donate
  2018-04-16 17:54 ` [PATCH 2/2] ldso, malloc: implement reclaim_gaps via __malloc_donate Alexander Monakov
@ 2018-04-16 22:39   ` Rich Felker
  2018-04-17  5:30     ` Alexander Monakov
  0 siblings, 1 reply; 10+ messages in thread
From: Rich Felker @ 2018-04-16 22:39 UTC (permalink / raw)
  To: musl

On Mon, Apr 16, 2018 at 08:54:36PM +0300, Alexander Monakov wrote:
> Split 'free' into unmap_chunk and bin_chunk, use the latter to introduce
> __malloc_donate and use it in reclaim_gaps instead of calling 'free'.
> ---
> v2:
> * drop attribute-cold annotation
> * adjust __malloc_donate:
> 	 * drop comparison against MMAP_THRESHOLD
> 	 * check donated chunk size just once
> 
>  ldso/dynlink.c      | 15 +++-----------
>  src/malloc/malloc.c | 60 +++++++++++++++++++++++++++++++++++++----------------
>  2 files changed, 45 insertions(+), 30 deletions(-)
> 
> [...]
> 
> +void __malloc_donate(char *start, char *end)
> +{
> +	ssize_t align_start_up = (SIZE_ALIGN - 1) & -(uintptr_t)start;
> +	ssize_t align_end_down = (SIZE_ALIGN - 1) &  (uintptr_t)end;
> +	ssize_t chunk_size = end - start - (OVERHEAD + align_start_up + align_end_down);
> +	if (chunk_size < OVERHEAD + SIZE_ALIGN) return;
> +	start += align_start_up;
> +	end   -= align_end_down;
> +
> +	struct chunk *c = MEM_TO_CHUNK(start + OVERHEAD), *n = MEM_TO_CHUNK(end);
> +	c->psize = n->csize = C_INUSE;
> +	c->csize = n->psize = C_INUSE | chunk_size;
> +	bin_chunk(c);
> +}

I think this version of the size logic is harder to read than the old
one, and inconsistent with how malloc does accounting internally. In
the notation used everywhere else, "chunk size" always includes
OVERHEAD plus the usable space; it's the distance between the chunk
header and the next chunk header.

I also don't like use of signed arithmetic with sizes. I thought all
the values were nonnegative anyway, so I wasn't sure why you
introduced signed types, but apparently chunk_size can be negative and
the comparison against OVERHEAD+SIZE_ALIGN relies on the RHS being
signed (and on the unsigned result of the expression initializing
chunk_size getting coerced to signed) to work.

I think the above code may also be wrong. start is aligned mod
SIZE_ALIGN, so start+OVERHEAD is misaligned, and therefore not a valid
argument to MEM_TO_CHUNK. Continuing further along this line of
reasoning, aligning start up mod SIZE_ALIGN like you're doing is not
sufficient. There must also be space for a header below the aligned
point.

What about the following version, based on your original?

	// Ensure that adjustment can't move start past end.
	if (end-start < SIZE_ALIGN + OVERFLOW) return;

	// Reserve space for header below start and then align.
	start += OVERHEAD + SIZE_ALIGN - 1;
	start -= (uintptr_t)start & (SIZE_ALIGN - 1);
	end -= (uintptr_t)end & (SIZE_ALIGN - 1);

	// At this point, end-start is a multiple of SIZE_ALIGN,
	// and is thus a valid size unless it's zero.
	if (start == end) return;

	// Setup chunk headers and bin the new free chunk.
	struct chunk *c = MEM_TO_CHUNK(start), *n = MEM_TO_CHUNK(end);
	c->psize = n->csize = C_INUSE;
	c->csize = n->psize = C_INUSE | chunk_size;
	bin_chunk(c);

I don't see any clean way to write the first size check such that it
encompasses the second one, and trying to wait til the second one to
do any checking risks overflow issues that move end past start and
thus give wrong results when computing end-start. That could be
avoided by checking end-start<PTRDIFF_MAX explicitly, but then we
reintroduce 2 branches and it seems just as expensive, but less
obvious, than doing it the above way.

Rich

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] ldso, malloc: implement reclaim_gaps via __malloc_donate
  2018-04-16 22:39   ` Rich Felker
@ 2018-04-17  5:30     ` Alexander Monakov
  2018-04-17 15:57       ` Rich Felker
  0 siblings, 1 reply; 10+ messages in thread
From: Alexander Monakov @ 2018-04-17  5:30 UTC (permalink / raw)
  To: musl

> > +void __malloc_donate(char *start, char *end)
> > +{
> > +	ssize_t align_start_up = (SIZE_ALIGN - 1) & -(uintptr_t)start;
> > +	ssize_t align_end_down = (SIZE_ALIGN - 1) &  (uintptr_t)end;
> > +	ssize_t chunk_size = end - start - (OVERHEAD + align_start_up + align_end_down);
> > +	if (chunk_size < OVERHEAD + SIZE_ALIGN) return;
> > +	start += align_start_up;
> > +	end   -= align_end_down;
> > +
> > +	struct chunk *c = MEM_TO_CHUNK(start + OVERHEAD), *n = MEM_TO_CHUNK(end);
> > +	c->psize = n->csize = C_INUSE;
> > +	c->csize = n->psize = C_INUSE | chunk_size;
> > +	bin_chunk(c);
> > +}
> 
> I think this version of the size logic is harder to read than the old
> one, and inconsistent with how malloc does accounting internally. In
> the notation used everywhere else, "chunk size" always includes
> OVERHEAD plus the usable space; it's the distance between the chunk
> header and the next chunk header.

Same here. In 'ssize_t chunk_size = ...', OVERHEAD is subtracted once to
account for the sentinel/footer; to compute usable size, OVERHEAD would
need to be subtracted twice.

> I also don't like use of signed arithmetic with sizes. I thought all
> the values were nonnegative anyway, so I wasn't sure why you
> introduced signed types, but apparently chunk_size can be negative and
> the comparison against OVERHEAD+SIZE_ALIGN relies on the RHS being
> signed (and on the unsigned result of the expression initializing
> chunk_size getting coerced to signed) to work.

Well, 'end - start' is a signed expression to begin with, so I doubt a
coercion is taking place there.

Is there a problem with assuming OVERHEAD+SIZE_ALIGN is signed?

> I think the above code may also be wrong. start is aligned mod
> SIZE_ALIGN, so start+OVERHEAD is misaligned, and therefore not a valid
> argument to MEM_TO_CHUNK. Continuing further along this line of
> reasoning, aligning start up mod SIZE_ALIGN like you're doing is not
> sufficient. There must also be space for a header below the aligned
> point.

Yes, that's an oversight, but easily corrected: we need to adjust 'start'
so it's congruent to -OVERHEAD (rather than 0) modulo SIZE_ALIGN:

void __malloc_donate(char *start, char *end)
{
	ssize_t align_start_up = (SIZE_ALIGN - 1) & (-(uintptr_t)start - OVERHEAD);
	ssize_t align_end_down = (SIZE_ALIGN - 1) & (uintptr_t)end;
	ssize_t chunk_size = end - start - (OVERHEAD + align_start_up + align_end_down);
	if (chunk_size < OVERHEAD + SIZE_ALIGN) return;
	start += align_start_up;
	end   -= align_end_down;

	struct chunk *c = MEM_TO_CHUNK(start + OVERHEAD), *n = MEM_TO_CHUNK(end);
	c->psize = n->csize = C_INUSE;
	c->csize = n->psize = C_INUSE | chunk_size;
	bin_chunk(c);
}

The above addresses the alignment issue, and I've responded to other
concerns. Do you need a new patch with this?

Alexander


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] ldso, malloc: implement reclaim_gaps via __malloc_donate
  2018-04-17  5:30     ` Alexander Monakov
@ 2018-04-17 15:57       ` Rich Felker
  2018-04-17 17:06         ` Rich Felker
  0 siblings, 1 reply; 10+ messages in thread
From: Rich Felker @ 2018-04-17 15:57 UTC (permalink / raw)
  To: musl

On Tue, Apr 17, 2018 at 08:30:40AM +0300, Alexander Monakov wrote:
> > > +void __malloc_donate(char *start, char *end)
> > > +{
> > > +	ssize_t align_start_up = (SIZE_ALIGN - 1) & -(uintptr_t)start;
> > > +	ssize_t align_end_down = (SIZE_ALIGN - 1) &  (uintptr_t)end;
> > > +	ssize_t chunk_size = end - start - (OVERHEAD + align_start_up + align_end_down);
> > > +	if (chunk_size < OVERHEAD + SIZE_ALIGN) return;
> > > +	start += align_start_up;
> > > +	end   -= align_end_down;
> > > +
> > > +	struct chunk *c = MEM_TO_CHUNK(start + OVERHEAD), *n = MEM_TO_CHUNK(end);
> > > +	c->psize = n->csize = C_INUSE;
> > > +	c->csize = n->psize = C_INUSE | chunk_size;
> > > +	bin_chunk(c);
> > > +}
> > 
> > I think this version of the size logic is harder to read than the old
> > one, and inconsistent with how malloc does accounting internally. In
> > the notation used everywhere else, "chunk size" always includes
> > OVERHEAD plus the usable space; it's the distance between the chunk
> > header and the next chunk header.
> 
> Same here. In 'ssize_t chunk_size = ...', OVERHEAD is subtracted once to
> account for the sentinel/footer; to compute usable size, OVERHEAD would
> need to be subtracted twice.

In the above code,
	-start - align_start_up = 0 mod SIZE_ALIGN
	end - align_end_down = 0 mod SIZE_ALIGN
	-OVERHEAD = -OVERHEAD mod SIZE_ALIGN
so chunk_size is -OVERHEAD mod SIZE_ALIGN, not 0 mod SIZE_ALIGN.

With the below fix to the definition of align_start_up, I think it's
correct, but I still don't understand what you meant by the above
"subtracted once for the sentinel/footer".

> > I also don't like use of signed arithmetic with sizes. I thought all
> > the values were nonnegative anyway, so I wasn't sure why you
> > introduced signed types, but apparently chunk_size can be negative and
> > the comparison against OVERHEAD+SIZE_ALIGN relies on the RHS being
> > signed (and on the unsigned result of the expression initializing
> > chunk_size getting coerced to signed) to work.
> 
> Well, 'end - start' is a signed expression to begin with, so I doubt a
> coercion is taking place there.

Indeed. I was mistakenly thinking they had type uintptr_t, which would
probably be preferable when working with addresses that aren't
actually pointers to objects, but as-written the difference is signed
and it seems ok.

> Is there a problem with assuming OVERHEAD+SIZE_ALIGN is signed?

Indeed, I didn't notice but it's actually false:

#define SIZE_ALIGN (4*sizeof(size_t))
#define OVERHEAD (2*sizeof(size_t))

So yes it's a problem.

Note that with things fixed so chunk_size is a multiple of SIZE_ALIGN,
this issue goes away, because all you need is:

	if (chunk_size <= 0) return;

This is because (chunk_size>0 && chunk_size%SIZE_ALIGN==0) implies
(algebraically) chunk_size>=SIZE_ALIGN.

> > I think the above code may also be wrong. start is aligned mod
> > SIZE_ALIGN, so start+OVERHEAD is misaligned, and therefore not a valid
> > argument to MEM_TO_CHUNK. Continuing further along this line of
> > reasoning, aligning start up mod SIZE_ALIGN like you're doing is not
> > sufficient. There must also be space for a header below the aligned
> > point.
> 
> Yes, that's an oversight, but easily corrected: we need to adjust 'start'
> so it's congruent to -OVERHEAD (rather than 0) modulo SIZE_ALIGN:
> 
> void __malloc_donate(char *start, char *end)
> {
> 	ssize_t align_start_up = (SIZE_ALIGN - 1) & (-(uintptr_t)start - OVERHEAD);

Upon first reading I thought this was just wrong -- it doesn't reserve
space and align, it only aligns to OVERHEAD mod SIZE_ALIGN, possibly
without reserving any space. However, I see the space is later
reserved via start+OVERHEAD (passed to MEM_TO_CHUNK).

> 	ssize_t align_end_down = (SIZE_ALIGN - 1) & (uintptr_t)end;
> 	ssize_t chunk_size = end - start - (OVERHEAD + align_start_up + align_end_down);

Because OVERHEAD is unsigned, this transits through unsigned and back
to signed by assignment, but it should be safe anyway...

> 	if (chunk_size < OVERHEAD + SIZE_ALIGN) return;
> 	start += align_start_up;
> 	end   -= align_end_down;
> 
> 	struct chunk *c = MEM_TO_CHUNK(start + OVERHEAD), *n = MEM_TO_CHUNK(end);
> 	c->psize = n->csize = C_INUSE;
> 	c->csize = n->psize = C_INUSE | chunk_size;
> 	bin_chunk(c);
> }
> 
> The above addresses the alignment issue, and I've responded to other
> concerns. Do you need a new patch with this?

I want something that I'm confident is safe to apply. And I want
progress made reviewing to be a step towards commit, not something
that gets thrown out every time there's a new version of the patch
with a completely different approach.

I'm perfectly ok with committing the slightly-fixed variant of your
first version I posted, and that's probably my leaning unless there's
a strong reason to prefer a different approach. If there is, the new
patch needs to be convincing that it's correct, and should not require
restarting the review process all over again...

Rich

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] ldso, malloc: implement reclaim_gaps via __malloc_donate
  2018-04-17 15:57       ` Rich Felker
@ 2018-04-17 17:06         ` Rich Felker
  2018-04-17 19:01           ` Rich Felker
  0 siblings, 1 reply; 10+ messages in thread
From: Rich Felker @ 2018-04-17 17:06 UTC (permalink / raw)
  To: musl

On Tue, Apr 17, 2018 at 11:57:27AM -0400, Rich Felker wrote:
> > Is there a problem with assuming OVERHEAD+SIZE_ALIGN is signed?
> 
> Indeed, I didn't notice but it's actually false:
> 
> #define SIZE_ALIGN (4*sizeof(size_t))
> #define OVERHEAD (2*sizeof(size_t))
> 
> So yes it's a problem.
> 
> Note that with things fixed so chunk_size is a multiple of SIZE_ALIGN,
> this issue goes away, because all you need is:
> 
> 	if (chunk_size <= 0) return;
> 
> This is because (chunk_size>0 && chunk_size%SIZE_ALIGN==0) implies
> (algebraically) chunk_size>=SIZE_ALIGN.
> 
> > > I think the above code may also be wrong. start is aligned mod
> > > SIZE_ALIGN, so start+OVERHEAD is misaligned, and therefore not a valid
> > > argument to MEM_TO_CHUNK. Continuing further along this line of
> > > reasoning, aligning start up mod SIZE_ALIGN like you're doing is not
> > > sufficient. There must also be space for a header below the aligned
> > > point.
> > 
> > Yes, that's an oversight, but easily corrected: we need to adjust 'start'
> > so it's congruent to -OVERHEAD (rather than 0) modulo SIZE_ALIGN:
> > 
> > void __malloc_donate(char *start, char *end)
> > {
> > 	ssize_t align_start_up = (SIZE_ALIGN - 1) & (-(uintptr_t)start - OVERHEAD);
> 
> Upon first reading I thought this was just wrong -- it doesn't reserve
> space and align, it only aligns to OVERHEAD mod SIZE_ALIGN, possibly
> without reserving any space. However, I see the space is later
> reserved via start+OVERHEAD (passed to MEM_TO_CHUNK).
> 
> > 	ssize_t align_end_down = (SIZE_ALIGN - 1) & (uintptr_t)end;
> > 	ssize_t chunk_size = end - start - (OVERHEAD + align_start_up + align_end_down);
> 
> Because OVERHEAD is unsigned, this transits through unsigned and back
> to signed by assignment, but it should be safe anyway...
> 
> > 	if (chunk_size < OVERHEAD + SIZE_ALIGN) return;
> > 	start += align_start_up;
> > 	end   -= align_end_down;
> > 
> > 	struct chunk *c = MEM_TO_CHUNK(start + OVERHEAD), *n = MEM_TO_CHUNK(end);
> > 	c->psize = n->csize = C_INUSE;
> > 	c->csize = n->psize = C_INUSE | chunk_size;
> > 	bin_chunk(c);
> > }
> > 
> > The above addresses the alignment issue, and I've responded to other
> > concerns. Do you need a new patch with this?
> 
> I want something that I'm confident is safe to apply. And I want
> progress made reviewing to be a step towards commit, not something
> that gets thrown out every time there's a new version of the patch
> with a completely different approach.
> 
> I'm perfectly ok with committing the slightly-fixed variant of your
> first version I posted, and that's probably my leaning unless there's
> a strong reason to prefer a different approach. If there is, the new
> patch needs to be convincing that it's correct, and should not require
> restarting the review process all over again...

If you prefer the new approach, is the following version okay? It
avoids mixing signed and unsigned arithmetic (or any expressions where
the the value could exceed the range [0,SIZE_MAX/2-1]).

void __malloc_donate(char *start, char *end)
{
	size_t align_start_up = (SIZE_ALIGN - 1) & (-(uintptr_t)start - OVERHEAD);
	size_t align_end_down = (SIZE_ALIGN - 1) & (uintptr_t)end;

	if (end - start <= OVERHEAD + align_start_up + align_end_down)
		return;
	start += align_start_up + OVERHEAD;
	end   -= align_end_down;

	struct chunk *c = MEM_TO_CHUNK(start), *n = MEM_TO_CHUNK(end);
	c->psize = n->csize = C_INUSE;
	c->csize = n->psize = C_INUSE | (end-start);
	bin_chunk(c);
}

Strictly speaking moving +OVERHEAD into start wasn't needed, but I did
it so the idiomatic end-start could be used for the size below rather
than recomputing it in a potentially inconsistent or un-idiomatic way.

At this point it's actually very close to the proposed fix-up (with
another small fix) I made for your original version:

void __malloc_donate(char *start, char *end)
{
	if (end-start < SIZE_ALIGN + OVERFLOW) return;
	start += OVERHEAD + SIZE_ALIGN - 1;
	start -= (uintptr_t)start & (SIZE_ALIGN - 1);
	end -= (uintptr_t)end & (SIZE_ALIGN - 1);
	if (start == end) return;

	struct chunk *c = MEM_TO_CHUNK(start), *n = MEM_TO_CHUNK(end);
	c->psize = n->csize = C_INUSE;
	c->csize = n->psize = C_INUSE | (end-start);
	bin_chunk(c);
}

The last 4 lines are identical; your newer approach just has one
conditional instead of 2, but more temp vars.

Rich


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] ldso, malloc: implement reclaim_gaps via __malloc_donate
  2018-04-17 17:06         ` Rich Felker
@ 2018-04-17 19:01           ` Rich Felker
  2018-04-17 20:52             ` Alexander Monakov
  0 siblings, 1 reply; 10+ messages in thread
From: Rich Felker @ 2018-04-17 19:01 UTC (permalink / raw)
  To: musl

On Tue, Apr 17, 2018 at 01:06:24PM -0400, Rich Felker wrote:
> On Tue, Apr 17, 2018 at 11:57:27AM -0400, Rich Felker wrote:
> > > Is there a problem with assuming OVERHEAD+SIZE_ALIGN is signed?
> > 
> > Indeed, I didn't notice but it's actually false:
> > 
> > #define SIZE_ALIGN (4*sizeof(size_t))
> > #define OVERHEAD (2*sizeof(size_t))
> > 
> > So yes it's a problem.
> > 
> > Note that with things fixed so chunk_size is a multiple of SIZE_ALIGN,
> > this issue goes away, because all you need is:
> > 
> > 	if (chunk_size <= 0) return;
> > 
> > This is because (chunk_size>0 && chunk_size%SIZE_ALIGN==0) implies
> > (algebraically) chunk_size>=SIZE_ALIGN.
> > 
> > > > I think the above code may also be wrong. start is aligned mod
> > > > SIZE_ALIGN, so start+OVERHEAD is misaligned, and therefore not a valid
> > > > argument to MEM_TO_CHUNK. Continuing further along this line of
> > > > reasoning, aligning start up mod SIZE_ALIGN like you're doing is not
> > > > sufficient. There must also be space for a header below the aligned
> > > > point.
> > > 
> > > Yes, that's an oversight, but easily corrected: we need to adjust 'start'
> > > so it's congruent to -OVERHEAD (rather than 0) modulo SIZE_ALIGN:
> > > 
> > > void __malloc_donate(char *start, char *end)
> > > {
> > > 	ssize_t align_start_up = (SIZE_ALIGN - 1) & (-(uintptr_t)start - OVERHEAD);
> > 
> > Upon first reading I thought this was just wrong -- it doesn't reserve
> > space and align, it only aligns to OVERHEAD mod SIZE_ALIGN, possibly
> > without reserving any space. However, I see the space is later
> > reserved via start+OVERHEAD (passed to MEM_TO_CHUNK).
> > 
> > > 	ssize_t align_end_down = (SIZE_ALIGN - 1) & (uintptr_t)end;
> > > 	ssize_t chunk_size = end - start - (OVERHEAD + align_start_up + align_end_down);
> > 
> > Because OVERHEAD is unsigned, this transits through unsigned and back
> > to signed by assignment, but it should be safe anyway...
> > 
> > > 	if (chunk_size < OVERHEAD + SIZE_ALIGN) return;
> > > 	start += align_start_up;
> > > 	end   -= align_end_down;
> > > 
> > > 	struct chunk *c = MEM_TO_CHUNK(start + OVERHEAD), *n = MEM_TO_CHUNK(end);
> > > 	c->psize = n->csize = C_INUSE;
> > > 	c->csize = n->psize = C_INUSE | chunk_size;
> > > 	bin_chunk(c);
> > > }
> > > 
> > > The above addresses the alignment issue, and I've responded to other
> > > concerns. Do you need a new patch with this?
> > 
> > I want something that I'm confident is safe to apply. And I want
> > progress made reviewing to be a step towards commit, not something
> > that gets thrown out every time there's a new version of the patch
> > with a completely different approach.
> > 
> > I'm perfectly ok with committing the slightly-fixed variant of your
> > first version I posted, and that's probably my leaning unless there's
> > a strong reason to prefer a different approach. If there is, the new
> > patch needs to be convincing that it's correct, and should not require
> > restarting the review process all over again...
> 
> If you prefer the new approach, is the following version okay? It
> avoids mixing signed and unsigned arithmetic (or any expressions where
> the the value could exceed the range [0,SIZE_MAX/2-1]).
> 
> void __malloc_donate(char *start, char *end)
> {
> 	size_t align_start_up = (SIZE_ALIGN - 1) & (-(uintptr_t)start - OVERHEAD);
> 	size_t align_end_down = (SIZE_ALIGN - 1) & (uintptr_t)end;
> 
> 	if (end - start <= OVERHEAD + align_start_up + align_end_down)
> 		return;
> 	start += align_start_up + OVERHEAD;
> 	end   -= align_end_down;
> 
> 	struct chunk *c = MEM_TO_CHUNK(start), *n = MEM_TO_CHUNK(end);
> 	c->psize = n->csize = C_INUSE;
> 	c->csize = n->psize = C_INUSE | (end-start);
> 	bin_chunk(c);
> }
> 
> Strictly speaking moving +OVERHEAD into start wasn't needed, but I did
> it so the idiomatic end-start could be used for the size below rather
> than recomputing it in a potentially inconsistent or un-idiomatic way.
> 
> At this point it's actually very close to the proposed fix-up (with
> [...]

Testing now, at least one problem still remains: end<start is actually
possible due to the way ldso calls __malloc_donate. The current
upstream reclaim() checked that explicitly and bailed out; the patch
removes the check.

The form:

	if (chunk_size < OVERHEAD + SIZE_ALIGN) return;

would catch this if not for the signedness issue. But at this point
I'm really tired of trying to work out all sorts of fragile subtleties
like that. I'm just going to put the check back where it was and
commit something based on this, and we can make further improvements
later if desired.

Rich


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] ldso, malloc: implement reclaim_gaps via __malloc_donate
  2018-04-17 19:01           ` Rich Felker
@ 2018-04-17 20:52             ` Alexander Monakov
  2018-04-17 22:21               ` Rich Felker
  0 siblings, 1 reply; 10+ messages in thread
From: Alexander Monakov @ 2018-04-17 20:52 UTC (permalink / raw)
  To: musl

> I'm really tired of trying to work out all sorts of fragile subtleties
> like that.

... and I regret pinging the patch :)

I did not anticipate this to cause a sizeable distraction and hoped on
balance it'd be a positive experience. Seems not.

Is a roll back to a non-irritated state possible?

Alexander


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] ldso, malloc: implement reclaim_gaps via __malloc_donate
  2018-04-17 20:52             ` Alexander Monakov
@ 2018-04-17 22:21               ` Rich Felker
  0 siblings, 0 replies; 10+ messages in thread
From: Rich Felker @ 2018-04-17 22:21 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 689 bytes --]

On Tue, Apr 17, 2018 at 11:52:59PM +0300, Alexander Monakov wrote:
> > I'm really tired of trying to work out all sorts of fragile subtleties
> > like that.
> 
> .... and I regret pinging the patch :)

:)

Don't worry, the outcome is better than if you hadn't I think. It was
a prerequisite for other stuff people are requesting about
interposable malloc and sparked some other fixes I have pending now.

> I did not anticipate this to cause a sizeable distraction and hoped on
> balance it'd be a positive experience. Seems not.
> 
> Is a roll back to a non-irritated state possible?

Yes. I'll attach what I have queued up for possible push. Let me know
if it looks okay with you.

Rich

[-- Attachment #2: 0001-ldso-malloc-implement-reclaim_gaps-via-__malloc_dona.patch --]
[-- Type: text/plain, Size: 4068 bytes --]

From 4c651a999f00f5be3a2882c2e8f37a0431fc4347 Mon Sep 17 00:00:00 2001
From: Alexander Monakov <amonakov@ispras.ru>
Date: Mon, 16 Apr 2018 20:54:36 +0300
Subject: [PATCH] ldso, malloc: implement reclaim_gaps via __malloc_donate

Split 'free' into unmap_chunk and bin_chunk, use the latter to introduce
__malloc_donate and use it in reclaim_gaps instead of calling 'free'.
---
 ldso/dynlink.c      | 16 ++++----------
 src/malloc/malloc.c | 61 +++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 47 insertions(+), 30 deletions(-)

diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index 9bf6924..b9ff41b 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -476,23 +476,15 @@ static void redo_lazy_relocs()
 /* A huge hack: to make up for the wastefulness of shared libraries
  * needing at least a page of dirty memory even if they have no global
  * data, we reclaim the gaps at the beginning and end of writable maps
- * and "donate" them to the heap by setting up minimal malloc
- * structures and then freeing them. */
+ * and "donate" them to the heap. */
 
 static void reclaim(struct dso *dso, size_t start, size_t end)
 {
-	size_t *a, *z;
+	void __malloc_donate(char *, char *);
 	if (start >= dso->relro_start && start < dso->relro_end) start = dso->relro_end;
 	if (end   >= dso->relro_start && end   < dso->relro_end) end = dso->relro_start;
-	start = start + 6*sizeof(size_t)-1 & -4*sizeof(size_t);
-	end = (end & -4*sizeof(size_t)) - 2*sizeof(size_t);
-	if (start>end || end-start < 4*sizeof(size_t)) return;
-	a = laddr(dso, start);
-	z = laddr(dso, end);
-	a[-2] = 1;
-	a[-1] = z[0] = end-start + 2*sizeof(size_t) | 1;
-	z[1] = 1;
-	free(a);
+	if (start >= end) return;
+	__malloc_donate(laddr(dso, start), laddr(dso, end));
 }
 
 static void reclaim_gaps(struct dso *dso)
diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c
index db19bc3..6605ec3 100644
--- a/src/malloc/malloc.c
+++ b/src/malloc/malloc.c
@@ -299,6 +299,8 @@ static int pretrim(struct chunk *self, size_t n, int i, int j)
 	return 1;
 }
 
+static void bin_chunk(struct chunk *);
+
 static void trim(struct chunk *self, size_t n)
 {
 	size_t n1 = CHUNK_SIZE(self);
@@ -314,7 +316,7 @@ static void trim(struct chunk *self, size_t n)
 	next->psize = n1-n | C_INUSE;
 	self->csize = n | C_INUSE;
 
-	free(CHUNK_TO_MEM(split));
+	bin_chunk(split);
 }
 
 void *malloc(size_t n)
@@ -465,29 +467,14 @@ copy_free_ret:
 	return new;
 }
 
-void free(void *p)
+static void bin_chunk(struct chunk *self)
 {
-	struct chunk *self, *next;
+	struct chunk *next = NEXT_CHUNK(self);
 	size_t final_size, new_size, size;
 	int reclaim=0;
 	int i;
 
-	if (!p) return;
-
-	self = MEM_TO_CHUNK(p);
-
-	if (IS_MMAPPED(self)) {
-		size_t extra = self->psize;
-		char *base = (char *)self - extra;
-		size_t len = CHUNK_SIZE(self) + extra;
-		/* Crash on double free */
-		if (extra & 1) a_crash();
-		__munmap(base, len);
-		return;
-	}
-
 	final_size = new_size = CHUNK_SIZE(self);
-	next = NEXT_CHUNK(self);
 
 	/* Crash on corrupted footer (likely from buffer overflow) */
 	if (next->psize != self->csize) a_crash();
@@ -548,3 +535,41 @@ void free(void *p)
 
 	unlock_bin(i);
 }
+
+static void unmap_chunk(struct chunk *self)
+{
+	size_t extra = self->psize;
+	char *base = (char *)self - extra;
+	size_t len = CHUNK_SIZE(self) + extra;
+	/* Crash on double free */
+	if (extra & 1) a_crash();
+	__munmap(base, len);
+}
+
+void free(void *p)
+{
+	if (!p) return;
+
+	struct chunk *self = MEM_TO_CHUNK(p);
+
+	if (IS_MMAPPED(self))
+		unmap_chunk(self);
+	else
+		bin_chunk(self);
+}
+
+void __malloc_donate(char *start, char *end)
+{
+	size_t align_start_up = (SIZE_ALIGN-1) & (-(uintptr_t)start - OVERHEAD);
+	size_t align_end_down = (SIZE_ALIGN-1) & (uintptr_t)end;
+
+	if (end - start <= OVERHEAD + align_start_up + align_end_down)
+		return;
+	start += align_start_up + OVERHEAD;
+	end   -= align_end_down;
+
+	struct chunk *c = MEM_TO_CHUNK(start), *n = MEM_TO_CHUNK(end);
+	c->psize = n->csize = C_INUSE;
+	c->csize = n->psize = C_INUSE | (end-start);
+	bin_chunk(c);
+}
-- 
2.10.0


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] malloc: fix an over-allocation bug
  2018-04-16 17:54 [PATCH 1/2] malloc: fix an over-allocation bug Alexander Monakov
  2018-04-16 17:54 ` [PATCH 2/2] ldso, malloc: implement reclaim_gaps via __malloc_donate Alexander Monakov
@ 2018-04-16 22:40 ` Rich Felker
  1 sibling, 0 replies; 10+ messages in thread
From: Rich Felker @ 2018-04-16 22:40 UTC (permalink / raw)
  To: musl

On Mon, Apr 16, 2018 at 08:54:35PM +0300, Alexander Monakov wrote:
> Fix an instance where realloc code would overallocate by OVERHEAD bytes
> amount. Manually arrange for reuse of memcpy-free-return exit sequence.
> ---
>  src/malloc/malloc.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c
> index 9e05e1d6..1af4ae5a 100644
> --- a/src/malloc/malloc.c
> +++ b/src/malloc/malloc.c
> @@ -397,10 +397,9 @@ void *realloc(void *p, size_t n)
>  		size_t newlen = n + extra;
>  		/* Crash on realloc of freed chunk */
>  		if (extra & 1) a_crash();
> -		if (newlen < PAGE_SIZE && (new = malloc(n))) {
> -			memcpy(new, p, n-OVERHEAD);
> -			free(p);
> -			return new;
> +		if (newlen < PAGE_SIZE && (new = malloc(n-OVERHEAD))) {
> +			n0 = n;
> +			goto copy_free_ret;
>  		}
>  		newlen = (newlen + PAGE_SIZE-1) & -PAGE_SIZE;
>  		if (oldlen == newlen) return p;
> @@ -443,6 +442,7 @@ copy_realloc:
>  	/* As a last resort, allocate a new chunk and copy to it. */
>  	new = malloc(n-OVERHEAD);
>  	if (!new) return 0;
> +copy_free_ret:
>  	memcpy(new, p, n0-OVERHEAD);
>  	free(CHUNK_TO_MEM(self));
>  	return new;

Looks ok. I've got this queued as submitted.

Rich


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2018-04-17 22:21 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-16 17:54 [PATCH 1/2] malloc: fix an over-allocation bug Alexander Monakov
2018-04-16 17:54 ` [PATCH 2/2] ldso, malloc: implement reclaim_gaps via __malloc_donate Alexander Monakov
2018-04-16 22:39   ` Rich Felker
2018-04-17  5:30     ` Alexander Monakov
2018-04-17 15:57       ` Rich Felker
2018-04-17 17:06         ` Rich Felker
2018-04-17 19:01           ` Rich Felker
2018-04-17 20:52             ` Alexander Monakov
2018-04-17 22:21               ` Rich Felker
2018-04-16 22:40 ` [PATCH 1/2] malloc: fix an over-allocation bug Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).