From: Stefan Jumarea <stefanjumarea02@gmail.com>
To: musl@lists.openwall.com
Cc: dalias@libc.org, Stefan Jumarea <stefanjumarea02@gmail.com>
Subject: [musl] [PATCH v2] mallocng: Add MTE support for Aarch64
Date: Mon, 5 Aug 2024 12:07:44 +0300 [thread overview]
Message-ID: <20240805090743.2402762-2-stefanjumarea02@gmail.com> (raw)
Add support for Memory Tagging Extension.
All the memory tagging code is placed within preprocessor guards
(`#ifdef MEMTAG`), and is enabled by using the `--enable-mte` configure option.
The option can only be used when compiling for AArch64.
All the primitives for generating, storing and loading the memory tags
are placed in a new header under `arch/aarch64/`.
For now, only the actual user data is tagged. All metadata is untagged.
Signed-off-by: Stefan Jumarea <stefanjumarea02@gmail.com>
---
arch/aarch64/mte.h | 41 +++++++++++++++
configure | 17 +++++++
src/malloc/mallocng/aligned_alloc.c | 20 ++++----
src/malloc/mallocng/free.c | 25 +++++++--
src/malloc/mallocng/malloc.c | 9 +++-
src/malloc/mallocng/meta.h | 79 ++++++++++++++++++++++++-----
src/malloc/mallocng/realloc.c | 24 ++++++---
7 files changed, 181 insertions(+), 34 deletions(-)
create mode 100644 arch/aarch64/mte.h
diff --git a/arch/aarch64/mte.h b/arch/aarch64/mte.h
new file mode 100644
index 00000000..0cd52e1a
--- /dev/null
+++ b/arch/aarch64/mte.h
@@ -0,0 +1,41 @@
+#include <stdint.h>
+
+#define MTE_TAG_GRANULE 16
+#define MTE_TAG_MASK (0xFULL << 56)
+
+/**
+ * Return `addr` with its tag bits replaced by the allocation tag of its granule.
+ */
+static inline uintptr_t mte_load_tag(uintptr_t addr)
+{
+ uintptr_t tag = addr; // ldg only merges the tag into %0, so seed the other bits
+
+ __asm__ __volatile__ ("ldg %0, [%1]\n"
+ : "+r" (tag) : "r"(addr));
+
+ return tag;
+}
+
+/**
+ * Store the allocation tag for the granule addressed by `addr`.
+ * The tag stored is the one carried in `addr` itself (stg gets `addr` as both operands).
+ */
+static inline void mte_store_tag(uintptr_t addr)
+{
+ __asm__ __volatile__ ("stg %0, [%0]\n"
+ : : "r"(addr) : "memory");
+}
+
+/**
+ * Return `addr` with a random tag inserted by irg.
+ * gmi first puts the current tag of `addr` into the exclusion mask, so the new tag differs.
+ */
+static inline uintptr_t mte_insert_random_tag(uintptr_t addr)
+{
+ uintptr_t reg;
+
+ __asm__ __volatile__("gmi %0, %1, xzr\n"
+ "irg %1, %1, %0\n"
+ : "=&r"(reg), "+r" (addr));
+ return addr;
+}
diff --git a/configure b/configure
index bc9fbe48..edcd4911 100755
--- a/configure
+++ b/configure
@@ -34,6 +34,8 @@ Optional features:
--enable-wrapper=... build given musl toolchain wrapper [auto]
--disable-shared inhibit building shared library [enabled]
--disable-static inhibit building static library [enabled]
+ --enable-mte build with MTE support [disabled]
+ only available for aarch64 and mallocng
Optional packages:
--with-malloc=... choose malloc implementation [mallocng]
@@ -139,6 +141,7 @@ debug=no
warnings=yes
shared=auto
static=yes
+mte=no
wrapper=auto
gcc_wrapper=no
clang_wrapper=no
@@ -158,6 +161,8 @@ case "$arg" in
--disable-shared|--enable-shared=no) shared=no ;;
--enable-static|--enable-static=yes) static=yes ;;
--disable-static|--enable-static=no) static=no ;;
+--enable-mte|--enable-mte=yes) mte=yes ;;
+--disable-mte|--enable-mte=no) mte=no ;;
--enable-optimize) optimize=yes ;;
--enable-optimize=*) optimize=${arg#*=} ;;
--disable-optimize) optimize=no ;;
@@ -790,6 +795,18 @@ if trycppif "__FAST_MATH__" \
fail "$0: error: compiler has broken floating point; check CFLAGS"
fi
+if test "$mte" = "yes" ; then
+ printf "Checking whether target architecture supports MTE... "
+ if test "$ARCH" != "aarch64"; then
+ printf "no\n"
+ fail "$0: error: mte only supported with aarch64"
+ fi
+ # NOTE(review): the help text also names mallocng, but only $ARCH is checked here
+ printf "yes\n"
+ CFLAGS_AUTO="$CFLAGS_AUTO -DMEMTAG -march=armv8.5-a+memtag" # MEMTAG gates the tagging code
+ SUBARCH=${SUBARCH}+memtag # NOTE(review): confirm every consumer of SUBARCH expects this suffix
+fi
+
printf "creating config.mak... "
cmdline=$(quote "$0")
diff --git a/src/malloc/mallocng/aligned_alloc.c b/src/malloc/mallocng/aligned_alloc.c
index e0862a83..7a7182ed 100644
--- a/src/malloc/mallocng/aligned_alloc.c
+++ b/src/malloc/mallocng/aligned_alloc.c
@@ -25,31 +25,33 @@ void *aligned_alloc(size_t align, size_t len)
if (!p)
return 0;
+ unsigned char *untagged = untag(p);
struct meta *g = get_meta(p);
- int idx = get_slot_index(p);
+ int idx = get_slot_index(untagged);
size_t stride = get_stride(g);
unsigned char *start = g->mem->storage + stride*idx;
unsigned char *end = g->mem->storage + stride*(idx+1) - IB;
size_t adj = -(uintptr_t)p & (align-1);
if (!adj) {
- set_size(p, end, len);
+ set_size(untagged, end, len);
return p;
}
p += adj;
+ untagged += adj;
uint32_t offset = (size_t)(p-g->mem->storage)/UNIT;
if (offset <= 0xffff) {
- *(uint16_t *)(p-2) = offset;
- p[-4] = 0;
+ *(uint16_t *)(untagged-2) = offset;
+ untagged[-4] = 0;
} else {
// use a 32-bit offset if 16-bit doesn't fit. for this,
// 16-bit field must be zero, [-4] byte nonzero.
- *(uint16_t *)(p-2) = 0;
- *(uint32_t *)(p-8) = offset;
- p[-4] = 1;
+ *(uint16_t *)(untagged-2) = 0;
+ *(uint32_t *)(untagged-8) = offset;
+ untagged[-4] = 1;
}
- p[-3] = idx;
- set_size(p, end, len);
+ untagged[-3] = idx;
+ set_size(untagged, end, len);
// store offset to aligned enframing. this facilitates cycling
// offset and also iteration of heap for debugging/measurement.
// for extreme overalignment it won't fit but these are classless
diff --git a/src/malloc/mallocng/free.c b/src/malloc/mallocng/free.c
index 43f32aad..39993a56 100644
--- a/src/malloc/mallocng/free.c
+++ b/src/malloc/mallocng/free.c
@@ -25,8 +25,9 @@ static struct mapinfo free_group(struct meta *g)
mi.len = g->maplen*4096UL;
} else {
void *p = g->mem;
+ unsigned char *untagged = untag(p);
struct meta *m = get_meta(p);
- int idx = get_slot_index(p);
+ int idx = get_slot_index(untagged);
g->mem->meta = 0;
// not checking size/reserved here; it's intentionally invalid
mi = nontrivial_free(m, idx);
@@ -102,17 +103,31 @@ void free(void *p)
{
if (!p) return;
+ void *untagged = untag(p);
+
struct meta *g = get_meta(p);
- int idx = get_slot_index(p);
+ int idx = get_slot_index(untagged);
size_t stride = get_stride(g);
unsigned char *start = g->mem->storage + stride*idx;
unsigned char *end = start + stride - IB;
- get_nominal_size(p, end);
+
+#ifdef MEMTAG
+ size_t nom_size = get_nominal_size(untagged, end);
+
+ // Check that p has the proper tag before zero-tagging
+ // Should raise an exception if p has the wrong tag.
+ // If the pointer was obtained via a 0-size alloc, skip the tag check.
+ if (nom_size > 0)
+ ((unsigned char *)p)[0] = 0;
+
+ untag_region(untagged, 0, nom_size);
+#endif
+
uint32_t self = 1u<<idx, all = (2u<<g->last_idx)-1;
- ((unsigned char *)p)[-3] = 255;
+ ((unsigned char *)untagged)[-3] = 255;
// invalidate offset to group header, and cycle offset of
// used region within slot if current offset is zero.
- *(uint16_t *)((char *)p-2) = 0;
+ *(uint16_t *)((char *)untagged-2) = 0;
// release any whole pages contained in the slot to be freed
// unless it's a single-slot group that will be unmapped.
diff --git a/src/malloc/mallocng/malloc.c b/src/malloc/mallocng/malloc.c
index d695ab8e..1d910b26 100644
--- a/src/malloc/mallocng/malloc.c
+++ b/src/malloc/mallocng/malloc.c
@@ -304,6 +304,11 @@ void *malloc(size_t n)
int sc;
int idx;
int ctr;
+#ifdef MEMTAG
+ size_t required_size = ALIGN_UP(n, 16);
+#else
+ size_t required_size = n;
+#endif
if (n >= MMAP_THRESHOLD) {
size_t needed = n + IB + UNIT;
@@ -376,7 +381,9 @@ void *malloc(size_t n)
success:
ctr = ctx.mmap_counter;
unlock();
- return enframe(g, idx, n, ctr);
+
+ void *ptr = enframe(g, idx, required_size, ctr);
+ return tag_region(ptr, n);
}
int is_allzero(void *p)
diff --git a/src/malloc/mallocng/meta.h b/src/malloc/mallocng/meta.h
index 61ec53f9..98ba17be 100644
--- a/src/malloc/mallocng/meta.h
+++ b/src/malloc/mallocng/meta.h
@@ -4,6 +4,9 @@
#include <stdint.h>
#include <errno.h>
#include <limits.h>
+#ifdef MEMTAG
+#include <mte.h>
+#endif
#include "glue.h"
__attribute__((__visibility__("hidden")))
@@ -14,6 +17,10 @@ extern const uint16_t size_classes[];
#define UNIT 16
#define IB 4
+#ifndef ALIGN_UP // round p up to a multiple of size; size must be a power of two
+#define ALIGN_UP(p, size) ((__typeof__(p))(((uintptr_t)(p) + ((uintptr_t)(size) - 1)) & ~((uintptr_t)(size) - 1)))
+#endif
+
struct group {
struct meta *meta;
unsigned char active_idx:5;
@@ -72,6 +79,44 @@ struct meta *alloc_meta(void);
__attribute__((__visibility__("hidden")))
int is_allzero(void *);
+static inline unsigned char *untag(void *p)
+{
+ uintptr_t bits = (uintptr_t)p; // address of p; tag bits are cleared only under MEMTAG
+#ifdef MEMTAG
+ bits &= ~MTE_TAG_MASK;
+#endif
+ return (unsigned char *)bits;
+}
+
+static inline void *tag_region(void *p, size_t n)
+{
+#ifdef MEMTAG
+ // choose a fresh random tag for the pointer; the granules that
+ // cover the first n bytes are retagged to match it below.
+ uintptr_t tagged = mte_insert_random_tag((uintptr_t)p);
+ size_t span = ALIGN_UP(n, 16);
+
+ // n == 0 gives an empty span, so nothing is retagged: the memory
+ // keeps its old tag while the returned pointer has the new one.
+ // The pointer can be passed to free(), but accessing it will
+ // result in a tag mismatch.
+ for (size_t off = 0; off < span; off += 16)
+ mte_store_tag(tagged + off);
+
+ return (void *)tagged;
+#else
+ return p;
+#endif
+}
+
+static inline void untag_region(void *p, size_t start, size_t end)
+{
+#ifdef MEMTAG
+ for (size_t i = ALIGN_UP(start, 16); i < ALIGN_UP(end, 16); i += 16)
+ mte_store_tag((uintptr_t)(untag(p) + i)); // tag 0 is stored even if p is tagged
+#endif
+}
+
static inline void queue(struct meta **phead, struct meta *m)
{
assert(!m->next);
@@ -129,14 +174,15 @@ static inline int get_slot_index(const unsigned char *p)
static inline struct meta *get_meta(const unsigned char *p)
{
assert(!((uintptr_t)p & 15));
- int offset = *(const uint16_t *)(p - 2);
- int index = get_slot_index(p);
- if (p[-4]) {
+ const unsigned char *untagged = untag((void *)p);
+ int offset = *(const uint16_t *)(untagged - 2);
+ int index = get_slot_index(untagged);
+ if (untagged[-4]) {
assert(!offset);
- offset = *(uint32_t *)(p - 8);
+ offset = *(uint32_t *)(untagged - 8);
assert(offset > 0xffff);
}
- const struct group *base = (const void *)(p - UNIT*offset - UNIT);
+ const struct group *base = (const void *)(untagged - UNIT*offset - UNIT);
const struct meta *meta = base->meta;
assert(meta->mem == base);
assert(index <= meta->last_idx);
@@ -199,10 +245,11 @@ static inline void *enframe(struct meta *g, int idx, size_t n, int ctr)
size_t slack = (stride-IB-n)/UNIT;
unsigned char *p = g->mem->storage + stride*idx;
unsigned char *end = p+stride-IB;
+ unsigned char *untagged = untag(p);
// cycle offset within slot to increase interval to address
// reuse, facilitate trapping double-free.
- int off = (p[-3] ? *(uint16_t *)(p-2) + 1 : ctr) & 255;
- assert(!p[-4]);
+ int off = (untagged[-3] ? *(uint16_t *)(untagged-2) + 1 : ctr) & 255;
+ assert(!untagged[-4]);
if (off > slack) {
size_t m = slack;
m |= m>>1; m |= m>>2; m |= m>>4;
@@ -213,21 +260,27 @@ static inline void *enframe(struct meta *g, int idx, size_t n, int ctr)
if (off) {
// store offset in unused header at offset zero
// if enframing at non-zero offset.
- *(uint16_t *)(p-2) = off;
- p[-3] = 7<<5;
+ *(uint16_t *)(untagged-2) = off;
+ untagged[-3] = 7<<5;
p += UNIT*off;
+ untagged += UNIT*off;
// for nonzero offset there is no permanent check
// byte, so make one.
- p[-4] = 0;
+ untagged[-4] = 0;
}
- *(uint16_t *)(p-2) = (size_t)(p-g->mem->storage)/UNIT;
- p[-3] = idx;
- set_size(p, end, n);
+ *(uint16_t *)(untagged-2) = (size_t)(untagged-g->mem->storage)/UNIT;
+ untagged[-3] = idx;
+ set_size(untagged, end, n);
+
return p;
}
static inline int size_to_class(size_t n)
{
+#ifdef MEMTAG
+ n = ALIGN_UP(n, 16);
+#endif
+
n = (n+IB-1)>>4;
if (n<10) return n;
n++;
diff --git a/src/malloc/mallocng/realloc.c b/src/malloc/mallocng/realloc.c
index 18769f42..a22b8226 100644
--- a/src/malloc/mallocng/realloc.c
+++ b/src/malloc/mallocng/realloc.c
@@ -1,4 +1,5 @@
#define _GNU_SOURCE
+#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <string.h>
@@ -9,20 +10,31 @@ void *realloc(void *p, size_t n)
if (!p) return malloc(n);
if (size_overflows(n)) return 0;
+#ifdef MEMTAG
+ size_t required_size = ALIGN_UP(n, 16);
+#else
+ size_t required_size = n;
+#endif
+ unsigned char *untagged = untag(p);
struct meta *g = get_meta(p);
- int idx = get_slot_index(p);
+ int idx = get_slot_index(untagged);
size_t stride = get_stride(g);
unsigned char *start = g->mem->storage + stride*idx;
unsigned char *end = start + stride - IB;
- size_t old_size = get_nominal_size(p, end);
- size_t avail_size = end-(unsigned char *)p;
+ size_t old_size = get_nominal_size(untagged, end);
+ size_t avail_size = end-(unsigned char *)untagged;
void *new;
// only resize in-place if size class matches
- if (n <= avail_size && n<MMAP_THRESHOLD
+ if (required_size <= avail_size && n<MMAP_THRESHOLD
&& size_to_class(n)+1 >= g->sizeclass) {
- set_size(p, end, n);
- return p;
+
+ // zero-tag the free space left
+ untag_region(untagged, n, old_size);
+
+ set_size(untagged, end, required_size);
+
+ return tag_region(p, n);
}
// use mremap if old and new size are both mmap-worthy
--
2.43.0
next reply other threads:[~2024-08-05 9:09 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-05 9:07 Stefan Jumarea [this message]
-- strict thread matches above, loose matches on Subject: below --
2024-06-26 8:43 Stefan Jumarea
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240805090743.2402762-2-stefanjumarea02@gmail.com \
--to=stefanjumarea02@gmail.com \
--cc=dalias@libc.org \
--cc=musl@lists.openwall.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/musl/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).