From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on inbox.vuxu.org X-Spam-Level: X-Spam-Status: No, score=-2.8 required=5.0 tests=DKIM_ADSP_CUSTOM_MED, DKIM_INVALID,DKIM_SIGNED,FREEMAIL_FORGED_FROMDOMAIN,FREEMAIL_FROM, HEADER_FROM_DIFFERENT_DOMAINS,MAILING_LIST_MULTI,RCVD_IN_DNSWL_MED, RCVD_IN_MSPIKE_H4,RCVD_IN_MSPIKE_WL autolearn=ham autolearn_force=no version=3.4.4 Received: from second.openwall.net (second.openwall.net [193.110.157.125]) by inbox.vuxu.org (Postfix) with SMTP id DE06E22B7F for ; Wed, 26 Jun 2024 10:44:19 +0200 (CEST) Received: (qmail 23989 invoked by uid 550); 26 Jun 2024 08:44:14 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-ID: Reply-To: musl@lists.openwall.com Received: (qmail 23945 invoked from network); 26 Jun 2024 08:44:14 -0000 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1719391444; x=1719996244; darn=lists.openwall.com; h=content-transfer-encoding:mime-version:message-id:date:subject:cc :to:from:from:to:cc:subject:date:message-id:reply-to; bh=HEhWDjUWhsuTQLt+Yaem20J4TXVgSttEDPVE+y73jLc=; b=BDhEjP03Xz3g4sQ+4/cacO8dnHFEphvbzCrXaCHM1O0+GJMZ7yMDTSsVZygqr0hnqy p+cbNcUT025SVhRlJ45ZJx56N9v3xahLOLO7l6c8NCOEaLdPoHizC7lnbMcvww1Fiono rvpbsp/Ig+D3OgYOkVp/eWvXKDwHvkD7Hk/l/rcfDB1uQ4nie9jd7aDe5A11XVOlaolS dAgRAV+xXRWRB1TkJoXUfLBWtVqhk63QYgBPuH/rm/7+KmcRh152c/KU2o4LxCp1l8Xj gXXQxph5sbyP/UifksqTouoL8G6HyKo4fd3tJzh0hezKpkO1pp2ols13VFSO7vNuFHUP lldQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1719391444; x=1719996244; h=content-transfer-encoding:mime-version:message-id:date:subject:cc :to:from:x-gm-message-state:from:to:cc:subject:date:message-id :reply-to; bh=HEhWDjUWhsuTQLt+Yaem20J4TXVgSttEDPVE+y73jLc=; b=TmcItj3GgP9qGvRrutl7dZQICbnCJECS70hKmXyrJ2J2nWuVrxU9ycnZbPuMOwoiWc 
xuVuysJilqZBz0+/j46stK286u7bAtZayjelNpNeJLVevU8ONwA5AZqo4Xpg8nIH4GBt xndmJvUvPcUrwMDpLf5ii3cO34MhgIOAksSOHowzzv4jszMYWQPGT68LYQgeji56Rup0 qf/DiFVzu44QRH7FSaFgCZAI5rxCIzmSezF54pnDqfEikZlvkiAF91Rk9fbr+5T06MJo vCDTWdjE7YgKUyEWpj4YG/j2mU2uDYUwcakAN7pXQufc+cA/aiHHz+7On2YnSLIVizsV UktA== X-Gm-Message-State: AOJu0YzyiXMZ+b/Wx/8NzR0O5xTtGFQvATG4G/N7H6Bkn93SIM7wFrHH FqlYx+K/wUlY+fj0XgqWZ2+EQJdgqbJxSU93D8x9pBiaI0snQUMklC2hI45n X-Google-Smtp-Source: AGHT+IHkLubkLpWTJbTSPy6xaYqpApDacx1TbLgE9Oke1P3PjW1BZ/qMZ9sRUgrz91mdWW2nSpnzog== X-Received: by 2002:a05:600c:4fc9:b0:424:8dc4:ee3f with SMTP id 5b1f17b1804b1-4248dc4ef5bmr89387755e9.32.1719391443791; Wed, 26 Jun 2024 01:44:03 -0700 (PDT) From: Stefan Jumarea To: musl@lists.openwall.com Cc: dalias@libc.org Date: Wed, 26 Jun 2024 11:43:45 +0300 Message-ID: <20240626084359.3241921-1-stefanjumarea02@gmail.com> X-Mailer: git-send-email 2.43.0 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [musl] [PATCH v2] mallocng: Add MTE support for Aarch64 Add support for Memory Tagging Extension. All the memory tagging code is placed within compiler guards, and is enabled by using the `--enable-mte` configure option. The option can only be used if compiling for Aarch64. All the primitives for generating, storing and loading the memory tags are placed in a new header under `arch/aarch64/`. For now, only the actual user data is tagged. All metadata is untagged. 
Signed-off-by: Stefan Jumarea --- arch/aarch64/mte.h | 41 +++++++++++++++ configure | 17 +++++++ src/malloc/mallocng/aligned_alloc.c | 20 ++++---- src/malloc/mallocng/free.c | 25 +++++++-- src/malloc/mallocng/malloc.c | 9 +++- src/malloc/mallocng/meta.h | 79 ++++++++++++++++++++++++----- src/malloc/mallocng/realloc.c | 24 ++++++--- 7 files changed, 181 insertions(+), 34 deletions(-) create mode 100644 arch/aarch64/mte.h diff --git a/arch/aarch64/mte.h b/arch/aarch64/mte.h new file mode 100644 index 00000000..0cd52e1a --- /dev/null +++ b/arch/aarch64/mte.h @@ -0,0 +1,41 @@ +#include + +#define MTE_TAG_GRANULE 16 +#define MTE_TAG_MASK (0xFULL << 56) + +/** + * Read the allocated tag for `addr`. + */ +static inline uintptr_t mte_load_tag(uintptr_t addr) +{ + uintptr_t tag; + + __asm__ __volatile__ ("ldg %0, [%1]\n" + : "=&r" (tag) : "r"(addr)); + + return tag; +} + +/** + * Store the allocated tag for `addr`. + * The tag is derived from `addr`. + */ +static inline void mte_store_tag(uintptr_t addr) +{ + __asm__ __volatile__ ("stg %0, [%0]\n" + : : "r"(addr) : "memory"); +} + +/** + * Tag `addr` with random tag. + * If the address is already tagged, make sure the new tag differs. + */ +static inline uintptr_t mte_insert_random_tag(uintptr_t addr) +{ + uintptr_t reg; + + __asm__ __volatile__("gmi %0, %1, xzr\n" + "irg %1, %1, %0\n" + : "=&r"(reg), "+r" (addr)); + return addr; +} diff --git a/configure b/configure index bc9fbe48..edcd4911 100755 --- a/configure +++ b/configure @@ -34,6 +34,8 @@ Optional features: --enable-wrapper=... build given musl toolchain wrapper [auto] --disable-shared inhibit building shared library [enabled] --disable-static inhibit building static library [enabled] + --enable-mte build with MTE support [disabled] + only available for aarch64 and mallocng Optional packages: --with-malloc=... 
choose malloc implementation [mallocng] @@ -139,6 +141,7 @@ debug=no warnings=yes shared=auto static=yes +mte=no wrapper=auto gcc_wrapper=no clang_wrapper=no @@ -158,6 +161,8 @@ case "$arg" in --disable-shared|--enable-shared=no) shared=no ;; --enable-static|--enable-static=yes) static=yes ;; --disable-static|--enable-static=no) static=no ;; +--enable-mte|--enable-mte=yes) mte=yes ;; +--disable-mte|--enable-mte=no) mte=no ;; --enable-optimize) optimize=yes ;; --enable-optimize=*) optimize=${arg#*=} ;; --disable-optimize) optimize=no ;; @@ -790,6 +795,18 @@ if trycppif "__FAST_MATH__" \ fail "$0: error: compiler has broken floating point; check CFLAGS" fi +if test "$mte" = "yes" ; then + printf "Checking whether target architecture supports MTE... " + if test "$ARCH" != "aarch64"; then + printf "no\n" + fail "$0: error: mte only supported with aarch64" + fi + + printf "yes\n" + CFLAGS_AUTO="$CFLAGS_AUTO -DMEMTAG -march=armv8.5-a+memtag" + SUBARCH=${SUBARCH}+memtag +fi + printf "creating config.mak... " cmdline=$(quote "$0") diff --git a/src/malloc/mallocng/aligned_alloc.c b/src/malloc/mallocng/aligned_alloc.c index e0862a83..7a7182ed 100644 --- a/src/malloc/mallocng/aligned_alloc.c +++ b/src/malloc/mallocng/aligned_alloc.c @@ -25,31 +25,33 @@ void *aligned_alloc(size_t align, size_t len) if (!p) return 0; + unsigned char *untagged = untag(p); struct meta *g = get_meta(p); - int idx = get_slot_index(p); + int idx = get_slot_index(untagged); size_t stride = get_stride(g); unsigned char *start = g->mem->storage + stride*idx; unsigned char *end = g->mem->storage + stride*(idx+1) - IB; size_t adj = -(uintptr_t)p & (align-1); if (!adj) { - set_size(p, end, len); + set_size(untagged, end, len); return p; } p += adj; + untagged += adj; uint32_t offset = (size_t)(p-g->mem->storage)/UNIT; if (offset <= 0xffff) { - *(uint16_t *)(p-2) = offset; - p[-4] = 0; + *(uint16_t *)(untagged-2) = offset; + untagged[-4] = 0; } else { // use a 32-bit offset if 16-bit doesn't fit. 
for this, // 16-bit field must be zero, [-4] byte nonzero. - *(uint16_t *)(p-2) = 0; - *(uint32_t *)(p-8) = offset; - p[-4] = 1; + *(uint16_t *)(untagged-2) = 0; + *(uint32_t *)(untagged-8) = offset; + untagged[-4] = 1; } - p[-3] = idx; - set_size(p, end, len); + untagged[-3] = idx; + set_size(untagged, end, len); // store offset to aligned enframing. this facilitates cycling // offset and also iteration of heap for debugging/measurement. // for extreme overalignment it won't fit but these are classless diff --git a/src/malloc/mallocng/free.c b/src/malloc/mallocng/free.c index 43f32aad..39993a56 100644 --- a/src/malloc/mallocng/free.c +++ b/src/malloc/mallocng/free.c @@ -25,8 +25,9 @@ static struct mapinfo free_group(struct meta *g) mi.len = g->maplen*4096UL; } else { void *p = g->mem; + unsigned char *untagged = untag(p); struct meta *m = get_meta(p); - int idx = get_slot_index(p); + int idx = get_slot_index(untagged); g->mem->meta = 0; // not checking size/reserved here; it's intentionally invalid mi = nontrivial_free(m, idx); @@ -102,17 +103,31 @@ void free(void *p) { if (!p) return; + void *untagged = untag(p); + struct meta *g = get_meta(p); - int idx = get_slot_index(p); + int idx = get_slot_index(untagged); size_t stride = get_stride(g); unsigned char *start = g->mem->storage + stride*idx; unsigned char *end = start + stride - IB; - get_nominal_size(p, end); + +#ifdef MEMTAG + size_t nom_size = get_nominal_size(untagged, end); + + // Check that p has the proper tag before zero-tagging + // Should raise an exception if p has the wrong tag. + // If the pointer was obtained via a 0-size alloc, skip the tag check. + if (nom_size > 0) + ((unsigned char *)p)[0] = 0; + + untag_region(untagged, 0, nom_size); +#endif + uint32_t self = 1u<<idx, all = (2u<<g->last_idx)-1; - ((unsigned char *)p)[-3] = 255; + ((unsigned char *)untagged)[-3] = 255; // invalidate offset to group header, and cycle offset of // used region within slot if current offset is zero.
- *(uint16_t *)((char *)p-2) = 0; + *(uint16_t *)((char *)untagged-2) = 0; // release any whole pages contained in the slot to be freed // unless it's a single-slot group that will be unmapped. diff --git a/src/malloc/mallocng/malloc.c b/src/malloc/mallocng/malloc.c index d695ab8e..1d910b26 100644 --- a/src/malloc/mallocng/malloc.c +++ b/src/malloc/mallocng/malloc.c @@ -304,6 +304,11 @@ void *malloc(size_t n) int sc; int idx; int ctr; +#ifdef MEMTAG + size_t required_size = ALIGN_UP(n, 16); +#else + size_t required_size = n; +#endif if (n >= MMAP_THRESHOLD) { size_t needed = n + IB + UNIT; @@ -376,7 +381,9 @@ void *malloc(size_t n) success: ctr = ctx.mmap_counter; unlock(); - return enframe(g, idx, n, ctr); + + void *ptr = enframe(g, idx, required_size, ctr); + return tag_region(ptr, n); } int is_allzero(void *p) diff --git a/src/malloc/mallocng/meta.h b/src/malloc/mallocng/meta.h index 61ec53f9..98ba17be 100644 --- a/src/malloc/mallocng/meta.h +++ b/src/malloc/mallocng/meta.h @@ -4,6 +4,9 @@ #include #include #include +#ifdef MEMTAG +#include +#endif #include "glue.h" __attribute__((__visibility__("hidden"))) @@ -14,6 +17,10 @@ extern const uint16_t size_classes[]; #define UNIT 16 #define IB 4 +#ifndef ALIGN_UP +#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1)) +#endif + struct group { struct meta *meta; unsigned char active_idx:5; @@ -72,6 +79,44 @@ struct meta *alloc_meta(void); __attribute__((__visibility__("hidden"))) int is_allzero(void *); +static inline unsigned char *untag(void *p) +{ +#ifdef MEMTAG + return (unsigned char *)((uintptr_t)p & ~MTE_TAG_MASK); +#else + return (unsigned char *)p; +#endif +} + +static inline void *tag_region(void *p, size_t n) +{ +#ifdef MEMTAG + uintptr_t addr = mte_insert_random_tag((uintptr_t)p); + + // if n == 0 implement the allocation as a wrong tag + // (the address is not tagged, but the returned pointer is). 
+ // The pointer can be passed to free(), but accessing it will + // result in a tag mismatch. + if (n == 0) + return (void *)addr; + + for (size_t i = 0; i < ALIGN_UP(n, 16); i += 16) + mte_store_tag(addr + i); + + return (void *)addr; +#else + return p; +#endif +} + +static inline void untag_region(void *p, size_t start, size_t end) +{ +#ifdef MEMTAG + for (size_t i = ALIGN_UP(start, 16); i < ALIGN_UP(end, 16); i += 16) + mte_store_tag((uintptr_t)((char *)p + i)); +#endif +} + static inline void queue(struct meta **phead, struct meta *m) { assert(!m->next); @@ -129,14 +174,15 @@ static inline int get_slot_index(const unsigned char *p) static inline struct meta *get_meta(const unsigned char *p) { assert(!((uintptr_t)p & 15)); - int offset = *(const uint16_t *)(p - 2); - int index = get_slot_index(p); - if (p[-4]) { + const unsigned char *untagged = untag((void *)p); + int offset = *(const uint16_t *)(untagged - 2); + int index = get_slot_index(untagged); + if (untagged[-4]) { assert(!offset); - offset = *(uint32_t *)(p - 8); + offset = *(uint32_t *)(untagged - 8); assert(offset > 0xffff); } - const struct group *base = (const void *)(p - UNIT*offset - UNIT); + const struct group *base = (const void *)(untagged - UNIT*offset - UNIT); const struct meta *meta = base->meta; assert(meta->mem == base); assert(index <= meta->last_idx); @@ -199,10 +245,11 @@ static inline void *enframe(struct meta *g, int idx, size_t n, int ctr) size_t slack = (stride-IB-n)/UNIT; unsigned char *p = g->mem->storage + stride*idx; unsigned char *end = p+stride-IB; + unsigned char *untagged = untag(p); // cycle offset within slot to increase interval to address // reuse, facilitate trapping double-free. - int off = (p[-3] ? *(uint16_t *)(p-2) + 1 : ctr) & 255; - assert(!p[-4]); + int off = (untagged[-3] ? 
*(uint16_t *)(untagged-2) + 1 : ctr) & 255; + assert(!untagged[-4]); if (off > slack) { size_t m = slack; m |= m>>1; m |= m>>2; m |= m>>4; @@ -213,21 +260,27 @@ static inline void *enframe(struct meta *g, int idx, size_t n, int ctr) if (off) { // store offset in unused header at offset zero // if enframing at non-zero offset. - *(uint16_t *)(p-2) = off; - p[-3] = 7<<5; + *(uint16_t *)(untagged-2) = off; + untagged[-3] = 7<<5; p += UNIT*off; + untagged += UNIT*off; // for nonzero offset there is no permanent check // byte, so make one. - p[-4] = 0; + untagged[-4] = 0; } - *(uint16_t *)(p-2) = (size_t)(p-g->mem->storage)/UNIT; - p[-3] = idx; - set_size(p, end, n); + *(uint16_t *)(untagged-2) = (size_t)(untagged-g->mem->storage)/UNIT; + untagged[-3] = idx; + set_size(untagged, end, n); + return p; } static inline int size_to_class(size_t n) { +#ifdef MEMTAG + n = ALIGN_UP(n, 16); +#endif + n = (n+IB-1)>>4; if (n<10) return n; n++; diff --git a/src/malloc/mallocng/realloc.c b/src/malloc/mallocng/realloc.c index 18769f42..a22b8226 100644 --- a/src/malloc/mallocng/realloc.c +++ b/src/malloc/mallocng/realloc.c @@ -1,4 +1,5 @@ #define _GNU_SOURCE +#include #include #include #include @@ -9,20 +10,31 @@ void *realloc(void *p, size_t n) if (!p) return malloc(n); if (size_overflows(n)) return 0; +#ifdef MEMTAG + size_t required_size = ALIGN_UP(n, 16); +#else + size_t required_size = n; +#endif + unsigned char *untagged = untag(p); struct meta *g = get_meta(p); - int idx = get_slot_index(p); + int idx = get_slot_index(untagged); size_t stride = get_stride(g); unsigned char *start = g->mem->storage + stride*idx; unsigned char *end = start + stride - IB; - size_t old_size = get_nominal_size(p, end); - size_t avail_size = end-(unsigned char *)p; + size_t old_size = get_nominal_size(untagged, end); + size_t avail_size = end-(unsigned char *)untagged; void *new; // only resize in-place if size class matches - if (n <= avail_size && n= g->sizeclass) { - set_size(p, end, n); - return 
p; + + // zero-tag the free space left + untag_region(untagged, n, old_size); + + set_size(untagged, end, required_size); + + return tag_region(p, n); } // use mremap if old and new size are both mmap-worthy -- 2.43.0