From: Stefan Jumarea <stefanjumarea02@gmail.com>
To: musl@lists.openwall.com
Cc: dalias@libc.org, Stefan Jumarea <stefanjumarea02@gmail.com>
Date: Mon, 10 Jun 2024 15:36:25 +0300
Message-ID: <20240610123624.305051-2-stefanjumarea02@gmail.com>
Subject: [musl] [PATCH] mallocng: Add MTE support for Aarch64

Add support for the Memory Tagging Extension (MTE).

All the memory tagging code is placed within compiler guards, and is
enabled by using the `--enable-mte` configure option. The option can
only be used when compiling for AArch64.

All the primitives for generating, storing and loading the memory tags
are placed in a new header under `arch/aarch64/`.

For now, only the actual user data is tagged. All metadata is untagged.
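For illustration, once MEMTAG is defined the allocation path ends up doing
roughly the following (a simplified sketch using the primitives from the new
arch/aarch64/mte.h; tag_user_data() and untag() are hypothetical helpers used
here only for exposition, not functions added by the patch, and the header is
assumed reachable as <mte.h> via musl's internal arch include path):

#include <stddef.h>
#include <stdint.h>
#include <mte.h>	/* the new header added by this patch (arch/aarch64/mte.h) */

/* Hypothetical helper mirroring the MEMTAG path in malloc(): choose a
 * random allocation tag (excluding the slot's current one), colour every
 * 16-byte granule of the user data with it, and return the tagged
 * pointer. mte_get_exclude_mask(), mte_insert_random_tag() and
 * mte_store_tag() are the primitives from the new header. */
static void *tag_user_data(void *p, size_t n)
{
	uint64_t mask = mte_get_exclude_mask((uint64_t)p);
	uint64_t tagged = mte_insert_random_tag((uint64_t)p, mask);

	/* n has already been rounded up to MTE_TAG_GRANULE by malloc() */
	for (size_t i = 0; i < n; i += MTE_TAG_GRANULE)
		mte_store_tag(tagged + i);

	return (void *)tagged;
}

/* Metadata accesses (get_meta(), slot headers) strip the tag first,
 * since only the user data is tagged: */
static inline void *untag(void *p)
{
	return (void *)((uint64_t)p & ~MTE_TAG_MASK);
}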
Signed-off-by: Stefan Jumarea <stefanjumarea02@gmail.com>
---
 arch/aarch64/mte.h                  | 44 ++++++++++++++++++++++++++++
 configure                           | 17 +++++++++++
 src/malloc/mallocng/aligned_alloc.c | 24 +++++++++------
 src/malloc/mallocng/free.c          | 28 ++++++++++++++----
 src/malloc/mallocng/malloc.c        | 18 ++++++++++++
 src/malloc/mallocng/meta.h          | 45 ++++++++++++++++++++---------
 src/malloc/mallocng/realloc.c       | 34 ++++++++++++++++++----
 7 files changed, 178 insertions(+), 32 deletions(-)
 create mode 100644 arch/aarch64/mte.h

diff --git a/arch/aarch64/mte.h b/arch/aarch64/mte.h
new file mode 100644
index 00000000..35148381
--- /dev/null
+++ b/arch/aarch64/mte.h
@@ -0,0 +1,44 @@
+#include <stdint.h>
+
+#define MTE_TAG_GRANULE 16
+#define MTE_TAG_MASK (0xFULL << 56)
+
+static inline uint64_t mte_load_tag(uint64_t addr)
+{
+        uint64_t tag;
+
+        __asm__ __volatile__ ("ldg %x0, [%x1]\n"
+                : "=&r"(tag) : "r"(addr));
+
+        return tag;
+}
+
+static inline void mte_store_tag(uint64_t addr)
+{
+        __asm__ __volatile__ ("stg %0, [%0]"
+                : : "r"(addr) : "memory");
+}
+
+static inline void mte_store_zero_tag(uint64_t addr)
+{
+        __asm__ __volatile__ ("stzg %x0, [%x0]"
+                : : "r"(addr) : "memory");
+}
+
+static inline uint64_t mte_get_exclude_mask(uint64_t addr)
+{
+        uint64_t reg;
+
+        __asm__ __volatile__("gmi %x0, %x1, xzr\n"
+                : "=r"(reg) : "r" (addr));
+        return reg;
+}
+
+static inline uint64_t mte_insert_random_tag(uint64_t addr, uint64_t mask)
+{
+        uint64_t reg;
+
+        __asm__ __volatile__("irg %x0, %x2, %x1\n"
+                : "=r"(reg) : "r" (mask), "r" (addr));
+        return reg;
+}
diff --git a/configure b/configure
index bc9fbe48..edcd4911 100755
--- a/configure
+++ b/configure
@@ -34,6 +34,8 @@ Optional features:
   --enable-wrapper=...    build given musl toolchain wrapper [auto]
   --disable-shared        inhibit building shared library [enabled]
   --disable-static        inhibit building static library [enabled]
+  --enable-mte            build with MTE support [disabled]
+                          only available for aarch64 and mallocng
 
 Optional packages:
   --with-malloc=...       choose malloc implementation [mallocng]
@@ -139,6 +141,7 @@ debug=no
 warnings=yes
 shared=auto
 static=yes
+mte=no
 wrapper=auto
 gcc_wrapper=no
 clang_wrapper=no
@@ -158,6 +161,8 @@ case "$arg" in
 --disable-shared|--enable-shared=no) shared=no ;;
 --enable-static|--enable-static=yes) static=yes ;;
 --disable-static|--enable-static=no) static=no ;;
+--enable-mte|--enable-mte=yes) mte=yes ;;
+--disable-mte|--enable-mte=no) mte=no ;;
 --enable-optimize) optimize=yes ;;
 --enable-optimize=*) optimize=${arg#*=} ;;
 --disable-optimize) optimize=no ;;
@@ -790,6 +795,18 @@ if trycppif "__FAST_MATH__" \
 fail "$0: error: compiler has broken floating point; check CFLAGS"
 fi
 
+if test "$mte" = "yes" ; then
+printf "Checking whether target architecture supports MTE... "
+if test "$ARCH" != "aarch64"; then
+printf "no\n"
+fail "$0: error: mte only supported with aarch64"
+fi
+
+printf "yes\n"
+CFLAGS_AUTO="$CFLAGS_AUTO -DMEMTAG -march=armv8.5-a+memtag"
+SUBARCH=${SUBARCH}+memtag
+fi
+
 printf "creating config.mak... "
" cmdline=$(quote "$0") diff --git a/src/malloc/mallocng/aligned_alloc.c b/src/malloc/mallocng/aligned_alloc.c index e0862a83..2205f6bb 100644 --- a/src/malloc/mallocng/aligned_alloc.c +++ b/src/malloc/mallocng/aligned_alloc.c @@ -25,31 +25,37 @@ void *aligned_alloc(size_t align, size_t len) if (!p) return 0; +#ifdef MEMTAG + unsigned char *untagged = (unsigned char *)((uint64_t)p & ~MTE_TAG_MASK); +#else + unsigned char *untagged = p; +#endif struct meta *g = get_meta(p); - int idx = get_slot_index(p); + int idx = get_slot_index(untagged); size_t stride = get_stride(g); unsigned char *start = g->mem->storage + stride*idx; unsigned char *end = g->mem->storage + stride*(idx+1) - IB; size_t adj = -(uintptr_t)p & (align-1); if (!adj) { - set_size(p, end, len); + set_size(untagged, end, len); return p; } p += adj; + untagged += adj; uint32_t offset = (size_t)(p-g->mem->storage)/UNIT; if (offset <= 0xffff) { - *(uint16_t *)(p-2) = offset; - p[-4] = 0; + *(uint16_t *)(untagged-2) = offset; + untagged[-4] = 0; } else { // use a 32-bit offset if 16-bit doesn't fit. for this, // 16-bit field must be zero, [-4] byte nonzero. - *(uint16_t *)(p-2) = 0; - *(uint32_t *)(p-8) = offset; - p[-4] = 1; + *(uint16_t *)(untagged-2) = 0; + *(uint32_t *)(untagged-8) = offset; + untagged[-4] = 1; } - p[-3] = idx; - set_size(p, end, len); + untagged[-3] = idx; + set_size(untagged, end, len); // store offset to aligned enframing. this facilitates cycling // offset and also iteration of heap for debugging/measurement. // for extreme overalignment it won't fit but these are classless diff --git a/src/malloc/mallocng/free.c b/src/malloc/mallocng/free.c index 43f32aad..1a86c8eb 100644 --- a/src/malloc/mallocng/free.c +++ b/src/malloc/mallocng/free.c @@ -25,8 +25,13 @@ static struct mapinfo free_group(struct meta *g) mi.len = g->maplen*4096UL; } else { void *p = g->mem; +#ifdef MEMTAG + unsigned char *untagged = (unsigned char *)((uint64_t)p & ~MTE_TAG_MASK); +#else + unsigned char *untagged = p; +#endif struct meta *m = get_meta(p); - int idx = get_slot_index(p); + int idx = get_slot_index(untagged); g->mem->meta = 0; // not checking size/reserved here; it's intentionally invalid mi = nontrivial_free(m, idx); @@ -102,17 +107,30 @@ void free(void *p) { if (!p) return; +#ifdef MEMTAG + void *untagged = (void *)((uint64_t)p & ~MTE_TAG_MASK); +#else + void *untagged = p; +#endif + struct meta *g = get_meta(p); - int idx = get_slot_index(p); + int idx = get_slot_index(untagged); size_t stride = get_stride(g); unsigned char *start = g->mem->storage + stride*idx; unsigned char *end = start + stride - IB; - get_nominal_size(p, end); +#ifdef MEMTAG + size_t nom_size = get_nominal_size(untagged, end); +#endif uint32_t self = 1u<last_idx)-1; - ((unsigned char *)p)[-3] = 255; + ((unsigned char *)untagged)[-3] = 255; // invalidate offset to group header, and cycle offset of // used region within slot if current offset is zero. - *(uint16_t *)((char *)p-2) = 0; + *(uint16_t *)((char *)untagged-2) = 0; + +#ifdef MEMTAG + for (size_t i = 0; i < nom_size; i += 16) + mte_store_tag((uint64_t)((unsigned char *)untagged + i)); +#endif // release any whole pages contained in the slot to be freed // unless it's a single-slot group that will be unmapped. 
diff --git a/src/malloc/mallocng/malloc.c b/src/malloc/mallocng/malloc.c
index d695ab8e..89294526 100644
--- a/src/malloc/mallocng/malloc.c
+++ b/src/malloc/mallocng/malloc.c
@@ -298,6 +298,8 @@ static int alloc_slot(int sc, size_t req)
 
 void *malloc(size_t n)
 {
+	n = ALIGN_UP(n, 16);
+
 	if (size_overflows(n)) return 0;
 	struct meta *g;
 	uint32_t mask, first;
@@ -310,6 +312,9 @@ void *malloc(size_t n)
 		void *p = mmap(0, needed, PROT_READ|PROT_WRITE,
 			MAP_PRIVATE|MAP_ANON, -1, 0);
 		if (p==MAP_FAILED) return 0;
+
+
+
 		wrlock();
 		step_seq();
 		g = alloc_meta();
@@ -376,7 +381,20 @@ void *malloc(size_t n)
 success:
 	ctr = ctx.mmap_counter;
 	unlock();
+
+#if MEMTAG
+	void *ptr = enframe(g, idx, n, ctr);
+
+	uint64_t mask_mte = mte_get_exclude_mask((uint64_t)ptr);
+	uint64_t addr = mte_insert_random_tag((uint64_t)ptr, mask_mte);
+
+	for (size_t i = 0; i < n; i += 16)
+		mte_store_tag(addr + i);
+
+	return (void *)addr;
+#else
 	return enframe(g, idx, n, ctr);
+#endif
 }
 
 int is_allzero(void *p)
diff --git a/src/malloc/mallocng/meta.h b/src/malloc/mallocng/meta.h
index 61ec53f9..f5896fe4 100644
--- a/src/malloc/mallocng/meta.h
+++ b/src/malloc/mallocng/meta.h
@@ -4,6 +4,9 @@
 #include <stdint.h>
 #include <errno.h>
 #include <limits.h>
+#ifdef MEMTAG
+#include <mte.h>
+#endif
 #include "glue.h"
 
 __attribute__((__visibility__("hidden")))
@@ -14,6 +17,10 @@ extern const uint16_t size_classes[];
 #define UNIT 16
 #define IB 4
 
+#ifndef ALIGN_UP
+#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
+#endif
+
 struct group {
 	struct meta *meta;
 	unsigned char active_idx:5;
@@ -129,14 +136,19 @@ static inline int get_slot_index(const unsigned char *p)
 static inline struct meta *get_meta(const unsigned char *p)
 {
 	assert(!((uintptr_t)p & 15));
-	int offset = *(const uint16_t *)(p - 2);
-	int index = get_slot_index(p);
-	if (p[-4]) {
+#ifdef MEMTAG
+	const unsigned char *untagged = (const unsigned char *)((uint64_t)p & ~MTE_TAG_MASK);
+#else
+	const unsigned char *untagged = p;
+#endif
+	int offset = *(const uint16_t *)(untagged - 2);
+	int index = get_slot_index(untagged);
+	if (untagged[-4]) {
 		assert(!offset);
-		offset = *(uint32_t *)(p - 8);
+		offset = *(uint32_t *)(untagged - 8);
 		assert(offset > 0xffff);
 	}
-	const struct group *base = (const void *)(p - UNIT*offset - UNIT);
+	const struct group *base = (const void *)(untagged - UNIT*offset - UNIT);
 	const struct meta *meta = base->meta;
 	assert(meta->mem == base);
 	assert(index <= meta->last_idx);
@@ -199,10 +211,15 @@ static inline void *enframe(struct meta *g, int idx, size_t n, int ctr)
 	size_t slack = (stride-IB-n)/UNIT;
 	unsigned char *p = g->mem->storage + stride*idx;
 	unsigned char *end = p+stride-IB;
+#ifdef MEMTAG
+	unsigned char *untagged = (unsigned char *)((uint64_t)p & ~MTE_TAG_MASK);
+#else
+	unsigned char *untagged = p;
+#endif
 	// cycle offset within slot to increase interval to address
 	// reuse, facilitate trapping double-free.
-	int off = (p[-3] ? *(uint16_t *)(p-2) + 1 : ctr) & 255;
-	assert(!p[-4]);
+	int off = (untagged[-3] ? *(uint16_t *)(untagged-2) + 1 : ctr) & 255;
+	assert(!untagged[-4]);
 	if (off > slack) {
 		size_t m = slack;
 		m |= m>>1; m |= m>>2; m |= m>>4;
@@ -213,16 +230,18 @@ static inline void *enframe(struct meta *g, int idx, size_t n, int ctr)
 	if (off) {
 		// store offset in unused header at offset zero
 		// if enframing at non-zero offset.
-		*(uint16_t *)(p-2) = off;
-		p[-3] = 7<<5;
+		*(uint16_t *)(untagged-2) = off;
+		untagged[-3] = 7<<5;
 		p += UNIT*off;
+		untagged += UNIT*off;
 		// for nonzero offset there is no permanent check
 		// byte, so make one.
-		p[-4] = 0;
+		untagged[-4] = 0;
 	}
-	*(uint16_t *)(p-2) = (size_t)(p-g->mem->storage)/UNIT;
-	p[-3] = idx;
-	set_size(p, end, n);
+	*(uint16_t *)(untagged-2) = (size_t)(untagged-g->mem->storage)/UNIT;
+	untagged[-3] = idx;
+	set_size(untagged, end, n);
+
 	return p;
 }
 
diff --git a/src/malloc/mallocng/realloc.c b/src/malloc/mallocng/realloc.c
index 18769f42..0fab0df7 100644
--- a/src/malloc/mallocng/realloc.c
+++ b/src/malloc/mallocng/realloc.c
@@ -1,4 +1,5 @@
 #define _GNU_SOURCE
+#include <stdint.h>
 #include <stdlib.h>
 #include <sys/mman.h>
 #include <string.h>
@@ -6,23 +7,46 @@
 
 void *realloc(void *p, size_t n)
 {
+	n = ALIGN_UP(n, 16);
 	if (!p) return malloc(n);
 	if (size_overflows(n)) return 0;
+
+#ifdef MEMTAG
+	unsigned char *untagged = (unsigned char *)((uint64_t)p & ~MTE_TAG_MASK);
+#else
+	unsigned char *untagged = p;
+#endif
+
 	struct meta *g = get_meta(p);
-	int idx = get_slot_index(p);
+	int idx = get_slot_index(untagged);
 	size_t stride = get_stride(g);
 	unsigned char *start = g->mem->storage + stride*idx;
 	unsigned char *end = start + stride - IB;
-	size_t old_size = get_nominal_size(p, end);
-	size_t avail_size = end-(unsigned char *)p;
+	size_t old_size = get_nominal_size(untagged, end);
+	size_t avail_size = end-(unsigned char *)untagged;
 	void *new;
 
 	// only resize in-place if size class matches
 	if (n <= avail_size && n<MMAP_THRESHOLD
 	    && size_to_class(n)+1 >= g->sizeclass) {
-		set_size(p, end, n);
-		return p;
+
+		uint64_t addr;
+
+#ifdef MEMTAG
+		for (size_t i = 0; i < old_size; i += 16)
+			mte_store_tag((uint64_t)(untagged + i));
+
+		uint64_t mask_mte = mte_get_exclude_mask((uint64_t)p);
+		addr = mte_insert_random_tag((uint64_t)p, mask_mte);
+
+		for (size_t i = 0; i < n; i += 16)
+			mte_store_tag(addr + i);
+#else
+		addr = (uint64_t)p;
+#endif
+
+		set_size(untagged, end, n);
+
+		return (void *)addr;
 	}
 
 	// use mremap if old and new size are both mmap-worthy
-- 
2.43.0
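Note (not part of the patch): on Linux, MTE tag checking is off by default, so
to actually observe faults from a musl built with `--enable-mte` the test
process has to opt in to synchronous tag checking via prctl(), and per the
kernel's memory-tagging documentation tags are only honoured on pages mapped
with PROT_MTE. A minimal sketch, assuming a kernel built with CONFIG_ARM64_MTE
and the PR_* constant names from <linux/prctl.h>:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	/* Enable the tagged-address ABI and synchronous MTE tag check
	 * faults, allowing IRG to generate tags 1..15 (tag 0 excluded). */
	if (prctl(PR_SET_TAGGED_ADDR_CTRL,
	          PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC |
	          (0xfffe << PR_MTE_TAG_SHIFT), 0, 0, 0)) {
		perror("prctl(PR_SET_TAGGED_ADDR_CTRL)");
		return 1;
	}

	char *p = malloc(32);	/* returns a tagged pointer */
	strcpy(p, "hello, mte");
	puts(p);		/* tag matches, loads succeed */

	free(p);		/* slot granules are retagged on free */
	/* puts(p); */		/* would now fault with SEGV_MTESERR */
	return 0;
}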