From 1d43cec30cc9b140985010e507155026b974ac47 Mon Sep 17 00:00:00 2001 From: oreo639 <31916379+Oreo639@users.noreply.github.com> Date: Wed, 22 Jun 2022 15:12:12 -0700 Subject: [PATCH] musl: update realpath/readlink and add gettid --- srcpkgs/musl/patches/gettid.patch | 49 ++++ ...tly_instead_of_using_procfs_readlink.patch | 219 ++++++++++++++++++ ...in_readlink_syscall_with_zero_buffer.patch | 59 +++++ srcpkgs/musl/template | 2 +- 4 files changed, 328 insertions(+), 1 deletion(-) create mode 100644 srcpkgs/musl/patches/gettid.patch create mode 100644 srcpkgs/musl/patches/implement_realpath_directly_instead_of_using_procfs_readlink.patch create mode 100644 srcpkgs/musl/patches/work_around_linux_bug_in_readlink_syscall_with_zero_buffer.patch diff --git a/srcpkgs/musl/patches/gettid.patch b/srcpkgs/musl/patches/gettid.patch new file mode 100644 index 000000000000..bb8bf5905b88 --- /dev/null +++ b/srcpkgs/musl/patches/gettid.patch @@ -0,0 +1,49 @@ +From d49cf07541bb54a5ac7aec1feec8514db33db8ea Mon Sep 17 00:00:00 2001 +From: Rich Felker +Date: Mon, 17 Aug 2020 20:12:53 -0400 +Subject: [PATCH] add gettid function + +this is a prerequisite for addition of other interfaces that use +kernel tids, including futex and SIGEV_THREAD_ID. + +there is some ambiguity as to whether the semantic return type should +be int or pid_t. either way, futex API imposes a contract that the +values fit in int (excluding some upper reserved bits). glibc used +pid_t, so in the interest of not having gratuitous mismatch (the +underlying types are the same anyway), pid_t is used here as well. + +while conceptually this is a syscall, the copy stored in the thread +structure is always valid in all contexts where it's valid to call +libc functions, so it's used to avoid the syscall. 
+--- + include/unistd.h | 1 + + src/linux/gettid.c | 8 ++++++++ + 2 files changed, 9 insertions(+) + create mode 100644 src/linux/gettid.c + +diff --git a/include/unistd.h b/include/unistd.h +index 7bcbff943..07584a23e 100644 +--- a/include/unistd.h ++++ b/include/unistd.h +@@ -190,6 +190,7 @@ int syncfs(int); + int euidaccess(const char *, int); + int eaccess(const char *, int); + ssize_t copy_file_range(int, off_t *, int, off_t *, size_t, unsigned); ++pid_t gettid(void); + #endif + + #if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE) +diff --git a/src/linux/gettid.c b/src/linux/gettid.c +new file mode 100644 +index 000000000..70767137e +--- /dev/null ++++ b/src/linux/gettid.c +@@ -0,0 +1,8 @@ ++#define _GNU_SOURCE ++#include <unistd.h> ++#include "pthread_impl.h" ++ ++pid_t gettid(void) ++{ ++ return __pthread_self()->tid; ++} diff --git a/srcpkgs/musl/patches/implement_realpath_directly_instead_of_using_procfs_readlink.patch b/srcpkgs/musl/patches/implement_realpath_directly_instead_of_using_procfs_readlink.patch new file mode 100644 index 000000000000..569a1f52c7cd --- /dev/null +++ b/srcpkgs/musl/patches/implement_realpath_directly_instead_of_using_procfs_readlink.patch @@ -0,0 +1,219 @@ +From 29ff7599a448232f2527841c2362643d246cee36 Mon Sep 17 00:00:00 2001 +From: Rich Felker +Date: Mon, 30 Nov 2020 12:14:47 -0500 +Subject: [PATCH] implement realpath directly instead of using procfs readlink + +inability to use realpath in chroot/container without procfs access +and at early boot prior to mount of /proc has been an ongoing issue, +and it turns out realpath was one of the last remaining interfaces +that needed procfs for its core functionality. during investigation +while reimplementing, it was determined that there were also serious +problems with the procfs-based implementation.
most seriously it was +unsafe on pre-O_PATH kernels, and unlike other places where O_PATH was +used, the unsafety was hard or impossible to fix because O_NOFOLLOW +can't be used (since the whole purpose was to follow symlinks). + +the new implementation is a direct one, performing readlink on each +path component to resolve it. an explicit stack, as opposed to +recursion, is used to represent the remaining components to be +processed. the stack starts out holding just the input string, and +reading a link pushes the link contents onto the stack. + +unlike many other implementations, this one does not call getcwd +initially for relative pathnames. instead it accumulates initial .. +components to be applied to the working directory if the result is +still a relative path. this avoids calling getcwd (which may fail) at +all when symlink traversal will eventually yield an absolute path. it +also doesn't use any form of stat operation; instead it arranges for +readlink to tell it when a non-directory is used in a context where a +directory is needed. this minimizes the number of syscalls needed, +avoids accessing inodes when the directory table suffices, and reduces +the amount of code pulled in for static linking. 
+--- + src/misc/realpath.c | 159 +++++++++++++++++++++++++++++++++++++------- + 1 file changed, 136 insertions(+), 23 deletions(-) + +diff --git a/src/misc/realpath.c b/src/misc/realpath.c +index d2708e59d..db8b74dc8 100644 +--- a/src/misc/realpath.c ++++ b/src/misc/realpath.c +@@ -1,43 +1,156 @@ + #include <stdlib.h> + #include <limits.h> +-#include <sys/stat.h> +-#include <fcntl.h> + #include <errno.h> + #include <unistd.h> + #include <string.h> +-#include "syscall.h" ++ ++static size_t slash_len(const char *s) ++{ ++ const char *s0 = s; ++ while (*s == '/') s++; ++ return s-s0; ++} + + char *realpath(const char *restrict filename, char *restrict resolved) + { +- int fd; +- ssize_t r; +- struct stat st1, st2; +- char buf[15+3*sizeof(int)]; +- char tmp[PATH_MAX]; ++ char stack[PATH_MAX+1]; ++ char output[PATH_MAX]; ++ size_t p, q, l, l0, cnt=0, nup=0; ++ int check_dir=0; + + if (!filename) { + errno = EINVAL; + return 0; + } ++ l = strnlen(filename, sizeof stack); ++ if (!l) { ++ errno = ENOENT; ++ return 0; ++ } ++ if (l >= PATH_MAX) goto toolong; ++ p = sizeof stack - l - 1; ++ q = 0; ++ memcpy(stack+p, filename, l+1); ++ ++ /* Main loop. Each iteration pops the next part from stack of ++ * remaining path components and consumes any slashes that follow. ++ * If not a link, it's moved to output; if a link, contents are ++ * pushed to the stack. */ ++restart: ++ for (; ; p+=slash_len(stack+p)) { ++ /* If stack starts with /, the whole component is / or // ++ * and the output state must be reset. */ ++ if (stack[p] == '/') { ++ check_dir=0; ++ nup=0; ++ q=0; ++ output[q++] = '/'; ++ p++; ++ /* Initial // is special. */ ++ if (stack[p] == '/' && stack[p+1] != '/') ++ output[q++] = '/'; ++ continue; ++ } ++ ++ char *z = __strchrnul(stack+p, '/'); ++ l0 = l = z-(stack+p); + +- fd = sys_open(filename, O_PATH|O_NONBLOCK|O_CLOEXEC); +- if (fd < 0) return 0; +- __procfdname(buf, fd); ++ if (!l && !check_dir) break; + +- r = readlink(buf, tmp, sizeof tmp - 1); +- if (r < 0) goto err; +- tmp[r] = 0; ++ /* Skip any .
component but preserve check_dir status. */ ++ if (l==1 && stack[p]=='.') { ++ p += l; ++ continue; ++ } + +- fstat(fd, &st1); +- r = stat(tmp, &st2); +- if (r<0 || st1.st_dev != st2.st_dev || st1.st_ino != st2.st_ino) { +- if (!r) errno = ELOOP; +- goto err; ++ /* Copy next component onto output at least temporarily, to ++ * call readlink, but wait to advance output position until ++ * determining it's not a link. */ ++ if (q && output[q-1] != '/') { ++ if (!p) goto toolong; ++ stack[--p] = '/'; ++ l++; ++ } ++ if (q+l >= PATH_MAX) goto toolong; ++ memcpy(output+q, stack+p, l); ++ output[q+l] = 0; ++ p += l; ++ ++ int up = 0; ++ if (l0==2 && stack[p-2]=='.' && stack[p-1]=='.') { ++ up = 1; ++ /* Any non-.. path components we could cancel start ++ * after nup repetitions of the 3-byte string "../"; ++ * if there are none, accumulate .. components to ++ * later apply to cwd, if needed. */ ++ if (q <= 3*nup) { ++ nup++; ++ q += l; ++ continue; ++ } ++ /* When previous components are already known to be ++ * directories, processing .. can skip readlink. */ ++ if (!check_dir) goto skip_readlink; ++ } ++ ssize_t k = readlink(output, stack, p); ++ if (k==p) goto toolong; ++ if (!k) { ++ errno = ENOENT; ++ return 0; ++ } ++ if (k<0) { ++ if (errno != EINVAL) return 0; ++skip_readlink: ++ check_dir = 0; ++ if (up) { ++ while(q && output[q-1]!='/') q--; ++ if (q>1 && (q>2 || output[0]!='/')) q--; ++ continue; ++ } ++ if (l0) q += l; ++ check_dir = stack[p]; ++ continue; ++ } ++ if (++cnt == SYMLOOP_MAX) { ++ errno = ELOOP; ++ return 0; ++ } ++ ++ /* If link contents end in /, strip any slashes already on ++ * stack to avoid /->// or //->/// or spurious toolong. */ ++ if (stack[k-1]=='/') while (stack[p]=='/') p++; ++ p -= k; ++ memmove(stack+p, stack, k); ++ ++ /* Skip the stack advancement in case we have a new ++ * absolute base path. */ ++ goto restart; + } + +- __syscall(SYS_close, fd); +- return resolved ? 
strcpy(resolved, tmp) : strdup(tmp); +-err: +- __syscall(SYS_close, fd); ++ output[q] = 0; ++ ++ if (output[0] != '/') { ++ if (!getcwd(stack, sizeof stack)) return 0; ++ l = strlen(stack); ++ /* Cancel any initial .. components. */ ++ p = 0; ++ while (nup--) { ++ while(l>1 && stack[l-1]!='/') l--; ++ if (l>1) l--; ++ p += 2; ++ if (p<q) p++; ++ } ++ if (q-p && stack[l-1]!='/') stack[l++] = '/'; ++ if (l + (q-p) + 1 >= PATH_MAX) goto toolong; ++ memmove(output + l, output + p, q - p + 1); ++ memcpy(output, stack, l); ++ q = l + q-p; ++ } ++ ++ if (resolved) return memcpy(resolved, output, q+1); ++ else return strdup(output); ++ ++toolong: ++ errno = ENAMETOOLONG; + return 0; + } diff --git a/srcpkgs/musl/patches/work_around_linux_bug_in_readlink_syscall_with_zero_buffer.patch b/srcpkgs/musl/patches/work_around_linux_bug_in_readlink_syscall_with_zero_buffer.patch new file mode 100644 index 000000000000..3286432ebea2 --- /dev/null +++ b/srcpkgs/musl/patches/work_around_linux_bug_in_readlink_syscall_with_zero_buffer.patch @@ -0,0 +1,59 @@ +From e2fa720be7024cce4fc489f3877476d35da48ee2 Mon Sep 17 00:00:00 2001 +From: Rich Felker +Date: Mon, 23 Nov 2020 19:44:19 -0500 +Subject: [PATCH] work around linux bug in readlink syscall with zero buffer + size + +linux fails with EINVAL when a zero buffer size is passed to the +syscall. this is non-conforming because POSIX already defines EINVAL +with a significantly different meaning: the target is not a symlink. + +since the request is semantically valid, patch it up by using a dummy +buffer of length one, and truncating the return value to zero if it +succeeds.
+--- + src/unistd/readlink.c | 11 +++++++++-- + src/unistd/readlinkat.c | 9 ++++++++- + 2 files changed, 17 insertions(+), 3 deletions(-) + +diff --git a/src/unistd/readlink.c b/src/unistd/readlink.c +index a152d5249..32f4537f9 100644 +--- a/src/unistd/readlink.c ++++ b/src/unistd/readlink.c +@@ -4,9 +4,16 @@ + + ssize_t readlink(const char *restrict path, char *restrict buf, size_t bufsize) + { ++ char dummy[1]; ++ if (!bufsize) { ++ buf = dummy; ++ bufsize = 1; ++ } + #ifdef SYS_readlink +- return syscall(SYS_readlink, path, buf, bufsize); ++ int r = __syscall(SYS_readlink, path, buf, bufsize); + #else +- return syscall(SYS_readlinkat, AT_FDCWD, path, buf, bufsize); ++ int r = __syscall(SYS_readlinkat, AT_FDCWD, path, buf, bufsize); + #endif ++ if (buf == dummy && r > 0) r = 0; ++ return __syscall_ret(r); + } +diff --git a/src/unistd/readlinkat.c b/src/unistd/readlinkat.c +index 9af45cd5a..f79d3d142 100644 +--- a/src/unistd/readlinkat.c ++++ b/src/unistd/readlinkat.c +@@ -3,5 +3,12 @@ + + ssize_t readlinkat(int fd, const char *restrict path, char *restrict buf, size_t bufsize) + { +- return syscall(SYS_readlinkat, fd, path, buf, bufsize); ++ char dummy[1]; ++ if (!bufsize) { ++ buf = dummy; ++ bufsize = 1; ++ } ++ int r = __syscall(SYS_readlinkat, fd, path, buf, bufsize); ++ if (buf == dummy && r > 0) r = 0; ++ return __syscall_ret(r); + } diff --git a/srcpkgs/musl/template b/srcpkgs/musl/template index d30c99a563a0..5bdc5c48e623 100644 --- a/srcpkgs/musl/template +++ b/srcpkgs/musl/template @@ -2,7 +2,7 @@ pkgname=musl reverts="1.2.0_1" version=1.1.24 -revision=9 +revision=10 archs="*-musl" bootstrap=yes build_style=gnu-configure