mailing list of musl libc
 help / color / mirror / code / Atom feed
* [PATCH] Wasm support patch 3 (the actual arch/wasm)
@ 2017-11-28 12:39 Nicholas Wilson
  2017-11-29 20:36 ` Rich Felker
  0 siblings, 1 reply; 3+ messages in thread
From: Nicholas Wilson @ 2017-11-28 12:39 UTC (permalink / raw)
  To: musl

Hi,

Here's the patch with the actual Wasm arch implementation!

It's got a certain amount of boilerplate, mainly in "arch/wasm/bits" where certain structures have to be defined per-architecture. I just copied x32 where possible, since Wasm a new arch and we have free choice of how to define things, and x32 is another modern arch that's like i386 but with some legacy cruft removed.

As you can see, I'm using the "static syscalls" approach. There's a README explaining the approach in "src/internal/wasm", where I explain that most syscalls are to be provided by the embedding environment, but some syscalls like brk can actually be implemented using native Wasm intrinsics, which I've done in "src/internal/wasm".

The result of these three patches (this and the last two emails), is a version of Musl that builds using Clang's Wasm support, and creates executable Wasm modules that can use malloc, do I/O, and all sorts of other things. I haven't tested the full range of syscalls yet, but the ones I have tried work well.

Ready for feedback!

All the best,
Nick

diff --git a/arch/wasm/atomic_arch.h b/arch/wasm/atomic_arch.h
new file mode 100644
index 00000000..9da04059
--- /dev/null
+++ b/arch/wasm/atomic_arch.h
@@ -0,0 +1,74 @@
+#include <stdint.h>
+
+#define a_ctz_l a_ctz_l
+static inline int a_ctz_l(unsigned long x)
+{
+  return __builtin_ctzl(x);
+}
+
+#define a_ctz_64 a_ctz_64
+static inline int a_ctz_64(uint64_t x)
+{
+  return __builtin_ctzll(x);
+}
+
+#define a_and_64 a_and_64
+static inline void a_and_64(volatile uint64_t *p, uint64_t v)
+{
+  // TODO use a WebAssembly CAS builtin, when those arrive with the threads feature
+  //__atomic_fetch_and(p, v, __ATOMIC_SEQ_CST);
+  *p &= v;
+}
+
+#define a_or_64 a_or_64
+static inline void a_or_64(volatile uint64_t *p, uint64_t v)
+{
+  // TODO use a WebAssembly CAS builtin, when those arrive with the threads feature
+  //__atomic_fetch_or(p, v, __ATOMIC_SEQ_CST);
+  *p |= v;
+}
+
+#define a_cas a_cas
+static inline int a_cas(volatile int *p, int t, int s)
+{
+  // TODO use a WebAssembly CAS builtin, when those arrive with the threads feature
+  //__atomic_compare_exchange_n(p, &t, s, 0, __ATOMIC_SEQ_CST,
+  //                            __ATOMIC_SEQ_CST);
+  //return t;
+  int old = *p;
+  if (old == t)
+    *p = s;
+  return old;
+}
+
+#define a_swap a_swap
+static inline int a_swap(volatile int *p, int v)
+{
+  // TODO use a WebAssembly CAS builtin, when those arrive with the threads feature
+  //return __atomic_exchange_n(p, v, __ATOMIC_SEQ_CST);
+  int old = *p;
+  *p = v;
+  return old;
+}
+
+#define a_store a_store
+static inline void a_store(volatile int *p, int x)
+{
+  // TODO use a WebAssembly CAS builtin, when those arrive with the threads feature
+  //__atomic_store(p, x, __ATOMIC_RELEASE);
+  *p = x;
+}
+
+#define a_barrier a_barrier
+static inline void a_barrier()
+{
+  // TODO use a WebAssembly CAS builtin, when those arrive with the threads feature
+  //__atomic_thread_fence(__ATOMIC_SEQ_CST);
+}
+
+#define a_crash a_crash
+static inline void a_crash()
+{
+  // This generates the Wasm "unreachable" instruction which traps when reached
+  __builtin_unreachable();
+}
diff --git a/arch/wasm/bits/alltypes.h.in b/arch/wasm/bits/alltypes.h.in
new file mode 100644
index 00000000..ca4e94a2
--- /dev/null
+++ b/arch/wasm/bits/alltypes.h.in
@@ -0,0 +1,33 @@
+#ifdef __wasm32__
+#define _Addr int
+#define _Int64 long long
+#define _Reg int
+
+#elif defined __wasm64__
+#define _Addr long
+#define _Int64 long long
+#define _Reg long
+
+#endif
+
+TYPEDEF __builtin_va_list va_list;
+TYPEDEF __builtin_va_list __isoc_va_list;
+
+#ifndef __cplusplus
+TYPEDEF __WCHAR_TYPE__ wchar_t;
+#endif
+TYPEDEF __WINT_TYPE__ wint_t;
+
+TYPEDEF float float_t;
+TYPEDEF double double_t;
+
+TYPEDEF long time_t;
+TYPEDEF long suseconds_t;
+
+TYPEDEF struct { union { int __i[9]; volatile int __vi[9]; unsigned __s[9]; } __u; } pthread_attr_t;
+TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
+TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } mtx_t;
+TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } pthread_cond_t;
+TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } cnd_t;
+TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[8]; } __u; } pthread_rwlock_t;
+TYPEDEF struct { union { int __i[5]; volatile int __vi[5]; void *__p[5]; } __u; } pthread_barrier_t;
diff --git a/arch/wasm/bits/float.h b/arch/wasm/bits/float.h
new file mode 100644
index 00000000..9a56ad14
--- /dev/null
+++ b/arch/wasm/bits/float.h
@@ -0,0 +1,16 @@
+#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__
+
+#define LDBL_TRUE_MIN __LDBL_DENORM_MIN__
+#define LDBL_MIN __LDBL_MIN__
+#define LDBL_MAX __LDBL_MAX__
+#define LDBL_EPSILON __LDBL_EPSILON__
+
+#define LDBL_MANT_DIG __LDBL_MANT_DIG__
+#define LDBL_MIN_EXP __LDBL_MIN_EXP__
+#define LDBL_MAX_EXP __LDBL_MAX_EXP__
+
+#define LDBL_DIG __LDBL_DIG__
+#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__
+#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__
+
+#define DECIMAL_DIG __DECIMAL_DIG__
diff --git a/arch/wasm/bits/limits.h b/arch/wasm/bits/limits.h
new file mode 100644
index 00000000..649d7d74
--- /dev/null
+++ b/arch/wasm/bits/limits.h
@@ -0,0 +1,9 @@
+#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
+ || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+// The WebAssembly fixed page size is 64KiB
+#define PAGE_SIZE 65536
+#define LONG_BIT (__SIZEOF_LONG__*8)
+#endif
+
+#define LONG_MAX  __LONG_MAX__
+#define LLONG_MAX __LONG_LONG_MAX__
diff --git a/arch/wasm/bits/posix.h b/arch/wasm/bits/posix.h
new file mode 100644
index 00000000..c37b94c1
--- /dev/null
+++ b/arch/wasm/bits/posix.h
@@ -0,0 +1,2 @@
+#define _POSIX_V6_LP64_OFF64  1
+#define _POSIX_V7_LP64_OFF64  1
diff --git a/arch/wasm/bits/setjmp.h b/arch/wasm/bits/setjmp.h
new file mode 100644
index 00000000..a9262a64
--- /dev/null
+++ b/arch/wasm/bits/setjmp.h
@@ -0,0 +1 @@
+typedef unsigned long long __jmp_buf[8];
diff --git a/arch/wasm/bits/signal.h b/arch/wasm/bits/signal.h
new file mode 100644
index 00000000..c8d10a81
--- /dev/null
+++ b/arch/wasm/bits/signal.h
@@ -0,0 +1,77 @@
+#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
+ || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+
+#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+#define MINSIGSTKSZ 2048
+#define SIGSTKSZ 8192
+#endif
+
+// I'm not expecting any of this to be actually usable... these definitions are
+// just the bare minimum so that src/signal/*.c compiles.
+
+typedef struct {
+	unsigned long long __ip_dummy;
+} mcontext_t;
+
+struct sigaltstack {
+	void *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+};
+
+typedef struct __ucontext {
+	unsigned long uc_flags;
+	struct __ucontext *uc_link;
+//	stack_t uc_stack;
+	mcontext_t uc_mcontext;
+	sigset_t uc_sigmask;
+//	unsigned long long __fpregs_mem[64];
+} ucontext_t;
+
+#define SA_NOCLDSTOP  1
+#define SA_NOCLDWAIT  2
+#define SA_SIGINFO    4
+#define SA_ONSTACK    0x08000000
+#define SA_RESTART    0x10000000
+#define SA_NODEFER    0x40000000
+#define SA_RESETHAND  0x80000000
+#define SA_RESTORER   0x04000000
+
+#endif
+
+#define SIGHUP    1
+#define SIGINT    2
+#define SIGQUIT   3
+#define SIGILL    4
+#define SIGTRAP   5
+#define SIGABRT   6
+#define SIGIOT    SIGABRT
+#define SIGBUS    7
+#define SIGFPE    8
+#define SIGKILL   9
+#define SIGUSR1   10
+#define SIGSEGV   11
+#define SIGUSR2   12
+#define SIGPIPE   13
+#define SIGALRM   14
+#define SIGTERM   15
+#define SIGSTKFLT 16
+#define SIGCHLD   17
+#define SIGCONT   18
+#define SIGSTOP   19
+#define SIGTSTP   20
+#define SIGTTIN   21
+#define SIGTTOU   22
+#define SIGURG    23
+#define SIGXCPU   24
+#define SIGXFSZ   25
+#define SIGVTALRM 26
+#define SIGPROF   27
+#define SIGWINCH  28
+#define SIGIO     29
+#define SIGPOLL   29
+#define SIGPWR    30
+#define SIGSYS    31
+#define SIGUNUSED SIGSYS
+
+#define _NSIG 65
diff --git a/arch/wasm/bits/stat.h b/arch/wasm/bits/stat.h
new file mode 100644
index 00000000..16422333
--- /dev/null
+++ b/arch/wasm/bits/stat.h
@@ -0,0 +1,21 @@
+// Use the x32 structures for Wasm
+
+struct stat {
+	dev_t st_dev;
+	ino_t st_ino;
+	nlink_t st_nlink;
+
+	mode_t st_mode;
+	uid_t st_uid;
+	gid_t st_gid;
+	unsigned int    __pad0;
+	dev_t st_rdev;
+	off_t st_size;
+	blksize_t st_blksize;
+	blkcnt_t st_blocks;
+
+	struct timespec st_atim;
+	struct timespec st_mtim;
+	struct timespec st_ctim;
+	long long __unused[3];
+};
diff --git a/arch/wasm/bits/stdint.h b/arch/wasm/bits/stdint.h
new file mode 100644
index 00000000..12778594
--- /dev/null
+++ b/arch/wasm/bits/stdint.h
@@ -0,0 +1,31 @@
+typedef int32_t int_fast16_t;
+typedef int32_t int_fast32_t;
+typedef uint32_t uint_fast16_t;
+typedef uint32_t uint_fast32_t;
+
+#define INT_FAST16_MIN  INT32_MIN
+#define INT_FAST32_MIN  INT32_MIN
+
+#define INT_FAST16_MAX  INT32_MAX
+#define INT_FAST32_MAX  INT32_MAX
+
+#define UINT_FAST16_MAX UINT32_MAX
+#define UINT_FAST32_MAX UINT32_MAX
+
+#ifdef __wasm32__
+#define INTPTR_MIN      INT32_MIN
+#define INTPTR_MAX      INT32_MAX
+#define UINTPTR_MAX     UINT32_MAX
+#define PTRDIFF_MIN     INT32_MIN
+#define PTRDIFF_MAX     INT32_MAX
+#define SIZE_MAX        UINT32_MAX
+
+#elif defined __wasm64__
+#define INTPTR_MIN      INT64_MIN
+#define INTPTR_MAX      INT64_MAX
+#define UINTPTR_MAX     UINT64_MAX
+#define PTRDIFF_MIN     INT64_MIN
+#define PTRDIFF_MAX     INT64_MAX
+#define SIZE_MAX        UINT64_MAX
+
+#endif
diff --git a/arch/wasm/bits/syscall.h.in b/arch/wasm/bits/syscall.h.in
new file mode 100644
index 00000000..00c31889
--- /dev/null
+++ b/arch/wasm/bits/syscall.h.in
@@ -0,0 +1,321 @@
+// For Wasm, we don't use syscall numbers!  We statically link in only the
+// syscalls which are invoked.
+
+#define SYS_accept4 syscall_accept4
+#define SYS_accept syscall_accept
+#define SYS_access syscall_access
+#define SYS_acct syscall_acct
+#define SYS_add_key syscall_add_key
+#define SYS_adjtimex syscall_adjtimex
+#define SYS_afs_syscall syscall_afs_syscall
+#define SYS_alarm syscall_alarm
+#define SYS_arch_prctl syscall_arch_prctl
+#define SYS_bind syscall_bind
+#define SYS_bpf syscall_bpf
+#define SYS_brk syscall_brk
+#define SYS_capget syscall_capget
+#define SYS_capset syscall_capset
+#define SYS_chdir syscall_chdir
+#define SYS_chmod syscall_chmod
+#define SYS_chown syscall_chown
+#define SYS_chroot syscall_chroot
+#define SYS_clock_adjtime syscall_clock_adjtime
+#define SYS_clock_getres syscall_clock_getres
+#define SYS_clock_gettime syscall_clock_gettime
+#define SYS_clock_nanosleep syscall_clock_nanosleep
+#define SYS_clock_settime syscall_clock_settime
+#define SYS_clone syscall_clone
+#define SYS_close syscall_close
+#define SYS_connect syscall_connect
+#define SYS_copy_file_range syscall_copy_file_range
+#define SYS_creat syscall_creat
+#define SYS_delete_module syscall_delete_module
+#define SYS_dup2 syscall_dup2
+#define SYS_dup3 syscall_dup3
+#define SYS_dup syscall_dup
+#define SYS_epoll_create1 syscall_epoll_create1
+#define SYS_epoll_create syscall_epoll_create
+#define SYS_epoll_ctl syscall_epoll_ctl
+#define SYS_epoll_pwait syscall_epoll_pwait
+#define SYS_epoll_wait syscall_epoll_wait
+#define SYS_eventfd2 syscall_eventfd2
+#define SYS_eventfd syscall_eventfd
+#define SYS_execveat syscall_execveat
+#define SYS_execve syscall_execve
+#define SYS_exit syscall_exit
+#define SYS_exit_group syscall_exit_group
+#define SYS_faccessat syscall_faccessat
+#define SYS_fadvise64 syscall_fadvise64
+#define SYS_fallocate syscall_fallocate
+#define SYS_fanotify_init syscall_fanotify_init
+#define SYS_fanotify_mark syscall_fanotify_mark
+#define SYS_fchdir syscall_fchdir
+#define SYS_fchmodat syscall_fchmodat
+#define SYS_fchmod syscall_fchmod
+#define SYS_fchownat syscall_fchownat
+#define SYS_fchown syscall_fchown
+#define SYS_fcntl syscall_fcntl
+#define SYS_fdatasync syscall_fdatasync
+#define SYS_fgetxattr syscall_fgetxattr
+#define SYS_finit_module syscall_finit_module
+#define SYS_flistxattr syscall_flistxattr
+#define SYS_flock syscall_flock
+#define SYS_fork syscall_fork
+#define SYS_fremovexattr syscall_fremovexattr
+#define SYS_fsetxattr syscall_fsetxattr
+#define SYS_fstatfs syscall_fstatfs
+#define SYS_fstat syscall_fstat
+#define SYS_fsync syscall_fsync
+#define SYS_ftruncate syscall_ftruncate
+#define SYS_futex syscall_futex
+#define SYS_futimesat syscall_futimesat
+#define SYS_getcpu syscall_getcpu
+#define SYS_getcwd syscall_getcwd
+#define SYS_getdents64 syscall_getdents64
+#define SYS_getdents syscall_getdents
+#define SYS_getegid syscall_getegid
+#define SYS_geteuid syscall_geteuid
+#define SYS_getgid syscall_getgid
+#define SYS_getgroups syscall_getgroups
+#define SYS_getitimer syscall_getitimer
+#define SYS_get_mempolicy syscall_get_mempolicy
+#define SYS_getpeername syscall_getpeername
+#define SYS_getpgid syscall_getpgid
+#define SYS_getpgrp syscall_getpgrp
+#define SYS_getpid syscall_getpid
+#define SYS_getpmsg syscall_getpmsg
+#define SYS_getppid syscall_getppid
+#define SYS_getpriority syscall_getpriority
+#define SYS_getrandom syscall_getrandom
+#define SYS_getresgid syscall_getresgid
+#define SYS_getresuid syscall_getresuid
+#define SYS_getrlimit syscall_getrlimit
+#define SYS_get_robust_list syscall_get_robust_list
+#define SYS_getrusage syscall_getrusage
+#define SYS_getsid syscall_getsid
+#define SYS_getsockname syscall_getsockname
+#define SYS_getsockopt syscall_getsockopt
+#define SYS_gettid syscall_gettid
+#define SYS_gettimeofday syscall_gettimeofday
+#define SYS_getuid syscall_getuid
+#define SYS_getxattr syscall_getxattr
+#define SYS_init_module syscall_init_module
+#define SYS_inotify_add_watch syscall_inotify_add_watch
+#define SYS_inotify_init1 syscall_inotify_init1
+#define SYS_inotify_init syscall_inotify_init
+#define SYS_inotify_rm_watch syscall_inotify_rm_watch
+#define SYS_io_cancel syscall_io_cancel
+#define SYS_ioctl syscall_ioctl
+#define SYS_io_destroy syscall_io_destroy
+#define SYS_io_getevents syscall_io_getevents
+#define SYS_ioperm syscall_ioperm
+#define SYS_iopl syscall_iopl
+#define SYS_ioprio_get syscall_ioprio_get
+#define SYS_ioprio_set syscall_ioprio_set
+#define SYS_io_setup syscall_io_setup
+#define SYS_io_submit syscall_io_submit
+#define SYS_kcmp syscall_kcmp
+#define SYS_kexec_file_load syscall_kexec_file_load
+#define SYS_kexec_load syscall_kexec_load
+#define SYS_keyctl syscall_keyctl
+#define SYS_kill syscall_kill
+#define SYS_lchown syscall_lchown
+#define SYS_lgetxattr syscall_lgetxattr
+#define SYS_linkat syscall_linkat
+#define SYS_link syscall_link
+#define SYS_listen syscall_listen
+#define SYS_listxattr syscall_listxattr
+#define SYS_llistxattr syscall_llistxattr
+#define SYS_lookup_dcookie syscall_lookup_dcookie
+#define SYS_lremovexattr syscall_lremovexattr
+#define SYS_lseek syscall_lseek
+#define SYS_lsetxattr syscall_lsetxattr
+#define SYS_lstat syscall_lstat
+#define SYS_madvise syscall_madvise
+#define SYS_mbind syscall_mbind
+#define SYS_membarrier syscall_membarrier
+#define SYS_memfd_create syscall_memfd_create
+#define SYS_migrate_pages syscall_migrate_pages
+#define SYS_mincore syscall_mincore
+#define SYS_mkdirat syscall_mkdirat
+#define SYS_mkdir syscall_mkdir
+#define SYS_mknodat syscall_mknodat
+#define SYS_mknod syscall_mknod
+#define SYS_mlock2 syscall_mlock2
+#define SYS_mlockall syscall_mlockall
+#define SYS_mlock syscall_mlock
+#define SYS_mmap syscall_mmap
+#define SYS_modify_ldt syscall_modify_ldt
+#define SYS_mount syscall_mount
+#define SYS_move_pages syscall_move_pages
+#define SYS_mprotect syscall_mprotect
+#define SYS_mq_getsetattr syscall_mq_getsetattr
+#define SYS_mq_notify syscall_mq_notify
+#define SYS_mq_open syscall_mq_open
+#define SYS_mq_timedreceive syscall_mq_timedreceive
+#define SYS_mq_timedsend syscall_mq_timedsend
+#define SYS_mq_unlink syscall_mq_unlink
+#define SYS_mremap syscall_mremap
+#define SYS_msgctl syscall_msgctl
+#define SYS_msgget syscall_msgget
+#define SYS_msgrcv syscall_msgrcv
+#define SYS_msgsnd syscall_msgsnd
+#define SYS_msync syscall_msync
+#define SYS_munlockall syscall_munlockall
+#define SYS_munlock syscall_munlock
+#define SYS_munmap syscall_munmap
+#define SYS_name_to_handle_at syscall_name_to_handle_at
+#define SYS_nanosleep syscall_nanosleep
+#define SYS_newfstatat syscall_newfstatat
+#define SYS_openat syscall_openat
+#define SYS_open_by_handle_at syscall_open_by_handle_at
+#define SYS_open syscall_open
+#define SYS_pause syscall_pause
+#define SYS_perf_event_open syscall_perf_event_open
+#define SYS_personality syscall_personality
+#define SYS_pipe2 syscall_pipe2
+#define SYS_pipe syscall_pipe
+#define SYS_pivot_root syscall_pivot_root
+#define SYS_poll syscall_poll
+#define SYS_ppoll syscall_ppoll
+#define SYS_prctl syscall_prctl
+#define SYS_pread64 syscall_pread64
+#define SYS_preadv2 syscall_preadv2
+#define SYS_preadv syscall_preadv
+#define SYS_prlimit64 syscall_prlimit64
+#define SYS_process_vm_readv syscall_process_vm_readv
+#define SYS_process_vm_writev syscall_process_vm_writev
+#define SYS_pselect6 syscall_pselect6
+#define SYS_ptrace syscall_ptrace
+#define SYS_putpmsg syscall_putpmsg
+#define SYS_pwrite64 syscall_pwrite64
+#define SYS_pwritev2 syscall_pwritev2
+#define SYS_pwritev syscall_pwritev
+#define SYS_quotactl syscall_quotactl
+#define SYS_readahead syscall_readahead
+#define SYS_readlinkat syscall_readlinkat
+#define SYS_readlink syscall_readlink
+#define SYS_read syscall_read
+#define SYS_readv syscall_readv
+#define SYS_reboot syscall_reboot
+#define SYS_recvfrom syscall_recvfrom
+#define SYS_recvmmsg syscall_recvmmsg
+#define SYS_recvmsg syscall_recvmsg
+#define SYS_remap_file_pages syscall_remap_file_pages
+#define SYS_removexattr syscall_removexattr
+#define SYS_renameat2 syscall_renameat2
+#define SYS_renameat syscall_renameat
+#define SYS_rename syscall_rename
+#define SYS_request_key syscall_request_key
+#define SYS_restart_syscall syscall_restart_syscall
+#define SYS_rmdir syscall_rmdir
+#define SYS_rt_sigaction syscall_rt_sigaction
+#define SYS_rt_sigpending syscall_rt_sigpending
+#define SYS_rt_sigprocmask syscall_rt_sigprocmask
+#define SYS_rt_sigqueueinfo syscall_rt_sigqueueinfo
+#define SYS_rt_sigreturn syscall_rt_sigreturn
+#define SYS_rt_sigsuspend syscall_rt_sigsuspend
+#define SYS_rt_sigtimedwait syscall_rt_sigtimedwait
+#define SYS_rt_tgsigqueueinfo syscall_rt_tgsigqueueinfo
+#define SYS_sched_getaffinity syscall_sched_getaffinity
+#define SYS_sched_getattr syscall_sched_getattr
+#define SYS_sched_getparam syscall_sched_getparam
+#define SYS_sched_get_priority_max syscall_sched_get_priority_max
+#define SYS_sched_get_priority_min syscall_sched_get_priority_min
+#define SYS_sched_getscheduler syscall_sched_getscheduler
+#define SYS_sched_rr_get_interval syscall_sched_rr_get_interval
+#define SYS_sched_setaffinity syscall_sched_setaffinity
+#define SYS_sched_setattr syscall_sched_setattr
+#define SYS_sched_setparam syscall_sched_setparam
+#define SYS_sched_setscheduler syscall_sched_setscheduler
+#define SYS_sched_yield syscall_sched_yield
+#define SYS_seccomp syscall_seccomp
+#define SYS_security syscall_security
+#define SYS_select syscall_select
+#define SYS_semctl syscall_semctl
+#define SYS_semget syscall_semget
+#define SYS_semop syscall_semop
+#define SYS_semtimedop syscall_semtimedop
+#define SYS_sendfile syscall_sendfile
+#define SYS_sendmmsg syscall_sendmmsg
+#define SYS_sendmsg syscall_sendmsg
+#define SYS_sendto syscall_sendto
+#define SYS_setdomainname syscall_setdomainname
+#define SYS_setfsgid syscall_setfsgid
+#define SYS_setfsuid syscall_setfsuid
+#define SYS_setgid syscall_setgid
+#define SYS_setgroups syscall_setgroups
+#define SYS_sethostname syscall_sethostname
+#define SYS_setitimer syscall_setitimer
+#define SYS_set_mempolicy syscall_set_mempolicy
+#define SYS_setns syscall_setns
+#define SYS_setpgid syscall_setpgid
+#define SYS_setpriority syscall_setpriority
+#define SYS_setregid syscall_setregid
+#define SYS_setresgid syscall_setresgid
+#define SYS_setresuid syscall_setresuid
+#define SYS_setreuid syscall_setreuid
+#define SYS_setrlimit syscall_setrlimit
+#define SYS_set_robust_list syscall_set_robust_list
+#define SYS_setsid syscall_setsid
+#define SYS_setsockopt syscall_setsockopt
+#define SYS_set_tid_address syscall_set_tid_address
+#define SYS_settimeofday syscall_settimeofday
+#define SYS_setuid syscall_setuid
+#define SYS_setxattr syscall_setxattr
+#define SYS_shmat syscall_shmat
+#define SYS_shmctl syscall_shmctl
+#define SYS_shmdt syscall_shmdt
+#define SYS_shmget syscall_shmget
+#define SYS_shutdown syscall_shutdown
+#define SYS_sigaltstack syscall_sigaltstack
+#define SYS_signalfd4 syscall_signalfd4
+#define SYS_signalfd syscall_signalfd
+#define SYS_socketpair syscall_socketpair
+#define SYS_socket syscall_socket
+#define SYS_splice syscall_splice
+#define SYS_statfs syscall_statfs
+#define SYS_stat syscall_stat
+#define SYS_swapoff syscall_swapoff
+#define SYS_swapon syscall_swapon
+#define SYS_symlinkat syscall_symlinkat
+#define SYS_symlink syscall_symlink
+#define SYS_sync_file_range syscall_sync_file_range
+#define SYS_syncfs syscall_syncfs
+#define SYS_sync syscall_sync
+#define SYS_sysfs syscall_sysfs
+#define SYS_sysinfo syscall_sysinfo
+#define SYS_syslog syscall_syslog
+#define SYS_tee syscall_tee
+#define SYS_tgkill syscall_tgkill
+#define SYS_timer_create syscall_timer_create
+#define SYS_timer_delete syscall_timer_delete
+#define SYS_timerfd_create syscall_timerfd_create
+#define SYS_timerfd_gettime syscall_timerfd_gettime
+#define SYS_timerfd_settime syscall_timerfd_settime
+#define SYS_timer_getoverrun syscall_timer_getoverrun
+#define SYS_timer_gettime syscall_timer_gettime
+#define SYS_timer_settime syscall_timer_settime
+#define SYS_times syscall_times
+#define SYS_time syscall_time
+#define SYS_tkill syscall_tkill
+#define SYS_truncate syscall_truncate
+#define SYS_tuxcall syscall_tuxcall
+#define SYS_umask syscall_umask
+#define SYS_umount2 syscall_umount2
+#define SYS_uname syscall_uname
+#define SYS_unlinkat syscall_unlinkat
+#define SYS_unlink syscall_unlink
+#define SYS_unshare syscall_unshare
+#define SYS_userfaultfd syscall_userfaultfd
+#define SYS_ustat syscall_ustat
+#define SYS_utimensat syscall_utimensat
+#define SYS_utimes syscall_utimes
+#define SYS_utime syscall_utime
+#define SYS_vfork syscall_vfork
+#define SYS_vhangup syscall_vhangup
+#define SYS_vmsplice syscall_vmsplice
+#define SYS_wait4 syscall_wait4
+#define SYS_waitid syscall_waitid
+#define SYS_writev syscall_writev
+#define SYS_write syscall_write
diff --git a/arch/wasm/crt_arch.h b/arch/wasm/crt_arch.h
new file mode 100644
index 00000000..e69de29b
diff --git a/arch/wasm/pthread_arch.h b/arch/wasm/pthread_arch.h
new file mode 100644
index 00000000..75aac668
--- /dev/null
+++ b/arch/wasm/pthread_arch.h
@@ -0,0 +1,5 @@
+static inline struct pthread *__pthread_self(void) { return pthread_self(); }
+
+#define TP_ADJ(p) (p)
+
+#define MC_PC __ip_dummy
diff --git a/arch/wasm/reloc.h b/arch/wasm/reloc.h
new file mode 100644
index 00000000..eca58d9b
--- /dev/null
+++ b/arch/wasm/reloc.h
@@ -0,0 +1 @@
+#define CRTJMP(pc,sp) __builtin_unreachable()
\ No newline at end of file
diff --git a/arch/wasm/syscall_arch.h b/arch/wasm/syscall_arch.h
new file mode 100644
index 00000000..41baf881
--- /dev/null
+++ b/arch/wasm/syscall_arch.h
@@ -0,0 +1,333 @@
+// For both Wasm32 and Wasm64, we assume that the host environment can't
+// provide 64-bit types, and split 64-bit values into two arguments.  A Wasm
+// interpreter *could* provide i64 support, but in practice web browsers aren't
+// doing that right now, and any i64 we pass out to a syscall will get chopped
+// to 58-bit precision.
+#define __SYSCALL_LL_E(x) \
+((union { long long ll; long l[2]; }){ .ll = x }).l[0], \
+((union { long long ll; long l[2]; }){ .ll = x }).l[1]
+#define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
+
+long __syscall_accept4(long arg1, ...);
+long __syscall_accept(long arg1, ...);
+long __syscall_access(long arg1, ...);
+long __syscall_acct(long arg1, ...);
+long __syscall_add_key(long arg1, ...);
+long __syscall_adjtimex(long arg1, ...);
+long __syscall_afs_syscall(long arg1, ...);
+long __syscall_alarm(long arg1, ...);
+long __syscall_arch_prctl(long arg1, ...);
+long __syscall_bind(long arg1, ...);
+long __syscall_bpf(long arg1, ...);
+long __syscall_brk(long arg1, ...);
+long __syscall_capget(long arg1, ...);
+long __syscall_capset(long arg1, ...);
+long __syscall_chdir(long arg1, ...);
+long __syscall_chmod(long arg1, ...);
+long __syscall_chown(long arg1, ...);
+long __syscall_chroot(long arg1, ...);
+long __syscall_clock_adjtime(long arg1, ...);
+long __syscall_clock_getres(long arg1, ...);
+long __syscall_clock_gettime(long arg1, ...);
+long __syscall_clock_nanosleep(long arg1, ...);
+long __syscall_clock_settime(long arg1, ...);
+long __syscall_clone(long arg1, ...);
+long __syscall_close(long arg1, ...);
+long __syscall_connect(long arg1, ...);
+long __syscall_copy_file_range(long arg1, ...);
+long __syscall_creat(long arg1, ...);
+long __syscall_delete_module(long arg1, ...);
+long __syscall_dup2(long arg1, ...);
+long __syscall_dup3(long arg1, ...);
+long __syscall_dup(long arg1, ...);
+long __syscall_epoll_create1(long arg1, ...);
+long __syscall_epoll_create(long arg1, ...);
+long __syscall_epoll_ctl(long arg1, ...);
+long __syscall_epoll_pwait(long arg1, ...);
+long __syscall_epoll_wait(long arg1, ...);
+long __syscall_eventfd2(long arg1, ...);
+long __syscall_eventfd(long arg1, ...);
+long __syscall_execveat(long arg1, ...);
+long __syscall_execve(long arg1, ...);
+long __syscall_exit(long arg1, ...);
+long __syscall_exit_group(long arg1, ...);
+long __syscall_faccessat(long arg1, ...);
+long __syscall_fadvise64(long arg1, ...);
+long __syscall_fallocate(long arg1, ...);
+long __syscall_fanotify_init(long arg1, ...);
+long __syscall_fanotify_mark(long arg1, ...);
+long __syscall_fchdir(long arg1, ...);
+long __syscall_fchmodat(long arg1, ...);
+long __syscall_fchmod(long arg1, ...);
+long __syscall_fchownat(long arg1, ...);
+long __syscall_fchown(long arg1, ...);
+long __syscall_fcntl(long arg1, ...);
+long __syscall_fdatasync(long arg1, ...);
+long __syscall_fgetxattr(long arg1, ...);
+long __syscall_finit_module(long arg1, ...);
+long __syscall_flistxattr(long arg1, ...);
+long __syscall_flock(long arg1, ...);
+long __syscall_fork(long arg1, ...);
+long __syscall_fremovexattr(long arg1, ...);
+long __syscall_fsetxattr(long arg1, ...);
+long __syscall_fstatfs(long arg1, ...);
+long __syscall_fstat(long arg1, ...);
+long __syscall_fsync(long arg1, ...);
+long __syscall_ftruncate(long arg1, ...);
+long __syscall_futex(long arg1, ...);
+long __syscall_futimesat(long arg1, ...);
+long __syscall_getcpu(long arg1, ...);
+long __syscall_getcwd(long arg1, ...);
+long __syscall_getdents64(long arg1, ...);
+long __syscall_getdents(long arg1, ...);
+long __syscall_getegid(long arg1, ...);
+long __syscall_geteuid(long arg1, ...);
+long __syscall_getgid(long arg1, ...);
+long __syscall_getgroups(long arg1, ...);
+long __syscall_getitimer(long arg1, ...);
+long __syscall_get_mempolicy(long arg1, ...);
+long __syscall_getpeername(long arg1, ...);
+long __syscall_getpgid(long arg1, ...);
+long __syscall_getpgrp(long arg1, ...);
+long __syscall_getpid(long arg1, ...);
+long __syscall_getpmsg(long arg1, ...);
+long __syscall_getppid(long arg1, ...);
+long __syscall_getpriority(long arg1, ...);
+long __syscall_getrandom(long arg1, ...);
+long __syscall_getresgid(long arg1, ...);
+long __syscall_getresuid(long arg1, ...);
+long __syscall_getrlimit(long arg1, ...);
+long __syscall_get_robust_list(long arg1, ...);
+long __syscall_getrusage(long arg1, ...);
+long __syscall_getsid(long arg1, ...);
+long __syscall_getsockname(long arg1, ...);
+long __syscall_getsockopt(long arg1, ...);
+long __syscall_gettid(long arg1, ...);
+long __syscall_gettimeofday(long arg1, ...);
+long __syscall_getuid(long arg1, ...);
+long __syscall_getxattr(long arg1, ...);
+long __syscall_init_module(long arg1, ...);
+long __syscall_inotify_add_watch(long arg1, ...);
+long __syscall_inotify_init1(long arg1, ...);
+long __syscall_inotify_init(long arg1, ...);
+long __syscall_inotify_rm_watch(long arg1, ...);
+long __syscall_io_cancel(long arg1, ...);
+long __syscall_ioctl(long arg1, ...);
+long __syscall_io_destroy(long arg1, ...);
+long __syscall_io_getevents(long arg1, ...);
+long __syscall_ioperm(long arg1, ...);
+long __syscall_iopl(long arg1, ...);
+long __syscall_ioprio_get(long arg1, ...);
+long __syscall_ioprio_set(long arg1, ...);
+long __syscall_io_setup(long arg1, ...);
+long __syscall_io_submit(long arg1, ...);
+long __syscall_kcmp(long arg1, ...);
+long __syscall_kexec_file_load(long arg1, ...);
+long __syscall_kexec_load(long arg1, ...);
+long __syscall_keyctl(long arg1, ...);
+long __syscall_kill(long arg1, ...);
+long __syscall_lchown(long arg1, ...);
+long __syscall_lgetxattr(long arg1, ...);
+long __syscall_linkat(long arg1, ...);
+long __syscall_link(long arg1, ...);
+long __syscall_listen(long arg1, ...);
+long __syscall_listxattr(long arg1, ...);
+long __syscall_llistxattr(long arg1, ...);
+long __syscall_lookup_dcookie(long arg1, ...);
+long __syscall_lremovexattr(long arg1, ...);
+long __syscall_lseek(long arg1, ...);
+long __syscall_lsetxattr(long arg1, ...);
+long __syscall_lstat(long arg1, ...);
+long __syscall_madvise(long arg1, ...);
+long __syscall_mbind(long arg1, ...);
+long __syscall_membarrier(long arg1, ...);
+long __syscall_memfd_create(long arg1, ...);
+long __syscall_migrate_pages(long arg1, ...);
+long __syscall_mincore(long arg1, ...);
+long __syscall_mkdirat(long arg1, ...);
+long __syscall_mkdir(long arg1, ...);
+long __syscall_mknodat(long arg1, ...);
+long __syscall_mknod(long arg1, ...);
+long __syscall_mlock2(long arg1, ...);
+long __syscall_mlockall(long arg1, ...);
+long __syscall_mlock(long arg1, ...);
+long __syscall_mmap(long arg1, ...);
+long __syscall_modify_ldt(long arg1, ...);
+long __syscall_mount(long arg1, ...);
+long __syscall_move_pages(long arg1, ...);
+long __syscall_mprotect(long arg1, ...);
+long __syscall_mq_getsetattr(long arg1, ...);
+long __syscall_mq_notify(long arg1, ...);
+long __syscall_mq_open(long arg1, ...);
+long __syscall_mq_timedreceive(long arg1, ...);
+long __syscall_mq_timedsend(long arg1, ...);
+long __syscall_mq_unlink(long arg1, ...);
+long __syscall_mremap(long arg1, ...);
+long __syscall_msgctl(long arg1, ...);
+long __syscall_msgget(long arg1, ...);
+long __syscall_msgrcv(long arg1, ...);
+long __syscall_msgsnd(long arg1, ...);
+long __syscall_msync(long arg1, ...);
+long __syscall_munlockall(long arg1, ...);
+long __syscall_munlock(long arg1, ...);
+long __syscall_munmap(long arg1, ...);
+long __syscall_name_to_handle_at(long arg1, ...);
+long __syscall_nanosleep(long arg1, ...);
+long __syscall_newfstatat(long arg1, ...);
+long __syscall_openat(long arg1, ...);
+long __syscall_open_by_handle_at(long arg1, ...);
+long __syscall_open(long arg1, ...);
+long __syscall_pause(long arg1, ...);
+long __syscall_perf_event_open(long arg1, ...);
+long __syscall_personality(long arg1, ...);
+long __syscall_pipe2(long arg1, ...);
+long __syscall_pipe(long arg1, ...);
+long __syscall_pivot_root(long arg1, ...);
+long __syscall_poll(long arg1, ...);
+long __syscall_ppoll(long arg1, ...);
+long __syscall_prctl(long arg1, ...);
+long __syscall_pread64(long arg1, ...);
+long __syscall_preadv2(long arg1, ...);
+long __syscall_preadv(long arg1, ...);
+long __syscall_prlimit64(long arg1, ...);
+long __syscall_process_vm_readv(long arg1, ...);
+long __syscall_process_vm_writev(long arg1, ...);
+long __syscall_pselect6(long arg1, ...);
+long __syscall_ptrace(long arg1, ...);
+long __syscall_putpmsg(long arg1, ...);
+long __syscall_pwrite64(long arg1, ...);
+long __syscall_pwritev2(long arg1, ...);
+long __syscall_pwritev(long arg1, ...);
+long __syscall_quotactl(long arg1, ...);
+long __syscall_readahead(long arg1, ...);
+long __syscall_readlinkat(long arg1, ...);
+long __syscall_readlink(long arg1, ...);
+long __syscall_read(long arg1, ...);
+long __syscall_readv(long arg1, ...);
+long __syscall_reboot(long arg1, ...);
+long __syscall_recvfrom(long arg1, ...);
+long __syscall_recvmmsg(long arg1, ...);
+long __syscall_recvmsg(long arg1, ...);
+long __syscall_remap_file_pages(long arg1, ...);
+long __syscall_removexattr(long arg1, ...);
+long __syscall_renameat2(long arg1, ...);
+long __syscall_renameat(long arg1, ...);
+long __syscall_rename(long arg1, ...);
+long __syscall_request_key(long arg1, ...);
+long __syscall_restart_syscall(long arg1, ...);
+long __syscall_rmdir(long arg1, ...);
+long __syscall_rt_sigaction(long arg1, ...);
+long __syscall_rt_sigpending(long arg1, ...);
+long __syscall_rt_sigprocmask(long arg1, ...);
+long __syscall_rt_sigqueueinfo(long arg1, ...);
+long __syscall_rt_sigreturn(long arg1, ...);
+long __syscall_rt_sigsuspend(long arg1, ...);
+long __syscall_rt_sigtimedwait(long arg1, ...);
+long __syscall_rt_tgsigqueueinfo(long arg1, ...);
+long __syscall_sched_getaffinity(long arg1, ...);
+long __syscall_sched_getattr(long arg1, ...);
+long __syscall_sched_getparam(long arg1, ...);
+long __syscall_sched_get_priority_max(long arg1, ...);
+long __syscall_sched_get_priority_min(long arg1, ...);
+long __syscall_sched_getscheduler(long arg1, ...);
+long __syscall_sched_rr_get_interval(long arg1, ...);
+long __syscall_sched_setaffinity(long arg1, ...);
+long __syscall_sched_setattr(long arg1, ...);
+long __syscall_sched_setparam(long arg1, ...);
+long __syscall_sched_setscheduler(long arg1, ...);
+long __syscall_sched_yield(long arg1, ...);
+long __syscall_seccomp(long arg1, ...);
+long __syscall_security(long arg1, ...);
+long __syscall_select(long arg1, ...);
+long __syscall_semctl(long arg1, ...);
+long __syscall_semget(long arg1, ...);
+long __syscall_semop(long arg1, ...);
+long __syscall_semtimedop(long arg1, ...);
+long __syscall_sendfile(long arg1, ...);
+long __syscall_sendmmsg(long arg1, ...);
+long __syscall_sendmsg(long arg1, ...);
+long __syscall_sendto(long arg1, ...);
+long __syscall_setdomainname(long arg1, ...);
+long __syscall_setfsgid(long arg1, ...);
+long __syscall_setfsuid(long arg1, ...);
+long __syscall_setgid(long arg1, ...);
+long __syscall_setgroups(long arg1, ...);
+long __syscall_sethostname(long arg1, ...);
+long __syscall_setitimer(long arg1, ...);
+long __syscall_set_mempolicy(long arg1, ...);
+long __syscall_setns(long arg1, ...);
+long __syscall_setpgid(long arg1, ...);
+long __syscall_setpriority(long arg1, ...);
+long __syscall_setregid(long arg1, ...);
+long __syscall_setresgid(long arg1, ...);
+long __syscall_setresuid(long arg1, ...);
+long __syscall_setreuid(long arg1, ...);
+long __syscall_setrlimit(long arg1, ...);
+long __syscall_set_robust_list(long arg1, ...);
+long __syscall_setsid(long arg1, ...);
+long __syscall_setsockopt(long arg1, ...);
+long __syscall_set_tid_address(long arg1, ...);
+long __syscall_settimeofday(long arg1, ...);
+long __syscall_setuid(long arg1, ...);
+long __syscall_setxattr(long arg1, ...);
+long __syscall_shmat(long arg1, ...);
+long __syscall_shmctl(long arg1, ...);
+long __syscall_shmdt(long arg1, ...);
+long __syscall_shmget(long arg1, ...);
+long __syscall_shutdown(long arg1, ...);
+long __syscall_sigaltstack(long arg1, ...);
+long __syscall_signalfd4(long arg1, ...);
+long __syscall_signalfd(long arg1, ...);
+long __syscall_socketpair(long arg1, ...);
+long __syscall_socket(long arg1, ...);
+long __syscall_splice(long arg1, ...);
+long __syscall_statfs(long arg1, ...);
+long __syscall_stat(long arg1, ...);
+long __syscall_swapoff(long arg1, ...);
+long __syscall_swapon(long arg1, ...);
+long __syscall_symlinkat(long arg1, ...);
+long __syscall_symlink(long arg1, ...);
+long __syscall_sync_file_range(long arg1, ...);
+long __syscall_syncfs(long arg1, ...);
+long __syscall_sync(long arg1, ...);
+long __syscall_sysfs(long arg1, ...);
+long __syscall_sysinfo(long arg1, ...);
+long __syscall_syslog(long arg1, ...);
+long __syscall_tee(long arg1, ...);
+long __syscall_tgkill(long arg1, ...);
+long __syscall_timer_create(long arg1, ...);
+long __syscall_timer_delete(long arg1, ...);
+long __syscall_timerfd_create(long arg1, ...);
+long __syscall_timerfd_gettime(long arg1, ...);
+long __syscall_timerfd_settime(long arg1, ...);
+long __syscall_timer_getoverrun(long arg1, ...);
+long __syscall_timer_gettime(long arg1, ...);
+long __syscall_timer_settime(long arg1, ...);
+long __syscall_times(long arg1, ...);
+long __syscall_time(long arg1, ...);
+long __syscall_tkill(long arg1, ...);
+long __syscall_truncate(long arg1, ...);
+long __syscall_tuxcall(long arg1, ...);
+long __syscall_umask(long arg1, ...);
+long __syscall_umount2(long arg1, ...);
+long __syscall_uname(long arg1, ...);
+long __syscall_unlinkat(long arg1, ...);
+long __syscall_unlink(long arg1, ...);
+long __syscall_unshare(long arg1, ...);
+long __syscall_userfaultfd(long arg1, ...);
+long __syscall_ustat(long arg1, ...);
+long __syscall_utimensat(long arg1, ...);
+long __syscall_utimes(long arg1, ...);
+long __syscall_utime(long arg1, ...);
+long __syscall_vfork(long arg1, ...);
+long __syscall_vhangup(long arg1, ...);
+long __syscall_vmsplice(long arg1, ...);
+long __syscall_wait4(long arg1, ...);
+long __syscall_waitid(long arg1, ...);
+long __syscall_writev(long arg1, ...);
+long __syscall_write(long arg1, ...);
+
+#undef SYSCALL_STATIC
+#define SYSCALL_STATIC 1
+
+#define SYSCALL_FADVISE_6_ARG
diff --git a/configure b/configure
index 02b736ce..f320660d 100755
--- a/configure
+++ b/configure
@@ -328,6 +328,7 @@ powerpc64*) ARCH=powerpc64 ;;
 powerpc*) ARCH=powerpc ;;
 sh[1-9bel-]*|sh|superh*) ARCH=sh ;;
 s390x*) ARCH=s390x ;;
+wasm*) ARCH=wasm ;;
 unknown) fail "$0: unable to detect target arch; try $0 --target=..." ;;
 *) fail "$0: unknown or unsupported target \"$target\"" ;;
 esac
diff --git a/src/internal/wasm/Readme b/src/internal/wasm/Readme
new file mode 100644
index 00000000..270db03c
--- /dev/null
+++ b/src/internal/wasm/Readme
@@ -0,0 +1,58 @@
+### Wasm overview
+
+Wasm ("WebAssembly") is a bytecode format, an ISA for an abstract virtual
+processor.  It is intended to offer a similar feature-set to real ISAs like
+x86/arm, so that it can be very quickly translated to native code for execution
+at full native speed, but it's low-level enough that a compiler targetting
+Wasm output is able to do all the useful optimisations at compile-time, so that
+the just-in-time translation to native code is nowhere near as intensive as it
+is to optimise a high-level language like C.
+
+In short, it's the ideal language for a web browser to execute.
+
+The "host environment" refers to the executor/interpreter of the Wasm binary,
+and the functions it provides to the compiled binary to fulfill its unresolved
+symbols.
+
+### Wasm syscalls
+
+Musl's support for Wasm assumes that the host environment will provide the
+"kernel" at runtime.  The host application is probably a web browser, and will
+implement syscalls such that __syscall_writev prints lines to the browser's
+debugging console (for printf support), and so on.
+
+The Wasm format additionally includes its own controls, provided as part of the
+ISA, including sbrk, and in future futex and more.  These are provided directly
+to the C code via compiler builtins.
+
+Thus, Musl implements certain syscalls itself, namely those which WebAssembly
+provides support for.  The hosting application should not have to care about
+these (indeed it can't provide them).
+
+Anything which relates to the browser however (file access, date/time, ...) is
+outside the knowledge of a generic Wasm binary.
+
+Musl statically links in the syscalls that the calling application actually
+uses.  Thus, the application can use any syscall-based functionality it chooses,
+as long as the environment executing the Wasm binary is able to provide
+definitions for those syscalls.
+
+No attempt is made to turn off irrelevant functionality.  For example, if the
+C application uses "kill()", that's fine - the hosting environment simply has
+to provide __syscall_kill.  It's not Musl's business to restrict how much or
+how little of a full Linux environment the host implements.  An interpreter
+may not be a web browser, but could be a complete interpreter running the Wasm
+binary by forwarding the syscalls to the host's kernel - or it could be web
+browser implementing only a small number of syscalls and emulating a "safe"
+environment within a browser.
+
+This assumes that the hosting environment and the C application are developed
+together; or, that the C environment is written according to the known
+capabilities of a target hosting environment.
+
+### Details
+
+* Due to the type-safe nature of Wasm linkage, syscalls cannot actually be
+  variadic if defined externally.  Any syscalls called with a variable argument
+  count, and not provided here by Musl, could be fixed on a case-by-case basis
+  as needed.
\ No newline at end of file
diff --git a/src/internal/wasm/syscall_brk.c b/src/internal/wasm/syscall_brk.c
new file mode 100644
index 00000000..28c5fc23
--- /dev/null
+++ b/src/internal/wasm/syscall_brk.c
@@ -0,0 +1,17 @@
+#include <limits.h>
+#include "syscall.h"
+
+long __syscall_brk(long arg1, ...)
+{
+  unsigned long newbrk = (unsigned long)arg1;
+  unsigned long pages = __builtin_wasm_current_memory();
+  if (newbrk % PAGE_SIZE)
+    goto end;
+  unsigned long new_pages = newbrk / PAGE_SIZE;
+  if (new_pages <= pages || new_pages >= (0xffffffffu / PAGE_SIZE))
+    goto end;
+  if (__builtin_wasm_grow_memory(new_pages - pages) != (unsigned long)-1)
+    pages = new_pages;
+  end:
+  return pages * PAGE_SIZE;
+}
diff --git a/src/internal/wasm/syscall_futex.c b/src/internal/wasm/syscall_futex.c
new file mode 100644
index 00000000..9c14fa0b
--- /dev/null
+++ b/src/internal/wasm/syscall_futex.c
@@ -0,0 +1,45 @@
+#include <futex.h>
+#include <stdarg.h>
+#include <errno.h>
+#include "syscall.h"
+
+// Wasm doesn't *yet* have futex(), but it's being planned as part of the
+// threaded-Wasm support in Spring 2018.
+//
+// For now, Wasm is single-threaded and we simply assert that the lock is not
+// held, and abort if a wait would be required (assume it's a corrupted lock).
+
+long __syscall_futex(long arg1, ...)
+{
+  va_list va;
+  va_start(va, arg1);
+
+  volatile int* addr = (volatile int*)arg1;
+  long op = va_arg(va, long);
+
+  op &= ~FUTEX_PRIVATE;
+
+  if (op == FUTEX_WAIT) {
+    int val = (int)va_arg(va, long);
+    // arg4 would be the timeout as a timespec*
+    va_end(va);
+
+    if (*addr == val) {
+      // trap, Wasm can't block
+      // TODO use a WebAssembly futex builtin, when those arrive!
+      __builtin_unreachable();
+    }
+    return 0;
+  }
+  if (op == FUTEX_WAKE) {
+    // arg3 would be the number of waiters to wake as an int
+    va_end(va);
+
+    // Wasm can't block/wait
+    // TODO use a WebAssembly futex builtin, when those arrive!
+    return 0;
+  }
+
+  va_end(va);
+  return -ENOSYS;
+}
diff --git a/src/internal/wasm/syscall_mmap.c b/src/internal/wasm/syscall_mmap.c
new file mode 100644
index 00000000..d37afd99
--- /dev/null
+++ b/src/internal/wasm/syscall_mmap.c
@@ -0,0 +1,21 @@
+#include <errno.h>
+#include "syscall.h"
+
+// Wasm doesn't have mmap!  There's just a single linear memory block.
+
+long __syscall_madvise(long arg1, ...)
+{
+  return 0;
+}
+long __syscall_mmap(long arg1, ...)
+{
+  return -ENOSYS;
+}
+long __syscall_mremap(long arg1, ...)
+{
+  return -ENOSYS;
+}
+long __syscall_munmap(long arg1, ...)
+{
+  return 0;
+}


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Wasm support patch 3 (the actual arch/wasm)
  2017-11-28 12:39 [PATCH] Wasm support patch 3 (the actual arch/wasm) Nicholas Wilson
@ 2017-11-29 20:36 ` Rich Felker
  2017-12-15 17:02   ` Nicholas Wilson
  0 siblings, 1 reply; 3+ messages in thread
From: Rich Felker @ 2017-11-29 20:36 UTC (permalink / raw)
  To: musl

On Tue, Nov 28, 2017 at 12:39:03PM +0000, Nicholas Wilson wrote:
> Hi,
> 
> Here's the patch with the actual Wasm arch implementation!
> 
> It's got a certain amount of boilerplate, mainly in "arch/wasm/bits"
> where certain structures have to be defined per-architecture. I just
> copied x32 where possible, since Wasm a new arch and we have free
> choice of how to define things, and x32 is another modern arch
> that's like i386 but with some legacy cruft removed.
> 
> As you can see, I'm using the "static syscalls" approach. There's a
> README explaining the approach in "src/internal/wasm", where I
> explain that most syscalls are to be provided by the embedding
> environment, but some syscalls like brk can actually be implemented
> using native Wasm intrinsics, which I've done in
> "src/internal/wasm".
> 
> The result of these three patches (this and the last two emails), is
> a version of Musl that builds using Clang's Wasm support, and
> creates executable Wasm modules that can use malloc, do I/O, and all
> sorts of other things. I haven't tested the full range of syscalls
> yet, but the ones I have tried work well.
> 
> Ready for feedback!

See inline below:

> diff --git a/arch/wasm/atomic_arch.h b/arch/wasm/atomic_arch.h
> new file mode 100644
> index 00000000..9da04059
> --- /dev/null
> +++ b/arch/wasm/atomic_arch.h
> @@ -0,0 +1,74 @@
> +#include <stdint.h>
> +
> +#define a_ctz_l a_ctz_l
> +static inline int a_ctz_l(unsigned long x)
> +{
> +  return __builtin_ctzl(x);
> +}
> +#define a_ctz_64 a_ctz_64
> +static inline int a_ctz_64(uint64_t x)
> +{
> +  return __builtin_ctzll(x);
> +}
> +

Unsure about whether this is good; you can just omit them and the
higher-level header provides default definitions. I think clang even
optimizes them to a single logical insn.

Also note that coding style is to indent with tabs not spaces.

> +#define a_and_64 a_and_64
> +static inline void a_and_64(volatile uint64_t *p, uint64_t v)
> +{
> +  // TODO use a WebAssembly CAS builtin, when those arrive with the threads feature
> +  //__atomic_fetch_and(p, v, __ATOMIC_SEQ_CST);
> +  *p &= v;
> +}

These need to be non-dummy at some point but we can discuss that
later. However you shouldn't define dummy versions of all of them like
this. Either just define dummy a_ll and a_sc, or dummy a_cas. Let the
higher level framework take care of all the rest unless you really
have a working, better-optimized version of them like x86 does.

> +#define a_crash a_crash
> +static inline void a_crash()
> +{
> +  // This generates the Wasm "unreachable" instruction which traps when reached
> +  __builtin_unreachable();
> +}

Shouldn't it be __builtin_trap()? __builtin_unreachable allows the
compiler to assume the code path is not reachable, which is exactly
the opposite of what we want.

> diff --git a/arch/wasm/bits/alltypes.h.in b/arch/wasm/bits/alltypes.h.in
> new file mode 100644
> index 00000000..ca4e94a2
> --- /dev/null
> +++ b/arch/wasm/bits/alltypes.h.in
> @@ -0,0 +1,33 @@
> +#ifdef __wasm32__
> +#define _Addr int
> +#define _Int64 long long
> +#define _Reg int
> +
> +#elif defined __wasm64__
> +#define _Addr long
> +#define _Int64 long long
> +#define _Reg long
> +
> +#endif

Generally we don't do 32/64-bit archs together as one arch with
#ifdefs but as separate ones. There are a number of additional things
that have to be different, like...

> +TYPEDEF __builtin_va_list va_list;
> +TYPEDEF __builtin_va_list __isoc_va_list;
> +
> +#ifndef __cplusplus
> +TYPEDEF __WCHAR_TYPE__ wchar_t;
> +#endif
> +TYPEDEF __WINT_TYPE__ wint_t;
> +
> +TYPEDEF float float_t;
> +TYPEDEF double double_t;
> +
> +TYPEDEF long time_t;
> +TYPEDEF long suseconds_t;
> +
> +TYPEDEF struct { union { int __i[9]; volatile int __vi[9]; unsigned __s[9]; } __u; } pthread_attr_t;
> +TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
> +TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } mtx_t;
> +TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } pthread_cond_t;
> +TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } cnd_t;
> +TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[8]; } __u; } pthread_rwlock_t;
> +TYPEDEF struct { union { int __i[5]; volatile int __vi[5]; void *__p[5]; } __u; } pthread_barrier_t;

..despite these being in the arch headers, musl policy actually
requires all 32-bit archs and all 64-bit archs to have the same
pthread type sizes/definitions. Using the 32-bit definitions on a
64-bit arch will not work because of how pthread_impl.h lays out the
actual usage of the slots; even if it did for some of them it wouldn't
be future-proof.

> diff --git a/arch/wasm/bits/float.h b/arch/wasm/bits/float.h
> new file mode 100644
> index 00000000..9a56ad14
> --- /dev/null
> +++ b/arch/wasm/bits/float.h
> @@ -0,0 +1,16 @@
> +#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__
> +
> +#define LDBL_TRUE_MIN __LDBL_DENORM_MIN__
> +#define LDBL_MIN __LDBL_MIN__
> +#define LDBL_MAX __LDBL_MAX__
> +#define LDBL_EPSILON __LDBL_EPSILON__
> +
> +#define LDBL_MANT_DIG __LDBL_MANT_DIG__
> +#define LDBL_MIN_EXP __LDBL_MIN_EXP__
> +#define LDBL_MAX_EXP __LDBL_MAX_EXP__
> +
> +#define LDBL_DIG __LDBL_DIG__
> +#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__
> +#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__
> +
> +#define DECIMAL_DIG __DECIMAL_DIG__

These should be defined explicitly to the correct values so that there
are not silently varying arch parameters resulting in undocumentedly
incompatible ABIs. I would assume they're the same as double, no?

> diff --git a/arch/wasm/bits/limits.h b/arch/wasm/bits/limits.h
> new file mode 100644
> index 00000000..649d7d74
> --- /dev/null
> +++ b/arch/wasm/bits/limits.h
> @@ -0,0 +1,9 @@
> +#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
> + || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
> +// The WebAssembly fixed page size is 64KiB
> +#define PAGE_SIZE 65536
> +#define LONG_BIT (__SIZEOF_LONG__*8)
> +#endif
> +
> +#define LONG_MAX  __LONG_MAX__
> +#define LLONG_MAX __LONG_LONG_MAX__
> diff --git a/arch/wasm/bits/posix.h b/arch/wasm/bits/posix.h
> new file mode 100644
> index 00000000..c37b94c1
> --- /dev/null
> +++ b/arch/wasm/bits/posix.h
> @@ -0,0 +1,2 @@
> +#define _POSIX_V6_LP64_OFF64  1
> +#define _POSIX_V7_LP64_OFF64  1

These differ by 32/64-bit.

> diff --git a/arch/wasm/bits/setjmp.h b/arch/wasm/bits/setjmp.h
> new file mode 100644
> index 00000000..a9262a64
> --- /dev/null
> +++ b/arch/wasm/bits/setjmp.h
> @@ -0,0 +1 @@
> +typedef unsigned long long __jmp_buf[8];

Are you sure that makes sense? What does the wasm register file look
like? What registers are call-saved?

> diff --git a/arch/wasm/bits/signal.h b/arch/wasm/bits/signal.h
> new file mode 100644
> index 00000000..c8d10a81
> --- /dev/null
> +++ b/arch/wasm/bits/signal.h
> @@ -0,0 +1,77 @@
> +#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
> + || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
> +
> +#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
> +#define MINSIGSTKSZ 2048
> +#define SIGSTKSZ 8192
> +#endif
> +
> +// I'm not expecting any of this to be actually usable... these definitions are
> +// just the bare minimum so that src/signal/*.c compiles.
> +
> +typedef struct {
> +	unsigned long long __ip_dummy;
> +} mcontext_t;

Likewise here.

> +struct sigaltstack {
> +	void *ss_sp;
> +	int ss_flags;
> +	size_t ss_size;
> +};
> +
> +typedef struct __ucontext {
> +	unsigned long uc_flags;
> +	struct __ucontext *uc_link;
> +//	stack_t uc_stack;
> +	mcontext_t uc_mcontext;
> +	sigset_t uc_sigmask;
> +//	unsigned long long __fpregs_mem[64];
> +} ucontext_t;

Why is uc_stack omitted?

> diff --git a/arch/wasm/bits/syscall.h.in b/arch/wasm/bits/syscall.h.in
> new file mode 100644
> index 00000000..00c31889
> --- /dev/null
> +++ b/arch/wasm/bits/syscall.h.in
> @@ -0,0 +1,321 @@
> +// For Wasm, we don't use syscall numbers!  We statically link in only the
> +// syscalls which are invoked.
> +
> +#define SYS_accept4 syscall_accept4

They probably need casts to an arithmetic type and need to be in a
reserved namespace (like __syscall_accept4, etc.).

> diff --git a/arch/wasm/crt_arch.h b/arch/wasm/crt_arch.h
> new file mode 100644
> index 00000000..e69de29b

If this is empty, how does program entry point get created? How does
__libc_start_main get called?

> diff --git a/arch/wasm/pthread_arch.h b/arch/wasm/pthread_arch.h
> new file mode 100644
> index 00000000..75aac668
> --- /dev/null
> +++ b/arch/wasm/pthread_arch.h
> @@ -0,0 +1,5 @@
> +static inline struct pthread *__pthread_self(void) { return pthread_self(); }

This is a circular definition. You need code to load the thread
pointer from whatever part of the register file it's stored in.

> diff --git a/arch/wasm/reloc.h b/arch/wasm/reloc.h
> new file mode 100644
> index 00000000..eca58d9b
> --- /dev/null
> +++ b/arch/wasm/reloc.h
> @@ -0,0 +1 @@
> +#define CRTJMP(pc,sp) __builtin_unreachable()
> \ No newline at end of file

Needs newline. This is probably not actually important without dynamic
linking.

> diff --git a/arch/wasm/syscall_arch.h b/arch/wasm/syscall_arch.h
> new file mode 100644
> index 00000000..41baf881
> --- /dev/null
> +++ b/arch/wasm/syscall_arch.h
> @@ -0,0 +1,333 @@
> +// For both Wasm32 and Wasm64, we assume that the host environment can't
> +// provide 64-bit types, and split 64-bit values into two arguments.  A Wasm
> +// interpreter *could* provide i64 support, but in practice web browsers aren't
> +// doing that right now, and any i64 we pass out to a syscall will get chopped
> +// to 58-bit precision.
> +#define __SYSCALL_LL_E(x) \
> +((union { long long ll; long l[2]; }){ .ll = x }).l[0], \
> +((union { long long ll; long l[2]; }){ .ll = x }).l[1]
> +#define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
> +
> +long __syscall_accept4(long arg1, ...);

...

> +### Details
> +
> +* Due to the type-safe nature of Wasm linkage, syscalls cannot actually be
> +  variadic if defined externally.  Any syscalls called with a variable argument
> +  count, and not provided here by Musl, could be fixed on a case-by-case basis
> +  as needed.
> \ No newline at end of file

If variadic is a problem you could make them just always take 6 fixed
long args and make the syscall_arch.h machinery just pass dummy zeros
for the unused slots.

> diff --git a/src/internal/wasm/syscall_brk.c b/src/internal/wasm/syscall_brk.c
> new file mode 100644
> index 00000000..28c5fc23
> --- /dev/null
> +++ b/src/internal/wasm/syscall_brk.c
> @@ -0,0 +1,17 @@
> +#include <limits.h>
> +#include "syscall.h"
> +
> +long __syscall_brk(long arg1, ...)
> +{
> +  unsigned long newbrk = (unsigned long)arg1;
> +  unsigned long pages = __builtin_wasm_current_memory();
> +  if (newbrk % PAGE_SIZE)
> +    goto end;
> +  unsigned long new_pages = newbrk / PAGE_SIZE;
> +  if (new_pages <= pages || new_pages >= (0xffffffffu / PAGE_SIZE))
> +    goto end;
> +  if (__builtin_wasm_grow_memory(new_pages - pages) != (unsigned long)-1)
> +    pages = new_pages;
> +  end:
> +  return pages * PAGE_SIZE;
> +}

As noted before I think it makes sense to just drop brk. It's not
needed.

> diff --git a/src/internal/wasm/syscall_futex.c b/src/internal/wasm/syscall_futex.c
> new file mode 100644
> index 00000000..9c14fa0b
> --- /dev/null
> +++ b/src/internal/wasm/syscall_futex.c
> @@ -0,0 +1,45 @@
> +#include <futex.h>
> +#include <stdarg.h>
> +#include <errno.h>
> +#include "syscall.h"
> +
> +// Wasm doesn't *yet* have futex(), but it's being planned as part of the
> +// threaded-Wasm support in Spring 2018.
> +//
> +// For now, Wasm is single-threaded and we simply assert that the lock is not
> +// held, and abort if a wait would be required (assume it's a corrupted lock).

I think this is probably a bogus assumption; you can't assume anything
about how futex is being used to implement locks. Note that a
perfectly valid implementation of futex is just one that always
returns success; it will simply result in spinning at 100% cpu load
waiting for the event to happen rather than going to sleep.

This was all a pretty quick, high-level review, but I hope it's
helpful.

Rich


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Wasm support patch 3 (the actual arch/wasm)
  2017-11-29 20:36 ` Rich Felker
@ 2017-12-15 17:02   ` Nicholas Wilson
  0 siblings, 0 replies; 3+ messages in thread
From: Nicholas Wilson @ 2017-12-15 17:02 UTC (permalink / raw)
  To: musl

Hi,

Thank you for this feedback, it was helpful. It was also put in my spam bin, hence the late reply :(

I've moved the patch into a GitHub branch while I'm working on it, so the latest version of the patch is now here: https://github.com/NWilson/musl/pull/1

On 29 November 2017 20:36, Rich Felker wrote:
>> +#define a_ctz_64 a_ctz_64
>> +static inline int a_ctz_64(uint64_t x)
>> +{
>> +  return __builtin_ctzll(x);
>> +}
>> +
> Unsure about whether this is good; you can just omit them and the
> higher-level header provides default definitions. I think clang even
> optimizes them to a single logical insn.

If Clang can optimise the DeBruijn thing to the native instruction, that's magic :)
Maybe it's target-specific though. I tested with the a_ctz_64 definition from atomic.h, and found that unfortunately it's *not* optimised with -O2 for Wasm.

Perhaps Musl should default to using the intrinsic for all platforms that don't provide it, since the intrinsic's fallback ought to be as good as Musl's.

Anyway, using the builtin works for now, and it's the only way I've found to get the fast instruction to be used.

> Also note that coding style is to indent with tabs not spaces.
Fixed :)

>> +#define a_and_64 a_and_64
>> +static inline void a_and_64(volatile uint64_t *p, uint64_t v)
>> +{
>> +  // TODO use a WebAssembly CAS builtin, when those arrive with the threads feature
>> +  //__atomic_fetch_and(p, v, __ATOMIC_SEQ_CST);
>> +  *p &= v;
>> +}
> These need to be non-dummy at some point but we can discuss that
> later. However you shouldn't define dummy versions of all of them like
> this. Either just define dummy a_ll and a_sc, or dummy a_cas. Let the
> higher level framework take care of all the rest unless you really
> have a working, better-optimized version of them like x86 does.

I put a placeholder in wasm/atomic_arch.h for each atomic instruction in the Wasm atomics proposal, as a reminder and to show what's coming. Multithreaded Wasm support will come sometime in 2018, for now it's impossible to have races or threads, so the placeholders are equivalent to what would be generated via a_cas.

>> +#define a_crash a_crash
>> +static inline void a_crash()
>> +{
>> +  // This generates the Wasm "unreachable" instruction which traps when reached
>> +  __builtin_unreachable();
>> +}
> Shouldn't it be __builtin_trap()? __builtin_unreachable allows the
> compiler to assume the code path is not reachable, which is exactly
> the opposite of what we want.

Good point, __builtin_unreachable is emitted as a trap on Wasm (has the same effect), but you're right the C frontend will probably treat them differently with respect to inlining and branch elimination. __builtin_trap won't cause the code leading to it to be culled.

Fixed.

> Generally we don't do 32/64-bit archs together as one arch with
> #ifdefs but as separate ones.

We might find that we can get away with it on Wasm though? Apart from the size of long and void* changing, they are pretty much architectures (like running a CPU in 32/64 bit "mode").

I could take out the Wasm64 support for now, given that it's hypothetical and the toolchain's not done yet. I just thought it would save time later if the headers were 64-bit ready.

>> +TYPEDEF struct { union { int __i[9]; volatile int __vi[9]; unsigned __s[9]; } __u; } pthread_attr_t;
>> +TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } pthread_mutex_t;
>> +TYPEDEF struct { union { int __i[6]; volatile int __vi[6]; volatile void *volatile __p[6]; } __u; } mtx_t;
>> +TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } pthread_cond_t;
>> +TYPEDEF struct { union { int __i[12]; volatile int __vi[12]; void *__p[12]; } __u; } cnd_t;
>> +TYPEDEF struct { union { int __i[8]; volatile int __vi[8]; void *__p[8]; } __u; } pthread_rwlock_t;
>> +TYPEDEF struct { union { int __i[5]; volatile int __vi[5]; void *__p[5]; } __u; } pthread_barrier_t;
> ..despite these being in the arch headers, musl policy actually
> requires all 32-bit archs and all 64-bit archs to have the same
> pthread type sizes/definitions. Using the 32-bit definitions on a
> 64-bit arch will not work because of how pthread_impl.h lays out the
> actual usage of the slots; even if it did for some of them it wouldn't
> be future-proof.

Interesting, I had wondered what the rationale was. It looks like most of the slots are unused? I can't work out why the 64-bit structures actually need to be bigger than the 32-bit ones.

In any case these are just stubs for now - threading won't work on Wasm. It's easier to put some dummy defs in the headers than to hack out all the source files which use threading.

>> +++ b/arch/wasm/bits/float.h
> These should be defined explicitly to the correct values so that there
> are not silently varying arch parameters resulting in undocumentedly
> incompatible ABIs. I would assume they're the same as double, no?

Fair enough, I've replaced the float constants with hardcoded values.

Slightly surprisingly to me, long double seems to be 80-bit precision, like on ARM64. I haven't really thought about how it should work, I've not been involved with that part of the compiler frontend. float/double are normal IEEE on Wasm, so presumably we've got the compiler to emulate higher-precision.

>> diff --git a/arch/wasm/bits/limits.h b/arch/wasm/bits/limits.h
>> +#define _POSIX_V6_LP64_OFF64  1
>> +#define _POSIX_V7_LP64_OFF64  1
> These differ by 32/64-bit.

Thanks! Dan Gohman (Mozilla) spotted it too, hence the patch for x32 the other day.

>> diff --git a/arch/wasm/bits/setjmp.h b/arch/wasm/bits/setjmp.h
>> +typedef unsigned long long __jmp_buf[8];
> Are you sure that makes sense? What does the wasm register file look
> like? What registers are call-saved?

setjmp doesn't make sense for Wasm. We don't support setjmp, and Wasm doesn't even have any registers at all. It's a stack-based architecture, where instructions push/pop data to the stack for subsequent instructions to consume.

All the things that don't make sense for Wasm just have dummy definitions, to keep the build working. In this case, signal.c includes setjmp.h, which in turn depends on __jmp_buf being defined. Any actual calls to setjmp will fail to link though, because Wasm doesn't define it.

It's not ideal, but putting these dummies in the arch/wasm headers seems nicer than messing with code outside the wasm directories.

> Why is uc_stack omitted?

Ditto for arch/wasm/bits/signal.h, where I've got a comment explaining the definitions are dummies. All the members of struct __ucontext that are not referenced by the platform-independent code (only used by arch-specific files) can be omitted from Wasm.

>> diff --git a/arch/wasm/bits/syscall.h.in b/arch/wasm/bits/syscall.h.in
>> +#define SYS_accept4 syscall_accept4
> They probably need casts to an arithmetic type and need to be in a
> reserved namespace (like __syscall_accept4, etc.).

Yes, I've tweaked it now so that the syscall casts work well. I think it's quite a satisfactory solution for the static syscalls now.

>> diff --git a/arch/wasm/crt_arch.h b/arch/wasm/crt_arch.h
>> new file mode 100644
>> index 00000000..e69de29b
> If this is empty, how does program entry point get created? How does
> __libc_start_main get called?

It's not empty now :) I've added two entrypoints:
* The standard _start entrypoint is there for people who want to run run "exit(main(...))" straight away. We think that very few people will actually want this however.
* In src/internal/wasm/start_wasm.c I've added a function "_start_wasm" which initialises stdio and calls pre-main constructors, then simply returns. We're expecting that this (or something like it) will become the default entrypoint for the Wasm linker.

Typical usage for Wasm will be calling into the (compiled) WebAssembly module from JavaScript. The Wasm module will sit there doing nothing until you call a function from it, which then becomes the bottom of the stack and we enter into the Wasm code. When that invocation returns, the heap and all the global variables etc are still there in memory, the Wasm module just sits doing nothing (no execution stack) until the next time you call into it from JavaScript.

The linker arranges for _start_wasm to run automatically at the point when the browser first loads the module, before any other code is executed.

The Wasm linker is currently being reworked not to use the .init_array section anymore, this may need some tweaking soon. Future support will be available exclusively via the @llvm.global.ctors intrinsic - this is the mechanism that C++ constructors and __attribute__((ctor)) functions hook into.

So - although I thought it was sorted last week, it's now back in flux.

>> diff --git a/arch/wasm/pthread_arch.h b/arch/wasm/pthread_arch.h
>> +static inline struct pthread *__pthread_self(void) { return pthread_self(); }
> This is a circular definition. You need code to load the thread
> pointer from whatever part of the register file it's stored in.

Hm, you're right, I just inherited this from some Emscripten code.

There are no registers or threads, I'll have to implement this using a global.

>> diff --git a/arch/wasm/reloc.h b/arch/wasm/reloc.h
>> +#define CRTJMP(pc,sp) __builtin_unreachable()
> Needs newline. This is probably not actually important without dynamic
> linking.

Newline added. Making it unreachable seems a good compromise - attempts to use the dynamic linker won't work, but no special work needs to be done to adapt the existing code to Wasm yet.

>> +* Due to the type-safe nature of Wasm linkage, syscalls cannot actually be
>> +  variadic if defined externally.  Any syscalls called with a variable argument
>> +  count, and not provided here by Musl, could be fixed on a case-by-case basis
>> +  as needed.
>> \ No newline at end of file
> If variadic is a problem you could make them just always take 6 fixed
> long args and make the syscall_arch.h machinery just pass dummy zeros
> for the unused slots.

Thanks, that's exactly what I ended up doing. :)

> As noted before I think it makes sense to just drop brk. It's not
> needed.

I've done that, and implemented mmap instead.

>> +// Wasm doesn't *yet* have futex(), but it's being planned as part of the
>> +// threaded-Wasm support in Spring 2018.
>> +//
>> +// For now, Wasm is single-threaded and we simply assert that the lock is not
>> +// held, and abort if a wait would be required (assume it's a corrupted lock).
> I think this is probably a bogus assumption; you can't assume anything
> about how futex is being used to implement locks. Note that a
> perfectly valid implementation of futex is just one that always
> returns success; it will simply result in spinning at 100% cpu load
> waiting for the event to happen rather than going to sleep.

I'd argue it is valid. In Wasm, there's only one thread of execution, so if the condition isn't set now then it won't ever be. It doesn't matter that futex might have spurious wakeups - on "normal" archs futex will wait, so if a single-threaded arch like Wasm finds itself trying to wait on condition that can't ever happen, then it's reasonable to abort.

It's certainly more useful for debugging that causing a spin at 100% CPU.

Thanks very much for your feedback Rich, I'll try to keep maintaining my branch and keeping you updated on it, if you're happy with the idea that Wasm might be an accepted architecture (when the code's right).

Nick

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-12-15 17:02 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-28 12:39 [PATCH] Wasm support patch 3 (the actual arch/wasm) Nicholas Wilson
2017-11-29 20:36 ` Rich Felker
2017-12-15 17:02   ` Nicholas Wilson

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).