mailing list of musl libc
 help / color / mirror / code / Atom feed
* Data structures defined by both linux and musl
@ 2018-12-18 19:41 Arnd Bergmann
  2018-12-20  0:30 ` Rich Felker
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Arnd Bergmann @ 2018-12-18 19:41 UTC (permalink / raw)
  To: musl, y2038 Mailman List; +Cc: Rich Felker, Adhemerval Zanella, Maxim Kuvyrkov

I recently discussed with Rich about the work needed to get 64-bit time_t
support into musl. One of the first steps he identified was to find out which
interfaces we would want to abstract or wrap for a new ABI given that we
have to make a binary incompatible interface anyway.

I have found all the data structures that are provided by both the kernel
headers and the musl headers now, and annotated what I think the
path forward could be. I already provided the same list on IRC, but
here is a (slightly updated) copy for everyone else.

The takeaway is that we probably need to add new definitions for
flock64, statfs, stat, termios, {msg,sem,shm}{buf,info,id_ds}, ipc_perm,
rlimit, rusage, sched_param, time_t, timeval, timespec, itimerval,
itimerspec, and timex, and then wrap all kernel interfaces that
use those.

The same list can also be helpful when we try to clean up the kernel
header files -- my idea was that we may want to prefix each struct
tag with __kernel_ as we do for typedefs, and then have a kernel
header that redefines them like

#ifdef __WANT_KERNEL_STRUCTS
#define __kernel_flock flock
#endif
struct __kernel_flock {
   ...
};

    Arnd

/* sparc and mips are incompatible, keep wrapping flock64 */
include/uapi/asm-generic/fcntl.h:struct flock {
arch/mips/include/uapi/asm/fcntl.h:struct flock {

/* pt_regs and sigcontext are arch specific, cannot abstract */
arch/*/include/uapi/asm/ptrace.h:struct pt_regs {
arch/*/include/uapi/asm/ptrace.h:struct user_regs_struct {
arch/arm64/include/uapi/asm/sigcontext.h:struct _aarch64_ctx {
arch/arm64/include/uapi/asm/sigcontext.h:struct esr_context {
arch/arm64/include/uapi/asm/sigcontext.h:struct extra_context {
arch/arm64/include/uapi/asm/sigcontext.h:struct sve_context {
arch/*/include/uapi/asm/sigcontext.h:struct sigcontext {

/* arch specific, has wrapper */
arch/*/include/uapi/asm/signal.h:struct sigaction {
include/uapi/asm-generic/signal.h:struct sigaction {

/* arch specific, maybe add wrapper? */
arch/*/include/uapi/asm/signal.h:typedef struct sigaltstack {
include/uapi/asm-generic/signal.h:typedef struct sigaltstack {

/* arch specific, need to look closer for incompatibilities */
include/uapi/asm-generic/siginfo.h:typedef struct sigevent {

/* arch specific, should add wrapper */
arch/*/include/uapi/asm/statfs.h:struct statfs {
include/uapi/asm-generic/statfs.h:struct statfs {

/* arch specific, wrap statx instead */
arch/*/include/uapi/asm/stat.h:struct stat {
include/uapi/asm-generic/stat.h:struct stat {
include/uapi/linux/stat.h:struct statx {
include/uapi/linux/stat.h:struct statx_timestamp {

/* arch specific, should wrap termios2 where possible,
 * need to check what musl does now */
arch/*/include/uapi/asm/termbits.h:struct termios {
include/uapi/asm-generic/termbits.h:struct termios {

/* IPC: wrap them all */
include/uapi/linux/mqueue.h:struct mq_attr {
include/uapi/linux/msg.h:struct msgbuf {
include/uapi/linux/msg.h:struct msginfo {
include/uapi/linux/msg.h:struct msqid_ds {
include/uapi/linux/sem.h:struct sembuf {
include/uapi/linux/sem.h:struct semid_ds {
include/uapi/linux/sem.h:struct  seminfo {
include/uapi/linux/shm.h:struct shmid_ds {
include/uapi/linux/shm.h:struct shm_info {
include/uapi/linux/shm.h:struct    shminfo {

/* rlimit/rlimit64: keep using only rlimit64 */
include/uapi/linux/resource.h:struct rlimit {
include/uapi/linux/resource.h:struct rlimit64 {

/* rusage: need to wrap: getrusage, wait4 */
include/uapi/linux/resource.h:struct    rusage {

/* wrapped already, replace with a more extensible one */
include/uapi/linux/sched/types.h:struct sched_param {

/* prctl(PR_SET_MM); broken in kernel compat mode?
 * could be wrapped if necessary */
include/uapi/linux/prctl.h:struct prctl_mm_map {

/* inconsistent amount of padding, maybe wrap */
include/uapi/linux/sysinfo.h:struct sysinfo {

/* time64: need to use 64-bit versions of time_t */
include/uapi/linux/time.h:timespec {
include/uapi/linux/time.h:struct itimerspec {

/* need to wrap */
include/uapi/linux/utime.h:struct utimbuf {
include/uapi/linux/time.h:timeval {
include/uapi/linux/time.h:struct itimerval {

/* no need to change */
include/uapi/linux/time.h:struct timezone {

/* probably need to wrap (depending on kernel decision) */
include/uapi/linux/timex.h:struct timex {

/* incompatible on x32 */
include/uapi/linux/times.h:struct tms {
include/uapi/linux/uio.h:struct iovec {

/* tape driver ioctls, musl copy is incompatible
 * on mips64, sparc64 */
include/uapi/linux/mtio.h:struct    mtget {
include/uapi/linux/mtio.h:struct    mtop {
include/uapi/linux/mtio.h:struct    mtpos {

/* compatible, no need to wrap */
include/uapi/asm-generic/fcntl.h:struct f_owner_ex {
include/uapi/asm-generic/poll.h:struct pollfd {
include/uapi/asm-generic/termios.h:struct winsize {
include/uapi/linux/acct.h:struct acct_v3
include/uapi/linux/eventpoll.h:struct epoll_event {
include/uapi/linux/fanotify.h:struct fanotify_event_metadata {
include/uapi/linux/fanotify.h:struct fanotify_response {
include/uapi/linux/signalfd.h:struct signalfd_siginfo {

/* fixed wire format */
include/uapi/linux/udp.h:struct udphdr {
include/uapi/linux/icmp.h:struct icmphdr {
include/uapi/linux/if_arp.h:struct arphdr {
include/uapi/linux/tcp.h:struct tcphdr {
include/uapi/linux/if_ether.h:struct ethhdr {
include/uapi/linux/ip.h:struct iphdr {

/* other network stuff, fixed format */
include/uapi/linux/icmpv6.h:struct icmp6_filter {
include/uapi/linux/if_arp.h:struct arpreq {
include/uapi/linux/if_arp.h:struct arpreq_old {
include/uapi/linux/if.h:struct ifconf  {
include/uapi/linux/if.h:struct ifmap {
include/uapi/linux/if.h:struct ifreq {
include/uapi/linux/if_packet.h:struct packet_mreq {
include/uapi/linux/if_packet.h:struct sockaddr_ll {
include/uapi/linux/in6.h:struct in6_addr {
include/uapi/linux/in6.h:struct ipv6_mreq {
include/uapi/linux/in6.h:struct sockaddr_in6 {
include/uapi/linux/in.h:struct group_filter {
include/uapi/linux/in.h:struct group_req {
include/uapi/linux/in.h:struct group_source_req {
include/uapi/linux/in.h:struct in_addr {
include/uapi/linux/in.h:struct in_pktinfo {
include/uapi/linux/in.h:struct ip_mreq  {
include/uapi/linux/in.h:struct ip_mreqn {
include/uapi/linux/in.h:struct ip_mreq_source {
include/uapi/linux/in.h:struct ip_msfilter {
include/uapi/linux/in.h:struct sockaddr_in {
include/uapi/linux/inotify.h:struct inotify_event {
include/uapi/linux/ipc.h:struct ipc_perm
include/uapi/linux/ipv6.h:struct in6_pktinfo {
include/uapi/linux/ipv6.h:struct ip6_mtuinfo {
include/uapi/linux/ipv6_route.h:struct in6_rtmsg {
include/uapi/linux/route.h:struct rtentry {
include/uapi/linux/tcp.h:struct tcp_diag_md5sig {
include/uapi/linux/tcp.h:struct tcp_info {
include/uapi/linux/tcp.h:struct tcp_md5sig {
include/uapi/linux/tcp.h:struct tcp_repair_window {
include/uapi/linux/un.h:struct sockaddr_un {

/* shared typedefs: all in ELF format; can't change */
arch/*/include/uapi/asm/elf.h:typedef ... elf_fpregset_t;
arch/*/include/uapi/asm/elf.h:typedef ... elf_greg_t;
arch/*/include/uapi/asm/elf.h:typedef elf_greg_t elf_gregset_t[ELF_NGREG];
arch/sparc/include/uapi/asm/uctx.h:} mcontext_t;
arch/sparc/include/uapi/asm/uctx.h:typedef struct ucontext ucontext_t;
include/uapi/linux/elf.h:typedef struct elf32_hdr Elf32_Ehdr;
include/uapi/linux/elf.h:typedef struct elf64_hdr Elf64_Ehdr;
include/uapi/linux/elf.h:typedef struct {...} Elf32_Shdr;
include/uapi/linux/elf.h:typedef struct {...} Elf64_Shdr;
include/uapi/linux/elf.h:typedef struct {...} Elf32_Chdr;
include/uapi/linux/elf.h:typedef struct {...} Elf64_Chdr;
include/uapi/linux/elf.h:typedef struct {...} Elf32_Nhdr;
include/uapi/linux/elf.h:typedef struct {...} Elf64_Nhdr;
include/uapi/linux/elf.h:typedef ...
include/uapi/linux/elfcore.h:typedef elf_gregset_t gregset_t;
include/uapi/linux/elfcore.h:   elf_gregset_t pr_reg;   /* GP registers */
include/uapi/linux/elfcore.h:typedef elf_greg_t greg_t;
include/uapi/linux/elfcore.h:typedef elf_gregset_t gregset_t;
include/uapi/linux/elfcore.h:typedef elf_fpregset_t fpregset_t;
include/uapi/linux/elfcore.h:struct elf_prpsinfo
include/uapi/linux/elfcore.h:struct elf_prstatus
include/uapi/linux/elfcore.h:struct elf_siginfo

/* sg.h missing from exported kernel headers, can't change */
include/scsi/sg.h:typedef struct sg_iovec sg_iovec_t;
include/scsi/sg.h:typedef struct sg_io_hdr sg_io_hdr_t;
include/scsi/sg.h-struct sg_scsi_id {
include/scsi/sg.h:typedef struct sg_req_info  sg_req_info_t;
include/scsi/sg.h:typedef struct sg_io_hdr Sg_io_hdr;
include/scsi/sg.h:typedef struct sg_io_vec Sg_io_vec;
include/scsi/sg.h:typedef struct sg_scsi_id Sg_scsi_id;
include/scsi/sg.h:typedef struct sg_req_info Sg_req_info;
include/scsi/sg.h-struct sg_header {

/* 32-bit on alpha, used in ustat (not provided by musl) */
include/uapi/asm-generic/posix_types.h:typedef __kernel_ulong_t __kernel_ino_t;
/* 64-bit on mips64, used in mtio (should fix?) and ustat */
include/uapi/asm-generic/posix_types.h:typedef int    __kernel_daddr_t;

/* 16 bit on older architectures but only used in IPC interfaces,
   which will get wrapped anyway */
include/uapi/asm-generic/posix_types.h:typedef unsigned int    __kernel_mode_t;
include/uapi/asm-generic/posix_types.h:typedef int    __kernel_ipc_pid_t;
include/uapi/asm-generic/posix_types.h:typedef unsigned int    __kernel_uid_t;
include/uapi/asm-generic/posix_types.h:typedef unsigned int    __kernel_gid_t;
_______________________________________________
Y2038 mailing list
Y2038@lists.linaro.org
https://lists.linaro.org/mailman/listinfo/y2038

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Data structures defined by both linux and musl
  2018-12-18 19:41 Data structures defined by both linux and musl Arnd Bergmann
@ 2018-12-20  0:30 ` Rich Felker
  2018-12-20 10:33   ` Szabolcs Nagy
  2019-01-18 16:50 ` Arnd Bergmann
  2019-01-18 17:06 ` Arnd Bergmann
  2 siblings, 1 reply; 10+ messages in thread
From: Rich Felker @ 2018-12-20  0:30 UTC (permalink / raw)
  To: musl

On Tue, Dec 18, 2018 at 08:41:53PM +0100, Arnd Bergmann wrote:
> I recently discussed with Rich about the work needed to get 64-bit time_t
> support into musl. One of the first steps he identified was to find out which
> interfaces we would want to abstract or wrap for a new ABI given that we
> have to make a binary incompatible interface anyway.
> 
> I have found all the data structures that are provided by both the kernel
> headers and the musl headers now, and annotated what I think the
> path forward could be. I already provided the same list on IRC, but
> here is a (slightly updated) copy for everyone else.

Thank you. For those just joining now, the context of this is that
support for 64-bit time_t on 32-bit archs probably requires, and at
least is best done with, a new ABI, so far known as the ".2" ABI
(because the ldso would end in ".2" instead of ".1"). This would be an
opportunity to fix lots of ABI mistakes where extensibility or even
support for current functionality is lacking. If done, this would not
be a fork of musl and would not be dropping existing ABIs. Rather,
internal refactoring would eliminate the assumption that the
ABI-with-application types match the ABI-with-kernel/syscall types,
performing translation back and forth where needed. On the existing
".1" ABIs, this translation would mostly be the identity
transformation, but on archs where we're already doing some hacks to
fix up kernel ABI bugs (sysvipc on big endian, mips stat structure,
x32 stuff, etc.) the hacks could be replaced by use of this
translation infrastructure.

It may also be possible that we can add 64-bit time_t on the existing
ABIs (preserving app-to-libc ABI, not app/library-to-library ABI when
types involving time_t are part of their interface) just by remapping
all of the affected interfaces in the headers. I'm not sure if this is
a good idea. It's imperfect and might derail adoption of the better
ABI, but it would probably bring 64-bit time_t to mainstream use more
quickly.

BTW regarding 64-bit time_t on 32-bit archs, Arnd has been working to
make this happen for a long time. I believe it was over 3 years ago we
first spoke about working on it in musl. Basically we've reached the
point where 32-bit archs are a dead-end for developing embedded stuff
that needs to run indefinitely without the ability to upgrade, and
this domain is the main place where 32-bit archs are still very
relevant. One nice thing about making a new clean ABI is that
embedded users who don't care about binary ecosystems can switch
immediately, and desktop/server distros can take their time and switch
from .1 to .2 when it works best for them.

I indicated to Arnd that, in order for this to move forward, we need
to be able to evaluate the size of the interface surface affected by
time_t change, to evaluate whether the .2 ABI makes sense, and further
get an idea of what other types are ABI dead-ends that should be fixed
at the same time. The list I'm replying to is the start of that work.

Now, a few comments on findings so far. These won't be complete but
they're a start:

> The takeaway is that we probably need to add new definitions for
> flock64, statfs, stat, termios, {msg,sem,shm}{buf,info,id_ds}, ipc_perm,

Not clear on how flock[64?] is affected.

stat and ipc structures contain time_t's and definitely need to
change.

I think termios is listed here because .2 ABI overhaul is a great
opportunity to switch to the "termios2" interfaces, unify the
userspace types, and make support for custom baud work right.

> rlimit, rusage, sched_param, time_t, timeval, timespec, itimerval,
> itimerspec, and timex, and then wrap all kernel interfaces that
> use those.

Not clear on how rlimit is affected, but most of these definitely are.

sched_param is another opportunity. Ours is already a bit larger than
glibc's due to my (unmotivated, inadvertent) inclusion of fields for
the optional POSIX sporadic stuff, but not a lot. glibc already hit
new kernel features they can't provide with the existing ABI.

Rich


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Data structures defined by both linux and musl
  2018-12-20  0:30 ` Rich Felker
@ 2018-12-20 10:33   ` Szabolcs Nagy
  2018-12-20 18:08     ` Rich Felker
  0 siblings, 1 reply; 10+ messages in thread
From: Szabolcs Nagy @ 2018-12-20 10:33 UTC (permalink / raw)
  To: musl

* Rich Felker <dalias@libc.org> [2018-12-19 19:30:44 -0500]:
> On Tue, Dec 18, 2018 at 08:41:53PM +0100, Arnd Bergmann wrote:
> > I recently discussed with Rich about the work needed to get 64-bit time_t
> > support into musl. One of the first steps he identified was to find out which
> > interfaces we would want to abstract or wrap for a new ABI given that we
> > have to make a binary incompatible interface anyway.
> > 
> > I have found all the data structures that are provided by both the kernel
> > headers and the musl headers now, and annotated what I think the
> > path forward could be. I already provided the same list on IRC, but
> > here is a (slightly updated) copy for everyone else.
> 
> Thank you. For those just joining now, the context of this is that
> support for 64-bit time_t on 32-bit archs probably requires, and at
> least is best done with, a new ABI, so far known as the ".2" ABI
> (because the ldso would end in ".2" instead of ".1"). This would be an
> opportunity to fix lots of ABI mistakes where extensibility or even
> support for current functionality is lacking. If done, this would not
> be a fork of musl and would not be dropping existing ABIs. Rather,
> internal refactoring would eliminate the assumption that the
> ABI-with-application types match the ABI-with-kernel/syscall types,
> performing translation back and forth where needed. On the existing
> ".1" ABIs, this translation would mostly be the identity
> transformation, but on archs where we're already doing some hacks to
> fix up kernel ABI bugs (sysvipc on big endian, mips stat structure,
> x32 stuff, etc.) the hacks could be replaced by use of this
> translation infrastructure.

lesson of ilp32 was that libc cannot generally translate between
a user and kernel abi (otherwise it could be done in userspace).

the problematic cases are when user talks to the kernel directly
using libc types in a way that the libc cannot do the translation.

interfaces where the libc does not know the type, just an opaque
pointer: ioctl, fcntl, getsockopt, setsockopt, raw syscall

interfaces where translation would require malloc, but should be
as-safe and not fail with ENOMEM: sendmmsg, readv, writev,...

direct communication channel to the kernel that may expose the
abi incompatibility: netlink, sysfs, procfs

types related to signal handling that may require sighandler
wrapping to translate: siginfo_t, ucontext_t

time_t may not be affected by these, but it shows that translation
is fragile in general, i wonder if we can ensure correct behaviour
in all cases. there is also the problem of linux headers which may
use and redefine libc types and user code may need to use those.



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Data structures defined by both linux and musl
  2018-12-20 10:33   ` Szabolcs Nagy
@ 2018-12-20 18:08     ` Rich Felker
  0 siblings, 0 replies; 10+ messages in thread
From: Rich Felker @ 2018-12-20 18:08 UTC (permalink / raw)
  To: musl

On Thu, Dec 20, 2018 at 11:33:59AM +0100, Szabolcs Nagy wrote:
> * Rich Felker <dalias@libc.org> [2018-12-19 19:30:44 -0500]:
> > On Tue, Dec 18, 2018 at 08:41:53PM +0100, Arnd Bergmann wrote:
> > > I recently discussed with Rich about the work needed to get 64-bit time_t
> > > support into musl. One of the first steps he identified was to find out which
> > > interfaces we would want to abstract or wrap for a new ABI given that we
> > > have to make a binary incompatible interface anyway.
> > > 
> > > I have found all the data structures that are provided by both the kernel
> > > headers and the musl headers now, and annotated what I think the
> > > path forward could be. I already provided the same list on IRC, but
> > > here is a (slightly updated) copy for everyone else.
> > 
> > Thank you. For those just joining now, the context of this is that
> > support for 64-bit time_t on 32-bit archs probably requires, and at
> > least is best done with, a new ABI, so far known as the ".2" ABI
> > (because the ldso would end in ".2" instead of ".1"). This would be an
> > opportunity to fix lots of ABI mistakes where extensibility or even
> > support for current functionality is lacking. If done, this would not
> > be a fork of musl and would not be dropping existing ABIs. Rather,
> > internal refactoring would eliminate the assumption that the
> > ABI-with-application types match the ABI-with-kernel/syscall types,
> > performing translation back and forth where needed. On the existing
> > ".1" ABIs, this translation would mostly be the identity
> > transformation, but on archs where we're already doing some hacks to
> > fix up kernel ABI bugs (sysvipc on big endian, mips stat structure,
> > x32 stuff, etc.) the hacks could be replaced by use of this
> > translation infrastructure.
> 
> lesson of ilp32 was that libc cannot generally translate between
> a user and kernel abi (otherwise it could be done in userspace).

Part of the purpose of this research/this thread is to determine what
the affected interfaces are and how/whether to fix them. This may
inform the choice of which additional types to abstract away and
translate. The only things that absolutely have to change are
structures containing time_t.

> the problematic cases are when user talks to the kernel directly
> using libc types in a way that the libc cannot do the translation.
> 
> interfaces where the libc does not know the type, just an opaque
> pointer: ioctl, fcntl, getsockopt, setsockopt, raw syscall

Ultimately all of these *can* be translated just by enumerating all
the broken interfaces and special-casing them. It's not pretty,
though. What would probably happen (Arnd, do you know?) would be
redefining the ioctl numbers etc. to "time64" versions of the
interfaces, and for interfaces which are actually "important" to have
work on old kernels, including translations to/from the corresponding
old ioctl. Depending on the scope, that might be all or nearly all of
them.

> interfaces where translation would require malloc, but should be
> as-safe and not fail with ENOMEM: sendmmsg, readv, writev,...

IOV_MAX is 1024 so it's reasonable to do on-stack and simply fail
requests that are too large. But we don't have to translate iovecs
anyway.

> direct communication channel to the kernel that may expose the
> abi incompatibility: netlink, sysfs, procfs

Netlink is the worst here since it's "hidden" behind normal read/write
calls where the data is abstract bytes. If there's anything that needs
to be fixed at the netlink layer it probably just requires redefining
part of the _API_ to use fixed-width types rather than time_t or such.

> types related to signal handling that may require sighandler
> wrapping to translate: siginfo_t, ucontext_t

Yes. I'm not proposing we do sighandler wrapping/translation now or in
the future because it's a pain, but there are some good motivations to
do it, so I'd like to keep the option open.

> time_t may not be affected by these, but it shows that translation
> is fragile in general, i wonder if we can ensure correct behaviour
> in all cases. there is also the problem of linux headers which may
> use and redefine libc types and user code may need to use those.

Redefining libc types is already broken, and the kernel headers that
do it can't be used from userspace when libc headers are included.
This issue is independent of type sizes/layouts matching.

I don't think any kernel headers _use_ libc types either. They
generally use their own stuff.

Rich


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Data structures defined by both linux and musl
  2018-12-18 19:41 Data structures defined by both linux and musl Arnd Bergmann
  2018-12-20  0:30 ` Rich Felker
@ 2019-01-18 16:50 ` Arnd Bergmann
  2019-01-18 19:48   ` A. Wilcox
  2019-01-18 17:06 ` Arnd Bergmann
  2 siblings, 1 reply; 10+ messages in thread
From: Arnd Bergmann @ 2019-01-18 16:50 UTC (permalink / raw)
  To: musl, y2038 Mailman List; +Cc: Rich Felker, Adhemerval Zanella, Maxim Kuvyrkov

(Sorry for replying late again, I was not subscribed to the list then (I am now)
and did not get Cc'd on the follow-ups to my original mail)

On Wed, 19 Dec 2018, Rich Felker wrote:
>
> BTW regarding 64-bit time_t on 32-bit archs, Arnd has been working to
> make this happen for a long time. I believe it was over 3 years ago we
> first spoke about working on it in musl. Basically we've reached the
> point where 32-bit archs are a dead-end for developing embedded stuff
> that needs to run indefinitely without the ability to upgrade, and
> this domain is the main place where 32-bit archs are still very
> relevant. One nice thing about making a new clean ABI is that
> embedded users who don't care about binary ecosystems can switch
> immediately, and desktop/server distros can take their time and switch
> from .1 to .2 when it works best for them.

FWIW, I have now uploaded a series that has a chance of getting
merged for 5.1 in my y2038 tree:
https://git.kernel.org/pub/scm/linux/kernel/git/arnd/playground.git/log/?h=y2038

I still have to repeat the LTP tests I did over the summer
after getting musl to build again with the changes that
happened in the meantime, but this should be fairly close to
what we get. Any comments on the kernel ABI changes
are highly welcome.

> Now, a few comments on findings so far. These won't be complete but
> they're a start:
>
> > The takeaway is that we probably need to add new definitions for
> > flock64, statfs, stat, termios, {msg,sem,shm}{buf,info,id_ds}, ipc_perm,
>
> Not clear on how flock[64?] is affected.

In my list, I had mentioned that the kernel's flock64 is different
from musl's flock structure on sparc64 (which has an extra
padding field) and on mips (I may have been mistaken there,
only flock differs on mips32, flock64 is apparently fine).

If we don't care about musl on sparc, there may be no need to
do anything here.

> stat and ipc structures contain time_t's and definitely need to
> change.

Right, the traditional kernel definitions here have numerous
problems, most importantly the fact that they are different
on each of the old architectures.

> I think termios is listed here because .2 ABI overhaul is a great
> opportunity to switch to the "termios2" interfaces, unify the
> userspace types, and make support for custom baud work right.

Correct. There is also some inconsistency between the architectures
here.

> > rlimit, rusage, sched_param, time_t, timeval, timespec, itimerval,
> > itimerspec, and timex, and then wrap all kernel interfaces that
> > use those.
>
> Not clear on how rlimit is affected, but most of these definitely are.

I probably had it mixed up with rusage here.

       Arnd
_______________________________________________
Y2038 mailing list
Y2038@lists.linaro.org
https://lists.linaro.org/mailman/listinfo/y2038

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Data structures defined by both linux and musl
  2018-12-18 19:41 Data structures defined by both linux and musl Arnd Bergmann
  2018-12-20  0:30 ` Rich Felker
  2019-01-18 16:50 ` Arnd Bergmann
@ 2019-01-18 17:06 ` Arnd Bergmann
  2019-01-18 18:55   ` Rich Felker
  2 siblings, 1 reply; 10+ messages in thread
From: Arnd Bergmann @ 2019-01-18 17:06 UTC (permalink / raw)
  To: musl, y2038 Mailman List; +Cc: Rich Felker, Adhemerval Zanella, Maxim Kuvyrkov

> On Thu, Dec 20, 2018 at 11:33:59AM +0100, Szabolcs Nagy wrote:
> > * Rich Felker <dalias@...c.org> [2018-12-19 19:30:44 -0500]:
> > > On Tue, Dec 18, 2018 at 08:41:53PM +0100, Arnd Bergmann wrote:
> > > ".1" ABIs, this translation would mostly be the identity
> > > transformation, but on archs where we're already doing some hacks to
> > > fix up kernel ABI bugs (sysvipc on big endian, mips stat structure,
> > > x32 stuff, etc.) the hacks could be replaced by use of this
> > > translation infrastructure.
> >
> > lesson of ilp32 was that libc cannot generally translate between
> > a user and kernel abi (otherwise it could be done in userspace).
> >
> > the problematic cases are when user talks to the kernel directly
> > using libc types in a way that the libc cannot do the translation.
> >
> > interfaces where the libc does not know the type, just an opaque
> > pointer: ioctl, fcntl, getsockopt, setsockopt, raw syscall
>
> Ultimately all of these *can* be translated just by enumerating all
> the broken interfaces and special-casing them. It's not pretty,
> though. What would probably happen (Arnd, do you know?) would be
> redefining the ioctl numbers etc. to "time64" versions of the
> interfaces, and for interfaces which are actually "important" to have
> work on old kernels, including translations to/from the corresponding
> old ioctl. Depending on the scope, that might be all or nearly all of
> them.

We've done it for most of them by now. In a lot of cases we
got lucky because the ioctl command code changes with
sizeof(time_t), so all we had to do in the kernel was to interpret
those ioctl commands for 32-bit and 64-bit time_t.

In other cases, we have redefined the ioctl command codes
in the header with some clever (hopefully not too clever) trick:

#if __BITS_PER_LONG == 64
#define LPSETTIMEOUT LPSETTIMEOUT_OLD
#else
#define LPSETTIMEOUT (sizeof(time_t) > sizeof(__kernel_long_t) ? \
    LPSETTIMEOUT_NEW : LPSETTIMEOUT_OLD)
#endif

This way, we guarantee that we can still detect the data type
expected by an application calling LPSETTIMEOUT.
The same approach is used for setsockopt and some other
interfaces.

In other cases (in particular when we never pass absolute
CLOCK_REALTIME data), we changed the type inside
of a structure from time_t to 'long' or 'unsigned long', in
order to keep the ABI unchanged. The disadvantage here
is that it requires user space to use updated kernel headers,
which is a problem for applications that ship with a copy of
the kernel header.

I think for fcntl we were lucky that nothing passes a time_t.

> > direct communication channel to the kernel that may expose the
> > abi incompatibility: netlink, sysfs, procfs
>
> Netlink is the worst here since it's "hidden" behind normal read/write
> calls where the data is abstract bytes. If there's anything that needs
> to be fixed at the netlink layer it probably just requires redefining
> part of the _API_ to use fixed-width types rather than time_t or such.

I don't remember seeing any such case with netlink. Generally
speaking, netlink already has to use fixed-width types in order
to support compat mode, but there may be a couple of exceptions
where the kernel requires nasty hacks here. The same is true
for read/write based chardev interfaces such as /dev/input/eventX,
which we had to redefine to use a structure based on 'unsigned long'
instead of 'time_t' and require to use CLOCK_MONOTONIC to
avoid the overflow.

> > types related to signal handling that may require sighandler
> > wrapping to translate: siginfo_t, ucontext_t
>
> Yes. I'm not proposing we do sighandler wrapping/translation now or in
> the future because it's a pain, but there are some good motivations to
> do it, so I'd like to keep the option open.

I'm certainly not planning to touch any of those in musl ;--)

> > time_t may not be affected by these, but it shows that translation
> > is fragile in general, i wonder if we can ensure correct behaviour
> > in all cases. there is also the problem of linux headers which may
> > use and redefine libc types and user code may need to use those.
>
> Redefining libc types is already broken, and the kernel headers that
> do it can't be used from userspace when libc headers are included.
> This issue is independent of type sizes/layouts matching.
>
> I don't think any kernel headers _use_ libc types either. They
> generally use their own stuff.

'struct timespec' is a notable exception here, but probably not
the only one. At the moment, both libc and kernel define this
structure (and timeval, itimerval, itimerspec, ...), and in my
work on the kernel interfaces I assumed that the libc version
is the one that will prevail, while the kernel version should get
removed.

      Arnd
_______________________________________________
Y2038 mailing list
Y2038@lists.linaro.org
https://lists.linaro.org/mailman/listinfo/y2038

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Re: Data structures defined by both linux and musl
  2019-01-18 17:06 ` Arnd Bergmann
@ 2019-01-18 18:55   ` Rich Felker
  2019-01-18 21:07     ` Arnd Bergmann
  0 siblings, 1 reply; 10+ messages in thread
From: Rich Felker @ 2019-01-18 18:55 UTC (permalink / raw)
  To: musl

On Fri, Jan 18, 2019 at 06:06:01PM +0100, Arnd Bergmann wrote:
> > On Thu, Dec 20, 2018 at 11:33:59AM +0100, Szabolcs Nagy wrote:
> > > * Rich Felker <dalias@...c.org> [2018-12-19 19:30:44 -0500]:
> > > > On Tue, Dec 18, 2018 at 08:41:53PM +0100, Arnd Bergmann wrote:
> > > > ".1" ABIs, this translation would mostly be the identity
> > > > transformation, but on archs where we're already doing some hacks to
> > > > fix up kernel ABI bugs (sysvipc on big endian, mips stat structure,
> > > > x32 stuff, etc.) the hacks could be replaced by use of this
> > > > translation infrastructure.
> > >
> > > lesson of ilp32 was that libc cannot generally translate between
> > > a user and kernel abi (otherwise it could be done in userspace).
> > >
> > > the problematic cases are when user talks to the kernel directly
> > > using libc types in a way that the libc cannot do the translation.
> > >
> > > interfaces where the libc does not know the type, just an opaque
> > > pointer: ioctl, fcntl, getsockopt, setsockopt, raw syscall
> >
> > Ultimately all of these *can* be translated just by enumerating all
> > the broken interfaces and special-casing them. It's not pretty,
> > though. What would probably happen (Arnd, do you know?) would be
> > redefining the ioctl numbers etc. to "time64" versions of the
> > interfaces, and for interfaces which are actually "important" to have
> > work on old kernels, including translations to/from the corresponding
> > old ioctl. Depending on the scope, that might be all or nearly all of
> > them.
> 
> We've done it for most of them by now. In a lot of cases we
> got lucky because the ioctl command code changes with
> sizeof(time_t), so all we had to do in the kernel was to interpret
> those ioctl commands for 32-bit and 64-bit time_t.
> 
> In other cases, we have redefined the ioctl command codes
> in the header with some clever (hopefully not too clever) trick:
> 
> #if __BITS_PER_LONG == 64
> #define LPSETTIMEOUT LPSETTIMEOUT_OLD
> #else
> #define LPSETTIMEOUT (sizeof(time_t) > sizeof(__kernel_long_t) ? \
>     LPSETTIMEOUT_NEW : LPSETTIMEOUT_OLD)
> #endif
> 
> This way, we guarantee that we can still detect the data type
> expected by an application calling LPSETTIMEOUT.
> The same approach is used for setsockopt and some other
> interfaces.

Unless I'm misunderstanding something, this still leaves new programs
using 64-bit time_t unable to make the ioctls on old kernels that lack
the updated ioctl command. There's probably some significant subset of
important commands that ioctl.c needs to be able to intercept and
emulate.

> In other cases (in particular when we never pass absolute
> CLOCK_REALTIME data), we changed the type inside
> of a structure from time_t to 'long' or 'unsigned long', in
> order to keep the ABI unchanged. The disadvantage here
> is that it requires user space to use updated kernel headers,
> which is a problem for applications that ship with a copy of
> the kernel header.

I think this is reasonable. It's not reasonable for kernel structures
to have standard userspace types like time_t in them (except
fixed-size ones like uint32_t, but kernel has __u32 for that anyway)
and shipping copies of such headers was likewise a bug that should be
corrected. It may be a moderate pain for distro ppl fixing this until
the affected upstreams do, tho.

> I think for fcntl we were lucky that nothing passes a time_t.

Indeed.

> > > direct communication channel to the kernel that may expose the
> > > abi incompatibility: netlink, sysfs, procfs
> >
> > Netlink is the worst here since it's "hidden" behind normal read/write
> > calls where the data is abstract bytes. If there's anything that needs
> > to be fixed at the netlink layer it probably just requires redefining
> > part of the _API_ to use fixed-width types rather than time_t or such.
> 
> I don't remember seeing any such case with netlink. Generally
> speaking, netlink already has to use fixed-width types in order
> to support compat mode, but there may be a couple of exceptions
> where the kernel requires nasty hacks here.

OK, that sounds good.

> The same is true
> for read/write based chardev interfaces such as /dev/input/eventX,
> which we had to redefine to use a structure based on 'unsigned long'

Uhg. How does this work with a 32-bit userspace running on a 64-bit
kernel?! These should never have used long, only u32 or u64. Is it
fixable? Or is there some reasonable way for userspace to detect which
protocol the kernel is using?

> instead of 'time_t' and require to use CLOCK_MONOTONIC to
> avoid the overflow.

Well, avoid it for devices that don't go more than 136 years without
reboot... :)

> > > time_t may not be affected by these, but it shows that translation
> > > is fragile in general, i wonder if we can ensure correct behaviour
> > > in all cases. there is also the problem of linux headers which may
> > > use and redefine libc types and user code may need to use those.
> >
> > Redefining libc types is already broken, and the kernel headers that
> > do it can't be used from userspace when libc headers are included.
> > This issue is independent of type sizes/layouts matching.
> >
> > I don't think any kernel headers _use_ libc types either. They
> > generally use their own stuff.
> 
> 'struct timespec' is a notable exception here, but probably not
> the only one. At the moment, both libc and kernel define this
> structure (and timeval, itimerval, itimerspec, ...), and in my
> work on the kernel interfaces I assumed that the libc version
> is the one that will prevail, while the kernel version should get
> removed.

Yes, I think any type defined by userspace standards/interface
definitions inherently belongs to userspace implementation, and kernel
headers should not touch it.

Rich


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Re: Data structures defined by both linux and musl
  2019-01-18 16:50 ` Arnd Bergmann
@ 2019-01-18 19:48   ` A. Wilcox
  2019-01-18 21:09     ` Arnd Bergmann
  0 siblings, 1 reply; 10+ messages in thread
From: A. Wilcox @ 2019-01-18 19:48 UTC (permalink / raw)
  To: musl


[-- Attachment #1.1: Type: text/plain, Size: 928 bytes --]

On 01/18/19 10:50, Arnd Bergmann wrote:
> On Wed, 19 Dec 2018, Rich Felker wrote:
>>> The takeaway is that we probably need to add new definitions for
>>> flock64, statfs, stat, termios, {msg,sem,shm}{buf,info,id_ds}, ipc_perm,
>>
>> Not clear on how flock[64?] is affected.
> 
> In my list, I had mentioned that the kernel's flock64 is different
> from musl's flock structure on sparc64 (which has an extra
> padding field) and on mips (I may have been mistaken there,
> only flock differs on mips32, flock64 is apparently fine).
> 
> If we don't care about musl on sparc, there may be no need to
> do anything here.

We have a sparc64 port in progress for musl.  It's in the planning
stages, and the goal was to get something shipping in late 2019.

As usual, I guess we're going to be too little, too late.

--arw


-- 
A. Wilcox (awilfox)
Project Lead, Adélie Linux
https://www.adelielinux.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Re: Data structures defined by both linux and musl
  2019-01-18 18:55   ` Rich Felker
@ 2019-01-18 21:07     ` Arnd Bergmann
  0 siblings, 0 replies; 10+ messages in thread
From: Arnd Bergmann @ 2019-01-18 21:07 UTC (permalink / raw)
  To: musl

On Fri, Jan 18, 2019 at 7:55 PM Rich Felker <dalias@libc.org> wrote:
> On Fri, Jan 18, 2019 at 06:06:01PM +0100, Arnd Bergmann wrote:
> > #if __BITS_PER_LONG == 64
> > #define LPSETTIMEOUT LPSETTIMEOUT_OLD
> > #else
> > #define LPSETTIMEOUT (sizeof(time_t) > sizeof(__kernel_long_t) ? \
> >     LPSETTIMEOUT_NEW : LPSETTIMEOUT_OLD)
> > #endif
> >
> > This way, we guarantee that we can still detect the data type
> > expected by an application calling LPSETTIMEOUT.
> > The same approach is used for setsockopt and some other
> > interfaces.
>
> Unless I'm misunderstanding something, this still leaves new programs
> using 64-bit time_t unable to make the ioctls on old kernels that lack
> the updated ioctl command. There's probably some significant subset of
> important commands that ioctl.c needs to be able to intercept and
> emulate.

That is correct. The number of ioctls that are affected here is
fairly small, and some of them are in rather obscure drivers,
but I was still hoping that we could avoid emulating them
in libc, and just require newer kernels for anyone who really
wants to run 64-bit time_t and use those drivers.

> > In other cases (in particular when we never pass absolute
> > CLOCK_REALTIME data), we changed the type inside
> > of a structure from time_t to 'long' or 'unsigned long', in
> > order to keep the ABI unchanged. The disadvantage here
> > is that it requires user space to use updated kernel headers,
> > which is a problem for applications that ship with a copy of
> > the kernel header.
>
> I think this is reasonable. It's not reasonable for kernel structures
> to have standard userspace types like time_t in them (except
> fixed-size ones like uint32_t, but kernel has __u32 for that anyway)
> and shipping copies of such headers was likewise a bug that should be
> corrected. It may be a moderate pain for distro ppl fixing this until
> the affected upstreams do, tho.

A lot of those structures have a long history, e.g. rusage has
always been defined in terms of timeval. I'd like to change this,
but it will be a lot of work to go through all uapi structures.

I can also understand the reasons for shipping copies of the
latest kernel headers in packages, this is not unlike what you
do in musl with its own version of the files. When a user space
package only cares about one header for a particular subsystem,
it tends to be easier to have a current copy of that header, and
just deal with binary compatibility for old kernels in the package,
than to have to support all combinations of library, header and
running kernel.

> > The same is true
> > for read/write based chardev interfaces such as /dev/input/eventX,
> > which we had to redefine to use a structure based on 'unsigned long'
>
> Uhg. How does this work with a 32-bit userspace running on a 64-bit
> kernel?! These should never have used long, only u32 or u64. Is it
> fixable? Or is there some reasonable way for userspace to detect which
> protocol the kernel is using?

We have a hack in that driver that detects when it's being called by
a compat mode thread, and another hack to detect x32 mode on
top of the first. Unfortunately, the kernel cannot detect the definition
of the time_t type that was used in the process reading the file
descriptor, so after long discussions we settled on leaving it at
the ABI and not adding more hacks to it, but requiring user space
to be aware that it has to use the updated kernel headers in this
case (which don't use time_t).

> > > > time_t may not be affected by these, but it shows that translation
> > > > is fragile in general, i wonder if we can ensure correct behaviour
> > > > in all cases. there is also the problem of linux headers which may
> > > > use and redefine libc types and user code may need to use those.
> > >
> > > Redefining libc types is already broken, and the kernel headers that
> > > do it can't be used from userspace when libc headers are included.
> > > This issue is independent of type sizes/layouts matching.
> > >
> > > I don't think any kernel headers _use_ libc types either. They
> > > generally use their own stuff.
> >
> > 'struct timespec' is a notable exception here, but probably not
> > the only one. At the moment, both libc and kernel define this
> > structure (and timeval, itimerval, itimerspec, ...), and in my
> > work on the kernel interfaces I assumed that the libc version
> > is the one that will prevail, while the kernel version should get
> > removed.
>
> Yes, I think any type defined by userspace standards/interface
> definitions inherently belongs to userspace implementation, and kernel
> headers should not touch it.

That is a sensible rule, it just doesn't match what has been done
historically. Now we have to retrofit it while trying to break as
little as possible in the process.

      Arnd


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Re: Data structures defined by both linux and musl
  2019-01-18 19:48   ` A. Wilcox
@ 2019-01-18 21:09     ` Arnd Bergmann
  0 siblings, 0 replies; 10+ messages in thread
From: Arnd Bergmann @ 2019-01-18 21:09 UTC (permalink / raw)
  To: musl

On Fri, Jan 18, 2019 at 8:48 PM A. Wilcox <awilfox@adelielinux.org> wrote:
>
> On 01/18/19 10:50, Arnd Bergmann wrote:
> > On Wed, 19 Dec 2018, Rich Felker wrote:
> >>> The takeaway is that we probably need to add new definitions for
> >>> flock64, statfs, stat, termios, {msg,sem,shm}{buf,info,id_ds}, ipc_perm,
> >>
> >> Not clear on how flock[64?] is affected.
> >
> > In my list, I had mentioned that the kernel's flock64 is different
> > from musl's flock structure on sparc64 (which has an extra
> > padding field) and on mips (I may have been mistaken there,
> > only flock differs on mips32, flock64 is apparently fine).
> >
> > If we don't care about musl on sparc, there may be no need to
> > do anything here.
>
> We have a sparc64 port in progress for musl.  It's in the planning
> stages, and the goal was to get something shipping in late 2019.
>
> As usual, I guess we're going to be too little, too late.

Have you thought about how to handle timeval and flock64 here?
In both cases, the generic structure definition is different between
sparc64 kernels and the rest of the world including all other 64-bit
architectures and the musl definition.

      Arnd


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2019-01-18 21:09 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-18 19:41 Data structures defined by both linux and musl Arnd Bergmann
2018-12-20  0:30 ` Rich Felker
2018-12-20 10:33   ` Szabolcs Nagy
2018-12-20 18:08     ` Rich Felker
2019-01-18 16:50 ` Arnd Bergmann
2019-01-18 19:48   ` A. Wilcox
2019-01-18 21:09     ` Arnd Bergmann
2019-01-18 17:06 ` Arnd Bergmann
2019-01-18 18:55   ` Rich Felker
2019-01-18 21:07     ` Arnd Bergmann

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).