mailing list of musl libc
 help / color / mirror / code / Atom feed
* [PATCH] mlock2 and memfd_create
@ 2018-06-19 20:43 Szabolcs Nagy
  2018-06-22  0:16 ` Andrei Vagin
  0 siblings, 1 reply; 6+ messages in thread
From: Szabolcs Nagy @ 2018-06-19 20:43 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 46 bytes --]

separate patches for mlock2 and memfd_create.

[-- Attachment #2: 0001-Add-mlock2-linux-syscall-wrapper.patch --]
[-- Type: text/x-diff, Size: 1844 bytes --]

From ee3fa6fea375a941f0f11da9a5767f35dd53c6f0 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sat, 28 Apr 2018 17:25:41 +0000
Subject: [PATCH 1/2] Add mlock2 linux syscall wrapper

mlock2 syscall was added in linux v4.4 and glibc has api for it.
It falls back to mlock in case of flags==0, so that case works
even on older kernels.

MLOCK_ONFAULT is moved under _GNU_SOURCE following glibc.
---
 include/sys/mman.h | 11 ++++++++---
 src/linux/mlock2.c | 10 ++++++++++
 2 files changed, 18 insertions(+), 3 deletions(-)
 create mode 100644 src/linux/mlock2.c

diff --git a/include/sys/mman.h b/include/sys/mman.h
index 19dd844e..80e1da75 100644
--- a/include/sys/mman.h
+++ b/include/sys/mman.h
@@ -94,6 +94,13 @@ extern "C" {
 #define MADV_SOFT_OFFLINE 101
 #endif
 
+#ifdef _GNU_SOURCE
+#define MREMAP_MAYMOVE 1
+#define MREMAP_FIXED 2
+
+#define MLOCK_ONFAULT 0x01
+#endif
+
 #include <bits/mman.h>
 
 void *mmap (void *, size_t, int, int, int, off_t);
@@ -110,14 +117,12 @@ int mlockall (int);
 int munlockall (void);
 
 #ifdef _GNU_SOURCE
-#define MREMAP_MAYMOVE 1
-#define MREMAP_FIXED 2
 void *mremap (void *, size_t, size_t, int, ...);
 int remap_file_pages (void *, size_t, int, size_t, int);
+int mlock2 (const void *, size_t, unsigned);
 #endif
 
 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define MLOCK_ONFAULT   0x01
 int madvise (void *, size_t, int);
 int mincore (void *, size_t, unsigned char *);
 #endif
diff --git a/src/linux/mlock2.c b/src/linux/mlock2.c
new file mode 100644
index 00000000..10132742
--- /dev/null
+++ b/src/linux/mlock2.c
@@ -0,0 +1,10 @@
+#define _GNU_SOURCE 1
+#include <sys/mman.h>
+#include "syscall.h"
+
+int mlock2(const void *addr, size_t len, unsigned flags)
+{
+	if (flags == 0)
+		return mlock(addr, len);
+	return syscall(SYS_mlock2, addr, len, flags);
+}
-- 
2.16.3


[-- Attachment #3: 0002-Add-memfd_create-syscall-wrapper.patch --]
[-- Type: text/x-diff, Size: 1407 bytes --]

From ed7309bd4156c77d813390ae5b2507719b43face Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Tue, 19 Jun 2018 20:28:03 +0000
Subject: [PATCH 2/2] Add memfd_create syscall wrapper

memfd_create was added in linux v3.17 and glibc has api for it.
---
 include/sys/mman.h       | 5 +++++
 src/linux/memfd_create.c | 8 ++++++++
 2 files changed, 13 insertions(+)
 create mode 100644 src/linux/memfd_create.c

diff --git a/include/sys/mman.h b/include/sys/mman.h
index 80e1da75..99d02a2e 100644
--- a/include/sys/mman.h
+++ b/include/sys/mman.h
@@ -99,6 +99,10 @@ extern "C" {
 #define MREMAP_FIXED 2
 
 #define MLOCK_ONFAULT 0x01
+
+#define MFD_CLOEXEC 0x0001U
+#define MFD_ALLOW_SEALING 0x0002U
+#define MFD_HUGETLB 0x0004U
 #endif
 
 #include <bits/mman.h>
@@ -119,6 +123,7 @@ int munlockall (void);
 #ifdef _GNU_SOURCE
 void *mremap (void *, size_t, size_t, int, ...);
 int remap_file_pages (void *, size_t, int, size_t, int);
+int memfd_create (const char *, unsigned);
 int mlock2 (const void *, size_t, unsigned);
 #endif
 
diff --git a/src/linux/memfd_create.c b/src/linux/memfd_create.c
new file mode 100644
index 00000000..1649fe55
--- /dev/null
+++ b/src/linux/memfd_create.c
@@ -0,0 +1,8 @@
+#define _GNU_SOURCE 1
+#include <sys/mman.h>
+#include "syscall.h"
+
+int memfd_create(const char *name, unsigned flags)
+{
+	return syscall(SYS_memfd_create, name, flags);
+}
-- 
2.16.3


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mlock2 and memfd_create
  2018-06-19 20:43 [PATCH] mlock2 and memfd_create Szabolcs Nagy
@ 2018-06-22  0:16 ` Andrei Vagin
  2018-06-22  9:10   ` Szabolcs Nagy
  0 siblings, 1 reply; 6+ messages in thread
From: Andrei Vagin @ 2018-06-22  0:16 UTC (permalink / raw)
  To: musl

On Tue, Jun 19, 2018 at 10:43:14PM +0200, Szabolcs Nagy wrote:
> separate patches for mlock2 and memfd_create.

> From ee3fa6fea375a941f0f11da9a5767f35dd53c6f0 Mon Sep 17 00:00:00 2001
> From: Szabolcs Nagy <nsz@port70.net>
> Date: Sat, 28 Apr 2018 17:25:41 +0000
> Subject: [PATCH 1/2] Add mlock2 linux syscall wrapper
> 
> mlock2 syscall was added in linux v4.4 and glibc has api for it.
> It falls back to mlock in case of flags==0, so that case works
> even on older kernels.
> 
> MLOCK_ONFAULT is moved under _GNU_SOURCE following glibc.
> ---
>  include/sys/mman.h | 11 ++++++++---
>  src/linux/mlock2.c | 10 ++++++++++
>  2 files changed, 18 insertions(+), 3 deletions(-)
>  create mode 100644 src/linux/mlock2.c
> 
> diff --git a/include/sys/mman.h b/include/sys/mman.h
> index 19dd844e..80e1da75 100644
> --- a/include/sys/mman.h
> +++ b/include/sys/mman.h
> @@ -94,6 +94,13 @@ extern "C" {
>  #define MADV_SOFT_OFFLINE 101
>  #endif
>  
> +#ifdef _GNU_SOURCE
> +#define MREMAP_MAYMOVE 1
> +#define MREMAP_FIXED 2
> +
> +#define MLOCK_ONFAULT 0x01
> +#endif
> +
>  #include <bits/mman.h>
>  
>  void *mmap (void *, size_t, int, int, int, off_t);
> @@ -110,14 +117,12 @@ int mlockall (int);
>  int munlockall (void);
>  
>  #ifdef _GNU_SOURCE
> -#define MREMAP_MAYMOVE 1
> -#define MREMAP_FIXED 2
>  void *mremap (void *, size_t, size_t, int, ...);
>  int remap_file_pages (void *, size_t, int, size_t, int);
> +int mlock2 (const void *, size_t, unsigned);
>  #endif
>  
>  #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
> -#define MLOCK_ONFAULT   0x01
>  int madvise (void *, size_t, int);
>  int mincore (void *, size_t, unsigned char *);
>  #endif
> diff --git a/src/linux/mlock2.c b/src/linux/mlock2.c
> new file mode 100644
> index 00000000..10132742
> --- /dev/null
> +++ b/src/linux/mlock2.c
> @@ -0,0 +1,10 @@
> +#define _GNU_SOURCE 1
> +#include <sys/mman.h>
> +#include "syscall.h"
> +
> +int mlock2(const void *addr, size_t len, unsigned flags)
> +{
> +	if (flags == 0)
> +		return mlock(addr, len);
> +	return syscall(SYS_mlock2, addr, len, flags);

I would prefer another way to support old kernels:

	int ret;

	ret = syscall(SYS_mlock2, addr, len, flags);
	if (ret == -1 && errno == ENOSYS && flags == 0)
		return mlock(addr, len);
	return ret;

This way works a bit slower on old kernels, but it doesn't have side
effects if mlock2 is supported.

For example, the user can set seccomp rules, and he will not expect that
the mlock syscall will be executed when he calls mlock2() in his code.

Thanks,
Andrei

> +}
> -- 
> 2.16.3
> 

> From ed7309bd4156c77d813390ae5b2507719b43face Mon Sep 17 00:00:00 2001
> From: Szabolcs Nagy <nsz@port70.net>
> Date: Tue, 19 Jun 2018 20:28:03 +0000
> Subject: [PATCH 2/2] Add memfd_create syscall wrapper
> 
> memfd_create was added in linux v3.17 and glibc has api for it.
> ---
>  include/sys/mman.h       | 5 +++++
>  src/linux/memfd_create.c | 8 ++++++++
>  2 files changed, 13 insertions(+)
>  create mode 100644 src/linux/memfd_create.c
> 
> diff --git a/include/sys/mman.h b/include/sys/mman.h
> index 80e1da75..99d02a2e 100644
> --- a/include/sys/mman.h
> +++ b/include/sys/mman.h
> @@ -99,6 +99,10 @@ extern "C" {
>  #define MREMAP_FIXED 2
>  
>  #define MLOCK_ONFAULT 0x01
> +
> +#define MFD_CLOEXEC 0x0001U
> +#define MFD_ALLOW_SEALING 0x0002U
> +#define MFD_HUGETLB 0x0004U
>  #endif
>  
>  #include <bits/mman.h>
> @@ -119,6 +123,7 @@ int munlockall (void);
>  #ifdef _GNU_SOURCE
>  void *mremap (void *, size_t, size_t, int, ...);
>  int remap_file_pages (void *, size_t, int, size_t, int);
> +int memfd_create (const char *, unsigned);
>  int mlock2 (const void *, size_t, unsigned);
>  #endif
>  
> diff --git a/src/linux/memfd_create.c b/src/linux/memfd_create.c
> new file mode 100644
> index 00000000..1649fe55
> --- /dev/null
> +++ b/src/linux/memfd_create.c
> @@ -0,0 +1,8 @@
> +#define _GNU_SOURCE 1
> +#include <sys/mman.h>
> +#include "syscall.h"
> +
> +int memfd_create(const char *name, unsigned flags)
> +{
> +	return syscall(SYS_memfd_create, name, flags);
> +}
> -- 
> 2.16.3
> 



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mlock2 and memfd_create
  2018-06-22  0:16 ` Andrei Vagin
@ 2018-06-22  9:10   ` Szabolcs Nagy
  2018-06-22 17:58     ` Andrei Vagin
  2018-06-22 19:02     ` Rich Felker
  0 siblings, 2 replies; 6+ messages in thread
From: Szabolcs Nagy @ 2018-06-22  9:10 UTC (permalink / raw)
  To: musl

* Andrei Vagin <avagin@gmail.com> [2018-06-21 17:16:03 -0700]:
> On Tue, Jun 19, 2018 at 10:43:14PM +0200, Szabolcs Nagy wrote:
> > +
> > +int mlock2(const void *addr, size_t len, unsigned flags)
> > +{
> > +	if (flags == 0)
> > +		return mlock(addr, len);
> > +	return syscall(SYS_mlock2, addr, len, flags);
> 
> I would prefer another way to support old kernels:
> 
> 	int ret;
> 
> 	ret = syscall(SYS_mlock2, addr, len, flags);
> 	if (ret == -1 && errno == ENOSYS && flags == 0)
> 		return mlock(addr, len);
> 	return ret;
> 
> This way works a bit slower on old kernels, but it doesn't have side
> effects if mlock2 is supported.
> 
> For example, the user can set seccomp rules, and he will not expect that
> > the mlock syscall will be executed when he calls mlock2() in his code.
> 

mlock2 is documented to be equivalent to mlock if flags==0,
the glibc logic is the same and seccomp (or whatever else
operating on the syscall layer) has to deal with mlock
anyway (unless we change the mlock implementation too).
so i would not be too worried about this.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mlock2 and memfd_create
  2018-06-22  9:10   ` Szabolcs Nagy
@ 2018-06-22 17:58     ` Andrei Vagin
  2018-06-22 18:25       ` Szabolcs Nagy
  2018-06-22 19:02     ` Rich Felker
  1 sibling, 1 reply; 6+ messages in thread
From: Andrei Vagin @ 2018-06-22 17:58 UTC (permalink / raw)
  To: musl

On Fri, Jun 22, 2018 at 11:10:52AM +0200, Szabolcs Nagy wrote:
> * Andrei Vagin <avagin@gmail.com> [2018-06-21 17:16:03 -0700]:
> > On Tue, Jun 19, 2018 at 10:43:14PM +0200, Szabolcs Nagy wrote:
> > > +
> > > +int mlock2(const void *addr, size_t len, unsigned flags)
> > > +{
> > > +	if (flags == 0)
> > > +		return mlock(addr, len);
> > > +	return syscall(SYS_mlock2, addr, len, flags);
> > 
> > I would prefer another way to support old kernels:
> > 
> > 	int ret;
> > 
> > 	ret = syscall(SYS_mlock2, addr, len, flags);
> > 	if (ret == -1 && errno == ENOSYS && flags == 0)
> > 		return mlock(addr, len);
> > 	return ret;
> > 
> > This way works a bit slower on old kernels, but it doesn't have side
> > effects if mlock2 is supported.
> > 
> > For example, the user can set seccomp rules, and he will not expect that
> > > the mlock syscall will be executed when he calls mlock2() in his code.
> > 
> 
> mlock2 is documented to be equivalent to mlock if flags==0,
> the glibc logic is the same and seccomp (or whatever else
> operating on the syscall layer) has to deal with mlock
> anyway (unless we change the mlock implementation too).
> so i would not be too worried about this.

Glibc has the __ASSUME_MLOCK2 option, and if it is set only mlock2() is
used. Modern distributions will probably build glibc with this option.
I mean the glibc logic isn't exactly the same. And a concern about
seccomp is still valid.

Anyway, I don't have strong objections about this patch, I just suggest
another way how it can be done and, from my point of view, it is better.

Thanks,
Andrei


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mlock2 and memfd_create
  2018-06-22 17:58     ` Andrei Vagin
@ 2018-06-22 18:25       ` Szabolcs Nagy
  0 siblings, 0 replies; 6+ messages in thread
From: Szabolcs Nagy @ 2018-06-22 18:25 UTC (permalink / raw)
  To: musl

* Andrei Vagin <avagin@gmail.com> [2018-06-22 10:58:34 -0700]:
> On Fri, Jun 22, 2018 at 11:10:52AM +0200, Szabolcs Nagy wrote:
> > * Andrei Vagin <avagin@gmail.com> [2018-06-21 17:16:03 -0700]:
> > > On Tue, Jun 19, 2018 at 10:43:14PM +0200, Szabolcs Nagy wrote:
> > > > +
> > > > +int mlock2(const void *addr, size_t len, unsigned flags)
> > > > +{
> > > > +	if (flags == 0)
> > > > +		return mlock(addr, len);
> > > > +	return syscall(SYS_mlock2, addr, len, flags);
> > > 
> > > I would prefer another way to support old kernels:
> > > 
> > > 	int ret;
> > > 
> > > 	ret = syscall(SYS_mlock2, addr, len, flags);
> > > 	if (ret == -1 && errno == ENOSYS && flags == 0)
> > > 		return mlock(addr, len);
> > > 	return ret;
> > > 
> > > This way works a bit slower on old kernels, but it doesn't have side
> > > effects if mlock2 is supported.
> > > 
> > > For example, the user can set seccomp rules, and he will not expect that
> > > > the mlock syscall will be executed when he calls mlock2() in his code.
> > > 
> > 
> > mlock2 is documented to be equivalent to mlock if flags==0,
> > the glibc logic is the same and seccomp (or whatever else
> > operating on the syscall layer) has to deal with mlock
> > anyway (unless we change the mlock implementation too).
> > so i would not be too worried about this.
> 
> Glibc has the __ASSUME_MLOCK2 option, and if it is set only mlock2() is
> used. Modern distributions will probably build glibc with this option.
> I mean the glibc logic isn't exactly the same. And a concern about
> seccomp is still valid.
> 

glibc sets __ASSUME_MLOCK2 to true when the minimum required kernel
version is linux v4.4 and then glibc stops working on older kernels.

i don't know what distros set the minimum version to (the default
is 3.2.0 and i don't expect it to be set higher by distros otherwise
running in a container on a stable host with old kernel would fail),
but musl tries not to break behaviour on older kernels whenever
possible, so there is no minimum required kernel version, i.e. musl
is equivalent to glibc with all __ASSUME_* set to false.

> Anyway, I don't have strong objections about this patch, I just suggest
> another way how it can be done and, from my point of view, it is better.
> 
> Thanks,
> Andrei


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mlock2 and memfd_create
  2018-06-22  9:10   ` Szabolcs Nagy
  2018-06-22 17:58     ` Andrei Vagin
@ 2018-06-22 19:02     ` Rich Felker
  1 sibling, 0 replies; 6+ messages in thread
From: Rich Felker @ 2018-06-22 19:02 UTC (permalink / raw)
  To: musl

On Fri, Jun 22, 2018 at 11:10:52AM +0200, Szabolcs Nagy wrote:
> * Andrei Vagin <avagin@gmail.com> [2018-06-21 17:16:03 -0700]:
> > On Tue, Jun 19, 2018 at 10:43:14PM +0200, Szabolcs Nagy wrote:
> > > +
> > > +int mlock2(const void *addr, size_t len, unsigned flags)
> > > +{
> > > +	if (flags == 0)
> > > +		return mlock(addr, len);
> > > +	return syscall(SYS_mlock2, addr, len, flags);
> > 
> > I would prefer another way to support old kernels:
> > 
> > 	int ret;
> > 
> > 	ret = syscall(SYS_mlock2, addr, len, flags);
> > 	if (ret == -1 && errno == ENOSYS && flags == 0)
> > 		return mlock(addr, len);
> > 	return ret;
> > 
> > This way works a bit slower on old kernels, but it doesn't have side
> > effects if mlock2 is supported.
> > 
> > For example, the user can set seccomp rules, and he will not expect that
> > > the mlock syscall will be executed when he calls mlock2() in his code.
> > 
> 
> mlock2 is documented to be equivalent to mlock if flags==0,
> the glibc logic is the same and seccomp (or whatever else
> operating on the syscall layer) has to deal with mlock
> anyway (unless we change the mlock implementation too).
> so i would not be too worried about this.

Generally my leaning is not to program around seccomp, and further to
treat seccomp filters that forbid one operation but allow a
semantically-equivalent (or even logical-permissions-equivalent) one
as a bug in the seccomp filter. Yes that does make a little bit more
work for anyone writing seccomp filters, but it's positive work --
it's making the filters more-portable, less-specific to a particular
libc implementation.

Rich


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-06-22 19:02 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-19 20:43 [PATCH] mlock2 and memfd_create Szabolcs Nagy
2018-06-22  0:16 ` Andrei Vagin
2018-06-22  9:10   ` Szabolcs Nagy
2018-06-22 17:58     ` Andrei Vagin
2018-06-22 18:25       ` Szabolcs Nagy
2018-06-22 19:02     ` Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).