mailing list of musl libc
 help / color / mirror / code / Atom feed
* probably need to emulate SO_TIMESTAMP
@ 2019-12-17  5:08 Rich Felker
  0 siblings, 0 replies; only message in thread
From: Rich Felker @ 2019-12-17  5:08 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 2661 bytes --]

It's come to my attention that a good deal of software uses
SO_TIMESTAMP (the socket option for receiving timestamps as control
messages attached to datagrams), whose definition changes for time64.
Without any further measures, it fails with ENOPROTOOPT on pre-5.1
kernels. Some programs handle this gracefully; others treat it as a
hard error. Apparently it's old enough, and widely available enough on
non-Linux, that it's fairly "reasonable" to just assume it works.

Since otherwise this looks like it's going to be a big breaking part
of the time64 release, I'm strongly leaning towards emulating
SO_TIMESTAMP (and SO_TIMESTAMPNS) for pre-5.1 kernels. This involves
converting the setsockopt/getsockopt calls to use the old option as a
fallback, but more importantly, having recvmsg scan and translate
received control messages. A draft of the patch is attached.

Since recvmsg can't distinguish between being called by a time32
binary (that intentionally requested SO_TIMESTAMP_OLD) and being
called by a time64 binary (that got SO_TIMESTAMP_OLD as a fallback),
it doesn't replace the old control message (this couldn't be done
in-place anyway, since the new one is larger) but appends it. Programs
running on pre-5.1 kernels, and all legacy time32 callers, will then
see both SCM_TIMESTAMP_OLD and SCM_TIMESTAMP[_NEW] control messages,
assuming both fit (if not, perhaps we should set the MSG_CTRUNC flag
like the kernel would).

The related SO_TIMESTAMPING option is significantly harder to emulate
in a feature-complete manner, since it allows SCM_TIMESTAMPING to be
sent as a control message on a per-sendmsg basis, to control send
timestamp generation. A feature-incomplete version could be done just
like the other two, but it's not clear how beneficial that would be;
from my research so far (Debian Code Search, mostly, after some
initial reports of problems) it seems that SO_TIMESTAMPING is pretty
much always treated as a feature that might or might not be present or
work, and that absence is non-fatal. This makes sense because it's a
lot newer and seems Linux-specific.

One issue I've not mentioned yet is recvmmsg, which is an utter mess.
The O(1)-space emulation approach that works for recvmsg becomes O(n)
with recvmmsg, since there's nowhere to keep the original
msg_controllen for each msghdr. This means we need to limit vlen,
emulate it with recvmsg in a loop, or limit vlen in each loop
iteration but loop until the whole requested vlen is completed or
other exit conditions occur. My leaning would be just calling recvmmsg
in a loop since this whole function is such a mess.

Thoughts? Any potential problems with the attached?

Rich

[-- Attachment #2: emulate_so_timestamp.diff --]
[-- Type: text/plain, Size: 4429 bytes --]

diff --git a/src/internal/syscall.h b/src/internal/syscall.h
index 9f2784db..d768fb64 100644
--- a/src/internal/syscall.h
+++ b/src/internal/syscall.h
@@ -306,6 +306,13 @@ hidden long __syscall_ret(unsigned long),
 #define SO_SNDTIMEO_OLD  21
 #endif
 
+#define SO_TIMESTAMP_OLD    29 /* old socket option numbers; getsockopt/setsockopt in this patch retry with these */
+#define SO_TIMESTAMPNS_OLD  35 /* old counterpart of SO_TIMESTAMPNS */
+#define SO_TIMESTAMPING_OLD 37 /* defined for completeness; not referenced elsewhere in this patch */
+#define SCM_TIMESTAMP_OLD    SO_TIMESTAMP_OLD /* control-message types reuse the option values */
+#define SCM_TIMESTAMPNS_OLD  SO_TIMESTAMPNS_OLD /* matched against cmsg_type in recvmsg.c */
+#define SCM_TIMESTAMPING_OLD SO_TIMESTAMPING_OLD /* not referenced elsewhere in this patch */
+
 #ifndef SIOCGSTAMP_OLD
 #define SIOCGSTAMP_OLD 0x8906
 #endif
diff --git a/src/mman/mmap.c b/src/mman/mmap.c
index eff88d82..2da11b87 100644
--- a/src/mman/mmap.c
+++ b/src/mman/mmap.c
@@ -1,3 +1,4 @@
+#define SYSCALL_NO_TLS 1
 #include <unistd.h>
 #include <sys/mman.h>
 #include <errno.h>
diff --git a/src/network/getsockopt.c b/src/network/getsockopt.c
index e871d624..d3640d9c 100644
--- a/src/network/getsockopt.c
+++ b/src/network/getsockopt.c
@@ -26,6 +26,15 @@ int getsockopt(int fd, int level, int optname, void *restrict optval, socklen_t
 			tv->tv_sec = tv32[0];
 			tv->tv_usec = tv32[1];
 			*optlen = sizeof *tv;
+			break;
+		case SO_TIMESTAMP:
+		case SO_TIMESTAMPNS:
+			if (SO_TIMESTAMP == SO_TIMESTAMP_OLD) break;
+			if (optname==SO_TIMESTAMP) optname=SO_TIMESTAMP_OLD;
+			if (optname==SO_TIMESTAMPNS) optname=SO_TIMESTAMPNS_OLD;
+			r = __socketcall(getsockopt, fd, level,
+				optname, optval, optlen, 0);
+			break;
 		}
 	}
 	return __syscall_ret(r);
diff --git a/src/network/recvmsg.c b/src/network/recvmsg.c
index 4ca7da8b..50ea082d 100644
--- a/src/network/recvmsg.c
+++ b/src/network/recvmsg.c
@@ -1,10 +1,62 @@
 #include <sys/socket.h>
 #include <limits.h>
+#include <time.h>
+#include <sys/time.h>
+#include <string.h>
 #include "syscall.h"
 
+static void convert_scm_timestamps(struct msghdr *msg, socklen_t csize) /* append new-type copies of old timestamp cmsgs; csize is the msg_controllen the caller had before the syscall */
+{
+	if (SCM_TIMESTAMP == SCM_TIMESTAMP_OLD) return; /* old and new types are the same value: nothing to translate */
+	if (!msg->msg_control || !msg->msg_controllen) return; /* no control buffer, or its recorded length is zero */
+
+	int have_tv=0, have_ts=0; /* set when an old timeval / timespec message was seen */
+	struct timeval tv;
+	struct timespec ts;
+	struct cmsghdr *cmsg, *last=0;
+	socklen_t clen = msg->msg_controllen; /* control length currently recorded in msg */
+
+	for (cmsg=CMSG_FIRSTHDR(msg); cmsg; cmsg=CMSG_NXTHDR(msg, cmsg)) { /* walk every control message present */
+		if (cmsg->cmsg_level==SOL_SOCKET) switch (cmsg->cmsg_type) {
+		case SCM_TIMESTAMP_OLD:
+			have_tv = 1;
+			tv.tv_sec = *(long *)(CMSG_DATA(cmsg) + 0); /* payload read as two longs at byte offsets 0 and 4 */
+			tv.tv_usec = *(long *)(CMSG_DATA(cmsg) + 4); /* NOTE(review): offset 4 assumes a 4-byte long - confirm */
+			break;
+		case SCM_TIMESTAMPNS_OLD:
+			have_ts = 1;
+			ts.tv_sec = *(long *)(CMSG_DATA(cmsg) + 0); /* same two-long layout as the timeval case */
+			ts.tv_nsec = *(long *)(CMSG_DATA(cmsg) + 4);
+			break;
+		}
+		last = cmsg; /* remember the final header; new messages go after it */
+	}
+	if (!last) return; /* loop found no control messages */
+	msg->msg_controllen = csize; /* presumably widens the bound so CMSG_NXTHDR can yield the slot after last - TODO confirm */
+	cmsg = CMSG_NXTHDR(msg, last);
+	if (cmsg && have_ts && CMSG_SPACE(sizeof ts)<=csize-clen) { /* append only if a whole slot fits in the unused tail */
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_TIMESTAMPNS;
+		cmsg->cmsg_len = CMSG_LEN(sizeof ts);
+		memcpy(CMSG_DATA(cmsg), &ts, sizeof ts);
+		cmsg = CMSG_NXTHDR(msg, cmsg);
+		clen += CMSG_SPACE(sizeof ts); /* account for the space just consumed */
+	}
+	if (cmsg && have_tv && CMSG_SPACE(sizeof tv)<=csize-clen) { /* same check for the timeval copy */
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_TIMESTAMP;
+		cmsg->cmsg_len = CMSG_LEN(sizeof tv);
+		memcpy(CMSG_DATA(cmsg), &tv, sizeof tv);
+		cmsg = CMSG_NXTHDR(msg, cmsg);
+		clen += CMSG_SPACE(sizeof tv);
+	}
+	msg->msg_controllen = clen; /* final length: the starting length plus anything appended; a copy that does not fit is silently skipped */
+}
+
 ssize_t recvmsg(int fd, struct msghdr *msg, int flags)
 {
 	ssize_t r;
+	socklen_t orig_controllen = msg->msg_controllen;
 #if LONG_MAX > INT_MAX
 	struct msghdr h, *orig = msg;
 	if (msg) {
@@ -14,6 +66,7 @@ ssize_t recvmsg(int fd, struct msghdr *msg, int flags)
 	}
 #endif
 	r = socketcall_cp(recvmsg, fd, msg, flags, 0, 0, 0);
+	if (r >= 0) convert_scm_timestamps(msg, orig_controllen);
 #if LONG_MAX > INT_MAX
 	if (orig) *orig = h;
 #endif
diff --git a/src/network/setsockopt.c b/src/network/setsockopt.c
index 2c188a96..612a1947 100644
--- a/src/network/setsockopt.c
+++ b/src/network/setsockopt.c
@@ -31,6 +31,15 @@ int setsockopt(int fd, int level, int optname, const void *optval, socklen_t opt
 
 			r = __socketcall(setsockopt, fd, level, optname,
 				((long[]){s, CLAMP(us)}), 2*sizeof(long), 0);
+			break;
+		case SO_TIMESTAMP:
+		case SO_TIMESTAMPNS:
+			if (SO_TIMESTAMP == SO_TIMESTAMP_OLD) break;
+			if (optname==SO_TIMESTAMP) optname=SO_TIMESTAMP_OLD;
+			if (optname==SO_TIMESTAMPNS) optname=SO_TIMESTAMPNS_OLD;
+			r = __socketcall(setsockopt, fd, level,
+				optname, optval, optlen, 0);
+			break;
 		}
 	}
 	return __syscall_ret(r);

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2019-12-17  5:08 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-17  5:08 probably need to emulate SO_TIMESTAMP Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).