zsh-workers
 help / color / mirror / code / Atom feed
* [PATCH] Enable sub-second timeout in zsystem flock
@ 2019-07-29 20:35 Cedric Ware
  2019-07-29 22:25 ` Bart Schaefer
  0 siblings, 1 reply; 33+ messages in thread
From: Cedric Ware @ 2019-07-29 20:35 UTC (permalink / raw)
  To: zsh-workers

[-- Attachment #1: Type: text/plain, Size: 1762 bytes --]


	Hello,

I'm interested in using the zsystem flock function in the zsh/system
module to lock files, with a timeout if the lock is already taken, but
currently the timeout value has a whole-second granularity and I'd like
to specify sub-second timeouts.

The attached patch (against zsh 5.7.1) modifies the bin_zsystem_flock()
function, allowing the -t option to take floating-point parameters, and
handles the timeout with microsecond granularity.  It also adds option
-p to control the retrying period, which was previously hardcoded to
1 second.

I kept the same function's logic of periodically retrying to acquire
the lock in non-blocking mode.  However, there may be a better way to
implement this timeout: set a timer such as setitimer(2) which sends
a SIGALRM on timeout, trap SIGALRM without SA_RESTART, then acquire the
lock in blocking mode; fcntl() will then block until SIGALRM is caught,
after which errno will be EINTR.  Unfortunately, lacking knowledge of
zsh internals especially signal handling, I'm not confident that I can
reimplement it this way by myself.

The patch modifies Src/Modules/system.c and also Src/utils.c (to add
a function akin to time(NULL) but that returns microseconds, using
clock_gettime(CLOCK_MONOTONIC) if supported, or else gettimeofday()).

I did not modify the documentation yet, not being sure of people being
interested in this feature, nor of whether I should edit just the
Doc/Zsh/mod_system.yo file, or also Doc/{zshmodules.1,zsh.texi} or
others.

Also, I'm afraid that I only did minimal testing.  I checked that
invoking zsystem flock with various values for -t and -p seems to do
what I intended.  Is there a more rigorous test framework for this
kind of thing?

					Thanks, best regards,
					Cedric Ware.

[-- Attachment #2: zsh-5.7.1-flock-timeout.patch --]
[-- Type: text/x-diff, Size: 3047 bytes --]

diff -ur zsh-5.7.1.orig/Src/Modules/system.c zsh-5.7.1/Src/Modules/system.c
--- zsh-5.7.1.orig/Src/Modules/system.c	2018-12-14 08:50:17.000000000 +0100
+++ zsh-5.7.1/Src/Modules/system.c	2019-07-29 20:19:48.835060428 +0200
@@ -532,6 +532,8 @@
 {
     int cloexec = 1, unlock = 0, readlock = 0;
     zlong timeout = -1;
+    long timeout_retry = 1e6;
+    mnumber timeout_param;
     char *fdvar = NULL;
 #ifdef HAVE_FCNTL_H
     struct flock lck;
@@ -583,7 +585,35 @@
 		} else {
 		    optarg = *args++;
 		}
-		timeout = mathevali(optarg);
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout = (zlong)(timeout_param.u.d * 1e6);
+		break;
+
+	    case 'p':
+		/* retry period in seconds */
+		if (optptr[1]) {
+		    optarg = optptr + 1;
+		    optptr += strlen(optarg) - 1;
+		} else if (!*args) {
+		    zwarnnam(nam,
+			     "flock: option %c requires a numeric retry period",
+			     opt);
+		    return 1;
+		} else {
+		    optarg = *args++;
+		}
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		zlong timeout_retry_tmp = timeout_param.u.d * 1e6;
+		timeout_retry = (timeout_retry_tmp > LONG_MAX) ?
+		    LONG_MAX : timeout_retry_tmp;
 		break;
 
 	    case 'u':
@@ -647,7 +677,7 @@
     lck.l_len = 0;  /* lock the whole file */
 
     if (timeout > 0) {
-	time_t end = time(NULL) + (time_t)timeout;
+	zlong end = time_clock_us() + timeout;
 	while (fcntl(flock_fd, F_SETLK, &lck) < 0) {
 	    if (errflag) {
                 zclose(flock_fd);
@@ -658,11 +688,15 @@
 		zwarnnam(nam, "failed to lock file %s: %e", args[0], errno);
 		return 1;
 	    }
-	    if (time(NULL) >= end) {
+	    zlong now = time_clock_us();
+	    if (now >= end) {
                 zclose(flock_fd);
 		return 2;
             }
-	    sleep(1);
+	    if (now + timeout_retry > end) {
+		timeout_retry = end - now;
+	    }
+	    zsleep(timeout_retry);
 	}
     } else {
 	while (fcntl(flock_fd, timeout == 0 ? F_SETLK : F_SETLKW, &lck) < 0) {
diff -ur zsh-5.7.1.orig/Src/utils.c zsh-5.7.1/Src/utils.c
--- zsh-5.7.1.orig/Src/utils.c	2019-02-01 01:37:34.000000000 +0100
+++ zsh-5.7.1/Src/utils.c	2019-07-29 20:24:57.827073902 +0200
@@ -2724,6 +2724,26 @@
 }
 
 /*
+ * Return the current time in microseconds, using the system's
+ * monotonic clock if supported, the wall clock if not.
+ */
+
+/**/
+zlong
+time_clock_us(void)
+{
+#if defined(HAS_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return ts.tv_sec * (zlong)1e6 + ts.tv_nsec / 1000;
+#else
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec * (zlong)1e6 + tv.tv_usec;
+#endif
+}
+
+/*
  * Sleep for the given number of microseconds --- must be within
  * range of a long at the moment, but this is only used for
  * limited internal purposes.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2019-07-29 20:35 [PATCH] Enable sub-second timeout in zsystem flock Cedric Ware
@ 2019-07-29 22:25 ` Bart Schaefer
  2020-01-04 18:47   ` Cedric Ware
  0 siblings, 1 reply; 33+ messages in thread
From: Bart Schaefer @ 2019-07-29 22:25 UTC (permalink / raw)
  To: Cedric Ware; +Cc: zsh-workers

On Mon, Jul 29, 2019 at 1:36 PM Cedric Ware
<cedric.ware__bml@normalesup.org> wrote:
>
> The attached patch (against zsh 5.7.1) modifies the bin_zsystem_flock()

In future, please either inline patches or attach them as text/plain
(e.g. with a ".txt" extension instead of ".patch").  Thanks!

> I kept the same function's logic of periodically retrying to acquire
> the lock in non-blocking mode.  However, there may be a better way to
> implement this timeout: set a timer such as setitimer(2) which sends
> a SIGALRM on timeout

Unfortunately that technique can't be used because ALRM is a signal
that the user is allowed to trap via the "trap" command or TRAPALRM
function, and which is controlled by the TMOUT variable which is also
user settable.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2019-07-29 22:25 ` Bart Schaefer
@ 2020-01-04 18:47   ` Cedric Ware
  2020-01-05 18:42     ` dana
  0 siblings, 1 reply; 33+ messages in thread
From: Cedric Ware @ 2020-01-04 18:47 UTC (permalink / raw)
  To: zsh-workers


	Hello,

This is a re-send/update of a patch I'd proposed some time ago
(see http://www.zsh.org/mla/workers/2019/msg00620.html), to allow a
sub-second granularity for the zsystem flock function's timeout in the
zsh/system module.  This version of the patch is against 5.7.1-test-2,
and also updates the documentation in Doc/Zsh/mod_system.yo .

At the time, I'd been answered:

Bart Schaefer (Monday 2019-07-29):
> In future, please either inline patches or attach them as text/plain
> (e.g. with a ".txt" extension instead of ".patch").  Thanks!

Sure, please find it at the end of this mail.

> > implement this timeout: set a timer such as setitimer(2) which sends
> > a SIGALRM on timeout
> 
> Unfortunately that technique can't be used because ALRM is a signal
> that the user is allowed to trap via the "trap" command or TRAPALRM
> function, and which is controlled by the TMOUT variable which is also
> user settable.

Indeed, I see now.  But, to be clear, I didn't use that technique here.
That was just speculation.  With this patch, the function still polls
the lock periodically as before, except now the period can be specified
instead of being hardcoded to 1 second.

Also, I did some testing, but only checked that invoking zsystem flock
with various values for -t and -p seemed to do what I intended.
Is there a more rigorous test framework for this kind of thing, or
anything else to be done that might help this patch be included?

					Thanks, best regards,
					Cedric Ware.


diff -ru zsh-5.7.1-test-2.orig/Doc/Zsh/mod_system.yo zsh-5.7.1-test-2/Doc/Zsh/mod_system.yo
--- zsh-5.7.1-test-2.orig/Doc/Zsh/mod_system.yo	2019-08-17 23:14:27.000000000 +0200
+++ zsh-5.7.1-test-2/Doc/Zsh/mod_system.yo	2020-01-03 21:12:34.623024717 +0100
@@ -196,9 +196,10 @@
 
 By default the shell waits indefinitely for the lock to succeed.
 The option tt(-t) var(timeout) specifies a timeout for the lock in
-seconds; currently this must be an integer.  The shell will attempt
-to lock the file once a second during this period.  If the attempt
-times out, status 2 is returned.
+seconds; fractional seconds are allowed.  The shell will attempt
+to lock the file every var(period) seconds during this period:
+once a second by default, or according to the  value set by option
+tt(-p).  If the attempt times out, status 2 is returned.
 
 If the option tt(-e) is given, the file descriptor for the lock is
 preserved when the shell uses tt(exec) to start a new process;
diff -ru zsh-5.7.1-test-2.orig/Src/Modules/system.c zsh-5.7.1-test-2/Src/Modules/system.c
--- zsh-5.7.1-test-2.orig/Src/Modules/system.c	2019-08-17 23:14:27.000000000 +0200
+++ zsh-5.7.1-test-2/Src/Modules/system.c	2020-01-03 21:13:34.019588503 +0100
@@ -532,6 +532,8 @@
 {
     int cloexec = 1, unlock = 0, readlock = 0;
     zlong timeout = -1;
+    long timeout_retry = 1e6;
+    mnumber timeout_param;
     char *fdvar = NULL;
 #ifdef HAVE_FCNTL_H
     struct flock lck;
@@ -583,7 +585,35 @@
 		} else {
 		    optarg = *args++;
 		}
-		timeout = mathevali(optarg);
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout = (zlong)(timeout_param.u.d * 1e6);
+		break;
+
+	    case 'p':
+		/* retry period in seconds */
+		if (optptr[1]) {
+		    optarg = optptr + 1;
+		    optptr += strlen(optarg) - 1;
+		} else if (!*args) {
+		    zwarnnam(nam,
+			     "flock: option %c requires a numeric retry period",
+			     opt);
+		    return 1;
+		} else {
+		    optarg = *args++;
+		}
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		zlong timeout_retry_tmp = timeout_param.u.d * 1e6;
+		timeout_retry = (timeout_retry_tmp > LONG_MAX) ?
+		    LONG_MAX : timeout_retry_tmp;
 		break;
 
 	    case 'u':
@@ -647,7 +677,7 @@
     lck.l_len = 0;  /* lock the whole file */
 
     if (timeout > 0) {
-	time_t end = time(NULL) + (time_t)timeout;
+	zlong end = time_clock_us() + timeout;
 	while (fcntl(flock_fd, F_SETLK, &lck) < 0) {
 	    if (errflag) {
                 zclose(flock_fd);
@@ -658,11 +688,15 @@
 		zwarnnam(nam, "failed to lock file %s: %e", args[0], errno);
 		return 1;
 	    }
-	    if (time(NULL) >= end) {
+	    zlong now = time_clock_us();
+	    if (now >= end) {
                 zclose(flock_fd);
 		return 2;
             }
-	    sleep(1);
+	    if (now + timeout_retry > end) {
+		timeout_retry = end - now;
+	    }
+	    zsleep(timeout_retry);
 	}
     } else {
 	while (fcntl(flock_fd, timeout == 0 ? F_SETLK : F_SETLKW, &lck) < 0) {
diff -ru zsh-5.7.1-test-2.orig/Src/utils.c zsh-5.7.1-test-2/Src/utils.c
--- zsh-5.7.1-test-2.orig/Src/utils.c	2019-12-21 09:29:14.000000000 +0100
+++ zsh-5.7.1-test-2/Src/utils.c	2020-01-03 21:13:34.023588812 +0100
@@ -2749,6 +2749,26 @@
 }
 
 /*
+ * Return the current time in microseconds, using the system's
+ * monotonic clock if supported, the wall clock if not.
+ */
+
+/**/
+zlong
+time_clock_us(void)
+{
+#if defined(HAS_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return ts.tv_sec * (zlong)1e6 + ts.tv_nsec / 1000;
+#else
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec * (zlong)1e6 + tv.tv_usec;
+#endif
+}
+
+/*
  * Sleep for the given number of microseconds --- must be within
  * range of a long at the moment, but this is only used for
  * limited internal purposes.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-01-04 18:47   ` Cedric Ware
@ 2020-01-05 18:42     ` dana
  2020-01-05 21:49       ` dana
  2020-01-06 17:30       ` Cedric Ware
  0 siblings, 2 replies; 33+ messages in thread
From: dana @ 2020-01-05 18:42 UTC (permalink / raw)
  To: Cedric Ware; +Cc: zsh-workers

On 4 Jan 2020, at 12:47, Cedric Ware <cedric.ware__bml@normalesup.org> wrote:
> This is a re-send/update of a patch I'd proposed some time ago

Feedback off the top of my head (haven't tested and don't know whether anyone
else would object to the change):

> +seconds; fractional seconds are allowed.  The shell will attempt
> +to lock the file every var(period) seconds during this period:
> +once a second by default, or according to the  value set by option
> +tt(-p).

The first thing that occurs to me is that you've now decoupled the polling
interval and time-out, which means that not only can you *manually* specify a
polling interval greater than the time-out, it's very possible for the
*default* interval to be greater than the time-out. In fact any -t value less
than one second is kind of pointless unless you also give -p

I'm not sure how you'd address this functionality-wise. Should it be an error?
idk. It should at least be mentioned in the documentation, though

Also this addition is worded a bit strangely i think. period is an argument to
-p; it isn't used unless -p is. Maybe something like: 'During this period, the
shell will attempt to lock the file every period seconds if the -p option is
given, otherwise once a second.' Something to that effect anyway

> +    long timeout_retry = 1e6;
> +    mnumber timeout_param;

It's kind of confusing that these are both named 'timeout' when they don't
specifically have anything to do with the time-out

> +		zlong timeout_retry_tmp = timeout_param.u.d * 1e6;

zsh mandates C89, you can't declare variables in the middle of a block

> +		timeout_retry = (timeout_retry_tmp > LONG_MAX) ?
> +		    LONG_MAX : timeout_retry_tmp;

Should *this* kind of thing be an error? I guess there's not really any
error-checking in the existing option-handling code, though

> +time_clock_us(void)

Feels like a weird function name, but i don't have a better suggestion

btw, it would probably be ideal if shtimer (the variable the shell uses for
the time built-in, the SECONDS parameter, &c.) was monotonic as well. But
that's a separate thing

> Is there a more rigorous test framework for this kind of thing, or
> anything else to be done that might help this patch be included?

The test stuff is in the Test directory in the repo. There don't seem to be
any existing tests for zsystem, so you would have to make like a
V13zsystem.ztst (see the other V* files for inspiration). Not sure what the
tests would actually do, though. Maybe you could call it with low -p/-t values
and use SECONDS to ensure that it times out roughly when it's supposed to?

dana


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-01-05 18:42     ` dana
@ 2020-01-05 21:49       ` dana
  2020-01-06 17:30       ` Cedric Ware
  1 sibling, 0 replies; 33+ messages in thread
From: dana @ 2020-01-05 21:49 UTC (permalink / raw)
  To: Cedric Ware; +Cc: zsh-workers

On 5 Jan 2020, at 12:42, dana <dana@dana.is> wrote:
> It's kind of confusing that these are both named 'timeout' when they don't
> specifically have anything to do with the time-out

Actually, this caught my eye again and i realised that they're named that way
because the polling interval is only used when you give a time-out. So never
mind this part, the confusion was mine

dana


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-01-05 18:42     ` dana
  2020-01-05 21:49       ` dana
@ 2020-01-06 17:30       ` Cedric Ware
  2020-01-06 17:36         ` Peter Stephenson
  2020-01-07  3:48         ` dana
  1 sibling, 2 replies; 33+ messages in thread
From: Cedric Ware @ 2020-01-06 17:30 UTC (permalink / raw)
  To: dana; +Cc: zsh-workers


dana (Sunday 2020-01-05):
> Feedback off the top of my head (haven't tested and don't know whether anyone
> else would object to the change):

Thanks for the feedback.  Revised patch below.

> The first thing that occurs to me is that you've now decoupled the polling
> interval and time-out, which means that not only can you *manually* specify a
> polling interval greater than the time-out, it's very possible for the
> *default* interval to be greater than the time-out. In fact any -t value less
> than one second is kind of pointless unless you also give -p

Not quite pointless: in the last iteration of the retry loop, if the
timeout is closer to the current time than the retry interval, zsleep()
only waits until the timeout, then there's a last locking attempt, and
only then the function fails.

So, if the retry interval is longer than the timeout, there will still
be 2 locking attempts, one at the start, one at the timeout.  One could
object that this behavior does not quite follow what one would expect
when specifying a retry interval, but as long as the documentation
mentions it, this seems reasonable to me.

> Also this addition is worded a bit strangely i think. period is an argument to
> -p; it isn't used unless -p is. Maybe something like: 'During this period, the
> shell will attempt to lock the file every period seconds if the -p option is
> given, otherwise once a second.' Something to that effect anyway

That's clearer, indeed.  Also I changed "-p period" to "-i interval":
I had in mind "period-as-in-periodicity", but that was ambiguous.

> > +    long timeout_retry = 1e6;
> > +    mnumber timeout_param;
> 
> It's kind of confusing that these are both named 'timeout' when they don't
> specifically have anything to do with the time-out

As you mentioned in your other email, they're only used when -t is
given.  Also, I thought I'd reuse the same mnumber variable to parse
the arguments of both -t and -i, but I can change that if it breaks the
coding style.

> > +		zlong timeout_retry_tmp = timeout_param.u.d * 1e6;
> 
> zsh mandates C89, you can't declare variables in the middle of a block

OK.

> > +		timeout_retry = (timeout_retry_tmp > LONG_MAX) ?
> > +		    LONG_MAX : timeout_retry_tmp;
> 
> Should *this* kind of thing be an error? I guess there's not really any
> error-checking in the existing option-handling code, though

What I had in mind was that the retry interval has to be a long because
of zsleep(), which on 32-bit systems limits it to about 35 minutes.  It
seems likely that a user could specify a value that leads to an
overflow; but OTOH I thought that it wasn't a problem if locking was
attempted more often than specified.

Now that I've thought about it again, I added a similar check for the
timeout value even though it's a zlong, and I documented that the
interval is silently capped at LONG_MAX microseconds.  Do you think
it should be reported as an error instead?

Also, I assumed that zlong was at least 64-bit; can it happen that it
be only 32-bit on some systems?  That would be a problem to implement
the microsecond clock function.

> > +time_clock_us(void)
> 
> Feels like a weird function name, but i don't have a better suggestion

Likewise.

> btw, it would probably be ideal if shtimer (the variable the shell uses for
> the time built-in, the SECONDS parameter, &c.) was monotonic as well. But
> that's a separate thing

Maybe.  I believe CLOCK_MONOTONIC stops when the system is suspended;
Linux has CLOCK_BOOTIME that adds the suspend time, but it's not
portable.  I don't know what would surprise users more between having
SECONDS run backwards (which it could now), and not including suspend
time (if it was changed to CLOCK_MONOTONIC).

> The test stuff is in the Test directory in the repo. There don't seem to be
> any existing tests for zsystem, so you would have to make like a
> V13zsystem.ztst (see the other V* files for inspiration). Not sure what the
> tests would actually do, though. Maybe you could call it with low -p/-t values
> and use SECONDS to ensure that it times out roughly when it's supposed to?

Thanks for the pointer.  Yes, that could be done, though I don't have
the time right now.  What I actually had in mind was a test suite I
could run to check that I didn't break anything elsewhere.  Which is
"make test", I should have thought of it.  The current tests are still
successful with the patch.

					Best regards,
					Cedric Ware.


diff -ru zsh-5.7.1-test-2.orig/Doc/Zsh/mod_system.yo zsh-5.7.1-test-2/Doc/Zsh/mod_system.yo
--- zsh-5.7.1-test-2.orig/Doc/Zsh/mod_system.yo	2019-08-17 23:14:27.000000000 +0200
+++ zsh-5.7.1-test-2/Doc/Zsh/mod_system.yo	2020-01-06 17:57:35.835049585 +0100
@@ -166,7 +166,7 @@
 printed in the last case, but the parameter tt(ERRNO) will reflect
 the error that occurred.
 )
-xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
+xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-i) var(interval) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
 item(tt(zsystem flock -u) var(fd_expr))(
 The builtin tt(zsystem)'s subcommand tt(flock) performs advisory file
 locking (via the manref(fcntl)(2) system call) over the entire contents
@@ -196,9 +196,16 @@
 
 By default the shell waits indefinitely for the lock to succeed.
 The option tt(-t) var(timeout) specifies a timeout for the lock in
-seconds; currently this must be an integer.  The shell will attempt
-to lock the file once a second during this period.  If the attempt
-times out, status 2 is returned.
+seconds; fractional seconds are allowed.  During this period, the
+shell will attempt to lock the file every var(interval) seconds
+if the tt(-i) var(interval) option is given, otherwise once a second.
+(This var(interval) is shortened before the last attempt if needed,
+so that the shell waits only until the var(timeout) and not longer.)
+If the attempt times out, status 2 is returned.
+
+(Note: var(interval) is silently capped at LONG_MAX microseconds.
+This is many millenia on 64-bit systems, but only about 35 minutes
+on 32-bit systems.)
 
 If the option tt(-e) is given, the file descriptor for the lock is
 preserved when the shell uses tt(exec) to start a new process;
diff -ru zsh-5.7.1-test-2.orig/Src/Modules/system.c zsh-5.7.1-test-2/Src/Modules/system.c
--- zsh-5.7.1-test-2.orig/Src/Modules/system.c	2019-08-17 23:14:27.000000000 +0200
+++ zsh-5.7.1-test-2/Src/Modules/system.c	2020-01-06 18:14:41.045767126 +0100
@@ -532,6 +532,9 @@
 {
     int cloexec = 1, unlock = 0, readlock = 0;
     zlong timeout = -1;
+    double timeout_tmp;
+    long timeout_retry = 1e6;
+    mnumber timeout_param;
     char *fdvar = NULL;
 #ifdef HAVE_FCNTL_H
     struct flock lck;
@@ -583,7 +586,38 @@
 		} else {
 		    optarg = *args++;
 		}
-		timeout = mathevali(optarg);
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout_tmp = timeout_param.u.d * 1e6;
+		timeout = (timeout_tmp > ZLONG_MAX / 2) ?
+		    ZLONG_MAX / 2 : (zlong)timeout_tmp;
+		break;
+
+	    case 'i':
+		/* retry interval in seconds */
+		if (optptr[1]) {
+		    optarg = optptr + 1;
+		    optptr += strlen(optarg) - 1;
+		} else if (!*args) {
+		    zwarnnam(nam,
+			     "flock: option %c requires "
+			     "a numeric retry interval",
+			     opt);
+		    return 1;
+		} else {
+		    optarg = *args++;
+		}
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout_tmp = timeout_param.u.d * 1e6;
+		timeout_retry = (timeout_tmp > LONG_MAX) ?
+		    LONG_MAX : (long)timeout_tmp;
 		break;
 
 	    case 'u':
@@ -647,7 +681,8 @@
     lck.l_len = 0;  /* lock the whole file */
 
     if (timeout > 0) {
-	time_t end = time(NULL) + (time_t)timeout;
+	zlong now;
+	zlong end = time_clock_us() + timeout;
 	while (fcntl(flock_fd, F_SETLK, &lck) < 0) {
 	    if (errflag) {
                 zclose(flock_fd);
@@ -658,11 +693,15 @@
 		zwarnnam(nam, "failed to lock file %s: %e", args[0], errno);
 		return 1;
 	    }
-	    if (time(NULL) >= end) {
+	    now = time_clock_us();
+	    if (now >= end) {
                 zclose(flock_fd);
 		return 2;
             }
-	    sleep(1);
+	    if (now + timeout_retry > end) {
+		timeout_retry = end - now;
+	    }
+	    zsleep(timeout_retry);
 	}
     } else {
 	while (fcntl(flock_fd, timeout == 0 ? F_SETLK : F_SETLKW, &lck) < 0) {
diff -ru zsh-5.7.1-test-2.orig/Src/utils.c zsh-5.7.1-test-2/Src/utils.c
--- zsh-5.7.1-test-2.orig/Src/utils.c	2019-12-21 09:29:14.000000000 +0100
+++ zsh-5.7.1-test-2/Src/utils.c	2020-01-03 21:13:34.023588812 +0100
@@ -2749,6 +2749,26 @@
 }
 
 /*
+ * Return the current time in microseconds, using the system's
+ * monotonic clock if supported, the wall clock if not.
+ */
+
+/**/
+zlong
+time_clock_us(void)
+{
+#if defined(HAS_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return ts.tv_sec * (zlong)1e6 + ts.tv_nsec / 1000;
+#else
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec * (zlong)1e6 + tv.tv_usec;
+#endif
+}
+
+/*
  * Sleep for the given number of microseconds --- must be within
  * range of a long at the moment, but this is only used for
  * limited internal purposes.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-01-06 17:30       ` Cedric Ware
@ 2020-01-06 17:36         ` Peter Stephenson
  2020-01-07  3:48         ` dana
  1 sibling, 0 replies; 33+ messages in thread
From: Peter Stephenson @ 2020-01-06 17:36 UTC (permalink / raw)
  To: zsh-workers, Cedric Ware

On Mon, 2020-01-06 at 18:30 +0100, Cedric Ware wrote:
> Also, I assumed that zlong was at least 64-bit; can it happen that it
> be only 32-bit on some systems?  That would be a problem to implement
> the microsecond clock function.

That shouldn't happen unless the configuration script can't actually
find a 64-bit type, implying either the compiler doesn't support it or
it's not using standard types.  I think you can reasonably consider that
such a failure on a system that actually does support 64-bit integers
isn't your problem.

pws


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-01-06 17:30       ` Cedric Ware
  2020-01-06 17:36         ` Peter Stephenson
@ 2020-01-07  3:48         ` dana
  2020-01-11 15:41           ` Cedric Ware
  1 sibling, 1 reply; 33+ messages in thread
From: dana @ 2020-01-07  3:48 UTC (permalink / raw)
  To: Cedric Ware; +Cc: zsh-workers

On 6 Jan 2020, at 11:30, Cedric Ware <cedric.ware__bml@normalesup.org> wrote:
> Not quite pointless: in the last iteration of the retry loop, if the
> timeout is closer to the current time than the retry interval, zsleep()
> only waits until the timeout, then there's a last locking attempt, and
> only then the function fails.

Oh, i missed that, sorry. That doesn't seem so bad.

On 6 Jan 2020, at 11:30, Cedric Ware <cedric.ware__bml@normalesup.org> wrote:
> Also, I thought I'd reuse the same mnumber variable to parse
> the arguments of both -t and -i, but I can change that if it breaks the
> coding style.

It's fine afaik

On 6 Jan 2020, at 11:30, Cedric Ware <cedric.ware__bml@normalesup.org> wrote:
> Now that I've thought about it again, I added a similar check for the
> timeout value even though it's a zlong, and I documented that the
> interval is silently capped at LONG_MAX microseconds.  Do you think
> it should be reported as an error instead?

Like i said, it doesn't seem to do much error-checking anywhere else, so i
guess it's consistent... but if it were me i'd probably make any out-of-range
value return an error, yeah.

On 6 Jan 2020, at 11:30, Cedric Ware <cedric.ware__bml@normalesup.org> wrote:
> Maybe.  I believe CLOCK_MONOTONIC stops when the system is suspended;
> Linux has CLOCK_BOOTIME that adds the suspend time, but it's not
> portable.  I don't know what would surprise users more between having
> SECONDS run backwards (which it could now), and not including suspend
> time (if it was changed to CLOCK_MONOTONIC).

Since it's mainly used for timing things i think that'd be fine, but maybe
there's something i'm not considering.

On 6 Jan 2020, at 11:30, Cedric Ware <cedric.ware__bml@normalesup.org> wrote:
> Thanks for the pointer.  Yes, that could be done, though I don't have
> the time right now.  What I actually had in mind was a test suite I
> could run to check that I didn't break anything elsewhere.  Which is
> "make test", I should have thought of it.  The current tests are still
> successful with the patch.

I'm slightly busy again too but i could try to write a script for it later.
It'd be going into 5.9 anyway

dana


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-01-07  3:48         ` dana
@ 2020-01-11 15:41           ` Cedric Ware
  2020-01-11 19:36             ` dana
  0 siblings, 1 reply; 33+ messages in thread
From: Cedric Ware @ 2020-01-11 15:41 UTC (permalink / raw)
  To: dana; +Cc: zsh-workers


dana (Monday 2020-01-06):
> Like i said, it doesn't seem to do much error-checking anywhere else, so i
> guess it's consistent... but if it were me i'd probably make any out-of-range
> value return an error, yeah.

Easy enough, except that producing a useful error message, with the
actual maximum value, is trickier than I'd expected.

zerrmsg() (and thus zwarnnam()) only supports displaying a long (%L)
if DEBUG is enabled (see "#ifdef DEBUG" lines 290 and 328 of
Src/utils.c), and no floating-point types.  Is this deliberate?
I can see why floats would be harder to support, but long seems
harmless.

If %L can be enabled in zerrmsg() then, getting back to zsystem/flock,
if there's an overflow on the value of the retry interval, it's easy to
display an error message saying that the value is limited to
LONG_MAX / 1000000L seconds (slightly underestimated for display
purposes).  As for an overflow on the (64-bit) timeout value, it's
harder to display the maximum value, but then users are less likely to
stumble into an overflow so I'd leave it silent and perhaps document it.

If not, there's always the possibility of displaying a generic "value
too large" message.


dana (Monday 2020-01-06):
> On 6 Jan 2020, at 11:30, Cedric Ware <cedric.ware__bml@normalesup.org> wrote:
> > Thanks for the pointer.  Yes, that could be done, though I don't have
> > the time right now.  What I actually had in mind was a test suite I
> > could run to check that I didn't break anything elsewhere.  Which is
> > "make test", I should have thought of it.  The current tests are still
> > successful with the patch.
> 
> I'm slightly busy again too but i could try to write a script for it later.

As I've never done this before, I'd feel more confident adding tests to
an existing script than trying to learn and write one from scratch.

					Thanks, best regards,
					Cedric Ware.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-01-11 15:41           ` Cedric Ware
@ 2020-01-11 19:36             ` dana
  2020-01-12  4:25               ` dana
  0 siblings, 1 reply; 33+ messages in thread
From: dana @ 2020-01-11 19:36 UTC (permalink / raw)
  To: Cedric Ware; +Cc: zsh-workers

On 11 Jan 2020, at 09:41, Cedric Ware <cedric.ware__bml@normalesup.org> wrote:
> zerrmsg() (and thus zwarnnam()) only supports displaying a long (%L)
> if DEBUG is enabled (see "#ifdef DEBUG" lines 290 and 328 of
> Src/utils.c), and no floating-point types.  Is this deliberate?

It was deliberate at one time. Daniel actually changed it in master a week or
two ago, so it should be usable now. I don't even think it needs to be that
elaborate though, 'invalid value' or whatever would be fine

On 11 Jan 2020, at 09:41, Cedric Ware <cedric.ware__bml@normalesup.org> wrote:
> As I've never done this before, I'd feel more confident adding tests to
> an existing script than trying to learn and write one from scratch.

np. I should have time this week end actually, will get back to you

dana


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-01-11 19:36             ` dana
@ 2020-01-12  4:25               ` dana
  2020-03-08 18:39                 ` Cedric Ware
  0 siblings, 1 reply; 33+ messages in thread
From: dana @ 2020-01-12  4:25 UTC (permalink / raw)
  To: Cedric Ware; +Cc: zsh-workers

On 11 Jan 2020, at 13:36, dana <dana@dana.is> wrote:
> I should have time this week end actually, will get back to you

Here's a basic, almost-does-nothing test script for it. Obv need to change the
expected error messages in the second test to whatever you decide on. You can
run it with `make check TESTNUM=V13`

dana


diff --git a/Test/V13system.ztst b/Test/V13system.ztst
new file mode 100644
index 000000000..e3faca37d
--- /dev/null
+++ b/Test/V13system.ztst
@@ -0,0 +1,30 @@
+# Test zsh/system module
+
+%prep
+
+  if zmodload -s zsh/system; then
+    tst_dir=V13.tmp
+    mkdir -p -- $tst_dir
+  else
+    ZTST_unimplemented='the zsh/system module is not available'
+  fi
+
+%test
+
+  (
+    : > $tst_dir/file
+    zsystem flock -t 0.1 -i 0.1 $tst_dir/file
+  )
+0:zsystem flock valid time arguments
+
+  (
+    zsystem flock -t -1                  $tst_dir/file ||
+    zsystem flock -t 9999999999999999999 $tst_dir/file ||
+    zsystem flock -i -1                  $tst_dir/file ||
+    zsystem flock -i 9999999999999999999 $tst_dir/file
+  )
+1:zsystem flock invalid time arguments
+?(eval):zsystem:2: flock: invalid timeout
+?(eval):zsystem:3: flock: invalid timeout
+?(eval):zsystem:4: flock: invalid interval
+?(eval):zsystem:5: flock: invalid interval


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-01-12  4:25               ` dana
@ 2020-03-08 18:39                 ` Cedric Ware
  2020-03-12 18:46                   ` dana
  2020-03-13 14:26                   ` dana
  0 siblings, 2 replies; 33+ messages in thread
From: Cedric Ware @ 2020-03-08 18:39 UTC (permalink / raw)
  To: dana; +Cc: zsh-workers


	Hello,

dana (Saturday 2020-01-11):
> Here's a basic, almost-does-nothing test script for it. Obv need to change the
> expected error messages in the second test to whatever you decide on. You can
> run it with `make check TESTNUM=V13`

Thank you.  Please find below a revised patch against 5.8.  This is
still to enable fractional-seconds timeout and retry interval for
zsystem flock.

One thing about the tests: I've used the (...)& syntax to launch a
sub-shell in the background for concurrency checks.  But does the test
script wait for sub-processes to finish between tests?  If not, then we
may need to add a 0.1-second sleep after the 3rd test
("zsystem flock unsuccessful wait test") because it starts a sub-shell
that waits for 0.2 seconds ("zselect -t 20") but the rest of the test
times out on purpose after only 0.11 seconds; so the sub-shell will
still be holding the lock for 0.09 seconds.

Also note that the test script requires zselect in addition to zsystem,
because I needed something that would reliably sleep a fractional
number of seconds.  The regular sleep command isn't guaranteed to
support that, AFAIK.

					Thanks, best regards,
					Cedric Ware.


diff -ruN zsh-5.8.orig/Doc/Zsh/mod_system.yo zsh-5.8/Doc/Zsh/mod_system.yo
--- zsh-5.8.orig/Doc/Zsh/mod_system.yo	2020-01-13 06:39:15.000000000 +0100
+++ zsh-5.8/Doc/Zsh/mod_system.yo	2020-03-08 18:39:32.672683779 +0100
@@ -166,7 +166,7 @@
 printed in the last case, but the parameter tt(ERRNO) will reflect
 the error that occurred.
 )
-xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
+xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-i) var(interval) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
 item(tt(zsystem flock -u) var(fd_expr))(
 The builtin tt(zsystem)'s subcommand tt(flock) performs advisory file
 locking (via the manref(fcntl)(2) system call) over the entire contents
@@ -196,9 +196,16 @@
 
 By default the shell waits indefinitely for the lock to succeed.
 The option tt(-t) var(timeout) specifies a timeout for the lock in
-seconds; currently this must be an integer.  The shell will attempt
-to lock the file once a second during this period.  If the attempt
-times out, status 2 is returned.
+seconds; fractional seconds are allowed.  During this period, the
+shell will attempt to lock the file every var(interval) seconds
+if the tt(-i) var(interval) option is given, otherwise once a second.
+(This var(interval) is shortened before the last attempt if needed,
+so that the shell waits only until the var(timeout) and not longer.)
+If the attempt times out, status 2 is returned.
+
+(Note: var(interval) must be less than LONG_MAX microseconds.
+This is many millenia on 64-bit systems, but only about 35 minutes
+on 32-bit systems.)
 
 If the option tt(-e) is given, the file descriptor for the lock is
 preserved when the shell uses tt(exec) to start a new process;
diff -ruN zsh-5.8.orig/Src/Modules/system.c zsh-5.8/Src/Modules/system.c
--- zsh-5.8.orig/Src/Modules/system.c	2020-02-06 20:53:18.000000000 +0100
+++ zsh-5.8/Src/Modules/system.c	2020-03-08 18:43:02.756951315 +0100
@@ -532,6 +532,9 @@
 {
     int cloexec = 1, unlock = 0, readlock = 0;
     zlong timeout = -1;
+    double timeout_tmp;
+    long timeout_retry = 1e6;
+    mnumber timeout_param;
     char *fdvar = NULL;
 #ifdef HAVE_FCNTL_H
     struct flock lck;
@@ -583,7 +586,44 @@
 		} else {
 		    optarg = *args++;
 		}
-		timeout = mathevali(optarg);
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout_tmp = timeout_param.u.d * 1e6;
+		if ((timeout_tmp < 1) || (timeout_tmp > ZLONG_MAX / 2)) {
+		    zwarnnam(nam, "flock: invalid timeout value");
+		    return 1;
+		}
+		timeout = (zlong)timeout_tmp;
+		break;
+
+	    case 'i':
+		/* retry interval in seconds */
+		if (optptr[1]) {
+		    optarg = optptr + 1;
+		    optptr += strlen(optarg) - 1;
+		} else if (!*args) {
+		    zwarnnam(nam,
+			     "flock: option %c requires "
+			     "a numeric retry interval",
+			     opt);
+		    return 1;
+		} else {
+		    optarg = *args++;
+		}
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout_tmp = timeout_param.u.d * 1e6;
+		if ((timeout_tmp < 1) || (timeout_tmp > LONG_MAX)) {
+		    zwarnnam(nam, "flock: invalid interval value");
+		    return 1;
+		}
+		timeout_retry = (long)timeout_tmp;
 		break;
 
 	    case 'u':
@@ -647,7 +687,8 @@
     lck.l_len = 0;  /* lock the whole file */
 
     if (timeout > 0) {
-	time_t end = time(NULL) + (time_t)timeout;
+	zlong now;
+	zlong end = time_clock_us() + timeout;
 	while (fcntl(flock_fd, F_SETLK, &lck) < 0) {
 	    if (errflag) {
                 zclose(flock_fd);
@@ -658,11 +699,15 @@
 		zwarnnam(nam, "failed to lock file %s: %e", args[0], errno);
 		return 1;
 	    }
-	    if (time(NULL) >= end) {
+	    now = time_clock_us();
+	    if (now >= end) {
                 zclose(flock_fd);
 		return 2;
             }
-	    sleep(1);
+	    if (now + timeout_retry > end) {
+		timeout_retry = end - now;
+	    }
+	    zsleep(timeout_retry);
 	}
     } else {
 	while (fcntl(flock_fd, timeout == 0 ? F_SETLK : F_SETLKW, &lck) < 0) {
diff -ruN zsh-5.8.orig/Src/utils.c zsh-5.8/Src/utils.c
--- zsh-5.8.orig/Src/utils.c	2020-01-13 06:39:15.000000000 +0100
+++ zsh-5.8/Src/utils.c	2020-03-08 18:43:02.756951315 +0100
@@ -2745,6 +2745,26 @@
 }
 
 /*
+ * Return the current time in microseconds, using the system's
+ * monotonic clock if supported, the wall clock if not.
+ */
+
+/**/
+zlong
+time_clock_us(void)
+{
+#if defined(HAS_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return ts.tv_sec * (zlong)1e6 + ts.tv_nsec / 1000;
+#else
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec * (zlong)1e6 + tv.tv_usec;
+#endif
+}
+
+/*
  * Sleep for the given number of microseconds --- must be within
  * range of a long at the moment, but this is only used for
  * limited internal purposes.
diff -ruN zsh-5.8.orig/Test/V13system.ztst zsh-5.8/Test/V13system.ztst
--- zsh-5.8.orig/Test/V13system.ztst	1970-01-01 01:00:00.000000000 +0100
+++ zsh-5.8/Test/V13system.ztst	2020-03-08 19:04:56.550666311 +0100
@@ -0,0 +1,98 @@
+# Test zsh/system module
+
+%prep
+
+  if zmodload -s zsh/system && zmodload -s zsh/zselect; then
+    tst_dir=V13.tmp
+    mkdir -p -- $tst_dir
+  else
+    ZTST_unimplemented='the zsh/system and zsh/zselect modules are not available'
+  fi
+
+%test
+
+  (
+    : > $tst_dir/file
+    zsystem flock -t 0.1 -i 0.000001 $tst_dir/file
+  )
+0:zsystem flock valid time arguments
+
+  (
+    zsystem flock -t -1      $tst_dir/file ||
+    zsystem flock -t 0.49e-6 $tst_dir/file ||
+    zsystem flock -t 1e100   $tst_dir/file ||
+    zsystem flock -i -1      $tst_dir/file ||
+    zsystem flock -i 0.49e-6 $tst_dir/file ||
+    zsystem flock -i 1e100   $tst_dir/file
+  )
+1:zsystem flock invalid time arguments
+?(eval):zsystem:2: flock: invalid timeout value
+?(eval):zsystem:3: flock: invalid timeout value
+?(eval):zsystem:4: flock: invalid timeout value
+?(eval):zsystem:5: flock: invalid interval value
+?(eval):zsystem:6: flock: invalid interval value
+?(eval):zsystem:7: flock: invalid interval value
+
+  (
+    (zsystem flock $tst_dir/file && zselect -t 20) &
+    zselect -t 1
+    zsystem flock -t 0.1 $tst_dir/file
+  )
+2:zsystem flock unsuccessful wait test
+
+  (
+    typeset -F SECONDS
+    start=$SECONDS
+    (zsystem flock $tst_dir/file && zselect -t 50) &
+    zselect -t 1
+    if zsystem flock $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.3 && $elapsed -le 0.7 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 0.5 second" 1>&2
+      fi
+    else
+      false
+    fi
+  )
+0:zsystem flock successful wait test, no timeout
+?elapsed time seems OK
+
+  (
+    typeset -F SECONDS
+    start=$SECONDS
+    (zsystem flock $tst_dir/file && zselect -t 50) &
+    zselect -t 1
+    if zsystem flock -t 1 $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.9 && $elapsed -le 1.1 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 1.01 second" 1>&2
+      fi
+    else
+      false
+    fi
+  )
+0:zsystem flock successful wait test, integral seconds
+?elapsed time seems OK
+
+  (
+    typeset -F SECONDS
+    start=$SECONDS
+    (zsystem flock $tst_dir/file && zselect -t 25) &
+    zselect -t 1
+    if zsystem flock -t 0.4 -i 0.1 $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.2 && $elapsed -le 0.5 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 0.3 second" 1>&2
+      fi
+    else
+      false
+    fi
+  )
+0:zsystem flock successful wait test, fractional seconds
+?elapsed time seems OK

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-03-08 18:39                 ` Cedric Ware
@ 2020-03-12 18:46                   ` dana
  2020-03-12 19:13                     ` dana
  2020-03-14 21:04                     ` Cedric Ware
  2020-03-13 14:26                   ` dana
  1 sibling, 2 replies; 33+ messages in thread
From: dana @ 2020-03-12 18:46 UTC (permalink / raw)
  To: Cedric Ware; +Cc: zsh-workers

Sorry for the delay.

On 8 Mar 2020, at 13:39, Cedric Ware <cedric.ware__bml@normalesup.org> wrote:
> One thing about the tests: I've used the (...)& syntax to launch a
> sub-shell in the background for concurrency checks.  But does the test
> script wait for sub-processes to finish between tests?

It does not, no.

Some more comments:

> +  (
> +    : > $tst_dir/file
> +    zsystem flock -t 0.1 -i 0.000001 $tst_dir/file
> +  )

If this file is meant to be used by the other tests, it seems like it might
make more sense to create it in %prep. Like it'll be fine this way, but unless
there's a specific reason to do it like that...

> +  (
> +    typeset -F SECONDS
> +    start=$SECONDS
> +    (zsystem flock $tst_dir/file && zselect -t 50) &
> +    zselect -t 1
> +    if zsystem flock $tst_dir/file; then
> +      elapsed=$[ $SECONDS - $start ]
> +      if [[ $elapsed -ge 0.3 && $elapsed -le 0.7 ]]; then
> +        echo "elapsed time seems OK" 1>&2
> +      else
> +        echo "elapsed time $elapsed should be ~ 0.5 second" 1>&2
> +      fi
> +    else
> +      false
> +    fi
> +  )

1. All of the `else false`es in these tests seem redundant. If the condition
   doesn't pass it'll return >0 automatically, and the test will fail if it
   doesn't get the right output anyway

2. This particular test doesn't seem reliable on my machine. Within the test
   harness, it normally takes about 0.078 seconds. Probably the fork over-head
   (which is pretty high on macOS) is greater than the amount of time you're
   giving it wait? If i change `zselect -t 1` to `zselect -t 10` it seems to
   work better... but it still feels very brittle. Very much dependent on the
   hardware, the OS, and the current resource utilisation

dana


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-03-12 18:46                   ` dana
@ 2020-03-12 19:13                     ` dana
  2020-03-14 21:04                     ` Cedric Ware
  1 sibling, 0 replies; 33+ messages in thread
From: dana @ 2020-03-12 19:13 UTC (permalink / raw)
  To: Cedric Ware; +Cc: zsh-workers

On 12 Mar 2020, at 13:46, dana <dana@dana.is> wrote:
> 1. All of the `else false`es in these tests seem redundant. If the condition
>    doesn't pass it'll return >0 automatically

This part is wrong of course :p

The other part stands though

dana


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-03-08 18:39                 ` Cedric Ware
  2020-03-12 18:46                   ` dana
@ 2020-03-13 14:26                   ` dana
  1 sibling, 0 replies; 33+ messages in thread
From: dana @ 2020-03-13 14:26 UTC (permalink / raw)
  To: Cedric Ware; +Cc: zsh-workers

On 8 Mar 2020, at 13:39, Cedric Ware <cedric.ware__bml@normalesup.org> wrote:
> diff -ruN zsh-5.8.orig/Test/V13system.ztst zsh-5.8/Test/V13system.ztst

btw, i just realised that the 5.9 merge gave us a V13zformat. So i suppose
this needs to be V14system now

dana


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-03-12 18:46                   ` dana
  2020-03-12 19:13                     ` dana
@ 2020-03-14 21:04                     ` Cedric Ware
  2020-03-15  0:50                       ` Daniel Shahaf
  2020-03-15  1:04                       ` Daniel Shahaf
  1 sibling, 2 replies; 33+ messages in thread
From: Cedric Ware @ 2020-03-14 21:04 UTC (permalink / raw)
  To: dana; +Cc: zsh-workers


dana (Thursday 2020-03-12):
> 2. This particular test doesn't seem reliable on my machine. Within the test
>    harness, it normally takes about 0.078 seconds. Probably the fork over-head
>    (which is pretty high on macOS) is greater than the amount of time you're
>    giving it wait? If i change `zselect -t 1` to `zselect -t 10` it seems to
>    work better... but it still feels very brittle. Very much dependent on the
>    hardware, the OS, and the current resource utilisation

I see.  Here's a new version of the patch.  As long as the tests are
run sequentially and not in parallel, there shouldn't be any race
condition left.  Instead of just waiting 10 ms and hoping that the
sub-shell has had time to start in the background, I'm now actually
testing the presence of a file that it creates.

It might still fail if the background sub-shell completes, including
the several-tenths-of-a-second wait, before the next part of the test
is run.  Do you think it's still too likely?

I've also addressed the "else false" and the numbering change, and
added some comments to make it clearer.

					Best regards,
					Cedric Ware.


diff -ruN zsh-5.8.orig/Doc/Zsh/mod_system.yo zsh-5.8/Doc/Zsh/mod_system.yo
--- zsh-5.8.orig/Doc/Zsh/mod_system.yo	2020-01-13 06:39:15.000000000 +0100
+++ zsh-5.8/Doc/Zsh/mod_system.yo	2020-03-08 18:39:32.672683779 +0100
@@ -166,7 +166,7 @@
 printed in the last case, but the parameter tt(ERRNO) will reflect
 the error that occurred.
 )
-xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
+xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-i) var(interval) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
 item(tt(zsystem flock -u) var(fd_expr))(
 The builtin tt(zsystem)'s subcommand tt(flock) performs advisory file
 locking (via the manref(fcntl)(2) system call) over the entire contents
@@ -196,9 +196,16 @@
 
 By default the shell waits indefinitely for the lock to succeed.
 The option tt(-t) var(timeout) specifies a timeout for the lock in
-seconds; currently this must be an integer.  The shell will attempt
-to lock the file once a second during this period.  If the attempt
-times out, status 2 is returned.
+seconds; fractional seconds are allowed.  During this period, the
+shell will attempt to lock the file every var(interval) seconds
+if the tt(-i) var(interval) option is given, otherwise once a second.
+(This var(interval) is shortened before the last attempt if needed,
+so that the shell waits only until the var(timeout) and not longer.)
+If the attempt times out, status 2 is returned.
+
+(Note: var(interval) must be less than LONG_MAX microseconds.
+This is many millenia on 64-bit systems, but only about 35 minutes
+on 32-bit systems.)
 
 If the option tt(-e) is given, the file descriptor for the lock is
 preserved when the shell uses tt(exec) to start a new process;
diff -ruN zsh-5.8.orig/Src/Modules/system.c zsh-5.8/Src/Modules/system.c
--- zsh-5.8.orig/Src/Modules/system.c	2020-02-06 20:53:18.000000000 +0100
+++ zsh-5.8/Src/Modules/system.c	2020-03-08 18:43:02.756951315 +0100
@@ -532,6 +532,9 @@
 {
     int cloexec = 1, unlock = 0, readlock = 0;
     zlong timeout = -1;
+    double timeout_tmp;
+    long timeout_retry = 1e6;
+    mnumber timeout_param;
     char *fdvar = NULL;
 #ifdef HAVE_FCNTL_H
     struct flock lck;
@@ -583,7 +586,44 @@
 		} else {
 		    optarg = *args++;
 		}
-		timeout = mathevali(optarg);
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout_tmp = timeout_param.u.d * 1e6;
+		if ((timeout_tmp < 1) || (timeout_tmp > ZLONG_MAX / 2)) {
+		    zwarnnam(nam, "flock: invalid timeout value");
+		    return 1;
+		}
+		timeout = (zlong)timeout_tmp;
+		break;
+
+	    case 'i':
+		/* retry interval in seconds */
+		if (optptr[1]) {
+		    optarg = optptr + 1;
+		    optptr += strlen(optarg) - 1;
+		} else if (!*args) {
+		    zwarnnam(nam,
+			     "flock: option %c requires "
+			     "a numeric retry interval",
+			     opt);
+		    return 1;
+		} else {
+		    optarg = *args++;
+		}
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout_tmp = timeout_param.u.d * 1e6;
+		if ((timeout_tmp < 1) || (timeout_tmp > LONG_MAX)) {
+		    zwarnnam(nam, "flock: invalid interval value");
+		    return 1;
+		}
+		timeout_retry = (long)timeout_tmp;
 		break;
 
 	    case 'u':
@@ -647,7 +687,8 @@
     lck.l_len = 0;  /* lock the whole file */
 
     if (timeout > 0) {
-	time_t end = time(NULL) + (time_t)timeout;
+	zlong now;
+	zlong end = time_clock_us() + timeout;
 	while (fcntl(flock_fd, F_SETLK, &lck) < 0) {
 	    if (errflag) {
                 zclose(flock_fd);
@@ -658,11 +699,15 @@
 		zwarnnam(nam, "failed to lock file %s: %e", args[0], errno);
 		return 1;
 	    }
-	    if (time(NULL) >= end) {
+	    now = time_clock_us();
+	    if (now >= end) {
                 zclose(flock_fd);
 		return 2;
             }
-	    sleep(1);
+	    if (now + timeout_retry > end) {
+		timeout_retry = end - now;
+	    }
+	    zsleep(timeout_retry);
 	}
     } else {
 	while (fcntl(flock_fd, timeout == 0 ? F_SETLK : F_SETLKW, &lck) < 0) {
diff -ruN zsh-5.8.orig/Src/utils.c zsh-5.8/Src/utils.c
--- zsh-5.8.orig/Src/utils.c	2020-01-13 06:39:15.000000000 +0100
+++ zsh-5.8/Src/utils.c	2020-03-08 18:43:02.756951315 +0100
@@ -2745,6 +2745,26 @@
 }
 
 /*
+ * Return the current time in microseconds, using the system's
+ * monotonic clock if supported, the wall clock if not.
+ */
+
+/**/
+zlong
+time_clock_us(void)
+{
+#if defined(HAS_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return ts.tv_sec * (zlong)1e6 + ts.tv_nsec / 1000;
+#else
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec * (zlong)1e6 + tv.tv_usec;
+#endif
+}
+
+/*
  * Sleep for the given number of microseconds --- must be within
  * range of a long at the moment, but this is only used for
  * limited internal purposes.
diff -ruN zsh-5.8.orig/Test/V14system.ztst zsh-5.8/Test/V14system.ztst
--- zsh-5.8.orig/Test/V14system.ztst	1970-01-01 01:00:00.000000000 +0100
+++ zsh-5.8/Test/V14system.ztst	2020-03-14 21:59:02.351858164 +0100
@@ -0,0 +1,131 @@
+# Test zsh/system module
+
+%prep
+
+  if zmodload -s zsh/system && zmodload -s zsh/zselect; then
+    tst_dir=V13.tmp
+    mkdir -p -- $tst_dir
+  else
+    ZTST_unimplemented='the zsh/system and zsh/zselect modules are not available'
+  fi
+  : > $tst_dir/file # File on which to acquire flock.
+
+%test
+
+  (
+    zsystem flock -t 0.1 -i 0.000001 $tst_dir/file
+  )
+0:zsystem flock valid time arguments
+
+  (
+    zsystem flock -t -1      $tst_dir/file ||
+    zsystem flock -t 0.49e-6 $tst_dir/file ||
+    zsystem flock -t 1e100   $tst_dir/file ||
+    zsystem flock -i -1      $tst_dir/file ||
+    zsystem flock -i 0.49e-6 $tst_dir/file ||
+    zsystem flock -i 1e100   $tst_dir/file
+  )
+1:zsystem flock invalid time arguments
+?(eval):zsystem:2: flock: invalid timeout value
+?(eval):zsystem:3: flock: invalid timeout value
+?(eval):zsystem:4: flock: invalid timeout value
+?(eval):zsystem:5: flock: invalid interval value
+?(eval):zsystem:6: flock: invalid interval value
+?(eval):zsystem:7: flock: invalid interval value
+
+  (
+    # Lock file for 1 second in the background.
+    (zsystem flock $tst_dir/file \
+     && touch $tst_dir/locked \
+     && zselect -t 100
+     rm -f $tst_dir/locked) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $tst_dir/locked ]]; do
+      zselect -t 1
+    done
+    # Attempt to lock file with 0.5 second timeout: must fail.
+    zsystem flock -t 0.5 $tst_dir/file
+  )
+2:zsystem flock unsuccessful wait test
+
+  (
+    # Wait until sub-shell of the previous test has finished.
+    while [[ -f $tst_dir/locked ]]; do
+      zselect -t 10
+    done
+    # Lock file for 0.5 second in the background.
+    (zsystem flock $tst_dir/file \
+      && touch $tst_dir/locked \
+      && zselect -t 50
+      rm -f $tst_dir/locked) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $tst_dir/locked ]]; do
+      zselect -t 1
+    done
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file without a timeout:
+    # must succeed after sub-shell above releases it (0.5 second).
+    if zsystem flock $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.3 && $elapsed -le 0.7 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 0.5 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, no timeout
+?elapsed time seems OK
+
+  (
+    # Lock file for 0.5 second in the background.
+    (zsystem flock $tst_dir/file \
+      && touch $tst_dir/locked \
+      && zselect -t 50
+      rm -f $tst_dir/locked) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $tst_dir/locked ]]; do
+      zselect -t 1
+    done
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file with 1-second timeout:
+    # must succeed 1 second after start because we retry every 1 second.
+    if zsystem flock -t 1 $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.8 && $elapsed -le 1.2 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 1 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, integral seconds
+?elapsed time seems OK
+
+  (
+    # Lock file for 0.25 second in the background.
+    (zsystem flock $tst_dir/file \
+      && touch $tst_dir/locked \
+      && zselect -t 25
+      rm -f $tst_dir/locked) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $tst_dir/locked ]]; do
+      zselect -t 1
+    done
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file with 0.4-second timeout, retrying every 0.1 second:
+    # must succeed 0.3 second after start.
+    if zsystem flock -t 0.4 -i 0.1 $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.2 && $elapsed -le 0.5 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 0.3 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, fractional seconds
+?elapsed time seems OK

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-03-14 21:04                     ` Cedric Ware
@ 2020-03-15  0:50                       ` Daniel Shahaf
  2020-03-15  1:04                         ` dana
  2020-03-15 16:03                         ` Cedric Ware
  2020-03-15  1:04                       ` Daniel Shahaf
  1 sibling, 2 replies; 33+ messages in thread
From: Daniel Shahaf @ 2020-03-15  0:50 UTC (permalink / raw)
  To: Cedric Ware; +Cc: dana, zsh-workers

Cedric Ware wrote on Sat, 14 Mar 2020 22:04 +0100:
> +++ zsh-5.8/Doc/Zsh/mod_system.yo	2020-03-08 18:39:32.672683779 +0100
> @@ -166,7 +166,7 @@
>  printed in the last case, but the parameter tt(ERRNO) will reflect
>  the error that occurred.
>  )
> -xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
> +xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-i) var(interval) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
>  item(tt(zsystem flock -u) var(fd_expr))(

Please s/var(var)/var(interval)/ in the body paragraphs of the item()().

> +++ zsh-5.8/Src/Modules/system.c	2020-03-08 18:43:02.756951315 +0100

Please write patches against git HEAD rather than against the latest
release, if possible.  (See 45283 for an example where that would have
mattered.)

> @@ -583,7 +586,44 @@

(Aside: if your diff(1) supports the -p option to show function names
in hunk headers, please use it; it makes reviewing easier.)

>  		} else {  
>  		    optarg = *args++;
>  		}  
> -		timeout = mathevali(optarg);
> +		timeout_param = matheval(optarg);
> +		if (!(timeout_param.type & MN_FLOAT)) {
> +		    timeout_param.type = MN_FLOAT;
> +		    timeout_param.u.d = (double)timeout_param.u.l;
> +		}
> +		timeout_tmp = timeout_param.u.d * 1e6;
> +		if ((timeout_tmp < 1) || (timeout_tmp > ZLONG_MAX / 2)) {
> +		    zwarnnam(nam, "flock: invalid timeout value");

I think the invalid value should be included in the error message,
either as a number, or as a string if needed: "flock: invalid timeout
value: '%s'".  In my experience, "invalid input" error messages are
easier to consume when they actually say what the invalid input is.

dana, I see you advocated the opposite approach in 45283.  What's your
rationale?

If we change this, there's another instance of this in the next «case»
block that should be changed too.

> +		    return 1;
> +		}
> +		timeout = (zlong)timeout_tmp;
> +		break;

> @@ -647,7 +687,8 @@
>      lck.l_len = 0;  /* lock the whole file */
>  
>      if (timeout > 0) {
> -	time_t end = time(NULL) + (time_t)timeout;
> +	zlong now;
> +	zlong end = time_clock_us() + timeout;

Could this sum overflow (for example, if the -t option is used)?

>  	while (fcntl(flock_fd, F_SETLK, &lck) < 0) {
>  	    if (errflag) {
>                  zclose(flock_fd);

> @@ -658,11 +699,15 @@
>  		zwarnnam(nam, "failed to lock file %s: %e", args[0], errno);
>  		return 1;
>  	    }  
> -	    if (time(NULL) >= end) {
> +	    now = time_clock_us();
> +	    if (now >= end) {
>                  zclose(flock_fd);
>  		return 2;
>              }  
> -	    sleep(1);
> +	    if (now + timeout_retry > end) {

Could this sum overflow (for example, if the -i option is used)?

> +		timeout_retry = end - now;
> +	    }
> +	    zsleep(timeout_retry);

> +++ zsh-5.8/Src/utils.c	2020-03-08 18:43:02.756951315 +0100
> @@ -2745,6 +2745,26 @@
>  /*
> + * Return the current time in microseconds, using the system's
> + * monotonic clock if supported, the wall clock if not.
> + */
> +
> +/**/
> +zlong
> +time_clock_us(void)
> +{
> +#if defined(HAS_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)

Isn't the macro spelled HAVE_CLOCK_GETTIME?  It's spelled that way in
Src/compat.c:105.

Have both codepaths (the #if and the #else) been tested?

> +    struct timespec ts;
> +    clock_gettime(CLOCK_MONOTONIC, &ts);
> +    return ts.tv_sec * (zlong)1e6 + ts.tv_nsec / 1000;
> +#else
> +    struct timeval tv;
> +    gettimeofday(&tv, NULL);
> +    return tv.tv_sec * (zlong)1e6 + tv.tv_usec;
> +#endif
> +}  
> +

Cheers,

Daniel

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-03-14 21:04                     ` Cedric Ware
  2020-03-15  0:50                       ` Daniel Shahaf
@ 2020-03-15  1:04                       ` Daniel Shahaf
  1 sibling, 0 replies; 33+ messages in thread
From: Daniel Shahaf @ 2020-03-15  1:04 UTC (permalink / raw)
  To: Cedric Ware; +Cc: dana, zsh-workers

Cedric Ware wrote on Sat, 14 Mar 2020 22:04 +0100:
> dana (Thursday 2020-03-12):
> > 2. This particular test doesn't seem reliable on my machine. Within the test
> >    harness, it normally takes about 0.078 seconds. Probably the fork over-head
> >    (which is pretty high on macOS) is greater than the amount of time you're
> >    giving it wait? If i change `zselect -t 1` to `zselect -t 10` it seems to
> >    work better... but it still feels very brittle. Very much dependent on the
> >    hardware, the OS, and the current resource utilisation  
> 
> I see.  Here's a new version of the patch.  As long as the tests are
> run sequentially and not in parallel, there shouldn't be any race
> condition left.  Instead of just waiting 10 ms and hoping that the
> sub-shell has had time to start in the background, I'm now actually
> testing the presence of a file that it creates.
> 
> It might still fail if the background sub-shell completes, including
> the several-tenths-of-a-second wait, before the next part of the test
> is run.  Do you think it's still too likely?

> +++ zsh-5.8/Test/V14system.ztst	2020-03-14 21:59:02.351858164 +0100
> @@ -0,0 +1,131 @@
> +# Test zsh/system module
> +  (
> +    # Lock file for 1 second in the background.
> +    (zsystem flock $tst_dir/file \
> +     && touch $tst_dir/locked \
> +     && zselect -t 100
> +     rm -f $tst_dir/locked) &
> +    # Wait until sub-shell above has started.
> +    while ! [[ -f $tst_dir/locked ]]; do
> +      zselect -t 1
> +    done

If «zsystem flock» returns non-zero, this loop will never terminate.

Tests should be written to always terminate, if possible; and if not,
they should warn about that on $ZTST_fd.  (The output of «make check»
has several examples of the latter.)

There are additional instances of this later in the file.

> +    # Attempt to lock file with 0.5 second timeout: must fail.
> +    zsystem flock -t 0.5 $tst_dir/file
> +  )
> +2:zsystem flock unsuccessful wait test
> +
> +  (
> +    # Wait until sub-shell of the previous test has finished.
> +    while [[ -f $tst_dir/locked ]]; do
> +      zselect -t 10
> +    done

Wouldn't it be easier to use a different file in this test than in the
previous test?  Tests should be independent of each other if possible.

> +    # Lock file for 0.5 second in the background.
> +    (zsystem flock $tst_dir/file \
> +      && touch $tst_dir/locked \
> +      && zselect -t 50
> +      rm -f $tst_dir/locked) &
> +    # Wait until sub-shell above has started.
> +    while ! [[ -f $tst_dir/locked ]]; do
> +      zselect -t 1
> +    done
> +    typeset -F SECONDS
> +    start=$SECONDS
> +    # Attempt to lock file without a timeout:
> +    # must succeed after sub-shell above releases it (0.5 second).
> +    if zsystem flock $tst_dir/file; then
> +      elapsed=$[ $SECONDS - $start ]
> +      if [[ $elapsed -ge 0.3 && $elapsed -le 0.7 ]]; then
> +        echo "elapsed time seems OK" 1>&2
> +      else
> +        echo "elapsed time $elapsed should be ~ 0.5 second" 1>&2
> +      fi
> +    fi
> +  )
> +0:zsystem flock successful wait test, no timeout
> +?elapsed time seems OK

How about adding some "F:" lines (to this and subsequent tests)
explaining that failure doesn't necessarily indicate a problem in zsh,
but could also be caused by process scheduling issues?

Cheers,

Daniel

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-03-15  0:50                       ` Daniel Shahaf
@ 2020-03-15  1:04                         ` dana
  2020-03-15 16:03                         ` Cedric Ware
  1 sibling, 0 replies; 33+ messages in thread
From: dana @ 2020-03-15  1:04 UTC (permalink / raw)
  To: Daniel Shahaf; +Cc: Cedric Ware, zsh-workers

On 14 Mar 2020, at 19:50, Daniel Shahaf <d.s@daniel.shahaf.name> wrote:
> I think the invalid value should be included in the error message,
> either as a number, or as a string if needed: "flock: invalid timeout
> value: '%s'".  In my experience, "invalid input" error messages are
> easier to consume when they actually say what the invalid input is.
>
> dana, I see you advocated the opposite approach in 45283.  What's your
> rationale?

I think i was just trying not to seem nit-picky in light of the fact that it
didn't previously error-check the time-out value. I agree that including the
original input in the error message would be better (and more typical)

dana


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-03-15  0:50                       ` Daniel Shahaf
  2020-03-15  1:04                         ` dana
@ 2020-03-15 16:03                         ` Cedric Ware
  2020-03-15 16:54                           ` Daniel Shahaf
  1 sibling, 1 reply; 33+ messages in thread
From: Cedric Ware @ 2020-03-15 16:03 UTC (permalink / raw)
  To: Daniel Shahaf; +Cc: dana, zsh-workers


	Hello,

Thanks for the feedback.  I have tried to address all the issues with
the new patch below (against git HEAD), but please keep in mind that
I'm not familiar with the test framework, so I may have missed better
solutions.

Some comments on what I may not have addressed:

Daniel Shahaf (Sunday 2020-03-15):
> Cedric Ware wrote on Sat, 14 Mar 2020 22:04 +0100:
> > +++ zsh-5.8/Doc/Zsh/mod_system.yo	2020-03-08 18:39:32.672683779 +0100
> > @@ -166,7 +166,7 @@
> >  printed in the last case, but the parameter tt(ERRNO) will reflect
> >  the error that occurred.
> >  )
> > -xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
> > +xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-i) var(interval) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
> >  item(tt(zsystem flock -u) var(fd_expr))(
> 
> Please s/var(var)/var(interval)/ in the body paragraphs of the item()().

I don't understand.  Option -f's parameter is actually called "var".
The only occurrences of "var(var)" here are about -f, I didn't add them.

> > +		timeout_tmp = timeout_param.u.d * 1e6;
> > +		if ((timeout_tmp < 1) || (timeout_tmp > ZLONG_MAX / 2)) {
> > +		    zwarnnam(nam, "flock: invalid timeout value");
> 
> I think the invalid value should be included in the error message,
> either as a number, or as a string if needed: "flock: invalid timeout
> value: '%s'".

Done, but it had to be as a string, because I didn't see a way to use
a zlong in zwarnnam().

> dana, I see you advocated the opposite approach in 45283.  What's your
> rationale?

I think that was about my earlier attempt to include the actual limit
in the error message, not the input.  I reverted to a generic message,
because I don't know how to make the test script check for an error
message that depends on the system/compiler options.

> > +	zlong now;
> > +	zlong end = time_clock_us() + timeout;
> 
> Could this sum overflow (for example, if the -t option is used)?

I don't think so: timeout is limited to ZLONG_MAX / 2, which
corresponds to 140000 years or so if zlong is 64-bit.  For the sum
to overflow, the current time as given by either gettimeofday() or
clock_gettime(CLOCK_MONOTONIC) would have to also be greater than that.

OTOH, if zlong is only 32-bit, there could be a problem, and in fact my
function time_clock_us() might not work.  However, it was suggested in
message 45250 that it shouldn't be my problem.  Should it be?  Should
this whole patch be disabled somehow on systems that don't support any
64-bit type?

> > +	    if (now + timeout_retry > end) {
> 
> Could this sum overflow (for example, if the -i option is used)?

It might if long and zlong are the same size, as timeout_retry
is limited to LONG_MAX.  The program would then wait past the timeout
by one interval.  But, again, this interval would have to be hundreds
of thousands of years; if a user requests that, in my opinion, we might
as well just wait forever.

Still, would you like it better if I limited the interval to
min(LONG_MAX, ZLONG_MAX / 2) instead of LONG_MAX?

> Isn't the macro spelled HAVE_CLOCK_GETTIME?  It's spelled that way in
> Src/compat.c:105.

Oops.

> Have both codepaths (the #if and the #else) been tested?

Well, now they have.

					Best regards,
					Cedric Ware.


diff --git a/Doc/Zsh/mod_system.yo b/Doc/Zsh/mod_system.yo
index 6292af071..715acf45b 100644
--- a/Doc/Zsh/mod_system.yo
+++ b/Doc/Zsh/mod_system.yo
@@ -166,7 +166,7 @@ to the command, or 2 for an error on the write; no error message is
 printed in the last case, but the parameter tt(ERRNO) will reflect
 the error that occurred.
 )
-xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
+xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-i) var(interval) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
 item(tt(zsystem flock -u) var(fd_expr))(
 The builtin tt(zsystem)'s subcommand tt(flock) performs advisory file
 locking (via the manref(fcntl)(2) system call) over the entire contents
@@ -196,9 +196,16 @@ a safety check that the file descriptor is in use for file locking.
 
 By default the shell waits indefinitely for the lock to succeed.
 The option tt(-t) var(timeout) specifies a timeout for the lock in
-seconds; currently this must be an integer.  The shell will attempt
-to lock the file once a second during this period.  If the attempt
-times out, status 2 is returned.
+seconds; fractional seconds are allowed.  During this period, the
+shell will attempt to lock the file every var(interval) seconds
+if the tt(-i) var(interval) option is given, otherwise once a second.
+(This var(interval) is shortened before the last attempt if needed,
+so that the shell waits only until the var(timeout) and not longer.)
+If the attempt times out, status 2 is returned.
+
+(Note: var(interval) must be less than LONG_MAX microseconds.
+This is many millenia on 64-bit systems, but only about 35 minutes
+on 32-bit systems.)
 
 If the option tt(-e) is given, the file descriptor for the lock is
 preserved when the shell uses tt(exec) to start a new process;
diff --git a/Src/Modules/system.c b/Src/Modules/system.c
index fb3d80773..e0ddd2a2a 100644
--- a/Src/Modules/system.c
+++ b/Src/Modules/system.c
@@ -532,6 +532,9 @@ bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
 {
     int cloexec = 1, unlock = 0, readlock = 0;
     zlong timeout = -1;
+    double timeout_tmp;
+    long timeout_retry = 1e6;
+    mnumber timeout_param;
     char *fdvar = NULL;
 #ifdef HAVE_FCNTL_H
     struct flock lck;
@@ -583,7 +586,46 @@ bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
 		} else {
 		    optarg = *args++;
 		}
-		timeout = mathevali(optarg);
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout_tmp = timeout_param.u.d * 1e6;
+		if ((timeout_tmp < 1) || (timeout_tmp > ZLONG_MAX / 2)) {
+		    zwarnnam(nam, "flock: invalid timeout value: '%s'",
+			     optarg);
+		    return 1;
+		}
+		timeout = (zlong)timeout_tmp;
+		break;
+
+	    case 'i':
+		/* retry interval in seconds */
+		if (optptr[1]) {
+		    optarg = optptr + 1;
+		    optptr += strlen(optarg) - 1;
+		} else if (!*args) {
+		    zwarnnam(nam,
+			     "flock: option %c requires "
+			     "a numeric retry interval",
+			     opt);
+		    return 1;
+		} else {
+		    optarg = *args++;
+		}
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout_tmp = timeout_param.u.d * 1e6;
+		if ((timeout_tmp < 1) || (timeout_tmp > LONG_MAX)) {
+		    zwarnnam(nam, "flock: invalid interval value: '%s'",
+			     optarg);
+		    return 1;
+		}
+		timeout_retry = (long)timeout_tmp;
 		break;
 
 	    case 'u':
@@ -647,7 +689,8 @@ bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
     lck.l_len = 0;  /* lock the whole file */
 
     if (timeout > 0) {
-	time_t end = time(NULL) + (time_t)timeout;
+	zlong now;
+	zlong end = time_clock_us() + timeout;
 	while (fcntl(flock_fd, F_SETLK, &lck) < 0) {
 	    if (errflag) {
                 zclose(flock_fd);
@@ -658,11 +701,15 @@ bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
 		zwarnnam(nam, "failed to lock file %s: %e", args[0], errno);
 		return 1;
 	    }
-	    if (time(NULL) >= end) {
+	    now = time_clock_us();
+	    if (now >= end) {
                 zclose(flock_fd);
 		return 2;
             }
-	    sleep(1);
+	    if (now + timeout_retry > end) {
+		timeout_retry = end - now;
+	    }
+	    zsleep(timeout_retry);
 	}
     } else {
 	while (fcntl(flock_fd, timeout == 0 ? F_SETLK : F_SETLKW, &lck) < 0) {
diff --git a/Src/utils.c b/Src/utils.c
index f9c2d4a2b..efa9fe08a 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -2744,6 +2744,26 @@ read_poll(int fd, int *readchar, int polltty, zlong microseconds)
     return (ret > 0);
 }
 
+/*
+ * Return the current time in microseconds, using the system's
+ * monotonic clock if supported, the wall clock if not.
+ */
+
+/**/
+zlong
+time_clock_us(void)
+{
+#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return ts.tv_sec * (zlong)1e6 + ts.tv_nsec / 1000;
+#else
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec * (zlong)1e6 + tv.tv_usec;
+#endif
+}
+
 /*
  * Sleep for the given number of microseconds --- must be within
  * range of a long at the moment, but this is only used for
diff --git a/Test/V14system.ztst b/Test/V14system.ztst
index e69de29bb..bc01a8fe6 100644
--- a/Test/V14system.ztst
+++ b/Test/V14system.ztst
@@ -0,0 +1,148 @@
+# Test zsh/system module
+
+%prep
+
+  if zmodload -s zsh/system && zmodload -s zsh/zselect; then
+    tst_dir=V13.tmp
+    mkdir -p -- $tst_dir
+  else
+    ZTST_unimplemented='the zsh/system and zsh/zselect modules are not available'
+  fi
+  : > $tst_dir/file # File on which to acquire flock.
+
+%test
+
+  (
+    zsystem flock -t 0.1 -i 0.000001 $tst_dir/file
+  )
+0:zsystem flock valid time arguments
+
+  (
+    zsystem flock -t -1      $tst_dir/file ||
+    zsystem flock -t 0.49e-6 $tst_dir/file ||
+    zsystem flock -t 1e100   $tst_dir/file ||
+    zsystem flock -i -1      $tst_dir/file ||
+    zsystem flock -i 0.49e-6 $tst_dir/file ||
+    zsystem flock -i 1e100   $tst_dir/file
+  )
+1:zsystem flock invalid time arguments
+?(eval):zsystem:2: flock: invalid timeout value: '-1'
+?(eval):zsystem:3: flock: invalid timeout value: '0.49e-6'
+?(eval):zsystem:4: flock: invalid timeout value: '1e100'
+?(eval):zsystem:5: flock: invalid interval value: '-1'
+?(eval):zsystem:6: flock: invalid interval value: '0.49e-6'
+?(eval):zsystem:7: flock: invalid interval value: '1e100'
+
+  (
+    # Lock file for 1 second in the background.
+    lock_flag=$tst_dir/locked1
+    (zsystem flock $tst_dir/file \
+     && touch $lock_flag \
+     && zselect -t 100
+     mv $lock_flag $lock_flag.done) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
+      zselect -t 1
+    done
+    if [[ -f $lock_flag.done ]]; then
+      echo "Background shell should not have completed already." 1>&2
+    else
+      # Attempt to lock file with 0.5 second timeout: must fail.
+      zsystem flock -t 0.5 $tst_dir/file
+    fi
+  )
+2:zsystem flock unsuccessful wait test
+F:This timing test might fail due to process scheduling issues unrelated to zsh.
+
+  (
+    # Lock file for 0.5 second in the background.
+    lock_flag=$tst_dir/locked2
+    (zsystem flock $tst_dir/file \
+      && touch $lock_flag \
+      && zselect -t 50
+      mv $lock_flag $lock_flag.done) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
+      zselect -t 1
+    done
+    if [[ -f $lock_flag.done ]]; then
+      echo "Background shell should not have completed already." 1>&2
+    fi
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file without a timeout:
+    # must succeed after sub-shell above releases it (0.5 second).
+    if zsystem flock $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.3 && $elapsed -le 0.7 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 0.5 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, no timeout
+?elapsed time seems OK
+F:This timing test might fail due to process scheduling issues unrelated to zsh.
+
+  (
+    # Lock file for 0.5 second in the background.
+    lock_flag=$tst_dir/locked3
+    (zsystem flock $tst_dir/file \
+      && touch $lock_flag \
+      && zselect -t 50
+      mv $lock_flag $lock_flag.done) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
+      zselect -t 1
+    done
+    if [[ -f $lock_flag.done ]]; then
+      echo "Background shell should not have completed already." 1>&2
+    fi
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file with 1-second timeout:
+    # must succeed 1 second after start because we retry every 1 second.
+    if zsystem flock -t 1 $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.8 && $elapsed -le 1.2 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 1 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, integral seconds
+?elapsed time seems OK
+F:This timing test might fail due to process scheduling issues unrelated to zsh.
+
+  (
+    # Lock file for 0.25 second in the background.
+    lock_flag=$tst_dir/locked4
+    (zsystem flock $tst_dir/file \
+      && touch $lock_flag \
+      && zselect -t 25
+      mv $lock_flag $lock_flag.done) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
+      zselect -t 1
+    done
+    if [[ -f $lock_flag.done ]]; then
+      echo "Background shell should not have completed already." 1>&2
+    fi
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file with 0.4-second timeout, retrying every 0.1 second:
+    # must succeed 0.3 second after start.
+    if zsystem flock -t 0.4 -i 0.1 $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.2 && $elapsed -le 0.5 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 0.3 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, fractional seconds
+?elapsed time seems OK
+F:This timing test might fail due to process scheduling issues unrelated to zsh.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-03-15 16:03                         ` Cedric Ware
@ 2020-03-15 16:54                           ` Daniel Shahaf
  2020-03-15 17:35                             ` Peter Stephenson
                                               ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Daniel Shahaf @ 2020-03-15 16:54 UTC (permalink / raw)
  To: Cedric Ware; +Cc: dana, zsh-workers

Cedric Ware wrote on Sun, Mar 15, 2020 at 17:03:24 +0100:
> Daniel Shahaf (Sunday 2020-03-15):
> > Cedric Ware wrote on Sat, 14 Mar 2020 22:04 +0100:
> > > +++ zsh-5.8/Doc/Zsh/mod_system.yo	2020-03-08 18:39:32.672683779 +0100
> > > @@ -166,7 +166,7 @@
> > >  printed in the last case, but the parameter tt(ERRNO) will reflect
> > >  the error that occurred.
> > >  )
> > > -xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
> > > +xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-i) var(interval) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
> > >  item(tt(zsystem flock -u) var(fd_expr))(
> > 
> > Please s/var(var)/var(interval)/ in the body paragraphs of the item()().
> 
> I don't understand.  Option -f's parameter is actually called "var".
> The only occurrences of "var(var)" here are about -f, I didn't add them.

My bad; I'd misread the diff.  Sorry for the noise.

> > > +		timeout_tmp = timeout_param.u.d * 1e6;
> > > +		if ((timeout_tmp < 1) || (timeout_tmp > ZLONG_MAX / 2)) {
> > > +		    zwarnnam(nam, "flock: invalid timeout value");
> > 
> > I think the invalid value should be included in the error message,
> > either as a number, or as a string if needed: "flock: invalid timeout
> > value: '%s'".
> 
> Done, but it had to be as a string, because I didn't see a way to use
> a zlong in zwarnnam().

That doesn't seem to be supported.  It could be added, if needed.

> > dana, I see you advocated the opposite approach in 45283.  What's your
> > rationale?
> 
> I think that was about my earlier attempt to include the actual limit
> in the error message, not the input.  I reverted to a generic message,
> because I don't know how to make the test script check for an error
> message that depends on the system/compiler options.

How would the message depend on the system/compiler options?

In any case, you might be able to address this by specifying the
expectations as patterns:

    # Each line of a '>' and '?' chunk may be preceded by a '*', so the line
    # starts '*>' or '*?'.  This signifies that for any line with '*' in front
    # the actual output should be pattern matched against the corresponding
    # lines in the test output.  Each line following '>' or '?' must be a
    # valid pattern, so characters special to patterns such as parentheses
    # must be quoted with a backslash.  The EXTENDED_GLOB option is used for
    # all such patterns.

> > > +	zlong now;
> > > +	zlong end = time_clock_us() + timeout;
> > 
> > Could this sum overflow (for example, if the -t option is used)?
> 
> I don't think so: timeout is limited to ZLONG_MAX / 2, which
> corresponds to 140000 years or so if zlong is 64-bit.  For the sum
> to overflow, the current time as given by either gettimeofday() or
> clock_gettime(CLOCK_MONOTONIC) would have to also be greater than that.
> 

Sounds good.

> OTOH, if zlong is only 32-bit, there could be a problem, and in fact my
> function time_clock_us() might not work.  However, it was suggested in
> message 45250 that it shouldn't be my problem.  Should it be?  Should
> this whole patch be disabled somehow on systems that don't support any
> 64-bit type?

I think pws was saying there that failure to detect a 64-bit type _when
one is available_ isn't your problem; i.e., that you can trust configure
to detect a 64-bit type when one is available.

How likely is zsh 5.9 to be ported to a platform that doesn't have a
64-bit type?

If that's a concern, I'd recommend to issue a warning or disable the
feature when we detect that case.

I'm not sure whether that's a concern.

> > > +	    if (now + timeout_retry > end) {
> > 
> > Could this sum overflow (for example, if the -i option is used)?
> 
> It might if long and zlong are the same size, as timeout_retry
> is limited to LONG_MAX.  The program would then wait past the timeout
> by one interval.  But, again, this interval would have to be hundreds
> of thousands of years; if a user requests that, in my opinion, we might
> as well just wait forever.

Thanks, agreed.

> Still, would you like it better if I limited the interval to
> min(LONG_MAX, ZLONG_MAX / 2) instead of LONG_MAX?

Well, it sounds like that won't make any difference to bin_system_flock()'s
behaviour in practice (at least until someone has servers with uptimes
on the order of tens of kiloyears), so I don't have a strong preference.
I suppose I'd recommend whichever of these is more likely to remain
correct even if the code is copied elsewhere and adapted.

Thanks,

Daniel

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-03-15 16:54                           ` Daniel Shahaf
@ 2020-03-15 17:35                             ` Peter Stephenson
  2020-03-15 18:36                             ` Cedric Ware
  2020-04-13 21:34                             ` Cedric Ware
  2 siblings, 0 replies; 33+ messages in thread
From: Peter Stephenson @ 2020-03-15 17:35 UTC (permalink / raw)
  To: zsh-workers

On Sun, 2020-03-15 at 16:54 +0000, Daniel Shahaf wrote:
> > 
> > Done, but it had to be as a string, because I didn't see a way to use
> > a zlong in zwarnnam().
> 
> That doesn't seem to be supported.  It could be added, if needed.

Seems obvious enough maybe we should just add it?  A single use of it
would justify adding it for the extra clarity alone.

Not (yet) tested as not (yet) used.

pws

diff --git a/Src/utils.c b/Src/utils.c
index f9c2d4a2b..73d27dc94 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -130,6 +130,7 @@ set_widearray(char *mb_array, Widechar_array wca)
    %l	const char *, int	C string of given length (null not required)
    %L	long			decimal value
    %d	int			decimal value
+   %z	zlong			decimal value
    %%	(none)			literal '%'
    %c	int			character at that codepoint
    %e	int			strerror() message (argument is typically 'errno')
@@ -331,6 +332,14 @@ zerrmsg(FILE *file, const char *fmt, va_list ap)
 		num = va_arg(ap, int);
 		fprintf(file, "%d", num);
 		break;
+	    case 'z':
+	    {
+		zlong znum = va_arg(ap, zlong);
+		char buf[DIGBUFSIZE];
+		convbase(buf, znum, 10);
+		fputs(buf, file);
+		break;
+	    }
 	    case '%':
 		putc('%', file);
 		break;



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-03-15 16:54                           ` Daniel Shahaf
  2020-03-15 17:35                             ` Peter Stephenson
@ 2020-03-15 18:36                             ` Cedric Ware
  2020-03-15 19:13                               ` Daniel Shahaf
  2020-04-13 21:34                             ` Cedric Ware
  2 siblings, 1 reply; 33+ messages in thread
From: Cedric Ware @ 2020-03-15 18:36 UTC (permalink / raw)
  To: Daniel Shahaf; +Cc: dana, zsh-workers


Daniel Shahaf (Sunday 2020-03-15):
> Cedric Ware wrote on Sun, Mar 15, 2020 at 17:03:24 +0100:
> > I think that was about my earlier attempt to include the actual limit
> > in the error message, not the input.  I reverted to a generic message,
> > because I don't know how to make the test script check for an error
> > message that depends on the system/compiler options.
> 
> How would the message depend on the system/compiler options?

Not with your suggestion, which I implemented.  My original idea was
for an error message like "invalid timeout value, maximum is N", where
N would be ZLONG_MAX / 2 / 1000000.  But to display the latter, I'd
have needed a way to output a zlong, which has just now been proposed.
Would it be useful?

> In any case, you might be able to address this by specifying the
> expectations as patterns:

OK, thanks.

> How likely is zsh 5.9 to be ported to a platform that doesn't have a
> 64-bit type?
> 
> If that's a concern, I'd recommend to issue a warning or disable the
> feature when we detect that case.
> 
> I'm not sure whether that's a concern.

Well, neither am I. :-)  Essentially we'd have to keep both the old and
the new code alongside, with an #if sizeof(zlong) < 8 to choose one.
Cumbersome, but it could be done.

> > Still, would you like it better if I limited the interval to
> > min(LONG_MAX, ZLONG_MAX / 2) instead of LONG_MAX?
> 
> Well, it sounds like that won't make any difference to bin_system_flock()'s
> behaviour in practice (at least until someone has servers with uptimes
> on the order of tens of kiloyears), so I don't have a strong preference.
> I suppose I'd recommend whichever of these is more likely to remain
> correct even if the code is copied elsewhere and adapted.

Changing the limit should be easy.  I see there is a macro minimum().
Can I assume that a long can always be promoted to a zlong?

					Thanks, best regards,
					Cedric Ware.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-03-15 18:36                             ` Cedric Ware
@ 2020-03-15 19:13                               ` Daniel Shahaf
  0 siblings, 0 replies; 33+ messages in thread
From: Daniel Shahaf @ 2020-03-15 19:13 UTC (permalink / raw)
  To: Cedric Ware; +Cc: dana, zsh-workers

Cedric Ware wrote on Sun, 15 Mar 2020 18:36 +00:00:
> 
> Daniel Shahaf (Sunday 2020-03-15):
> > Cedric Ware wrote on Sun, Mar 15, 2020 at 17:03:24 +0100:
> > > I think that was about my earlier attempt to include the actual limit
> > > in the error message, not the input.  I reverted to a generic message,
> > > because I don't know how to make the test script check for an error
> > > message that depends on the system/compiler options.
> > 
> > How would the message depend on the system/compiler options?
> 
> Not with your suggestion, which I implemented.  My original idea was
> for an error message like "invalid timeout value, maximum is N", where
> N would be ZLONG_MAX / 2 / 1000000.  But to display the latter, I'd
> have needed a way to output a zlong, which has just now been proposed.
> Would it be useful?

I think the important part is to display the invalid input.  I don't
immediately see a situation in which I'd run into the input validation
error message and be glad that it spelled out the value of (2**63 / 2e6) for me.

> > > Still, would you like it better if I limited the interval to
> > > min(LONG_MAX, ZLONG_MAX / 2) instead of LONG_MAX?
> > 
> > Well, it sounds like that won't make any difference to bin_system_flock()'s
> > behaviour in practice (at least until someone has servers with uptimes
> > on the order of tens of kiloyears), so I don't have a strong preference.
> > I suppose I'd recommend whichever of these is more likely to remain
> > correct even if the code is copied elsewhere and adapted.
> 
> Changing the limit should be easy.  I see there is a macro minimum().
> Can I assume that a long can always be promoted to a zlong?

Given that we assume zlong is >= 64 bits long, the only way this can be
false is if long is int128_t (or larger) and zlong is int64_t, isn't it?
I don't know of any platform where that'd be true.

Cheers,

Daniel

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-03-15 16:54                           ` Daniel Shahaf
  2020-03-15 17:35                             ` Peter Stephenson
  2020-03-15 18:36                             ` Cedric Ware
@ 2020-04-13 21:34                             ` Cedric Ware
  2020-04-14 11:47                               ` Daniel Shahaf
  2 siblings, 1 reply; 33+ messages in thread
From: Cedric Ware @ 2020-04-13 21:34 UTC (permalink / raw)
  To: zsh-workers


	Hello,

About the patch I submitted last month enabling fractional seconds for
the timeout in zsystem flock, there was the following outstanding issue.


Daniel Shahaf (Sunday 2020-03-15):
> How likely is zsh 5.9 to be ported to a platform that doesn't have a
> 64-bit type?
> 
> If that's a concern, I'd recommend to issue a warning or disable the
> feature when we detect that case.
> 
> I'm not sure whether that's a concern.

If it is, here is a reimplementation of the patch that doesn't depend
on a 64-bit type.  Instead of counting time in microseconds, I've used
struct timespec and a function similar to zgettime() but that uses
CLOCK_MONOTONIC if available instead of CLOCK_REALTIME.

(I haven't managed to avoid some code duplication, because a common
function would require choosing between the two clocks, thus need a
parameter of type clockid_t, which is not always defined.  So I don't
know how to write the prototype of a common function.)

Another difference is that the timeout length is now limited to 2^30-1
seconds (34 years) so as to avoid overflowing time_t on 32-bit systems.
There is one case where it may overflow, but it can't be avoided: if
time_t is 32-bit signed and CLOCK_MONOTONIC isn't available or returns
the wall clock instead of e.g. the uptime.  Then, specifying a timeout
beyond year 2038 won't work, but then the system has a Y2038 problem
anyway.  I don't see how to avoid it short of checking the timeout
against Y2038, which may actually create a Y2038 problem on systems
that wouldn't otherwise have it.

Also, the retry interval is now limited to 0.999*LONG_MAX microseconds;
the 0.999 avoids a rounding problem that causes an overflow on 64-bit
systems (the value is given as a double, but a double can't represent
2^63-1 exactly).

I think that's all.  I believe I checked all code paths, though I don't
know how to do it systematically.

					Thanks, best regards,
					Cedric Ware.


diff --git a/Doc/Zsh/mod_system.yo b/Doc/Zsh/mod_system.yo
index 6292af071..be9a99f62 100644
--- a/Doc/Zsh/mod_system.yo
+++ b/Doc/Zsh/mod_system.yo
@@ -166,7 +166,7 @@ to the command, or 2 for an error on the write; no error message is
 printed in the last case, but the parameter tt(ERRNO) will reflect
 the error that occurred.
 )
-xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
+xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-i) var(interval) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
 item(tt(zsystem flock -u) var(fd_expr))(
 The builtin tt(zsystem)'s subcommand tt(flock) performs advisory file
 locking (via the manref(fcntl)(2) system call) over the entire contents
@@ -196,9 +196,16 @@ a safety check that the file descriptor is in use for file locking.
 
 By default the shell waits indefinitely for the lock to succeed.
 The option tt(-t) var(timeout) specifies a timeout for the lock in
-seconds; currently this must be an integer.  The shell will attempt
-to lock the file once a second during this period.  If the attempt
-times out, status 2 is returned.
+seconds; fractional seconds are allowed.  During this period, the
+shell will attempt to lock the file every var(interval) seconds
+if the tt(-i) var(interval) option is given, otherwise once a second.
+(This var(interval) is shortened before the last attempt if needed,
+so that the shell waits only until the var(timeout) and not longer.)
+If the attempt times out, status 2 is returned.
+
+(Note: var(timeout) must be less than 2^30-1 seconds (about 34 years);
+and var(interval) must be less than 0.999 * LONG_MAX microseconds
+(only about 35 minutes on 32-bit systems).)
 
 If the option tt(-e) is given, the file descriptor for the lock is
 preserved when the shell uses tt(exec) to start a new process;
diff --git a/Src/Modules/system.c b/Src/Modules/system.c
index fb3d80773..a60c18f5d 100644
--- a/Src/Modules/system.c
+++ b/Src/Modules/system.c
@@ -29,6 +29,7 @@
 
 #include "system.mdh"
 #include "system.pro"
+#include <math.h>
 
 #ifdef HAVE_POLL_H
 # include <poll.h>
@@ -531,7 +532,9 @@ static int
 bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
 {
     int cloexec = 1, unlock = 0, readlock = 0;
-    zlong timeout = -1;
+    double timeout = -1;
+    long timeout_interval = 1e6;
+    mnumber timeout_param;
     char *fdvar = NULL;
 #ifdef HAVE_FCNTL_H
     struct flock lck;
@@ -583,7 +586,51 @@ bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
 		} else {
 		    optarg = *args++;
 		}
-		timeout = mathevali(optarg);
+		timeout_param = matheval(optarg);
+		timeout = (timeout_param.type & MN_FLOAT)?
+		    timeout_param.u.d : (double)timeout_param.u.l;
+
+		/*
+		 * timeout must not overflow time_t, but little is known
+		 * about this type's limits.  Conservatively limit to 2^30-1
+		 * (34 years).  Then it can only overflow if time_t is only
+		 * a 32-bit int and CLOCK_MONOTONIC is not supported, in which
+		 * case there is a Y2038 problem anyway.
+		 */
+		if ((timeout < 1e-6) || (timeout > 1073741823.)) {
+		    zwarnnam(nam, "flock: invalid timeout value: '%s'",
+			     optarg);
+		    return 1;
+		}
+		break;
+
+	    case 'i':
+		/* retry interval in seconds */
+		if (optptr[1]) {
+		    optarg = optptr + 1;
+		    optptr += strlen(optarg) - 1;
+		} else if (!*args) {
+		    zwarnnam(nam,
+			     "flock: option %c requires "
+			     "a numeric retry interval",
+			     opt);
+		    return 1;
+		} else {
+		    optarg = *args++;
+		}
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout_param.u.d *= 1e6;
+		if ((timeout_param.u.d < 1)
+		    || (timeout_param.u.d > 0.999 * LONG_MAX)) {
+		    zwarnnam(nam, "flock: invalid interval value: '%s'",
+			     optarg);
+		    return 1;
+		}
+		timeout_interval = (long)timeout_param.u.d;
 		break;
 
 	    case 'u':
@@ -647,7 +694,24 @@ bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
     lck.l_len = 0;  /* lock the whole file */
 
     if (timeout > 0) {
-	time_t end = time(NULL) + (time_t)timeout;
+	/*
+	 * Get current time, calculate timeout time.
+	 * No need to check for overflow, already checked above.
+	 */
+	struct timespec now, end;
+	double timeout_s;
+	long remaining_us;
+	zgettime_monotonic_if_available(&now);
+	end.tv_sec = now.tv_sec;
+	end.tv_nsec = now.tv_nsec;
+	end.tv_nsec += modf(timeout, &timeout_s) * 1000000000L;
+	end.tv_sec += timeout_s;
+	if (end.tv_nsec >= 1000000000L) {
+	    end.tv_nsec -= 1000000000L;
+	    end.tv_sec += 1;
+	}
+
+	/* Try acquiring lock, loop until timeout. */
 	while (fcntl(flock_fd, F_SETLK, &lck) < 0) {
 	    if (errflag) {
                 zclose(flock_fd);
@@ -658,11 +722,16 @@ bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
 		zwarnnam(nam, "failed to lock file %s: %e", args[0], errno);
 		return 1;
 	    }
-	    if (time(NULL) >= end) {
+	    zgettime_monotonic_if_available(&now);
+	    remaining_us = timespec_diff_us(&now, &end);
+	    if (remaining_us <= 0) {
                 zclose(flock_fd);
 		return 2;
             }
-	    sleep(1);
+	    if (remaining_us <= timeout_interval) {
+		timeout_interval = remaining_us;
+	    }
+	    zsleep(timeout_interval);
 	}
     } else {
 	while (fcntl(flock_fd, timeout == 0 ? F_SETLK : F_SETLKW, &lck) < 0) {
diff --git a/Src/compat.c b/Src/compat.c
index 74e426fba..817bb4aaf 100644
--- a/Src/compat.c
+++ b/Src/compat.c
@@ -126,6 +126,32 @@ zgettime(struct timespec *ts)
     return ret;
 }
 
+/* Likewise with CLOCK_MONOTONIC if available. */
+
+/**/
+mod_export int
+zgettime_monotonic_if_available(struct timespec *ts)
+{
+    int ret = -1;
+
+#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+    struct timespec dts;
+    if (clock_gettime(CLOCK_MONOTONIC, &dts) < 0) {
+	zwarn("unable to retrieve CLOCK_MONOTONIC time: %e", errno);
+	ret--;
+    } else {
+	ret++;
+	ts->tv_sec = (time_t) dts.tv_sec;
+	ts->tv_nsec = (long) dts.tv_nsec;
+    }
+#endif
+
+    if (ret) {
+	ret = zgettime(ts);
+    }
+    return ret;
+}
+
 
 /* compute the difference between two calendar times */
 
diff --git a/Src/utils.c b/Src/utils.c
index 69885fed3..db5626830 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -2748,6 +2748,42 @@ read_poll(int fd, int *readchar, int polltty, zlong microseconds)
     return (ret > 0);
 }
 
+/*
+ * Return the difference between 2 times, given as struct timespec*,
+ * expressed in microseconds, as a long.  If the difference doesn't fit
+ * into a long, return LONG_MIN or LONG_MAX so that the times can still
+ * be compared.
+ *
+ * Note: returns a long rather than a zlong because zsleep() below
+ * takes a long.
+ */
+
+/**/
+long
+timespec_diff_us(const struct timespec *t1, const struct timespec *t2)
+{
+    int reverse = (t1->tv_sec > t2->tv_sec);
+    time_t diff_sec;
+    long diff_usec, max_margin, res;
+
+    /* Don't just subtract t2-t1 because time_t might be unsigned. */
+    diff_sec = (reverse? t1->tv_sec - t2->tv_sec : t2->tv_sec - t1->tv_sec);
+    if (diff_sec > LONG_MAX / 1000000L) {
+	goto overflow;
+    }
+    res = diff_sec * 1000000L;
+    max_margin = LONG_MAX - res;
+    diff_usec = (reverse?
+		 t1->tv_nsec - t2->tv_nsec : t2->tv_nsec - t1->tv_nsec
+		 ) / 1000;
+    if (diff_usec <= max_margin) {
+	res += diff_usec;
+	return (reverse? -res : res);
+    }
+ overflow:
+    return (reverse? LONG_MIN : LONG_MAX);
+}
+
 /*
  * Sleep for the given number of microseconds --- must be within
  * range of a long at the moment, but this is only used for
diff --git a/Test/V14system.ztst b/Test/V14system.ztst
index e69de29bb..79a2c0a77 100644
--- a/Test/V14system.ztst
+++ b/Test/V14system.ztst
@@ -0,0 +1,150 @@
+# Test zsh/system module
+
+%prep
+
+  if zmodload -s zsh/system && zmodload -s zsh/zselect; then
+    tst_dir=V13.tmp
+    mkdir -p -- $tst_dir
+  else
+    ZTST_unimplemented='the zsh/system and zsh/zselect modules are not available'
+  fi
+  : > $tst_dir/file # File on which to acquire flock.
+
+%test
+
+  (
+    zsystem flock -t 0.1 -i 0.000001 $tst_dir/file
+  )
+0:zsystem flock valid time arguments
+
+  (
+    zsystem flock -t -1         $tst_dir/file ||
+    zsystem flock -t 0.49e-6    $tst_dir/file ||
+    zsystem flock -t 1073741824 $tst_dir/file ||
+    zsystem flock -t 1e100      $tst_dir/file ||
+    zsystem flock -i -1         $tst_dir/file ||
+    zsystem flock -i 0.49e-6    $tst_dir/file ||
+    zsystem flock -i 1e100      $tst_dir/file
+  )
+1:zsystem flock invalid time arguments
+?(eval):zsystem:2: flock: invalid timeout value: '-1'
+?(eval):zsystem:3: flock: invalid timeout value: '0.49e-6'
+?(eval):zsystem:4: flock: invalid timeout value: '1073741824'
+?(eval):zsystem:5: flock: invalid timeout value: '1e100'
+?(eval):zsystem:6: flock: invalid interval value: '-1'
+?(eval):zsystem:7: flock: invalid interval value: '0.49e-6'
+?(eval):zsystem:8: flock: invalid interval value: '1e100'
+
+  (
+    # Lock file for 1 second in the background.
+    lock_flag=$tst_dir/locked1
+    (zsystem flock $tst_dir/file \
+     && touch $lock_flag \
+     && zselect -t 100
+     mv $lock_flag $lock_flag.done) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
+      zselect -t 1
+    done
+    if [[ -f $lock_flag.done ]]; then
+      echo "Background shell should not have completed already." 1>&2
+    else
+      # Attempt to lock file with 0.5 second timeout: must fail.
+      zsystem flock -t 0.5 $tst_dir/file
+    fi
+  )
+2:zsystem flock unsuccessful wait test
+F:This timing test might fail due to process scheduling issues unrelated to zsh.
+
+  (
+    # Lock file for 0.5 second in the background.
+    lock_flag=$tst_dir/locked2
+    (zsystem flock $tst_dir/file \
+      && touch $lock_flag \
+      && zselect -t 50
+      mv $lock_flag $lock_flag.done) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
+      zselect -t 1
+    done
+    if [[ -f $lock_flag.done ]]; then
+      echo "Background shell should not have completed already." 1>&2
+    fi
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file without a timeout:
+    # must succeed after sub-shell above releases it (0.5 second).
+    if zsystem flock $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.3 && $elapsed -le 0.7 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 0.5 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, no timeout
+?elapsed time seems OK
+F:This timing test might fail due to process scheduling issues unrelated to zsh.
+
+  (
+    # Lock file for 0.5 second in the background.
+    lock_flag=$tst_dir/locked3
+    (zsystem flock $tst_dir/file \
+      && touch $lock_flag \
+      && zselect -t 50
+      mv $lock_flag $lock_flag.done) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
+      zselect -t 1
+    done
+    if [[ -f $lock_flag.done ]]; then
+      echo "Background shell should not have completed already." 1>&2
+    fi
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file with 1-second timeout:
+    # must succeed 1 second after start because we retry every 1 second.
+    if zsystem flock -t 1 $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.8 && $elapsed -le 1.2 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 1 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, integral seconds
+?elapsed time seems OK
+F:This timing test might fail due to process scheduling issues unrelated to zsh.
+
+  (
+    # Lock file for 0.25 second in the background.
+    lock_flag=$tst_dir/locked4
+    (zsystem flock $tst_dir/file \
+      && touch $lock_flag \
+      && zselect -t 25
+      mv $lock_flag $lock_flag.done) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
+      zselect -t 1
+    done
+    if [[ -f $lock_flag.done ]]; then
+      echo "Background shell should not have completed already." 1>&2
+    fi
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file with 0.4-second timeout, retrying every 0.1 second:
+    # must succeed 0.3 second after start.
+    if zsystem flock -t 0.4 -i 0.1 $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.2 && $elapsed -le 0.5 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 0.3 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, fractional seconds
+?elapsed time seems OK
+F:This timing test might fail due to process scheduling issues unrelated to zsh.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-04-13 21:34                             ` Cedric Ware
@ 2020-04-14 11:47                               ` Daniel Shahaf
  2020-04-14 20:21                                 ` Cedric Ware
  0 siblings, 1 reply; 33+ messages in thread
From: Daniel Shahaf @ 2020-04-14 11:47 UTC (permalink / raw)
  To: Cedric Ware; +Cc: zsh-workers

Cedric Ware wrote on Mon, 13 Apr 2020 23:34 +0200:
> 	Hello,
> 
> About the patch I submitted last month enabling fractional seconds for
> the timeout in zsystem flock, there was the following outstanding issue.
> 
> 
> Daniel Shahaf (Sunday 2020-03-15):
> > How likely is zsh 5.9 to be ported to a platform that doesn't have a
> > 64-bit type?
> > 
> > If that's a concern, I'd recommend to issue a warning or disable the
> > feature when we detect that case.
> > 
> > I'm not sure whether that's a concern.  
> 
> If it is, here is a reimplementation of the patch that doesn't depend
> on a 64-bit type.  Instead of counting time in microseconds, I've used
> struct timespec and a function similar to zgettime() but that uses
> CLOCK_MONOTONIC if available instead of CLOCK_REALTIME.
> 

Wow.  Thank you very much for the rewrite, Cedric.  I'm afraid I wasn't
clear, though.  All I wanted to say was: (1) _If_ zsh 5.9 is likely to
be ported to a 64-bit-type-less platform, we should add a preprocessor
check to catch that case (at either configure- or compile-time);
(2) I don't know what's the probability of the antecedent.  I didn't
mean to ask for a rewrite, not in the least — only for a quick,
three-liner «#if» block — but that's water under the bridge now.
Thanks for going the extra mile :-)

Cheers,

Daniel


> (I haven't managed to avoid some code duplication, because a common
> function would require choosing between the two clocks, thus need a
> parameter of type clockid_t, which is not always defined.  So I don't
> know how to write the prototype of a common function.)
> 
> Another difference is that the timeout length is now limited to 2^30-1
> seconds (34 years) so as to avoid overflowing time_t on 32-bit systems.
> There is one case where it may overflow, but it can't be avoided: if
> time_t is 32-bit signed and CLOCK_MONOTONIC isn't available or returns
> the wall clock instead of e.g. the uptime.  Then, specifying a timeout
> beyond year 2038 won't work, but then the system has a Y2038 problem
> anyway.  I don't see how to avoid it short of checking the timeout
> against Y2038, which may actually create a Y2038 problem on systems
> that wouldn't otherwise have it.
> 
> Also, the retry interval is now limited to 0.999*LONG_MAX microseconds;
> the 0.999 avoids a rounding problem that causes an overflow on 64-bit
> systems (the value is given as a double, but a double can't represent
> 2^63-1 exactly).
> 
> I think that's all.  I believe I checked all code paths, though I don't
> know how to do it systematically.
> 
> 					Thanks, best regards,
> 					Cedric Ware.
> 
> 
> diff --git a/Doc/Zsh/mod_system.yo b/Doc/Zsh/mod_system.yo
> index 6292af071..be9a99f62 100644
> --- a/Doc/Zsh/mod_system.yo
> +++ b/Doc/Zsh/mod_system.yo
> @@ -166,7 +166,7 @@ to the command, or 2 for an error on the write; no error message is
>  printed in the last case, but the parameter tt(ERRNO) will reflect
>  the error that occurred.
>  )
> -xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
> +xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-i) var(interval) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
>  item(tt(zsystem flock -u) var(fd_expr))(
>  The builtin tt(zsystem)'s subcommand tt(flock) performs advisory file
>  locking (via the manref(fcntl)(2) system call) over the entire contents
> @@ -196,9 +196,16 @@ a safety check that the file descriptor is in use for file locking.
>  
>  By default the shell waits indefinitely for the lock to succeed.
>  The option tt(-t) var(timeout) specifies a timeout for the lock in
> -seconds; currently this must be an integer.  The shell will attempt
> -to lock the file once a second during this period.  If the attempt
> -times out, status 2 is returned.
> +seconds; fractional seconds are allowed.  During this period, the
> +shell will attempt to lock the file every var(interval) seconds
> +if the tt(-i) var(interval) option is given, otherwise once a second.
> +(This var(interval) is shortened before the last attempt if needed,
> +so that the shell waits only until the var(timeout) and not longer.)
> +If the attempt times out, status 2 is returned.
> +
> +(Note: var(timeout) must be less than 2^30-1 seconds (about 34 years);
> +and var(interval) must be less than 0.999 * LONG_MAX microseconds
> +(only about 35 minutes on 32-bit systems).)
>  
>  If the option tt(-e) is given, the file descriptor for the lock is
>  preserved when the shell uses tt(exec) to start a new process;
> diff --git a/Src/Modules/system.c b/Src/Modules/system.c
> index fb3d80773..a60c18f5d 100644
> --- a/Src/Modules/system.c
> +++ b/Src/Modules/system.c
> @@ -29,6 +29,7 @@
>  
>  #include "system.mdh"
>  #include "system.pro"
> +#include <math.h>
>  
>  #ifdef HAVE_POLL_H
>  # include <poll.h>
> @@ -531,7 +532,9 @@ static int
>  bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
>  {
>      int cloexec = 1, unlock = 0, readlock = 0;
> -    zlong timeout = -1;
> +    double timeout = -1;
> +    long timeout_interval = 1e6;
> +    mnumber timeout_param;
>      char *fdvar = NULL;
>  #ifdef HAVE_FCNTL_H
>      struct flock lck;
> @@ -583,7 +586,51 @@ bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
>  		} else {  
>  		    optarg = *args++;
>  		}  
> -		timeout = mathevali(optarg);
> +		timeout_param = matheval(optarg);
> +		timeout = (timeout_param.type & MN_FLOAT)?
> +		    timeout_param.u.d : (double)timeout_param.u.l;
> +
> +		/*
> +		 * timeout must not overflow time_t, but little is known
> +		 * about this type's limits.  Conservatively limit to 2^30-1
> +		 * (34 years).  Then it can only overflow if time_t is only
> +		 * a 32-bit int and CLOCK_MONOTONIC is not supported, in which
> +		 * case there is a Y2038 problem anyway.
> +		 */
> +		if ((timeout < 1e-6) || (timeout > 1073741823.)) {
> +		    zwarnnam(nam, "flock: invalid timeout value: '%s'",
> +			     optarg);
> +		    return 1;
> +		}
> +		break;
> +
> +	    case 'i':
> +		/* retry interval in seconds */
> +		if (optptr[1]) {
> +		    optarg = optptr + 1;
> +		    optptr += strlen(optarg) - 1;
> +		} else if (!*args) {
> +		    zwarnnam(nam,
> +			     "flock: option %c requires "
> +			     "a numeric retry interval",
> +			     opt);
> +		    return 1;
> +		} else {
> +		    optarg = *args++;
> +		}
> +		timeout_param = matheval(optarg);
> +		if (!(timeout_param.type & MN_FLOAT)) {
> +		    timeout_param.type = MN_FLOAT;
> +		    timeout_param.u.d = (double)timeout_param.u.l;
> +		}
> +		timeout_param.u.d *= 1e6;
> +		if ((timeout_param.u.d < 1)
> +		    || (timeout_param.u.d > 0.999 * LONG_MAX)) {
> +		    zwarnnam(nam, "flock: invalid interval value: '%s'",
> +			     optarg);
> +		    return 1;
> +		}
> +		timeout_interval = (long)timeout_param.u.d;
>  		break;
>  
>  	    case 'u':
> @@ -647,7 +694,24 @@ bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
>      lck.l_len = 0;  /* lock the whole file */
>  
>      if (timeout > 0) {
> -	time_t end = time(NULL) + (time_t)timeout;
> +	/*
> +	 * Get current time, calculate timeout time.
> +	 * No need to check for overflow, already checked above.
> +	 */
> +	struct timespec now, end;
> +	double timeout_s;
> +	long remaining_us;
> +	zgettime_monotonic_if_available(&now);
> +	end.tv_sec = now.tv_sec;
> +	end.tv_nsec = now.tv_nsec;
> +	end.tv_nsec += modf(timeout, &timeout_s) * 1000000000L;
> +	end.tv_sec += timeout_s;
> +	if (end.tv_nsec >= 1000000000L) {
> +	    end.tv_nsec -= 1000000000L;
> +	    end.tv_sec += 1;
> +	}
> +
> +	/* Try acquiring lock, loop until timeout. */
>  	while (fcntl(flock_fd, F_SETLK, &lck) < 0) {
>  	    if (errflag) {
>                  zclose(flock_fd);
> @@ -658,11 +722,16 @@ bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
>  		zwarnnam(nam, "failed to lock file %s: %e", args[0], errno);
>  		return 1;
>  	    }  
> -	    if (time(NULL) >= end) {
> +	    zgettime_monotonic_if_available(&now);
> +	    remaining_us = timespec_diff_us(&now, &end);
> +	    if (remaining_us <= 0) {
>                  zclose(flock_fd);
>  		return 2;
>              }  
> -	    sleep(1);
> +	    if (remaining_us <= timeout_interval) {
> +		timeout_interval = remaining_us;
> +	    }
> +	    zsleep(timeout_interval);
>  	}
>      } else {  
>  	while (fcntl(flock_fd, timeout == 0 ? F_SETLK : F_SETLKW, &lck) < 0) {
> diff --git a/Src/compat.c b/Src/compat.c
> index 74e426fba..817bb4aaf 100644
> --- a/Src/compat.c
> +++ b/Src/compat.c
> @@ -126,6 +126,32 @@ zgettime(struct timespec *ts)
>      return ret;
>  }  
>  
> +/* Likewise with CLOCK_MONOTONIC if available. */
> +
> +/**/
> +mod_export int
> +zgettime_monotonic_if_available(struct timespec *ts)
> +{
> +    int ret = -1;
> +
> +#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
> +    struct timespec dts;
> +    if (clock_gettime(CLOCK_MONOTONIC, &dts) < 0) {
> +	zwarn("unable to retrieve CLOCK_MONOTONIC time: %e", errno);
> +	ret--;
> +    } else {
> +	ret++;
> +	ts->tv_sec = (time_t) dts.tv_sec;
> +	ts->tv_nsec = (long) dts.tv_nsec;
> +    }
> +#endif
> +
> +    if (ret) {
> +	ret = zgettime(ts);
> +    }
> +    return ret;
> +}  
> +
>  
>  /* compute the difference between two calendar times */
>  
> diff --git a/Src/utils.c b/Src/utils.c
> index 69885fed3..db5626830 100644
> --- a/Src/utils.c
> +++ b/Src/utils.c
> @@ -2748,6 +2748,42 @@ read_poll(int fd, int *readchar, int polltty, zlong microseconds)
>      return (ret > 0);
>  }  
>  
> +/*
> + * Return the difference between 2 times, given as struct timespec*,
> + * expressed in microseconds, as a long.  If the difference doesn't fit
> + * into a long, return LONG_MIN or LONG_MAX so that the times can still
> + * be compared.
> + *
> + * Note: returns a long rather than a zlong because zsleep() below
> + * takes a long.
> + */
> +
> +/**/
> +long
> +timespec_diff_us(const struct timespec *t1, const struct timespec *t2)
> +{
> +    int reverse = (t1->tv_sec > t2->tv_sec);
> +    time_t diff_sec;
> +    long diff_usec, max_margin, res;
> +
> +    /* Don't just subtract t2-t1 because time_t might be unsigned. */
> +    diff_sec = (reverse? t1->tv_sec - t2->tv_sec : t2->tv_sec - t1->tv_sec);
> +    if (diff_sec > LONG_MAX / 1000000L) {
> +	goto overflow;
> +    }
> +    res = diff_sec * 1000000L;
> +    max_margin = LONG_MAX - res;
> +    diff_usec = (reverse?
> +		 t1->tv_nsec - t2->tv_nsec : t2->tv_nsec - t1->tv_nsec
> +		 ) / 1000;
> +    if (diff_usec <= max_margin) {
> +	res += diff_usec;
> +	return (reverse? -res : res);
> +    }
> + overflow:
> +    return (reverse? LONG_MIN : LONG_MAX);
> +}  
> +
>  /*
>   * Sleep for the given number of microseconds --- must be within
>   * range of a long at the moment, but this is only used for
> diff --git a/Test/V14system.ztst b/Test/V14system.ztst
> index e69de29bb..79a2c0a77 100644
> --- a/Test/V14system.ztst
> +++ b/Test/V14system.ztst
> @@ -0,0 +1,150 @@
> +# Test zsh/system module
> +
> +%prep
> +
> +  if zmodload -s zsh/system && zmodload -s zsh/zselect; then
> +    tst_dir=V13.tmp
> +    mkdir -p -- $tst_dir
> +  else
> +    ZTST_unimplemented='the zsh/system and zsh/zselect modules are not available'
> +  fi
> +  : > $tst_dir/file # File on which to acquire flock.
> +
> +%test
> +
> +  (
> +    zsystem flock -t 0.1 -i 0.000001 $tst_dir/file
> +  )
> +0:zsystem flock valid time arguments
> +
> +  (
> +    zsystem flock -t -1         $tst_dir/file ||
> +    zsystem flock -t 0.49e-6    $tst_dir/file ||
> +    zsystem flock -t 1073741824 $tst_dir/file ||
> +    zsystem flock -t 1e100      $tst_dir/file ||
> +    zsystem flock -i -1         $tst_dir/file ||
> +    zsystem flock -i 0.49e-6    $tst_dir/file ||
> +    zsystem flock -i 1e100      $tst_dir/file
> +  )
> +1:zsystem flock invalid time arguments
> +?(eval):zsystem:2: flock: invalid timeout value: '-1'
> +?(eval):zsystem:3: flock: invalid timeout value: '0.49e-6'
> +?(eval):zsystem:4: flock: invalid timeout value: '1073741824'
> +?(eval):zsystem:5: flock: invalid timeout value: '1e100'
> +?(eval):zsystem:6: flock: invalid interval value: '-1'
> +?(eval):zsystem:7: flock: invalid interval value: '0.49e-6'
> +?(eval):zsystem:8: flock: invalid interval value: '1e100'
> +
> +  (
> +    # Lock file for 1 second in the background.
> +    lock_flag=$tst_dir/locked1
> +    (zsystem flock $tst_dir/file \
> +     && touch $lock_flag \
> +     && zselect -t 100
> +     mv $lock_flag $lock_flag.done) &
> +    # Wait until sub-shell above has started.
> +    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
> +      zselect -t 1
> +    done
> +    if [[ -f $lock_flag.done ]]; then
> +      echo "Background shell should not have completed already." 1>&2
> +    else
> +      # Attempt to lock file with 0.5 second timeout: must fail.
> +      zsystem flock -t 0.5 $tst_dir/file
> +    fi
> +  )
> +2:zsystem flock unsuccessful wait test
> +F:This timing test might fail due to process scheduling issues unrelated to zsh.
> +
> +  (
> +    # Lock file for 0.5 second in the background.
> +    lock_flag=$tst_dir/locked2
> +    (zsystem flock $tst_dir/file \
> +      && touch $lock_flag \
> +      && zselect -t 50
> +      mv $lock_flag $lock_flag.done) &
> +    # Wait until sub-shell above has started.
> +    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
> +      zselect -t 1
> +    done
> +    if [[ -f $lock_flag.done ]]; then
> +      echo "Background shell should not have completed already." 1>&2
> +    fi
> +    typeset -F SECONDS
> +    start=$SECONDS
> +    # Attempt to lock file without a timeout:
> +    # must succeed after sub-shell above releases it (0.5 second).
> +    if zsystem flock $tst_dir/file; then
> +      elapsed=$[ $SECONDS - $start ]
> +      if [[ $elapsed -ge 0.3 && $elapsed -le 0.7 ]]; then
> +        echo "elapsed time seems OK" 1>&2
> +      else
> +        echo "elapsed time $elapsed should be ~ 0.5 second" 1>&2
> +      fi
> +    fi
> +  )
> +0:zsystem flock successful wait test, no timeout
> +?elapsed time seems OK
> +F:This timing test might fail due to process scheduling issues unrelated to zsh.
> +
> +  (
> +    # Lock file for 0.5 second in the background.
> +    lock_flag=$tst_dir/locked3
> +    (zsystem flock $tst_dir/file \
> +      && touch $lock_flag \
> +      && zselect -t 50
> +      mv $lock_flag $lock_flag.done) &
> +    # Wait until sub-shell above has started.
> +    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
> +      zselect -t 1
> +    done
> +    if [[ -f $lock_flag.done ]]; then
> +      echo "Background shell should not have completed already." 1>&2
> +    fi
> +    typeset -F SECONDS
> +    start=$SECONDS
> +    # Attempt to lock file with 1-second timeout:
> +    # must succeed 1 second after start because we retry every 1 second.
> +    if zsystem flock -t 1 $tst_dir/file; then
> +      elapsed=$[ $SECONDS - $start ]
> +      if [[ $elapsed -ge 0.8 && $elapsed -le 1.2 ]]; then
> +        echo "elapsed time seems OK" 1>&2
> +      else
> +        echo "elapsed time $elapsed should be ~ 1 second" 1>&2
> +      fi
> +    fi
> +  )
> +0:zsystem flock successful wait test, integral seconds
> +?elapsed time seems OK
> +F:This timing test might fail due to process scheduling issues unrelated to zsh.
> +
> +  (
> +    # Lock file for 0.25 second in the background.
> +    lock_flag=$tst_dir/locked4
> +    (zsystem flock $tst_dir/file \
> +      && touch $lock_flag \
> +      && zselect -t 25
> +      mv $lock_flag $lock_flag.done) &
> +    # Wait until sub-shell above has started.
> +    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
> +      zselect -t 1
> +    done
> +    if [[ -f $lock_flag.done ]]; then
> +      echo "Background shell should not have completed already." 1>&2
> +    fi
> +    typeset -F SECONDS
> +    start=$SECONDS
> +    # Attempt to lock file with 0.4-second timeout, retrying every 0.1 second:
> +    # must succeed 0.3 second after start.
> +    if zsystem flock -t 0.4 -i 0.1 $tst_dir/file; then
> +      elapsed=$[ $SECONDS - $start ]
> +      if [[ $elapsed -ge 0.2 && $elapsed -le 0.5 ]]; then
> +        echo "elapsed time seems OK" 1>&2
> +      else
> +        echo "elapsed time $elapsed should be ~ 0.3 second" 1>&2
> +      fi
> +    fi
> +  )
> +0:zsystem flock successful wait test, fractional seconds
> +?elapsed time seems OK
> +F:This timing test might fail due to process scheduling issues unrelated to zsh.


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-04-14 11:47                               ` Daniel Shahaf
@ 2020-04-14 20:21                                 ` Cedric Ware
  2020-04-15  1:15                                   ` Daniel Shahaf
  0 siblings, 1 reply; 33+ messages in thread
From: Cedric Ware @ 2020-04-14 20:21 UTC (permalink / raw)
  To: Daniel Shahaf; +Cc: zsh-workers


Daniel Shahaf (Tuesday 2020-04-14):
> Wow.  Thank you very much for the rewrite, Cedric.  I'm afraid I wasn't
> clear, though.  All I wanted to say was: (1) _If_ zsh 5.9 is likely to
> be ported to a 64-bit-type-less platform, we should add a preprocessor
> check to catch that case (at either configure- or compile-time);
> (2) I don't know what's the probability of the antecedent.  I didn't
> mean to ask for a rewrite, not in the least — only for a quick,
> three-liner «#if» block

I didn't mean to imply otherwise.  That's what I wanted to do at first,
but it was more than a single #if block, then I got confused between
the blocks, then I started having other ideas, then... well, now, here
we are.

> — but that's water under the bridge now.
> Thanks for going the extra mile :-)

You're welcome.  Hopefully this patch can now be integrated, unless
there are new issues?

					Best,
					Cedric Ware.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-04-14 20:21                                 ` Cedric Ware
@ 2020-04-15  1:15                                   ` Daniel Shahaf
  2020-04-15  2:05                                     ` dana
  0 siblings, 1 reply; 33+ messages in thread
From: Daniel Shahaf @ 2020-04-15  1:15 UTC (permalink / raw)
  To: Cedric Ware; +Cc: zsh-workers

Cedric Ware wrote on Tue, 14 Apr 2020 22:21 +0200:
> Hopefully this patch can now be integrated, unless there are new
> issues?

The issues I've raised before have all been addressed (other than the
new nits below); however, I haven't done a line-by-line review.

dana, would you happen to have time to review the Cedric's latest
revision (workers/45690)?  No problem if not; just asking since you
reviewed earlier revisions.

Cedric Ware wrote on Mon, 13 Apr 2020 23:34 +0200:
> +++ b/Doc/Zsh/mod_system.yo
> @@ -196,9 +196,16 @@ item(tt(zsystem flock -u) var(fd_expr))(
> +(Note: var(timeout) must be less than 2^30-1 seconds (about 34 years);
> +and var(interval) must be less than 0.999 * LONG_MAX microseconds
> +(only about 35 minutes on 32-bit systems).)

The semicolon feels out of place.  (Might be just me, though.)

> +++ b/Test/V14system.ztst
> @@ -0,0 +1,150 @@
> +# Test zsh/system module
> +
> +%prep
> +
> +  if zmodload -s zsh/system && zmodload -s zsh/zselect; then
> +    tst_dir=V13.tmp

s/V13/V14/

>

Cheers,

Daniel

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-04-15  1:15                                   ` Daniel Shahaf
@ 2020-04-15  2:05                                     ` dana
  2020-04-16  4:24                                       ` Daniel Shahaf
  0 siblings, 1 reply; 33+ messages in thread
From: dana @ 2020-04-15  2:05 UTC (permalink / raw)
  To: Daniel Shahaf; +Cc: Cedric Ware, zsh-workers

On 14 Apr 2020, at 20:15, Daniel Shahaf <d.s@daniel.shahaf.name> wrote:
> dana, would you happen to have time to review the Cedric's latest
> revision (workers/45690)?

I'm not sure i fully understand the implications of the 32-vs-64-bit stuff,
nor whether it matters enough to go to all the extra trouble. But aside from
that and the two bits you mentioned (agreed on the semicolon), the only thing
i noticed is that there are several instances where the formatting of the new
C code doesn't match its surroundings, e.g. unnecessary brackets in
((foo < bar) || (baz > qux)) and inconsistent white space in foo? bar : baz

Otherwise i think it makes sense, as far as i understand it, and the tests
pass on my machine. I don't see any functional issues

dana


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-04-15  2:05                                     ` dana
@ 2020-04-16  4:24                                       ` Daniel Shahaf
  2020-04-18 16:32                                         ` Cedric Ware
  0 siblings, 1 reply; 33+ messages in thread
From: Daniel Shahaf @ 2020-04-16  4:24 UTC (permalink / raw)
  To: dana; +Cc: Cedric Ware, zsh-workers

dana wrote on Tue, 14 Apr 2020 21:05 -0500:
> On 14 Apr 2020, at 20:15, Daniel Shahaf <d.s@daniel.shahaf.name> wrote:
> > dana, would you happen to have time to review the Cedric's latest
> > revision (workers/45690)?  
> 
> I'm not sure i fully understand the implications of the 32-vs-64-bit stuff,
> nor whether it matters enough to go to all the extra trouble.

It's a theory v. practice issue.

In theory, zsh is written in C89, and C89 doesn't guarantee a 64-bit
integer type.

In practice, C89 is 31 years old; C99 does guarantee a 64-bit type; and
we don't positively know of anyone porting zsh to a platform that
doesn't have a 64-bit type; so the concern may well be academic, or
nearly so.

> But aside from that and the two bits you mentioned (agreed on the
> semicolon), the only thing i noticed is that there are several
> instances where the formatting of the new C code doesn't match its
> surroundings, e.g. unnecessary brackets in ((foo < bar) || (baz > qux))

I don't mind these.  They add clarity and don't hurt readability.

> and inconsistent white space in foo? bar : baz

Agreed.  (I noticed them too, but didn't want to nitpick from the peanut
gallery.)

In other news, how about using real superscripts:

--- a/Doc/Zsh/mod_system.yo
+++ b/Doc/Zsh/mod_system.yo
@@ -203,7 +203,7 @@ if the tt(-i) var(interval) option is given, otherwise once a second.
 so that the shell waits only until the var(timeout) and not longer.)
 If the attempt times out, status 2 is returned.

-(Note: var(timeout) must be less than 2^30-1 seconds (about 34 years);
+(Note: var(timeout) must be less than ifzman(2^30-1)ifnzman(2NOTRANS(@sup{30})-1) seconds (about 34 years)
 and var(interval) must be less than 0.999 * LONG_MAX microseconds
 (only about 35 minutes on 32-bit systems).)

(To see the difference, «make -C Doc html pdf».)

> Otherwise i think it makes sense, as far as i understand it, and the tests
> pass on my machine. I don't see any functional issues

If you think the feature makes sense and the implementation is robust,
why not merge it (once the 64-bit question is settled)?  Between you and
Cedric there have already been two pairs of eyes on the code, after all.

Cheers,

Daniel

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-04-16  4:24                                       ` Daniel Shahaf
@ 2020-04-18 16:32                                         ` Cedric Ware
  2020-04-20 17:28                                           ` dana
  0 siblings, 1 reply; 33+ messages in thread
From: Cedric Ware @ 2020-04-18 16:32 UTC (permalink / raw)
  To: Daniel Shahaf; +Cc: dana, zsh-workers


Daniel Shahaf (Thursday 2020-04-16):
> In practice, C89 is 31 years old; C99 does guarantee a 64-bit type; and
> we don't positively know of anyone porting zsh to a platform that
> doesn't have a 64-bit type; so the concern may well be academic, or
> nearly so.

There's also the issue of maintainability: the new code, using
struct timespec, is slightly more complicated than the previous one
that used a hopefully-64-bit zlong.  And there could be a problem with
time_t on systems that have a Y2038 problem.

Here's a new patch, still using struct timespec, I believe I took into
account all the code formatting and documentation issues.

> If you think the feature makes sense and the implementation is robust,
> why not merge it (once the 64-bit question is settled)?  Between you and
> Cedric there have already been two pairs of eyes on the code, after all.

I'm all for it, of course. ;-)  You decide whether you want the zlong
one or the struct timespec one.  I believe the latter can be integrated
as-is using the patch below.  For the zlong one, I may have to go back
and fix one issue remaining if LONG_MAX were greater than ZLONG_MAX.

					Best,
					Cedric Ware.


diff --git a/Doc/Zsh/mod_system.yo b/Doc/Zsh/mod_system.yo
index 6292af071..06ea87918 100644
--- a/Doc/Zsh/mod_system.yo
+++ b/Doc/Zsh/mod_system.yo
@@ -166,7 +166,7 @@ to the command, or 2 for an error on the write; no error message is
 printed in the last case, but the parameter tt(ERRNO) will reflect
 the error that occurred.
 )
-xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
+xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-i) var(interval) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
 item(tt(zsystem flock -u) var(fd_expr))(
 The builtin tt(zsystem)'s subcommand tt(flock) performs advisory file
 locking (via the manref(fcntl)(2) system call) over the entire contents
@@ -196,9 +196,17 @@ a safety check that the file descriptor is in use for file locking.
 
 By default the shell waits indefinitely for the lock to succeed.
 The option tt(-t) var(timeout) specifies a timeout for the lock in
-seconds; currently this must be an integer.  The shell will attempt
-to lock the file once a second during this period.  If the attempt
-times out, status 2 is returned.
+seconds; fractional seconds are allowed.  During this period, the
+shell will attempt to lock the file every var(interval) seconds
+if the tt(-i) var(interval) option is given, otherwise once a second.
+(This var(interval) is shortened before the last attempt if needed,
+so that the shell waits only until the var(timeout) and not longer.)
+If the attempt times out, status 2 is returned.
+
+(Note: var(timeout) must be less than
+ifzman(2^30-1)ifnzman(2NOTRANS(@sup{30})-1) seconds (about 34 years),
+and var(interval) must be less than 0.999 * LONG_MAX microseconds
+(only about 35 minutes on 32-bit systems).)
 
 If the option tt(-e) is given, the file descriptor for the lock is
 preserved when the shell uses tt(exec) to start a new process;
diff --git a/Src/Modules/system.c b/Src/Modules/system.c
index fb3d80773..972aa0767 100644
--- a/Src/Modules/system.c
+++ b/Src/Modules/system.c
@@ -29,6 +29,7 @@
 
 #include "system.mdh"
 #include "system.pro"
+#include <math.h>
 
 #ifdef HAVE_POLL_H
 # include <poll.h>
@@ -531,7 +532,9 @@ static int
 bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
 {
     int cloexec = 1, unlock = 0, readlock = 0;
-    zlong timeout = -1;
+    double timeout = -1;
+    long timeout_interval = 1e6;
+    mnumber timeout_param;
     char *fdvar = NULL;
 #ifdef HAVE_FCNTL_H
     struct flock lck;
@@ -583,7 +586,51 @@ bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
 		} else {
 		    optarg = *args++;
 		}
-		timeout = mathevali(optarg);
+		timeout_param = matheval(optarg);
+		timeout = (timeout_param.type & MN_FLOAT) ?
+		    timeout_param.u.d : (double)timeout_param.u.l;
+
+		/*
+		 * timeout must not overflow time_t, but little is known
+		 * about this type's limits.  Conservatively limit to 2^30-1
+		 * (34 years).  Then it can only overflow if time_t is only
+		 * a 32-bit int and CLOCK_MONOTONIC is not supported, in which
+		 * case there is a Y2038 problem anyway.
+		 */
+		if (timeout < 1e-6 || timeout > 1073741823.) {
+		    zwarnnam(nam, "flock: invalid timeout value: '%s'",
+			     optarg);
+		    return 1;
+		}
+		break;
+
+	    case 'i':
+		/* retry interval in seconds */
+		if (optptr[1]) {
+		    optarg = optptr + 1;
+		    optptr += strlen(optarg) - 1;
+		} else if (!*args) {
+		    zwarnnam(nam,
+			     "flock: option %c requires "
+			     "a numeric retry interval",
+			     opt);
+		    return 1;
+		} else {
+		    optarg = *args++;
+		}
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout_param.u.d *= 1e6;
+		if (timeout_param.u.d < 1
+		    || timeout_param.u.d > 0.999 * LONG_MAX) {
+		    zwarnnam(nam, "flock: invalid interval value: '%s'",
+			     optarg);
+		    return 1;
+		}
+		timeout_interval = (long)timeout_param.u.d;
 		break;
 
 	    case 'u':
@@ -647,7 +694,24 @@ bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
     lck.l_len = 0;  /* lock the whole file */
 
     if (timeout > 0) {
-	time_t end = time(NULL) + (time_t)timeout;
+	/*
+	 * Get current time, calculate timeout time.
+	 * No need to check for overflow, already checked above.
+	 */
+	struct timespec now, end;
+	double timeout_s;
+	long remaining_us;
+	zgettime_monotonic_if_available(&now);
+	end.tv_sec = now.tv_sec;
+	end.tv_nsec = now.tv_nsec;
+	end.tv_nsec += modf(timeout, &timeout_s) * 1000000000L;
+	end.tv_sec += timeout_s;
+	if (end.tv_nsec >= 1000000000L) {
+	    end.tv_nsec -= 1000000000L;
+	    end.tv_sec += 1;
+	}
+
+	/* Try acquiring lock, loop until timeout. */
 	while (fcntl(flock_fd, F_SETLK, &lck) < 0) {
 	    if (errflag) {
                 zclose(flock_fd);
@@ -658,11 +722,16 @@ bin_zsystem_flock(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
 		zwarnnam(nam, "failed to lock file %s: %e", args[0], errno);
 		return 1;
 	    }
-	    if (time(NULL) >= end) {
+	    zgettime_monotonic_if_available(&now);
+	    remaining_us = timespec_diff_us(&now, &end);
+	    if (remaining_us <= 0) {
                 zclose(flock_fd);
 		return 2;
             }
-	    sleep(1);
+	    if (remaining_us <= timeout_interval) {
+		timeout_interval = remaining_us;
+	    }
+	    zsleep(timeout_interval);
 	}
     } else {
 	while (fcntl(flock_fd, timeout == 0 ? F_SETLK : F_SETLKW, &lck) < 0) {
diff --git a/Src/compat.c b/Src/compat.c
index 74e426fba..817bb4aaf 100644
--- a/Src/compat.c
+++ b/Src/compat.c
@@ -126,6 +126,32 @@ zgettime(struct timespec *ts)
     return ret;
 }
 
+/* Likewise with CLOCK_MONOTONIC if available. */
+
+/**/
+mod_export int
+zgettime_monotonic_if_available(struct timespec *ts)
+{
+    int ret = -1;
+
+#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+    struct timespec dts;
+    if (clock_gettime(CLOCK_MONOTONIC, &dts) < 0) {
+	zwarn("unable to retrieve CLOCK_MONOTONIC time: %e", errno);
+	ret--;
+    } else {
+	ret++;
+	ts->tv_sec = (time_t) dts.tv_sec;
+	ts->tv_nsec = (long) dts.tv_nsec;
+    }
+#endif
+
+    if (ret) {
+	ret = zgettime(ts);
+    }
+    return ret;
+}
+
 
 /* compute the difference between two calendar times */
 
diff --git a/Src/utils.c b/Src/utils.c
index 69885fed3..e258ef836 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -2748,6 +2748,42 @@ read_poll(int fd, int *readchar, int polltty, zlong microseconds)
     return (ret > 0);
 }
 
+/*
+ * Return the difference between 2 times, given as struct timespec*,
+ * expressed in microseconds, as a long.  If the difference doesn't fit
+ * into a long, return LONG_MIN or LONG_MAX so that the times can still
+ * be compared.
+ *
+ * Note: returns a long rather than a zlong because zsleep() below
+ * takes a long.
+ */
+
+/**/
+long
+timespec_diff_us(const struct timespec *t1, const struct timespec *t2)
+{
+    int reverse = (t1->tv_sec > t2->tv_sec);
+    time_t diff_sec;
+    long diff_usec, max_margin, res;
+
+    /* Don't just subtract t2-t1 because time_t might be unsigned. */
+    diff_sec = (reverse ? t1->tv_sec - t2->tv_sec : t2->tv_sec - t1->tv_sec);
+    if (diff_sec > LONG_MAX / 1000000L) {
+	goto overflow;
+    }
+    res = diff_sec * 1000000L;
+    max_margin = LONG_MAX - res;
+    diff_usec = (reverse ?
+		 t1->tv_nsec - t2->tv_nsec : t2->tv_nsec - t1->tv_nsec
+		 ) / 1000;
+    if (diff_usec <= max_margin) {
+	res += diff_usec;
+	return (reverse ? -res : res);
+    }
+ overflow:
+    return (reverse ? LONG_MIN : LONG_MAX);
+}
+
 /*
  * Sleep for the given number of microseconds --- must be within
  * range of a long at the moment, but this is only used for
diff --git a/Test/V14system.ztst b/Test/V14system.ztst
index e69de29bb..b8af96cda 100644
--- a/Test/V14system.ztst
+++ b/Test/V14system.ztst
@@ -0,0 +1,150 @@
+# Test zsh/system module
+
+%prep
+
+  if zmodload -s zsh/system && zmodload -s zsh/zselect; then
+    tst_dir=V14.tmp
+    mkdir -p -- $tst_dir
+  else
+    ZTST_unimplemented='the zsh/system and zsh/zselect modules are not available'
+  fi
+  : > $tst_dir/file # File on which to acquire flock.
+
+%test
+
+  (
+    zsystem flock -t 0.1 -i 0.000001 $tst_dir/file
+  )
+0:zsystem flock valid time arguments
+
+  (
+    zsystem flock -t -1         $tst_dir/file ||
+    zsystem flock -t 0.49e-6    $tst_dir/file ||
+    zsystem flock -t 1073741824 $tst_dir/file ||
+    zsystem flock -t 1e100      $tst_dir/file ||
+    zsystem flock -i -1         $tst_dir/file ||
+    zsystem flock -i 0.49e-6    $tst_dir/file ||
+    zsystem flock -i 1e100      $tst_dir/file
+  )
+1:zsystem flock invalid time arguments
+?(eval):zsystem:2: flock: invalid timeout value: '-1'
+?(eval):zsystem:3: flock: invalid timeout value: '0.49e-6'
+?(eval):zsystem:4: flock: invalid timeout value: '1073741824'
+?(eval):zsystem:5: flock: invalid timeout value: '1e100'
+?(eval):zsystem:6: flock: invalid interval value: '-1'
+?(eval):zsystem:7: flock: invalid interval value: '0.49e-6'
+?(eval):zsystem:8: flock: invalid interval value: '1e100'
+
+  (
+    # Lock file for 1 second in the background.
+    lock_flag=$tst_dir/locked1
+    (zsystem flock $tst_dir/file \
+     && touch $lock_flag \
+     && zselect -t 100
+     mv $lock_flag $lock_flag.done) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
+      zselect -t 1
+    done
+    if [[ -f $lock_flag.done ]]; then
+      echo "Background shell should not have completed already." 1>&2
+    else
+      # Attempt to lock file with 0.5 second timeout: must fail.
+      zsystem flock -t 0.5 $tst_dir/file
+    fi
+  )
+2:zsystem flock unsuccessful wait test
+F:This timing test might fail due to process scheduling issues unrelated to zsh.
+
+  (
+    # Lock file for 0.5 second in the background.
+    lock_flag=$tst_dir/locked2
+    (zsystem flock $tst_dir/file \
+      && touch $lock_flag \
+      && zselect -t 50
+      mv $lock_flag $lock_flag.done) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
+      zselect -t 1
+    done
+    if [[ -f $lock_flag.done ]]; then
+      echo "Background shell should not have completed already." 1>&2
+    fi
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file without a timeout:
+    # must succeed after sub-shell above releases it (0.5 second).
+    if zsystem flock $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.3 && $elapsed -le 0.7 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 0.5 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, no timeout
+?elapsed time seems OK
+F:This timing test might fail due to process scheduling issues unrelated to zsh.
+
+  (
+    # Lock file for 0.5 second in the background.
+    lock_flag=$tst_dir/locked3
+    (zsystem flock $tst_dir/file \
+      && touch $lock_flag \
+      && zselect -t 50
+      mv $lock_flag $lock_flag.done) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
+      zselect -t 1
+    done
+    if [[ -f $lock_flag.done ]]; then
+      echo "Background shell should not have completed already." 1>&2
+    fi
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file with 1-second timeout:
+    # must succeed 1 second after start because we retry every 1 second.
+    if zsystem flock -t 1 $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.8 && $elapsed -le 1.2 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 1 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, integral seconds
+?elapsed time seems OK
+F:This timing test might fail due to process scheduling issues unrelated to zsh.
+
+  (
+    # Lock file for 0.25 second in the background.
+    lock_flag=$tst_dir/locked4
+    (zsystem flock $tst_dir/file \
+      && touch $lock_flag \
+      && zselect -t 25
+      mv $lock_flag $lock_flag.done) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $lock_flag || -f $lock_flag.done ]]; do
+      zselect -t 1
+    done
+    if [[ -f $lock_flag.done ]]; then
+      echo "Background shell should not have completed already." 1>&2
+    fi
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file with 0.4-second timeout, retrying every 0.1 second:
+    # must succeed 0.3 second after start.
+    if zsystem flock -t 0.4 -i 0.1 $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.2 && $elapsed -le 0.5 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 0.3 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, fractional seconds
+?elapsed time seems OK
+F:This timing test might fail due to process scheduling issues unrelated to zsh.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-04-18 16:32                                         ` Cedric Ware
@ 2020-04-20 17:28                                           ` dana
  2020-04-20 22:17                                             ` Cedric Ware
  0 siblings, 1 reply; 33+ messages in thread
From: dana @ 2020-04-20 17:28 UTC (permalink / raw)
  To: Cedric Ware; +Cc: Daniel Shahaf, Zsh hackers list

On 18 Apr 2020, at 11:32, Cedric Ware <cedric.ware__bml@normalesup.org> wrote:
> I'm all for it, of course. ;-)  You decide whether you want the zlong
> one or the struct timespec one.

I'm not sure i'm qualified to have a strong opinion on the 32/64 thing, but it
sounds like both options have their merits, and nobody else has indicated a
preference, so i've merged the most recent patch. We can change it later if we
want.

I did make two small modifications:

* The documentation said that the time-out/interval values had to be *less
  than* 2^30-1 / 0.999*LONG_MAX, but as far as i can see from the code it's
  actually *less than or equal to*, so i changed the wording slightly. Please
  correct me if there's any mistake here

* I reverted the superscript formatting change that Daniel suggested. It looks
  like @sup was added to texinfo only in 2014, and Apple's makeinfo in
  particular does not support it. I was able to get Homebrew's texinfo package
  to work, but it's keg-only (not linked into PATH due to conflicts with other
  software), and i'm nervous about unilaterally breaking the Doc build with
  the standard tooling on macOS and possibly other platforms just to format
  one number. Not suggesting we limit ourselves to Apple's GPL2 tooling until
  the end of time, but i suppose we should at least discuss it first

dana


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Enable sub-second timeout in zsystem flock
  2020-04-20 17:28                                           ` dana
@ 2020-04-20 22:17                                             ` Cedric Ware
  0 siblings, 0 replies; 33+ messages in thread
From: Cedric Ware @ 2020-04-20 22:17 UTC (permalink / raw)
  To: dana; +Cc: Daniel Shahaf, Zsh hackers list


dana (Monday 2020-04-20):
> so i've merged the most recent patch. We can change it later if we
> want.

Thanks!

> * The documentation said that the time-out/interval values had to be *less
>   than* 2^30-1 / 0.999*LONG_MAX, but as far as i can see from the code it's
>   actually *less than or equal to*, so i changed the wording slightly. Please
>   correct me if there's any mistake here

No problem, it's accurate.

					Best,
					Cedric Ware.

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2020-04-20 22:18 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-29 20:35 [PATCH] Enable sub-second timeout in zsystem flock Cedric Ware
2019-07-29 22:25 ` Bart Schaefer
2020-01-04 18:47   ` Cedric Ware
2020-01-05 18:42     ` dana
2020-01-05 21:49       ` dana
2020-01-06 17:30       ` Cedric Ware
2020-01-06 17:36         ` Peter Stephenson
2020-01-07  3:48         ` dana
2020-01-11 15:41           ` Cedric Ware
2020-01-11 19:36             ` dana
2020-01-12  4:25               ` dana
2020-03-08 18:39                 ` Cedric Ware
2020-03-12 18:46                   ` dana
2020-03-12 19:13                     ` dana
2020-03-14 21:04                     ` Cedric Ware
2020-03-15  0:50                       ` Daniel Shahaf
2020-03-15  1:04                         ` dana
2020-03-15 16:03                         ` Cedric Ware
2020-03-15 16:54                           ` Daniel Shahaf
2020-03-15 17:35                             ` Peter Stephenson
2020-03-15 18:36                             ` Cedric Ware
2020-03-15 19:13                               ` Daniel Shahaf
2020-04-13 21:34                             ` Cedric Ware
2020-04-14 11:47                               ` Daniel Shahaf
2020-04-14 20:21                                 ` Cedric Ware
2020-04-15  1:15                                   ` Daniel Shahaf
2020-04-15  2:05                                     ` dana
2020-04-16  4:24                                       ` Daniel Shahaf
2020-04-18 16:32                                         ` Cedric Ware
2020-04-20 17:28                                           ` dana
2020-04-20 22:17                                             ` Cedric Ware
2020-03-15  1:04                       ` Daniel Shahaf
2020-03-13 14:26                   ` dana

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).