zsh-workers
 help / color / mirror / code / Atom feed
* [PATCH] zsh/random module
@ 2022-11-02 17:13 Clinton Bunch
  2022-11-03 17:50 ` Bart Schaefer
  2022-11-04  3:17 ` dana
  0 siblings, 2 replies; 46+ messages in thread
From: Clinton Bunch @ 2022-11-02 17:13 UTC (permalink / raw)
  To: zsh-workers

[-- Attachment #1: Type: text/plain, Size: 366 bytes --]

This has been tested on the following platforms against the git/master 
(and 5.5.1)

Solaris 11.4 (x86) Developer Studio

FreeBSD 13.1 clang

Centos 7 gcc

Rocky 8 gcc

Rocky 9 gcc

HP-UX 11.31 HP ANSI C compiler


I'd appreciate if someone would test it on a few other platforms and try 
to break it.  I tried everything I could think of, but...

[-- Attachment #2: zsh-random.patch.txt --]
[-- Type: text/plain, Size: 20171 bytes --]

diff --git a/Doc/Makefile.in b/Doc/Makefile.in
index 23e5fc7e2..bacee7c27 100644
--- a/Doc/Makefile.in
+++ b/Doc/Makefile.in
@@ -66,7 +66,7 @@ Zsh/mod_example.yo Zsh/mod_files.yo Zsh/mod_langinfo.yo \
 Zsh/mod_mapfile.yo Zsh/mod_mathfunc.yo \
 Zsh/mod_nearcolor.yo Zsh/mod_newuser.yo \
 Zsh/mod_parameter.yo Zsh/mod_pcre.yo Zsh/mod_private.yo \
-Zsh/mod_regex.yo Zsh/mod_sched.yo Zsh/mod_socket.yo \
+Zsh/mod_regex.yo Zsh/mod_random.yo Zsh/mod_sched.yo Zsh/mod_socket.yo \
 Zsh/mod_stat.yo  Zsh/mod_system.yo Zsh/mod_tcp.yo \
 Zsh/mod_termcap.yo Zsh/mod_terminfo.yo \
 Zsh/mod_watch.yo \
diff --git a/Doc/Zsh/mod_random.yo b/Doc/Zsh/mod_random.yo
new file mode 100644
index 000000000..c18695dd2
--- /dev/null
+++ b/Doc/Zsh/mod_random.yo
@@ -0,0 +1,61 @@
+COMMENT(!MOD!zsh/random
+Some High-quality randomness commands, parameters, and functions.
+!MOD!)
+The tt(zsh/random) module gets random data from the kernel random pool. If no 
+kernel random pool can be found, the module will not load.
+
+
+subsect(Builtins)
+
+startitem()
+findex(getrandom)
+cindex(random data, printing, array)
+xitem(tt(getrandom) [ tt(-l) var(length) ] [ tt(-r) ] [ tt(-s) var(scalar) ] [tt(-i) ] [ tt(-a) var(array) ] [ tt(-U) var(upper_bound) ] [ tt(-L) var(lower_bound) ]) 
+NL()Outputs a random hex-encoded string of var(length) bytes by default. NL()
+
+
+startitem()
+item(tt(-l) var(length)) (
+Set the length of the returned random data. Defaults to 8.
+)
+item(tt(-r)) (
+Return the raw bytes rather than a hex-encoded string.
+)
+item(tt(-s) var(scalar)) (
+Save the random data in var(scalar) instead of printing it. Only useful in string mode or when var(length) is 1.
+)
+item(tt(-a) var(array)) (
+Save the random data as an array var(array) of var(length) containing a
+decimal representation of the integers LPAR()tt(-i), tt(-L), or tt(-U)RPAR() 
+or individual bytes.
+)
+item(tt(-i)) (
+Produces var(length) 32-bit unsigned integers.
+)
+item(tt(-U) var(upper_bound)) (
+Output var(length) integers between var(lower_bound) and var(upper_bound). 
+Defaults to 4,294,967,296.
+)
+item(tt(-L) var(lower_bound)) (
+Output var(length) integers between var(lower_bound) and var(upper_bound).
+Defaults to 0.
+)
+enditem()
+enditem()
+
+subsect(Parameters)
+
+startitem()
+vindex(SRANDOM)
+item(tt(SRANDOM)) (
+A random positive 32-bit integer between 0 and 4,294,967,295
+)
+enditem()
+
+subsect(Math Functions)
+
+startitem()
+item(tt(zrandom+LPAR()RPAR())) (
+Returns a random floating point number between 0 and 1.
+)
+enditem()
diff --git a/Src/Modules/random.c b/Src/Modules/random.c
new file mode 100644
index 000000000..2c18711ff
--- /dev/null
+++ b/Src/Modules/random.c
@@ -0,0 +1,659 @@
+/*
+ * random.c - module to access kernel random sources.
+ *
+ * This file is part of zsh, the Z shell.
+ *
+ * Copyright (c) 2022 Clinton Bunch
+ * All rights reserved.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and to distribute modified versions of this software for any
+ * purpose, provided that the above copyright notice and the following
+ * two paragraphs appear in all copies of this software.
+ *
+ * In no event shall Clinton Bunch or the Zsh Development Group be liable
+ * to any party for direct, indirect, special, incidental, or consequential
+ * damages arising out of the use of this software and its documentation,
+ * even if Clinton Bunch and the Zsh Development Group have been advised of
+ * the possibility of such damage.
+ *
+ * Clinton Bunch and the Zsh Development Group specifically disclaim any
+ * warranties, including, but not limited to, the implied warranties of
+ * merchantability and fitness for a particular purpose.  The software
+ * provided hereunder is on an "as is" basis, and Clinton Bunch and the
+ * Zsh Development Group have no obligation to provide maintenance,
+ * support, updates, enhancements, or modifications.
+ *
+ */
+
+#include "random.mdh"
+#include "random.pro"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include <stdbool.h>
+
+#ifdef HAVE_SYS_RANDOM_H
+#include <sys/random.h>
+#endif
+
+/* Simplify select URANDOM specific code */
+#if !defined(HAVE_ARC4RANDOM) && !defined(HAVE_GETRANDOM)
+#define USE_URANDOM
+#endif
+
+/* buffer to pre-load integers for SRANDOM to lessen the context switches */
+uint32_t rand_buff[8];
+static int buf_cnt = -1;
+
+#ifdef USE_URANDOM
+/* File descriptor for /dev/urandom */
+int randfd = -1;
+#endif /* USE_URANDOM */
+
+/*
+ *  Function takes an input buffer and converts the characters to hex and
+ *  places them in an outbut buffer twice the size. Returns -1 if it can't.
+ *  Returns the size of the output otherwise.
+ */
+
+/**/
+static int
+ztr_to_hex(char *out, size_t outlen, uint8_t *input, size_t inlen)
+{
+    int i = 0, j = 0;
+
+    if (outlen < (inlen * 2 + 1))
+	return -1;
+
+    for(i = 0, j = 0; i < inlen; i++, j+=2) {
+	if (j < outlen) {
+	    sprintf((char *)(out + j),"%02X",input[i]);
+	} else {
+	    return -1;
+	}
+    }
+    out[j]='\0';
+    return j;
+}
+
+/**/
+static ssize_t
+getrandom_buffer(void *buf, size_t len)
+{
+    ssize_t ret;
+    size_t  val    = 0;
+    uint8_t *bufptr = buf;
+
+    do {
+#ifdef HAVE_ARC4RANDOM
+	/* Apparently, the creaters of arc4random didn't believe in errors */
+        arc4random_buf(buf,len);
+        ret = len;
+#elif defined(HAVE_GETRANDOM)
+        ret=getrandom(bufptr,(len - val),0);
+#else
+        ret=read(randfd,bufptr,(len - val));
+#endif
+        if (ret < 0) {
+	    if (errno != EINTR || errflag || retflag || breaks || contflag) {
+		zwarn("Unable to get random data: %e", errno);
+	        return -1;
+	    }
+        }
+	bufptr += ret;
+	val    += ret;
+    } while (ret < len);
+    return ret;
+}
+
+/*
+ * Implements the getrandom builtin to return a string of random bytes of
+ * user-specified length.  It either prints them to stdout or returns them
+ * in a parameter passed on the command line.
+ *
+ */
+
+/**/
+static int
+bin_getrandom(char *name, char **argv, Options ops, int func)
+{
+
+    zulong len      = 8;
+    size_t byte_len = 0, outlen; /* buffer lengths */
+    int    pad      = 0;
+
+    bool integer_out = false;         /* boolean */
+    int i, j;                       /*loop counters */
+
+    /*maximum string length of a 32-bit integer + null */
+    char int2str[11];
+
+    uint8_t   *buf;
+    uint32_t  *int_buf = NULL, int_val = 0; /* buffer for integer return */
+
+    char *scalar = NULL, *arrname = NULL; /* parameter names */
+
+    char *outbuff;  /* hex string or metafy'd output */
+    char **array = NULL;   /* array value for -a */
+
+    /* Vailues for -U *nd -L */
+    zulong   upper = UINT32_MAX, lower = 0, diff = UINT32_MAX;
+    uint32_t min   = 0, rej = 0;  /* Reject below min and count */
+    bool     bound = false; /* boolean to set if upper or lower bound set */
+    /* End of variable declarations */
+
+
+    /* store out put in a scalar variable */
+    if (OPT_ISSET(ops, 's')) {
+	scalar = OPT_ARG(ops, 's');
+	if (!isident(scalar)) {
+	    zwarnnam(name, "argument to -s not an identifier: %s", scalar);
+	    return 1;
+	}
+    }
+
+    /* create array of descimal numbers */
+    if (OPT_ISSET(ops,'a')) {
+	arrname = OPT_ARG(ops, 'a');
+	if (!isident(arrname)) {
+	    zwarnnam(name,"argument to -a not an identifier: %s", arrname);
+	    return 1;
+	}
+    }
+
+    /* specify number of random bytes or integers to output */
+    if (OPT_ISSET(ops, 'l')) {
+
+	if (zstrtoul_underscore(OPT_ARG(ops, 'l'), &len)) {
+	    if (len > 64 || len <= 0) {
+	        zwarnnam(name,
+			"length must be between 1 and 64 you specified: %d",
+		        len);
+	        return 1;
+	    }
+	} else {
+	    zwarnnam(name, "Couldn't convert -l %s to a number",
+		    OPT_ARG(ops, 'l'));
+	    return 1;
+	}
+    }
+
+    if (OPT_ISSET(ops, 'U')) {
+	if (!zstrtoul_underscore(OPT_ARG(ops, 'U'), &upper)) {
+	    zwarnnam(name, "Couldn't convert -U %s to a number",
+		    OPT_ARG(ops, 'U'));
+	    return 1;
+        } else {
+	    bound = true;
+	}
+    }
+
+    if (OPT_ISSET(ops, 'L')) {
+	if (!zstrtoul_underscore(OPT_ARG(ops, 'L'), &lower)) {
+	    zwarnnam(name, "Couldn't convert -L %s to a number",
+		    OPT_ARG(ops, 'L'));
+	    return 1;
+	} else {
+	    bound = true;
+	}
+    }
+
+    /* integer rather than byte output to array */
+    if (OPT_ISSET(ops, 'i')) {
+	if (OPT_ISSET(ops,'s') && len > 1) {
+	    zwarnnam(name,"-i only makes sense with -s when 1ength is 1");
+	    return 1;
+	}
+	integer_out = true;
+    }
+
+    if (bound) {
+        if (scalar && len > 1) {
+            zwarnnam(name,"Bounds only make sense with -s when length is 1");
+	    return 1;
+        }
+
+	if (lower > upper) {
+	    zwarnnam(name,"-U must be larger than -L.");
+	    return 1;
+	}
+
+        diff = upper - lower;
+
+	if(!diff) {
+	    zwarnnam(name,"Upper and Lower bounds cannot be the same.");
+	    return 1;
+	}
+    }
+
+
+    if (!byte_len)
+	byte_len = len;
+
+
+    if (bound) {
+	pad = len / 16 + 1;
+        byte_len = (len + pad) * sizeof(uint32_t);
+    }
+    if (integer_out)
+	byte_len = len * sizeof(uint32_t);
+
+    if (byte_len > 256)
+	byte_len=256;
+
+    buf = zalloc(byte_len);
+
+    if (getrandom_buffer(buf, byte_len) < 0) {
+	zwarnnam(name,"Couldn't get random data");
+	return 1;
+    }
+
+    /* Raw output or hex string */
+    if (!integer_out && !bound && !arrname) {
+        if (OPT_ISSET(ops, 'r')) {
+	    outbuff = (char *) buf;
+	    outlen  = len;
+        } else {
+	    outlen=len*2;
+	    outbuff = (char*) zalloc(outlen+1);
+	    ztr_to_hex(outbuff, outlen+1, buf, len);
+        }
+
+        if (scalar) {
+	    setsparam(scalar, metafy(outbuff, outlen, META_DUP));
+        } else {
+            fwrite(outbuff,1,outlen,stdout);
+	    if (!OPT_ISSET(ops, 'r'))
+		printf("\n");
+	}
+        if (outbuff !=  (void *) buf) {
+	    zfree(outbuff,outlen+1);
+	}
+	return 0;
+    } else {
+	if (OPT_ISSET(ops, 'r')) {
+	    zwarnnam(name, "-r can't be specified with bounds or -i or -a");
+	    return 1;
+	}
+    }
+
+    if(arrname) {
+	array = (char **) zalloc((len+1)*sizeof(char *));
+	array[len] = NULL;
+    }
+
+    if(integer_out || bound)
+	int_buf=(uint32_t *)buf;
+
+    min = -diff % diff;
+
+    j = 0; rej = 0;
+    for(i = 0; i < len; i++) {
+	if(integer_out) {
+	    int_val = int_buf[i];
+	} else if (!bound) {
+	    int_val=buf[i];
+	} else {
+	    while (int_buf[j] < min ) { /*Reject */
+		j++; rej++;
+		if (j * sizeof(uint32_t) >= byte_len ){
+		    if (getrandom_buffer(buf, byte_len) < 0) {
+			zwarnnam(name,"Can't get enough random data");
+			return 1;
+		    }
+		    j = 0;
+		}
+	    }
+	    int_val = int_buf[j++] % diff + lower;
+	}
+	sprintf(int2str, "%u", int_val);
+	if (arrname) {
+	    array[i]=ztrdup(int2str);
+	} else if (scalar && len == 1) {
+	    setiparam(scalar, int_val);
+	} else {
+	    printf("%s ",int2str);
+	}
+    }
+    if (arrname) {
+	setaparam(arrname,array);
+    } else if (!scalar) {
+	printf("\n");
+    }
+
+
+    zfree(buf, byte_len);
+    return 0;
+}
+
+
+/*
+ * Provides for the SRANDOM parameter and returns  a unisgned 32-bit random
+ * integer.
+ */
+
+/**/
+static zlong
+get_srandom(UNUSED(Param pm)) {
+
+    if(buf_cnt < 0) {
+	getrandom_buffer((void*) rand_buff,sizeof(rand_buff));
+	buf_cnt=7;
+    }
+    return rand_buff[buf_cnt--];
+}
+
+/*
+ * Implements  the mathfunc zrandom and returns a random floating-point
+ * number between 0 and 1.  Does this by getting a random 32-bt integer
+ * and dividing it by UINT32_MAX.
+ *
+ */
+
+/**/
+static mnumber
+math_zrandom(UNUSED(char *name), UNUSED(int argc), UNUSED(mnumber *argv),
+	     UNUSED(int id))
+{
+    mnumber ret;
+    double r;
+
+    r = random_real();
+    if (r < 0) {
+	zwarnnam(name,"Random Data Failed");
+    }
+    ret.type = MN_FLOAT;
+    ret.u.d = r;
+
+    return ret;
+}
+
+static struct builtin bintab[] = {
+    BUILTIN("getrandom", 0, bin_getrandom, 0, 8, 0, "ria:s:l:%U:%L:%", NULL),
+};
+
+static const struct gsu_integer srandom_gsu =
+{ get_srandom, nullintsetfn, stdunsetfn };
+
+static struct paramdef patab[] = {
+    SPECIALPMDEF("SRANDOM", PM_INTEGER|PM_READONLY, &srandom_gsu,NULL,NULL),
+};
+
+static struct mathfunc mftab[] = {
+    NUMMATHFUNC("zrandom", math_zrandom, 0, 0, 0),
+};
+
+static struct features module_features = {
+    bintab, sizeof(bintab)/sizeof(*bintab),
+    NULL, 0,
+    mftab, sizeof(mftab)/sizeof(*mftab),
+    patab, sizeof(patab)/sizeof(*patab),
+    0
+};
+
+/**/
+int
+setup_(UNUSED(Module m))
+{
+#ifdef USE_URANDOM
+    /* Check for the existence of /dev/urnadom */
+
+    struct stat st;
+
+    if (lstat("/dev/urandom",&st) < 0) {
+	zwarn("No kernel random pool found.");
+	return 1;
+    }
+
+    if (!(S_ISCHR(st.st_mode)) ) {
+	zwarn("No kernel random pool found.");
+	return 1;
+    }
+#endif /* USE_URANDOM */
+    return 0;
+}
+
+/**/
+int
+features_(Module m, char ***features)
+{
+    *features = featuresarray(m, &module_features);
+    return 0;
+}
+
+/**/
+int
+enables_(Module m, int **enables)
+{
+    return handlefeatures(m, &module_features, enables);
+}
+
+/**/
+int
+boot_(Module m)
+{
+#ifdef USE_URANDOM
+    /*
+     * Open /dev/urandom.  Here because of a weird issue on HP-UX 11.31
+     * When opening in setup_ open returned 0.  In boot_, it returns
+     * an unused file descriptor. Decided against ifdef HPUX as it works
+     * here just as well for other platforms.
+     *
+     */
+
+    int tmpfd=-1;
+
+    if ((tmpfd = open("/dev/urandom", O_RDONLY)) < 0) {
+	zwarn("Could not access kernel random pool");
+	return 1;
+    }
+    randfd = movefd(tmpfd);
+    addmodulefd(randfd,FDT_MODULE);
+    if (randfd < 0) {
+	zwarn("Could not access kernel random pool");
+	return 1;
+    }
+#endif /* USE_URANDOM */
+    return 0;
+}
+
+/**/
+int
+cleanup_(Module m)
+{
+    return setfeatureenables(m, &module_features, NULL);
+}
+
+/**/
+int
+finish_(UNUSED(Module m))
+{
+#ifdef USE_URANDOM
+    if (randfd >= 0)
+	zclose(randfd);
+#endif /* USE_URANDOM */
+    return 0;
+}
+
+
+/* Count the number of leading zeros, hopefully in gcc/clang by HW
+ * instruction */
+#if defined(__GNUC__) || defined(__clang__)
+#define clz64(x) __builtin_clzll(x)
+#else
+#define clz64(x) _zclz64(x)
+
+/**/
+int
+_zclz64(uint64_t x) {
+    int n = 0;
+
+    if (x == 0)
+	return 64;
+
+    if (!(x & 0xFFFFFFFF00000000)) {
+	n+=32;
+	x<<=32;
+    }
+    if (!(x & 0xFFFF000000000000)) {
+	n+=16;
+	x<<=16;
+    }
+    if (!(x & 0xFF00000000000000)) {
+	n+=8;
+	x<<=8;
+    }
+    if (!(x & 0xF000000000000000)) {
+	n+=4;
+	x<<=4;
+    }
+    if (!(x & 0xC000000000000000)) {
+	n+=2;
+	x<<=1;
+    }
+    if (!(x & 0x8000000000000000)) {
+	n+=1;
+    }
+    return n;
+}
+#endif /* __GNU_C__ or __clang__ */
+
+/**/
+uint64_t
+random64(void) {
+    uint64_t r;
+
+    if(getrandom_buffer(&r,sizeof(r)) < 0) {
+	zwarn("zsh/random: Can't get sufficient random data");
+	return 1; /* 0 will cause loop */
+    }
+
+    return r;
+}
+
+/* Following code is under the below copyright, despite changes for error
+ * handling and removing GCCisms */
+
+/*-
+ * Copyright (c) 2014 Taylor R. Campbell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Uniform random floats: How to generate a double-precision
+ * floating-point numbers in [0, 1] uniformly at random given a uniform
+ * random source of bits.
+ *
+ * See <http://mumble.net/~campbell/2014/04/28/uniform-random-float>
+ * for explanation.
+ *
+ * Updated 2015-02-22 to replace ldexp(x, <constant>) by x * ldexp(1,
+ * <constant>), since glibc and NetBSD libm both have slow software
+ * bit-twiddling implementations of ldexp, but GCC can constant-fold
+ * the latter.
+ */
+
+#include <math.h>
+#include <stdint.h>
+
+/*
+ * random_real: Generate a stream of bits uniformly at random and
+ * interpret it as the fractional part of the binary expansion of a
+ * number in [0, 1], 0.00001010011111010100...; then round it.
+ */
+
+/**/
+double
+random_real(void)
+{
+	int exponent = 0;
+	uint64_t significand = 0;
+	uint64_t r = 0;
+	unsigned shift;
+
+	/*
+	 * Read zeros into the exponent until we hit a one; the rest
+	 * will go into the significand.
+	 */
+	while (significand == 0) {
+		exponent -= 64;
+
+		/* Get random64 and check for error */
+		errno = 0;
+		significand = random64();
+		if (errno)
+		    return -1;
+		/*
+		 * If the exponent falls below -1074 = emin + 1 - p,
+		 * the exponent of the smallest subnormal, we are
+		 * guaranteed the result will be rounded to zero.  This
+		 * case is so unlikely it will happen in realistic
+		 * terms only if random64 is broken.
+		 */
+		if (exponent < -1074)
+			return 0;
+	}
+
+	/*
+	 * There is a 1 somewhere in significand, not necessarily in
+	 * the most significant position.  If there are leading zeros,
+	 * shift them into the exponent and refill the less-significant
+	 * bits of the significand.  Can't predict one way or another
+	 * whether there are leading zeros: there's a fifty-fifty
+	 * chance, if random64 is uniformly distributed.
+	 */
+	shift = clz64(significand);
+	if (shift != 0) {
+
+		errno = 0;
+		r = random64();
+		if (errno)
+		    return -1;
+
+		exponent -= shift;
+		significand <<= shift;
+		significand |= (r >> (64 - shift));
+	}
+
+	/*
+	 * Set the sticky bit, since there is almost surely another 1
+	 * in the bit stream.  Otherwise, we might round what looks
+	 * like a tie to even when, almost surely, were we to look
+	 * further in the bit stream, there would be a 1 breaking the
+	 * tie.
+	 */
+	significand |= 1;
+
+	/*
+	 * Finally, convert to double (rounding) and scale by
+	 * 2^exponent.
+	 */
+	return ldexp((double)significand, exponent);
+}
diff --git a/Src/Modules/random.mdd b/Src/Modules/random.mdd
new file mode 100644
index 000000000..3803a4533
--- /dev/null
+++ b/Src/Modules/random.mdd
@@ -0,0 +1,7 @@
+name=zsh/random
+link=either
+load=yes
+
+autofeatures="b:getrandom p:SRANDOM f:zrandom"
+
+objects="random.o"
diff --git a/configure.ac b/configure.ac
index 074141d38..f1fa01274 100644
--- a/configure.ac
+++ b/configure.ac
@@ -675,6 +675,7 @@ fi
 AC_CHECK_HEADERS(sys/time.h sys/times.h sys/select.h termcap.h termio.h \
 		 termios.h sys/param.h sys/filio.h string.h memory.h \
 		 limits.h fcntl.h libc.h sys/utsname.h sys/resource.h \
+                 sys/random.h \
 		 locale.h errno.h stdio.h stdarg.h varargs.h stdlib.h \
 		 unistd.h sys/capability.h \
 		 utmp.h utmpx.h sys/types.h pwd.h grp.h poll.h sys/mman.h \
@@ -1337,6 +1338,7 @@ AC_CHECK_FUNCS(strftime strptime mktime timelocal \
 	       cygwin_conv_path \
 	       nanosleep \
 	       srand_deterministic \
+               getrandom arc4random \
 	       setutxent getutxent endutxent getutent)
 AC_FUNC_STRCOLL
 

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module
  2022-11-02 17:13 [PATCH] zsh/random module Clinton Bunch
@ 2022-11-03 17:50 ` Bart Schaefer
  2022-11-04  3:17 ` dana
  1 sibling, 0 replies; 46+ messages in thread
From: Bart Schaefer @ 2022-11-03 17:50 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

On Wed, Nov 2, 2022 at 10:14 AM Clinton Bunch <cdb_zsh@zentaur.org> wrote:
>
> I'd appreciate if someone would test it on a few other platforms and try
> to break it.  I tried everything I could think of, but...

Seems to work on MacOS Catalina.  If you add a Test/ file for "make
check" we'll get more coverage.

Documentation suggestions:

    -s SCALAR
          Save the random data in SCALAR instead of printing it. Only
          useful in string mode or when LENGTH is 1.

Actually say that this is an error for -i with length > 1, rather than
just "Only useful when".  Avoids use of the undefined term "string
mode" and better describes the behavior.  I wonder if using both -i
and -s should default length to 1 rather than 8 ... otherwise you
should state that you can't use them together without also using -l.

I'd have put -L before -U but I suppose you expect -U to be used more
frequently.


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module
  2022-11-02 17:13 [PATCH] zsh/random module Clinton Bunch
  2022-11-03 17:50 ` Bart Schaefer
@ 2022-11-04  3:17 ` dana
  2022-11-04  6:22   ` Clinton Bunch
  1 sibling, 1 reply; 46+ messages in thread
From: dana @ 2022-11-04  3:17 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: Zsh hackers list

On Wed 2 Nov 2022, at 12:13, Clinton Bunch wrote:
> I'd appreciate if someone would test it on a few other platforms and try 
> to break it.  I tried everything I could think of, but...

I like this idea, i've wanted to do something like it for a long time. It
seems to work OK for me on Catalina as well

Here's my feedback. Sorry if any of this came up in previous discussions. I
also didn't look super closely at the code, just skimmed it for high-level
stuff, so idk about the security, portability, &c.


API/behaviour:

The default length behaviour with -i is weird to me. I get that there's a
certain internal consistency to it, but i feel like 95% of the time if you
want a random integer you only want one, right?

Would it be less confusing to refer to it as 'count' (-c) or 'number' (-n)
instead of length? Maybe it'd be easier to understand that it means byte count
(not string length) in the default mode and element count in integer mode? I
guess programming languages often overload 'length' to mean different things
like that, though, and people get by OK with it

  zwarnnam(name, "argument to -s not an identifier: %s", scalar);
  zwarnnam(name,"argument to -a not an identifier: %s", arrname);

Seems like it could just create these for you? I'm not aware of any other
built-ins that take a parameter name that treat non-existence of the parameter
as an error (except for like typeset). See for example printf and sysread

  zwarnnam(name, "-r can't be specified with bounds or -i or -a");

I can't think of any reason -r couldn't work with bounds or -a. The latter in
particular seems useful, though easy enough to achieve with parameter
expansions i guess. Making it support bounds would let you do things like only
return random ASCII letters

Tests and a completion function would be useful


Documentation:

  +Set the length of the returned random data. Defaults to 8.

I feel like it's not immediately clear what 'length' means, due to how it
behaves with -i. I wasn't sure i was understanding until i just tried it for
myself. Maybe add something like '(in bytes or, with -i, number of integer
elements)'?

I also agree with Bart that it makes sense to put -L before -U. Makes sense to
alphabetise option lists in general imo, though this wouldn't be the first
built-in in the manual that has them out of order

And i think all of the descriptions should be either indicative ('does x') or
imperative ('do x'), not a mix of the two. Though again this wouldn't be the
first to mix them


Typos, white space, consistency issues, &c.:

Some typos i noticed:

  +Some High-quality randomness commands, parameters, and functions.
  *  places them in an outbut buffer twice the size. Returns -1 if it can't.
  /* Vailues for -U *nd -L */
  "length must be between 1 and 64 you specified: %d",
  * Provides for the SRANDOM parameter and returns  a unisgned 32-bit random
  /* Check for the existence of /dev/urnadom */
  zwarnnam(name,"-i only makes sense with -s when 1ength is 1");
  zwarnnam(name,"Upper and Lower bounds cannot be the same.");

git complains about trailing white-space on lines 23, 32, 48, 55

Lots of random double-spaces all over

Inconsistent white space around operators, commas, e.g.:

  zwarnnam(name,"Couldn't get random data");
but then
  zwarnnam(name, "-r can't be specified with bounds or -i or -a");

Inconsistent indentation in several places, usually spaces where there should
be tabs (Ctrl+F eight spaces)

Weird error message, not sure what it means for data to fail:

   zwarnnam(name,"Random Data Failed");

Some error messages use sentence case, some don't


dana


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module
  2022-11-04  3:17 ` dana
@ 2022-11-04  6:22   ` Clinton Bunch
  2022-11-04  7:27     ` dana
  0 siblings, 1 reply; 46+ messages in thread
From: Clinton Bunch @ 2022-11-04  6:22 UTC (permalink / raw)
  To: zsh-workers

On 11/3/2022 10:17 PM, dana wrote:
> On Wed 2 Nov 2022, at 12:13, Clinton Bunch wrote:
>> I'd appreciate if someone would test it on a few other platforms and try
>> to break it.  I tried everything I could think of, but...
> I like this idea, i've wanted to do something like it for a long time. It
> seems to work OK for me on Catalina as well
>
> Here's my feedback. Sorry if any of this came up in previous discussions. I
> also didn't look super closely at the code, just skimmed it for high-level
> stuff, so idk about the security, portability, &c.
>
>
> API/behaviour:
>
> The default length behaviour with -i is weird to me. I get that there's a
> certain internal consistency to it, but i feel like 95% of the time if you
> want a random integer you only want one, right?
Then why would you use the builtin in preference to the parameter SRANDOM?
>
> Would it be less confusing to refer to it as 'count' (-c) or 'number' (-n)
> instead of length? Maybe it'd be easier to understand that it means byte count
> (not string length) in the default mode and element count in integer mode? I
> guess programming languages often overload 'length' to mean different things
> like that, though, and people get by OK with it
I could see count instead of length, since length is more commonly used 
with the string mode.
>
>    zwarnnam(name, "argument to -s not an identifier: %s", scalar);
>    zwarnnam(name,"argument to -a not an identifier: %s", arrname);
>
> Seems like it could just create these for you? I'm not aware of any other
> built-ins that take a parameter name that treat non-existence of the parameter
> as an error (except for like typeset). See for example printf and sysread
Copied that pattern straight out of Src/Modules/datetime.c
>
>    zwarnnam(name, "-r can't be specified with bounds or -i or -a");
>
> I can't think of any reason -r couldn't work with bounds or -a. The latter in
> particular seems useful, though easy enough to achieve with parameter
> expansions i guess. Making it support bounds would let you do things like only
> return random ASCII letters
raw mode isn't something I would think of with processing like bounds.  
It would also restrict the bounds to 0-256.  I would think the -a with 
no -i mode would be more useful for picking out specific characters out 
of a string param of desired characters rather than using the raw ascii 
codes.  I saw raw mode more as passing binary data to a pipe or file.
>
> Tests and a completion function would be useful
Trying to think how to design tests when the output is different every 
time by design.  And I can't think what a completion function would 
complete.  It's not like it's using long options or enumerated arguments.
>
>
> Documentation:
>
>    +Set the length of the returned random data. Defaults to 8.
>
> I feel like it's not immediately clear what 'length' means, due to how it
> behaves with -i. I wasn't sure i was understanding until i just tried it for
> myself. Maybe add something like '(in bytes or, with -i, number of integer
> elements)'?
>
> I also agree with Bart that it makes sense to put -L before -U. Makes sense to
> alphabetise option lists in general imo, though this wouldn't be the first
> built-in in the manual that has them out of order
>
> And i think all of the descriptions should be either indicative ('does x') or
> imperative ('do x'), not a mix of the two. Though again this wouldn't be the
> first to mix them
>
>
> Typos, white space, consistency issues, &c.:
>
> Some typos i noticed:
>
>    +Some High-quality randomness commands, parameters, and functions.
>    *  places them in an outbut buffer twice the size. Returns -1 if it can't.
>    /* Vailues for -U *nd -L */
>    "length must be between 1 and 64 you specified: %d",
>    * Provides for the SRANDOM parameter and returns  a unisgned 32-bit random
>    /* Check for the existence of /dev/urnadom */
>    zwarnnam(name,"-i only makes sense with -s when 1ength is 1");
>    zwarnnam(name,"Upper and Lower bounds cannot be the same.");
>
> git complains about trailing white-space on lines 23, 32, 48, 55
I thought I got all the ones in code.  I wasn't sure enough about YODL 
to remove them in that file.
>
> Lots of random double-spaces all over
>
> Inconsistent white space around operators, commas, e.g.:
My normal coding style rarely uses unnecessary white space to save line 
width.  It's not surprising I reverted to form a few times and didn't 
catch it on review.
>
>    zwarnnam(name,"Couldn't get random data");
> but then
>    zwarnnam(name, "-r can't be specified with bounds or -i or -a");
>
> Inconsistent indentation in several places, usually spaces where there should
> be tabs (Ctrl+F eight spaces)
I used editconfig, but when I had to re-indent the line I typed spaces.  
It still seems weird that the dev guide specifies mixing the two.
>
> Weird error message, not sure what it means for data to fail:
>
>     zwarnnam(name,"Random Data Failed");
>
> Some error messages use sentence case, some don't
>
>
> dana
>



^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module
  2022-11-04  6:22   ` Clinton Bunch
@ 2022-11-04  7:27     ` dana
  2022-11-04 12:57       ` Clinton Bunch
  0 siblings, 1 reply; 46+ messages in thread
From: dana @ 2022-11-04  7:27 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: Zsh hackers list

On Fri 4 Nov 2022, at 01:22, Clinton Bunch wrote:
> Then why would you use the builtin in preference to the parameter SRANDOM?

I guess the main reason would be for the bounds functionality

On Fri 4 Nov 2022, at 01:22, Clinton Bunch wrote:
> Copied that pattern straight out of Src/Modules/datetime.c

TIL. I also lied about sysread, it does the same. print/printf, zparseopts,
and zstyle don't. Can't think of anything else off the top of my head rn

On Fri 4 Nov 2022, at 01:22, Clinton Bunch wrote:
> Trying to think how to design tests when the output is different every
> time by design.

If nothing else it could just make sure it's in the expected format

On Fri 4 Nov 2022, at 01:22, Clinton Bunch wrote:
> And I can't think what a completion function would
> complete.  It's not like it's using long options or enumerated arguments.

Most of zsh's built-ins have no long options but they still have completion
functions. Some people use completion as a substitute for the documentation.
Attached _getrandom for your consideration (assumes no further changes, tested
very minimally)

btw, in writing that function i realised a few things:

* In the documentation, i think the default upper bound should be 4294967295
  rather than 4294967296

* Why is the maximum length 64? Also, should that value be documented?

* If you put -i after -L/-U it overrides their effect. Maybe the
  `if (integer_out)` in the code should be an `else if` instead?

* It appears that -L is inclusive but -U is exclusive. e.g. if you do
  `getrandom -l1 -L2 -U3` it will only ever return 2. I assume that's not
  intentional?

On Fri 4 Nov 2022, at 01:22, Clinton Bunch wrote:
> It still seems weird that the dev guide specifies mixing the two.

I agree

dana


diff --git a/Completion/Zsh/Command/_getrandom b/Completion/Zsh/Command/_getrandom
new file mode 100644
index 000000000..3513e10b7
--- /dev/null
+++ b/Completion/Zsh/Command/_getrandom
@@ -0,0 +1,12 @@
+#compdef getrandom
+
+local min=0 max=$(( 2 ** 32 - 1 ))
+
+_arguments -s -S : \
+  '(-r -s)-a+[assign result to specified array parameter]:array parameter:_parameters -g "*array*~*readonly*"' \
+  '(-a)-s+[assign result to specified scalar parameter]:scalar parameter:_parameters -g "*(integer|scalar)*~*readonly*"' \
+  '(-r)-i[produce random data as 32-bit unsigned integers]' \
+  '-l+[specify length of data]: :_numbers -d8 -l1 -m64 -u "bytes or integer elements" "data length"' \
+  '(-i -L -U)-r[produce random data as raw bytes]' \
+  '(-r)-L+[specify integer lower bound (implies -i)]: :_numbers -d$min -l$min -m$max "integer lower bound"' \
+  '(-r)-U+[specify integer upper bound (implies -i)]: :_numbers -d$max -l$min -m$max "integer upper bound"'


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module
  2022-11-04  7:27     ` dana
@ 2022-11-04 12:57       ` Clinton Bunch
  2022-11-08  0:18         ` [PATCH] zsh/random module [UPDATED] Clinton Bunch
  0 siblings, 1 reply; 46+ messages in thread
From: Clinton Bunch @ 2022-11-04 12:57 UTC (permalink / raw)
  To: zsh-workers

On 11/4/2022 2:27 AM, dana wrote:
> On Fri 4 Nov 2022, at 01:22, Clinton Bunch wrote:
>> Then why would you use the builtin in preference to the parameter SRANDOM?
> I guess the main reason would be for the bounds functionality
I suppose that's true.  It may be worth looking at making the default 1 
except in raw and hex-string mode, though that would be harder to 
explain in the documentation, I think.
>
> On Fri 4 Nov 2022, at 01:22, Clinton Bunch wrote:
>> Copied that pattern straight out of Src/Modules/datetime.c
> TIL. I also lied about sysread, it does the same. print/printf, zparseopts,
> and zstyle don't. Can't think of anything else off the top of my head rn
>
> On Fri 4 Nov 2022, at 01:22, Clinton Bunch wrote:
>> Trying to think how to design tests when the output is different every
>> time by design.
> If nothing else it could just make sure it's in the expected format
>
> On Fri 4 Nov 2022, at 01:22, Clinton Bunch wrote:
>> And I can't think what a completion function would
>> complete.  It's not like it's using long options or enumerated arguments.
> Most of zsh's built-ins have no long options but they still have completion
> functions. Some people use completion as a substitute for the documentation.
> Attached _getrandom for your consideration (assumes no further changes, tested
> very minimally)
>
> btw, in writing that function i realised a few things:
>
> * In the documentation, i think the default upper bound should be 4294967295
>    rather than 4294967296
Wanted to put UINT32_MAX, but didn't think Yodl would translate that :)
>
> * Why is the maximum length 64? Also, should that value be documented?
The getrandom() function itself is only guaranteed to return 256 bytes. 
64 32-bit integers.  And it should definitely be documented, I thought I 
had.
>
> * If you put -i after -L/-U it overrides their effect. Maybe the
>    `if (integer_out)` in the code should be an `else if` instead?
That definitely isn't intended behavior.  Specifying both is redundant, 
but bounds should definitely take precedence.
>
> * It appears that -L is inclusive but -U is exclusive. e.g. if you do
>    `getrandom -l1 -L2 -U3` it will only ever return 2. I assume that's not
>    intentional?
When (after I posted, of course), I realized that the algorithm I used 
from arc4random_uniform would never return the upper bound, I began to 
debate whether to try to make it inclusive, handling the pathological 
cases, or documenting it.
>
> On Fri 4 Nov 2022, at 01:22, Clinton Bunch wrote:
>> It still seems weird that the dev guide specifies mixing the two.
> I agree
>
> dana
>
>
> diff --git a/Completion/Zsh/Command/_getrandom b/Completion/Zsh/Command/_getrandom
> new file mode 100644
> index 000000000..3513e10b7
> --- /dev/null
> +++ b/Completion/Zsh/Command/_getrandom
> @@ -0,0 +1,12 @@
> +#compdef getrandom
> +
> +local min=0 max=$(( 2 ** 32 - 1 ))
> +
> +_arguments -s -S : \
> +  '(-r -s)-a+[assign result to specified array parameter]:array parameter:_parameters -g "*array*~*readonly*"' \
> +  '(-a)-s+[assign result to specified scalar parameter]:scalar parameter:_parameters -g "*(integer|scalar)*~*readonly*"' \
> +  '(-r)-i[produce random data as 32-bit unsigned integers]' \
> +  '-l+[specify length of data]: :_numbers -d8 -l1 -m64 -u "bytes or integer elements" "data length"' \
> +  '(-i -L -U)-r[produce random data as raw bytes]' \
> +  '(-r)-L+[specify integer lower bound (implies -i)]: :_numbers -d$min -l$min -m$max "integer lower bound"' \
> +  '(-r)-U+[specify integer upper bound (implies -i)]: :_numbers -d$max -l$min -m$max "integer upper bound"'
>



^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-04 12:57       ` Clinton Bunch
@ 2022-11-08  0:18         ` Clinton Bunch
  2022-11-18 14:30           ` Clinton Bunch
                             ` (3 more replies)
  0 siblings, 4 replies; 46+ messages in thread
From: Clinton Bunch @ 2022-11-08  0:18 UTC (permalink / raw)
  To: zsh-workers

[-- Attachment #1: Type: text/plain, Size: 658 bytes --]

Here's the updated patch taking into account the feedback I got from 
Bart and dana.

-l length has been converted to -c count

-L is placed before -U in the documentation

Documentation specifies that -r (raw) is for binary data.

Range checking has been added to the -L and -U options

Bug where -i overrode -L or -U has been fixed.  Bounds take precedence 
even if -i is redundantly specified.

Default count has been changed to 1 if -i, -L, and/or -U has been 
specified without -a

dana's completion function has been updated with these changes and included

A test file has been created (Not sure how useful it is, but it's there)

[-- Attachment #2: zsh-random.patch --]
[-- Type: text/plain, Size: 23698 bytes --]

diff --git a/Completion/Zsh/Command/_getrandom b/Completion/Zsh/Command/_getrandom
new file mode 100644
index 000000000..d97e36918
--- /dev/null
+++ b/Completion/Zsh/Command/_getrandom
@@ -0,0 +1,14 @@
+#compdef getrandom
+
+#Thans to dana for providing this completion function
+
+local lmin=0 lmax=$(( 2 ** 32 - 2 )) umin=1 umax=$(( 2 ** 32 - 1 ))
+
+_arguments -s -S : \
+  '(-r -s)-a+[assign result to specified array parameter]:array parameter:_parameters -g "*array*~*readonly*"' \
+  '(-a)-s+[assign result to specified scalar parameter]:scalar parameter:_parameters -g "*(integer|scalar)*~*readonly*"' \
+  '(-r)-i[produce random data as 32-bit unsigned integers]' \
+  '-c+[specify count of data]: :_numbers -d8 -l1 -m64 -u "bytes or integer elements" "data length"' \
+  '(-i -L -U)-r[produce random data as raw binary bytes]' \
+  '(-r)-L+[specify integer lower bound (implies -i)]: :_numbers -d$lmin -l$lmin -m$max "integer lower bound"' \
+  '(-r)-U+[specify integer upper bound (implies -i)]: :_numbers -d$umax -l$umin -m$max "integer upper bound"'
diff --git a/Doc/Makefile.in b/Doc/Makefile.in
index 23e5fc7e2..bacee7c27 100644
--- a/Doc/Makefile.in
+++ b/Doc/Makefile.in
@@ -66,7 +66,7 @@ Zsh/mod_example.yo Zsh/mod_files.yo Zsh/mod_langinfo.yo \
 Zsh/mod_mapfile.yo Zsh/mod_mathfunc.yo \
 Zsh/mod_nearcolor.yo Zsh/mod_newuser.yo \
 Zsh/mod_parameter.yo Zsh/mod_pcre.yo Zsh/mod_private.yo \
-Zsh/mod_regex.yo Zsh/mod_sched.yo Zsh/mod_socket.yo \
+Zsh/mod_regex.yo Zsh/mod_random.yo Zsh/mod_sched.yo Zsh/mod_socket.yo \
 Zsh/mod_stat.yo  Zsh/mod_system.yo Zsh/mod_tcp.yo \
 Zsh/mod_termcap.yo Zsh/mod_terminfo.yo \
 Zsh/mod_watch.yo \
diff --git a/Doc/Zsh/mod_random.yo b/Doc/Zsh/mod_random.yo
new file mode 100644
index 000000000..af255f9ce
--- /dev/null
+++ b/Doc/Zsh/mod_random.yo
@@ -0,0 +1,67 @@
+COMMENT(!MOD!zsh/random
+Some High-quality randomness commands, parameters, and functions.
+!MOD!)
+The tt(zsh/random) module gets random data from the kernel random pool. If no
+kernel random pool can be found, the module will not load.
+
+
+subsect(Builtins)
+
+startitem()
+findex(getrandom)
+cindex(random data, printing, array)
+xitem(tt(getrandom) [ tt(-c) var(count) ] [ tt(-r) ] [ tt(-s) var(scalar) ] [tt(-i) ] [ tt(-a) var(array) ] [ tt(-L) var(lower_bound) ] [ tt(-U) var(upper_bound) ])
+NL()Outputs a random hex-encoded string of var(count) bytes by default. NL()
+
+
+startitem()
+item(tt(-c) var(count)) (
+Sets the number of returned random data. Defaults to 8, unless -i, -L, or
+-U is specified without -a, in which case the default is 1. var(count) must be
+between 1 and 64.
+)
+item(tt(-r)) (
+Returns the raw binary bytes rather than a hex-encoded string.
+)
+item(tt(-s) var(scalar)) (
+Saves the random data in var(scalar) instead of printing it. It is an error to
+use this with -i, -L, or -U with a count greater than 1.
+)
+item(tt(-a) var(array)) (
+Saves the random data as an array var(array) of var(count) containing a
+decimal representation of the integers LPAR()tt(-i), tt(-L), or tt(-U)RPAR()
+or individual bytes.
+)
+item(tt(-i)) (
+Produces var(count) 32-bit unsigned integers.
+)
+item(tt(-L) var(lower_bound)) (
+Outputs var(count) integers between var(lower_bound) and var(upper_bound).
+Defaults to 0.  var(lower_bound) can not be negative and the maximum value is
+4,294,967,294.
+)
+item(tt(-U) var(upper_bound)) (
+Outputs var(count) integers between var(lower_bound) and var(upper_bound).
+Defaults to 4,294,967,295. This is the maximum value of var(upper_bound) and
+the minimum is 1.
+)
+enditem()
+enditem()
+
+subsect(Parameters)
+
+startitem()
+vindex(SRANDOM)
+item(tt(SRANDOM)) (
+A random positive 32-bit integer between 0 and 4,294,967,295.  This parameter
+is read-only.
+)
+enditem()
+
+subsect(Math Functions)
+
+startitem()
+item(tt(zrandom+LPAR()RPAR())) (
+Returns a random floating point number between 0 and 1.
+)
+enditem()
diff --git a/Src/Modules/random.c b/Src/Modules/random.c
new file mode 100644
index 000000000..9dc9a9ade
--- /dev/null
+++ b/Src/Modules/random.c
@@ -0,0 +1,675 @@
+/*
+ * random.c - module to access kernel random sources.
+ *
+ * This file is part of zsh, the Z shell.
+ *
+ * Copyright (c) 2022 Clinton Bunch
+ * All rights reserved.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and to distribute modified versions of this software for any
+ * purpose, provided that the above copyright notice and the following
+ * two paragraphs appear in all copies of this software.
+ *
+ * In no event shall Clinton Bunch or the Zsh Development Group be liable
+ * to any party for direct, indirect, special, incidental, or consequential
+ * damages arising out of the use of this software and its documentation,
+ * even if Clinton Bunch and the Zsh Development Group have been advised of
+ * the possibility of such damage.
+ *
+ * Clinton Bunch and the Zsh Development Group specifically disclaim any
+ * warranties, including, but not limited to, the implied warranties of
+ * merchantability and fitness for a particular purpose.  The software
+ * provided hereunder is on an "as is" basis, and Clinton Bunch and the
+ * Zsh Development Group have no obligation to provide maintenance,
+ * support, updates, enhancements, or modifications.
+ *
+ */
+
+#include "random.mdh"
+#include "random.pro"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include <stdbool.h>
+
+#ifdef HAVE_SYS_RANDOM_H
+#include <sys/random.h>
+#endif
+
+/* Simplify select URANDOM specific code */
+#if !defined(HAVE_ARC4RANDOM) && !defined(HAVE_GETRANDOM)
+#define USE_URANDOM
+#endif
+
+/* buffer to pre-load integers for SRANDOM to lessen the context switches */
+uint32_t rand_buff[8];
+static int buf_cnt = -1;
+
+#ifdef USE_URANDOM
+/* File descriptor for /dev/urandom */
+int randfd = -1;
+#endif /* USE_URANDOM */
+
+/*
+ *  Function takes an input buffer and converts the characters to hex and
+ *  places them in an outbut buffer twice the size. Returns -1 if it can't.
+ *  Returns the size of the output otherwise.
+ */
+
+/**/
+static int
+ztr_to_hex(char *out, size_t outlen, uint8_t *input, size_t inlen)
+{
+    int i = 0, j = 0;
+
+    if (outlen < (inlen * 2 + 1))
+	return -1;
+
+    for(i = 0, j = 0; i < inlen; i++, j+=2) {
+	if (j < outlen) {
+	    sprintf((char *)(out + j),"%02X",input[i]);
+	} else {
+	    return -1;
+	}
+    }
+    out[j]='\0';
+    return j;
+}
+
+/**/
+static ssize_t
+getrandom_buffer(void *buf, size_t len)
+{
+    ssize_t ret;
+    size_t  val     = 0;
+    uint8_t *bufptr = buf;
+
+    do {
+#ifdef HAVE_ARC4RANDOM
+	/* Apparently, the creaters of arc4random didn't believe in errors */
+	arc4random_buf(buf,len);
+	ret = len;
+#elif defined(HAVE_GETRANDOM)
+	ret=getrandom(bufptr,(len - val),0);
+#else
+	ret=read(randfd,bufptr,(len - val));
+#endif
+	if (ret < 0) {
+	    if (errno != EINTR || errflag || retflag || breaks || contflag) {
+		zwarn("Unable to get random data: %e.", errno);
+		return -1;
+	    }
+	}
+	bufptr += ret;
+	val    += ret;
+    } while (ret < len);
+    return ret;
+}
+
+/*
+ * Implements the getrandom builtin to return a string of random bytes of
+ * user-specified length.  It either prints them to stdout or returns them
+ * in a parameter passed on the command line.
+ *
+ */
+
+/**/
+static int
+bin_getrandom(char *name, char **argv, Options ops, int func)
+{
+
+    zulong len      = 1;
+    size_t byte_len = 0, outlen; /* buffer lengths */
+    int    pad      = 0;
+
+    bool integer_out = false;	 /*  boolean */
+    int i, j;		         /* loop counters */
+
+    /*maximum string length of a 32-bit integer + null */
+    char int2str[11];
+
+    uint8_t   *buf;
+    uint32_t  *int_buf = NULL, int_val = 0; /* buffer for integer return */
+
+    char *scalar = NULL, *arrname = NULL;   /* parameter names */
+
+    char *outbuff;         /* hex string or metafy'd output */
+    char **array = NULL;   /* array value for -a */
+
+    /* Vailues for -U *nd -L */
+    zulong   upper = UINT32_MAX, lower = 0;
+    uint32_t diff  = UINT32_MAX;
+    uint32_t min   = 0, rej = 0;  /* Reject below min and count */
+    bool     bound = false;       /* set if upper or lower bound specified */
+    /* End of variable declarations */
+
+
+    /* store out put in a scalar variable */
+    if (OPT_ISSET(ops, 's')) {
+	scalar = OPT_ARG(ops, 's');
+	if (!isident(scalar)) {
+	    zwarnnam(name, "argument to -s not an identifier: %s.", scalar);
+	    return 1;
+	}
+    }
+
+    /* create array of descimal numbers */
+    if (OPT_ISSET(ops,'a')) {
+	arrname = OPT_ARG(ops, 'a');
+	if (!isident(arrname)) {
+	    zwarnnam(name, "argument to -a not an identifier: %s.", arrname);
+	    return 1;
+	}
+    }
+
+    /* specify number of random bytes or integers to output */
+    if (OPT_ISSET(ops, 'c')) {
+
+	if (zstrtoul_underscore(OPT_ARG(ops, 'c'), &len)) {
+	    if (len > 64 || len <= 0) {
+		zwarnnam(name,
+			"Count must be between 1 and 64.  You specified: %z.",
+			(zlong) len);
+		return 1;
+	    }
+	} else {
+	    zwarnnam(name, "Couldn't convert -c %s to a number.",
+		    OPT_ARG(ops, 'c'));
+	    return 1;
+	}
+    }
+
+    if (OPT_ISSET(ops, 'U')) {
+	if (!zstrtoul_underscore(OPT_ARG(ops, 'U'), &upper)) {
+	    zwarnnam(name, "Couldn't convert -U %s to a number.",
+		    OPT_ARG(ops, 'U'));
+	    return 1;
+	} else if (upper > UINT32_MAX || upper < 1) {
+	    zwarnnam(name,
+		    "Upper bound must be between 1 and %z you specified: %z.",
+		     (zlong) UINT32_MAX, (zlong) upper);
+	    return 1;
+	} else {
+	    bound = true;
+	}
+    }
+
+    if (OPT_ISSET(ops, 'L')) {
+	if (!zstrtoul_underscore(OPT_ARG(ops, 'L'), &lower)) {
+	    zwarnnam(name, "Couldn't convert -L %s to a positive number.",
+		    OPT_ARG(ops, 'L'));
+	    return 1;
+	} else if (lower < 0 || lower > UINT32_MAX-1) {
+	    zwarnnam(name,
+		    "Lower bound must be between 0 and %z you specified: %z.",
+		    (zlong) UINT32_MAX-1, (zlong) lower);
+	    return 1;
+	} else {
+	    bound = true;
+	}
+    }
+
+    /* integer rather than byte output to array */
+    if (OPT_ISSET(ops, 'i')) {
+	if (OPT_ISSET(ops,'s') && len > 1) {
+	    zwarnnam(name, "-i only makes sense with -s when count is 1.");
+	    return 1;
+	}
+	integer_out = true;
+    }
+
+    if (bound) {
+	if (scalar && len > 1) {
+	    zwarnnam(name, "Bounds only make sense with -s when count is 1.");
+	    return 1;
+	}
+
+	if (lower > upper) {
+	    zwarnnam(name, "-U must be larger than -L.");
+	    return 1;
+	}
+
+	diff = upper == UINT32_MAX && lower == 0 ? UINT32_MAX :
+		upper - lower + 1;
+
+	if(!diff) {
+	    zwarnnam(name, "Upper and Lower bounds cannot be the same.");
+	    return 1;
+	}
+    }
+
+    if(!OPT_ISSET(ops,'c'))
+	if ((!bound && !integer_out) || arrname)
+	len = 8;
+
+
+    if (!byte_len)
+	byte_len = len;
+
+
+    if (bound) {
+	pad = len / 16 + 1;
+	byte_len = (len + pad) * sizeof(uint32_t);
+    } else if (integer_out)
+	byte_len = len * sizeof(uint32_t);
+
+    if (byte_len > 256)
+	byte_len=256;
+
+    buf = zalloc(byte_len);
+
+    if (getrandom_buffer(buf, byte_len) < 0) {
+	zwarnnam(name, "Couldn't get random data.");
+	return 1;
+    }
+
+    /* Raw output or hex string */
+    if (!integer_out && !bound && !arrname) {
+	if (OPT_ISSET(ops, 'r')) {
+	    outbuff = (char *) buf;
+	    outlen  = len;
+	} else {
+	    outlen=len*2;
+	    outbuff = (char*) zalloc(outlen+1);
+	    ztr_to_hex(outbuff, outlen+1, buf, len);
+	}
+
+	if (scalar) {
+	    setsparam(scalar, metafy(outbuff, outlen, META_DUP));
+	} else {
+	    fwrite(outbuff,1,outlen,stdout);
+	    if (!OPT_ISSET(ops, 'r'))
+		printf("\n");
+	}
+	if (outbuff !=  (void *) buf) {
+	    zfree(outbuff,outlen+1);
+	}
+	return 0;
+    } else {
+	if (OPT_ISSET(ops, 'r')) {
+	    zwarnnam(name, "-r can't be specified with bounds or -i or -a.");
+	    return 1;
+	}
+    }
+
+    if (arrname) {
+	array = (char **) zalloc((len+1)*sizeof(char *));
+	array[len] = NULL;
+    }
+
+    if (integer_out || bound)
+	int_buf=(uint32_t *)buf;
+
+    min = -diff % diff;
+
+    j = 0; rej = 0;
+    for(i = 0; i < len; i++) {
+	if (integer_out && !bound) {
+	    int_val = int_buf[i];
+	} else if (!bound) {
+	    int_val=buf[i];
+	} else {
+	    while (int_buf[j] < min ) { /*Reject */
+		j++; rej++;
+		if (j * sizeof(uint32_t) >= byte_len ){
+		    if (getrandom_buffer(buf, byte_len) < 0) {
+			zwarnnam(name, "Can't get enough random data.");
+			return 1;
+		    }
+		    j = 0;
+		}
+	    }
+	    int_val = int_buf[j++] % diff + lower;
+	}
+	sprintf(int2str, "%u", int_val);
+	if (arrname) {
+	    array[i]=ztrdup(int2str);
+	} else if (scalar && len == 1) {
+	    setiparam(scalar, int_val);
+	} else {
+	    printf("%s ",int2str);
+	}
+    }
+    if (arrname) {
+	setaparam(arrname,array);
+    } else if (!scalar) {
+	printf("\n");
+    }
+
+
+    zfree(buf, byte_len);
+    return 0;
+}
+
+
+/*
+ * Provides for the SRANDOM parameter and returns an unsigned 32-bit random
+ * integer.
+ */
+
+/**/
+static zlong
+get_srandom(UNUSED(Param pm)) {
+
+    if(buf_cnt < 0) {
+	getrandom_buffer((void*) rand_buff,sizeof(rand_buff));
+	buf_cnt=7;
+    }
+    return rand_buff[buf_cnt--];
+}
+
+/*
+ * Implements the mathfunc zrandom and returns a random floating-point
+ * number between 0 and 1.  Does this by getting a random 32-bt integer
+ * and dividing it by UINT32_MAX.
+ *
+ */
+
+/**/
+static mnumber
+math_zrandom(UNUSED(char *name), UNUSED(int argc), UNUSED(mnumber *argv),
+	     UNUSED(int id))
+{
+    mnumber ret;
+    double r;
+
+    r = random_real();
+    if (r < 0) {
+	zwarnnam(name, "Failed to get sufficient random data.");
+    }
+    ret.type = MN_FLOAT;
+    ret.u.d = r;
+
+    return ret;
+}
+
+static struct builtin bintab[] = {
+    BUILTIN("getrandom", 0, bin_getrandom, 0, 8, 0, "ria:s:c:U:L:", NULL),
+};
+
+static const struct gsu_integer srandom_gsu =
+{ get_srandom, nullintsetfn, stdunsetfn };
+
+static struct paramdef patab[] = {
+    {"SRANDOM", PM_INTEGER | PM_READONLY_SPECIAL | PM_HIDEVAL, NULL,
+	    &srandom_gsu, NULL, NULL, NULL},
+};
+
+static struct mathfunc mftab[] = {
+    NUMMATHFUNC("zrandom", math_zrandom, 0, 0, 0),
+};
+
+static struct features module_features = {
+    bintab, sizeof(bintab)/sizeof(*bintab),
+    NULL, 0,
+    mftab, sizeof(mftab)/sizeof(*mftab),
+    patab, sizeof(patab)/sizeof(*patab),
+    0
+};
+
+/**/
+int
+setup_(UNUSED(Module m))
+{
+#ifdef USE_URANDOM
+    /* Check for the existence of /dev/urandom */
+
+    struct stat st;
+
+    if (lstat("/dev/urandom",&st) < 0) {
+	zwarn("No kernel random pool found.");
+	return 1;
+    }
+
+    if (!(S_ISCHR(st.st_mode)) ) {
+	zwarn("No kernel random pool found.");
+	return 1;
+    }
+#endif /* USE_URANDOM */
+    return 0;
+}
+
+/**/
+int
+features_(Module m, char ***features)
+{
+    *features = featuresarray(m, &module_features);
+    return 0;
+}
+
+/**/
+int
+enables_(Module m, int **enables)
+{
+    return handlefeatures(m, &module_features, enables);
+}
+
+/**/
+int
+boot_(Module m)
+{
+#ifdef USE_URANDOM
+    /*
+     * Open /dev/urandom.  Here because of a weird issue on HP-UX 11.31
+     * When opening in setup_ open returned 0.  In boot_, it returns
+     * an unused file descriptor. Decided against ifdef HPUX as it works
+     * here just as well for other platforms.
+     *
+     */
+
+    int tmpfd=-1;
+
+    if ((tmpfd = open("/dev/urandom", O_RDONLY)) < 0) {
+	zwarn("Could not access kernel random pool.");
+	return 1;
+    }
+    randfd = movefd(tmpfd);
+    addmodulefd(randfd,FDT_MODULE);
+    if (randfd < 0) {
+	zwarn("Could not access kernel random pool.");
+	return 1;
+    }
+#endif /* USE_URANDOM */
+    return 0;
+}
+
+/**/
+int
+cleanup_(Module m)
+{
+    return setfeatureenables(m, &module_features, NULL);
+}
+
+/**/
+int
+finish_(UNUSED(Module m))
+{
+#ifdef USE_URANDOM
+    if (randfd >= 0)
+	zclose(randfd);
+#endif /* USE_URANDOM */
+    return 0;
+}
+
+
+/* Count the number of leading zeros, hopefully in gcc/clang by HW
+ * instruction */
+#if defined(__GNUC__) || defined(__clang__)
+#define clz64(x) __builtin_clzll(x)
+#else
+#define clz64(x) _zclz64(x)
+
+/**/
+int
+_zclz64(uint64_t x) {
+    int n = 0;
+
+    if (x == 0)
+	return 64;
+
+    if (!(x & 0xFFFFFFFF00000000)) {
+	n+=32;
+	x<<=32;
+    }
+    if (!(x & 0xFFFF000000000000)) {
+	n+=16;
+	x<<=16;
+    }
+    if (!(x & 0xFF00000000000000)) {
+	n+=8;
+	x<<=8;
+    }
+    if (!(x & 0xF000000000000000)) {
+	n+=4;
+	x<<=4;
+    }
+    if (!(x & 0xC000000000000000)) {
+	n+=2;
+	x<<=1;
+    }
+    if (!(x & 0x8000000000000000)) {
+	n+=1;
+    }
+    return n;
+}
+#endif /* __GNU_C__ or __clang__ */
+
+/**/
+uint64_t
+random64(void) {
+    uint64_t r;
+
+    if(getrandom_buffer(&r,sizeof(r)) < 0) {
+	zwarn("zsh/random: Can't get sufficient random data.");
+	return 1; /* 0 will cause loop */
+    }
+
+    return r;
+}
+
+/* Following code is under the below copyright, despite changes for error
+ * handling and removing GCCisms */
+
+/*-
+ * Copyright (c) 2014 Taylor R. Campbell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Uniform random floats: How to generate a double-precision
+ * floating-point numbers in [0, 1] uniformly at random given a uniform
+ * random source of bits.
+ *
+ * See <http://mumble.net/~campbell/2014/04/28/uniform-random-float>
+ * for explanation.
+ *
+ * Updated 2015-02-22 to replace ldexp(x, <constant>) by x * ldexp(1,
+ * <constant>), since glibc and NetBSD libm both have slow software
+ * bit-twiddling implementations of ldexp, but GCC can constant-fold
+ * the latter.
+ */
+
+#include <math.h>
+#include <stdint.h>
+
+/*
+ * random_real: Generate a stream of bits uniformly at random and
+ * interpret it as the fractional part of the binary expansion of a
+ * number in [0, 1], 0.00001010011111010100...; then round it.
+ */
+
+/**/
+double
+random_real(void)
+{
+	int exponent = 0;
+	uint64_t significand = 0;
+	uint64_t r = 0;
+	unsigned shift;
+
+	/*
+	 * Read zeros into the exponent until we hit a one; the rest
+	 * will go into the significand.
+	 */
+	while (significand == 0) {
+		exponent -= 64;
+
+		/* Get random64 and check for error */
+		errno = 0;
+		significand = random64();
+		if (errno)
+		    return -1;
+		/*
+		 * If the exponent falls below -1074 = emin + 1 - p,
+		 * the exponent of the smallest subnormal, we are
+		 * guaranteed the result will be rounded to zero.  This
+		 * case is so unlikely it will happen in realistic
+		 * terms only if random64 is broken.
+		 */
+		if (exponent < -1074)
+			return 0;
+	}
+
+	/*
+	 * There is a 1 somewhere in significand, not necessarily in
+	 * the most significant position.  If there are leading zeros,
+	 * shift them into the exponent and refill the less-significant
+	 * bits of the significand.  Can't predict one way or another
+	 * whether there are leading zeros: there's a fifty-fifty
+	 * chance, if random64 is uniformly distributed.
+	 */
+	shift = clz64(significand);
+	if (shift != 0) {
+
+		errno = 0;
+		r = random64();
+		if (errno)
+		    return -1;
+
+		exponent -= shift;
+		significand <<= shift;
+		significand |= (r >> (64 - shift));
+	}
+
+	/*
+	 * Set the sticky bit, since there is almost surely another 1
+	 * in the bit stream.  Otherwise, we might round what looks
+	 * like a tie to even when, almost surely, were we to look
+	 * further in the bit stream, there would be a 1 breaking the
+	 * tie.
+	 */
+	significand |= 1;
+
+	/*
+	 * Finally, convert to double (rounding) and scale by
+	 * 2^exponent.
+	 */
+	return ldexp((double)significand, exponent);
+}
diff --git a/Src/Modules/random.mdd b/Src/Modules/random.mdd
new file mode 100644
index 000000000..3803a4533
--- /dev/null
+++ b/Src/Modules/random.mdd
@@ -0,0 +1,7 @@
+name=zsh/random
+link=either
+load=yes
+
+autofeatures="b:getrandom p:SRANDOM f:zrandom"
+
+objects="random.o"
diff --git a/Test/V15random.ztst b/Test/V15random.ztst
new file mode 100644
index 000000000..3ff347dbb
--- /dev/null
+++ b/Test/V15random.ztst
@@ -0,0 +1,51 @@
+# Tests for the zsh/random module
+%test
+  getrandom -U 56
+1f:Checking if system has a kernel random source
+?(eval):1: No kernel random pool found.
+
+  getrandom -U 64
+0:Checking upper Bound is observed
+*><0-64> 
+
+  tmpmsg="failed"
+  getrandom -L 4 -U 5 -c 16 -a tmpa
+  for i in ${tmpa[@]}
+  do
+    if (( i == 5 )); then
+      tmpmsg="success"
+      break
+    fi
+  done
+  echo $tmpmsg
+0:Checking that upper bound is inclusive
+F:Try running the test again: make TESTNUM=V15 check
+>success
+
+  getrandom -L -10 -U 5
+1:Checking error message if -L given a negative number
+?(eval):getrandom:1: Couldn't convert -L -10 to a positive number.
+
+  getrandom -L 5 -U $(( 2**32 ))
+1:Checking error message if -U is out of range
+?(eval):getrandom:1: Upper bound must be between 1 and 4294967295 you specified: 4294967296.
+
+  getrandom -c 65
+1:Checking error when count is out of range
+?(eval):getrandom:1: Count must be between 1 and 64.  You specified: 65.
+
+  getrandom
+0:Checking if default options give a string of 16 hex digits
+*>[0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F]
+
+  getrandom -i
+0:Checking if getrandom -i produces an unsigned 32-bit integer
+*><0-4294967295> 
+
+  echo $SRANDOM
+0:Checking if SRANDOM produces an unsigned 32-bit integer
+*><0-4294967295>
+
+  echo $(( zrandom() ))
+0:Checking if zrandom() produces a floating-point number between 0 and 1
+*>0(|.)[[:digit:]]#
diff --git a/configure.ac b/configure.ac
index 074141d38..f1fa01274 100644
--- a/configure.ac
+++ b/configure.ac
@@ -675,6 +675,7 @@ fi
 AC_CHECK_HEADERS(sys/time.h sys/times.h sys/select.h termcap.h termio.h \
 		 termios.h sys/param.h sys/filio.h string.h memory.h \
 		 limits.h fcntl.h libc.h sys/utsname.h sys/resource.h \
+                 sys/random.h \
 		 locale.h errno.h stdio.h stdarg.h varargs.h stdlib.h \
 		 unistd.h sys/capability.h \
 		 utmp.h utmpx.h sys/types.h pwd.h grp.h poll.h sys/mman.h \
@@ -1337,6 +1338,7 @@ AC_CHECK_FUNCS(strftime strptime mktime timelocal \
 	       cygwin_conv_path \
 	       nanosleep \
 	       srand_deterministic \
+               getrandom arc4random \
 	       setutxent getutxent endutxent getutent)
 AC_FUNC_STRCOLL
 

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-08  0:18         ` [PATCH] zsh/random module [UPDATED] Clinton Bunch
@ 2022-11-18 14:30           ` Clinton Bunch
  2022-11-19  6:42             ` Lawrence Velázquez
  2022-11-18 16:23           ` Stephane Chazelas
                             ` (2 subsequent siblings)
  3 siblings, 1 reply; 46+ messages in thread
From: Clinton Bunch @ 2022-11-18 14:30 UTC (permalink / raw)
  To: zsh-workers

I submitted the final version of zsh/random (workers/50906) on the 7th 
and it seems to hang in limbo with no further feedback that would cause 
revisions, nor discussion about whether it should be included or not.

Did I miss a step in submitting a patch for inclusion?

Was there an off-list discussion that decided it's not worthy?

Or did it just get lost in the shuffle?

I don't want to seem ungrateful for the help I've received, but am I 
just being impatient or has my poor module been forgotten about?




^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-08  0:18         ` [PATCH] zsh/random module [UPDATED] Clinton Bunch
  2022-11-18 14:30           ` Clinton Bunch
@ 2022-11-18 16:23           ` Stephane Chazelas
  2022-11-18 17:08             ` Clinton Bunch
  2022-11-21  1:07           ` [PATCH] zsh/random module [UPDATED] Matthew Martin
  2022-11-23 19:46           ` Daniel Shahaf
  3 siblings, 1 reply; 46+ messages in thread
From: Stephane Chazelas @ 2022-11-18 16:23 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

I should point out first that I'm not a zsh maintainer. I have
been using zsh daily for over 25 years though, and have been
following this ML on and off for a few years.

I should also point out that I've not read the past discussions
about your module. Sorry if I'm repeating points that have been
made earlier.

Thanks for contributing to zsh, it's always good to get more
hands on it!

In this instance though I'm not particularly convinced that that
new module is particularly needed.

We already have $RANDOM and $(( rand48() )). Most if not all of
the systems I've ever used had /dev/random / /dev/urandom

You can already read n bytes from them with:

set +o multibyte
read -ru0 -k20 bytes < /dev/random

or

sysread -s 20 bytes < /dev/urandom

Split into individual bytes in an array with:

array=( ${(s[])bytes )

convert those from/to decimal/octal/hexadecimal with printf or
the # flag or #/## arithmetic operators:

hex=(); printf -v hex %02x \'$^array

For instance or:

set -o extendedglob
hex_string=${bytes//(#m)?/${(l[2][0])$(([#16] #MATCH))}}

So it seems the functionality of that module could be
implemented in a few lines of zsh. Likely less efficiently and
more awkwardly, but maybe adding capabilities to decode bytes
(a la perl's pack/unpack) would be more useful and could be used
for other things than /dev/random.

Some comments about the API:

>   -c COUNT
>        Sets the number of returned random data.  Defaults to 8,
>        unless -i, -L, or -U is specified without -a, in which case
>        the default is 1.  COUNT must be between 1 and 64.

Why those arbitrary limits? Why not from 0 to inf?

Without -i you get uint8 in hex and with -i uint32 in decimal.
Why the different bases? Why limiting the bounds to unsigned 32
bits when zsh arithmetic is in signed 64 bits?

Other than that, it looks good to me.

Trying to use it to get a random word with it, I came up with:

$ set -o extendedglob
$ l=({A..Z} {a..z} {0..9} _)
$ getrandom -a a -c20 -L1 -U$#l
$ echo ${(j[])a//(#m)*/$l[MATCH]}
EMhKvFuQlmCVSuet5uh0

Doing it straight by reading /dev/urandom would have been a lot
more awkward. Could that be improved/simplified?

-- 
Stephane


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-18 16:23           ` Stephane Chazelas
@ 2022-11-18 17:08             ` Clinton Bunch
  2022-11-18 18:12               ` Stephane Chazelas
  0 siblings, 1 reply; 46+ messages in thread
From: Clinton Bunch @ 2022-11-18 17:08 UTC (permalink / raw)
  To: zsh-workers


On 11/18/2022 10:23 AM, Stephane Chazelas wrote:
> I should point out first that I'm not a zsh maintainer. I have
> been using zsh daily for over 25 years though, and have been
> following this ML on and off for a few years.
>
> I should also point out that I've not read the past discussions
> about your module. Sorry if I'm repeating points that have been
> made earlier.
>
> Thanks for contributing to zsh, it's always good to get more
> hands on it!
>
> In this instance though I'm not particularly convinced that that
> new module is particularly needed.
>
> We already have $RANDOM and $(( rand48() )). Most if not all of
> the systems I've ever used had /dev/random / /dev/urandom
RANDOM and rand48 are predictable and low quality.
>
> You can already read n bytes from them with:
>
> set +o multibyte
> read -ru0 -k20 bytes < /dev/random
>
> or
>
> sysread -s 20 bytes < /dev/urandom
>
> Split into individual bytes in an array with:
>
> array=( ${(s[])bytes )
>
> convert those from/to decimal/octal/hexadecimal with printf or
> the # flag or #/## arithmetic operators:
>
> hex=(); printf -v hex %02x \'$^array
>
> For instance or:
>
> set -o extendedglob
> hex_string=${bytes//(#m)?/${(l[2][0])$(([#16] #MATCH))}}
>
> So it seems the functionality of that module could be
> implemented in a few lines of zsh. Likely less efficiently and
> more awkwardly, but maybe adding capabilities to decode bytes
> (a la perl's pack/unpack) would be more useful and could be used
> for other things than /dev/random.
>
> Some comments about the API:
>
>>    -c COUNT
>>         Sets the number of returned random data.  Defaults to 8,
>>         unless -i, -L, or -U is specified without -a, in which case
>>         the default is 1.  COUNT must be between 1 and 64.
> Why those arbitrary limits? Why not from 0 to inf?

the getrandom function only guarantees 256 bytes.  64 32-bit integers.  
Allowing 256 uint8 would be possible, but harder to explain and doesn't 
work well with bounding.  As the difference between the lower and upper 
limit surpasses half the max, the number of bytes thrown away to allow 
uniformity of distribution becomes unwieldy.

Also zero would not be particularly useful :)

>
> Without -i you get uint8 in hex and with -i uint32 in decimal.
> Why the different bases?

It's possible to break a hex string into bytes in a rather straight 
forward way.   A string of decimal numbers not so much, but decimal 
numbers are easier to manipulate.

If I didn't think it should do *something* when given no arguments, I'd 
eliminate the hex string altogether.

> Why limiting the bounds to unsigned 32
> bits when zsh arithmetic is in signed 64 bits?
compatibility with SRANDOM in bash.  Return size of arc4random. Reducing 
maximum count to 32.
>
> Other than that, it looks good to me.
>
> Trying to use it to get a random word with it, I came up with:
>
> $ set -o extendedglob
> $ l=({A..Z} {a..z} {0..9} _)
> $ getrandom -a a -c20 -L1 -U$#l
> $ echo ${(j[])a//(#m)*/$l[MATCH]}
> EMhKvFuQlmCVSuet5uh0
>
> Doing it straight by reading /dev/urandom would have been a lot
> more awkward. Could that be improved/simplified?
It could be done, but a list of acceptable characters would need to be 
agreed upon and documented.  Are symbols acceptable?  About half of 
passwords require them, but only accept certain ones and that differs 
from website to website.
>


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-18 17:08             ` Clinton Bunch
@ 2022-11-18 18:12               ` Stephane Chazelas
  2022-11-18 18:38                 ` Clinton Bunch
  0 siblings, 1 reply; 46+ messages in thread
From: Stephane Chazelas @ 2022-11-18 18:12 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

2022-11-18 11:08:42 -0600, Clinton Bunch:
[...]
> > Some comments about the API:
> > 
> > >    -c COUNT
> > >         Sets the number of returned random data.  Defaults to 8,
> > >         unless -i, -L, or -U is specified without -a, in which case
> > >         the default is 1.  COUNT must be between 1 and 64.
> > Why those arbitrary limits? Why not from 0 to inf?
> 
> the getrandom function only guarantees 256 bytes.  64 32-bit integers. 
> Allowing 256 uint8 would be possible, but harder to explain and doesn't work
> well with bounding.  As the difference between the lower and upper limit
> surpasses half the max, the number of bytes thrown away to allow uniformity
> of distribution becomes unwieldy.
> 
> Also zero would not be particularly useful :)

You could call getrandom() as many times as necessary. From a
user point of view (mine at least) that limit doesn't make
sense. Users don't care what API you use underneath.

An array can have from 0 to inf elements, a string 0 to inf
bytes, I should be able to request from 0 to inf numbers/bytes to
store in my array/string.

> > Without -i you get uint8 in hex and with -i uint32 in decimal.
> > Why the different bases?
> 
> It's possible to break a hex string into bytes in a rather straight forward
> way.   A string of decimal numbers not so much, but decimal numbers are
> easier to manipulate.
> 
> If I didn't think it should do *something* when given no arguments, I'd
> eliminate the hex string altogether.

Alternatively, you could add a -f %02x, -f %u -f %c and do
without the -r/-i, with default -c 8 -f %02x -L 0 -U 255.

Or maybe keep the -i but only as an alias for -c 1 -f %u -L 0 -U
$((2**32-1)). (-r would be -f %c)

[...]
> > Trying to use it to get a random word with it, I came up with:
> > 
> > $ set -o extendedglob
> > $ l=({A..Z} {a..z} {0..9} _)
> > $ getrandom -a a -c20 -L1 -U$#l
> > $ echo ${(j[])a//(#m)*/$l[MATCH]}
> > EMhKvFuQlmCVSuet5uh0
> > 
> > Doing it straight by reading /dev/urandom would have been a lot
> > more awkward. Could that be improved/simplified?
> It could be done, but a list of acceptable characters would need to be
> agreed upon and documented.  Are symbols acceptable?  About half of
> passwords require them, but only accept certain ones and that differs from
> website to website.
[...]

I was not suggesting you to add that as an extra feature to
getrandom.

-- 
Stephane


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-18 18:12               ` Stephane Chazelas
@ 2022-11-18 18:38                 ` Clinton Bunch
  2022-11-23 19:52                   ` Daniel Shahaf
  0 siblings, 1 reply; 46+ messages in thread
From: Clinton Bunch @ 2022-11-18 18:38 UTC (permalink / raw)
  To: zsh-workers


On 11/18/2022 12:12 PM, Stephane Chazelas wrote:
> 2022-11-18 11:08:42 -0600, Clinton Bunch:
> [...]
>>> Some comments about the API:
>>>
>>>>     -c COUNT
>>>>          Sets the number of returned random data.  Defaults to 8,
>>>>          unless -i, -L, or -U is specified without -a, in which case
>>>>          the default is 1.  COUNT must be between 1 and 64.
>>> Why those arbitrary limits? Why not from 0 to inf?
>> the getrandom function only guarantees 256 bytes.  64 32-bit integers.
>> Allowing 256 uint8 would be possible, but harder to explain and doesn't work
>> well with bounding.  As the difference between the lower and upper limit
>> surpasses half the max, the number of bytes thrown away to allow uniformity
>> of distribution becomes unwieldy.
>>
>> Also zero would not be particularly useful :)
> You could call getrandom() as many times as necessary. From a
> user point of view (mine at least) that limit doesn't make
> sense. Users don't care what API you use underneath.
>
> An array can have from 0 to inf elements, a string 0 to inf
> bytes, I should be able to request from 0 to inf numbers/bytes to
> store in my array/string.
I can't do inf (there is a limit to how big an array can be, the largest 
integer for an index).  I honestly think it's unlikely anyone will need 
more than 64 random integers.  I saw no reason to expand the limit, but 
it can be done.
>>> Without -i you get uint8 in hex and with -i uint32 in decimal.
>>> Why the different bases?
>> It's possible to break a hex string into bytes in a rather straight forward
>> way.   A string of decimal numbers not so much, but decimal numbers are
>> easier to manipulate.
>>
>> If I didn't think it should do *something* when given no arguments, I'd
>> eliminate the hex string altogether.
> Alternatively, you could add a -f %02x, -f %u -f %c and do
> without the -r/-i, with default -c 8 -f %02x -L 0 -U 255.
That seems a lot more confusing than -r (raw) or -i (integer) unless 
it's expanded to a lot more arbitrary formats.  At least for someone not 
familiar with printf formats.
>
> Or maybe keep the -i but only as an alias for -c 1 -f %u -L 0 -U
> $((2**32-1)). (-r would be -f %c)
>
> [...]
>>> Trying to use it to get a random word with it, I came up with:
>>>
>>> $ set -o extendedglob
>>> $ l=({A..Z} {a..z} {0..9} _)
>>> $ getrandom -a a -c20 -L1 -U$#l
>>> $ echo ${(j[])a//(#m)*/$l[MATCH]}
>>> EMhKvFuQlmCVSuet5uh0
>>>
>>> Doing it straight by reading /dev/urandom would have been a lot
>>> more awkward. Could that be improved/simplified?
>> It could be done, but a list of acceptable characters would need to be
>> agreed upon and documented.  Are symbols acceptable?  About half of
>> passwords require them, but only accept certain ones and that differs from
>> website to website.
> [...]
>
> I was not suggesting you to add that as an extra feature to
> getrandom.
>


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-18 14:30           ` Clinton Bunch
@ 2022-11-19  6:42             ` Lawrence Velázquez
  0 siblings, 0 replies; 46+ messages in thread
From: Lawrence Velázquez @ 2022-11-19  6:42 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

On Fri, Nov 18, 2022, at 9:30 AM, Clinton Bunch wrote:
> Did I miss a step in submitting a patch for inclusion?

You did not.

> Was there an off-list discussion that decided it's not worthy?

Not to my knowledge.

> Or did it just get lost in the shuffle?

It was not lost.

> I don't want to seem ungrateful for the help I've received, but am I 
> just being impatient or has my poor module been forgotten about?

The pace of development here may be more leisurely than what you're
used to.

-- 
vq


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-08  0:18         ` [PATCH] zsh/random module [UPDATED] Clinton Bunch
  2022-11-18 14:30           ` Clinton Bunch
  2022-11-18 16:23           ` Stephane Chazelas
@ 2022-11-21  1:07           ` Matthew Martin
  2022-11-21  1:59             ` Clinton Bunch
  2022-11-23 19:46           ` Daniel Shahaf
  3 siblings, 1 reply; 46+ messages in thread
From: Matthew Martin @ 2022-11-21  1:07 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

On Mon, Nov 07, 2022 at 06:18:15PM -0600, Clinton Bunch wrote:
> Here's the updated patch taking into account the feedback I got from Bart
> and dana.
> 
> -l length has been converted to -c count
> 
> -L is placed before -U in the documentation
> 
> Documentation specifies that -r (raw) is for binary data.
> 
> Range checking has been added to the -L and -U options
> 
> Bug where -i overrode -L or -U has been fixed.  Bounds take precedence even
> if -i is redundantly specified.
> 
> Default count has been changed to 1 if -i, -L, and/or -U has been specified
> without -a
> 
> dana's completion function has been updated with these changes and included
> 
> A test file has been created (Not sure how useful it is, but it's there)

I'm not clear on where the getrandom builtin came from. I understand the
desire for SRANDOM to have a proper random source in the shell; however,
the rest seems like feature creep that if necessary could be implemented
with a loop. It seems prudent to keep the initial module to a minimum to
ensure there's a usecase and so backwards compat concerns don't crop up.

Would it suffice for your uses to have a module with just SRANDOM?


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-21  1:07           ` [PATCH] zsh/random module [UPDATED] Matthew Martin
@ 2022-11-21  1:59             ` Clinton Bunch
  2022-11-21  2:21               ` Matthew Martin
  0 siblings, 1 reply; 46+ messages in thread
From: Clinton Bunch @ 2022-11-21  1:59 UTC (permalink / raw)
  To: zsh-workers

On 11/20/2022 7:07 PM, Matthew Martin wrote:
> On Mon, Nov 07, 2022 at 06:18:15PM -0600, Clinton Bunch wrote:
>> Here's the updated patch taking into account the feedback I got from Bart
>> and dana.
>>
>> -l length has been converted to -c count
>>
>> -L is placed before -U in the documentation
>>
>> Documentation specifies that -r (raw) is for binary data.
>>
>> Range checking has been added to the -L and -U options
>>
>> Bug where -i overrode -L or -U has been fixed.  Bounds take precedence even
>> if -i is redundantly specified.
>>
>> Default count has been changed to 1 if -i, -L, and/or -U has been specified
>> without -a
>>
>> dana's completion function has been updated with these changes and included
>>
>> A test file has been created (Not sure how useful it is, but it's there)
> I'm not clear on where the getrandom builtin came from. I understand the
> desire for SRANDOM to have a proper random source in the shell; however,
> the rest seems like feature creep that if necessary could be implemented
> with a loop. It seems prudent to keep the initial module to a minimum to
> ensure there's a usecase and so backwards compat concerns don't crop up.
>
> Would it suffice for your uses to have a module with just SRANDOM?

One of my use cases is in precmd to replace this function

get_random() {
   local retvar=$1;shift
   local max=${1:-32767}

   local out=$(( int( rand48(my_seed) * $max + 0.5 ) ))

   eval "${retvar}=$out"
}

used here:

precmd () {
   get_random cdb_fg "6"
   psvar[1]=$(( cdb_fg + 1))
}

A better more uniform distribution of random numbers can be achieved with

getrandom -U 6 -s cdb_fg

than $SRANDOM % 7 which will have a modulo bias



>



^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-21  1:59             ` Clinton Bunch
@ 2022-11-21  2:21               ` Matthew Martin
  2022-11-21  2:57                 ` Clinton Bunch
  0 siblings, 1 reply; 46+ messages in thread
From: Matthew Martin @ 2022-11-21  2:21 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

On Sun, Nov 20, 2022 at 07:59:54PM -0600, Clinton Bunch wrote:
> On 11/20/2022 7:07 PM, Matthew Martin wrote:
> > I'm not clear on where the getrandom builtin came from. I understand the
> > desire for SRANDOM to have a proper random source in the shell; however,
> > the rest seems like feature creep that if necessary could be implemented
> > with a loop. It seems prudent to keep the initial module to a minimum to
> > ensure there's a usecase and so backwards compat concerns don't crop up.
> > 
> > Would it suffice for your uses to have a module with just SRANDOM?
> 
> One of my use cases is in precmd to replace this function
> 
> get_random() {
>   local retvar=$1;shift
>   local max=${1:-32767}
> 
>   local out=$(( int( rand48(my_seed) * $max + 0.5 ) ))
> 
>   eval "${retvar}=$out"
> }
> 
> used here:
> 
> precmd () {
>   get_random cdb_fg "6"
>   psvar[1]=$(( cdb_fg + 1))
> }
> 
> A better more uniform distribution of random numbers can be achieved with
> 
> getrandom -U 6 -s cdb_fg
> 
> than $SRANDOM % 7 which will have a modulo bias

It's true SRANDOM % 7 would have modulo bias; however, even if only
SRANDOM is provided, the out of range rejection loop could happen in
shell code. While that's less convenient, I am of the opinion once
a user needs to care about modulo bias, they're likely better served by
a language other than shell.

If a uniform random function is desired in zsh, I think it should mirror
the interface of arc4random_uniform: just take an upper_bound and return
a value in the range [0, upper_bound).


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-21  2:21               ` Matthew Martin
@ 2022-11-21  2:57                 ` Clinton Bunch
  2022-11-21  3:14                   ` Lawrence Velázquez
  2022-11-23  1:23                   ` Matthew Martin
  0 siblings, 2 replies; 46+ messages in thread
From: Clinton Bunch @ 2022-11-21  2:57 UTC (permalink / raw)
  To: zsh-workers

On 11/20/2022 8:21 PM, Matthew Martin wrote:
> On Sun, Nov 20, 2022 at 07:59:54PM -0600, Clinton Bunch wrote:
>> On 11/20/2022 7:07 PM, Matthew Martin wrote:
>>> I'm not clear on where the getrandom builtin came from. I understand the
>>> desire for SRANDOM to have a proper random source in the shell; however,
>>> the rest seems like feature creep that if necessary could be implemented
>>> with a loop. It seems prudent to keep the initial module to a minimum to
>>> ensure there's a usecase and so backwards compat concerns don't crop up.
>>>
>>> Would it suffice for your uses to have a module with just SRANDOM?
>> One of my use cases is in precmd to replace this function
>>
>> get_random() {
>>    local retvar=$1;shift
>>    local max=${1:-32767}
>>
>>    local out=$(( int( rand48(my_seed) * $max + 0.5 ) ))
>>
>>    eval "${retvar}=$out"
>> }
>>
>> used here:
>>
>> precmd () {
>>    get_random cdb_fg "6"
>>    psvar[1]=$(( cdb_fg + 1))
>> }
>>
>> A better more uniform distribution of random numbers can be achieved with
>>
>> getrandom -U 6 -s cdb_fg
>>
>> than $SRANDOM % 7 which will have a modulo bias
> It's true SRANDOM % 7 would have modulo bias; however, even if only
> SRANDOM is provided, the out of range rejection loop could happen in
> shell code. While that's less convenient, I am of the opinion once
> a user needs to care about modulo bias, they're likely better served by
> a language other than shell.
Why write an external python script too generate a random password for a 
service account when you can do it with the shell you're already 
running, especially when python might not be available?
>
> If a uniform random function is desired in zsh, I think it should mirror
> the interface of arc4random_uniform: just take an upper_bound and return
> a value in the range [0, upper_bound).

The original implementation did exclude the upper bound, inadvertently, 
but after discussion it seemed that was counter-intuitive and something 
easily missed in a cursory glance at the documentation.  Adding a lower 
bound was easy enough and saves extra shell code even if it's not likely 
to be used as much.  I can see the first question from a user being why 
can't I specify the smallest number I want?  I'm already getting user 
complaints about a limit of 64 on count, even though that's unlikely to 
be exceeded in practice.  I'm also anticipating complaints about not 
generating negative numbers.

yes, much of getrandom *could* be implemented in shell code, much less 
efficiently.  For example my precmd could save the math eval by 
specifying getrandom -L 1 -U 7.

Once you've written the code to access the kernel random source, it 
seems to make sense to me to make its output as flexible as possible.  I 
don't get the concern about backwards compatibility in implementing a 
new builtin unless you know of an external command called getrandom that 
is in common use, and that's easily fixed by changing it to zgetrandom.

>



^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-21  2:57                 ` Clinton Bunch
@ 2022-11-21  3:14                   ` Lawrence Velázquez
  2022-11-21  4:17                     ` Bart Schaefer
  2022-11-23  1:23                   ` Matthew Martin
  1 sibling, 1 reply; 46+ messages in thread
From: Lawrence Velázquez @ 2022-11-21  3:14 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

On Sun, Nov 20, 2022, at 9:57 PM, Clinton Bunch wrote:
> I don't get the concern about backwards compatibility in implementing a 
> new builtin unless you know of an external command called getrandom that 
> is in common use, and that's easily fixed by changing it to zgetrandom.

The concern is about publishing (and thus committing to) a complex
API right out of the gate, only to later realize that it is inelegant
and awkward, as opposed to starting with a minimal API and extending
it later in a deliberate, considered fashion.

-- 
vq


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-21  3:14                   ` Lawrence Velázquez
@ 2022-11-21  4:17                     ` Bart Schaefer
  2022-11-21  5:05                       ` Clinton Bunch
  2022-11-22 17:44                       ` Oliver Kiddle
  0 siblings, 2 replies; 46+ messages in thread
From: Bart Schaefer @ 2022-11-21  4:17 UTC (permalink / raw)
  To: Lawrence Velázquez; +Cc: Clinton Bunch, zsh-workers

On Sun, Nov 20, 2022 at 7:16 PM Lawrence Velázquez <larryv@zsh.org> wrote:
>
> The concern is about publishing (and thus committing to) a complex
> API right out of the gate, only to later realize that it is inelegant

This could be addressed by NOT defining getrandom as an autoloaded
builtin, so that a user would have to explicitly name it with zmodload
-ab, and document it as experimental.

Actually this could be done with -ap for SRANDOM and -af for zrandom,
as well, but they're much simpler and the only reasonable quibble
might be with the name "zrandom".

Incidentally, Clinton, I didn't look closer at the patch before
because the use of ".patch" as a name suffix meant I had download it
and open it in an external editor instead of just perusing it along
with my email.


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-21  4:17                     ` Bart Schaefer
@ 2022-11-21  5:05                       ` Clinton Bunch
  2022-11-22 13:42                         ` dana
  2022-11-23 19:49                         ` Daniel Shahaf
  2022-11-22 17:44                       ` Oliver Kiddle
  1 sibling, 2 replies; 46+ messages in thread
From: Clinton Bunch @ 2022-11-21  5:05 UTC (permalink / raw)
  To: zsh-workers

On 11/20/2022 10:17 PM, Bart Schaefer wrote:
> On Sun, Nov 20, 2022 at 7:16 PM Lawrence Velázquez <larryv@zsh.org> wrote:
>> The concern is about publishing (and thus committing to) a complex
>> API right out of the gate, only to later realize that it is inelegant

The only thing that feels inelegant about the getrandom command is the 
hex-string default, and I would welcome suggestions for a better default 
action.

On a side note, while implementing getrandom, I found myself wishing zsh 
implemented typeset -ia (and -Fa)

> This could be addressed by NOT defining getrandom as an autoloaded
> builtin, so that a user would have to explicitly name it with zmodload
> -ab, and document it as experimental.
I was originally against autoloading, though I now think that SRANDOM 
should be, but I'm not opposed to getrandom and zrandom having to be 
explicitly loaded.
>
> Actually this could be done with -ap for SRANDOM and -af for zrandom,
> as well, but they're much simpler and the only reasonable quibble
> might be with the name "zrandom".
Not tied to the name, but plain random felt wrong somehow.
>
> Incidentally, Clinton, I didn't look closer at the patch before
> because the use of ".patch" as a name suffix meant I had download it
> and open it in an external editor instead of just perusing it along
> with my email.
>
Yeah, after I posted, I saw you make a similar comment to someone else.  
I'll use .txt next time.  Incidentally this suggestion might be a good 
candidate for inclusion in the developer's guide when pasting into the 
message is not desirable.



^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-21  5:05                       ` Clinton Bunch
@ 2022-11-22 13:42                         ` dana
  2022-11-23 19:49                         ` Daniel Shahaf
  1 sibling, 0 replies; 46+ messages in thread
From: dana @ 2022-11-22 13:42 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: Zsh hackers list

On Sun 20 Nov 2022, at 23:05, Clinton Bunch wrote:
> The only thing that feels inelegant about the getrandom command is the
> hex-string default, and I would welcome suggestions for a better default
> action.

Sorry for taking so long to get back to this

I think there's some concern that the API is a little weird (or at least
ambitious), which i'd agree with, though i don't have any specific suggestions
for improving it right now, other than what i've already said

But i do still like the idea, since i personally find myself needing random
data in the form of bytes/characters more often than numbers. It'd be nice to
have an easy, cross-platform, 'correct' way of achieving that. And i don't
consider it outside the scope of a shell that has a built-in FTP client lol

Matthew's idea of limiting it to an SRANDOM parameter and an arc4random
function (either initially or instead) seems likely to gain immediate
acceptance, though, since there's not much to bike-shed there. I would
certainly sign off on it, even if it's not everything i want

dana


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-21  4:17                     ` Bart Schaefer
  2022-11-21  5:05                       ` Clinton Bunch
@ 2022-11-22 17:44                       ` Oliver Kiddle
  2022-11-22 19:48                         ` Clinton Bunch
  1 sibling, 1 reply; 46+ messages in thread
From: Oliver Kiddle @ 2022-11-22 17:44 UTC (permalink / raw)
  To: Clinton Bunch, zsh-workers

Firstly, thanks for your work on this Clinton and for being patient with us.

Bart Schaefer wrote:
> Actually this could be done with -ap for SRANDOM and -af for zrandom,
> as well, but they're much simpler and the only reasonable quibble
> might be with the name "zrandom".

Bash appears to have an SRANDOM and if this is compatible with that then
I'd see no issue with having that feature autoload the module.

The builtin's option letter API looks good to me.

If we're going to quibble about naming, many of our modules use 'z'
prefixes on their builtins so I'd favour zrandom for the builtin too.
The use of "get" and "set" in naming is often superfluous and "get"
implies the fetching of something that already exists rather than
of something that needs to be created.
What issue do you see with "zrandom" on the math function? Most of the
existing math functions correspond fairly closely to libc counterparts.
Distinguishing it from those seems reasonable.

The question of why $RANDOM isn't backed by some secure modern and fancy
implementation seems to come up roughly yearly. It's not a feature I use
especially often and I certainly don't care, either about stability or
security, on the rare occasion I use *(oe:REPLY=\$RANDOM:) to shuffle
a few mp3 files when playing them. Neither would I have any special
attachment to the existing $RANDOM implementation if it was changed in a
compatible way.

Given that it can be autoloaded, could we move $RANDOM to the module
too?

Oliver


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-22 17:44                       ` Oliver Kiddle
@ 2022-11-22 19:48                         ` Clinton Bunch
  0 siblings, 0 replies; 46+ messages in thread
From: Clinton Bunch @ 2022-11-22 19:48 UTC (permalink / raw)
  To: zsh-workers

I

On 11/22/2022 11:44 AM, Oliver Kiddle wrote:
> Firstly, thanks for your work on this Clinton and for being patient with us.
>
> Bart Schaefer wrote:
>> Actually this could be done with -ap for SRANDOM and -af for zrandom,
>> as well, but they're much simpler and the only reasonable quibble
>> might be with the name "zrandom".
> Bash appears to have an SRANDOM and if this is compatible with that then
> I'd see no issue with having that feature autoload the module.
>
> The builtin's option letter API looks good to me.

based on feedback, more and more I'm thinking the default action, 
instead of returning a hex-string of 8 bytes should return the 
equivalent of echo $SRANDOM. -c would always default to 1 even when 
using -r, and -i would be removed and become the default functionality 
as saving an array of decimal byte values could be achieved with -U 255 
and -c.


>
> If we're going to quibble about naming, many of our modules use 'z'
> prefixes on their builtins so I'd favour zrandom for the builtin too.
> The use of "get" and "set" in naming is often superfluous and "get"
> implies the fetching of something that already exists rather than
> of something that needs to be created.
I thought it would be confusing to have both named zrandom, but I'm not 
opposed to the idea.
> What issue do you see with "zrandom" on the math function? Most of the
> existing math functions correspond fairly closely to libc counterparts.
> Distinguishing it from those seems reasonable.
>
> The question of why $RANDOM isn't backed by some secure modern and fancy
> implementation seems to come up roughly yearly. It's not a feature I use
> especially often and I certainly don't care, either about stability or
> security, on the rare occasion I use *(oe:REPLY=\$RANDOM:) to shuffle
> a few mp3 files when playing them. Neither would I have any special
> attachment to the existing $RANDOM implementation if it was changed in a
> compatible way.
>
> Given that it can be autoloaded, could we move $RANDOM to the module
> too?

I suppose we could move RANDOM and use rand_r to try to keep the 
documented behavior, though currently the behavior is not consistent.

I tried changing the other places zsh uses rand to use rand_r and found 
that on at least EL8 the use of rand48 screwed up the sequence even when 
using rand_r for RANDOM.  I'm guessing glibc uses the same engine for 
rand_r and erand48 and doesn't reset the upper 16 bits when rand_r is 
called after erand48, but I haven't dug into the code.

I'd rather make RANDOM a 15-bit integer sampled from the same source as 
SRANDOM, but it would mean completely breaking an only partially broken 
documented behavior.

>
> Oliver
>


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-21  2:57                 ` Clinton Bunch
  2022-11-21  3:14                   ` Lawrence Velázquez
@ 2022-11-23  1:23                   ` Matthew Martin
  2022-11-23  2:58                     ` Clinton Bunch
  1 sibling, 1 reply; 46+ messages in thread
From: Matthew Martin @ 2022-11-23  1:23 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

On Sun, Nov 20, 2022 at 08:57:25PM -0600, Clinton Bunch wrote:
> On 11/20/2022 8:21 PM, Matthew Martin wrote:
> > If a uniform random function is desired in zsh, I think it should mirror
> > the interface of arc4random_uniform: just take an upper_bound and return
> > a value in the range [0, upper_bound).
> 
> The original implementation did exclude the upper bound, inadvertently, but
> after discussion it seemed that was counter-intuitive and something easily
> missed in a cursory glance at the documentation.  Adding a lower bound was
> easy enough and saves extra shell code even if it's not likely to be used as
> much. 

arc4random_uniform excludes the upper bound so it is a direct
replacement for <random source> % <upper bound>. I think it would be
wise to follow an established API and avoid off by one errors when
refactoring existing code. Similarly it doesn't offer a lower bound
option since that's trivial to implement and difficult to get wrong.

> yes, much of getrandom *could* be implemented in shell code, much less
> efficiently.  For example my precmd could save the math eval by specifying
> getrandom -L 1 -U 7.
> 
> Once you've written the code to access the kernel random source, it seems to
> make sense to me to make its output as flexible as possible.  I don't get
> the concern about backwards compatibility in implementing a new builtin
> unless you know of an external command called getrandom that is in common
> use, and that's easily fixed by changing it to zgetrandom.

I disagree on making the output as flexible as possible because there
are already features in zsh to get those formats and the API would need
to be maintained into future releases even if seldom used. I think it
would be wise to start with the minimum interface necessary and then
build off it as uses arise.

Interfacing with libc and the kernel to get random numbers is something
that can only be done in C and SRANDOM builds off an existing API, so
I have no opposition to it. I can accept that uniformly distributed
random integers is a useful and non-trivial task, so probably should be
included, but would prefer an existing and proven API like
arc4random_uniform.

I have a patch based off yours that implements just SRANDOM and
a mathfunc for arc4random_uniform, but haven't yet had time to test it
on multipl platforms.

With just SRANDOM and arc4random_uniform the features of getrandom can
be implemented in a script. I've taken a quick shot at a proof of
concept below.


die() {
	printf >&2 '%s\n' "$1"
	exit 1
}

zmodload zsh/random

local uint32_max=$(( 2**32 - 1 ))
local count=8 lower=0 upper=uint32_max result format=hex output=print output_name
local -a results

while getopts a:c:iL:rs:U: opt; do
	case $opt in
	a) output=array; output_name=$OPTARG;;
	c) count=$OPTARG;;
	L) lower=$OPTARG;;
	i) format=int;;
	r) format=raw; upper=255;;
	s) output=scalar; output_name=$OPTARG; count=1;;
	U) upper=$OPTARG;;
	esac
done

(( count > 0 )) || die 'count must be at least 1'
(( lower >= 0 )) || die 'lower bound must be greater than or equal to 0'
if [[ $format == raw ]]; then
	(( upper <= 255 )) || die 'upper bound must be less than or equal to 255 in raw mode'
else
	(( upper <= uint32_max )) || die "upper bound must be less than or equal to $uint32_max"
fi
(( lower < upper )) || die 'lower bound must be less than upper bound'
if [[ $output == scalar ]] && (( count != 1 )); then die 'count must be 1 for scalar output'; fi

repeat count; do
	if (( lower == 0 && upper == uint32_max )); then
		result=$SRANDOM
	else
		result=$(( arc4random_uniform(upper - lower) + lower ))
	fi

	case $format in
	hex) result=$(( [##16] result ));;
	int) ;;
	raw) printf -v result '%b' "\x$(( [##16] result ))"
	esac

	results+=($result)
done

case $output in
array) : ${(PA)output_name::=$results};;
scalar) : ${(P)output_name::=${results[1]}};;
print) print -rn - $results;;
esac


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-23  1:23                   ` Matthew Martin
@ 2022-11-23  2:58                     ` Clinton Bunch
  2022-11-23  4:14                       ` Matthew Martin
  0 siblings, 1 reply; 46+ messages in thread
From: Clinton Bunch @ 2022-11-23  2:58 UTC (permalink / raw)
  To: zsh-workers

On 11/22/2022 7:23 PM, Matthew Martin wrote:
> On Sun, Nov 20, 2022 at 08:57:25PM -0600, Clinton Bunch wrote:
>> On 11/20/2022 8:21 PM, Matthew Martin wrote:
>>> If a uniform random function is desired in zsh, I think it should mirror
>>> the interface of arc4random_uniform: just take an upper_bound and return
>>> a value in the range [0, upper_bound).
>> The original implementation did exclude the upper bound, inadvertently, but
>> after discussion it seemed that was counter-intuitive and something easily
>> missed in a cursory glance at the documentation.  Adding a lower bound was
>> easy enough and saves extra shell code even if it's not likely to be used as
>> much.
> arc4random_uniform excludes the upper bound so it is a direct
> replacement for <random source> % <upper bound>. I think it would be
> wise to follow an established API and avoid off by one errors when
> refactoring existing code. Similarly it doesn't offer a lower bound
> option since that's trivial to implement and difficult to get wrong.
Instead you get off-by-one errors in new code because people assume the 
specified limit is included unless they read the documentation closely.
>
>> yes, much of getrandom *could* be implemented in shell code, much less
>> efficiently.  For example my precmd could save the math eval by specifying
>> getrandom -L 1 -U 7.
>>
>> Once you've written the code to access the kernel random source, it seems to
>> make sense to me to make its output as flexible as possible.  I don't get
>> the concern about backwards compatibility in implementing a new builtin
>> unless you know of an external command called getrandom that is in common
>> use, and that's easily fixed by changing it to zgetrandom.
> I disagree on making the output as flexible as possible because there
> are already features in zsh to get those formats and the API would need
> to be maintained into future releases even if seldom used. I think it
> would be wise to start with the minimum interface necessary and then
> build off it as uses arise.
>
> Interfacing with libc and the kernel to get random numbers is something
> that can only be done in C and SRANDOM builds off an existing API, so
> I have no opposition to it. I can accept that uniformly distributed
> random integers is a useful and non-trivial task, so probably should be
> included, but would prefer an existing and proven API like
> arc4random_uniform.
>
> I have a patch based off yours that implements just SRANDOM and
> a mathfunc for arc4random_uniform, but haven't yet had time to test it
> on multipl platforms.
Even if we go this way,  I don't think the function should be called 
arc4random_uniform.  It only implements the arc4random algorithm on *BSD.
>
> With just SRANDOM and arc4random_uniform the features of getrandom can
> be implemented in a script. I've taken a quick shot at a proof of
> concept below.
>
>
[snip shell code example]

That's a lot of zsh code, most of which would be difficult for a non-zsh 
expert to write.  So it would need to be provided as a function and then 
you're left with the same support problem, but a slower implementation 
that keeps converting strings to numbers and back again.

getrandom doesn't need advanced zsh programming knowledge to use.  The 
-U and -L make it easy for a relative novice. arc4random_uniform is a 
programmer's interface, not a user's interface.


You also ignored the zrandom function which is definitely non-trivial to 
implement in the shell.  It's not even trivial to implement in C.



^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-23  2:58                     ` Clinton Bunch
@ 2022-11-23  4:14                       ` Matthew Martin
  2022-11-23 13:41                         ` Clinton Bunch
  0 siblings, 1 reply; 46+ messages in thread
From: Matthew Martin @ 2022-11-23  4:14 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

On Tue, Nov 22, 2022 at 08:58:30PM -0600, Clinton Bunch wrote:
> On 11/22/2022 7:23 PM, Matthew Martin wrote:
> > On Sun, Nov 20, 2022 at 08:57:25PM -0600, Clinton Bunch wrote:
> > > On 11/20/2022 8:21 PM, Matthew Martin wrote:
> > > > If a uniform random function is desired in zsh, I think it should mirror
> > > > the interface of arc4random_uniform: just take an upper_bound and return
> > > > a value in the range [0, upper_bound).
> > > The original implementation did exclude the upper bound, inadvertently, but
> > > after discussion it seemed that was counter-intuitive and something easily
> > > missed in a cursory glance at the documentation.  Adding a lower bound was
> > > easy enough and saves extra shell code even if it's not likely to be used as
> > > much.
> > arc4random_uniform excludes the upper bound so it is a direct
> > replacement for <random source> % <upper bound>. I think it would be
> > wise to follow an established API and avoid off by one errors when
> > refactoring existing code. Similarly it doesn't offer a lower bound
> > option since that's trivial to implement and difficult to get wrong.
> Instead you get off-by-one errors in new code because people assume the
> specified limit is included unless they read the documentation closely.

I think exclusive would be more useful. A common use case would be
having an array and wanting to choose a random element. If exclusive the
code is
    choices=(red green blue); print ${choices[arc4random_uniform($#choices) + 1]}
if inclusive an extra -1 is required.

> > > yes, much of getrandom *could* be implemented in shell code, much less
> > > efficiently.  For example my precmd could save the math eval by specifying
> > > getrandom -L 1 -U 7.
> > > 
> > > Once you've written the code to access the kernel random source, it seems to
> > > make sense to me to make its output as flexible as possible.  I don't get
> > > the concern about backwards compatibility in implementing a new builtin
> > > unless you know of an external command called getrandom that is in common
> > > use, and that's easily fixed by changing it to zgetrandom.
> > I disagree on making the output as flexible as possible because there
> > are already features in zsh to get those formats and the API would need
> > to be maintained into future releases even if seldom used. I think it
> > would be wise to start with the minimum interface necessary and then
> > build off it as uses arise.
> > 
> > Interfacing with libc and the kernel to get random numbers is something
> > that can only be done in C and SRANDOM builds off an existing API, so
> > I have no opposition to it. I can accept that uniformly distributed
> > random integers is a useful and non-trivial task, so probably should be
> > included, but would prefer an existing and proven API like
> > arc4random_uniform.
> > 
> > I have a patch based off yours that implements just SRANDOM and
> > a mathfunc for arc4random_uniform, but haven't yet had time to test it
> > on multipl platforms.
> Even if we go this way,  I don't think the function should be called
> arc4random_uniform.  It only implements the arc4random algorithm on *BSD.
> > 
> > With just SRANDOM and arc4random_uniform the features of getrandom can
> > be implemented in a script. I've taken a quick shot at a proof of
> > concept below.
> > 
> > 
> [snip shell code example]
> 
> That's a lot of zsh code, most of which would be difficult for a non-zsh
> expert to write.  So it would need to be provided as a function and then
> you're left with the same support problem, but a slower implementation that
> keeps converting strings to numbers and back again.

It's a lot of code because it's doing a lot. Most users would need just
a subset to do their particular task. I'm not suggesting adding the
script to zsh; it was just to show SRANDOM and arc4random_uniform are
sufficient.

> getrandom doesn't need advanced zsh programming knowledge to use.  The -U
> and -L make it easy for a relative novice. arc4random_uniform is a
> programmer's interface, not a user's interface.

I don't agree with this argument. Addition to establish a lower bound is
trivial and the subtraction to determine the range is not much harder.
Further since getrandom is a builtin not a mathfunc it's more cumbersome
to use in the array choice context.

> You also ignored the zrandom function which is definitely non-trivial to
> implement in the shell.  It's not even trivial to implement in C.

It's not clear what a use case for zrandom would be.


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-23  4:14                       ` Matthew Martin
@ 2022-11-23 13:41                         ` Clinton Bunch
  2022-11-23 20:33                           ` Daniel Shahaf
  0 siblings, 1 reply; 46+ messages in thread
From: Clinton Bunch @ 2022-11-23 13:41 UTC (permalink / raw)
  To: zsh-workers

I don't think we're going to come to an agreement on inclusive vs 
exclusive, and I know you'd hate the idea of both interfaces, so others 
will have to weigh in.

On 11/22/2022 10:14 PM, Matthew Martin wrote:

>> You also ignored the zrandom function which is definitely non-trivial to
>> implement in the shell.  It's not even trivial to implement in C.
> It's not clear what a use case for zrandom would be.
>
It's intended as a replacement for rand48()



^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-08  0:18         ` [PATCH] zsh/random module [UPDATED] Clinton Bunch
                             ` (2 preceding siblings ...)
  2022-11-21  1:07           ` [PATCH] zsh/random module [UPDATED] Matthew Martin
@ 2022-11-23 19:46           ` Daniel Shahaf
  2022-11-24  2:58             ` Clinton Bunch
  2022-11-24 14:33             ` Clinton Bunch
  3 siblings, 2 replies; 46+ messages in thread
From: Daniel Shahaf @ 2022-11-23 19:46 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

Clinton Bunch wrote on Mon, Nov 07, 2022 at 18:18:15 -0600:
> diff --git a/Completion/Zsh/Command/_getrandom b/Completion/Zsh/Command/_getrandom
> new file mode 100644
> index 000000000..d97e36918
> --- /dev/null
> +++ b/Completion/Zsh/Command/_getrandom
> @@ -0,0 +1,14 @@
> +#Thans to dana for providing this completion function

This crediting doesn't belong in the source file but in the log message.

> +local lmin=0 lmax=$(( 2 ** 32 - 2 )) umin=1 umax=$(( 2 ** 32 - 1 ))
> +
> +_arguments -s -S : \
> +  '(-r -s)-a+[assign result to specified array parameter]:array parameter:_parameters -g "*array*~*readonly*"' \
> +  '(-a)-s+[assign result to specified scalar parameter]:scalar parameter:_parameters -g "*(integer|scalar)*~*readonly*"' \
> +  '(-r)-i[produce random data as 32-bit unsigned integers]' \
> +  '-c+[specify count of data]: :_numbers -d8 -l1 -m64 -u "bytes or integer elements" "data length"' \
> +  '(-i -L -U)-r[produce random data as raw binary bytes]' \
> +  '(-r)-L+[specify integer lower bound (implies -i)]: :_numbers -d$lmin -l$lmin -m$max "integer lower bound"' \
> +  '(-r)-U+[specify integer upper bound (implies -i)]: :_numbers -d$umax -l$umin -m$max "integer upper bound"'

Suggest to have this state either "inclusive" or "exclusive".

> +++ b/Doc/Zsh/mod_random.yo
> @@ -0,0 +1,67 @@
> +startitem()
> +findex(getrandom)
> +cindex(random data, printing, array)
> +xitem(tt(getrandom) [ tt(-c) var(count) ] [ tt(-r) ] [ tt(-s) var(scalar) ] [tt(-i) ] [ tt(-a) var(array) ] [ tt(-L) var(lower_bound) ] [ tt(-U) var(upper_bound) ])

Never use xitem() without item() after it.

> +NL()Outputs a random hex-encoded string of var(count) bytes by default. NL()
> +
> +
> +startitem()
> +item(tt(-c) var(count)) (
> +Sets the number of returned random data. Defaults to 8, unless -i, -L, or
> +-U is specified without -a, in which case the default is 1. var(count) must be
> +between 1 and 64.
> +)
> +item(tt(-r)) (
> +Returns the raw binary bytes rather than a hex-encoded string.
> +)
> +item(tt(-s) var(scalar)) (
> +Saves the random data in var(scalar) instead of printing it. It is an error to
> +use this with -i, -L, or -U with a count greater than 1.
> +)
> +item(tt(-a) var(array)) (
> +Saves the random data as an array var(array) of var(count) containing a
> +decimal representation of the integers LPAR()tt(-i), tt(-L), or tt(-U)RPAR()
> +or individual bytes.
> +)
> +item(tt(-i)) (
> +Produces var(count) 32-bit unsigned integers.
> +)
> +item(tt(-L) var(lower_bound)) (
> +Outputs var(count) integers between var(lower_bound) and var(upper_bound).
> +Defaults to 0.  var(lower_bound) can not be negative and the maximum value is
> +4,294,967,294.
> +)
> +item(tt(-U) var(upper_bound)) (
> +Outputs var(count) integers between var(lower_bound) and var(upper_bound).
> +Defaults to 4,294,967,295. This is the maximum value of var(upper_bound) and
> +the minimum is 1.
> +)
> +enditem()
> +enditem()
> +

I don't understand this API.

- Magic numbers:

  + Why should -c default to 8 in the "random bytes" case?  Why
    shouldn't it default to some other value, or even be required to be
    specified explicitly by the user?

  + Why should -c0 be an error?  It should be valid to request an array
    of 0 random numbers, just like, say, «yes | head -n 0».

  + Why should -c65 be an error?  (Saw your comment elsethread that you
    imagine that would suffice.  That's not a good reason for this limit.)

  + Why should -L$(( 2**32 - 1)) be an error?  I should be able to request
    a random integer in the range { x | 42 ≤ x ≤ 42 }.  It's perfectly
    well-defined.

- Why should -c's default depend on anything at all?  You mentioned
  downthread you consider making -c1 the default in all cases; that'd be
  better.

- The builtin can generate either integers or bytes.  Most of the API
  (the type of "returned random data" that -c deals with, the default
  number of those, validity of -L/-U, etc.) is split right along this
  line.  Shouldn't this become two separate builtins, then?  One for
  bytes and one for integers?

  (More on this below, at the end of the implementation review.)

- Interdependencies.  This can generate either bytes or unsigned ints,
  and bytes can be emitted either in hex, raw, or in decimal, and either
  to stdout or to a scalar or to an array… but not all of these
  combinations are possible.  For instance, adding «-a» to «getrandom -c 1»
  changes the output encoding from hex to decimal AND the output channel
  from stdout to $reply.  It seems better to avoid coupling things like
  this.

> +subsect(Parameters)
> +
> +startitem()
> +vindex(SRANDOM)
> +item(tt(SRANDOM)) (
> +A random positive 32-bit integer between 0 and 4,294,967,295.  This parameter
> +is read-only.
> +)
> +enditem()
> +
> +subsect(Math Functions)
> +
> +startitem()
> +item(tt(zrandom+LPAR()RPAR())) (

Doesn't this need a findex()?

Do other math functions have index entries?  And if not (systell()
doesn't), shouldn't they?

> +++ b/Src/Modules/random.c
> @@ -0,0 +1,675 @@

There's a whole bunch of orthographic issues in this file (whitespace,
punctuation, spelling, etc.).  I won't point them out now since there
are bigger issues.

> +#include <stdbool.h>

C99-ism.  (zsh is written in C89.)

> +/* buffer to pre-load integers for SRANDOM to lessen the context switches */
> +uint32_t rand_buff[8];

C99-ism (the array element type).

Even in C99, wouldn't this require an explicit #include?  I guess it
works now due to transitive #include's, but I don't know that it's
guaranteed to work everywhere.

> +getrandom_buffer(void *buf, size_t len)
> +{
> +    do {
> +#ifdef HAVE_ARC4RANDOM
> +	/* Apparently, the creaters of arc4random didn't believe in errors */

Leave out the /ad homines/.

FWIW, I would sooner guess arc4random() was originally written on
a platform where it /couldn't/ error.

> +	arc4random_buf(buf,len);
> +	ret = len;
> +#elif defined(HAVE_GETRANDOM)
> +	ret=getrandom(bufptr,(len - val),0);
> +#else
> +	ret=read(randfd,bufptr,(len - val));

The premise of the module is to offer an interface to high-quality
random numbers.  Is /dev/urandom, which is the module falls back to
here, necessarily of sufficient quality?  If not, shouldn't there be
a way for the module's user to determine the source of randomness?

> +#endif
> +	if (ret < 0) {
> +	    if (errno != EINTR || errflag || retflag || breaks || contflag) {
> +		zwarn("Unable to get random data: %e.", errno);

Need to set «errno = 0» first, surely?

> +/*
> + * Implements the getrandom builtin to return a string of random bytes of
> + * user-specified length.  It either prints them to stdout or returns them
> + * in a parameter passed on the command line.
> + *
> + */
> +
> +/**/
> +static int
> +bin_getrandom(char *name, char **argv, Options ops, int func)
> +{
> +    bool integer_out = false;	 /*  boolean */

The target audience for comments knows C.  So, nuke this comment.

You might define inline enums here (for ints/bytes, scalar/array/stdout,
and decimal/hex/raw) to make the code more self-documenting.

Incidentally, this would also immediately raise the question of why some
combinations of these aren't valid (e.g., hex or raw bytes output to an
array).

> +    int i, j;		         /* loop counters */
> +
> +    /*maximum string length of a 32-bit integer + null */
> +    char int2str[11];

Have the comment point out that this doesn't include a sign byte?

FWIW, we have a 64-bit version of this in DIGBUFSIZE.

> +    /* Vailues for -U *nd -L */
> +    zulong   upper = UINT32_MAX, lower = 0;
> +    uint32_t diff  = UINT32_MAX;
> +    uint32_t min   = 0, rej = 0;  /* Reject below min and count */

«rej» is never read.

> +    bool     bound = false;       /* set if upper or lower bound specified */

Why is this variable needed?  Having it at all means that passing «-L 0
-U $((2**32-1))» is different to not passing any -L or -U option at all,
even though these values are ostensibly the defaults.

> +    if (OPT_ISSET(ops, 'U')) {
> +	if (!zstrtoul_underscore(OPT_ARG(ops, 'U'), &upper)) {
> +	    zwarnnam(name, "Couldn't convert -U %s to a number.",
> +		    OPT_ARG(ops, 'U'));
> +	    return 1;
> +	} else if (upper > UINT32_MAX || upper < 1) {
> +	    zwarnnam(name,
> +		    "Upper bound must be between 1 and %z you specified: %z.",
> +		     (zlong) UINT32_MAX, (zlong) upper);

If «upper > ZLONG_MAX», the downcast to zlong would result in
a negative number.

> +	    return 1;
> +    if (bound) {
> +	if (lower > upper) {
> +	    zwarnnam(name, "-U must be larger than -L.");
> +	    return 1;
> +	}
> +
> +	diff = upper == UINT32_MAX && lower == 0 ? UINT32_MAX :
> +		upper - lower + 1;

How can this possibly be right?  As written it means the meaning of
«diff» with the default values of «upper»/«lower» is different to its
meaning in literally every other case.

Ah, I see.  Without special-casing the default values, explicitly
passing «-L 0 -U $((2**32 - 1))» would incorrectly trigger the
zwarnnam() just below, because «diff» is a uint32_t.  Thus, the
special-casing is an incorrect fix to a real issue.

> +	if(!diff) {
> +	    zwarnnam(name, "Upper and Lower bounds cannot be the same.");
> +	    return 1;
> +	}

This warning will never be printed.  [Why?  Because «diff» is either
UINT32_MAX, which is true, or «(uint32_t)(zulong)(upper - lower + 1)»,
which, taking into account the range checks on «upper» and «lower», can
only work out to 0 if «upper == UINT32_MAX && lower == 0»… which is
precisely the case that's special-cased in the assignment to «diff» just
above.]

I suppose it was intended to be printed for «getrandom -L 42 -U 42».  In
practice, that incantation just prints 42.  As I mentioned in the API
review, I think that's desired behaviour.  In either case, please add
a test.

> +    }
> +
> +    if(!OPT_ISSET(ops,'c'))
> +	if ((!bound && !integer_out) || arrname)
> +	len = 8;

So one default value of -c is here and the other is up in the
declaration of «len».  That's somewhat confusing.

> +
> +
> +    if (!byte_len)
> +	byte_len = len;

The condition is always true.

Didn't you get a compiler warning for this?

> +
> +
> +    if (bound) {
> +	pad = len / 16 + 1;
> +	byte_len = (len + pad) * sizeof(uint32_t);

«pad» should be declared in this scope since it's not used elsewhere.
Ditto «outbuff» below and others.

Why «len / 16 + 1»?

> +    } else if (integer_out)
> +	byte_len = len * sizeof(uint32_t);
> +
> +    if (byte_len > 256)
> +	byte_len=256;
> +

Why not report an error to the user?

>
> +    min = -diff % diff;

See above about the special casing in the assignment to «diff».  I guess
it affects this line.

>
> +	sprintf(int2str, "%u", int_val);

Hold on.  %u expects an «unsigned int» and «int_val» is a «uint32_t».
So, this line is only correct when «unsigned int» happens to be a 32-bit
type… while there's nothing stopping that type from being a 64-bit type,
or even a 16-bit type, and in either case this line would read the wrong
number of bytes off the varargs.

> +	if (arrname) {
> +	    array[i]=ztrdup(int2str);
> +	} else if (scalar && len == 1) {
> +	    setiparam(scalar, int_val);
> +	} else {
> +	    printf("%s ",int2str);
> +	}
> +    }
> +    if (arrname) {
> +	setaparam(arrname,array);
> +    } else if (!scalar) {
> +	printf("\n");
> +    }
> +
> +
> +    zfree(buf, byte_len);
> +    return 0;
> +}
> +

In the API docs review I proposed to split «getrandom» into two separate
builtins.

Having read this function, I reiterate that proposal.  This function,
with all the if/then cases that try to keep track of which of the
function's different modes of operation was requested, is difficult to
read and to modify.

> +
> +/*
> + * Provides for the SRANDOM parameter and returns an unsigned 32-bit random
> + * integer.
> + */
> +
> +/**/
> +static zlong
> +get_srandom(UNUSED(Param pm)) {
> +
> +    if(buf_cnt < 0) {
> +	getrandom_buffer((void*) rand_buff,sizeof(rand_buff));
> +	buf_cnt=7;

Magic number.  You might want «sizeof(rand_buff) / sizeof(rand_buff[0]) - 1»…

…but it'd be more idiomatic to leave that «- 1» out and then change the
postfix decrement to a prefix decrement.  That would also be more
consistent with the variable's name.  (As written, «buf_cnt == 0» means
there's one more zlong in it; it's not a "count" but a "largest valid
index".)

> +    }
> +    return rand_buff[buf_cnt--];
> +}
> +
> +/*
> + * Implements the mathfunc zrandom and returns a random floating-point
> + * number between 0 and 1.  Does this by getting a random 32-bt integer
> + * and dividing it by UINT32_MAX.
> + *

Presumably this means to guarantee a /uniformly-distributed/ random
floating-point number in the given range.  So, why does "getting
a random 32-bit integer and dividing it by UINT32_MAX" actually
implement that guarantee?

Oh, wait, never mind.  The comment is out of date.  Fix it.

> + */
> +
> +/**/
> +static mnumber
> +math_zrandom(UNUSED(char *name), UNUSED(int argc), UNUSED(mnumber *argv),
> +	     UNUSED(int id))
> +{
> +    mnumber ret;
> +    double r;
> +
> +    r = random_real();
> +    if (r < 0) {
> +	zwarnnam(name, "Failed to get sufficient random data.");

Say why.  (Probably via errno?)

Rule of thumb: every error message should include at least one replaceable.

(Further instances of this elsewhere in the file.)

> +/**/
> +int
> +setup_(UNUSED(Module m))
> +{
> +#ifdef USE_URANDOM
> +    /* Check for the existence of /dev/urandom */
> +
> +    struct stat st;
> +
> +    if (lstat("/dev/urandom",&st) < 0) {

Why not stat()?

> +	zwarn("No kernel random pool found.");
> +	return 1;
> +    }
> +
> +    if (!(S_ISCHR(st.st_mode)) ) {
> +	zwarn("No kernel random pool found.");
> +	return 1;
> +    }
> +#endif /* USE_URANDOM */
> +    return 0;
> +}

> +/**/
> +int
> +_zclz64(uint64_t x) {
> +    if (!(x & 0xFFFFFFFF00000000)) {

Don't you need ZLONG_CONST() here?

> +/**/
> +uint64_t
> +random64(void) {
> +    uint64_t r;
> +
> +    if(getrandom_buffer(&r,sizeof(r)) < 0) {
> +	zwarn("zsh/random: Can't get sufficient random data.");
> +	return 1; /* 0 will cause loop */

Where is the docstring of random64() itself?  /That/ is what determines
whether or not returning 0 would be correct.

What loops and why?

Also, I suspect this name will collide with a third-party function one
day.  Recommending to rename.

> +    }
> +
> +    return r;
> +}
> +
> +/* Following code is under the below copyright, despite changes for error
> + * handling and removing GCCisms */
> +

-1.  Don't say "Following code"; identify the code explicitly, or better
yet, move it to its own *.c file.  Otherwise, any function we might
append to this file would be taken to be covered by this copyright
notice and license statement.

> +/*
> + * random_real: Generate a stream of bits uniformly at random and
> + * interpret it as the fractional part of the binary expansion of a
> + * number in [0, 1], 0.00001010011111010100...; then round it.
> + */
> +
> +/**/
> +double

Need «static».

(Further instances of this elsewhere in the file.)

> +random_real(void)
> +{
> +}
> +++ b/Test/V15random.ztst
> @@ -0,0 +1,51 @@
> +# Tests for the zsh/random module
> +%test
> +  getrandom -U 56
> +1f:Checking if system has a kernel random source
> +?(eval):1: No kernel random pool found.
> +

The test point's code has nothing to do with the test point's
description and expected errput.

The 'f' flag is why this test point's failure doesn't get reported.

> +  getrandom -U 64
> +0:Checking upper Bound is observed
> +*><0-64> 
> +
> +  tmpmsg="failed"
> +  getrandom -L 4 -U 5 -c 16 -a tmpa
> +  for i in ${tmpa[@]}
> +  do
> +    if (( i == 5 )); then
> +      tmpmsg="success"
> +      break
> +    fi
> +  done

Use ${tmpa[(I)5]} ?

> +  echo $tmpmsg
> +0:Checking that upper bound is inclusive
> +F:Try running the test again: make TESTNUM=V15 check

Baby and bathwater.  Say what the problem is first and what the
recommended fix is second.  In this case, something along the lines
of "This test WILL false positive once every 65536 runs on average.  To
rule this out, run the test again."

Oh, and bump that 16 to something 3 or 4 times as big, because a 1/65536
chance isn't really enough in a world where automated builds (CI,
distros' QA, etc.) is a thing.

> +>success
> +
> +  echo $(( zrandom() ))
> +0:Checking if zrandom() produces a floating-point number between 0 and 1
> +*>0(|.)[[:digit:]]#

Why is the period optional?  Something like "042" shouldn't be accepted.

> diff --git a/configure.ac b/configure.ac
> index 074141d38..f1fa01274 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -675,6 +675,7 @@ fi
>  AC_CHECK_HEADERS(sys/time.h sys/times.h sys/select.h termcap.h termio.h \
>  		 termios.h sys/param.h sys/filio.h string.h memory.h \
>  		 limits.h fcntl.h libc.h sys/utsname.h sys/resource.h \
> +                 sys/random.h \

Tabs/spaces are wrong.  Also in the next hunk.

>  		 locale.h errno.h stdio.h stdarg.h varargs.h stdlib.h \
>  		 unistd.h sys/capability.h \
>  		 utmp.h utmpx.h sys/types.h pwd.h grp.h poll.h sys/mman.h \
> @@ -1337,6 +1338,7 @@ AC_CHECK_FUNCS(strftime strptime mktime timelocal \
>  	       cygwin_conv_path \
>  	       nanosleep \
>  	       srand_deterministic \
> +               getrandom arc4random \
>  	       setutxent getutxent endutxent getutent)
>  AC_FUNC_STRCOLL
>  

Bottom line:

- Interfacing to kernel random number APIs seems reasonable enough.

- The API to shell scripts feels unintuitive.

- As the patch stands, it is not ready to be merged (even assuming
  arguendo the API is perfect as-is).

Thanks,

Daniel


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-21  5:05                       ` Clinton Bunch
  2022-11-22 13:42                         ` dana
@ 2022-11-23 19:49                         ` Daniel Shahaf
  1 sibling, 0 replies; 46+ messages in thread
From: Daniel Shahaf @ 2022-11-23 19:49 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

Clinton Bunch wrote on Sun, Nov 20, 2022 at 23:05:25 -0600:
> On 11/20/2022 10:17 PM, Bart Schaefer wrote:
> > Incidentally, Clinton, I didn't look closer at the patch before
> > because the use of ".patch" as a name suffix meant I had download it
> > and open it in an external editor instead of just perusing it along
> > with my email.
> > 
> Yeah, after I posted, I saw you make a similar comment to someone else. 
> I'll use .txt next time.  Incidentally this suggestion might be a good
> candidate for inclusion in the developer's guide when pasting into the
> message is not desirable.

+1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-18 18:38                 ` Clinton Bunch
@ 2022-11-23 19:52                   ` Daniel Shahaf
  2022-11-24 16:19                     ` Stephane Chazelas
  0 siblings, 1 reply; 46+ messages in thread
From: Daniel Shahaf @ 2022-11-23 19:52 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

Clinton Bunch wrote on Fri, Nov 18, 2022 at 12:38:26 -0600:
> 
> On 11/18/2022 12:12 PM, Stephane Chazelas wrote:
> > 2022-11-18 11:08:42 -0600, Clinton Bunch:
> > [...]
> > > > Some comments about the API:
> > > > 
> > > > >     -c COUNT
> > > > >          Sets the number of returned random data.  Defaults to 8,
> > > > >          unless -i, -L, or -U is specified without -a, in which case
> > > > >          the default is 1.  COUNT must be between 1 and 64.
> > > > Why those arbitrary limits? Why not from 0 to inf?
> > > the getrandom function only guarantees 256 bytes.  64 32-bit integers.
> > > Allowing 256 uint8 would be possible, but harder to explain and doesn't work
> > > well with bounding.  As the difference between the lower and upper limit
> > > surpasses half the max, the number of bytes thrown away to allow uniformity
> > > of distribution becomes unwieldy.
> > > 
> > > Also zero would not be particularly useful :)
> > You could call getrandom() as many times as necessary. From a
> > user point of view (mine at least) that limit doesn't make
> > sense. Users don't care what API you use underneath.
> > 
> > An array can have from 0 to inf elements, a string 0 to inf
> > bytes, I should be able to request from 0 to inf numbers/bytes to
> > store in my array/string.
> I can't do inf (there is a limit to how big an array can be, the largest
> integer for an index).  I honestly think it's unlikely anyone will need more
> than 64 random integers.  I saw no reason to expand the limit, but it can be
> done.
> > > > Without -i you get uint8 in hex and with -i uint32 in decimal.
> > > > Why the different bases?
> > > It's possible to break a hex string into bytes in a rather straight forward
> > > way.   A string of decimal numbers not so much, but decimal numbers are
> > > easier to manipulate.
> > > 
> > > If I didn't think it should do *something* when given no arguments, I'd
> > > eliminate the hex string altogether.
> > Alternatively, you could add a -f %02x, -f %u -f %c and do
> > without the -r/-i, with default -c 8 -f %02x -L 0 -U 255.
> That seems a lot more confusing than -r (raw) or -i (integer) unless it's
> expanded to a lot more arbitrary formats.  At least for someone not familiar
> with printf formats.

Could have something like this:

    "-f:specify format:(raw hex decimal)"

It's extensible and readable.  (Could easily extend it to support printf
formats, or uppercase/lowercase hex variations, etc..)  Also, it uses
one fewer short option :)


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-23 13:41                         ` Clinton Bunch
@ 2022-11-23 20:33                           ` Daniel Shahaf
  2022-11-23 21:42                             ` dana
  0 siblings, 1 reply; 46+ messages in thread
From: Daniel Shahaf @ 2022-11-23 20:33 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

Clinton Bunch wrote on Wed, Nov 23, 2022 at 07:41:40 -0600:
> I don't think we're going to come to an agreement on inclusive vs exclusive,
> and I know you'd hate the idea of both interfaces, so others will have to
> weigh in.

Some considerations:

1. What existing API, within or without zsh, should the new
builtin/mathfunc be consistent with?

2. Is it the case that choice A can be implemented in shell code in
terms of choice B, but not vice versa?  [For instance, if the argument
to -U were a uint64_t and were exclusive, there'd be no way to specify
UINT64_MAX as the upper bound.]

3. Inclusive is necessarily the rule when sampling from a given set of
elements (e.g., in zsh terms, "here's an array name, gimme a random
element from it"; when the array contains, say, {January..December}).
In this case there's no need to involve integers at all.

4. Which approach generalizes better?

---

[Please consider these questions for a second before reading on; my
proposed answer(s) are below.]

---

For #1, we might want to be consistent with the subscript slice syntax.
$foo[4,5] selects $foo[4] and $foo[5] both.  Similarly {4..5} includes
both endpoints.

Also, I could be wrong here, but isn't it the case that exclusiveness
goes along with the lower bound being hard-coded as zero?  That's how it
is in modular arithmetic, in arc4random_uniform(), and in Perl, and even
in C's idiom for iterating a dynamically-allocated array («malloc(42 * …)»,
followed by «for (i = 0; i < 42; …)»).  Python has both random.randint(),
which is inclusive, and random.randrange(), which is exclusive — but the
latter mirrors Python's array slice syntax, and that syntax has
a special-case form for "All elements from the Nth through the last one,
inclusive".

I think I'm leaning towards inclusive if -L and -U can both be specified
and towards exclusive if -L is hard-coded as 0, but I'm happy to be
convinced otherwise.

HTH,

Daniel


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-23 20:33                           ` Daniel Shahaf
@ 2022-11-23 21:42                             ` dana
  2022-11-23 23:54                               ` Daniel Shahaf
  2022-11-24 13:52                               ` Clinton Bunch
  0 siblings, 2 replies; 46+ messages in thread
From: dana @ 2022-11-23 21:42 UTC (permalink / raw)
  To: Daniel Shahaf, Clinton Bunch; +Cc: Zsh hackers list

On Mon 7 Nov 2022, at 18:18, Clinton Bunch wrote:
> '(-r)-L+[specify integer lower bound (implies -i)]: :_numbers -d$lmin -l$lmin -m$max "integer lower bound"' \
> '(-r)-U+[specify integer upper bound (implies -i)]: :_numbers -d$umax -l$umin -m$max "integer upper bound"'

Was going to mention in my draft reply that the $max references here are
broken now

On Wed 23 Nov 2022, at 13:46, Daniel Shahaf wrote:
>   + Why should -c default to 8 in the "random bytes" case?  Why
>     shouldn't it default to some other value, or even be required to be
>     specified explicitly by the user? ...
> - Why should -c's default depend on anything at all?  You mentioned
>   downthread you consider making -c1 the default in all cases; that'd be
>   better.

The defaults with this API are kind of weird, because if you make them
dependent on the format (e.g. 8 for hex and 1 for everything else) it's kind
of arbitrary, but if you keep them all the same (e.g. 1 or 8 for everything)
they aren't generally useful — i think it's safe to assume that 'i would like
exactly 1 random hex digit' is not going to be the most common use case

Requiring the user to explicitly specify it would address that, though you
could say then that it goes the other way, e.g. again it's probably safe to
assume that 90% of the time you're only going to want one integer value, and
making people write that out every time, whilst expected in a lower-level API
like a C function, is maybe annoying in a convenience shell built-in

But annoying is probably better than confusing, if those are the options

On Wed 23 Nov 2022, at 13:46, Daniel Shahaf wrote:
>> +%test
>> +  getrandom -U 56
>> +1f:Checking if system has a kernel random source
>> +?(eval):1: No kernel random pool found.
>> +
>
> The test point's code has nothing to do with the test point's
> description and expected errput.

The assertion is backwards but i guess this is just testing to see if the
module works on the system. But should that even be a test? Shouldn't there
instead be a %prep that just leaves the whole file unimplemented in this case?
Unless we're expecting the module to be both available and usable on every zsh
build

On Wed 23 Nov 2022, at 13:46, Daniel Shahaf wrote:
> Oh, and bump that 16 to something 3 or 4 times as big, because a 1/65536
> chance isn't really enough in a world where automated builds (CI,
> distros' QA, etc.) is a thing.

I feel like it should be very nearly impossible for a test to fail just for
randomness reasons. Maybe it's over-kill but in my draft reply to the patch i
was going to suggest something like this:

  () {
    repeat $(( 10 ** 5 )); do
      getrandom -L4 -U5 -c64 -a tmpa
      [[ $tmpa[(r)5] == 5 ]] && return 0
    done
    return 1
  }

On Wed 23 Nov 2022, at 13:52, Daniel Shahaf wrote:
> Could have something like this:
>
>     "-f:specify format:(raw hex decimal)"

I kind of like that

On Wed 23 Nov 2022, at 14:33, Daniel Shahaf wrote:
> 4. Which approach generalizes better?

If we were to directly expose an underlying C API as in Matthew's proposed
arc4random_uniform(), i think it would make sense to remain consistent with
that API. But in any other case i would prefer inclusive, and i think that
fits nicely with other things in zsh as you said

dana


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-23 21:42                             ` dana
@ 2022-11-23 23:54                               ` Daniel Shahaf
  2022-11-24  0:17                                 ` Daniel Shahaf
  2022-11-24  1:05                                 ` dana
  2022-11-24 13:52                               ` Clinton Bunch
  1 sibling, 2 replies; 46+ messages in thread
From: Daniel Shahaf @ 2022-11-23 23:54 UTC (permalink / raw)
  To: Zsh hackers list; +Cc: Clinton Bunch

dana wrote on Wed, 23 Nov 2022 21:42 +00:00:
> On Wed 23 Nov 2022, at 13:46, Daniel Shahaf wrote:
>>   + Why should -c default to 8 in the "random bytes" case?  Why
>>     shouldn't it default to some other value, or even be required to be
>>     specified explicitly by the user? ...
>> - Why should -c's default depend on anything at all?  You mentioned
>>   downthread you consider making -c1 the default in all cases; that'd be
>>   better.
>
> The defaults with this API are kind of weird, because if you make them
> dependent on the format (e.g. 8 for hex and 1 for everything else) it's kind
> of arbitrary, but if you keep them all the same (e.g. 1 or 8 for everything)
> they aren't generally useful — i think it's safe to assume that 'i would like
> exactly 1 random hex digit' is not going to be the most common use case
>

Well, agreed on that last sentence, but note that «-c 1» in the patch
means one byte, not one nibble.

> Requiring the user to explicitly specify it would address that, though you
> could say then that it goes the other way, e.g. again it's probably safe to
> assume that 90% of the time you're only going to want one integer value, and
> making people write that out every time, whilst expected in a lower-level API
> like a C function, is maybe annoying in a convenience shell built-in
>

But 1 /is/ the default for integer mode, and I don't think anyone
proposed to change that?  Rather, it was proposed to change the default
for bytes mode from 4 bytes (8 nibbles) to 1 byte.  Do you reckon requesting 4 bytes
should be the default for that mode, as opposed to, say, 1, 2, 8, or 64 bytes?

> But annoying is probably better than confusing, if those are the options
>

Heh :)

> On Wed 23 Nov 2022, at 13:46, Daniel Shahaf wrote:
>> Oh, and bump that 16 to something 3 or 4 times as big, because a 1/65536
>> chance isn't really enough in a world where automated builds (CI,
>> distros' QA, etc.) is a thing.
>
> I feel like it should be very nearly impossible for a test to fail just for
> randomness reasons. Maybe it's over-kill but in my draft reply to the patch i
> was going to suggest something like this:
>
>   () {
>     repeat $(( 10 ** 5 )); do
>       getrandom -L4 -U5 -c64 -a tmpa
>       [[ $tmpa[(r)5] == 5 ]] && return 0
>     done
>     return 1
>   }
>

No maybe about it :)

With these parameters, the probability of a false positive is 2 to the
power of minus the overall number of iterations, i.e., 2**(-6.4 million),
which is 1/[a number that has 1.9M decimal digits].

To be clear, it's not 1/1.9M, which is about the probability of a random
Londoner being at 10 Downing Street right now.  It's 1/[10 ** 1.9M],
which is about the probability of correctly guessing the genders of all
Londoners.

If you converted the entire Earth's mass to CPUs and ran «getrandom -L4
-U5 -c64» on it repeatedly until Sol died, and the CPUs all operated at
4GHz, and there were no bugs in anything, the chance of getting a single
run to not return a 5 would still be something like a billion to one
(give or take several zeroes depending on CPU mass, the argument to -c,
and so on).

That's why in practice, if a single -c64 call ever doesn't return a 5,
it's safe to assume there's a bug.

Conversely, if you actually retain those 6.4 million iterations, what's
the probability that the outer loop will return 0 on the first iteration
and then a gamma ray will flip that to 0?


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-23 23:54                               ` Daniel Shahaf
@ 2022-11-24  0:17                                 ` Daniel Shahaf
  2022-11-24  1:05                                 ` dana
  1 sibling, 0 replies; 46+ messages in thread
From: Daniel Shahaf @ 2022-11-24  0:17 UTC (permalink / raw)
  To: zsh-workers

Daniel Shahaf wrote on Wed, 23 Nov 2022 23:54 +00:00:
> dana wrote on Wed, 23 Nov 2022 21:42 +00:00:
>> On Wed 23 Nov 2022, at 13:46, Daniel Shahaf wrote:
>>> Oh, and bump that 16 to something 3 or 4 times as big, because a 1/65536
>>> chance isn't really enough in a world where automated builds (CI,
>>> distros' QA, etc.) is a thing.
>>
>> I feel like it should be very nearly impossible for a test to fail just for
>> randomness reasons. Maybe it's over-kill but in my draft reply to the patch i
>> was going to suggest something like this:
>>
>>   () {
>>     repeat $(( 10 ** 5 )); do
>>       getrandom -L4 -U5 -c64 -a tmpa
>>       [[ $tmpa[(r)5] == 5 ]] && return 0
>>     done
>>     return 1
>>   }
>>
>
> No maybe about it :)
>
> With these parameters, the probability of a false positive is 2 to the
> power of minus the overall number of iterations, i.e., 2**(-6.4 million),
> which is 1/[a number that has 1.9M decimal digits].
>
> To be clear, it's not 1/1.9M, which is about the probability of a random
> Londoner being at 10 Downing Street right now.  It's 1/[10 ** 1.9M],
> which is about the probability of correctly guessing the genders of all
> Londoners.

To be clear, I don't mean guessing the gender /ratio/.  I mean filling
a spreadsheet with one row for each Londoner and having to guess each and
every row correctly, without any information about the particular Londoner.

> If you converted the entire Earth's mass to CPUs and ran «getrandom -L4
> -U5 -c64» on it repeatedly until Sol died, and the CPUs all operated at
> 4GHz, and there were no bugs in anything, the chance of getting a single
> run to not return a 5 would still be something like a billion to one
> (give or take several zeroes depending on CPU mass, the argument to -c,
> and so on).
>
> That's why in practice, if a single -c64 call ever doesn't return a 5,
> it's safe to assume there's a bug.
>
> Conversely, if you actually retain those 6.4 million iterations, what's
> the probability that the outer loop will return 0 on the first iteration
> and then a gamma ray will flip that to 0?


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-23 23:54                               ` Daniel Shahaf
  2022-11-24  0:17                                 ` Daniel Shahaf
@ 2022-11-24  1:05                                 ` dana
  1 sibling, 0 replies; 46+ messages in thread
From: dana @ 2022-11-24  1:05 UTC (permalink / raw)
  To: Daniel Shahaf; +Cc: Zsh hackers list

On Wed 23 Nov 2022, at 17:54, Daniel Shahaf wrote:
> Well, agreed on that last sentence, but note that «-c 1» in the patch
> means one byte, not one nibble.

Right, i misspoke

On Wed 23 Nov 2022, at 17:54, Daniel Shahaf wrote:
> But 1 /is/ the default for integer mode, and I don't think anyone
> proposed to change that?  Rather, it was proposed to change the default
> for bytes mode from 4 bytes (8 nibbles) to 1 byte.  Do you reckon
> requesting 4 bytes
> should be the default for that mode, as opposed to, say, 1, 2, 8, or 64
> bytes?

The default for all modes was initially 8, so i was thinking of that as a
counter-proposal to them all being 1 as mentioned in the previous paragraph. I
think that no matter which number you choose, if you make it the same for all
modes, there's going to be at least one mode where it does something that is
rarely (or less) useful, like returning a single byte in raw or hex mode. But
i don't know how you'd quantify the usefulness of any particular alternative,
so i guess i'm just thinking out loud

On Wed 23 Nov 2022, at 17:54, Daniel Shahaf wrote:
> To be clear, it's not 1/1.9M, which is about the probability of a random
> Londoner being at 10 Downing Street right now.  It's 1/[10 ** 1.9M],
> which is about the probability of correctly guessing the genders of all
> Londoners.

That sounds very nearly impossible to me

dana


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-23 19:46           ` Daniel Shahaf
@ 2022-11-24  2:58             ` Clinton Bunch
  2022-11-24 10:07               ` nimaje+zml
  2022-11-24 14:33             ` Clinton Bunch
  1 sibling, 1 reply; 46+ messages in thread
From: Clinton Bunch @ 2022-11-24  2:58 UTC (permalink / raw)
  To: zsh-workers

This is a from memory reply as I'm away from my git repository/code for 
the holiday. :)  So there may be a follow up.

On 11/23/2022 1:46 PM, Daniel Shahaf wrote:
> Clinton Bunch wrote on Mon, Nov 07, 2022 at 18:18:15 -0600:
> +++ b/Doc/Zsh/mod_random.yo
>> @@ -0,0 +1,67 @@
>> +startitem()
>> +findex(getrandom)
>> +cindex(random data, printing, array)
>> +xitem(tt(getrandom) [ tt(-c) var(count) ] [ tt(-r) ] [ tt(-s) var(scalar) ] [tt(-i) ] [ tt(-a) var(array) ] [ tt(-L) var(lower_bound) ] [ tt(-U) var(upper_bound) ])
> Never use xitem() without item() after it.
I'm using existing docs as a template as I don't know YODL at all, so 
I'm not surprised I made mistakes.
>
> I don't understand this API.
>
> - Magic numbers:
>
>    + Why should -c default to 8 in the "random bytes" case?  Why
>      shouldn't it default to some other value, or even be required to be
>      specified explicitly by the user?
Not sure why 8 was the number I thought of, just that one wasn't 
particularly useful.
>
>    + Why should -c0 be an error?  It should be valid to request an array
>      of 0 random numbers, just like, say, «yes | head -n 0».
I thought nonsensical options must be an error.
>
>    + Why should -c65 be an error?  (Saw your comment elsethread that you
>      imagine that would suffice.  That's not a good reason for this limit.)
It started as a 256 byte limit from getrandom().
>
>    + Why should -L$(( 2**32 - 1)) be an error?  I should be able to request
>      a random integer in the range { x | 42 ≤ x ≤ 42 }.  It's perfectly
>      well-defined.
if the difference between lower and upper is zero, you get a divide by 
zero error unless you special case it, and it never occurred to me that 
someone would specify that case deliberately
>
> - Why should -c's default depend on anything at all?  You mentioned
>    downthread you consider making -c1 the default in all cases; that'd be
>    better.
>
> - The builtin can generate either integers or bytes.  Most of the API
>    (the type of "returned random data" that -c deals with, the default
>    number of those, validity of -L/-U, etc.) is split right along this
>    line.  Shouldn't this become two separate builtins, then?  One for
>    bytes and one for integers?
>
>    (More on this below, at the end of the implementation review.)
>
> - Interdependencies.  This can generate either bytes or unsigned ints,
>    and bytes can be emitted either in hex, raw, or in decimal, and either
>    to stdout or to a scalar or to an array… but not all of these
>    combinations are possible.  For instance, adding «-a» to «getrandom -c 1»
>    changes the output encoding from hex to decimal AND the output channel
>    from stdout to $reply.  It seems better to avoid coupling things like
>    this.
>
>> +subsect(Parameters)
>> +
>> +startitem()
>> +vindex(SRANDOM)
>> +item(tt(SRANDOM)) (
>> +A random positive 32-bit integer between 0 and 4,294,967,295.  This parameter
>> +is read-only.
>> +)
>> +enditem()
>> +
>> +subsect(Math Functions)
>> +
>> +startitem()
>> +item(tt(zrandom+LPAR()RPAR())) (
> Doesn't this need a findex()?
>
> Do other math functions have index entries?  And if not (systell()
> doesn't), shouldn't they?
>
>> +++ b/Src/Modules/random.c
>> @@ -0,0 +1,675 @@
> There's a whole bunch of orthographic issues in this file (whitespace,
> punctuation, spelling, etc.).  I won't point them out now since there
> are bigger issues.
>
>> +#include <stdbool.h>
> C99-ism.  (zsh is written in C89.)
>
>> +/* buffer to pre-load integers for SRANDOM to lessen the context switches */
>> +uint32_t rand_buff[8];
> C99-ism (the array element type).

I asked about POSIX standards as the dev guide seemed badly out of date 
in workers/50819

Got this response in workers/50847


On Tue, Oct 25, 2022 at 6:52 PM Clinton Bunch <cdb_zsh@zentaur.org> wrote:
 >
 > My personal opinion is that development should use at least the
 > POSIX-1.2001 standard.  It's 20 years old.  That's surely old enough for
 > any system still running.  (It's  certainly old enough that any such
 > system is not supported by the vendor)

OTOH any system not supported by the vendor is exactly one where
somebody might be trying to build their own zsh binary.

That said, I thought we standardized on c99 rather than c89 quite some 
time ago.

>
> Even in C99, wouldn't this require an explicit #include?  I guess it
> works now due to transitive #include's, but I don't know that it's
> guaranteed to work everywhere.
>
>> +getrandom_buffer(void *buf, size_t len)
>> +{
> ⋮
>> +    do {
>> +#ifdef HAVE_ARC4RANDOM
>> +	/* Apparently, the creaters of arc4random didn't believe in errors */
> Leave out the /ad homines/.
>
> FWIW, I would sooner guess arc4random() was originally written on
> a platform where it /couldn't/ error.
>
>> +	arc4random_buf(buf,len);
>> +	ret = len;
>> +#elif defined(HAVE_GETRANDOM)
>> +	ret=getrandom(bufptr,(len - val),0);
>> +#else
>> +	ret=read(randfd,bufptr,(len - val));
> The premise of the module is to offer an interface to high-quality
> random numbers.  Is /dev/urandom, which is the module falls back to
> here, necessarily of sufficient quality?  If not, shouldn't there be
> a way for the module's user to determine the source of randomness?
arc4random and getrandom are just more efficient access to the pool of 
/dev/urandom.   AFAIK it's as good a pseudo-random source as any on any 
platform.
>
>> +#endif
>> +	if (ret < 0) {
>> +	    if (errno != EINTR || errflag || retflag || breaks || contflag) {
>> +		zwarn("Unable to get random data: %e.", errno);
> Need to set «errno = 0» first, surely?
>
>> +/*
>> + * Implements the getrandom builtin to return a string of random bytes of
>> + * user-specified length.  It either prints them to stdout or returns them
>> + * in a parameter passed on the command line.
>> + *
>> + */
>> +
>> +/**/
>> +static int
>> +bin_getrandom(char *name, char **argv, Options ops, int func)
>> +{
> ⋮
>> +    bool integer_out = false;	 /*  boolean */
> The target audience for comments knows C.  So, nuke this comment.
>
> You might define inline enums here (for ints/bytes, scalar/array/stdout,
> and decimal/hex/raw) to make the code more self-documenting.
>
> Incidentally, this would also immediately raise the question of why some
> combinations of these aren't valid (e.g., hex or raw bytes output to an
> array).
>
>> +    int i, j;		         /* loop counters */
>> +
>> +    /*maximum string length of a 32-bit integer + null */
>> +    char int2str[11];
> Have the comment point out that this doesn't include a sign byte?
>
> FWIW, we have a 64-bit version of this in DIGBUFSIZE.
>
>> +    /* Vailues for -U *nd -L */
>> +    zulong   upper = UINT32_MAX, lower = 0;
>> +    uint32_t diff  = UINT32_MAX;
>> +    uint32_t min   = 0, rej = 0;  /* Reject below min and count */
> «rej» is never read.
>
>> +    bool     bound = false;       /* set if upper or lower bound specified */
> Why is this variable needed?  Having it at all means that passing «-L 0
> -U $((2**32-1))» is different to not passing any -L or -U option at all,
> even though these values are ostensibly the defaults.
>
>> +    if (OPT_ISSET(ops, 'U')) {
>> +	if (!zstrtoul_underscore(OPT_ARG(ops, 'U'), &upper)) {
>> +	    zwarnnam(name, "Couldn't convert -U %s to a number.",
>> +		    OPT_ARG(ops, 'U'));
>> +	    return 1;
>> +	} else if (upper > UINT32_MAX || upper < 1) {
>> +	    zwarnnam(name,
>> +		    "Upper bound must be between 1 and %z you specified: %z.",
>> +		     (zlong) UINT32_MAX, (zlong) upper);
> If «upper > ZLONG_MAX», the downcast to zlong would result in
> a negative number.
zwarnnam has no zulong format and upper is supposed to be a 32-bit integer.
>
>> +	    return 1;
> ⋮
>> +    if (bound) {
> ⋮
>> +	if (lower > upper) {
>> +	    zwarnnam(name, "-U must be larger than -L.");
>> +	    return 1;
>> +	}
>> +
>> +	diff = upper == UINT32_MAX && lower == 0 ? UINT32_MAX :
>> +		upper - lower + 1;
> How can this possibly be right?  As written it means the meaning of
> «diff» with the default values of «upper»/«lower» is different to its
> meaning in literally every other case.
>
> Ah, I see.  Without special-casing the default values, explicitly
> passing «-L 0 -U $((2**32 - 1))» would incorrectly trigger the
> zwarnnam() just below, because «diff» is a uint32_t.  Thus, the
> special-casing is an incorrect fix to a real issue.
>
>> +	if(!diff) {
>> +	    zwarnnam(name, "Upper and Lower bounds cannot be the same.");
>> +	    return 1;
>> +	}
> This warning will never be printed.  [Why?  Because «diff» is either
> UINT32_MAX, which is true, or «(uint32_t)(zulong)(upper - lower + 1)»,
> which, taking into account the range checks on «upper» and «lower», can
> only work out to 0 if «upper == UINT32_MAX && lower == 0»… which is
> precisely the case that's special-cased in the assignment to «diff» just
> above.]
>
> I suppose it was intended to be printed for «getrandom -L 42 -U 42».  In
> practice, that incantation just prints 42.  As I mentioned in the API
> review, I think that's desired behaviour.  In either case, please add
> a test.
>
>> +    }
>> +
>> +    if(!OPT_ISSET(ops,'c'))
>> +	if ((!bound && !integer_out) || arrname)
>> +	len = 8;
> So one default value of -c is here and the other is up in the
> declaration of «len».  That's somewhat confusing.
>
>> +
>> +
>> +    if (!byte_len)
>> +	byte_len = len;
> The condition is always true.
>
> Didn't you get a compiler warning for this?
No.  Not in Developer's Studio, clang, gcc, or HP ANSI C.
>
>> +
>> +
>> +    if (bound) {
>> +	pad = len / 16 + 1;
>> +	byte_len = (len + pad) * sizeof(uint32_t);
> «pad» should be declared in this scope since it's not used elsewhere.
> Ditto «outbuff» below and others.
>
> Why «len / 16 + 1»?
I couldn't find any statistical information that would tell me how many 
numbers were likely to be rejected, so I guessed.
>> +    } else if (integer_out)
>> +	byte_len = len * sizeof(uint32_t);
>> +
>> +    if (byte_len > 256)
>> +	byte_len=256;
>> +
> Why not report an error to the user?
>
> ⋮
>> +    min = -diff % diff;
> See above about the special casing in the assignment to «diff».  I guess
> it affects this line.
>
> ⋮
>> +	sprintf(int2str, "%u", int_val);
> Hold on.  %u expects an «unsigned int» and «int_val» is a «uint32_t».
> So, this line is only correct when «unsigned int» happens to be a 32-bit
> type… while there's nothing stopping that type from being a 64-bit type,
> or even a 16-bit type, and in either case this line would read the wrong
> number of bytes off the varargs.
>
>> +	if (arrname) {
>> +	    array[i]=ztrdup(int2str);
>> +	} else if (scalar && len == 1) {
>> +	    setiparam(scalar, int_val);
>> +	} else {
>> +	    printf("%s ",int2str);
>> +	}
>> +    }
>> +    if (arrname) {
>> +	setaparam(arrname,array);
>> +    } else if (!scalar) {
>> +	printf("\n");
>> +    }
>> +
>> +
>> +    zfree(buf, byte_len);
>> +    return 0;
>> +}
>> +
> In the API docs review I proposed to split «getrandom» into two separate
> builtins.
>
> Having read this function, I reiterate that proposal.  This function,
> with all the if/then cases that try to keep track of which of the
> function's different modes of operation was requested, is difficult to
> read and to modify.
>
>> +
>> +/*
>> + * Provides for the SRANDOM parameter and returns an unsigned 32-bit random
>> + * integer.
>> + */
>> +
>> +/**/
>> +static zlong
>> +get_srandom(UNUSED(Param pm)) {
>> +
>> +    if(buf_cnt < 0) {
>> +	getrandom_buffer((void*) rand_buff,sizeof(rand_buff));
>> +	buf_cnt=7;
> Magic number.  You might want «sizeof(rand_buff) / sizeof(rand_buff[0]) - 1»…
>
> …but it'd be more idiomatic to leave that «- 1» out and then change the
> postfix decrement to a prefix decrement.  That would also be more
> consistent with the variable's name.  (As written, «buf_cnt == 0» means
> there's one more zlong in it; it's not a "count" but a "largest valid
> index".)
>
>> +    }
>> +    return rand_buff[buf_cnt--];
>> +}
>> +
>> +/*
>> + * Implements the mathfunc zrandom and returns a random floating-point
>> + * number between 0 and 1.  Does this by getting a random 32-bt integer
>> + * and dividing it by UINT32_MAX.
>> + *
> Presumably this means to guarantee a /uniformly-distributed/ random
> floating-point number in the given range.  So, why does "getting
> a random 32-bit integer and dividing it by UINT32_MAX" actually
> implement that guarantee?
>
> Oh, wait, never mind.  The comment is out of date.  Fix it.
>
>> + */
>> +
>> +/**/
>> +static mnumber
>> +math_zrandom(UNUSED(char *name), UNUSED(int argc), UNUSED(mnumber *argv),
>> +	     UNUSED(int id))
>> +{
>> +    mnumber ret;
>> +    double r;
>> +
>> +    r = random_real();
>> +    if (r < 0) {
>> +	zwarnnam(name, "Failed to get sufficient random data.");
> Say why.  (Probably via errno?)
>
> Rule of thumb: every error message should include at least one replaceable.
>
> (Further instances of this elsewhere in the file.)
>
>> +/**/
>> +int
>> +setup_(UNUSED(Module m))
>> +{
>> +#ifdef USE_URANDOM
>> +    /* Check for the existence of /dev/urandom */
>> +
>> +    struct stat st;
>> +
>> +    if (lstat("/dev/urandom",&st) < 0) {
> Why not stat()?
Is it appropriate for /dev/urandom to be a symlink?
>
>> +	zwarn("No kernel random pool found.");
>> +	return 1;
>> +    }
>> +
>> +    if (!(S_ISCHR(st.st_mode)) ) {
>> +	zwarn("No kernel random pool found.");
>> +	return 1;
>> +    }
>> +#endif /* USE_URANDOM */
>> +    return 0;
>> +}
>> +/**/
>> +int
>> +_zclz64(uint64_t x) {
> ⋮
>> +    if (!(x & 0xFFFFFFFF00000000)) {
> Don't you need ZLONG_CONST() here?
>
>> +/**/
>> +uint64_t
>> +random64(void) {
>> +    uint64_t r;
>> +
>> +    if(getrandom_buffer(&r,sizeof(r)) < 0) {
>> +	zwarn("zsh/random: Can't get sufficient random data.");
>> +	return 1; /* 0 will cause loop */
> Where is the docstring of random64() itself?  /That/ is what determines
> whether or not returning 0 would be correct.
>
> What loops and why?
>
> Also, I suspect this name will collide with a third-party function one
> day.  Recommending to rename.
>
>> +    }
>> +
>> +    return r;
>> +}
>> +
>> +/* Following code is under the below copyright, despite changes for error
>> + * handling and removing GCCisms */
>> +
> -1.  Don't say "Following code"; identify the code explicitly, or better
> yet, move it to its own *.c file.  Otherwise, any function we might
> append to this file would be taken to be covered by this copyright
> notice and license statement.
>
>> +/*
>> + * random_real: Generate a stream of bits uniformly at random and
>> + * interpret it as the fractional part of the binary expansion of a
>> + * number in [0, 1], 0.00001010011111010100...; then round it.
>> + */
>> +
>> +/**/
>> +double
> Need «static».
>
> (Further instances of this elsewhere in the file.)
>
>> +random_real(void)
>> +{
> ⋮
>> +}
>> +++ b/Test/V15random.ztst
>> @@ -0,0 +1,51 @@
>> +# Tests for the zsh/random module
>> +%test
>> +  getrandom -U 56
>> +1f:Checking if system has a kernel random source
>> +?(eval):1: No kernel random pool found.
>> +
> The test point's code has nothing to do with the test point's
> description and expected errput.
>
> The 'f' flag is why this test point's failure doesn't get reported.
The test is supposed to fail.
>
>> +  getrandom -U 64
>> +0:Checking upper Bound is observed
>> +*><0-64>
>> +
>> +  tmpmsg="failed"
>> +  getrandom -L 4 -U 5 -c 16 -a tmpa
>> +  for i in ${tmpa[@]}
>> +  do
>> +    if (( i == 5 )); then
>> +      tmpmsg="success"
>> +      break
>> +    fi
>> +  done
> Use ${tmpa[(I)5]} ?
>
>> +  echo $tmpmsg
>> +0:Checking that upper bound is inclusive
>> +F:Try running the test again: make TESTNUM=V15 check
> Baby and bathwater.  Say what the problem is first and what the
> recommended fix is second.  In this case, something along the lines
> of "This test WILL false positive once every 65536 runs on average.  To
> rule this out, run the test again."
>
> Oh, and bump that 16 to something 3 or 4 times as big, because a 1/65536
> chance isn't really enough in a world where automated builds (CI,
> distros' QA, etc.) is a thing.
>
>> +>success
>> +
>> +  echo $(( zrandom() ))
>> +0:Checking if zrandom() produces a floating-point number between 0 and 1
>> +*>0(|.)[[:digit:]]#
> Why is the period optional?  Something like "042" shouldn't be accepted.
>
>> diff --git a/configure.ac b/configure.ac
>> index 074141d38..f1fa01274 100644
>> --- a/configure.ac
>> +++ b/configure.ac
>> @@ -675,6 +675,7 @@ fi
>>   AC_CHECK_HEADERS(sys/time.h sys/times.h sys/select.h termcap.h termio.h \
>>   		 termios.h sys/param.h sys/filio.h string.h memory.h \
>>   		 limits.h fcntl.h libc.h sys/utsname.h sys/resource.h \
>> +                 sys/random.h \
> Tabs/spaces are wrong.  Also in the next hunk.
>
>>   		 locale.h errno.h stdio.h stdarg.h varargs.h stdlib.h \
>>   		 unistd.h sys/capability.h \
>>   		 utmp.h utmpx.h sys/types.h pwd.h grp.h poll.h sys/mman.h \
>> @@ -1337,6 +1338,7 @@ AC_CHECK_FUNCS(strftime strptime mktime timelocal \
>>   	       cygwin_conv_path \
>>   	       nanosleep \
>>   	       srand_deterministic \
>> +               getrandom arc4random \
>>   	       setutxent getutxent endutxent getutent)
>>   AC_FUNC_STRCOLL
>>   
> Bottom line:
>
> - Interfacing to kernel random number APIs seems reasonable enough.
>
> - The API to shell scripts feels unintuitive.
>
> - As the patch stands, it is not ready to be merged (even assuming
>    arguendo the API is perfect as-is).
>
> Thanks,
>
> Daniel
>



^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-24  2:58             ` Clinton Bunch
@ 2022-11-24 10:07               ` nimaje+zml
  2022-11-24 13:19                 ` Clinton Bunch
  0 siblings, 1 reply; 46+ messages in thread
From: nimaje+zml @ 2022-11-24 10:07 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers



On 11/24/22 03:58, Clinton Bunch wrote:
> [snip]
>>> +/**/
>>> +int
>>> +setup_(UNUSED(Module m))
>>> +{
>>> +#ifdef USE_URANDOM
>>> +    /* Check for the existence of /dev/urandom */
>>> +
>>> +    struct stat st;
>>> +
>>> +    if (lstat("/dev/urandom",&st) < 0) {
>> Why not stat()?
> Is it appropriate for /dev/urandom to be a symlink?

Yes, for example that is the case on freebsd where urandom is just a 
symlink to random for compatibility with programs that expect urandom to 
be there (random blocks until the randomness pool is seeded and then 
doesn't block anymore).

>>
>>> +    zwarn("No kernel random pool found.");
>>> +    return 1;
>>> +    }
>>> +
>>> +    if (!(S_ISCHR(st.st_mode)) ) {
>>> +    zwarn("No kernel random pool found.");
>>> +    return 1;
>>> +    }

So this check would be wrong on freebsd (or use random directly and give 
an option to use urandom on systems where random starts to block for 
some reason after being seeded, but no idea if there is a good reason 
for random being a symlink).

>[snip]


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-24 10:07               ` nimaje+zml
@ 2022-11-24 13:19                 ` Clinton Bunch
  0 siblings, 0 replies; 46+ messages in thread
From: Clinton Bunch @ 2022-11-24 13:19 UTC (permalink / raw)
  To: zsh-workers

On 11/24/2022 4:07 AM, nimaje+zml@bureaucracy.de wrote:
>
>
> On 11/24/22 03:58, Clinton Bunch wrote:
>> [snip]
>>>> +/**/
>>>> +int
>>>> +setup_(UNUSED(Module m))
>>>> +{
>>>> +#ifdef USE_URANDOM
>>>> +    /* Check for the existence of /dev/urandom */
>>>> +
>>>> +    struct stat st;
>>>> +
>>>> +    if (lstat("/dev/urandom",&st) < 0) {
>>> Why not stat()?
>> Is it appropriate for /dev/urandom to be a symlink?
>
> Yes, for example that is the case on freebsd where urandom is just a 
> symlink to random for compatibility with programs that expect urandom 
> to be there (random blocks until the randomness pool is seeded and 
> then doesn't block anymore).
On FreeBSD it doesn't use urandom, so it didn't show up in my testing.  
It only uses urandom if arc4random() or getrandom() doesn't exist
>
>>>
>>>> +    zwarn("No kernel random pool found.");
>>>> +    return 1;
>>>> +    }
>>>> +
>>>> +    if (!(S_ISCHR(st.st_mode)) ) {
>>>> +    zwarn("No kernel random pool found.");
>>>> +    return 1;
>>>> +    }
>
> So this check would be wrong on freebsd (or use random directly and 
> give an option to use urandom on systems where random starts to block 
> for some reason after being seeded, but no idea if there is a good 
> reason for random being a symlink).
On most systems random is defined to block if the randomness pool falls 
below a certain threshold until more random data is available
>
>> [snip]
>



^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-23 21:42                             ` dana
  2022-11-23 23:54                               ` Daniel Shahaf
@ 2022-11-24 13:52                               ` Clinton Bunch
  1 sibling, 0 replies; 46+ messages in thread
From: Clinton Bunch @ 2022-11-24 13:52 UTC (permalink / raw)
  To: zsh-workers

On 11/23/2022 3:42 PM, dana wrote:
[snip]
> On Wed 23 Nov 2022, at 13:46, Daniel Shahaf wrote:
>>> +%test
>>> +  getrandom -U 56
>>> +1f:Checking if system has a kernel random source
>>> +?(eval):1: No kernel random pool found.
>>> +
>> The test point's code has nothing to do with the test point's
>> description and expected errput.
> The assertion is backwards but i guess this is just testing to see if the
> module works on the system. But should that even be a test? Shouldn't there
> instead be a %prep that just leaves the whole file unimplemented in this case?
> Unless we're expecting the module to be both available and usable on every zsh
> build
The test is because the module is designed to be run on operating 
systems where the kernel module for urandom is an add-on package. 
(Specifically, my problem child for the last decade, HP-UX), so may be 
available on the build system, but the zsh package installed on one 
where it isn't available.  On further thought, since the reverse is also 
true, though, the test may be misguided.


[snip]

> dana
>



^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-23 19:46           ` Daniel Shahaf
  2022-11-24  2:58             ` Clinton Bunch
@ 2022-11-24 14:33             ` Clinton Bunch
  1 sibling, 0 replies; 46+ messages in thread
From: Clinton Bunch @ 2022-11-24 14:33 UTC (permalink / raw)
  To: zsh-workers

On 11/23/2022 1:46 PM, Daniel Shahaf wrote:
> Clinton Bunch wrote on Mon, Nov 07, 2022 at 18:18:15 -0600:

[snip]
>
>> +++ b/Src/Modules/random.c
>> @@ -0,0 +1,675 @@
> There's a whole bunch of orthographic issues in this file (whitespace,
> punctuation, spelling, etc.).  I won't point them out now since there
> are bigger issues.

The default color scheme in vim syntax highlighting makes comments very 
hard to read, so I ended up not proofreading them as much as I should have.

The whitespace issues are due to trying to write in, for me, an 
unnatural coding style.  I'm working on it.  I've seen inconsistent 
coding styles elsewhere in the code, perhaps a code formatter should be 
recommended with appropriate options.  The editconfig file is helpful, 
but doesn't work well when lines are added or  a new indentation level 
is required for existing code. (When writing Perl code, I'm a big fan of 
perltidy)

[snip]

>
>> +    bool integer_out = false;	 /*  boolean */
> The target audience for comments knows C.  So, nuke this comment.
The comment is left over from when integer_out was an int, before I was 
told C99 was acceptable and changed it to bool.[snip]
>> +    /* Vailues for -U *nd -L */
>> +    zulong   upper = UINT32_MAX, lower = 0;
>> +    uint32_t diff  = UINT32_MAX;
>> +    uint32_t min   = 0, rej = 0;  /* Reject below min and count */
> «rej» is never read.
It should probably be wrapped in an #ifdef DEBUG as should its intended 
use. (I used it track how many numbers were being discarded to achieve 
uniform distribution during debugging. I removed those statements and 
forgot to remove the declaration.  I'm surprised I didn't get a warning)
>


[snip]

>> +/**/
>> +uint64_t
>> +random64(void) {
>> +    uint64_t r;
>> +
>> +    if(getrandom_buffer(&r,sizeof(r)) < 0) {
>> +	zwarn("zsh/random: Can't get sufficient random data.");
>> +	return 1; /* 0 will cause loop */
> Where is the docstring of random64() itself?  /That/ is what determines
> whether or not returning 0 would be correct.
>
> What loops and why?

The loop in the random_real that checks for repeated zeros and 
terminates when the exponent reaches the minimum value.

[snip]


> Thanks,
>
> Daniel
>



^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-23 19:52                   ` Daniel Shahaf
@ 2022-11-24 16:19                     ` Stephane Chazelas
  2022-11-24 16:30                       ` Roman Perepelitsa
  0 siblings, 1 reply; 46+ messages in thread
From: Stephane Chazelas @ 2022-11-24 16:19 UTC (permalink / raw)
  To: Daniel Shahaf; +Cc: Clinton Bunch, zsh-workers

2022-11-23 19:52:22 +0000, Daniel Shahaf:
[...]
> > > Alternatively, you could add a -f %02x, -f %u -f %c and do
> > > without the -r/-i, with default -c 8 -f %02x -L 0 -U 255.
> > That seems a lot more confusing than -r (raw) or -i (integer) unless it's
> > expanded to a lot more arbitrary formats.  At least for someone not familiar
> > with printf formats.
> 
> Could have something like this:
> 
>     "-f:specify format:(raw hex decimal)"
> 
> It's extensible and readable.  (Could easily extend it to support printf
> formats, or uppercase/lowercase hex variations, etc..)  Also, it uses
> one fewer short option :)

You'd then need a separate option for length/padding/truncating
of those numbers and maybe even an endianness one.

For instance, without -i, at the moment, you get 0-padding of
the numbers to length 2. I'd expect with -i -f hex, you'd want
either 0xabcde or 000ABCDE. With -f raw -L -12 -U 1234, you'd
want to specify if it's short/int/long you want dumped, etc.

To me, having $SRANDOM added to core zsh, and maybe a
randint(first, last) added to zsh/math would be more than
enough.

To get a stream of random bytes, /dev/urandom is best.
and we have read/sysread to get that.

We have printf / print -f for formatting numbers already.

As already said, having some equivalent of perl's pack/unpack to
interpret/generate binary data would be more useful than adding
that functionality to every new builtin that deals with some
form of binary data.

-- 
Stephane


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-24 16:19                     ` Stephane Chazelas
@ 2022-11-24 16:30                       ` Roman Perepelitsa
  2022-11-24 22:39                         ` Clinton Bunch
  0 siblings, 1 reply; 46+ messages in thread
From: Roman Perepelitsa @ 2022-11-24 16:30 UTC (permalink / raw)
  To: Daniel Shahaf, Clinton Bunch, zsh-workers

On Thu, Nov 24, 2022 at 5:20 PM Stephane Chazelas <stephane@chazelas.org> wrote:
>
> To me, having $SRANDOM added to core zsh, and maybe a
> randint(first, last) added to zsh/math would be more than
> enough.

This is my opinion as well. These two facilities are easy to
understand, cover all use cases (even if requiring a bit of extra code
for the rare ones) and are easy to agree upon as far as the API is
concerned.

Roman.


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-24 16:30                       ` Roman Perepelitsa
@ 2022-11-24 22:39                         ` Clinton Bunch
  2022-11-25  8:53                           ` Stephane Chazelas
  0 siblings, 1 reply; 46+ messages in thread
From: Clinton Bunch @ 2022-11-24 22:39 UTC (permalink / raw)
  To: zsh-workers

On 11/24/2022 10:30 AM, Roman Perepelitsa wrote:
> On Thu, Nov 24, 2022 at 5:20 PM Stephane Chazelas <stephane@chazelas.org> wrote:
>> To me, having $SRANDOM added to core zsh, and maybe a
>> randint(first, last) added to zsh/math would be more than
>> enough.
> This is my opinion as well. These two facilities are easy to
> understand, cover all use cases (even if requiring a bit of extra code
> for the rare ones) and are easy to agree upon as far as the API is
> concerned.
>
> Roman.
If we move SRANDOM into zsh core, we'll need to define the behavior if 
urandom is not found.  The module fails to load if it doesn't find 
arc4random(), getrandom(), or /dev/urandom, but we can't do that if it's 
in core.




^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-24 22:39                         ` Clinton Bunch
@ 2022-11-25  8:53                           ` Stephane Chazelas
  2022-11-25  9:40                             ` Stephane Chazelas
  0 siblings, 1 reply; 46+ messages in thread
From: Stephane Chazelas @ 2022-11-25  8:53 UTC (permalink / raw)
  To: Clinton Bunch; +Cc: zsh-workers

2022-11-24 16:39:07 -0600, Clinton Bunch:
[...]
> If we move SRANDOM into zsh core, we'll need to define the behavior if
> urandom is not found.
[...]

Since that would be for bash compatibility, we can do the same
as they do there.

From what I can tell, if getrandom(), called with GRND_NONBLOCK
is not supported or fails (including with EAGAIN), it falls back
to arc4random if supported or with the same pseudo-generator as
$RANDOM if not (though seeded in each subshell with the time of
the day with µs precision and also looping until the value is
different from the previous one (!? is that a guarantee also
given by getrandom()?)).

On GNU/Linux, since bash's "configure" doesn't look for
arc4random() in libbsd, you get the same pseudo-generator as
$RANDOM as fallback (zsh's configure has the same problem
AFAICT; note that libbsd is used by xorg server, iproute2 at
least, so is likely to be present and pre-loaded on most
GNU/Linux systems).

-- 
Stephane


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH] zsh/random module [UPDATED]
  2022-11-25  8:53                           ` Stephane Chazelas
@ 2022-11-25  9:40                             ` Stephane Chazelas
  2022-11-28 16:37                               ` further discussion of zsh/random (was [PATCH] zsh/random module [UPDATED]) Clinton Bunch
  0 siblings, 1 reply; 46+ messages in thread
From: Stephane Chazelas @ 2022-11-25  9:40 UTC (permalink / raw)
  To: Clinton Bunch, zsh-workers

2022-11-25 08:53:11 +0000, Stephane Chazelas:
[...]
> and also looping until the value is
> different from the previous one (!? is that a guarantee also
> given by getrandom()?)).
[...]

From testing, it doesn't.

bash -c 'until (( (new = SRANDOM) == last )); do last=$new; done'

(where getrandom() is being used) eventually terminates.

Maybe the loop was added as a safeguard against programming
errors or corner cases.

-- 
Stephane


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: further discussion of zsh/random (was [PATCH] zsh/random module [UPDATED])
  2022-11-25  9:40                             ` Stephane Chazelas
@ 2022-11-28 16:37                               ` Clinton Bunch
  0 siblings, 0 replies; 46+ messages in thread
From: Clinton Bunch @ 2022-11-28 16:37 UTC (permalink / raw)
  To: zsh-workers

[-- Attachment #1: Type: text/plain, Size: 1324 bytes --]

Having read all the criticism of the API, I still don't agree with the 
minimalist view nor do I favor implementing a mathfunc to mirror a 
semi-obscure API, and prefer to keep SRANDOM in a module that fails to 
load if no kernel random source is available  so I'm proposing a new API.

builtin:

zrandint -c /count/=*1 *-U /upper/=*MAX_UINT32 *-L /lower/=*0 *-s 
/scalar /-a /array /-f raw|hex|*dec*

**default output is equivalent to _echo $SRANDOM_. I can special case 
the nonsensical arguments like /count/=0 and /upper/=/lower /if that's 
the wish of the group/. upper/ is inclusive

zrandbyte -c /count/=*8 *-s /scalar /-a /array /-f raw|hex|*hex:*|dec:

     default output format will be a colon-separated string of bytes 
encoded in hex, in an array hex and hex: are equivalent.  I could, 
instead, add another option to specify a separator and default to :.

Parameter:

SRANDOM

     A random, uniformly-distributed, unsigned, 32-bit integer.

mathfunc:

zrandomf()

     produces a random float-point number from 0 to 1  inclusive

zrandomi(/upper/=*MAX_UINT32*,/lower/=*0*,/inclusive/=*0*)

     takes 0 to 3 arguments.  exclusive by default to accommodate 
${string[zrandomi($#string)+1]}

I'm not particularly tied to any of the names except SRANDOM



[-- Attachment #2: Type: text/html, Size: 2425 bytes --]

^ permalink raw reply	[flat|nested] 46+ messages in thread

end of thread, other threads:[~2022-11-28 16:38 UTC | newest]

Thread overview: 46+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-02 17:13 [PATCH] zsh/random module Clinton Bunch
2022-11-03 17:50 ` Bart Schaefer
2022-11-04  3:17 ` dana
2022-11-04  6:22   ` Clinton Bunch
2022-11-04  7:27     ` dana
2022-11-04 12:57       ` Clinton Bunch
2022-11-08  0:18         ` [PATCH] zsh/random module [UPDATED] Clinton Bunch
2022-11-18 14:30           ` Clinton Bunch
2022-11-19  6:42             ` Lawrence Velázquez
2022-11-18 16:23           ` Stephane Chazelas
2022-11-18 17:08             ` Clinton Bunch
2022-11-18 18:12               ` Stephane Chazelas
2022-11-18 18:38                 ` Clinton Bunch
2022-11-23 19:52                   ` Daniel Shahaf
2022-11-24 16:19                     ` Stephane Chazelas
2022-11-24 16:30                       ` Roman Perepelitsa
2022-11-24 22:39                         ` Clinton Bunch
2022-11-25  8:53                           ` Stephane Chazelas
2022-11-25  9:40                             ` Stephane Chazelas
2022-11-28 16:37                               ` further discussion of zsh/random (was [PATCH] zsh/random module [UPDATED]) Clinton Bunch
2022-11-21  1:07           ` [PATCH] zsh/random module [UPDATED] Matthew Martin
2022-11-21  1:59             ` Clinton Bunch
2022-11-21  2:21               ` Matthew Martin
2022-11-21  2:57                 ` Clinton Bunch
2022-11-21  3:14                   ` Lawrence Velázquez
2022-11-21  4:17                     ` Bart Schaefer
2022-11-21  5:05                       ` Clinton Bunch
2022-11-22 13:42                         ` dana
2022-11-23 19:49                         ` Daniel Shahaf
2022-11-22 17:44                       ` Oliver Kiddle
2022-11-22 19:48                         ` Clinton Bunch
2022-11-23  1:23                   ` Matthew Martin
2022-11-23  2:58                     ` Clinton Bunch
2022-11-23  4:14                       ` Matthew Martin
2022-11-23 13:41                         ` Clinton Bunch
2022-11-23 20:33                           ` Daniel Shahaf
2022-11-23 21:42                             ` dana
2022-11-23 23:54                               ` Daniel Shahaf
2022-11-24  0:17                                 ` Daniel Shahaf
2022-11-24  1:05                                 ` dana
2022-11-24 13:52                               ` Clinton Bunch
2022-11-23 19:46           ` Daniel Shahaf
2022-11-24  2:58             ` Clinton Bunch
2022-11-24 10:07               ` nimaje+zml
2022-11-24 13:19                 ` Clinton Bunch
2022-11-24 14:33             ` Clinton Bunch

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).