mailing list of musl libc
 help / color / mirror / code / Atom feed
* [musl] [PATCH] stdlib: implement qsort_r
@ 2021-08-08 11:26 Olivier Galibert
  2021-08-08 12:42 ` Jon Chesterfield
  2021-08-08 20:28 ` Érico Nogueira
  0 siblings, 2 replies; 11+ messages in thread
From: Olivier Galibert @ 2021-08-08 11:26 UTC (permalink / raw)
  To: musl; +Cc: Olivier Galibert

The extension qsort_r allows calling qsort on a list of indices
without having a global variable to hold the data to sort.

---
 COPYRIGHT                  |   4 +-
 include/stdlib.h           |   4 +
 src/stdlib/qsort.c         | 200 +---------------------------------
 src/stdlib/qsort_common.ic | 218 +++++++++++++++++++++++++++++++++++++
 src/stdlib/qsort_r.c       |  28 +++++
 5 files changed, 257 insertions(+), 197 deletions(-)
 create mode 100644 src/stdlib/qsort_common.ic
 create mode 100644 src/stdlib/qsort_r.c

diff --git a/COPYRIGHT b/COPYRIGHT
index c1628e9a..f5199541 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -142,8 +142,8 @@ originally written by Solar Designer and placed into the public
 domain. The code also comes with a fallback permissive license for use
 in jurisdictions that may not recognize the public domain.
 
-The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
-Valentin Ochs and is licensed under an MIT-style license.
+The smoothsort implementation (src/stdlib/qsort_common.ic) is
+Copyright © 2011 Valentin Ochs and is licensed under an MIT-style license.
 
 The x86_64 port was written by Nicholas J. Kain and is licensed under
 the standard MIT terms.
diff --git a/include/stdlib.h b/include/stdlib.h
index b54a051f..cacb61bf 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -55,6 +55,10 @@ int system (const char *);
 void *bsearch (const void *, const void *, size_t, size_t, int (*)(const void *, const void *));
 void qsort (void *, size_t, size_t, int (*)(const void *, const void *));
 
+#if defined(_GNU_SOURCE)
+  void qsort_r (void *, size_t, size_t, int (*)(const void *, const void *, void *), void *);
+#endif
+
 int abs (int);
 long labs (long);
 long long llabs (long long);
diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index da58fd31..b0ade53c 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -19,200 +19,10 @@
  * IN THE SOFTWARE.
  */
 
-/* Minor changes by Rich Felker for integration in musl, 2011-04-27. */
+/* Instanciate qsort */
 
-/* Smoothsort, an adaptive variant of Heapsort.  Memory usage: O(1).
-   Run time: Worst case O(n log n), close to O(n) in the mostly-sorted case. */
+#define FN(function) function
+#define ARG_PROTO
+#define ARG
 
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "atomic.h"
-#define ntz(x) a_ctz_l((x))
-
-typedef int (*cmpfun)(const void *, const void *);
-
-static inline int pntz(size_t p[2]) {
-	int r = ntz(p[0] - 1);
-	if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) {
-		return r;
-	}
-	return 0;
-}
-
-static void cycle(size_t width, unsigned char* ar[], int n)
-{
-	unsigned char tmp[256];
-	size_t l;
-	int i;
-
-	if(n < 2) {
-		return;
-	}
-
-	ar[n] = tmp;
-	while(width) {
-		l = sizeof(tmp) < width ? sizeof(tmp) : width;
-		memcpy(ar[n], ar[0], l);
-		for(i = 0; i < n; i++) {
-			memcpy(ar[i], ar[i + 1], l);
-			ar[i] += l;
-		}
-		width -= l;
-	}
-}
-
-/* shl() and shr() need n > 0 */
-static inline void shl(size_t p[2], int n)
-{
-	if(n >= 8 * sizeof(size_t)) {
-		n -= 8 * sizeof(size_t);
-		p[1] = p[0];
-		p[0] = 0;
-	}
-	p[1] <<= n;
-	p[1] |= p[0] >> (sizeof(size_t) * 8 - n);
-	p[0] <<= n;
-}
-
-static inline void shr(size_t p[2], int n)
-{
-	if(n >= 8 * sizeof(size_t)) {
-		n -= 8 * sizeof(size_t);
-		p[0] = p[1];
-		p[1] = 0;
-	}
-	p[0] >>= n;
-	p[0] |= p[1] << (sizeof(size_t) * 8 - n);
-	p[1] >>= n;
-}
-
-static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size_t lp[])
-{
-	unsigned char *rt, *lf;
-	unsigned char *ar[14 * sizeof(size_t) + 1];
-	int i = 1;
-
-	ar[0] = head;
-	while(pshift > 1) {
-		rt = head - width;
-		lf = head - width - lp[pshift - 2];
-
-		if((*cmp)(ar[0], lf) >= 0 && (*cmp)(ar[0], rt) >= 0) {
-			break;
-		}
-		if((*cmp)(lf, rt) >= 0) {
-			ar[i++] = lf;
-			head = lf;
-			pshift -= 1;
-		} else {
-			ar[i++] = rt;
-			head = rt;
-			pshift -= 2;
-		}
-	}
-	cycle(width, ar, i);
-}
-
-static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], int pshift, int trusty, size_t lp[])
-{
-	unsigned char *stepson,
-	              *rt, *lf;
-	size_t p[2];
-	unsigned char *ar[14 * sizeof(size_t) + 1];
-	int i = 1;
-	int trail;
-
-	p[0] = pp[0];
-	p[1] = pp[1];
-
-	ar[0] = head;
-	while(p[0] != 1 || p[1] != 0) {
-		stepson = head - lp[pshift];
-		if((*cmp)(stepson, ar[0]) <= 0) {
-			break;
-		}
-		if(!trusty && pshift > 1) {
-			rt = head - width;
-			lf = head - width - lp[pshift - 2];
-			if((*cmp)(rt, stepson) >= 0 || (*cmp)(lf, stepson) >= 0) {
-				break;
-			}
-		}
-
-		ar[i++] = stepson;
-		head = stepson;
-		trail = pntz(p);
-		shr(p, trail);
-		pshift += trail;
-		trusty = 0;
-	}
-	if(!trusty) {
-		cycle(width, ar, i);
-		sift(head, width, cmp, pshift, lp);
-	}
-}
-
-void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
-{
-	size_t lp[12*sizeof(size_t)];
-	size_t i, size = width * nel;
-	unsigned char *head, *high;
-	size_t p[2] = {1, 0};
-	int pshift = 1;
-	int trail;
-
-	if (!size) return;
-
-	head = base;
-	high = head + size - width;
-
-	/* Precompute Leonardo numbers, scaled by element width */
-	for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++);
-
-	while(head < high) {
-		if((p[0] & 3) == 3) {
-			sift(head, width, cmp, pshift, lp);
-			shr(p, 2);
-			pshift += 2;
-		} else {
-			if(lp[pshift - 1] >= high - head) {
-				trinkle(head, width, cmp, p, pshift, 0, lp);
-			} else {
-				sift(head, width, cmp, pshift, lp);
-			}
-			
-			if(pshift == 1) {
-				shl(p, 1);
-				pshift = 0;
-			} else {
-				shl(p, pshift - 1);
-				pshift = 1;
-			}
-		}
-		
-		p[0] |= 1;
-		head += width;
-	}
-
-	trinkle(head, width, cmp, p, pshift, 0, lp);
-
-	while(pshift != 1 || p[0] != 1 || p[1] != 0) {
-		if(pshift <= 1) {
-			trail = pntz(p);
-			shr(p, trail);
-			pshift += trail;
-		} else {
-			shl(p, 2);
-			pshift -= 2;
-			p[0] ^= 7;
-			shr(p, 1);
-			trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp);
-			shl(p, 1);
-			p[0] |= 1;
-			trinkle(head - width, width, cmp, p, pshift, 1, lp);
-		}
-		head -= width;
-	}
-}
+#include "qsort_common.ic"
diff --git a/src/stdlib/qsort_common.ic b/src/stdlib/qsort_common.ic
new file mode 100644
index 00000000..099b1bf4
--- /dev/null
+++ b/src/stdlib/qsort_common.ic
@@ -0,0 +1,218 @@
+/* Copyright (C) 2011 by Valentin Ochs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* Minor changes by Rich Felker for integration in musl, 2011-04-27. */
+
+/* Smoothsort, an adaptive variant of Heapsort.  Memory usage: O(1).
+   Run time: Worst case O(n log n), close to O(n) in the mostly-sorted case. */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "atomic.h"
+#define ntz(x) a_ctz_l((x))
+
+typedef int (*cmpfun)(const void *, const void * ARG_PROTO);
+
+static inline int pntz(size_t p[2]) {
+	int r = ntz(p[0] - 1);
+	if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) {
+		return r;
+	}
+	return 0;
+}
+
+static void FN(cycle)(size_t width, unsigned char* ar[], int n)
+{
+	unsigned char tmp[256];
+	size_t l;
+	int i;
+
+	if(n < 2) {
+		return;
+	}
+
+	ar[n] = tmp;
+	while(width) {
+		l = sizeof(tmp) < width ? sizeof(tmp) : width;
+		memcpy(ar[n], ar[0], l);
+		for(i = 0; i < n; i++) {
+			memcpy(ar[i], ar[i + 1], l);
+			ar[i] += l;
+		}
+		width -= l;
+	}
+}
+
+/* shl() and shr() need n > 0 */
+static inline void FN(shl)(size_t p[2], int n)
+{
+	if(n >= 8 * sizeof(size_t)) {
+		n -= 8 * sizeof(size_t);
+		p[1] = p[0];
+		p[0] = 0;
+	}
+	p[1] <<= n;
+	p[1] |= p[0] >> (sizeof(size_t) * 8 - n);
+	p[0] <<= n;
+}
+
+static inline void shr(size_t p[2], int n)
+{
+	if(n >= 8 * sizeof(size_t)) {
+		n -= 8 * sizeof(size_t);
+		p[0] = p[1];
+		p[1] = 0;
+	}
+	p[0] >>= n;
+	p[0] |= p[1] << (sizeof(size_t) * 8 - n);
+	p[1] >>= n;
+}
+
+static void FN(sift)(unsigned char *head, size_t width, cmpfun cmp, int pshift, size_t lp[] ARG_PROTO)
+{
+	unsigned char *rt, *lf;
+	unsigned char *ar[14 * sizeof(size_t) + 1];
+	int i = 1;
+
+	ar[0] = head;
+	while(pshift > 1) {
+		rt = head - width;
+		lf = head - width - lp[pshift - 2];
+
+		if((*cmp)(ar[0], lf ARG) >= 0 && (*cmp)(ar[0], rt ARG) >= 0) {
+			break;
+		}
+		if((*cmp)(lf, rt ARG) >= 0) {
+			ar[i++] = lf;
+			head = lf;
+			pshift -= 1;
+		} else {
+			ar[i++] = rt;
+			head = rt;
+			pshift -= 2;
+		}
+	}
+	FN(cycle)(width, ar, i);
+}
+
+static void FN(trinkle)(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], int pshift, int trusty, size_t lp[] ARG_PROTO)
+{
+	unsigned char *stepson,
+	              *rt, *lf;
+	size_t p[2];
+	unsigned char *ar[14 * sizeof(size_t) + 1];
+	int i = 1;
+	int trail;
+
+	p[0] = pp[0];
+	p[1] = pp[1];
+
+	ar[0] = head;
+	while(p[0] != 1 || p[1] != 0) {
+		stepson = head - lp[pshift];
+		if((*cmp)(stepson, ar[0] ARG) <= 0) {
+			break;
+		}
+		if(!trusty && pshift > 1) {
+			rt = head - width;
+			lf = head - width - lp[pshift - 2];
+			if((*cmp)(rt, stepson ARG) >= 0 || (*cmp)(lf, stepson ARG) >= 0) {
+				break;
+			}
+		}
+
+		ar[i++] = stepson;
+		head = stepson;
+		trail = pntz(p);
+		shr(p, trail);
+		pshift += trail;
+		trusty = 0;
+	}
+	if(!trusty) {
+		FN(cycle)(width, ar, i);
+		FN(sift)(head, width, cmp, pshift, lp ARG);
+	}
+}
+
+void FN(qsort)(void *base, size_t nel, size_t width, cmpfun cmp ARG_PROTO)
+{
+	size_t lp[12*sizeof(size_t)];
+	size_t i, size = width * nel;
+	unsigned char *head, *high;
+	size_t p[2] = {1, 0};
+	int pshift = 1;
+	int trail;
+
+	if (!size) return;
+
+	head = base;
+	high = head + size - width;
+
+	/* Precompute Leonardo numbers, scaled by element width */
+	for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++);
+
+	while(head < high) {
+		if((p[0] & 3) == 3) {
+			FN(sift)(head, width, cmp, pshift, lp ARG);
+			shr(p, 2);
+			pshift += 2;
+		} else {
+			if(lp[pshift - 1] >= high - head) {
+				FN(trinkle)(head, width, cmp, p, pshift, 0, lp ARG);
+			} else {
+				FN(sift)(head, width, cmp, pshift, lp ARG);
+			}
+			
+			if(pshift == 1) {
+				FN(shl)(p, 1);
+				pshift = 0;
+			} else {
+				FN(shl)(p, pshift - 1);
+				pshift = 1;
+			}
+		}
+		
+		p[0] |= 1;
+		head += width;
+	}
+
+	FN(trinkle)(head, width, cmp, p, pshift, 0, lp ARG);
+
+	while(pshift != 1 || p[0] != 1 || p[1] != 0) {
+		if(pshift <= 1) {
+			trail = pntz(p);
+			shr(p, trail);
+			pshift += trail;
+		} else {
+			FN(shl)(p, 2);
+			pshift -= 2;
+			p[0] ^= 7;
+			shr(p, 1);
+			FN(trinkle)(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp ARG);
+			FN(shl)(p, 1);
+			p[0] |= 1;
+			FN(trinkle)(head - width, width, cmp, p, pshift, 1, lp ARG);
+		}
+		head -= width;
+	}
+}
diff --git a/src/stdlib/qsort_r.c b/src/stdlib/qsort_r.c
new file mode 100644
index 00000000..d57e4a1e
--- /dev/null
+++ b/src/stdlib/qsort_r.c
@@ -0,0 +1,28 @@
+/* Copyright (C) 2011 by Valentin Ochs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* Instanciate qsort_r */
+
+#define FN(function) function##_r
+#define ARG_PROTO , void *arg
+#define ARG , arg
+
+#include "qsort_common.ic"
-- 
2.32.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [musl] [PATCH] stdlib: implement qsort_r
  2021-08-08 11:26 [musl] [PATCH] stdlib: implement qsort_r Olivier Galibert
@ 2021-08-08 12:42 ` Jon Chesterfield
  2021-08-08 19:39   ` Olivier Galibert
                     ` (2 more replies)
  2021-08-08 20:28 ` Érico Nogueira
  1 sibling, 3 replies; 11+ messages in thread
From: Jon Chesterfield @ 2021-08-08 12:42 UTC (permalink / raw)
  To: musl; +Cc: Olivier Galibert

[-- Attachment #1: Type: text/plain, Size: 1548 bytes --]

On Sun, Aug 8, 2021 at 12:29 PM Olivier Galibert <galibert@pobox.com> wrote:

> The extension qsort_r allows calling qsort on a list of indices
> without having a global variable to hold the data to sort.
>

qsort_r is a strong improvement on qsort. I think it's available outside of
glibc.

I remember doing something similar locally. Just looked it up and I
renamed & mutated qsort to pass the context along. Therefore typed into
email, I think something like this would provide an implementation of
qsort in terms of qsort_r.

// declare qsort_r
typedef int (*cmp_r_t)(const void *, const void *, void * context);


void qsort_r(void *base, size_t nel, size_t width, cmp_r_t cmp, void*
context);


// pass it a function that extracts the comparator for qsort from the
// context
typedef int (*cmp_t)(const void *, const void *);


static int compare_adapter(const void *l, const void *r, void * context)
{
  cmpt_t c = (cmpt_t) context;


  return c(l,r);


}

// tail call
void qsort(void *base, size_t nel, size_t width, cmp_t c)
{
  return qsort_r(base, nel, width, compare_adapter, (cmp_t_t)c);


}

Given optimism about inlining or an always inline annotation
it should turn into the same machine code as the macro
instantiation approach. Alternatively it's a tail call into qsort_r, so
a couple of indirections in exchange for minimal code size growth.

I haven't compiled or tested that (or looked up the coding conventions
for musl) so this is a drive by suggestion, written in pseudocode
instead of prose for clarity.

Thanks all!

Jon

[-- Attachment #2: Type: text/html, Size: 3588 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [musl] [PATCH] stdlib: implement qsort_r
  2021-08-08 12:42 ` Jon Chesterfield
@ 2021-08-08 19:39   ` Olivier Galibert
  2021-08-08 20:35   ` Érico Nogueira
  2021-08-09 17:27   ` Florian Weimer
  2 siblings, 0 replies; 11+ messages in thread
From: Olivier Galibert @ 2021-08-08 19:39 UTC (permalink / raw)
  To: Jon Chesterfield; +Cc: musl

[-- Attachment #1: Type: text/plain, Size: 2037 bytes --]

I don’t believe in deep inlining in that case, there’s too much recursion.
As for adding overhead to qsort by sharing qsort_r’s code it is indeed
trivial to do but I don’t know whether the tradeoff should be done.

  OG.

Le dim. 8 août 2021 à 14:42, Jon Chesterfield <
jonathanchesterfield@gmail.com> a écrit :

> On Sun, Aug 8, 2021 at 12:29 PM Olivier Galibert <galibert@pobox.com>
> wrote:
>
>> The extension qsort_r allows calling qsort on a list of indices
>> without having a global variable to hold the data to sort.
>>
>
> qsort_r is a strong improvement on qsort. I think it's available outside of
> glibc.
>
> I remember doing something similar locally. Just looked it up and I
> renamed & mutated qsort to pass the context along. Therefore typed into
> email, I think something like this would provide an implementation of
> qsort in terms of qsort_r.
>
> // declare qsort_r
> typedef int (*cmp_r_t)(const void *, const void *, void * context);
>
>
> void qsort_r(void *base, size_t nel, size_t width, cmp_r_t cmp, void*
> context);
>
>
> // pass it a function that extracts the comparator for qsort from the
> // context
> typedef int (*cmp_t)(const void *, const void *);
>
>
> static int compare_adapter(const void *l, const void *r, void * context)
> {
>   cmpt_t c = (cmpt_t) context;
>
>
>   return c(l,r);
>
>
> }
>
> // tail call
> void qsort(void *base, size_t nel, size_t width, cmp_t c)
> {
>   return qsort_r(base, nel, width, compare_adapter, (cmp_t_t)c);
>
>
> }
>
> Given optimism about inlining or an always inline annotation
> it should turn into the same machine code as the macro
> instantiation approach. Alternatively it's a tail call into qsort_r, so
> a couple of indirections in exchange for minimal code size growth.
>
> I haven't compiled or tested that (or looked up the coding conventions
> for musl) so this is a drive by suggestion, written in pseudocode
> instead of prose for clarity.
>
> Thanks all!
>
> Jon
>
>

[-- Attachment #2: Type: text/html, Size: 4400 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [musl] [PATCH] stdlib: implement qsort_r
  2021-08-08 11:26 [musl] [PATCH] stdlib: implement qsort_r Olivier Galibert
  2021-08-08 12:42 ` Jon Chesterfield
@ 2021-08-08 20:28 ` Érico Nogueira
  1 sibling, 0 replies; 11+ messages in thread
From: Érico Nogueira @ 2021-08-08 20:28 UTC (permalink / raw)
  To: musl; +Cc: Olivier Galibert

A similar implementation was suggested in

https://inbox.vuxu.org/musl/20210309035652.32453-1-ericonr@disroot.org/

And mostly voted against, in favor of the wrapper implementation.

On Sun Aug 8, 2021 at 8:26 AM -03, Olivier Galibert wrote:
> The extension qsort_r allows calling qsort on a list of indices
> without having a global variable to hold the data to sort.
>
> ---
> COPYRIGHT | 4 +-
> include/stdlib.h | 4 +
> src/stdlib/qsort.c | 200 +---------------------------------
> src/stdlib/qsort_common.ic | 218 +++++++++++++++++++++++++++++++++++++
> src/stdlib/qsort_r.c | 28 +++++
> 5 files changed, 257 insertions(+), 197 deletions(-)
> create mode 100644 src/stdlib/qsort_common.ic
> create mode 100644 src/stdlib/qsort_r.c
>
> diff --git a/COPYRIGHT b/COPYRIGHT
> index c1628e9a..f5199541 100644
> --- a/COPYRIGHT
> +++ b/COPYRIGHT
> @@ -142,8 +142,8 @@ originally written by Solar Designer and placed into
> the public
> domain. The code also comes with a fallback permissive license for use
> in jurisdictions that may not recognize the public domain.
>  
> -The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
> -Valentin Ochs and is licensed under an MIT-style license.
> +The smoothsort implementation (src/stdlib/qsort_common.ic) is
> +Copyright © 2011 Valentin Ochs and is licensed under an MIT-style
> license.
>  
> The x86_64 port was written by Nicholas J. Kain and is licensed under
> the standard MIT terms.
> diff --git a/include/stdlib.h b/include/stdlib.h
> index b54a051f..cacb61bf 100644
> --- a/include/stdlib.h
> +++ b/include/stdlib.h
> @@ -55,6 +55,10 @@ int system (const char *);
> void *bsearch (const void *, const void *, size_t, size_t, int (*)(const
> void *, const void *));
> void qsort (void *, size_t, size_t, int (*)(const void *, const void
> *));
>  
> +#if defined(_GNU_SOURCE)
> + void qsort_r (void *, size_t, size_t, int (*)(const void *, const void
> *, void *), void *);
> +#endif
> +
> int abs (int);
> long labs (long);
> long long llabs (long long);
> diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
> index da58fd31..b0ade53c 100644
> --- a/src/stdlib/qsort.c
> +++ b/src/stdlib/qsort.c
> @@ -19,200 +19,10 @@
> * IN THE SOFTWARE.
> */
>  
> -/* Minor changes by Rich Felker for integration in musl, 2011-04-27. */
> +/* Instanciate qsort */
>  
> -/* Smoothsort, an adaptive variant of Heapsort. Memory usage: O(1).
> - Run time: Worst case O(n log n), close to O(n) in the mostly-sorted
> case. */
> +#define FN(function) function
> +#define ARG_PROTO
> +#define ARG
>  
> -#include <stdint.h>
> -#include <stdlib.h>
> -#include <string.h>
> -
> -#include "atomic.h"
> -#define ntz(x) a_ctz_l((x))
> -
> -typedef int (*cmpfun)(const void *, const void *);
> -
> -static inline int pntz(size_t p[2]) {
> - int r = ntz(p[0] - 1);
> - if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) {
> - return r;
> - }
> - return 0;
> -}
> -
> -static void cycle(size_t width, unsigned char* ar[], int n)
> -{
> - unsigned char tmp[256];
> - size_t l;
> - int i;
> -
> - if(n < 2) {
> - return;
> - }
> -
> - ar[n] = tmp;
> - while(width) {
> - l = sizeof(tmp) < width ? sizeof(tmp) : width;
> - memcpy(ar[n], ar[0], l);
> - for(i = 0; i < n; i++) {
> - memcpy(ar[i], ar[i + 1], l);
> - ar[i] += l;
> - }
> - width -= l;
> - }
> -}
> -
> -/* shl() and shr() need n > 0 */
> -static inline void shl(size_t p[2], int n)
> -{
> - if(n >= 8 * sizeof(size_t)) {
> - n -= 8 * sizeof(size_t);
> - p[1] = p[0];
> - p[0] = 0;
> - }
> - p[1] <<= n;
> - p[1] |= p[0] >> (sizeof(size_t) * 8 - n);
> - p[0] <<= n;
> -}
> -
> -static inline void shr(size_t p[2], int n)
> -{
> - if(n >= 8 * sizeof(size_t)) {
> - n -= 8 * sizeof(size_t);
> - p[0] = p[1];
> - p[1] = 0;
> - }
> - p[0] >>= n;
> - p[0] |= p[1] << (sizeof(size_t) * 8 - n);
> - p[1] >>= n;
> -}
> -
> -static void sift(unsigned char *head, size_t width, cmpfun cmp, int
> pshift, size_t lp[])
> -{
> - unsigned char *rt, *lf;
> - unsigned char *ar[14 * sizeof(size_t) + 1];
> - int i = 1;
> -
> - ar[0] = head;
> - while(pshift > 1) {
> - rt = head - width;
> - lf = head - width - lp[pshift - 2];
> -
> - if((*cmp)(ar[0], lf) >= 0 && (*cmp)(ar[0], rt) >= 0) {
> - break;
> - }
> - if((*cmp)(lf, rt) >= 0) {
> - ar[i++] = lf;
> - head = lf;
> - pshift -= 1;
> - } else {
> - ar[i++] = rt;
> - head = rt;
> - pshift -= 2;
> - }
> - }
> - cycle(width, ar, i);
> -}
> -
> -static void trinkle(unsigned char *head, size_t width, cmpfun cmp,
> size_t pp[2], int pshift, int trusty, size_t lp[])
> -{
> - unsigned char *stepson,
> - *rt, *lf;
> - size_t p[2];
> - unsigned char *ar[14 * sizeof(size_t) + 1];
> - int i = 1;
> - int trail;
> -
> - p[0] = pp[0];
> - p[1] = pp[1];
> -
> - ar[0] = head;
> - while(p[0] != 1 || p[1] != 0) {
> - stepson = head - lp[pshift];
> - if((*cmp)(stepson, ar[0]) <= 0) {
> - break;
> - }
> - if(!trusty && pshift > 1) {
> - rt = head - width;
> - lf = head - width - lp[pshift - 2];
> - if((*cmp)(rt, stepson) >= 0 || (*cmp)(lf, stepson) >= 0) {
> - break;
> - }
> - }
> -
> - ar[i++] = stepson;
> - head = stepson;
> - trail = pntz(p);
> - shr(p, trail);
> - pshift += trail;
> - trusty = 0;
> - }
> - if(!trusty) {
> - cycle(width, ar, i);
> - sift(head, width, cmp, pshift, lp);
> - }
> -}
> -
> -void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
> -{
> - size_t lp[12*sizeof(size_t)];
> - size_t i, size = width * nel;
> - unsigned char *head, *high;
> - size_t p[2] = {1, 0};
> - int pshift = 1;
> - int trail;
> -
> - if (!size) return;
> -
> - head = base;
> - high = head + size - width;
> -
> - /* Precompute Leonardo numbers, scaled by element width */
> - for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size;
> i++);
> -
> - while(head < high) {
> - if((p[0] & 3) == 3) {
> - sift(head, width, cmp, pshift, lp);
> - shr(p, 2);
> - pshift += 2;
> - } else {
> - if(lp[pshift - 1] >= high - head) {
> - trinkle(head, width, cmp, p, pshift, 0, lp);
> - } else {
> - sift(head, width, cmp, pshift, lp);
> - }
> -
> - if(pshift == 1) {
> - shl(p, 1);
> - pshift = 0;
> - } else {
> - shl(p, pshift - 1);
> - pshift = 1;
> - }
> - }
> -
> - p[0] |= 1;
> - head += width;
> - }
> -
> - trinkle(head, width, cmp, p, pshift, 0, lp);
> -
> - while(pshift != 1 || p[0] != 1 || p[1] != 0) {
> - if(pshift <= 1) {
> - trail = pntz(p);
> - shr(p, trail);
> - pshift += trail;
> - } else {
> - shl(p, 2);
> - pshift -= 2;
> - p[0] ^= 7;
> - shr(p, 1);
> - trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp);
> - shl(p, 1);
> - p[0] |= 1;
> - trinkle(head - width, width, cmp, p, pshift, 1, lp);
> - }
> - head -= width;
> - }
> -}
> +#include "qsort_common.ic"
> diff --git a/src/stdlib/qsort_common.ic b/src/stdlib/qsort_common.ic
> new file mode 100644
> index 00000000..099b1bf4
> --- /dev/null
> +++ b/src/stdlib/qsort_common.ic
> @@ -0,0 +1,218 @@
> +/* Copyright (C) 2011 by Valentin Ochs
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a copy
> + * of this software and associated documentation files (the
> "Software"), to
> + * deal in the Software without restriction, including without
> limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense,
> and/or
> + * sell copies of the Software, and to permit persons to whom the
> Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
> SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +/* Minor changes by Rich Felker for integration in musl, 2011-04-27. */
> +
> +/* Smoothsort, an adaptive variant of Heapsort. Memory usage: O(1).
> + Run time: Worst case O(n log n), close to O(n) in the mostly-sorted
> case. */
> +
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <string.h>
> +
> +#include "atomic.h"
> +#define ntz(x) a_ctz_l((x))
> +
> +typedef int (*cmpfun)(const void *, const void * ARG_PROTO);
> +
> +static inline int pntz(size_t p[2]) {
> + int r = ntz(p[0] - 1);
> + if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) {
> + return r;
> + }
> + return 0;
> +}
> +
> +static void FN(cycle)(size_t width, unsigned char* ar[], int n)
> +{
> + unsigned char tmp[256];
> + size_t l;
> + int i;
> +
> + if(n < 2) {
> + return;
> + }
> +
> + ar[n] = tmp;
> + while(width) {
> + l = sizeof(tmp) < width ? sizeof(tmp) : width;
> + memcpy(ar[n], ar[0], l);
> + for(i = 0; i < n; i++) {
> + memcpy(ar[i], ar[i + 1], l);
> + ar[i] += l;
> + }
> + width -= l;
> + }
> +}
> +
> +/* shl() and shr() need n > 0 */
> +static inline void FN(shl)(size_t p[2], int n)
> +{
> + if(n >= 8 * sizeof(size_t)) {
> + n -= 8 * sizeof(size_t);
> + p[1] = p[0];
> + p[0] = 0;
> + }
> + p[1] <<= n;
> + p[1] |= p[0] >> (sizeof(size_t) * 8 - n);
> + p[0] <<= n;
> +}
> +
> +static inline void shr(size_t p[2], int n)
> +{
> + if(n >= 8 * sizeof(size_t)) {
> + n -= 8 * sizeof(size_t);
> + p[0] = p[1];
> + p[1] = 0;
> + }
> + p[0] >>= n;
> + p[0] |= p[1] << (sizeof(size_t) * 8 - n);
> + p[1] >>= n;
> +}
> +
> +static void FN(sift)(unsigned char *head, size_t width, cmpfun cmp, int
> pshift, size_t lp[] ARG_PROTO)
> +{
> + unsigned char *rt, *lf;
> + unsigned char *ar[14 * sizeof(size_t) + 1];
> + int i = 1;
> +
> + ar[0] = head;
> + while(pshift > 1) {
> + rt = head - width;
> + lf = head - width - lp[pshift - 2];
> +
> + if((*cmp)(ar[0], lf ARG) >= 0 && (*cmp)(ar[0], rt ARG) >= 0) {
> + break;
> + }
> + if((*cmp)(lf, rt ARG) >= 0) {
> + ar[i++] = lf;
> + head = lf;
> + pshift -= 1;
> + } else {
> + ar[i++] = rt;
> + head = rt;
> + pshift -= 2;
> + }
> + }
> + FN(cycle)(width, ar, i);
> +}
> +
> +static void FN(trinkle)(unsigned char *head, size_t width, cmpfun cmp,
> size_t pp[2], int pshift, int trusty, size_t lp[] ARG_PROTO)
> +{
> + unsigned char *stepson,
> + *rt, *lf;
> + size_t p[2];
> + unsigned char *ar[14 * sizeof(size_t) + 1];
> + int i = 1;
> + int trail;
> +
> + p[0] = pp[0];
> + p[1] = pp[1];
> +
> + ar[0] = head;
> + while(p[0] != 1 || p[1] != 0) {
> + stepson = head - lp[pshift];
> + if((*cmp)(stepson, ar[0] ARG) <= 0) {
> + break;
> + }
> + if(!trusty && pshift > 1) {
> + rt = head - width;
> + lf = head - width - lp[pshift - 2];
> + if((*cmp)(rt, stepson ARG) >= 0 || (*cmp)(lf, stepson ARG) >= 0) {
> + break;
> + }
> + }
> +
> + ar[i++] = stepson;
> + head = stepson;
> + trail = pntz(p);
> + shr(p, trail);
> + pshift += trail;
> + trusty = 0;
> + }
> + if(!trusty) {
> + FN(cycle)(width, ar, i);
> + FN(sift)(head, width, cmp, pshift, lp ARG);
> + }
> +}
> +
> +void FN(qsort)(void *base, size_t nel, size_t width, cmpfun cmp
> ARG_PROTO)
> +{
> + size_t lp[12*sizeof(size_t)];
> + size_t i, size = width * nel;
> + unsigned char *head, *high;
> + size_t p[2] = {1, 0};
> + int pshift = 1;
> + int trail;
> +
> + if (!size) return;
> +
> + head = base;
> + high = head + size - width;
> +
> + /* Precompute Leonardo numbers, scaled by element width */
> + for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size;
> i++);
> +
> + while(head < high) {
> + if((p[0] & 3) == 3) {
> + FN(sift)(head, width, cmp, pshift, lp ARG);
> + shr(p, 2);
> + pshift += 2;
> + } else {
> + if(lp[pshift - 1] >= high - head) {
> + FN(trinkle)(head, width, cmp, p, pshift, 0, lp ARG);
> + } else {
> + FN(sift)(head, width, cmp, pshift, lp ARG);
> + }
> +
> + if(pshift == 1) {
> + FN(shl)(p, 1);
> + pshift = 0;
> + } else {
> + FN(shl)(p, pshift - 1);
> + pshift = 1;
> + }
> + }
> +
> + p[0] |= 1;
> + head += width;
> + }
> +
> + FN(trinkle)(head, width, cmp, p, pshift, 0, lp ARG);
> +
> + while(pshift != 1 || p[0] != 1 || p[1] != 0) {
> + if(pshift <= 1) {
> + trail = pntz(p);
> + shr(p, trail);
> + pshift += trail;
> + } else {
> + FN(shl)(p, 2);
> + pshift -= 2;
> + p[0] ^= 7;
> + shr(p, 1);
> + FN(trinkle)(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1,
> lp ARG);
> + FN(shl)(p, 1);
> + p[0] |= 1;
> + FN(trinkle)(head - width, width, cmp, p, pshift, 1, lp ARG);
> + }
> + head -= width;
> + }
> +}
> diff --git a/src/stdlib/qsort_r.c b/src/stdlib/qsort_r.c
> new file mode 100644
> index 00000000..d57e4a1e
> --- /dev/null
> +++ b/src/stdlib/qsort_r.c
> @@ -0,0 +1,28 @@
> +/* Copyright (C) 2011 by Valentin Ochs
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a copy
> + * of this software and associated documentation files (the
> "Software"), to
> + * deal in the Software without restriction, including without
> limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense,
> and/or
> + * sell copies of the Software, and to permit persons to whom the
> Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
> SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +/* Instanciate qsort_r */
> +
> +#define FN(function) function##_r
> +#define ARG_PROTO , void *arg
> +#define ARG , arg
> +
> +#include "qsort_common.ic"

From what I have seen, musl prefers using .h files for these situations
instead of inventing new file extensions.

It is also my belief that the version I linked was more readable.

> --
> 2.32.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [musl] [PATCH] stdlib: implement qsort_r
  2021-08-08 12:42 ` Jon Chesterfield
  2021-08-08 19:39   ` Olivier Galibert
@ 2021-08-08 20:35   ` Érico Nogueira
  2021-08-08 21:21     ` Jon Chesterfield
  2021-08-09 17:27   ` Florian Weimer
  2 siblings, 1 reply; 11+ messages in thread
From: Érico Nogueira @ 2021-08-08 20:35 UTC (permalink / raw)
  To: musl; +Cc: Olivier Galibert

On Sun Aug 8, 2021 at 9:42 AM -03, Jon Chesterfield wrote:
> On Sun, Aug 8, 2021 at 12:29 PM Olivier Galibert <galibert@pobox.com>
> wrote:
>
> > The extension qsort_r allows calling qsort on a list of indices
> > without having a global variable to hold the data to sort.
> >
>
> qsort_r is a strong improvement on qsort. I think it's available outside
> of
> glibc.
>
> I remember doing something similar locally. Just looked it up and I
> renamed & mutated qsort to pass the context along. Therefore typed into
> email, I think something like this would provide an implementation of
> qsort in terms of qsort_r.
>
> // declare qsort_r
> typedef int (*cmp_r_t)(const void *, const void *, void * context);
>
>
> void qsort_r(void *base, size_t nel, size_t width, cmp_r_t cmp, void*
> context);
>
>
> // pass it a function that extracts the comparator for qsort from the
> // context
> typedef int (*cmp_t)(const void *, const void *);
>
>
> static int compare_adapter(const void *l, const void *r, void * context)
> {
> cmpt_t c = (cmpt_t) context;
>
>
> return c(l,r);
>
>
> }
>
> // tail call
> void qsort(void *base, size_t nel, size_t width, cmp_t c)
> {
> return qsort_r(base, nel, width, compare_adapter, (cmp_t_t)c);
>
>
> }
>
> Given optimism about inlining or an always inline annotation
> it should turn into the same machine code as the macro
> instantiation approach. Alternatively it's a tail call into qsort_r, so
> a couple of indirections in exchange for minimal code size growth.
>
> I haven't compiled or tested that (or looked up the coding conventions
> for musl) so this is a drive by suggestion, written in pseudocode
> instead of prose for clarity.

This is the favored approach, from what I understood of the discussions
surrounding it. It's implemented with musl's namespacing rules and such
in [1].

It is badly optimized for some archs, unfortunately, as explained in the
thread from [2]. I believe that's what's holding it up.

[1] https://inbox.vuxu.org/musl/20210309210213.29539-1-ericonr@disroot.org/
[2] https://inbox.vuxu.org/musl/20210309150320.GU32655@brightrain.aerifal.cx/

>
> Thanks all!
>
> Jon


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [musl] [PATCH] stdlib: implement qsort_r
  2021-08-08 20:35   ` Érico Nogueira
@ 2021-08-08 21:21     ` Jon Chesterfield
  2021-08-08 23:05       ` Rich Felker
  0 siblings, 1 reply; 11+ messages in thread
From: Jon Chesterfield @ 2021-08-08 21:21 UTC (permalink / raw)
  To: musl; +Cc: Olivier Galibert

[-- Attachment #1: Type: text/plain, Size: 713 bytes --]

On Sun, Aug 8, 2021 at 9:39 PM Érico Nogueira <ericonr@disroot.org> wrote:

> This is the favored approach, from what I understood of the discussions
> surrounding it. It's implemented with musl's namespacing rules and such
> in [1].
>

Unsurprisingly I like your patch.

>
> It is badly optimized for some archs, unfortunately, as explained in the
> thread from [2]. I believe that's what's holding it up.
>
> [1]
> https://inbox.vuxu.org/musl/20210309210213.29539-1-ericonr@disroot.org/
> [2]
> https://inbox.vuxu.org/musl/20210309150320.GU32655@brightrain.aerifal.cx/


And that's what I get for not reading the list carefully enough. I missed
the thread
from March entirely.

Thanks!

[-- Attachment #2: Type: text/html, Size: 1440 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [musl] [PATCH] stdlib: implement qsort_r
  2021-08-08 21:21     ` Jon Chesterfield
@ 2021-08-08 23:05       ` Rich Felker
  2021-08-09  6:03         ` Olivier Galibert
  0 siblings, 1 reply; 11+ messages in thread
From: Rich Felker @ 2021-08-08 23:05 UTC (permalink / raw)
  To: Jon Chesterfield; +Cc: musl, Olivier Galibert

On Sun, Aug 08, 2021 at 10:21:06PM +0100, Jon Chesterfield wrote:
> On Sun, Aug 8, 2021 at 9:39 PM Érico Nogueira <ericonr@disroot.org> wrote:
> 
> > This is the favored approach, from what I understood of the discussions
> > surrounding it. It's implemented with musl's namespacing rules and such
> > in [1].
> >
> 
> Unsurprisingly I like your patch.
> 
> >
> > It is badly optimized for some archs, unfortunately, as explained in the
> > thread from [2]. I believe that's what's holding it up.
> >
> > [1]
> > https://inbox.vuxu.org/musl/20210309210213.29539-1-ericonr@disroot.org/
> > [2]
> > https://inbox.vuxu.org/musl/20210309150320.GU32655@brightrain.aerifal.cx/
> 
> 
> And that's what I get for not reading the list carefully enough. I missed
> the thread
> from March entirely.

No problem. Indeed, this was stalled because of concerns about bad
compiler behavior on some archs, but I really don't want to allow that
to dictate how we solve the problem. The compiler is doing the wrong
thing (failing to tail call) and should just be fixed. I'll probably
merge the tail-call patch as-is, or with any minimal fixes needed,
right after this release so as not to make major API changes at last
minute.

Rich

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [musl] [PATCH] stdlib: implement qsort_r
  2021-08-08 23:05       ` Rich Felker
@ 2021-08-09  6:03         ` Olivier Galibert
  0 siblings, 0 replies; 11+ messages in thread
From: Olivier Galibert @ 2021-08-09  6:03 UTC (permalink / raw)
  To: Rich Felker; +Cc: Jon Chesterfield, musl

[-- Attachment #1: Type: text/plain, Size: 1583 bytes --]

Yeah, the wrapper_cmp version looks rather good, I didn’t realize it would
go down to one instruction.  Or that an old patch would have been stalled
for five months ;-)

  OG

Le lun. 9 août 2021 à 01:05, Rich Felker <dalias@libc.org> a écrit :

> On Sun, Aug 08, 2021 at 10:21:06PM +0100, Jon Chesterfield wrote:
> > On Sun, Aug 8, 2021 at 9:39 PM Érico Nogueira <ericonr@disroot.org>
> wrote:
> >
> > > This is the favored approach, from what I understood of the discussions
> > > surrounding it. It's implemented with musl's namespacing rules and such
> > > in [1].
> > >
> >
> > Unsurprisingly I like your patch.
> >
> > >
> > > It is badly optimized for some archs, unfortunately, as explained in
> the
> > > thread from [2]. I believe that's what's holding it up.
> > >
> > > [1]
> > >
> https://inbox.vuxu.org/musl/20210309210213.29539-1-ericonr@disroot.org/
> > > [2]
> > >
> https://inbox.vuxu.org/musl/20210309150320.GU32655@brightrain.aerifal.cx/
> >
> >
> > And that's what I get for not reading the list carefully enough. I missed
> > the thread
> > from March entirely.
>
> No problem. Indeed, this was stalled because of concerns about bad
> compiler behavior on some archs, but I really don't want to allow that
> to dictate how we solve the problem. The compiler is doing the wrong
> thing (failing to tail call) and should just be fixed. I'll probably
> merge the tail-call patch as-is, or with any minimal fixes needed,
> right after this release so as not to make major API changes at last
> minute.
>
> Rich
>

[-- Attachment #2: Type: text/html, Size: 2384 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [musl] [PATCH] stdlib: implement qsort_r
  2021-08-08 12:42 ` Jon Chesterfield
  2021-08-08 19:39   ` Olivier Galibert
  2021-08-08 20:35   ` Érico Nogueira
@ 2021-08-09 17:27   ` Florian Weimer
  2 siblings, 0 replies; 11+ messages in thread
From: Florian Weimer @ 2021-08-09 17:27 UTC (permalink / raw)
  To: Jon Chesterfield; +Cc: musl, Olivier Galibert

* Jon Chesterfield:

> On Sun, Aug 8, 2021 at 12:29 PM Olivier Galibert <galibert@pobox.com> wrote:
>
>  The extension qsort_r allows calling qsort on a list of indices
>  without having a global variable to hold the data to sort.
>
> qsort_r is a strong improvement on qsort. I think it's available outside of
> glibc.

Right.  Solaris has it, too.  And apparently FreeBSD adjusted their
interface: <https://reviews.freebsd.org/D17083>

Thanks,
Florian


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [musl] [PATCH] stdlib: implement qsort_r
  2021-08-18 17:36 Olivier Galibert
@ 2021-08-18 20:18 ` Rich Felker
  0 siblings, 0 replies; 11+ messages in thread
From: Rich Felker @ 2021-08-18 20:18 UTC (permalink / raw)
  To: Olivier Galibert; +Cc: musl

On Wed, Aug 18, 2021 at 07:36:53PM +0200, Olivier Galibert wrote:
> The extension qsort_r allows calling qsort on a list of indices
> without having a global variable to hold the data to sort.
> ---
> 
> I didn't see a "clean" patch for the wrapper version in the thread, so
> here is one.  Passes libc-test, of course.

Here is the original patch, which also handles namespace correctly and
separates the translation units:

https://www.openwall.com/lists/musl/2021/03/09/10

Otherwise I think yours is roughly the same. I haven't checked whether
there are any other differences that should be resolved.

One change that should be made to either is that qsort_r should be
under _BSD_SOURCE, not _GNU_SOURCE.

>  include/stdlib.h   |  4 ++++
>  src/stdlib/qsort.c | 39 +++++++++++++++++++++++++--------------
>  2 files changed, 29 insertions(+), 14 deletions(-)
> 
> diff --git a/include/stdlib.h b/include/stdlib.h
> index b54a051f..cacb61bf 100644
> --- a/include/stdlib.h
> +++ b/include/stdlib.h
> @@ -55,6 +55,10 @@ int system (const char *);
>  void *bsearch (const void *, const void *, size_t, size_t, int (*)(const void *, const void *));
>  void qsort (void *, size_t, size_t, int (*)(const void *, const void *));
>  
> +#if defined(_GNU_SOURCE)
> +  void qsort_r (void *, size_t, size_t, int (*)(const void *, const void *, void *), void *);
> +#endif
> +
>  int abs (int);
>  long labs (long);
>  long long llabs (long long);
> diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
> index da58fd31..9add0ac5 100644
> --- a/src/stdlib/qsort.c
> +++ b/src/stdlib/qsort.c
> @@ -32,6 +32,7 @@
>  #define ntz(x) a_ctz_l((x))
>  
>  typedef int (*cmpfun)(const void *, const void *);
> +typedef int (*cmpfun_r)(const void *, const void *, void *);
>  
>  static inline int pntz(size_t p[2]) {
>  	int r = ntz(p[0] - 1);
> @@ -88,7 +89,7 @@ static inline void shr(size_t p[2], int n)
>  	p[1] >>= n;
>  }
>  
> -static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size_t lp[])
> +static void sift(unsigned char *head, size_t width, cmpfun_r cmp, void *arg, int pshift, size_t lp[])
>  {
>  	unsigned char *rt, *lf;
>  	unsigned char *ar[14 * sizeof(size_t) + 1];
> @@ -99,10 +100,10 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size
>  		rt = head - width;
>  		lf = head - width - lp[pshift - 2];
>  
> -		if((*cmp)(ar[0], lf) >= 0 && (*cmp)(ar[0], rt) >= 0) {
> +		if((*cmp)(ar[0], lf, arg) >= 0 && (*cmp)(ar[0], rt, arg) >= 0) {
>  			break;
>  		}
> -		if((*cmp)(lf, rt) >= 0) {
> +		if((*cmp)(lf, rt, arg) >= 0) {
>  			ar[i++] = lf;
>  			head = lf;
>  			pshift -= 1;
> @@ -115,7 +116,7 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size
>  	cycle(width, ar, i);
>  }
>  
> -static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], int pshift, int trusty, size_t lp[])
> +static void trinkle(unsigned char *head, size_t width, cmpfun_r cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[])
>  {
>  	unsigned char *stepson,
>  	              *rt, *lf;
> @@ -130,13 +131,13 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2],
>  	ar[0] = head;
>  	while(p[0] != 1 || p[1] != 0) {
>  		stepson = head - lp[pshift];
> -		if((*cmp)(stepson, ar[0]) <= 0) {
> +		if((*cmp)(stepson, ar[0], arg) <= 0) {
>  			break;
>  		}
>  		if(!trusty && pshift > 1) {
>  			rt = head - width;
>  			lf = head - width - lp[pshift - 2];
> -			if((*cmp)(rt, stepson) >= 0 || (*cmp)(lf, stepson) >= 0) {
> +			if((*cmp)(rt, stepson, arg) >= 0 || (*cmp)(lf, stepson, arg) >= 0) {
>  				break;
>  			}
>  		}
> @@ -150,11 +151,11 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2],
>  	}
>  	if(!trusty) {
>  		cycle(width, ar, i);
> -		sift(head, width, cmp, pshift, lp);
> +		sift(head, width, cmp, arg, pshift, lp);
>  	}
>  }
>  
> -void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
> +void qsort_r(void *base, size_t nel, size_t width, cmpfun_r cmp, void *arg)
>  {
>  	size_t lp[12*sizeof(size_t)];
>  	size_t i, size = width * nel;
> @@ -173,14 +174,14 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
>  
>  	while(head < high) {
>  		if((p[0] & 3) == 3) {
> -			sift(head, width, cmp, pshift, lp);
> +			sift(head, width, cmp, arg, pshift, lp);
>  			shr(p, 2);
>  			pshift += 2;
>  		} else {
>  			if(lp[pshift - 1] >= high - head) {
> -				trinkle(head, width, cmp, p, pshift, 0, lp);
> +				trinkle(head, width, cmp, arg, p, pshift, 0, lp);
>  			} else {
> -				sift(head, width, cmp, pshift, lp);
> +				sift(head, width, cmp, arg, pshift, lp);
>  			}
>  			
>  			if(pshift == 1) {
> @@ -196,7 +197,7 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
>  		head += width;
>  	}
>  
> -	trinkle(head, width, cmp, p, pshift, 0, lp);
> +	trinkle(head, width, cmp, arg, p, pshift, 0, lp);
>  
>  	while(pshift != 1 || p[0] != 1 || p[1] != 0) {
>  		if(pshift <= 1) {
> @@ -208,11 +209,21 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
>  			pshift -= 2;
>  			p[0] ^= 7;
>  			shr(p, 1);
> -			trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp);
> +			trinkle(head - lp[pshift] - width, width, cmp, arg, p, pshift + 1, 1, lp);
>  			shl(p, 1);
>  			p[0] |= 1;
> -			trinkle(head - width, width, cmp, p, pshift, 1, lp);
> +			trinkle(head - width, width, cmp, arg, p, pshift, 1, lp);
>  		}
>  		head -= width;
>  	}
>  }
> +
> +static int qsort_cmp(const void *e1, const void *e2, void *arg)
> +{
> +	return ((cmpfun)arg)(e1, e2);
> +}
> +
> +void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
> +{
> +	return qsort_r(base, nel, width, qsort_cmp, cmp);
> +}
> -- 
> 2.32.0

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [musl] [PATCH] stdlib: implement qsort_r
@ 2021-08-18 17:36 Olivier Galibert
  2021-08-18 20:18 ` Rich Felker
  0 siblings, 1 reply; 11+ messages in thread
From: Olivier Galibert @ 2021-08-18 17:36 UTC (permalink / raw)
  To: musl; +Cc: Olivier Galibert

The extension qsort_r allows calling qsort on a list of indices
without having a global variable to hold the data to sort.
---

I didn't see a "clean" patch for the wrapper version in the thread, so
here is one.  Passes libc-test, of course.


 include/stdlib.h   |  4 ++++
 src/stdlib/qsort.c | 39 +++++++++++++++++++++++++--------------
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/include/stdlib.h b/include/stdlib.h
index b54a051f..cacb61bf 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -55,6 +55,10 @@ int system (const char *);
 void *bsearch (const void *, const void *, size_t, size_t, int (*)(const void *, const void *));
 void qsort (void *, size_t, size_t, int (*)(const void *, const void *));
 
+#if defined(_GNU_SOURCE)
+  void qsort_r (void *, size_t, size_t, int (*)(const void *, const void *, void *), void *);
+#endif
+
 int abs (int);
 long labs (long);
 long long llabs (long long);
diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index da58fd31..9add0ac5 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -32,6 +32,7 @@
 #define ntz(x) a_ctz_l((x))
 
 typedef int (*cmpfun)(const void *, const void *);
+typedef int (*cmpfun_r)(const void *, const void *, void *);
 
 static inline int pntz(size_t p[2]) {
 	int r = ntz(p[0] - 1);
@@ -88,7 +89,7 @@ static inline void shr(size_t p[2], int n)
 	p[1] >>= n;
 }
 
-static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size_t lp[])
+static void sift(unsigned char *head, size_t width, cmpfun_r cmp, void *arg, int pshift, size_t lp[])
 {
 	unsigned char *rt, *lf;
 	unsigned char *ar[14 * sizeof(size_t) + 1];
@@ -99,10 +100,10 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size
 		rt = head - width;
 		lf = head - width - lp[pshift - 2];
 
-		if((*cmp)(ar[0], lf) >= 0 && (*cmp)(ar[0], rt) >= 0) {
+		if((*cmp)(ar[0], lf, arg) >= 0 && (*cmp)(ar[0], rt, arg) >= 0) {
 			break;
 		}
-		if((*cmp)(lf, rt) >= 0) {
+		if((*cmp)(lf, rt, arg) >= 0) {
 			ar[i++] = lf;
 			head = lf;
 			pshift -= 1;
@@ -115,7 +116,7 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size
 	cycle(width, ar, i);
 }
 
-static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], int pshift, int trusty, size_t lp[])
+static void trinkle(unsigned char *head, size_t width, cmpfun_r cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[])
 {
 	unsigned char *stepson,
 	              *rt, *lf;
@@ -130,13 +131,13 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2],
 	ar[0] = head;
 	while(p[0] != 1 || p[1] != 0) {
 		stepson = head - lp[pshift];
-		if((*cmp)(stepson, ar[0]) <= 0) {
+		if((*cmp)(stepson, ar[0], arg) <= 0) {
 			break;
 		}
 		if(!trusty && pshift > 1) {
 			rt = head - width;
 			lf = head - width - lp[pshift - 2];
-			if((*cmp)(rt, stepson) >= 0 || (*cmp)(lf, stepson) >= 0) {
+			if((*cmp)(rt, stepson, arg) >= 0 || (*cmp)(lf, stepson, arg) >= 0) {
 				break;
 			}
 		}
@@ -150,11 +151,11 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2],
 	}
 	if(!trusty) {
 		cycle(width, ar, i);
-		sift(head, width, cmp, pshift, lp);
+		sift(head, width, cmp, arg, pshift, lp);
 	}
 }
 
-void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
+void qsort_r(void *base, size_t nel, size_t width, cmpfun_r cmp, void *arg)
 {
 	size_t lp[12*sizeof(size_t)];
 	size_t i, size = width * nel;
@@ -173,14 +174,14 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
 
 	while(head < high) {
 		if((p[0] & 3) == 3) {
-			sift(head, width, cmp, pshift, lp);
+			sift(head, width, cmp, arg, pshift, lp);
 			shr(p, 2);
 			pshift += 2;
 		} else {
 			if(lp[pshift - 1] >= high - head) {
-				trinkle(head, width, cmp, p, pshift, 0, lp);
+				trinkle(head, width, cmp, arg, p, pshift, 0, lp);
 			} else {
-				sift(head, width, cmp, pshift, lp);
+				sift(head, width, cmp, arg, pshift, lp);
 			}
 			
 			if(pshift == 1) {
@@ -196,7 +197,7 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
 		head += width;
 	}
 
-	trinkle(head, width, cmp, p, pshift, 0, lp);
+	trinkle(head, width, cmp, arg, p, pshift, 0, lp);
 
 	while(pshift != 1 || p[0] != 1 || p[1] != 0) {
 		if(pshift <= 1) {
@@ -208,11 +209,21 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
 			pshift -= 2;
 			p[0] ^= 7;
 			shr(p, 1);
-			trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp);
+			trinkle(head - lp[pshift] - width, width, cmp, arg, p, pshift + 1, 1, lp);
 			shl(p, 1);
 			p[0] |= 1;
-			trinkle(head - width, width, cmp, p, pshift, 1, lp);
+			trinkle(head - width, width, cmp, arg, p, pshift, 1, lp);
 		}
 		head -= width;
 	}
 }
+
+static int qsort_cmp(const void *e1, const void *e2, void *arg)
+{
+	return ((cmpfun)arg)(e1, e2);
+}
+
+void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
+{
+	return qsort_r(base, nel, width, qsort_cmp, cmp);
+}
-- 
2.32.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-08-18 20:18 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-08 11:26 [musl] [PATCH] stdlib: implement qsort_r Olivier Galibert
2021-08-08 12:42 ` Jon Chesterfield
2021-08-08 19:39   ` Olivier Galibert
2021-08-08 20:35   ` Érico Nogueira
2021-08-08 21:21     ` Jon Chesterfield
2021-08-08 23:05       ` Rich Felker
2021-08-09  6:03         ` Olivier Galibert
2021-08-09 17:27   ` Florian Weimer
2021-08-08 20:28 ` Érico Nogueira
2021-08-18 17:36 Olivier Galibert
2021-08-18 20:18 ` Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).