The extension qsort_r allows calling qsort on a list of indices without having a global variable to hold the data to sort. --- COPYRIGHT | 4 +- include/stdlib.h | 4 + src/stdlib/qsort.c | 200 +--------------------------------- src/stdlib/qsort_common.ic | 218 +++++++++++++++++++++++++++++++++++++ src/stdlib/qsort_r.c | 28 +++++ 5 files changed, 257 insertions(+), 197 deletions(-) create mode 100644 src/stdlib/qsort_common.ic create mode 100644 src/stdlib/qsort_r.c diff --git a/COPYRIGHT b/COPYRIGHT index c1628e9a..f5199541 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -142,8 +142,8 @@ originally written by Solar Designer and placed into the public domain. The code also comes with a fallback permissive license for use in jurisdictions that may not recognize the public domain. -The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011 -Valentin Ochs and is licensed under an MIT-style license. +The smoothsort implementation (src/stdlib/qsort_common.ic) is +Copyright © 2011 Valentin Ochs and is licensed under an MIT-style license. The x86_64 port was written by Nicholas J. Kain and is licensed under the standard MIT terms. diff --git a/include/stdlib.h b/include/stdlib.h index b54a051f..cacb61bf 100644 --- a/include/stdlib.h +++ b/include/stdlib.h @@ -55,6 +55,10 @@ int system (const char *); void *bsearch (const void *, const void *, size_t, size_t, int (*)(const void *, const void *)); void qsort (void *, size_t, size_t, int (*)(const void *, const void *)); +#if defined(_GNU_SOURCE) + void qsort_r (void *, size_t, size_t, int (*)(const void *, const void *, void *), void *); +#endif + int abs (int); long labs (long); long long llabs (long long); diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c index da58fd31..b0ade53c 100644 --- a/src/stdlib/qsort.c +++ b/src/stdlib/qsort.c @@ -19,200 +19,10 @@ * IN THE SOFTWARE. */ -/* Minor changes by Rich Felker for integration in musl, 2011-04-27. */ +/* Instanciate qsort */ -/* Smoothsort, an adaptive variant of Heapsort. Memory usage: O(1). - Run time: Worst case O(n log n), close to O(n) in the mostly-sorted case. */ +#define FN(function) function +#define ARG_PROTO +#define ARG -#include <stdint.h> -#include <stdlib.h> -#include <string.h> - -#include "atomic.h" -#define ntz(x) a_ctz_l((x)) - -typedef int (*cmpfun)(const void *, const void *); - -static inline int pntz(size_t p[2]) { - int r = ntz(p[0] - 1); - if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) { - return r; - } - return 0; -} - -static void cycle(size_t width, unsigned char* ar[], int n) -{ - unsigned char tmp[256]; - size_t l; - int i; - - if(n < 2) { - return; - } - - ar[n] = tmp; - while(width) { - l = sizeof(tmp) < width ? sizeof(tmp) : width; - memcpy(ar[n], ar[0], l); - for(i = 0; i < n; i++) { - memcpy(ar[i], ar[i + 1], l); - ar[i] += l; - } - width -= l; - } -} - -/* shl() and shr() need n > 0 */ -static inline void shl(size_t p[2], int n) -{ - if(n >= 8 * sizeof(size_t)) { - n -= 8 * sizeof(size_t); - p[1] = p[0]; - p[0] = 0; - } - p[1] <<= n; - p[1] |= p[0] >> (sizeof(size_t) * 8 - n); - p[0] <<= n; -} - -static inline void shr(size_t p[2], int n) -{ - if(n >= 8 * sizeof(size_t)) { - n -= 8 * sizeof(size_t); - p[0] = p[1]; - p[1] = 0; - } - p[0] >>= n; - p[0] |= p[1] << (sizeof(size_t) * 8 - n); - p[1] >>= n; -} - -static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size_t lp[]) -{ - unsigned char *rt, *lf; - unsigned char *ar[14 * sizeof(size_t) + 1]; - int i = 1; - - ar[0] = head; - while(pshift > 1) { - rt = head - width; - lf = head - width - lp[pshift - 2]; - - if((*cmp)(ar[0], lf) >= 0 && (*cmp)(ar[0], rt) >= 0) { - break; - } - if((*cmp)(lf, rt) >= 0) { - ar[i++] = lf; - head = lf; - pshift -= 1; - } else { - ar[i++] = rt; - head = rt; - pshift -= 2; - } - } - cycle(width, ar, i); -} - -static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], int pshift, int trusty, size_t lp[]) -{ - unsigned char *stepson, - *rt, *lf; - size_t p[2]; - unsigned char *ar[14 * sizeof(size_t) + 1]; - int i = 1; - int trail; - - p[0] = pp[0]; - p[1] = pp[1]; - - ar[0] = head; - while(p[0] != 1 || p[1] != 0) { - stepson = head - lp[pshift]; - if((*cmp)(stepson, ar[0]) <= 0) { - break; - } - if(!trusty && pshift > 1) { - rt = head - width; - lf = head - width - lp[pshift - 2]; - if((*cmp)(rt, stepson) >= 0 || (*cmp)(lf, stepson) >= 0) { - break; - } - } - - ar[i++] = stepson; - head = stepson; - trail = pntz(p); - shr(p, trail); - pshift += trail; - trusty = 0; - } - if(!trusty) { - cycle(width, ar, i); - sift(head, width, cmp, pshift, lp); - } -} - -void qsort(void *base, size_t nel, size_t width, cmpfun cmp) -{ - size_t lp[12*sizeof(size_t)]; - size_t i, size = width * nel; - unsigned char *head, *high; - size_t p[2] = {1, 0}; - int pshift = 1; - int trail; - - if (!size) return; - - head = base; - high = head + size - width; - - /* Precompute Leonardo numbers, scaled by element width */ - for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++); - - while(head < high) { - if((p[0] & 3) == 3) { - sift(head, width, cmp, pshift, lp); - shr(p, 2); - pshift += 2; - } else { - if(lp[pshift - 1] >= high - head) { - trinkle(head, width, cmp, p, pshift, 0, lp); - } else { - sift(head, width, cmp, pshift, lp); - } - - if(pshift == 1) { - shl(p, 1); - pshift = 0; - } else { - shl(p, pshift - 1); - pshift = 1; - } - } - - p[0] |= 1; - head += width; - } - - trinkle(head, width, cmp, p, pshift, 0, lp); - - while(pshift != 1 || p[0] != 1 || p[1] != 0) { - if(pshift <= 1) { - trail = pntz(p); - shr(p, trail); - pshift += trail; - } else { - shl(p, 2); - pshift -= 2; - p[0] ^= 7; - shr(p, 1); - trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp); - shl(p, 1); - p[0] |= 1; - trinkle(head - width, width, cmp, p, pshift, 1, lp); - } - head -= width; - } -} +#include "qsort_common.ic" diff --git a/src/stdlib/qsort_common.ic b/src/stdlib/qsort_common.ic new file mode 100644 index 00000000..099b1bf4 --- /dev/null +++ b/src/stdlib/qsort_common.ic @@ -0,0 +1,218 @@ +/* Copyright (C) 2011 by Valentin Ochs + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* Minor changes by Rich Felker for integration in musl, 2011-04-27. */ + +/* Smoothsort, an adaptive variant of Heapsort. Memory usage: O(1). + Run time: Worst case O(n log n), close to O(n) in the mostly-sorted case. */ + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "atomic.h" +#define ntz(x) a_ctz_l((x)) + +typedef int (*cmpfun)(const void *, const void * ARG_PROTO); + +static inline int pntz(size_t p[2]) { + int r = ntz(p[0] - 1); + if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) { + return r; + } + return 0; +} + +static void FN(cycle)(size_t width, unsigned char* ar[], int n) +{ + unsigned char tmp[256]; + size_t l; + int i; + + if(n < 2) { + return; + } + + ar[n] = tmp; + while(width) { + l = sizeof(tmp) < width ? sizeof(tmp) : width; + memcpy(ar[n], ar[0], l); + for(i = 0; i < n; i++) { + memcpy(ar[i], ar[i + 1], l); + ar[i] += l; + } + width -= l; + } +} + +/* shl() and shr() need n > 0 */ +static inline void FN(shl)(size_t p[2], int n) +{ + if(n >= 8 * sizeof(size_t)) { + n -= 8 * sizeof(size_t); + p[1] = p[0]; + p[0] = 0; + } + p[1] <<= n; + p[1] |= p[0] >> (sizeof(size_t) * 8 - n); + p[0] <<= n; +} + +static inline void shr(size_t p[2], int n) +{ + if(n >= 8 * sizeof(size_t)) { + n -= 8 * sizeof(size_t); + p[0] = p[1]; + p[1] = 0; + } + p[0] >>= n; + p[0] |= p[1] << (sizeof(size_t) * 8 - n); + p[1] >>= n; +} + +static void FN(sift)(unsigned char *head, size_t width, cmpfun cmp, int pshift, size_t lp[] ARG_PROTO) +{ + unsigned char *rt, *lf; + unsigned char *ar[14 * sizeof(size_t) + 1]; + int i = 1; + + ar[0] = head; + while(pshift > 1) { + rt = head - width; + lf = head - width - lp[pshift - 2]; + + if((*cmp)(ar[0], lf ARG) >= 0 && (*cmp)(ar[0], rt ARG) >= 0) { + break; + } + if((*cmp)(lf, rt ARG) >= 0) { + ar[i++] = lf; + head = lf; + pshift -= 1; + } else { + ar[i++] = rt; + head = rt; + pshift -= 2; + } + } + FN(cycle)(width, ar, i); +} + +static void FN(trinkle)(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], int pshift, int trusty, size_t lp[] ARG_PROTO) +{ + unsigned char *stepson, + *rt, *lf; + size_t p[2]; + unsigned char *ar[14 * sizeof(size_t) + 1]; + int i = 1; + int trail; + + p[0] = pp[0]; + p[1] = pp[1]; + + ar[0] = head; + while(p[0] != 1 || p[1] != 0) { + stepson = head - lp[pshift]; + if((*cmp)(stepson, ar[0] ARG) <= 0) { + break; + } + if(!trusty && pshift > 1) { + rt = head - width; + lf = head - width - lp[pshift - 2]; + if((*cmp)(rt, stepson ARG) >= 0 || (*cmp)(lf, stepson ARG) >= 0) { + break; + } + } + + ar[i++] = stepson; + head = stepson; + trail = pntz(p); + shr(p, trail); + pshift += trail; + trusty = 0; + } + if(!trusty) { + FN(cycle)(width, ar, i); + FN(sift)(head, width, cmp, pshift, lp ARG); + } +} + +void FN(qsort)(void *base, size_t nel, size_t width, cmpfun cmp ARG_PROTO) +{ + size_t lp[12*sizeof(size_t)]; + size_t i, size = width * nel; + unsigned char *head, *high; + size_t p[2] = {1, 0}; + int pshift = 1; + int trail; + + if (!size) return; + + head = base; + high = head + size - width; + + /* Precompute Leonardo numbers, scaled by element width */ + for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++); + + while(head < high) { + if((p[0] & 3) == 3) { + FN(sift)(head, width, cmp, pshift, lp ARG); + shr(p, 2); + pshift += 2; + } else { + if(lp[pshift - 1] >= high - head) { + FN(trinkle)(head, width, cmp, p, pshift, 0, lp ARG); + } else { + FN(sift)(head, width, cmp, pshift, lp ARG); + } + + if(pshift == 1) { + FN(shl)(p, 1); + pshift = 0; + } else { + FN(shl)(p, pshift - 1); + pshift = 1; + } + } + + p[0] |= 1; + head += width; + } + + FN(trinkle)(head, width, cmp, p, pshift, 0, lp ARG); + + while(pshift != 1 || p[0] != 1 || p[1] != 0) { + if(pshift <= 1) { + trail = pntz(p); + shr(p, trail); + pshift += trail; + } else { + FN(shl)(p, 2); + pshift -= 2; + p[0] ^= 7; + shr(p, 1); + FN(trinkle)(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp ARG); + FN(shl)(p, 1); + p[0] |= 1; + FN(trinkle)(head - width, width, cmp, p, pshift, 1, lp ARG); + } + head -= width; + } +} diff --git a/src/stdlib/qsort_r.c b/src/stdlib/qsort_r.c new file mode 100644 index 00000000..d57e4a1e --- /dev/null +++ b/src/stdlib/qsort_r.c @@ -0,0 +1,28 @@ +/* Copyright (C) 2011 by Valentin Ochs + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* Instanciate qsort_r */ + +#define FN(function) function##_r +#define ARG_PROTO , void *arg +#define ARG , arg + +#include "qsort_common.ic" -- 2.32.0
[-- Attachment #1: Type: text/plain, Size: 1548 bytes --] On Sun, Aug 8, 2021 at 12:29 PM Olivier Galibert <galibert@pobox.com> wrote: > The extension qsort_r allows calling qsort on a list of indices > without having a global variable to hold the data to sort. > qsort_r is a strong improvement on qsort. I think it's available outside of glibc. I remember doing something similar locally. Just looked it up and I renamed & mutated qsort to pass the context along. Therefore typed into email, I think something like this would provide an implementation of qsort in terms of qsort_r. // declare qsort_r typedef int (*cmp_r_t)(const void *, const void *, void * context); void qsort_r(void *base, size_t nel, size_t width, cmp_r_t cmp, void* context); // pass it a function that extracts the comparator for qsort from the // context typedef int (*cmp_t)(const void *, const void *); static int compare_adapter(const void *l, const void *r, void * context) { cmpt_t c = (cmpt_t) context; return c(l,r); } // tail call void qsort(void *base, size_t nel, size_t width, cmp_t c) { return qsort_r(base, nel, width, compare_adapter, (cmp_t_t)c); } Given optimism about inlining or an always inline annotation it should turn into the same machine code as the macro instantiation approach. Alternatively it's a tail call into qsort_r, so a couple of indirections in exchange for minimal code size growth. I haven't compiled or tested that (or looked up the coding conventions for musl) so this is a drive by suggestion, written in pseudocode instead of prose for clarity. Thanks all! Jon [-- Attachment #2: Type: text/html, Size: 3588 bytes --]
[-- Attachment #1: Type: text/plain, Size: 2037 bytes --] I don’t believe in deep inlining in that case, there’s too much recursion. As for adding overhead to qsort by sharing qsort_r’s code it is indeed trivial to do but I don’t know whether the tradeoff should be done. OG. Le dim. 8 août 2021 à 14:42, Jon Chesterfield < jonathanchesterfield@gmail.com> a écrit : > On Sun, Aug 8, 2021 at 12:29 PM Olivier Galibert <galibert@pobox.com> > wrote: > >> The extension qsort_r allows calling qsort on a list of indices >> without having a global variable to hold the data to sort. >> > > qsort_r is a strong improvement on qsort. I think it's available outside of > glibc. > > I remember doing something similar locally. Just looked it up and I > renamed & mutated qsort to pass the context along. Therefore typed into > email, I think something like this would provide an implementation of > qsort in terms of qsort_r. > > // declare qsort_r > typedef int (*cmp_r_t)(const void *, const void *, void * context); > > > void qsort_r(void *base, size_t nel, size_t width, cmp_r_t cmp, void* > context); > > > // pass it a function that extracts the comparator for qsort from the > // context > typedef int (*cmp_t)(const void *, const void *); > > > static int compare_adapter(const void *l, const void *r, void * context) > { > cmpt_t c = (cmpt_t) context; > > > return c(l,r); > > > } > > // tail call > void qsort(void *base, size_t nel, size_t width, cmp_t c) > { > return qsort_r(base, nel, width, compare_adapter, (cmp_t_t)c); > > > } > > Given optimism about inlining or an always inline annotation > it should turn into the same machine code as the macro > instantiation approach. Alternatively it's a tail call into qsort_r, so > a couple of indirections in exchange for minimal code size growth. > > I haven't compiled or tested that (or looked up the coding conventions > for musl) so this is a drive by suggestion, written in pseudocode > instead of prose for clarity. > > Thanks all! > > Jon > > [-- Attachment #2: Type: text/html, Size: 4400 bytes --]
A similar implementation was suggested in https://inbox.vuxu.org/musl/20210309035652.32453-1-ericonr@disroot.org/ And mostly voted against, in favor of the wrapper implementation. On Sun Aug 8, 2021 at 8:26 AM -03, Olivier Galibert wrote: > The extension qsort_r allows calling qsort on a list of indices > without having a global variable to hold the data to sort. > > --- > COPYRIGHT | 4 +- > include/stdlib.h | 4 + > src/stdlib/qsort.c | 200 +--------------------------------- > src/stdlib/qsort_common.ic | 218 +++++++++++++++++++++++++++++++++++++ > src/stdlib/qsort_r.c | 28 +++++ > 5 files changed, 257 insertions(+), 197 deletions(-) > create mode 100644 src/stdlib/qsort_common.ic > create mode 100644 src/stdlib/qsort_r.c > > diff --git a/COPYRIGHT b/COPYRIGHT > index c1628e9a..f5199541 100644 > --- a/COPYRIGHT > +++ b/COPYRIGHT > @@ -142,8 +142,8 @@ originally written by Solar Designer and placed into > the public > domain. The code also comes with a fallback permissive license for use > in jurisdictions that may not recognize the public domain. > > -The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011 > -Valentin Ochs and is licensed under an MIT-style license. > +The smoothsort implementation (src/stdlib/qsort_common.ic) is > +Copyright © 2011 Valentin Ochs and is licensed under an MIT-style > license. > > The x86_64 port was written by Nicholas J. Kain and is licensed under > the standard MIT terms. > diff --git a/include/stdlib.h b/include/stdlib.h > index b54a051f..cacb61bf 100644 > --- a/include/stdlib.h > +++ b/include/stdlib.h > @@ -55,6 +55,10 @@ int system (const char *); > void *bsearch (const void *, const void *, size_t, size_t, int (*)(const > void *, const void *)); > void qsort (void *, size_t, size_t, int (*)(const void *, const void > *)); > > +#if defined(_GNU_SOURCE) > + void qsort_r (void *, size_t, size_t, int (*)(const void *, const void > *, void *), void *); > +#endif > + > int abs (int); > long labs (long); > long long llabs (long long); > diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c > index da58fd31..b0ade53c 100644 > --- a/src/stdlib/qsort.c > +++ b/src/stdlib/qsort.c > @@ -19,200 +19,10 @@ > * IN THE SOFTWARE. > */ > > -/* Minor changes by Rich Felker for integration in musl, 2011-04-27. */ > +/* Instanciate qsort */ > > -/* Smoothsort, an adaptive variant of Heapsort. Memory usage: O(1). > - Run time: Worst case O(n log n), close to O(n) in the mostly-sorted > case. */ > +#define FN(function) function > +#define ARG_PROTO > +#define ARG > > -#include <stdint.h> > -#include <stdlib.h> > -#include <string.h> > - > -#include "atomic.h" > -#define ntz(x) a_ctz_l((x)) > - > -typedef int (*cmpfun)(const void *, const void *); > - > -static inline int pntz(size_t p[2]) { > - int r = ntz(p[0] - 1); > - if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) { > - return r; > - } > - return 0; > -} > - > -static void cycle(size_t width, unsigned char* ar[], int n) > -{ > - unsigned char tmp[256]; > - size_t l; > - int i; > - > - if(n < 2) { > - return; > - } > - > - ar[n] = tmp; > - while(width) { > - l = sizeof(tmp) < width ? sizeof(tmp) : width; > - memcpy(ar[n], ar[0], l); > - for(i = 0; i < n; i++) { > - memcpy(ar[i], ar[i + 1], l); > - ar[i] += l; > - } > - width -= l; > - } > -} > - > -/* shl() and shr() need n > 0 */ > -static inline void shl(size_t p[2], int n) > -{ > - if(n >= 8 * sizeof(size_t)) { > - n -= 8 * sizeof(size_t); > - p[1] = p[0]; > - p[0] = 0; > - } > - p[1] <<= n; > - p[1] |= p[0] >> (sizeof(size_t) * 8 - n); > - p[0] <<= n; > -} > - > -static inline void shr(size_t p[2], int n) > -{ > - if(n >= 8 * sizeof(size_t)) { > - n -= 8 * sizeof(size_t); > - p[0] = p[1]; > - p[1] = 0; > - } > - p[0] >>= n; > - p[0] |= p[1] << (sizeof(size_t) * 8 - n); > - p[1] >>= n; > -} > - > -static void sift(unsigned char *head, size_t width, cmpfun cmp, int > pshift, size_t lp[]) > -{ > - unsigned char *rt, *lf; > - unsigned char *ar[14 * sizeof(size_t) + 1]; > - int i = 1; > - > - ar[0] = head; > - while(pshift > 1) { > - rt = head - width; > - lf = head - width - lp[pshift - 2]; > - > - if((*cmp)(ar[0], lf) >= 0 && (*cmp)(ar[0], rt) >= 0) { > - break; > - } > - if((*cmp)(lf, rt) >= 0) { > - ar[i++] = lf; > - head = lf; > - pshift -= 1; > - } else { > - ar[i++] = rt; > - head = rt; > - pshift -= 2; > - } > - } > - cycle(width, ar, i); > -} > - > -static void trinkle(unsigned char *head, size_t width, cmpfun cmp, > size_t pp[2], int pshift, int trusty, size_t lp[]) > -{ > - unsigned char *stepson, > - *rt, *lf; > - size_t p[2]; > - unsigned char *ar[14 * sizeof(size_t) + 1]; > - int i = 1; > - int trail; > - > - p[0] = pp[0]; > - p[1] = pp[1]; > - > - ar[0] = head; > - while(p[0] != 1 || p[1] != 0) { > - stepson = head - lp[pshift]; > - if((*cmp)(stepson, ar[0]) <= 0) { > - break; > - } > - if(!trusty && pshift > 1) { > - rt = head - width; > - lf = head - width - lp[pshift - 2]; > - if((*cmp)(rt, stepson) >= 0 || (*cmp)(lf, stepson) >= 0) { > - break; > - } > - } > - > - ar[i++] = stepson; > - head = stepson; > - trail = pntz(p); > - shr(p, trail); > - pshift += trail; > - trusty = 0; > - } > - if(!trusty) { > - cycle(width, ar, i); > - sift(head, width, cmp, pshift, lp); > - } > -} > - > -void qsort(void *base, size_t nel, size_t width, cmpfun cmp) > -{ > - size_t lp[12*sizeof(size_t)]; > - size_t i, size = width * nel; > - unsigned char *head, *high; > - size_t p[2] = {1, 0}; > - int pshift = 1; > - int trail; > - > - if (!size) return; > - > - head = base; > - high = head + size - width; > - > - /* Precompute Leonardo numbers, scaled by element width */ > - for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; > i++); > - > - while(head < high) { > - if((p[0] & 3) == 3) { > - sift(head, width, cmp, pshift, lp); > - shr(p, 2); > - pshift += 2; > - } else { > - if(lp[pshift - 1] >= high - head) { > - trinkle(head, width, cmp, p, pshift, 0, lp); > - } else { > - sift(head, width, cmp, pshift, lp); > - } > - > - if(pshift == 1) { > - shl(p, 1); > - pshift = 0; > - } else { > - shl(p, pshift - 1); > - pshift = 1; > - } > - } > - > - p[0] |= 1; > - head += width; > - } > - > - trinkle(head, width, cmp, p, pshift, 0, lp); > - > - while(pshift != 1 || p[0] != 1 || p[1] != 0) { > - if(pshift <= 1) { > - trail = pntz(p); > - shr(p, trail); > - pshift += trail; > - } else { > - shl(p, 2); > - pshift -= 2; > - p[0] ^= 7; > - shr(p, 1); > - trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp); > - shl(p, 1); > - p[0] |= 1; > - trinkle(head - width, width, cmp, p, pshift, 1, lp); > - } > - head -= width; > - } > -} > +#include "qsort_common.ic" > diff --git a/src/stdlib/qsort_common.ic b/src/stdlib/qsort_common.ic > new file mode 100644 > index 00000000..099b1bf4 > --- /dev/null > +++ b/src/stdlib/qsort_common.ic > @@ -0,0 +1,218 @@ > +/* Copyright (C) 2011 by Valentin Ochs > + * > + * Permission is hereby granted, free of charge, to any person > obtaining a copy > + * of this software and associated documentation files (the > "Software"), to > + * deal in the Software without restriction, including without > limitation the > + * rights to use, copy, modify, merge, publish, distribute, sublicense, > and/or > + * sell copies of the Software, and to permit persons to whom the > Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be > included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT > SHALL THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR > OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +/* Minor changes by Rich Felker for integration in musl, 2011-04-27. */ > + > +/* Smoothsort, an adaptive variant of Heapsort. Memory usage: O(1). > + Run time: Worst case O(n log n), close to O(n) in the mostly-sorted > case. */ > + > +#include <stdint.h> > +#include <stdlib.h> > +#include <string.h> > + > +#include "atomic.h" > +#define ntz(x) a_ctz_l((x)) > + > +typedef int (*cmpfun)(const void *, const void * ARG_PROTO); > + > +static inline int pntz(size_t p[2]) { > + int r = ntz(p[0] - 1); > + if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) { > + return r; > + } > + return 0; > +} > + > +static void FN(cycle)(size_t width, unsigned char* ar[], int n) > +{ > + unsigned char tmp[256]; > + size_t l; > + int i; > + > + if(n < 2) { > + return; > + } > + > + ar[n] = tmp; > + while(width) { > + l = sizeof(tmp) < width ? sizeof(tmp) : width; > + memcpy(ar[n], ar[0], l); > + for(i = 0; i < n; i++) { > + memcpy(ar[i], ar[i + 1], l); > + ar[i] += l; > + } > + width -= l; > + } > +} > + > +/* shl() and shr() need n > 0 */ > +static inline void FN(shl)(size_t p[2], int n) > +{ > + if(n >= 8 * sizeof(size_t)) { > + n -= 8 * sizeof(size_t); > + p[1] = p[0]; > + p[0] = 0; > + } > + p[1] <<= n; > + p[1] |= p[0] >> (sizeof(size_t) * 8 - n); > + p[0] <<= n; > +} > + > +static inline void shr(size_t p[2], int n) > +{ > + if(n >= 8 * sizeof(size_t)) { > + n -= 8 * sizeof(size_t); > + p[0] = p[1]; > + p[1] = 0; > + } > + p[0] >>= n; > + p[0] |= p[1] << (sizeof(size_t) * 8 - n); > + p[1] >>= n; > +} > + > +static void FN(sift)(unsigned char *head, size_t width, cmpfun cmp, int > pshift, size_t lp[] ARG_PROTO) > +{ > + unsigned char *rt, *lf; > + unsigned char *ar[14 * sizeof(size_t) + 1]; > + int i = 1; > + > + ar[0] = head; > + while(pshift > 1) { > + rt = head - width; > + lf = head - width - lp[pshift - 2]; > + > + if((*cmp)(ar[0], lf ARG) >= 0 && (*cmp)(ar[0], rt ARG) >= 0) { > + break; > + } > + if((*cmp)(lf, rt ARG) >= 0) { > + ar[i++] = lf; > + head = lf; > + pshift -= 1; > + } else { > + ar[i++] = rt; > + head = rt; > + pshift -= 2; > + } > + } > + FN(cycle)(width, ar, i); > +} > + > +static void FN(trinkle)(unsigned char *head, size_t width, cmpfun cmp, > size_t pp[2], int pshift, int trusty, size_t lp[] ARG_PROTO) > +{ > + unsigned char *stepson, > + *rt, *lf; > + size_t p[2]; > + unsigned char *ar[14 * sizeof(size_t) + 1]; > + int i = 1; > + int trail; > + > + p[0] = pp[0]; > + p[1] = pp[1]; > + > + ar[0] = head; > + while(p[0] != 1 || p[1] != 0) { > + stepson = head - lp[pshift]; > + if((*cmp)(stepson, ar[0] ARG) <= 0) { > + break; > + } > + if(!trusty && pshift > 1) { > + rt = head - width; > + lf = head - width - lp[pshift - 2]; > + if((*cmp)(rt, stepson ARG) >= 0 || (*cmp)(lf, stepson ARG) >= 0) { > + break; > + } > + } > + > + ar[i++] = stepson; > + head = stepson; > + trail = pntz(p); > + shr(p, trail); > + pshift += trail; > + trusty = 0; > + } > + if(!trusty) { > + FN(cycle)(width, ar, i); > + FN(sift)(head, width, cmp, pshift, lp ARG); > + } > +} > + > +void FN(qsort)(void *base, size_t nel, size_t width, cmpfun cmp > ARG_PROTO) > +{ > + size_t lp[12*sizeof(size_t)]; > + size_t i, size = width * nel; > + unsigned char *head, *high; > + size_t p[2] = {1, 0}; > + int pshift = 1; > + int trail; > + > + if (!size) return; > + > + head = base; > + high = head + size - width; > + > + /* Precompute Leonardo numbers, scaled by element width */ > + for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; > i++); > + > + while(head < high) { > + if((p[0] & 3) == 3) { > + FN(sift)(head, width, cmp, pshift, lp ARG); > + shr(p, 2); > + pshift += 2; > + } else { > + if(lp[pshift - 1] >= high - head) { > + FN(trinkle)(head, width, cmp, p, pshift, 0, lp ARG); > + } else { > + FN(sift)(head, width, cmp, pshift, lp ARG); > + } > + > + if(pshift == 1) { > + FN(shl)(p, 1); > + pshift = 0; > + } else { > + FN(shl)(p, pshift - 1); > + pshift = 1; > + } > + } > + > + p[0] |= 1; > + head += width; > + } > + > + FN(trinkle)(head, width, cmp, p, pshift, 0, lp ARG); > + > + while(pshift != 1 || p[0] != 1 || p[1] != 0) { > + if(pshift <= 1) { > + trail = pntz(p); > + shr(p, trail); > + pshift += trail; > + } else { > + FN(shl)(p, 2); > + pshift -= 2; > + p[0] ^= 7; > + shr(p, 1); > + FN(trinkle)(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, > lp ARG); > + FN(shl)(p, 1); > + p[0] |= 1; > + FN(trinkle)(head - width, width, cmp, p, pshift, 1, lp ARG); > + } > + head -= width; > + } > +} > diff --git a/src/stdlib/qsort_r.c b/src/stdlib/qsort_r.c > new file mode 100644 > index 00000000..d57e4a1e > --- /dev/null > +++ b/src/stdlib/qsort_r.c > @@ -0,0 +1,28 @@ > +/* Copyright (C) 2011 by Valentin Ochs > + * > + * Permission is hereby granted, free of charge, to any person > obtaining a copy > + * of this software and associated documentation files (the > "Software"), to > + * deal in the Software without restriction, including without > limitation the > + * rights to use, copy, modify, merge, publish, distribute, sublicense, > and/or > + * sell copies of the Software, and to permit persons to whom the > Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be > included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT > SHALL THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR > OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +/* Instanciate qsort_r */ > + > +#define FN(function) function##_r > +#define ARG_PROTO , void *arg > +#define ARG , arg > + > +#include "qsort_common.ic" From what I have seen, musl prefers using .h files for these situations instead of inventing new file extensions. It is also my belief that the version I linked was more readable. > -- > 2.32.0
On Sun Aug 8, 2021 at 9:42 AM -03, Jon Chesterfield wrote: > On Sun, Aug 8, 2021 at 12:29 PM Olivier Galibert <galibert@pobox.com> > wrote: > > > The extension qsort_r allows calling qsort on a list of indices > > without having a global variable to hold the data to sort. > > > > qsort_r is a strong improvement on qsort. I think it's available outside > of > glibc. > > I remember doing something similar locally. Just looked it up and I > renamed & mutated qsort to pass the context along. Therefore typed into > email, I think something like this would provide an implementation of > qsort in terms of qsort_r. > > // declare qsort_r > typedef int (*cmp_r_t)(const void *, const void *, void * context); > > > void qsort_r(void *base, size_t nel, size_t width, cmp_r_t cmp, void* > context); > > > // pass it a function that extracts the comparator for qsort from the > // context > typedef int (*cmp_t)(const void *, const void *); > > > static int compare_adapter(const void *l, const void *r, void * context) > { > cmpt_t c = (cmpt_t) context; > > > return c(l,r); > > > } > > // tail call > void qsort(void *base, size_t nel, size_t width, cmp_t c) > { > return qsort_r(base, nel, width, compare_adapter, (cmp_t_t)c); > > > } > > Given optimism about inlining or an always inline annotation > it should turn into the same machine code as the macro > instantiation approach. Alternatively it's a tail call into qsort_r, so > a couple of indirections in exchange for minimal code size growth. > > I haven't compiled or tested that (or looked up the coding conventions > for musl) so this is a drive by suggestion, written in pseudocode > instead of prose for clarity. This is the favored approach, from what I understood of the discussions surrounding it. It's implemented with musl's namespacing rules and such in [1]. It is badly optimized for some archs, unfortunately, as explained in the thread from [2]. I believe that's what's holding it up. [1] https://inbox.vuxu.org/musl/20210309210213.29539-1-ericonr@disroot.org/ [2] https://inbox.vuxu.org/musl/20210309150320.GU32655@brightrain.aerifal.cx/ > > Thanks all! > > Jon
[-- Attachment #1: Type: text/plain, Size: 713 bytes --] On Sun, Aug 8, 2021 at 9:39 PM Érico Nogueira <ericonr@disroot.org> wrote: > This is the favored approach, from what I understood of the discussions > surrounding it. It's implemented with musl's namespacing rules and such > in [1]. > Unsurprisingly I like your patch. > > It is badly optimized for some archs, unfortunately, as explained in the > thread from [2]. I believe that's what's holding it up. > > [1] > https://inbox.vuxu.org/musl/20210309210213.29539-1-ericonr@disroot.org/ > [2] > https://inbox.vuxu.org/musl/20210309150320.GU32655@brightrain.aerifal.cx/ And that's what I get for not reading the list carefully enough. I missed the thread from March entirely. Thanks! [-- Attachment #2: Type: text/html, Size: 1440 bytes --]
On Sun, Aug 08, 2021 at 10:21:06PM +0100, Jon Chesterfield wrote:
> On Sun, Aug 8, 2021 at 9:39 PM Érico Nogueira <ericonr@disroot.org> wrote:
>
> > This is the favored approach, from what I understood of the discussions
> > surrounding it. It's implemented with musl's namespacing rules and such
> > in [1].
> >
>
> Unsurprisingly I like your patch.
>
> >
> > It is badly optimized for some archs, unfortunately, as explained in the
> > thread from [2]. I believe that's what's holding it up.
> >
> > [1]
> > https://inbox.vuxu.org/musl/20210309210213.29539-1-ericonr@disroot.org/
> > [2]
> > https://inbox.vuxu.org/musl/20210309150320.GU32655@brightrain.aerifal.cx/
>
>
> And that's what I get for not reading the list carefully enough. I missed
> the thread
> from March entirely.
No problem. Indeed, this was stalled because of concerns about bad
compiler behavior on some archs, but I really don't want to allow that
to dictate how we solve the problem. The compiler is doing the wrong
thing (failing to tail call) and should just be fixed. I'll probably
merge the tail-call patch as-is, or with any minimal fixes needed,
right after this release so as not to make major API changes at last
minute.
Rich
[-- Attachment #1: Type: text/plain, Size: 1583 bytes --] Yeah, the wrapper_cmp version looks rather good, I didn’t realize it would go down to one instruction. Or that an old patch would have been stalled for five months ;-) OG Le lun. 9 août 2021 à 01:05, Rich Felker <dalias@libc.org> a écrit : > On Sun, Aug 08, 2021 at 10:21:06PM +0100, Jon Chesterfield wrote: > > On Sun, Aug 8, 2021 at 9:39 PM Érico Nogueira <ericonr@disroot.org> > wrote: > > > > > This is the favored approach, from what I understood of the discussions > > > surrounding it. It's implemented with musl's namespacing rules and such > > > in [1]. > > > > > > > Unsurprisingly I like your patch. > > > > > > > > It is badly optimized for some archs, unfortunately, as explained in > the > > > thread from [2]. I believe that's what's holding it up. > > > > > > [1] > > > > https://inbox.vuxu.org/musl/20210309210213.29539-1-ericonr@disroot.org/ > > > [2] > > > > https://inbox.vuxu.org/musl/20210309150320.GU32655@brightrain.aerifal.cx/ > > > > > > And that's what I get for not reading the list carefully enough. I missed > > the thread > > from March entirely. > > No problem. Indeed, this was stalled because of concerns about bad > compiler behavior on some archs, but I really don't want to allow that > to dictate how we solve the problem. The compiler is doing the wrong > thing (failing to tail call) and should just be fixed. I'll probably > merge the tail-call patch as-is, or with any minimal fixes needed, > right after this release so as not to make major API changes at last > minute. > > Rich > [-- Attachment #2: Type: text/html, Size: 2384 bytes --]
* Jon Chesterfield: > On Sun, Aug 8, 2021 at 12:29 PM Olivier Galibert <galibert@pobox.com> wrote: > > The extension qsort_r allows calling qsort on a list of indices > without having a global variable to hold the data to sort. > > qsort_r is a strong improvement on qsort. I think it's available outside of > glibc. Right. Solaris has it, too. And apparently FreeBSD adjusted their interface: <https://reviews.freebsd.org/D17083> Thanks, Florian
The extension qsort_r allows calling qsort on a list of indices without having a global variable to hold the data to sort. --- I didn't see a "clean" patch for the wrapper version in the thread, so here is one. Passes libc-test, of course. include/stdlib.h | 4 ++++ src/stdlib/qsort.c | 39 +++++++++++++++++++++++++-------------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/include/stdlib.h b/include/stdlib.h index b54a051f..cacb61bf 100644 --- a/include/stdlib.h +++ b/include/stdlib.h @@ -55,6 +55,10 @@ int system (const char *); void *bsearch (const void *, const void *, size_t, size_t, int (*)(const void *, const void *)); void qsort (void *, size_t, size_t, int (*)(const void *, const void *)); +#if defined(_GNU_SOURCE) + void qsort_r (void *, size_t, size_t, int (*)(const void *, const void *, void *), void *); +#endif + int abs (int); long labs (long); long long llabs (long long); diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c index da58fd31..9add0ac5 100644 --- a/src/stdlib/qsort.c +++ b/src/stdlib/qsort.c @@ -32,6 +32,7 @@ #define ntz(x) a_ctz_l((x)) typedef int (*cmpfun)(const void *, const void *); +typedef int (*cmpfun_r)(const void *, const void *, void *); static inline int pntz(size_t p[2]) { int r = ntz(p[0] - 1); @@ -88,7 +89,7 @@ static inline void shr(size_t p[2], int n) p[1] >>= n; } -static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size_t lp[]) +static void sift(unsigned char *head, size_t width, cmpfun_r cmp, void *arg, int pshift, size_t lp[]) { unsigned char *rt, *lf; unsigned char *ar[14 * sizeof(size_t) + 1]; @@ -99,10 +100,10 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size rt = head - width; lf = head - width - lp[pshift - 2]; - if((*cmp)(ar[0], lf) >= 0 && (*cmp)(ar[0], rt) >= 0) { + if((*cmp)(ar[0], lf, arg) >= 0 && (*cmp)(ar[0], rt, arg) >= 0) { break; } - if((*cmp)(lf, rt) >= 0) { + if((*cmp)(lf, rt, arg) >= 0) { ar[i++] = lf; head = lf; pshift -= 1; @@ -115,7 +116,7 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size cycle(width, ar, i); } -static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], int pshift, int trusty, size_t lp[]) +static void trinkle(unsigned char *head, size_t width, cmpfun_r cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[]) { unsigned char *stepson, *rt, *lf; @@ -130,13 +131,13 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], ar[0] = head; while(p[0] != 1 || p[1] != 0) { stepson = head - lp[pshift]; - if((*cmp)(stepson, ar[0]) <= 0) { + if((*cmp)(stepson, ar[0], arg) <= 0) { break; } if(!trusty && pshift > 1) { rt = head - width; lf = head - width - lp[pshift - 2]; - if((*cmp)(rt, stepson) >= 0 || (*cmp)(lf, stepson) >= 0) { + if((*cmp)(rt, stepson, arg) >= 0 || (*cmp)(lf, stepson, arg) >= 0) { break; } } @@ -150,11 +151,11 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], } if(!trusty) { cycle(width, ar, i); - sift(head, width, cmp, pshift, lp); + sift(head, width, cmp, arg, pshift, lp); } } -void qsort(void *base, size_t nel, size_t width, cmpfun cmp) +void qsort_r(void *base, size_t nel, size_t width, cmpfun_r cmp, void *arg) { size_t lp[12*sizeof(size_t)]; size_t i, size = width * nel; @@ -173,14 +174,14 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp) while(head < high) { if((p[0] & 3) == 3) { - sift(head, width, cmp, pshift, lp); + sift(head, width, cmp, arg, pshift, lp); shr(p, 2); pshift += 2; } else { if(lp[pshift - 1] >= high - head) { - trinkle(head, width, cmp, p, pshift, 0, lp); + trinkle(head, width, cmp, arg, p, pshift, 0, lp); } else { - sift(head, width, cmp, pshift, lp); + sift(head, width, cmp, arg, pshift, lp); } if(pshift == 1) { @@ -196,7 +197,7 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp) head += width; } - trinkle(head, width, cmp, p, pshift, 0, lp); + trinkle(head, width, cmp, arg, p, pshift, 0, lp); while(pshift != 1 || p[0] != 1 || p[1] != 0) { if(pshift <= 1) { @@ -208,11 +209,21 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp) pshift -= 2; p[0] ^= 7; shr(p, 1); - trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp); + trinkle(head - lp[pshift] - width, width, cmp, arg, p, pshift + 1, 1, lp); shl(p, 1); p[0] |= 1; - trinkle(head - width, width, cmp, p, pshift, 1, lp); + trinkle(head - width, width, cmp, arg, p, pshift, 1, lp); } head -= width; } } + +static int qsort_cmp(const void *e1, const void *e2, void *arg) +{ + return ((cmpfun)arg)(e1, e2); +} + +void qsort(void *base, size_t nel, size_t width, cmpfun cmp) +{ + return qsort_r(base, nel, width, qsort_cmp, cmp); +} -- 2.32.0
On Wed, Aug 18, 2021 at 07:36:53PM +0200, Olivier Galibert wrote: > The extension qsort_r allows calling qsort on a list of indices > without having a global variable to hold the data to sort. > --- > > I didn't see a "clean" patch for the wrapper version in the thread, so > here is one. Passes libc-test, of course. Here is the original patch, which also handles namespace correctly and separates the translation units: https://www.openwall.com/lists/musl/2021/03/09/10 Otherwise I think yours is roughly the same. I haven't checked whether there are any other differences that should be resolved. One change that should be made to either is that qsort_r should be under _BSD_SOURCE, not _GNU_SOURCE. > include/stdlib.h | 4 ++++ > src/stdlib/qsort.c | 39 +++++++++++++++++++++++++-------------- > 2 files changed, 29 insertions(+), 14 deletions(-) > > diff --git a/include/stdlib.h b/include/stdlib.h > index b54a051f..cacb61bf 100644 > --- a/include/stdlib.h > +++ b/include/stdlib.h > @@ -55,6 +55,10 @@ int system (const char *); > void *bsearch (const void *, const void *, size_t, size_t, int (*)(const void *, const void *)); > void qsort (void *, size_t, size_t, int (*)(const void *, const void *)); > > +#if defined(_GNU_SOURCE) > + void qsort_r (void *, size_t, size_t, int (*)(const void *, const void *, void *), void *); > +#endif > + > int abs (int); > long labs (long); > long long llabs (long long); > diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c > index da58fd31..9add0ac5 100644 > --- a/src/stdlib/qsort.c > +++ b/src/stdlib/qsort.c > @@ -32,6 +32,7 @@ > #define ntz(x) a_ctz_l((x)) > > typedef int (*cmpfun)(const void *, const void *); > +typedef int (*cmpfun_r)(const void *, const void *, void *); > > static inline int pntz(size_t p[2]) { > int r = ntz(p[0] - 1); > @@ -88,7 +89,7 @@ static inline void shr(size_t p[2], int n) > p[1] >>= n; > } > > -static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size_t lp[]) > +static void sift(unsigned char *head, size_t width, cmpfun_r cmp, void *arg, int pshift, size_t lp[]) > { > unsigned char *rt, *lf; > unsigned char *ar[14 * sizeof(size_t) + 1]; > @@ -99,10 +100,10 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size > rt = head - width; > lf = head - width - lp[pshift - 2]; > > - if((*cmp)(ar[0], lf) >= 0 && (*cmp)(ar[0], rt) >= 0) { > + if((*cmp)(ar[0], lf, arg) >= 0 && (*cmp)(ar[0], rt, arg) >= 0) { > break; > } > - if((*cmp)(lf, rt) >= 0) { > + if((*cmp)(lf, rt, arg) >= 0) { > ar[i++] = lf; > head = lf; > pshift -= 1; > @@ -115,7 +116,7 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size > cycle(width, ar, i); > } > > -static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], int pshift, int trusty, size_t lp[]) > +static void trinkle(unsigned char *head, size_t width, cmpfun_r cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[]) > { > unsigned char *stepson, > *rt, *lf; > @@ -130,13 +131,13 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], > ar[0] = head; > while(p[0] != 1 || p[1] != 0) { > stepson = head - lp[pshift]; > - if((*cmp)(stepson, ar[0]) <= 0) { > + if((*cmp)(stepson, ar[0], arg) <= 0) { > break; > } > if(!trusty && pshift > 1) { > rt = head - width; > lf = head - width - lp[pshift - 2]; > - if((*cmp)(rt, stepson) >= 0 || (*cmp)(lf, stepson) >= 0) { > + if((*cmp)(rt, stepson, arg) >= 0 || (*cmp)(lf, stepson, arg) >= 0) { > break; > } > } > @@ -150,11 +151,11 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], > } > if(!trusty) { > cycle(width, ar, i); > - sift(head, width, cmp, pshift, lp); > + sift(head, width, cmp, arg, pshift, lp); > } > } > > -void qsort(void *base, size_t nel, size_t width, cmpfun cmp) > +void qsort_r(void *base, size_t nel, size_t width, cmpfun_r cmp, void *arg) > { > size_t lp[12*sizeof(size_t)]; > size_t i, size = width * nel; > @@ -173,14 +174,14 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp) > > while(head < high) { > if((p[0] & 3) == 3) { > - sift(head, width, cmp, pshift, lp); > + sift(head, width, cmp, arg, pshift, lp); > shr(p, 2); > pshift += 2; > } else { > if(lp[pshift - 1] >= high - head) { > - trinkle(head, width, cmp, p, pshift, 0, lp); > + trinkle(head, width, cmp, arg, p, pshift, 0, lp); > } else { > - sift(head, width, cmp, pshift, lp); > + sift(head, width, cmp, arg, pshift, lp); > } > > if(pshift == 1) { > @@ -196,7 +197,7 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp) > head += width; > } > > - trinkle(head, width, cmp, p, pshift, 0, lp); > + trinkle(head, width, cmp, arg, p, pshift, 0, lp); > > while(pshift != 1 || p[0] != 1 || p[1] != 0) { > if(pshift <= 1) { > @@ -208,11 +209,21 @@ void qsort(void *base, size_t nel, size_t width, cmpfun cmp) > pshift -= 2; > p[0] ^= 7; > shr(p, 1); > - trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp); > + trinkle(head - lp[pshift] - width, width, cmp, arg, p, pshift + 1, 1, lp); > shl(p, 1); > p[0] |= 1; > - trinkle(head - width, width, cmp, p, pshift, 1, lp); > + trinkle(head - width, width, cmp, arg, p, pshift, 1, lp); > } > head -= width; > } > } > + > +static int qsort_cmp(const void *e1, const void *e2, void *arg) > +{ > + return ((cmpfun)arg)(e1, e2); > +} > + > +void qsort(void *base, size_t nel, size_t width, cmpfun cmp) > +{ > + return qsort_r(base, nel, width, qsort_cmp, cmp); > +} > -- > 2.32.0