#include #include #include #include typedef int (*cmpfun)(const void *, const void *); size_t __bsearch(const void *, const void *, size_t, size_t, cmpfun); static inline size_t bsearch_pos(const void *key, const void *haystack, size_t nmel, size_t width, cmpfun cmp) { size_t pos = __bsearch(key, haystack, nmel, width, cmp); return pos > SSIZE_MAX ? ~pos : pos; } static inline void *tail(char *base, size_t nmel, size_t width) { return base + (nmel-1) * width; } static void swap(char *a, char *b, size_t width) { #ifdef __GNUC__ typedef uint32_t __attribute__((__may_alias__)) u32; if ((uintptr_t)a % 4 == 0 && (uintptr_t)b % 4 == 0) { for (; width >= 4; width -= 4) { uint32_t tmp = *((u32*)a); *((u32*)a) = *((u32*)b); *((u32*)b) = tmp; a += 4; b += 4; } } #endif while (width--) { char tmp = *a; *a++ = *b; *b++ = tmp; } } static void rotate(char *base, size_t size, size_t shift) { int dir = 1; while (shift) { while (2*shift <= size) { swap(base, base + dir*shift, shift); size -= shift; base += shift*dir; } shift = size - shift; base = dir > 0 ? base + size - shift : base - shift; dir *= -1; } } static void distribute_buffer(char *base, size_t bufnmel, size_t sortnmel, size_t width, cmpfun cmp) { while (bufnmel) { char *sorted = base + bufnmel * width; size_t insertpos = bsearch_pos(base, sorted, sortnmel, width, cmp); if (insertpos > 0) rotate(base, (bufnmel + insertpos) * width, bufnmel * width); base += (insertpos + 1) * width; bufnmel -= 1; sortnmel -= insertpos; } } #define MAX_SORTNET 8 static const uint8_t sortnet[][2] = { /* 0: index = 0 */ /* 1: index = 0 */ /* 2: index = 0 */ {0,1}, /* 3: index = 1 */ {0,1}, {0,2}, {1,2}, /* 4: index = 4 */ {0,1}, {2,3}, {0,2}, {1,3}, {1,2}, /* 5: index = 9 */ {0,1}, {3,4}, {2,4}, {2,3}, {1,4}, {0,3}, {0,2}, {1,3}, {1,2}, /* 6: index = 18 */ {1,2}, {4,5}, {0,2}, {3,5}, {0,1}, {3,4}, {2,5}, {0,3}, {1,4}, {2,4}, {1,3}, {2,3}, /* 7: index = 30 */ {1,2}, {3,4}, {5,6}, {0,2}, {3,5}, {4,6}, {0,1}, {4,5}, {2,6}, {0,4}, {1,5}, {0,3}, {2,5}, {1,3}, {2,4}, {2,3}, /* 8: index = 46 */ {0,1}, {2,3}, {4,5}, {6,7}, {0,2}, {1,3}, {4,6}, {5,7}, {1,2}, {5,6}, {0,4}, {3,7}, {1,5}, {2,6}, {1,4}, {3,6}, {2,4}, {3,5}, {3,4}, /* 9: index = 65 */ }; static const uint8_t sortnet_index[] = { 0, 0, 0, 1, 4, 9, 18, 30, 46, 65 }; static void sorting_network(char *base, size_t nmel, size_t width, cmpfun cmp) { for (int i = sortnet_index[nmel]; i < sortnet_index[nmel+1]; i++) { char *elem1 = base + sortnet[i][0] * width; char *elem2 = base + sortnet[i][1] * width; if (cmp(elem1, elem2) > 0) swap(elem1, elem2, width); } } /* get index of last block whose head is less than the previous block's tail */ static size_t last_overlap(char *base, size_t bcount, size_t bwidth, size_t width, cmpfun cmp) { for (char *cur = tail(base, bcount, bwidth); --bcount; cur -= bwidth) if (cmp(cur - width, cur) > 0) break; return bcount; } void merge(char *buf, char *base, size_t anmel, size_t bnmel, size_t width, cmpfun cmp) { char *a = buf; char *b = base + anmel * width; size_t skip = bsearch_pos(b, base, anmel, width, cmp); anmel -= skip; base += skip * width; swap(base, a, anmel * width); while (anmel && bnmel) { if (cmp(a, b) <= 0) { swap(base, a, width); a += width; anmel--; } else { swap(base, b, width); b += width; bnmel--; } base += width; } swap(base, a, anmel * width); } void qsort(void *unsorted, size_t nmel, size_t width, cmpfun cmp) { char *base = unsorted; if (nmel <= MAX_SORTNET) { sorting_network(base, nmel, width, cmp); return; } size_t blknmel = sqrt(nmel); /* elements in a block */ size_t bufnmel = blknmel + nmel % blknmel; /* elements in the buffer */ size_t bwidth = blknmel * width; /* size of a block in bytes */ size_t blocks = nmel / blknmel - 1; /* number of blocks in a + b */ size_t acount = blocks / 2; size_t bcount = blocks - acount; char *a = base + bufnmel * width; char *b = a + acount * bwidth; qsort(a, acount * blknmel, width, cmp); qsort(b, bcount * blknmel, width, cmp); /* if already sorted, nothing to do */ if (cmp(tail(a, acount * blknmel, width), b) <= 0) goto distribute; /* sort all the a and b blocks together by their head elements */ qsort(a, blocks, bwidth, cmp); /* merge, starting from the end and working towards the beginning */ while (blocks > 1) { size_t overlap = last_overlap(a, blocks, bwidth, width, cmp); if (overlap > 0) merge(base, tail(a, overlap, bwidth), blknmel, (blocks - overlap) * blknmel, width, cmp); blocks = overlap; } distribute: qsort(base, bufnmel, width, cmp); distribute_buffer(base, bufnmel, nmel - bufnmel, width, cmp); }