/*
 *	Experiments with various sorting algorithms
 *
 *	(c) 2007--2008 Martin Mares
 */

/*
 * NOTE(review): this copy of the file is damaged. Every #include below has
 * lost its header name, and in many loops the text starting at a '<' that is
 * directly followed by a letter and running up to the next '>' is missing
 * (for example "for (uint i=0; i> sh) ..."). The code is kept exactly as
 * found; it has to be restored from a pristine copy before it can compile.
 * Comments on damaged spots describe only what is still visible.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * The record being sorted: a u32 key followed by three u32 words of payload.
 * r1c_sse_sort() asserts that the whole structure is 16 bytes long.
 */
struct elt {
  u32 key;
  u32 ballast[3];
};

/*
 * ary      -- array currently holding the data (each sort leaves its result here)
 * alt      -- second buffer of the same size used as scratch / ping-pong space
 * ind      -- array of pointers to elements, used by the indirect sorts
 * array0/1 -- the two buffers allocated in main(); mk_ary() points ary and
 *             alt at them
 */
static struct elt *ary, *alt, **ind, *array0, *array1;

/* Number of elements to sort. */
static uint n = 10000000;

/* Reset by mk_ary() and later compared with the XOR of all keys found after sorting. */
static u32 sum;

/* Allocate room for n elements using big_alloc(). */
static struct elt *alloc_elts(uint n)
{
  return big_alloc(n * sizeof(struct elt));
}

/* Release an array of n elements with big_free(); n is needed to pass the size. */
static void free_elts(struct elt *a, uint n)
{
  big_free(a, n * sizeof(struct elt));
}

/*
 * Three-way comparison of two elements by key, taking const void pointers
 * like a qsort()-style comparator. Returns -1, 0 or 1.
 */
static int comp(const void *x, const void *y)
{
  const struct elt *xx = x, *yy = y;
  return (xx->key < yy->key) ? -1 : (xx->key > yy->key) ? 1 : 0;
}

/* Same as comp(), but x and y point to pointers to elements. */
static int comp_ind(const void *x, const void *y)
{
  const struct elt * const *xx = x, * const *yy = y;
  return comp(*xx, *yy);
}

/*
 * Parameters for a generic array sorter operating directly on an array of
 * elements passed as the extra argument `a`. The header implementing it is
 * included right after the macros (its name is lost in this copy).
 * Presumably this produces as_sort(), which is called below as
 * as_sort(count, array) -- confirm against the sorter header.
 */
#define ASORT_PREFIX(x) as_##x
#define ASORT_KEY_TYPE u32
#define ASORT_ELT(i) a[i].key
#define ASORT_SWAP(i,j) do { struct elt t=a[i]; a[i]=a[j]; a[j]=t; } while (0)
#define ASORT_EXTRA_ARGS , struct elt *a
#include

/*
 * Second instantiation with the prefix asi_: it compares keys through the
 * global pointer array ind[] and swaps the pointers only.
 */
#define ASORT_PREFIX(x) asi_##x
#define ASORT_KEY_TYPE u32
#define ASORT_ELT(i) ind[i]->key
#define ASORT_SWAP(i,j) do { struct elt *t=ind[i]; ind[i]=ind[j]; ind[j]=t; } while (0)
#include

/*
 * Radix sort taking BITS (8) bits of the key per pass, from the least
 * significant bits upwards (sh = 0, 8, 16, 24). The buffers `from` and `to`
 * are exchanged after every pass and ary is finally set to the buffer that
 * received the last output.
 *
 * NOTE(review): both inner loops are damaged here -- the counting loop has
 * lost its bound and the expression being shifted, and the loop turning
 * counts into positions has been swallowed together with the start of the
 * distribution loop. The surviving tail "]++] = from[i]" and the ASSERT
 * suggest that cnt[] holds running output positions, so that the last entry
 * equals n after the pass.
 */
static void r1_sort(void)
{
  struct elt *from = ary, *to = alt, *tmp;
#define BITS 8
  uint cnt[1 << BITS];
  for (uint sh=0; sh<32; sh+=BITS) {
    bzero(cnt, sizeof(cnt));
    for (uint i=0; i> sh) & ((1 << BITS) - 1)]++;
    uint pos = 0;
    for (uint i=0; i<(1<> sh) & ((1 << BITS) - 1)]++] = from[i];
    ASSERT(cnt[(1 << BITS)-1] == n);
    /* Exchange the roles of the two buffers for the next pass. */
    tmp=from, from=to, to=tmp;
  }
  ary = from;
#undef BITS
}

/*
 * Variant of r1_sort() which counts from scratch only in the first pass
 * (sh == 0); every later pass starts from the counts saved in cnt2[].
 *
 * NOTE(review): damaged like r1_sort(). The fragment
 * "> (sh + BITS)) & ...]++" is presumably the tail of a statement counting
 * the next digit into cnt2[] while the current digit is being distributed.
 * If so, the last pass (sh == 24) shifts by sh + BITS == 32, i.e. by the
 * full width of a 32-bit key, which is undefined behaviour in C. The counts
 * gathered in that pass are not used by the visible code, but the shift
 * itself should be avoided -- confirm against the original source.
 * The closing brace after the distribution statement matches an opening
 * brace that was lost with the loop header.
 */
static void r1b_sort(void)
{
  struct elt *from = ary, *to = alt, *tmp;
#define BITS 8
  uint cnt[1 << BITS], cnt2[1 << BITS];
  for (uint sh=0; sh<32; sh+=BITS) {
    if (sh)
      memcpy(cnt, cnt2, sizeof(cnt));
    else {
      bzero(cnt, sizeof(cnt));
      for (uint i=0; i> sh) & ((1 << BITS) - 1)]++;
    }
    uint pos = 0;
    for (uint i=0; i<(1<> (sh + BITS)) & ((1 << BITS) - 1)]++;
      to[cnt[(from[i].key >> sh) & ((1 << BITS) - 1)]++] = from[i];
    }
    ASSERT(cnt[(1 << BITS)-1] == n);
    tmp=from,
    from=to, to=tmp;
  }
  ary = from;
#undef BITS
}

/*
 * Byte-wise radix sort with the four passes written out explicitly and with
 * per-bucket output pointers instead of indices.
 *
 * The low byte is counted first. Each distribution pass then copies every
 * element to the bucket given by the current byte and counts the next byte
 * at the same time, so each pass reads the input once. The data travel
 * ary -> alt -> ary -> alt -> ary, so the result ends up in ary.
 */
static void r1c_sort(void)
{
  uint cnt[256];
  struct elt *ptrs[256], *x, *lim;

  /* Count occurrences of the lowest byte. */
  x = ary; lim = ary + n;
  bzero(cnt, sizeof(cnt));
  while (x < lim)
    cnt[x++->key & 255]++;

  /* Turn cnt[] into bucket start pointers inside the array `start`; overwrites x. */
#define PTRS(start) x=start; for (uint i=0; i<256; i++) { ptrs[i]=x; x+=cnt[i]; }

  /* Pass 1: distribute by byte 0 into alt, count byte 1. */
  PTRS(alt);
  x = ary; lim = ary + n;
  bzero(cnt, sizeof(cnt));
  while (x < lim) {
    cnt[(x->key >> 8) & 255]++;
    *ptrs[x->key & 255]++ = *x;
    x++;
  }

  /* Pass 2: distribute by byte 1 into ary, count byte 2. */
  PTRS(ary);
  x = alt; lim = alt + n;
  bzero(cnt, sizeof(cnt));
  while (x < lim) {
    cnt[(x->key >> 16) & 255]++;
    *ptrs[(x->key >> 8) & 255]++ = *x;
    x++;
  }

  /* Pass 3: distribute by byte 2 into alt, count byte 3. */
  PTRS(alt);
  x = ary; lim = ary + n;
  bzero(cnt, sizeof(cnt));
  while (x < lim) {
    cnt[(x->key >> 24) & 255]++;
    *ptrs[(x->key >> 16) & 255]++ = *x;
    x++;
  }

  /* Pass 4: distribute by byte 3 back into ary; nothing left to count. */
  PTRS(ary);
  x = alt; lim = alt + n;
  while (x < lim) {
    *ptrs[(x->key >> 24) & 255]++ = *x;
    x++;
  }
#undef PTRS
}

#include

/*
 * Copy one element with a single 128-bit load and store. The intrinsics
 * used are the aligned variants; r1c_sse_sort() asserts that the element is
 * 16 bytes long and that both arrays are 16-byte aligned before using this.
 */
static inline void sse_copy_elt(struct elt *to, struct elt *from)
{
  __m128i m = _mm_load_si128((__m128i *) from);
  _mm_store_si128((__m128i *) to, m);
}

/*
 * The same algorithm as r1c_sort(), except that elements are moved by
 * sse_copy_elt() instead of a structure assignment.
 */
static void r1c_sse_sort(void)
{
  uint cnt[256];
  struct elt *ptrs[256], *x, *lim;

  /* Preconditions of the aligned 128-bit copy. */
  ASSERT(sizeof(struct elt) == 16);
  ASSERT(!((uintptr_t)alt & 15));
  ASSERT(!((uintptr_t)ary & 15));

  /* Count occurrences of the lowest byte. */
  x = ary; lim = ary + n;
  bzero(cnt, sizeof(cnt));
  while (x < lim)
    cnt[x++->key & 255]++;

  /* Turn cnt[] into bucket start pointers inside the array `start`; overwrites x. */
#define PTRS(start) x=start; for (uint i=0; i<256; i++) { ptrs[i]=x; x+=cnt[i]; }

  /* Pass 1: ary -> alt by byte 0, counting byte 1. */
  PTRS(alt);
  x = ary; lim = ary + n;
  bzero(cnt, sizeof(cnt));
  while (x < lim) {
    cnt[(x->key >> 8) & 255]++;
    sse_copy_elt(ptrs[x->key & 255]++, x);
    x++;
  }

  /* Pass 2: alt -> ary by byte 1, counting byte 2. */
  PTRS(ary);
  x = alt; lim = alt + n;
  bzero(cnt, sizeof(cnt));
  while (x < lim) {
    cnt[(x->key >> 16) & 255]++;
    sse_copy_elt(ptrs[(x->key >> 8) & 255]++, x);
    x++;
  }

  /* Pass 3: ary -> alt by byte 2, counting byte 3. */
  PTRS(alt);
  x = ary; lim = ary + n;
  bzero(cnt, sizeof(cnt));
  while (x < lim) {
    cnt[(x->key >> 24) & 255]++;
    sse_copy_elt(ptrs[(x->key >> 16) & 255]++, x);
    x++;
  }

  /* Pass 4: alt -> ary by byte 3. */
  PTRS(ary);
  x = alt; lim = alt + n;
  while (x < lim) {
    sse_copy_elt(ptrs[(x->key >> 24) & 255]++, x);
    x++;
  }
#undef PTRS
}

/*
 * Manually unrolled version of r1c_sort(). It requires n to be a multiple
 * of 4 (asserted): the initial counting loop handles four elements per
 * iteration and the distribution loops two (or two pairs).
 */
static void r1d_sort(void)
{
  uint cnt[256];
  struct
  elt *ptrs[256], *x, *y, *lim;

  ASSERT(!(n % 4));

  /* Count occurrences of the lowest byte, four elements per iteration. */
  x = ary; lim = ary + n;
  bzero(cnt, sizeof(cnt));
  while (x < lim) {
    cnt[x++->key & 255]++;
    cnt[x++->key & 255]++;
    cnt[x++->key & 255]++;
    cnt[x++->key & 255]++;
  }

  /* Turn cnt[] into bucket start pointers inside the array `start`; overwrites x. */
#define PTRS(start) x=start; for (uint i=0; i<256; i++) { ptrs[i]=x; x+=cnt[i]; }

  /*
   * Pass 1: ary -> alt by byte 0, counting byte 1. The input is read as two
   * interleaved streams, x over the first half and y over the second half.
   * This changes the relative order of elements sharing the same low byte,
   * which does not matter here because no earlier pass has ordered them yet.
   */
  PTRS(alt);
  x = ary; y = ary+n/2; lim = ary + n/2;
  bzero(cnt, sizeof(cnt));
  while (x < lim) {
    cnt[(x->key >> 8) & 255]++;
    cnt[(y->key >> 8) & 255]++;
    *ptrs[x->key & 255]++ = *x;
    *ptrs[y->key & 255]++ = *y;
    x++, y++;
    cnt[(x->key >> 8) & 255]++;
    cnt[(y->key >> 8) & 255]++;
    *ptrs[x->key & 255]++ = *x;
    *ptrs[y->key & 255]++ = *y;
    x++, y++;
  }

  /* Pass 2: alt -> ary by byte 1, counting byte 2; two elements per iteration. */
  PTRS(ary);
  x = alt; lim = alt + n;
  bzero(cnt, sizeof(cnt));
  while (x < lim) {
    cnt[(x->key >> 16) & 255]++;
    *ptrs[(x->key >> 8) & 255]++ = *x;
    x++;
    cnt[(x->key >> 16) & 255]++;
    *ptrs[(x->key >> 8) & 255]++ = *x;
    x++;
  }

  /* Pass 3: ary -> alt by byte 2, counting byte 3. */
  PTRS(alt);
  x = ary; lim = ary + n;
  bzero(cnt, sizeof(cnt));
  while (x < lim) {
    cnt[(x->key >> 24) & 255]++;
    *ptrs[(x->key >> 16) & 255]++ = *x;
    x++;
    cnt[(x->key >> 24) & 255]++;
    *ptrs[(x->key >> 16) & 255]++ = *x;
    x++;
  }

  /* Pass 4: alt -> ary by byte 3. */
  PTRS(ary);
  x = alt; lim = alt + n;
  while (x < lim) {
    *ptrs[(x->key >> 24) & 255]++ = *x;
    x++;
    *ptrs[(x->key >> 24) & 255]++ = *x;
    x++;
  }
#undef PTRS
}

/*
 * Hybrid sort: one distribution pass on the top BITS (14) bits of the key,
 * after which every bucket is finished by as_sort(). The result lives in
 * alt, so ary is redirected to it at the end.
 *
 * NOTE(review): the counting loop, the prefix-sum loop and the head of the
 * distribution loop are damaged in this copy (see the note at the top).
 */
static void r2_sort(void)
{
  struct elt *from = ary, *to = alt;
#define BITS 14
  uint cnt[1 << BITS];
  bzero(cnt, sizeof(cnt));
  for (uint i=0; i> (32 - BITS)) & ((1 << BITS) - 1)]++;
  uint pos = 0;
  for (uint i=0; i<(1<> (32 - BITS)) & ((1 << BITS) - 1)]++] = from[i];
  ASSERT(cnt[(1 << BITS)-1] == n);

  /* After distribution cnt[i] is the end of bucket i; sort each bucket separately. */
  pos = 0;
  for (uint i=0; i<(1 << BITS); i++) {
    as_sort(cnt[i] - pos, alt+pos);
    pos = cnt[i];
  }
  ary = alt;
#undef BITS
}

/*
 * Recursive radix sort from the most significant bits, BITS (10) bits per
 * level, implemented by the nested function r3() (nested functions are a
 * GNU C extension). A bucket is finished by as_sort() once it is small
 * enough (at most THRESHOLD elements) or, presumably, once LEVELS levels
 * have been done -- the left-hand side of that condition is damaged.
 *
 * r3(from, to, n, lev) distributes `from` into `to`. A bucket finished at
 * this level is sorted inside `to` and copied back to `from` when the
 * parity of lev differs from ODDEVEN; otherwise the recursion continues
 * with the roles of the two buffers exchanged. With ODDEVEN == 0 and the
 * top-level call r3(ary, alt, n, 1) this leaves all finished buckets in the
 * buffer that ary points to.
 *
 * NOTE(review): the counting, prefix-sum and distribution loops are
 * damaged; the "#if" line matching the "#else"/"#endif" below was lost as
 * well, and so was the loop header that opened the brace closed after
 * "pos = cnt[i];".
 */
static void r3_sort(void)
{
#define BITS 10
#define LEVELS 2
#define BUCKS (1 << BITS)
#define THRESHOLD 5000
#define ODDEVEN 0

  auto void r3(struct elt *from, struct elt *to, uint n, uint lev);
  void r3(struct elt *from, struct elt *to, uint n, uint lev)
  {
    /* Level 1 looks at the top BITS bits, level 2 at the next BITS bits, ... */
    uint sh = 32 - lev*BITS;
    uint cnt[BUCKS];
    bzero(cnt, sizeof(cnt));
    for (uint i=0; i> sh) & (BUCKS - 1)]++;
    uint pos = 0;
    for (uint i=0; i> sh) & (BUCKS - 1)]++] = from[i];
#else
    sse_copy_elt(&to[cnt[(from[i].key >> sh) & (BUCKS - 1)]++], &from[i]);
#endif
    pos = 0;
    for (uint i=0; i= LEVELS || l <= THRESHOLD) {
        as_sort(l, to+pos);
        if ((lev % 2) != ODDEVEN)
          memcpy(from+pos, to+pos, l * sizeof(struct elt));
      } else
        r3(to+pos, from+pos, l, lev+1);
      pos = cnt[i];
    }
  }

  r3(ary, alt, n, 1);
  if (ODDEVEN)
    ary = alt;

#undef ODDEVEN
#undef THRESHOLD
#undef BUCKS
#undef LEVELS
#undef BITS
}

/*
 * Merge the sorted runs [x, xl) and [y, yl) into the output starting at z
 * and return the position just past the last element written. Equal keys
 * are taken from the first run first. Both runs must be non-empty, since
 * the first comparison dereferences x and y unconditionally.
 */
static inline struct elt *mrg(struct elt *x, struct elt *xl, struct elt *y, struct elt *yl, struct elt *z)
{
  for (;;) {
    if (x->key <= y->key) {
      *z++ = *x++;
      if (x >= xl)
        goto xend;
    } else {
      *z++ = *y++;
      if (y >= yl)
        goto yend;
    }
  }

  /* First run exhausted: copy the rest of the second one. */
 xend:
  while (y < yl)
    *z++ = *y++;
  return z;

  /* Second run exhausted: copy the rest of the first one. */
 yend:
  while (x < xl)
    *z++ = *x++;
  return z;
}

/*
 * Bottom-up merge sort alternating between ary and alt.
 *
 * Level 0 is special-cased: adjacent pairs are ordered while being copied
 * from ary to alt. Every following level lev merges runs of 1 << lev
 * elements, reading from alt on odd levels and from ary on even levels.
 * When the loop ends, an odd lev means that the last output was written to
 * alt, so ary is redirected to it.
 */
static void mergesort(void)
{
  struct elt *from, *to;
  uint lev = 0;
  if (1) {
    /* last = end of the part consisting of complete pairs. */
    struct elt *x = ary, *z = alt, *last = ary + (n & ~1U);
    while (x < last) {
      /*
       * NOTE(review): the comparison is strict, so a pair with equal keys
       * is written in exchanged order; this pass is therefore not stable.
       */
      if (x[0].key < x[1].key)
        *z++ = *x++, *z++ = *x++;
      else {
        *z++ = x[1];
        *z++ = x[0];
        x += 2;
      }
    }
    /* A single left-over element is copied as it is. */
    if (n % 2)
      *z = *x;
    lev++;
  }
  for (; (1U << lev) < n; lev++) {
    if (lev % 2)
      from = alt, to = ary;
    else
      from = ary, to = alt;
    struct elt *x, *z, *last;
    x = from;
    z = to;
    last = from + n;
    uint step = 1 << lev;
    /* Merge all complete pairs of runs. */
    while (x + 2*step <= last) {
      z = mrg(x, x+step, x+step, x+2*step, z);
      x += 2*step;
    }
    /* Tail: either one full run plus a shorter one, or a single run which is only copied. */
    if (x + step < last)
      mrg(x, x+step, x+step, last, z);
    else
      memcpy(z, x, (byte*)last - (byte*)x);
  }
  if (lev % 2)
    ary = alt;
}

/*
 * Recursive worker of samplesort(): classifies n elements of `ar` into
 * WAYS (256) buckets.
 *
 * Visible parts: k[] holds WAYS elements whose keys are compared against,
 * the FW() steps narrow down the bucket number w with deltas 128, 64, ... 1,
 * the number is stored in wbuf[i] and counted in cnt[w]. Later, buckets are
 * either processed recursively (the damaged condition ends in ">= 1000") or
 * finished by as_sort() in place and copied from y to z whenever al differs
 * from dest.
 *
 * NOTE(review): the filling of k[], the "#define FW" line, the per-element
 * loop header and the code moving elements into their buckets are lost in
 * this copy; what the parameters ar, al and dest mean beyond the above
 * should be taken from the original source.
 */
static void sampsort(uint n, struct elt *ar, struct elt *al, struct elt *dest, byte *wbuf)
{
#define WAYS 256
  struct elt k[WAYS];
  uint cnt[WAYS];
  bzero(cnt, sizeof(cnt));
  for (uint i=0; i k[w+delta].key) w += delta
      FW(128);
      FW(64);
      FW(32);
      FW(16);
      FW(8);
      FW(4);
      FW(2);
      FW(1);
      wbuf[i] = w;
      cnt[w]++;
    }
  struct elt *y = al, *way[WAYS], *z;
  for (uint i=0; i= 1000)
        sampsort(cnt[i], y, z, dest, wbuf);
      else {
        as_sort(cnt[i], y);
        if (al != dest)
          memcpy(z, y, cnt[i]*sizeof(struct elt));
      }
      /* Advance both cursors past this bucket. */
      y += cnt[i];
      z += cnt[i];
    }
#undef FW
#undef WAYS
}
/*
 * Sample sort of the global array: allocates one byte of auxiliary space
 * per element (handed to sampsort() as wbuf), runs sampsort() with ary as
 * both source and destination and alt as the second buffer, then frees the
 * auxiliary space.
 */
static void samplesort(void)
{
  byte *aux = xmalloc(n);
  sampsort(n, ary, alt, ary, aux);
  xfree(aux);
}

/*
 * Variant of sampsort() which classifies two elements (k1 and k2) per loop
 * iteration, interleaving the FW1()/FW2() search steps of both; a possible
 * odd last element is classified alone afterwards. Bucket numbers are
 * written sequentially through ww and counted in cnt[].
 *
 * Buckets are then either processed recursively (the damaged condition ends
 * in ">= 1000") or finished by as_sort() in place and copied from y to z
 * whenever al differs from dest.
 *
 * NOTE(review): this copy is damaged -- the text between a '<' directly
 * followed by a letter and the next '>' has been removed in several places.
 * Lost here: the filling of k[], the declarations of k1, k2, kend and ww,
 * the head of the "#define FW1" line, the main loop header, and the code
 * moving elements into their buckets.
 */
static void sampsort2(uint n, struct elt *ar, struct elt *al, struct elt *dest, byte *wbuf)
{
#define WAYS 256
  struct elt k[WAYS];
  uint cnt[WAYS];
  bzero(cnt, sizeof(cnt));
  for (uint i=0; ikey > k[w1+delta].key) w1 += delta
#define FW2(delta) if (k2->key > k[w2+delta].key) w2 += delta
      FW1(128); FW2(128);
      FW1(64); FW2(64);
      FW1(32); FW2(32);
      FW1(16); FW2(16);
      FW1(8); FW2(8);
      FW1(4); FW2(4);
      FW1(2); FW2(2);
      FW1(1); FW2(1);
      *ww++ = w1;
      *ww++ = w2;
      cnt[w1]++;
      cnt[w2]++;
      k1 += 2;
      k2 += 2;
    }
  /* One element may remain when the count is odd. */
  if (k1 < kend) {
    uint w1 = 0;
    FW1(128); FW1(64); FW1(32); FW1(16);
    FW1(8); FW1(4); FW1(2); FW1(1);
    *ww++ = w1;
    cnt[w1]++;
  }
  struct elt *y = al, *way[WAYS], *z;
  for (uint i=0; i= 1000)
        sampsort2(cnt[i], y, z, dest, wbuf);
      else {
        as_sort(cnt[i], y);
        if (al != dest)
          memcpy(z, y, cnt[i]*sizeof(struct elt));
      }
      /* Advance both cursors past this bucket. */
      y += cnt[i];
      z += cnt[i];
    }
#undef FW1
#undef FW2
#undef WAYS
}

/* Same driver as samplesort(), but using sampsort2() as the worker. */
static void samplesort2(void)
{
  byte *aux = xmalloc(n);
  sampsort2(n, ary, alt, ary, aux);
  xfree(aux);
}

/*
 * Heap sort of ary[] using the HEAP_* macros.
 *
 * H_LESS is intentionally "greater than", so the root of the heap is the
 * element with the largest key. Presumably HEAP_DELETE_MIN moves the root
 * to the slot vacated at the end of the heap, which yields ascending order
 * once the heap is empty -- confirm against the heap macros.
 *
 * heap points one element before ary, so heap[1] is ary[0].
 * NOTE(review): forming the pointer ary-1 is outside the array object and
 * formally undefined in ISO C, even though only heap[1..n] is meant to be
 * accessed.
 */
static void heapsort(void)
{
#define H_LESS(_a,_b) ((_a).key > (_b).key)
  struct elt *heap = ary-1;
  HEAP_INIT(struct elt, heap, n, H_LESS, HEAP_SWAP);
  uint nn = n;
  while (nn)
    HEAP_DELETE_MIN(struct elt, heap, nn, H_LESS, HEAP_SWAP);
#undef H_LESS
}

/*
 * The same as heapsort(), but the heap consists of the pointers in ind[]
 * and the elements themselves stay where they are.
 */
static void heapsort_ind(void)
{
#define H_LESS(_a,_b) ((_a)->key > (_b)->key)
  struct elt **heap = ind-1;
  HEAP_INIT(struct elt *, heap, n, H_LESS, HEAP_SWAP);
  uint nn = n;
  while (nn)
    HEAP_DELETE_MIN(struct elt *, heap, nn, H_LESS, HEAP_SWAP);
#undef H_LESS
}

/*
 * Prepare the input: point ary and alt at the two allocated buffers,
 * initialize an MD5 context and a zeroed 16-word block, and reset sum.
 *
 * NOTE(review): the rest of this function and the beginning of the checking
 * code that followed it are lost in this copy. What remains after the gap
 * walks ind[], dies with "Missorted" when a key is smaller than its
 * predecessor, XORs all keys into s, dies with "Corrupted" if s differs
 * from sum, and frees ind.
 * NOTE(review): the loop index is declared uint in the visible loop header
 * while the message uses "%d"; "%u" would match the type.
 */
static void mk_ary(void)
{
  ary = array0;
  alt = array1;
  md5_context ctx;
  md5_init(&ctx);
  u32 block[16];
  bzero(block, sizeof(block));

  sum = 0;
  for (uint i=0; ikey;
  for (uint i=1; ikey < ind[i-1]->key)
      die("Missorted at %d", i);
    else
      s ^= ind[i]->key;
  if (s != sum)
    die("Corrupted");
  xfree(ind);
}

/*
 * Program entry: initializes logging, parses the command line with
 * cf_getopt() and allocates the two element arrays.
 *
 * The only option letter visible in the option string is '1', which sets
 * bit 1 of op; any other option ends the program with "usage?".
 *
 * NOTE(review): the function is cut off in the middle of the loop that
 * follows the allocations; the remainder is not visible here and is left
 * exactly as found.
 */
int main(int argc, char **argv)
{
  log_init(argv[0]);

  int opt;
  uint op = 0;
  while ((opt = cf_getopt(argc, argv, CF_SHORT_OPTS "1",
  CF_NO_LONG_OPTS, NULL)) >= 0)
    switch (opt)
      {
      case '1':
        op |= (1 << (opt - '0'));
        break;
      default:
        die("usage?");
      }

  array0 = alloc_elts(n);
  array1 = alloc_elts(n);
  for (uint i=0; i