You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
765 lines
15 KiB
765 lines
15 KiB
2 months ago
|
/*
|
||
|
* Experiments with various sorting algorithms
|
||
|
*
|
||
|
* (c) 2007--2008 Martin Mares <mj@ucw.cz>
|
||
|
*/
|
||
|
|
||
|
#include <ucw/lib.h>
|
||
|
#include <ucw/getopt.h>
|
||
|
#include <ucw/md5.h>
|
||
|
#include <ucw/heap.h>
|
||
|
#include <ucw/time.h>
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
#include <sys/mman.h>
|
||
|
#include <sys/user.h>
|
||
|
|
||
|
struct elt {
|
||
|
u32 key;
|
||
|
u32 ballast[3];
|
||
|
};
|
||
|
|
||
|
static struct elt *ary, *alt, **ind, *array0, *array1;
|
||
|
static uint n = 10000000;
|
||
|
static u32 sum;
|
||
|
|
||
|
/* Allocate room for n elements via big_alloc(). */
static struct elt *alloc_elts(uint n)
{
  size_t bytes = n * sizeof(struct elt);

  return big_alloc(bytes);
}
|
||
|
|
||
|
/* Release an array of n elements obtained from alloc_elts(). */
static void free_elts(struct elt *a, uint n)
{
  size_t bytes = n * sizeof(struct elt);

  big_free(a, bytes);
}
|
||
|
|
||
|
static int comp(const void *x, const void *y)
|
||
|
{
|
||
|
const struct elt *xx = x, *yy = y;
|
||
|
return (xx->key < yy->key) ? -1 : (xx->key > yy->key) ? 1 : 0;
|
||
|
}
|
||
|
|
||
|
/*
 * qsort() comparator for arrays of element pointers: dereferences both
 * slots and compares the pointed-to elements with comp().
 */
static int comp_ind(const void *x, const void *y)
{
  const struct elt *ex = *(const struct elt * const *) x;
  const struct elt *ey = *(const struct elt * const *) y;

  return comp(ex, ey);
}
|
||
|
|
||
|
/*
 * Instantiate the array-sorter template for direct sorting of elements.
 * The generated entry point is as_sort(count, a), where `a' is the extra
 * argument declared below (see the calls elsewhere in this file).
 */
#define ASORT_PREFIX(x) as_##x
#define ASORT_EXTRA_ARGS , struct elt *a
#define ASORT_KEY_TYPE u32
#define ASORT_ELT(i) a[i].key
#define ASORT_SWAP(i,j) do {    \
    struct elt t=a[i];          \
    a[i]=a[j];                  \
    a[j]=t;                     \
  } while (0)
#include <ucw/sorter/array-simple.h>
|
||
|
|
||
|
/*
 * Instantiate the array-sorter template a second time, for indirect sorting:
 * asi_sort(count) permutes the global pointer array ind[] by the keys the
 * pointers refer to; the elements themselves are not moved.
 */
#define ASORT_PREFIX(x) asi_##x
#define ASORT_KEY_TYPE u32
#define ASORT_ELT(i) ind[i]->key
#define ASORT_SWAP(i,j) do {    \
    struct elt *t=ind[i];       \
    ind[i]=ind[j];              \
    ind[j]=t;                   \
  } while (0)
#include <ucw/sorter/array-simple.h>
|
||
|
|
||
|
static void r1_sort(void)
|
||
|
{
|
||
|
struct elt *from = ary, *to = alt, *tmp;
|
||
|
#define BITS 8
|
||
|
uint cnt[1 << BITS];
|
||
|
for (uint sh=0; sh<32; sh+=BITS)
|
||
|
{
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
for (uint i=0; i<n; i++)
|
||
|
cnt[(from[i].key >> sh) & ((1 << BITS) - 1)]++;
|
||
|
uint pos = 0;
|
||
|
for (uint i=0; i<(1<<BITS); i++)
|
||
|
{
|
||
|
uint c = cnt[i];
|
||
|
cnt[i] = pos;
|
||
|
pos += c;
|
||
|
}
|
||
|
ASSERT(pos == n);
|
||
|
for (uint i=0; i<n; i++)
|
||
|
to[cnt[(from[i].key >> sh) & ((1 << BITS) - 1)]++] = from[i];
|
||
|
ASSERT(cnt[(1 << BITS)-1] == n);
|
||
|
tmp=from, from=to, to=tmp;
|
||
|
}
|
||
|
ary = from;
|
||
|
#undef BITS
|
||
|
}
|
||
|
|
||
|
static void r1b_sort(void)
|
||
|
{
|
||
|
struct elt *from = ary, *to = alt, *tmp;
|
||
|
#define BITS 8
|
||
|
uint cnt[1 << BITS], cnt2[1 << BITS];
|
||
|
for (uint sh=0; sh<32; sh+=BITS)
|
||
|
{
|
||
|
if (sh)
|
||
|
memcpy(cnt, cnt2, sizeof(cnt));
|
||
|
else
|
||
|
{
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
for (uint i=0; i<n; i++)
|
||
|
cnt[(from[i].key >> sh) & ((1 << BITS) - 1)]++;
|
||
|
}
|
||
|
uint pos = 0;
|
||
|
for (uint i=0; i<(1<<BITS); i++)
|
||
|
{
|
||
|
uint c = cnt[i];
|
||
|
cnt[i] = pos;
|
||
|
pos += c;
|
||
|
}
|
||
|
ASSERT(pos == n);
|
||
|
bzero(cnt2, sizeof(cnt2));
|
||
|
for (uint i=0; i<n; i++)
|
||
|
{
|
||
|
cnt2[(from[i].key >> (sh + BITS)) & ((1 << BITS) - 1)]++;
|
||
|
to[cnt[(from[i].key >> sh) & ((1 << BITS) - 1)]++] = from[i];
|
||
|
}
|
||
|
ASSERT(cnt[(1 << BITS)-1] == n);
|
||
|
tmp=from, from=to, to=tmp;
|
||
|
}
|
||
|
ary = from;
|
||
|
#undef BITS
|
||
|
}
|
||
|
|
||
|
static void r1c_sort(void)
|
||
|
{
|
||
|
uint cnt[256];
|
||
|
struct elt *ptrs[256], *x, *lim;
|
||
|
|
||
|
x = ary; lim = ary + n;
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
while (x < lim)
|
||
|
cnt[x++->key & 255]++;
|
||
|
|
||
|
#define PTRS(start) x=start; for (uint i=0; i<256; i++) { ptrs[i]=x; x+=cnt[i]; }
|
||
|
|
||
|
PTRS(alt);
|
||
|
x = ary; lim = ary + n;
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
while (x < lim)
|
||
|
{
|
||
|
cnt[(x->key >> 8) & 255]++;
|
||
|
*ptrs[x->key & 255]++ = *x;
|
||
|
x++;
|
||
|
}
|
||
|
|
||
|
PTRS(ary);
|
||
|
x = alt; lim = alt + n;
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
while (x < lim)
|
||
|
{
|
||
|
cnt[(x->key >> 16) & 255]++;
|
||
|
*ptrs[(x->key >> 8) & 255]++ = *x;
|
||
|
x++;
|
||
|
}
|
||
|
|
||
|
PTRS(alt);
|
||
|
x = ary; lim = ary + n;
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
while (x < lim)
|
||
|
{
|
||
|
cnt[(x->key >> 24) & 255]++;
|
||
|
*ptrs[(x->key >> 16) & 255]++ = *x;
|
||
|
x++;
|
||
|
}
|
||
|
|
||
|
PTRS(ary);
|
||
|
x = alt; lim = alt + n;
|
||
|
while (x < lim)
|
||
|
{
|
||
|
*ptrs[(x->key >> 24) & 255]++ = *x;
|
||
|
x++;
|
||
|
}
|
||
|
#undef PTRS
|
||
|
}
|
||
|
|
||
|
#include <emmintrin.h>
|
||
|
|
||
|
/*
 * Copy one element with a single 128-bit SSE2 load and store.
 * The aligned load/store intrinsics are used, so both pointers have to be
 * 16-byte aligned (r1c_sse_sort() asserts this for the array bases).
 */
static inline void sse_copy_elt(struct elt *to, struct elt *from)
{
  _mm_store_si128((__m128i *) to, _mm_load_si128((__m128i *) from));
}
|
||
|
|
||
|
static void r1c_sse_sort(void)
|
||
|
{
|
||
|
uint cnt[256];
|
||
|
struct elt *ptrs[256], *x, *lim;
|
||
|
|
||
|
ASSERT(sizeof(struct elt) == 16);
|
||
|
ASSERT(!((uintptr_t)alt & 15));
|
||
|
ASSERT(!((uintptr_t)ary & 15));
|
||
|
|
||
|
x = ary; lim = ary + n;
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
while (x < lim)
|
||
|
cnt[x++->key & 255]++;
|
||
|
|
||
|
#define PTRS(start) x=start; for (uint i=0; i<256; i++) { ptrs[i]=x; x+=cnt[i]; }
|
||
|
|
||
|
PTRS(alt);
|
||
|
x = ary; lim = ary + n;
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
while (x < lim)
|
||
|
{
|
||
|
cnt[(x->key >> 8) & 255]++;
|
||
|
sse_copy_elt(ptrs[x->key & 255]++, x);
|
||
|
x++;
|
||
|
}
|
||
|
|
||
|
PTRS(ary);
|
||
|
x = alt; lim = alt + n;
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
while (x < lim)
|
||
|
{
|
||
|
cnt[(x->key >> 16) & 255]++;
|
||
|
sse_copy_elt(ptrs[(x->key >> 8) & 255]++, x);
|
||
|
x++;
|
||
|
}
|
||
|
|
||
|
PTRS(alt);
|
||
|
x = ary; lim = ary + n;
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
while (x < lim)
|
||
|
{
|
||
|
cnt[(x->key >> 24) & 255]++;
|
||
|
sse_copy_elt(ptrs[(x->key >> 16) & 255]++, x);
|
||
|
x++;
|
||
|
}
|
||
|
|
||
|
PTRS(ary);
|
||
|
x = alt; lim = alt + n;
|
||
|
while (x < lim)
|
||
|
{
|
||
|
sse_copy_elt(ptrs[(x->key >> 24) & 255]++, x);
|
||
|
x++;
|
||
|
}
|
||
|
#undef PTRS
|
||
|
}
|
||
|
|
||
|
static void r1d_sort(void)
|
||
|
{
|
||
|
uint cnt[256];
|
||
|
struct elt *ptrs[256], *x, *y, *lim;
|
||
|
|
||
|
ASSERT(!(n % 4));
|
||
|
|
||
|
x = ary; lim = ary + n;
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
while (x < lim)
|
||
|
{
|
||
|
cnt[x++->key & 255]++;
|
||
|
cnt[x++->key & 255]++;
|
||
|
cnt[x++->key & 255]++;
|
||
|
cnt[x++->key & 255]++;
|
||
|
}
|
||
|
|
||
|
#define PTRS(start) x=start; for (uint i=0; i<256; i++) { ptrs[i]=x; x+=cnt[i]; }
|
||
|
|
||
|
PTRS(alt);
|
||
|
x = ary; y = ary+n/2; lim = ary + n/2;
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
while (x < lim)
|
||
|
{
|
||
|
cnt[(x->key >> 8) & 255]++;
|
||
|
cnt[(y->key >> 8) & 255]++;
|
||
|
*ptrs[x->key & 255]++ = *x;
|
||
|
*ptrs[y->key & 255]++ = *y;
|
||
|
x++, y++;
|
||
|
cnt[(x->key >> 8) & 255]++;
|
||
|
cnt[(y->key >> 8) & 255]++;
|
||
|
*ptrs[x->key & 255]++ = *x;
|
||
|
*ptrs[y->key & 255]++ = *y;
|
||
|
x++, y++;
|
||
|
}
|
||
|
|
||
|
PTRS(ary);
|
||
|
x = alt; lim = alt + n;
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
while (x < lim)
|
||
|
{
|
||
|
cnt[(x->key >> 16) & 255]++;
|
||
|
*ptrs[(x->key >> 8) & 255]++ = *x;
|
||
|
x++;
|
||
|
cnt[(x->key >> 16) & 255]++;
|
||
|
*ptrs[(x->key >> 8) & 255]++ = *x;
|
||
|
x++;
|
||
|
}
|
||
|
|
||
|
PTRS(alt);
|
||
|
x = ary; lim = ary + n;
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
while (x < lim)
|
||
|
{
|
||
|
cnt[(x->key >> 24) & 255]++;
|
||
|
*ptrs[(x->key >> 16) & 255]++ = *x;
|
||
|
x++;
|
||
|
cnt[(x->key >> 24) & 255]++;
|
||
|
*ptrs[(x->key >> 16) & 255]++ = *x;
|
||
|
x++;
|
||
|
}
|
||
|
|
||
|
PTRS(ary);
|
||
|
x = alt; lim = alt + n;
|
||
|
while (x < lim)
|
||
|
{
|
||
|
*ptrs[(x->key >> 24) & 255]++ = *x;
|
||
|
x++;
|
||
|
*ptrs[(x->key >> 24) & 255]++ = *x;
|
||
|
x++;
|
||
|
}
|
||
|
#undef PTRS
|
||
|
}
|
||
|
|
||
|
static void r2_sort(void)
|
||
|
{
|
||
|
struct elt *from = ary, *to = alt;
|
||
|
#define BITS 14
|
||
|
uint cnt[1 << BITS];
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
for (uint i=0; i<n; i++)
|
||
|
cnt[(from[i].key >> (32 - BITS)) & ((1 << BITS) - 1)]++;
|
||
|
uint pos = 0;
|
||
|
for (uint i=0; i<(1<<BITS); i++)
|
||
|
{
|
||
|
uint c = cnt[i];
|
||
|
cnt[i] = pos;
|
||
|
pos += c;
|
||
|
}
|
||
|
ASSERT(pos == n);
|
||
|
for (uint i=0; i<n; i++)
|
||
|
to[cnt[(from[i].key >> (32 - BITS)) & ((1 << BITS) - 1)]++] = from[i];
|
||
|
ASSERT(cnt[(1 << BITS)-1] == n);
|
||
|
|
||
|
pos = 0;
|
||
|
for (uint i=0; i<(1 << BITS); i++)
|
||
|
{
|
||
|
as_sort(cnt[i] - pos, alt+pos);
|
||
|
pos = cnt[i];
|
||
|
}
|
||
|
ary = alt;
|
||
|
#undef BITS
|
||
|
}
|
||
|
|
||
|
static void r3_sort(void)
|
||
|
{
|
||
|
#define BITS 10
|
||
|
#define LEVELS 2
|
||
|
#define BUCKS (1 << BITS)
|
||
|
#define THRESHOLD 5000
|
||
|
#define ODDEVEN 0
|
||
|
|
||
|
auto void r3(struct elt *from, struct elt *to, uint n, uint lev);
|
||
|
void r3(struct elt *from, struct elt *to, uint n, uint lev)
|
||
|
{
|
||
|
uint sh = 32 - lev*BITS;
|
||
|
uint cnt[BUCKS];
|
||
|
bzero(cnt, sizeof(cnt));
|
||
|
for (uint i=0; i<n; i++)
|
||
|
cnt[(from[i].key >> sh) & (BUCKS - 1)]++;
|
||
|
uint pos = 0;
|
||
|
for (uint i=0; i<BUCKS; i++)
|
||
|
{
|
||
|
uint c = cnt[i];
|
||
|
cnt[i] = pos;
|
||
|
pos += c;
|
||
|
}
|
||
|
ASSERT(pos == n);
|
||
|
for (uint i=0; i<n; i++)
|
||
|
#if 1
|
||
|
to[cnt[(from[i].key >> sh) & (BUCKS - 1)]++] = from[i];
|
||
|
#else
|
||
|
sse_copy_elt(&to[cnt[(from[i].key >> sh) & (BUCKS - 1)]++], &from[i]);
|
||
|
#endif
|
||
|
pos = 0;
|
||
|
for (uint i=0; i<BUCKS; i++)
|
||
|
{
|
||
|
uint l = cnt[i]-pos;
|
||
|
if (lev >= LEVELS || l <= THRESHOLD)
|
||
|
{
|
||
|
as_sort(l, to+pos);
|
||
|
if ((lev % 2) != ODDEVEN)
|
||
|
memcpy(from+pos, to+pos, l * sizeof(struct elt));
|
||
|
}
|
||
|
else
|
||
|
r3(to+pos, from+pos, l, lev+1);
|
||
|
pos = cnt[i];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
r3(ary, alt, n, 1);
|
||
|
if (ODDEVEN)
|
||
|
ary = alt;
|
||
|
|
||
|
#undef ODDEVEN
|
||
|
#undef THRESHOLD
|
||
|
#undef BUCKS
|
||
|
#undef LEVELS
|
||
|
#undef BITS
|
||
|
}
|
||
|
|
||
|
static inline struct elt *mrg(struct elt *x, struct elt *xl, struct elt *y, struct elt *yl, struct elt *z)
|
||
|
{
|
||
|
for (;;)
|
||
|
{
|
||
|
if (x->key <= y->key)
|
||
|
{
|
||
|
*z++ = *x++;
|
||
|
if (x >= xl)
|
||
|
goto xend;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
*z++ = *y++;
|
||
|
if (y >= yl)
|
||
|
goto yend;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
xend:
|
||
|
while (y < yl)
|
||
|
*z++ = *y++;
|
||
|
return z;
|
||
|
|
||
|
yend:
|
||
|
while (x < xl)
|
||
|
*z++ = *x++;
|
||
|
return z;
|
||
|
}
|
||
|
|
||
|
static void mergesort(void)
|
||
|
{
|
||
|
struct elt *from, *to;
|
||
|
uint lev = 0;
|
||
|
if (1)
|
||
|
{
|
||
|
struct elt *x = ary, *z = alt, *last = ary + (n & ~1U);
|
||
|
while (x < last)
|
||
|
{
|
||
|
if (x[0].key < x[1].key)
|
||
|
*z++ = *x++, *z++ = *x++;
|
||
|
else
|
||
|
{
|
||
|
*z++ = x[1];
|
||
|
*z++ = x[0];
|
||
|
x += 2;
|
||
|
}
|
||
|
}
|
||
|
if (n % 2)
|
||
|
*z = *x;
|
||
|
lev++;
|
||
|
}
|
||
|
for (; (1U << lev) < n; lev++)
|
||
|
{
|
||
|
if (lev % 2)
|
||
|
from = alt, to = ary;
|
||
|
else
|
||
|
from = ary, to = alt;
|
||
|
struct elt *x, *z, *last;
|
||
|
x = from;
|
||
|
z = to;
|
||
|
last = from + n;
|
||
|
uint step = 1 << lev;
|
||
|
while (x + 2*step <= last)
|
||
|
{
|
||
|
z = mrg(x, x+step, x+step, x+2*step, z);
|
||
|
x += 2*step;
|
||
|
}
|
||
|
if (x + step < last)
|
||
|
mrg(x, x+step, x+step, last, z);
|
||
|
else
|
||
|
memcpy(z, x, (byte*)last - (byte*)x);
|
||
|
}
|
||
|
if (lev % 2)
|
||
|
ary = alt;
|
||
|
}
|
||
|
|
||
|
/*
 * One level of sample sort.
 *
 *   n     number of elements
 *   ar    input elements
 *   al    buffer of the same size the elements are distributed into
 *   dest  compared with al to decide whether finished buckets are copied back
 *   wbuf  scratch array of at least n bytes; receives each element's bucket number
 *
 * WAYS randomly chosen elements are sorted and used as splitters; each
 * element's bucket is found by an unrolled binary search over them. After
 * distribution from ar to al, buckets with at least 1000 elements are sorted
 * recursively with the roles of the two buffers swapped, smaller ones by
 * as_sort() followed by a copy back to ar unless al == dest.
 *
 * NOTE(review): if every element of a large bucket lands in one bucket again
 * (e.g. many equal keys), the recursion does not appear to make progress --
 * confirm whether such inputs need to be supported.
 */
static void sampsort(uint n, struct elt *ar, struct elt *al, struct elt *dest, byte *wbuf)
{
#define WAYS 256
#define FW(delta) do { if (ar[i].key > k[w+delta].key) w += delta; } while (0)
  struct elt k[WAYS];
  uint cnt[WAYS];

  /* Pick and sort the splitters */
  for (uint s = 0; s < WAYS; s++)
    k[s] = ar[random() % n];
  as_sort(WAYS, k);

  /* Classify every element and count the bucket sizes */
  bzero(cnt, sizeof(cnt));
  for (uint i = 0; i < n; i++)
    {
      uint w = 0;
      FW(128); FW(64); FW(32); FW(16);
      FW(8); FW(4); FW(2); FW(1);
      wbuf[i] = w;
      cnt[w]++;
    }

  /* Bucket start pointers inside al */
  struct elt *y = al, *way[WAYS];
  for (uint b = 0; b < WAYS; b++)
    {
      way[b] = y;
      y += cnt[b];
    }
  ASSERT(y == al+n);

  /* Distribute ar -> al */
  for (uint i = 0; i < n; i++)
    *way[wbuf[i]]++ = ar[i];

  /* Finish the buckets */
  uint off = 0;
  for (uint b = 0; b < WAYS; b++)
    {
      uint len = cnt[b];
      struct elt *bucket = al + off;
      struct elt *back = ar + off;

      if (len >= 1000)
        sampsort(len, bucket, back, dest, wbuf);
      else
        {
          as_sort(len, bucket);
          if (al != dest)
            memcpy(back, bucket, len*sizeof(struct elt));
        }
      off += len;
    }
#undef FW
#undef WAYS
}
|
||
|
|
||
|
static void samplesort(void)
|
||
|
{
|
||
|
byte *aux = xmalloc(n);
|
||
|
sampsort(n, ary, alt, ary, aux);
|
||
|
xfree(aux);
|
||
|
}
|
||
|
|
||
|
/*
 * Variant of sampsort() whose classification loop handles two elements per
 * iteration, interleaving the steps of their two splitter searches; a
 * possible odd last element is classified separately. Parameters and the
 * distribution / recursion phases are the same as in sampsort():
 *
 *   n     number of elements
 *   ar    input elements
 *   al    buffer of the same size the elements are distributed into
 *   dest  compared with al to decide whether finished buckets are copied back
 *   wbuf  scratch array of at least n bytes; receives each element's bucket number
 */
static void sampsort2(uint n, struct elt *ar, struct elt *al, struct elt *dest, byte *wbuf)
{
#define WAYS 256
#define FW1(delta) do { if (k1->key > k[w1+delta].key) w1 += delta; } while (0)
#define FW2(delta) do { if (k2->key > k[w2+delta].key) w2 += delta; } while (0)
  struct elt k[WAYS];
  uint cnt[WAYS];

  /* Pick and sort the splitters */
  for (uint s = 0; s < WAYS; s++)
    k[s] = ar[random() % n];
  as_sort(WAYS, k);

  /* Classify the elements pairwise and count the bucket sizes */
  bzero(cnt, sizeof(cnt));
  struct elt *k1 = ar, *k2 = ar+1, *kend = ar+n;
  byte *ww = wbuf;
  for (; k2 < kend; k1 += 2, k2 += 2)
    {
      uint w1 = 0, w2 = 0;
      FW1(128); FW2(128);
      FW1(64); FW2(64);
      FW1(32); FW2(32);
      FW1(16); FW2(16);
      FW1(8); FW2(8);
      FW1(4); FW2(4);
      FW1(2); FW2(2);
      FW1(1); FW2(1);
      *ww++ = w1;
      *ww++ = w2;
      cnt[w1]++;
      cnt[w2]++;
    }
  if (k1 < kend)
    {
      /* Odd element left over */
      uint w1 = 0;
      FW1(128); FW1(64); FW1(32); FW1(16);
      FW1(8); FW1(4); FW1(2); FW1(1);
      *ww++ = w1;
      cnt[w1]++;
    }

  /* Bucket start pointers inside al */
  struct elt *y = al, *way[WAYS];
  for (uint b = 0; b < WAYS; b++)
    {
      way[b] = y;
      y += cnt[b];
    }
  ASSERT(y == al+n);

  /* Distribute ar -> al */
  for (uint i = 0; i < n; i++)
    *way[wbuf[i]]++ = ar[i];

  /* Finish the buckets */
  uint off = 0;
  for (uint b = 0; b < WAYS; b++)
    {
      uint len = cnt[b];
      struct elt *bucket = al + off;
      struct elt *back = ar + off;

      if (len >= 1000)
        sampsort2(len, bucket, back, dest, wbuf);
      else
        {
          as_sort(len, bucket);
          if (al != dest)
            memcpy(back, bucket, len*sizeof(struct elt));
        }
      off += len;
    }
#undef FW1
#undef FW2
#undef WAYS
}
|
||
|
|
||
|
static void samplesort2(void)
|
||
|
{
|
||
|
byte *aux = xmalloc(n);
|
||
|
sampsort2(n, ary, alt, ary, aux);
|
||
|
xfree(aux);
|
||
|
}
|
||
|
|
||
|
static void heapsort(void)
|
||
|
{
|
||
|
#define H_LESS(_a,_b) ((_a).key > (_b).key)
|
||
|
struct elt *heap = ary-1;
|
||
|
HEAP_INIT(struct elt, heap, n, H_LESS, HEAP_SWAP);
|
||
|
uint nn = n;
|
||
|
while (nn)
|
||
|
HEAP_DELETE_MIN(struct elt, heap, nn, H_LESS, HEAP_SWAP);
|
||
|
#undef H_LESS
|
||
|
}
|
||
|
|
||
|
static void heapsort_ind(void)
|
||
|
{
|
||
|
#define H_LESS(_a,_b) ((_a)->key > (_b)->key)
|
||
|
struct elt **heap = ind-1;
|
||
|
HEAP_INIT(struct elt *, heap, n, H_LESS, HEAP_SWAP);
|
||
|
uint nn = n;
|
||
|
while (nn)
|
||
|
HEAP_DELETE_MIN(struct elt *, heap, nn, H_LESS, HEAP_SWAP);
|
||
|
#undef H_LESS
|
||
|
}
|
||
|
|
||
|
static void mk_ary(void)
|
||
|
{
|
||
|
ary = array0;
|
||
|
alt = array1;
|
||
|
md5_context ctx;
|
||
|
md5_init(&ctx);
|
||
|
u32 block[16];
|
||
|
bzero(block, sizeof(block));
|
||
|
|
||
|
sum = 0;
|
||
|
for (uint i=0; i<n; i++)
|
||
|
{
|
||
|
#if 1
|
||
|
if (!(i % 4))
|
||
|
{
|
||
|
block[i%16] = i;
|
||
|
md5_transform(ctx.buf, block);
|
||
|
}
|
||
|
ary[i].key = ctx.buf[i%4];
|
||
|
#else
|
||
|
ary[i].key = i*(~0U/(n-1));
|
||
|
#endif
|
||
|
for (uint j=1; j<sizeof(struct elt)/4; j++)
|
||
|
((u32*)&ary[i])[j] = ROL(ary[i].key, 3*j);
|
||
|
sum ^= ary[i].key;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void chk_ary(void)
|
||
|
{
|
||
|
u32 s = ary[0].key;
|
||
|
for (uint i=1; i<n; i++)
|
||
|
if (ary[i].key < ary[i-1].key)
|
||
|
die("Missorted at %d", i);
|
||
|
else
|
||
|
s ^= ary[i].key;
|
||
|
if (s != sum)
|
||
|
die("Corrupted");
|
||
|
}
|
||
|
|
||
|
static void mk_ind(void)
|
||
|
{
|
||
|
mk_ary();
|
||
|
ind = xmalloc(sizeof(struct elt *) * n);
|
||
|
for (uint i=0; i<n; i++)
|
||
|
ind[i] = &ary[i];
|
||
|
}
|
||
|
|
||
|
static void chk_ind(void)
|
||
|
{
|
||
|
u32 s = ind[0]->key;
|
||
|
for (uint i=1; i<n; i++)
|
||
|
if (ind[i]->key < ind[i-1]->key)
|
||
|
die("Missorted at %d", i);
|
||
|
else
|
||
|
s ^= ind[i]->key;
|
||
|
if (s != sum)
|
||
|
die("Corrupted");
|
||
|
xfree(ind);
|
||
|
}
|
||
|
|
||
|
int main(int argc, char **argv)
|
||
|
{
|
||
|
log_init(argv[0]);
|
||
|
|
||
|
int opt;
|
||
|
uint op = 0;
|
||
|
while ((opt = cf_getopt(argc, argv, CF_SHORT_OPTS "1", CF_NO_LONG_OPTS, NULL)) >= 0)
|
||
|
switch (opt)
|
||
|
{
|
||
|
case '1':
|
||
|
op |= (1 << (opt - '0'));
|
||
|
break;
|
||
|
default:
|
||
|
die("usage?");
|
||
|
}
|
||
|
|
||
|
array0 = alloc_elts(n);
|
||
|
array1 = alloc_elts(n);
|
||
|
for (uint i=0; i<n; i++)
|
||
|
array0[i] = array1[i] = (struct elt) { 0 };
|
||
|
|
||
|
msg(L_INFO, "Testing with %u elements", n);
|
||
|
|
||
|
mk_ary();
|
||
|
timestamp_t timer;
|
||
|
init_timer(&timer);
|
||
|
for (uint i=0; i<5; i++)
|
||
|
{
|
||
|
#if 1
|
||
|
memcpy(alt, ary, sizeof(struct elt) * n);
|
||
|
memcpy(ary, alt, sizeof(struct elt) * n);
|
||
|
#else
|
||
|
for (uint j=0; j<n; j++)
|
||
|
alt[j] = ary[j];
|
||
|
for (uint j=0; j<n; j++)
|
||
|
ary[j] = alt[j];
|
||
|
#endif
|
||
|
}
|
||
|
msg(L_DEBUG, "memcpy: %d", get_timer(&timer)/10);
|
||
|
|
||
|
#define BENCH(type, name, func) mk_##type(); init_timer(&timer); func; msg(L_DEBUG, name ": %d", get_timer(&timer)); chk_##type()
|
||
|
|
||
|
BENCH(ary, "qsort", qsort(ary, n, sizeof(struct elt), comp));
|
||
|
BENCH(ary, "arraysort", as_sort(n, ary));
|
||
|
BENCH(ind, "indirect qsort", qsort(ind, n, sizeof(struct elt *), comp_ind));
|
||
|
BENCH(ind, "indirect arraysort", asi_sort(n));
|
||
|
BENCH(ary, "radix1", r1_sort());
|
||
|
BENCH(ary, "radix1b", r1b_sort());
|
||
|
BENCH(ary, "radix1c", r1c_sort());
|
||
|
BENCH(ary, "radix1c-sse", r1c_sse_sort());
|
||
|
BENCH(ary, "radix1d", r1d_sort());
|
||
|
BENCH(ary, "radix2", r2_sort());
|
||
|
BENCH(ary, "radix3", r3_sort());
|
||
|
BENCH(ary, "mergesort", mergesort());
|
||
|
BENCH(ary, "samplesort", samplesort());
|
||
|
BENCH(ary, "samplesort2", samplesort2());
|
||
|
BENCH(ary, "heapsort", heapsort());
|
||
|
BENCH(ind, "indirect heapsort", heapsort_ind());
|
||
|
|
||
|
free_elts(array0, n);
|
||
|
free_elts(array1, n);
|
||
|
return 0;
|
||
|
}
|