651 lines
16 KiB
C
651 lines
16 KiB
C
|
/*
|
|||
|
* UCW Library -- Fast pseudo-random generator
|
|||
|
*
|
|||
|
* (c) 2020--2022 Pavel Charvat <pchar@ucw.cz>
|
|||
|
*
|
|||
|
* This software may be freely distributed and used according to the terms
|
|||
|
* of the GNU Lesser General Public License.
|
|||
|
*/
|
|||
|
|
|||
|
#undef LOCAL_DEBUG
|
|||
|
|
|||
|
#include <ucw/lib.h>
|
|||
|
#include <ucw/random.h>
|
|||
|
#include <ucw/threads.h>
|
|||
|
#include <ucw/unaligned.h>
|
|||
|
|
|||
|
#include <float.h>
|
|||
|
#include <limits.h>
|
|||
|
#include <math.h>
|
|||
|
#include <stdlib.h>
|
|||
|
#include <unistd.h>
|
|||
|
#include <sys/time.h>
|
|||
|
|
|||
|
// Select exactly one of these pseudo-random generators:
|
|||
|
//#define FASTRAND_USE_PCG
|
|||
|
//#define FASTRAND_USE_SPLITMIX
|
|||
|
#define FASTRAND_USE_XOROSHIRO
|
|||
|
//#define FASTRAND_USE_LIBC_SVID
|
|||
|
|
|||
|
/**
|
|||
|
* FASTRAND_USE_PCG
|
|||
|
*
|
|||
|
* References:
|
|||
|
* -- https://en.wikipedia.org/wiki/Permuted_congruential_generator
|
|||
|
* -- https://www.pcg-random.org/
|
|||
|
* -- "PCG: A Family of Simple Fast Space-Efficient Statistically Good Algorithms for Random Number Generation",
|
|||
|
* 2014, Melissa E. O’Neill
|
|||
|
*
|
|||
|
* Notes:
|
|||
|
* -- Simple and fast generator with good statistical properties.
|
|||
|
* -- We use a constant increment and 64->32bit XSH RR output function.
|
|||
|
**/
|
|||
|
|
|||
|
#define FASTRAND_PCG_MUL 6364136223846793005LLU
|
|||
|
#define FASTRAND_PCG_INC 1442695040888963407LLU
|
|||
|
|
|||
|
/**
|
|||
|
* FASTRAND_USE_SPLITMIX
|
|||
|
*
|
|||
|
* References:
|
|||
|
* -- http://prng.di.unimi.it/splitmix64.c (public, no copyright)
|
|||
|
* -- http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html
|
|||
|
* -- "Fast Splittable Pseudorandom Number Generators",
|
|||
|
* 2014, Guy L. Steele Jr., Doug Lea, Christine H. Flood
|
|||
|
*
|
|||
|
* Notes:
|
|||
|
* -- Each 64 bit number is generated exactly once in a cycle,
|
|||
|
* so we only use half of it -> 32 bit output.
|
|||
|
**/
|
|||
|
|
|||
|
/**
|
|||
|
* FASTRAND_USE_XOROSHIRO
|
|||
|
*
|
|||
|
* References:
|
|||
|
* -- http://prng.di.unimi.it/ (examples are public, no copyright)
|
|||
|
* -- "Scrambled Linear Pseudorandom Number Generators",
|
|||
|
* 2019, David Blackman, Sebastiano Vigna
|
|||
|
*
|
|||
|
* Notes:
|
|||
|
* -- A family of very fast generators. Good statistical properties,
|
|||
|
* some variants just have problems with lowest bits.
|
|||
|
* -- We support more xo(ro)shiro variants, see the options below.
|
|||
|
**/
|
|||
|
|
|||
|
// Engine: xoroshiro 128, xoshiro 256
|
|||
|
#define FASTRAND_XOROSHIRO_SIZE 128
|
|||
|
|
|||
|
// Scrambler: 1 (+), 2 (++), 3 (**)
|
|||
|
#define FASTRAND_XOROSHIRO_SCRAMBLER 1
|
|||
|
|
|||
|
// If defined, we only use the highest 60 bits from each 64 bit number;
|
|||
|
// useful especially for the weakest '+' scrambler. Even if not
|
|||
|
// we prefer higher bits where trivial.
|
|||
|
#undef FASTRAND_XOROSHIRO_ONLY_HIGHER
|
|||
|
|
|||
|
/**
|
|||
|
* FASTRAND_USE_LIBC_SVID
|
|||
|
* -- mrand48_r()
|
|||
|
* -- libc SVID generator
|
|||
|
**/
|
|||
|
|
|||
|
struct fastrand {
|
|||
|
#ifdef FASTRAND_USE_PCG
|
|||
|
u64 pcg_state; // PCG generator's current state
|
|||
|
#endif
|
|||
|
|
|||
|
#ifdef FASTRAND_USE_SPLITMIX
|
|||
|
u64 splitmix_state;
|
|||
|
#endif
|
|||
|
|
|||
|
#ifdef FASTRAND_USE_XOROSHIRO
|
|||
|
u64 xoroshiro_state[FASTRAND_XOROSHIRO_SIZE / 64];
|
|||
|
#endif
|
|||
|
|
|||
|
#ifdef FASTRAND_USE_LIBC_SVID
|
|||
|
struct drand48_data svid_data; // SVID generator's state structure
|
|||
|
#endif
|
|||
|
|
|||
|
bool inited; // Is seeded?
|
|||
|
fastrand_seed_t seed_value; // Initial seed value; useful for reproducible debugging
|
|||
|
};
|
|||
|
|
|||
|
/*** Common helpers ***/
|
|||
|
|
|||
|
// GCC should optimize these to a single instruction
|
|||
|
|
|||
|
static inline u32 fastrand_ror32(u32 x, uint r)
|
|||
|
{
|
|||
|
return (x >> r) | (x << (-r & 31));
|
|||
|
}
|
|||
|
|
|||
|
static inline u64 fastrand_rol64(u64 x, uint r)
|
|||
|
{
|
|||
|
return (x << r) | (x >> (-r & 63));
|
|||
|
}
|
|||
|
|
|||
|
/*** PCG random generator ***/
|
|||
|
|
|||
|
#ifdef FASTRAND_USE_PCG
|
|||
|
#define FASTRAND_ONE_BITS 32
|
|||
|
|
|||
|
static inline u32 fastrand_one(struct fastrand *c)
|
|||
|
{
|
|||
|
u64 s = c->pcg_state;
|
|||
|
uint r = s >> 59;
|
|||
|
c->pcg_state = s * FASTRAND_PCG_MUL + FASTRAND_PCG_INC;
|
|||
|
u32 x = ((s >> 18) ^ s) >> 27;
|
|||
|
return fastrand_ror32(x, r);
|
|||
|
}
|
|||
|
|
|||
|
static void fastrand_init(struct fastrand *c)
|
|||
|
{
|
|||
|
c->pcg_state = c->seed_value + FASTRAND_PCG_INC;
|
|||
|
(void) fastrand_one(c);
|
|||
|
DBG("RANDOM: Initialized PCG random generator, seed=0x%jx", (uintmax_t)c->seed_value);
|
|||
|
}
|
|||
|
#endif
|
|||
|
|
|||
|
/*** splitmix64 generator ***/
|
|||
|
|
|||
|
// A helper; used also for seeding xoroshiro
|
|||
|
static inline u64 fastrand_splitmix64_next(u64 *state)
|
|||
|
{
|
|||
|
u64 z = (*state += 0x9e3779b97f4a7c15LLU);
|
|||
|
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9LLU;
|
|||
|
z = (z ^ (z >> 27)) * 0x94d049bb133111ebLLU;
|
|||
|
return z ^ (z >> 31);
|
|||
|
}
|
|||
|
|
|||
|
#ifdef FASTRAND_USE_SPLITMIX
|
|||
|
#define FASTRAND_ONE_BITS 32
|
|||
|
|
|||
|
static void fastrand_init(struct fastrand *c)
|
|||
|
{
|
|||
|
c->splitmix_state = c->seed_value;
|
|||
|
DBG("RANDOM: Initialized splitmix64 random generator, seed=0x%jx", (uintmax_t)c->seed_value);
|
|||
|
}
|
|||
|
|
|||
|
static inline u32 fastrand_one(struct fastrand *c)
|
|||
|
{
|
|||
|
return fastrand_splitmix64_next(&c->splitmix_state);
|
|||
|
}
|
|||
|
#endif
|
|||
|
|
|||
|
/*** xoroshiro variants ***/
|
|||
|
|
|||
|
#ifdef FASTRAND_USE_XOROSHIRO
|
|||
|
#ifdef FASTRAND_XOROSHIRO_ONLY_HIGHER
|
|||
|
# define FASTRAND_ONE_BITS 60
|
|||
|
#else
|
|||
|
# define FASTRAND_ONE_BITS 64
|
|||
|
# define FASTRAND_PREFER_HIGHER
|
|||
|
#endif
|
|||
|
|
|||
|
static void fastrand_init(struct fastrand *c)
|
|||
|
{
|
|||
|
u64 x = c->seed_value;
|
|||
|
for (uint i = 0; i < ARRAY_SIZE(c->xoroshiro_state); i++)
|
|||
|
c->xoroshiro_state[i] = fastrand_splitmix64_next(&x);
|
|||
|
if (unlikely(!c->xoroshiro_state[0]))
|
|||
|
c->xoroshiro_state[0] = 1;
|
|||
|
DBG("RANDOM: Initialized xoroshiro random generator, seed=0x%jx, size=%u, scrambler=%u",
|
|||
|
(uintmax_t)c->seed_value, FASTRAND_XOROSHIRO_SIZE, FASTRAND_XOROSHIRO_SCRAMBLER);
|
|||
|
}
|
|||
|
|
|||
|
static inline u64 fastrand_one(struct fastrand *c)
|
|||
|
{
|
|||
|
u64 result;
|
|||
|
u64 *s = c->xoroshiro_state;
|
|||
|
|
|||
|
#if FASTRAND_XOROSHIRO_SIZE == 128
|
|||
|
u64 s0 = s[0];
|
|||
|
u64 s1 = s[1];
|
|||
|
#if FASTRAND_XOROSHIRO_SCRAMBLER == 1
|
|||
|
result = s0 + s1;
|
|||
|
uint ra = 24, rb = 16, rc = 37;
|
|||
|
#elif FASTRAND_XOROSHIRO_SCRAMBLER == 2
|
|||
|
result = fastrand_rol64(s0 + s1, 17) + s0;
|
|||
|
uint ra = 49, rb = 21, rc = 28;
|
|||
|
#elif FASTRAND_XOROSHIRO_SCRAMBLER == 3
|
|||
|
result = fastrand_rol64(s0 * 5, 7) * 9;
|
|||
|
uint ra = 24, rb = 16, rc = 37;
|
|||
|
#else
|
|||
|
# error "Unsupported xoroshiro parameters"
|
|||
|
#endif
|
|||
|
s[0] = fastrand_rol64(s0, ra) ^ s1 ^ (s1 << rb);
|
|||
|
s[1] = fastrand_rol64(s1, rc);
|
|||
|
|
|||
|
#elif FASTRAND_XOROSHIRO_SIZE == 256
|
|||
|
#if FASTRAND_XOROSHIRO_SCRAMBLER == 1
|
|||
|
result = s[0] + s[3];
|
|||
|
#elif FASTRAND_XOROSHIRO_SCRAMBLER == 2
|
|||
|
result = fastrand_rol64(s[0] + s[3], 23) + s[0];
|
|||
|
#elif FASTRAND_XOROSHIRO_SCRAMBLER == 3
|
|||
|
result = fastrand_rol64(s[1] * 5, 7) * 9;
|
|||
|
#else
|
|||
|
# error "Unsupported xoroshiro parameters"
|
|||
|
#endif
|
|||
|
u64 t = s[1] << 17;
|
|||
|
s[2] ^= s[0];
|
|||
|
s[3] ^= s[1];
|
|||
|
s[1] ^= s[2];
|
|||
|
s[0] ^= s[3];
|
|||
|
s[2] ^= t;
|
|||
|
s[3] = fastrand_rol64(s[3], 45);
|
|||
|
#else
|
|||
|
# error "Unsupported xoroshiro parameters"
|
|||
|
#endif
|
|||
|
|
|||
|
#ifdef FASTRAND_XOROSHIRO_ONLY_HIGHER
|
|||
|
result >>= 4;
|
|||
|
#endif
|
|||
|
return result;
|
|||
|
}
|
|||
|
|
|||
|
#endif
|
|||
|
|
|||
|
/*** Libc SVID generator ***/
|
|||
|
|
|||
|
#ifdef FASTRAND_USE_LIBC_SVID
|
|||
|
#define FASTRAND_ONE_BITS 32
|
|||
|
|
|||
|
static void fastrand_init(struct fastrand *c)
|
|||
|
{
|
|||
|
DBG("RANDOM: Initialized libc SVID random generator, seed=0x%jx", (uintmax_t)c->seed_value);
|
|||
|
if (srand48_r(c->seed_value, &c->svid_data) < 0)
|
|||
|
die("RANDOM: Failed to call srand48_r(): %m");
|
|||
|
}
|
|||
|
|
|||
|
#ifdef LOCAL_DEBUG
|
|||
|
static NONRET void fastrand_mrand48_r_failed(void)
|
|||
|
{
|
|||
|
die("RANDOM: Failed to call mrand48_r(): %m");
|
|||
|
}
|
|||
|
#endif
|
|||
|
|
|||
|
static inline u32 fastrand_one(struct fastrand *c)
|
|||
|
{
|
|||
|
long int r;
|
|||
|
#ifdef LOCAL_DEBUG
|
|||
|
if (unlikely(mrand48_r(&c->svid_data, &r) < 0))
|
|||
|
fastrand_mrand48_r_failed();
|
|||
|
#else
|
|||
|
(void) mrand48_r(&c->svid_data, &r);
|
|||
|
#endif
|
|||
|
return r;
|
|||
|
}
|
|||
|
|
|||
|
#endif
|
|||
|
|
|||
|
/*** Interface for creating/deleting/setting contexts ***/
|
|||
|
|
|||
|
struct fastrand *fastrand_new(void)
|
|||
|
{
|
|||
|
struct fastrand *c = xmalloc_zero(sizeof(*c));
|
|||
|
return c;
|
|||
|
}
|
|||
|
|
|||
|
void fastrand_delete(struct fastrand *c)
|
|||
|
{
|
|||
|
ASSERT(c);
|
|||
|
xfree(c);
|
|||
|
}
|
|||
|
|
|||
|
void fastrand_set_seed(struct fastrand *c, fastrand_seed_t seed)
|
|||
|
{
|
|||
|
c->seed_value = seed;
|
|||
|
c->inited = true;
|
|||
|
fastrand_init(c);
|
|||
|
}
|
|||
|
|
|||
|
fastrand_seed_t fastrand_gen_seed_value(void)
|
|||
|
{
|
|||
|
// Prefer kernel's urandom if getrandom() syscall is available.
|
|||
|
u64 val;
|
|||
|
if (strongrand_getrandom_mem_try(&val, sizeof(val)))
|
|||
|
return val;
|
|||
|
|
|||
|
// Generate a seed value as a mixture of:
|
|||
|
// -- current time
|
|||
|
// -- process id to deal with frequent fork() or startups
|
|||
|
// -- counter to deal with multiple contexts, for example in threaded application
|
|||
|
static u64 static_counter;
|
|||
|
ucwlib_lock();
|
|||
|
val = (static_counter += 0x1927a7639274162dLLU);
|
|||
|
ucwlib_unlock();
|
|||
|
val += getpid();
|
|||
|
struct timeval tv;
|
|||
|
if (gettimeofday(&tv, NULL) < 0)
|
|||
|
die("RANDOM: Failed to call gettimeofday(): %m");
|
|||
|
val += (u64)(tv.tv_sec * 1000000LL + tv.tv_usec) * 0x5a03173d83f78347LLU;
|
|||
|
return val;
|
|||
|
}
|
|||
|
|
|||
|
fastrand_seed_t fastrand_gen_seed(struct fastrand *c)
|
|||
|
{
|
|||
|
fastrand_seed_t seed = fastrand_gen_seed_value();
|
|||
|
fastrand_set_seed(c, seed);
|
|||
|
return seed;
|
|||
|
}
|
|||
|
|
|||
|
bool fastrand_has_seed(struct fastrand *c)
|
|||
|
{
|
|||
|
return c->inited;
|
|||
|
}
|
|||
|
|
|||
|
void fastrand_reset(struct fastrand *c)
|
|||
|
{
|
|||
|
c->inited = false;
|
|||
|
}
|
|||
|
|
|||
|
#if 1
|
|||
|
// XXX: Adds some overhead, but probably worth it
|
|||
|
#define FASTRAND_CHECK_SEED(c) do { if (unlikely(!(c)->inited)) fastrand_check_seed_failed(); } while (0)
|
|||
|
static NONRET void fastrand_check_seed_failed(void)
|
|||
|
{
|
|||
|
ASSERT(0);
|
|||
|
}
|
|||
|
#else
|
|||
|
#define FASTRAND_CHECK_SEED(c) do { } while (0)
|
|||
|
#endif
|
|||
|
|
|||
|
/*** Reading interface ***/
|
|||
|
|
|||
|
// We assume 32-64 bit output of fastrand_one().
|
|||
|
// For 32 bits fastrand_one() can return u32 type, otherwise it must be u64.
|
|||
|
COMPILE_ASSERT(fastrand_one_range, FASTRAND_ONE_BITS >= 32 && FASTRAND_ONE_BITS <= 64);
|
|||
|
|
|||
|
#if FASTRAND_ONE_BITS == 32
|
|||
|
#define FASTRAND_ONE_MAX UINT32_MAX
|
|||
|
#else
|
|||
|
#define FASTRAND_ONE_MAX (UINT64_MAX >> (64 - FASTRAND_ONE_BITS))
|
|||
|
#endif
|
|||
|
|
|||
|
static inline u32 fastrand_u32_helper(struct fastrand *c)
|
|||
|
{
|
|||
|
#if FASTRAND_ONE_BITS > 32 && defined(FASTRAND_PREFER_HIGHER)
|
|||
|
return fastrand_one(c) >> (FASTRAND_ONE_BITS - 32);
|
|||
|
#else
|
|||
|
return fastrand_one(c);
|
|||
|
#endif
|
|||
|
}
|
|||
|
|
|||
|
u32 fastrand_u32(struct fastrand *c)
|
|||
|
{
|
|||
|
FASTRAND_CHECK_SEED(c);
|
|||
|
return fastrand_u32_helper(c);
|
|||
|
}
|
|||
|
|
|||
|
u64 fastrand_u64(struct fastrand *c)
|
|||
|
{
|
|||
|
FASTRAND_CHECK_SEED(c);
|
|||
|
#if FASTRAND_ONE_BITS < 64
|
|||
|
return ((u64)fastrand_one(c) << FASTRAND_ONE_BITS) | fastrand_one(c);
|
|||
|
#else
|
|||
|
return fastrand_one(c);
|
|||
|
#endif
|
|||
|
}
|
|||
|
|
|||
|
u64 fastrand_bits_u64(struct fastrand *c, uint bits)
|
|||
|
{
|
|||
|
#ifdef LOCAL_DEBUG
|
|||
|
ASSERT(bits <= 64);
|
|||
|
#endif
|
|||
|
// Notice that we never rotate an integer by all its bits (forbidden by C standard).
|
|||
|
FASTRAND_CHECK_SEED(c);
|
|||
|
#if FASTRAND_ONE_BITS == 64
|
|||
|
if (!bits)
|
|||
|
return 0;
|
|||
|
return fastrand_one(c) >> (FASTRAND_ONE_BITS - bits);
|
|||
|
#else
|
|||
|
u64 r = fastrand_one(c);
|
|||
|
if (bits <= FASTRAND_ONE_BITS)
|
|||
|
return r >> (FASTRAND_ONE_BITS - bits);
|
|||
|
return (r << (bits - FASTRAND_ONE_BITS)) | (fastrand_one(c) >> (2 * FASTRAND_ONE_BITS - bits));
|
|||
|
#endif
|
|||
|
}
|
|||
|
|
|||
|
/*
|
|||
|
* fastrand_max() and variants
|
|||
|
*
|
|||
|
* Algorithm 1:
|
|||
|
*
|
|||
|
* We can return r % bound from uniformly generated i-bit number r,
|
|||
|
* but only if:
|
|||
|
* r - r % bound + bound <= 2^i
|
|||
|
* r - r % bound <= 2^i-1 - bound + 1 -- no risk of integer overflow
|
|||
|
* r - r % bound <= (2^i-1 + 1) - bound -- can temporarily overflow
|
|||
|
*
|
|||
|
* Then r fits to a complete interval in the i-bit range
|
|||
|
* and results should be uniformly distributed.
|
|||
|
*
|
|||
|
* If r is too high, we simply generate a different r and repeat
|
|||
|
* the whole process. The probability is < 50% for any bound <= 2^i
|
|||
|
* (with worst case bound == 2^i / 2 + 1), so the average number
|
|||
|
* of iterations is < 2.
|
|||
|
*
|
|||
|
*
|
|||
|
* Algorithm 2 (without any division in many cases, but needs big multiplication):
|
|||
|
* ("Fast Random Integer Generation in an Interval", 2019, Daniel Lemier)
|
|||
|
*
|
|||
|
* Let bound < 2^i. Then for j in [0, bound) each interval [j * 2^i + 2^i % bound, (j + 1) * 2^i)
|
|||
|
* contains the same number of multiples [0, 1 * bound, 2 * bound, ..., 2^i * bound),
|
|||
|
* exactly 2^i / bound (rounded down) of them.
|
|||
|
*
|
|||
|
* Therefore if we have a random i-bit number r and r * bound fits to one of these intervals,
|
|||
|
* we can immediately return (r * bound) / 2^i (index of that interval) and results should be
|
|||
|
* correctly distributed. If we are lucky and r fits to even smaller interval
|
|||
|
* [j * 2^i + bound, (j + 1) * 2^i], we can check that very quickly without any division.
|
|||
|
*
|
|||
|
* Probability of multiple iterations is again < 50%.
|
|||
|
* Probability of division-less check is (2^i - bound) / 2^i.
|
|||
|
*
|
|||
|
*
|
|||
|
* Algorithm 3 (always without divisions or multiplications, but needs more iterations):
|
|||
|
*
|
|||
|
* Let 2^(i-1) <= bound < 2^i. Then i-bit random number r has at least 50% probability
|
|||
|
* to be lower than bound, when we can simply return it.
|
|||
|
* Can be useful for very fast fastrand_one() like xoroshiro128+.
|
|||
|
*/
|
|||
|
|
|||
|
#define FASTRAND_MAX_U32_ALGORITHM 2
|
|||
|
#define FASTRAND_MAX_U64_ALGORITHM 3
|
|||
|
|
|||
|
static inline u32 fastrand_max_u32_helper(struct fastrand *c, u32 bound)
|
|||
|
{
|
|||
|
// Variant for bound <= UINT32_MAX.
|
|||
|
// This is the most common case -> we optimize it and inline the code to both fastrand_max() and fastrand_max_u64().
|
|||
|
|
|||
|
#if FASTRAND_MAX_U32_ALGORITHM == 1
|
|||
|
while (1)
|
|||
|
{
|
|||
|
u32 r = fastrand_u32_helper(c);
|
|||
|
u32 x = r % bound;
|
|||
|
if (r - x <= (u32)-bound)
|
|||
|
return x;
|
|||
|
#if 1
|
|||
|
// Possible optimization for unlucky cases
|
|||
|
u32 y = r - x;
|
|||
|
do
|
|||
|
r = fastrand_u32_helper(c);
|
|||
|
while (r >= y);
|
|||
|
return r % bound;
|
|||
|
#endif
|
|||
|
}
|
|||
|
|
|||
|
#elif FASTRAND_MAX_U32_ALGORITHM == 2
|
|||
|
u32 r = fastrand_u32_helper(c);
|
|||
|
u64 m = (u64)r * bound;
|
|||
|
u32 l = (u32)m;
|
|||
|
if (l < bound)
|
|||
|
{
|
|||
|
u32 t = (u32)-bound;
|
|||
|
#if 1
|
|||
|
// This condition can decrease the probability of division to <= 50% even in worst case and also optimize bad cases.
|
|||
|
if (t < bound)
|
|||
|
{
|
|||
|
// Implies l < t % bound, so we don't need to check it before switching to completely different loop.
|
|||
|
do
|
|||
|
r = fastrand_u32_helper(c);
|
|||
|
while (r >= bound);
|
|||
|
return r;
|
|||
|
}
|
|||
|
#endif
|
|||
|
t = t % bound;
|
|||
|
while (l < t)
|
|||
|
{
|
|||
|
r = fastrand_u32_helper(c);
|
|||
|
m = (u64)r * bound;
|
|||
|
l = (u32)m;
|
|||
|
}
|
|||
|
}
|
|||
|
return m >> 32;
|
|||
|
|
|||
|
#elif FASTRAND_MAX_U32_ALGORITHM == 3
|
|||
|
// XXX: Probably could be optimized with __builtin_clz()
|
|||
|
u32 r, y = bound - 1;
|
|||
|
y |= y >> 1;
|
|||
|
y |= y >> 2;
|
|||
|
y |= y >> 4;
|
|||
|
y |= y >> 8;
|
|||
|
y |= y >> 16;
|
|||
|
do
|
|||
|
r = fastrand_u32_helper(c) & y;
|
|||
|
while (r >= bound);
|
|||
|
return r;
|
|||
|
|
|||
|
#else
|
|||
|
# error Invalid FASTRAND_MAX_U32_ALGORITHM
|
|||
|
#endif
|
|||
|
}
|
|||
|
|
|||
|
static u64 fastrand_max_u64_helper(struct fastrand *c, u64 bound)
|
|||
|
{
|
|||
|
// Less common case -> compromise between speed and size
|
|||
|
#if FASTRAND_MAX_U64_ALGORITHM == 1
|
|||
|
while (1)
|
|||
|
{
|
|||
|
u64 r = fastrand_one(c);
|
|||
|
u64 m = FASTRAND_ONE_MAX;
|
|||
|
#if FASTRAND_ONE_BITS < 64
|
|||
|
if (bound > m + 1)
|
|||
|
{
|
|||
|
r = (r << FASTRAND_ONE_BITS) | fastrand_one(c);
|
|||
|
m = (m << FASTRAND_ONE_BITS) | FASTRAND_ONE_MAX;
|
|||
|
}
|
|||
|
#endif
|
|||
|
u64 x = r % bound;
|
|||
|
if (r - x <= m + 1 - bound)
|
|||
|
return x;
|
|||
|
}
|
|||
|
|
|||
|
#elif FASTRAND_MAX_U64_ALGORITHM == 3
|
|||
|
u64 r, y = bound - 1;
|
|||
|
y |= y >> 1;
|
|||
|
y |= y >> 2;
|
|||
|
y |= y >> 4;
|
|||
|
y |= y >> 8;
|
|||
|
y |= y >> 16;
|
|||
|
y |= y >> 32;
|
|||
|
do
|
|||
|
{
|
|||
|
r = fastrand_one(c);
|
|||
|
#if FASTRAND_ONE_BITS < 64
|
|||
|
if (y > FASTRAND_ONE_MAX)
|
|||
|
r = (r << FASTRAND_ONE_BITS) | fastrand_one(c);
|
|||
|
#endif
|
|||
|
r &= y;
|
|||
|
}
|
|||
|
while (r >= bound);
|
|||
|
return r;
|
|||
|
#else
|
|||
|
# error Invalid FASTRAND_MAX_U64_ALGORITHM
|
|||
|
#endif
|
|||
|
}
|
|||
|
|
|||
|
uint fastrand_max(struct fastrand *c, uint bound)
|
|||
|
{
|
|||
|
FASTRAND_CHECK_SEED(c);
|
|||
|
#if UINT32_MAX < UINT_MAX
|
|||
|
if (bound <= UINT32_MAX)
|
|||
|
return fastrand_max_u32_helper(c, bound);
|
|||
|
else
|
|||
|
return fastrand_max_u64_helper(c, bound);
|
|||
|
#else
|
|||
|
return fastrand_max_u32_helper(c, bound);
|
|||
|
#endif
|
|||
|
}
|
|||
|
|
|||
|
u64 fastrand_max_u64(struct fastrand *c, u64 bound)
|
|||
|
{
|
|||
|
FASTRAND_CHECK_SEED(c);
|
|||
|
if (bound <= UINT32_MAX)
|
|||
|
return fastrand_max_u32_helper(c, bound);
|
|||
|
else
|
|||
|
return fastrand_max_u64_helper(c, bound);
|
|||
|
}
|
|||
|
|
|||
|
void fastrand_mem(struct fastrand *c, void *ptr, size_t size)
|
|||
|
{
|
|||
|
// XXX: Could be optimized to aligned assignments instead of put_u32/64, but it
|
|||
|
// would be quite complex to make it deterministic (for same seed and any @ptr)
|
|||
|
// and large sizes are not used often anyway.
|
|||
|
FASTRAND_CHECK_SEED(c);
|
|||
|
byte *p = ptr;
|
|||
|
#if FASTRAND_ONE_BITS < 64
|
|||
|
for (size_t n = size >> 2; n--; p += 4)
|
|||
|
put_u32(p, fastrand_u32_helper(c));
|
|||
|
if (size &= 3)
|
|||
|
{
|
|||
|
u32 x = fastrand_u32_helper(c);
|
|||
|
#else
|
|||
|
for (size_t n = size >> 3; n--; p += 8)
|
|||
|
put_u64(p, fastrand_one(c));
|
|||
|
if (size &= 7)
|
|||
|
{
|
|||
|
u64 x = fastrand_one(c);
|
|||
|
#endif
|
|||
|
while (size--)
|
|||
|
{
|
|||
|
*p++ = x;
|
|||
|
x >>= 8;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
double fastrand_double(struct fastrand *c)
|
|||
|
{
|
|||
|
// Generate 52 random bits
|
|||
|
FASTRAND_CHECK_SEED(c);
|
|||
|
u64 r = fastrand_one(c);
|
|||
|
#if FASTRAND_ONE_BITS >= 52
|
|||
|
r >>= FASTRAND_ONE_BITS - 52;
|
|||
|
#else
|
|||
|
// Do we want to generate doubles with full 52 precision?
|
|||
|
#if 0
|
|||
|
r = (r << (52 - FASTRAND_ONE_BITS)) | (fastrand_one(c) >> (2 * FASTRAND_ONE_BITS - 52));
|
|||
|
#else
|
|||
|
r <<= 52 - FASTRAND_ONE_BITS;
|
|||
|
#endif
|
|||
|
#endif
|
|||
|
|
|||
|
// With standard IEC 60559 / IEEE 754 format, we can directly store @r as a double
|
|||
|
// from range [1.0, 2.0) and subtract 1.0.
|
|||
|
// But the alternative with multiplication is often faster anyway.
|
|||
|
#if 0 && defined(__STDC_IEC_559__)
|
|||
|
union {
|
|||
|
u64 u;
|
|||
|
double d;
|
|||
|
} tmp;
|
|||
|
ASSERT(sizeof(double) == 8 && sizeof(tmp) == 8);
|
|||
|
tmp.u = (0x3ffLLU << 52) | r;
|
|||
|
return tmp.d - 1.0;
|
|||
|
#elif 1
|
|||
|
static const double mul = 1.0 / (1LLU << 52);
|
|||
|
return r * mul;
|
|||
|
#else
|
|||
|
return ldexp(r, -52);
|
|||
|
#endif
|
|||
|
}
|