/*
 *	UCW Library -- Coding of u64 into variable length bytecode.
 *
 *	(c) 2013 Tomas Valla <tom@ucw.cz>
 *
 *	This software may be freely distributed and used according to the terms
 *	of the GNU Lesser General Public License.
 */

#ifndef _UCW_VARINT_H
#define _UCW_VARINT_H

/*
 * When CONFIG_UCW_CLEAN_ABI is defined, the two functions that this header
 * declares but does not define are renamed to carry a ucw_ prefix.
 */
#ifdef CONFIG_UCW_CLEAN_ABI
#define varint_get_big ucw_varint_get_big
#define varint_put_big ucw_varint_put_big
#endif

/***
 * The encoding works in the following way:
 *
 *  First byte			Stored values
 *
 *  0xxxxxxx			7 bits
 *  10xxxxxx  +1 byte		14 bits, value shifted by 2^7
 *  110xxxxx  +2 bytes		21 bits, value shifted by 2^7+2^14
 *  1110xxxx  +3 bytes		28 bits, value shifted by 2^7+2^14+2^21
 *    ....
 *  11111110  +7 bytes		56 bits, value shifted by 2^7+2^14+2^21+2^28+2^35+2^42+2^49
 *  11111111  +8 bytes		full 64 bits, value shifted by 2^7+2^14+2^21+2^28+2^35+2^42+2^49+2^56
 *
 * The values are stored in bigendian to allow lexicographic sorting.
 * The encoding and algorithms are aimed to be optimised for storing shorter numbers.
 *
 * There is an invalid combination: if the first two bytes are 0xff 0xff.
 ***/


/*
 * VARINT_SHIFT_Ln equals 2^7 + 2^14 + ... + 2^(7n), which is the smallest
 * value that needs n+1 bytes (see varint_space()). The inline encoder
 * subtracts the matching constant before storing a multi-byte form and the
 * inline decoders add it back.
 */
#define VARINT_SHIFT_L1			      0x80
#define VARINT_SHIFT_L2			    0x4080
#define VARINT_SHIFT_L3			  0x204080
#define VARINT_SHIFT_L4			0x10204080
#define VARINT_SHIFT_L5		      0x0810204080
#define VARINT_SHIFT_L6		    0x040810204080
#define VARINT_SHIFT_L7		  0x02040810204080
#define VARINT_SHIFT_L8		0x0102040810204080

/*
 * The following code is already unrolled, to maximize the speed.
 * Yes, it is ugly.
 */

/*
 * PUTB(j,i) stores byte number i of u (byte 0 being the least significant)
 * into p[j]. PUTB4(b) stores bytes b-1 down to b-4 of u into p[0] .. p[3],
 * i.e. the most significant of the four comes first.
 * Both expect variables named p and u to exist where they are expanded, and
 * every expansion already ends with a semicolon, so call sites add none.
 */
#define PUTB(j,i)  p[j] = (byte)((u >> (8*(i))));
#define PUTB4(b)   PUTB(0,b-1) PUTB(1,b-2) PUTB(2,b-3) PUTB(3,b-4)

/*
 * Helpers for encodings longer than 4 bytes; for internal use only.
 * The inline varint_put() and varint_get() hand over to them once the
 * 1- to 4-byte cases are ruled out, i.e. when u >= VARINT_SHIFT_L4 or when
 * the top four bits of the first byte are all set.
 * They are only declared here; their definitions are not part of this header.
 */
uint varint_put_big(byte *p, u64 u);
const byte *varint_get_big(const byte *p, u64 *r);

/**
 * Encode u64 value u into byte sequence p.
 * Returns the number of bytes used (at least 1 and at most 9).
 *
 * Encodings of 1 to 4 bytes are produced inline. Any value that is
 * VARINT_SHIFT_L4 or larger is passed on to varint_put_big().
 **/
static inline uint varint_put(byte *p, u64 u)
{
	if (u < VARINT_SHIFT_L1) {
		/* Single byte; the top bit stays clear because u < 0x80. */
		p[0] = (byte)u;
		return 1;
	}
	if (u < VARINT_SHIFT_L2) {
		/* Remove the bias, store big-endian, then stamp the 10 prefix. */
		u -= VARINT_SHIFT_L1;
		PUTB(0,1)
		p[0] |= 0x80;
		PUTB(1,0)
		return 2;
	}
	if (u < VARINT_SHIFT_L3) {
		/* Same scheme with a 110 prefix and three bytes. */
		u -= VARINT_SHIFT_L2;
		PUTB(0,2)
		p[0] |= 0xc0;
		PUTB(1,1) PUTB(2,0)
		return 3;
	}
	if (u < VARINT_SHIFT_L4) {
		/* Same scheme with a 1110 prefix and four bytes. */
		u -= VARINT_SHIFT_L3;
		PUTB4(4)
		p[0] |= 0xe0;
		return 4;
	}
	return varint_put_big(p, u);
}
/*
 * PUTB and PUTB4 are private helpers of varint_put(); drop them here so that
 * they do not leak into every file that includes this header.
 */
#undef PUTB
#undef PUTB4

/**
 * Read one encoded u64 starting at p and store it into *res.
 * No check for the invalid combination is made.
 * The returned pointer refers to the first byte behind the encoding.
 **/
static inline const byte *varint_get(const byte *p, u64 *res)
{
	const byte lead = p[0];
	u64 val;
	uint len;

	/* Top four bits all set: five bytes or more, decoded out of line. */
	if (lead >= 0xf0)
		return varint_get_big(p, res);

	if (lead < 0x80) {
		/* 0xxxxxxx */
		val = lead;
		len = 1;
	} else if (lead < 0xc0) {
		/* 10xxxxxx */
		val = (u64)((lead & 0x3f)<<8 | p[1]) + VARINT_SHIFT_L1;
		len = 2;
	} else if (lead < 0xe0) {
		/* 110xxxxx */
		val = (u64)((lead & 0x1f)<<16 | p[1]<<8 | p[2]) + VARINT_SHIFT_L2;
		len = 3;
	} else {
		/* 1110xxxx */
		val = (u64)((lead & 0xf)<<24 | p[1]<<16 | p[2]<<8 | p[3]) + VARINT_SHIFT_L3;
		len = 4;
	}

	*res = val;
	return p + len;
}

/**
 * Decode at most u32 value from a byte sequence p into u32 res.
 * Returns pointer to the next position after the sequence.
 * If the stored number cannot fit into u32, fill res by 0xffffffff and returns p.
 * This covers every encoding of 6 or more bytes, so the invalid combination
 * is reported the same way and is not told apart from a too large value.
 **/
static inline const byte *varint_get32(const byte *p, u32 *res)
{
	byte h = ~*p;

	if (h & 0x80) {
		*res = p[0];
		return p+1;
	}
	if (h & 0x40) {
		*res = (p[0] & 0x3f)<<8 | p[1];
		*res += VARINT_SHIFT_L1;
		return p+2;
	}
	if (h & 0x20) {
		*res = (p[0] & 0x1f)<<16 | p[1]<<8 | p[2];
		*res += VARINT_SHIFT_L2;
		return p+3;
	}
	if (h & 0x10) {
		*res = (p[0] & 0xf)<<24 | p[1]<<16 | p[2]<<8 | p[3];
		*res += VARINT_SHIFT_L3;
		return p+4;
	}
	if (h & 0x08) {
		/* 11110xxx: the 5-byte form carries 35 bits, which may or may not fit. */
		u64 r = (u64)(p[0] & 7)<<32 | (u64)p[1]<<24 | (u64)p[2]<<16 | (u64)p[3]<<8 | (u64)p[4];
		r += VARINT_SHIFT_L4;
		if (r <= 0xffffffff) {
			*res = (u32)r;
			return p+5;
		}
	}
	/*
	 * Either the 5-byte value is too large, or the first byte announces
	 * 6 or more bytes. Such encodings start at VARINT_SHIFT_L5, which is
	 * already above 0xffffffff, so they must never be read as a 5-byte
	 * form (a first byte of 0xf8 would otherwise slip through, because
	 * its low three bits are zero).
	 */
	*res = 0xffffffff;
	return p;
}

/** Tell whether p starts with the invalid sequence, i.e. two 0xff bytes. Yields 1 if so, 0 otherwise. **/
static inline int varint_invalid(const byte *p)
{
	/* The second byte is looked at only when the first one already matches. */
	if (p[0] != 0xff)
		return 0;
	return p[1] == 0xff;
}

/**
 * Write the invalid sequence (two 0xff bytes) to p.
 * The result is always 2, the number of bytes written, which mirrors
 * what varint_put reports.
 **/
static inline uint varint_put_invalid(byte *p)
{
	p[1] = 0xff;
	p[0] = 0xff;
	return 2;
}

/**
 * Derive the total length of an encoding in bytes (1 to 9) from its first byte hdr.
 * The length is one more than the number of leading one bits of hdr.
 **/
static inline uint varint_len(const byte hdr)
{
	byte inv = ~hdr;
	uint top = 0;

	/* No zero bit at all: the header is 0xff, which starts the 9-byte form. */
	if (!inv)
		return 9;

	/* Binary search for the position of the highest zero bit of hdr. */
	if (inv >> 4) {
		top = 4;
		inv >>= 4;
	}
	if (inv >> 2) {
		top += 2;
		inv >>= 2;
	}
	if (inv >> 1)
		top += 1;

	return 8 - top;
}

/** Tell how many bytes (1 to 9) the encoding of the value u occupies. **/
static inline uint varint_space(u64 u)
{
	/* One byte is always needed, plus one more for each threshold u has reached. */
	return 1
		+ (u >= VARINT_SHIFT_L1)
		+ (u >= VARINT_SHIFT_L2)
		+ (u >= VARINT_SHIFT_L3)
		+ (u >= VARINT_SHIFT_L4)
		+ (u >= VARINT_SHIFT_L5)
		+ (u >= VARINT_SHIFT_L6)
		+ (u >= VARINT_SHIFT_L7)
		+ (u >= VARINT_SHIFT_L8);
}

#endif