You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
219 lines
4.9 KiB
219 lines
4.9 KiB
2 months ago
|
/*
|
||
|
* UCW Library -- Coding of u64 into variable length bytecode.
|
||
|
*
|
||
|
* (c) 2013 Tomas Valla <tom@ucw.cz>
|
||
|
*
|
||
|
* This software may be freely distributed and used according to the terms
|
||
|
* of the GNU Lesser General Public License.
|
||
|
*/
|
||
|
|
||
|
#ifndef _UCW_VARINT_H
|
||
|
#define _UCW_VARINT_H
|
||
|
|
||
|
#ifdef CONFIG_UCW_CLEAN_ABI
|
||
|
#define varint_get_big ucw_varint_get_big
|
||
|
#define varint_put_big ucw_varint_put_big
|
||
|
#endif
|
||
|
|
||
|
/***
|
||
|
* The encoding works in the following way:
|
||
|
*
|
||
|
* First byte Stored values
|
||
|
*
|
||
|
* 0xxxxxxx 7 bits
|
||
|
* 10xxxxxx +1 byte 14 bits, value shifted by 2^7
|
||
|
* 110xxxxx +2 bytes 21 bits, value shifted by 2^7+2^14
|
||
|
* 1110xxxx +3 bytes 28 bits, value shifted by 2^7+2^14+2^21
|
||
|
* ....
|
||
|
* 11111110 +7 bytes 56 bits, value shifted by 2^7+2^14+2^21+2^28+2^35+2^42+2^49
|
||
|
* 11111111 +8 bytes full 64 bits, value shifted by 2^7+2^14+2^21+2^28+2^35+2^42+2^49+2^56
|
||
|
*
|
||
|
* The values are stored in bigendian to allow lexicographic sorting.
|
||
|
* The encoding and algorithms are aimed to be optimised for storing shorter numbers.
|
||
|
*
|
||
|
* There is an invalid combination: if the first two bytes are 0xff 0xff.
|
||
|
***/
|
||
|
|
||
|
|
||
|
#define VARINT_SHIFT_L1 0x80
|
||
|
#define VARINT_SHIFT_L2 0x4080
|
||
|
#define VARINT_SHIFT_L3 0x204080
|
||
|
#define VARINT_SHIFT_L4 0x10204080
|
||
|
#define VARINT_SHIFT_L5 0x0810204080
|
||
|
#define VARINT_SHIFT_L6 0x040810204080
|
||
|
#define VARINT_SHIFT_L7 0x02040810204080
|
||
|
#define VARINT_SHIFT_L8 0x0102040810204080
|
||
|
|
||
|
/*
|
||
|
* The following code is already unrolled, to maximize the speed.
|
||
|
* Yes, it is ugly.
|
||
|
*/
|
||
|
|
||
|
#define PUTB(j,i) p[j] = (byte)((u >> (8*(i))));
|
||
|
#define PUTB4(b) PUTB(0,b-1) PUTB(1,b-2) PUTB(2,b-3) PUTB(3,b-4)
|
||
|
|
||
|
/* For internal use only: these handle encodings longer than 4 bytes. */
|
||
|
uint varint_put_big(byte *p, u64 u);
|
||
|
const byte *varint_get_big(const byte *p, u64 *r);
|
||
|
|
||
|
/**
|
||
|
* Encode u64 value u into byte sequence p.
|
||
|
* Returns the number of bytes used (at least 1 and at most 9).
|
||
|
**/
|
||
|
static inline uint varint_put(byte *p, u64 u)
|
||
|
{
|
||
|
if (u < VARINT_SHIFT_L1) {
|
||
|
p[0] = (byte)u;
|
||
|
return 1;
|
||
|
}
|
||
|
if (u < VARINT_SHIFT_L2) {
|
||
|
u -= VARINT_SHIFT_L1;
|
||
|
PUTB(0,1)
|
||
|
p[0] |= 0x80;
|
||
|
PUTB(1,0)
|
||
|
return 2;
|
||
|
}
|
||
|
if (u < VARINT_SHIFT_L3) {
|
||
|
u -= VARINT_SHIFT_L2;
|
||
|
PUTB(0,2)
|
||
|
p[0] |= 0xc0;
|
||
|
PUTB(1,1) PUTB(2,0)
|
||
|
return 3;
|
||
|
}
|
||
|
if (u < VARINT_SHIFT_L4) {
|
||
|
u -= VARINT_SHIFT_L3;
|
||
|
PUTB4(4)
|
||
|
p[0] |= 0xe0;
|
||
|
return 4;
|
||
|
}
|
||
|
return varint_put_big(p, u);
|
||
|
}
|
||
|
/*
 * Drop the PUTB/PUTB4 helper macros defined earlier in this header, so that
 * they do not leak into every file which includes it. (The header used to
 * #undef GETB and GETB4 here, which it never defines.)
 * NOTE(review): a source file that relied on inheriting PUTB/PUTB4 from this
 * header has to define its own copies -- confirm against varint.c.
 */
#undef PUTB
#undef PUTB4
|
||
|
|
||
|
/**
|
||
|
* Decode u64 value from a byte sequence p into res.
|
||
|
* The invalid combination is not detected.
|
||
|
* Returns pointer to the next position after the sequence.
|
||
|
**/
|
||
|
static inline const byte *varint_get(const byte *p, u64 *res)
|
||
|
{
|
||
|
byte h = ~*p;
|
||
|
|
||
|
if (h & 0x80) {
|
||
|
*res = p[0];
|
||
|
return p+1;
|
||
|
}
|
||
|
if (h & 0x40) {
|
||
|
*res = (p[0] & 0x3f)<<8 | p[1];
|
||
|
*res += VARINT_SHIFT_L1;
|
||
|
return p+2;
|
||
|
}
|
||
|
if (h & 0x20) {
|
||
|
*res = (p[0] & 0x1f)<<16 | p[1]<<8 | p[2];
|
||
|
*res += VARINT_SHIFT_L2;
|
||
|
return p+3;
|
||
|
}
|
||
|
if (h & 0x10) {
|
||
|
*res = (p[0] & 0xf)<<24 | p[1]<<16 | p[2]<<8 | p[3];
|
||
|
*res += VARINT_SHIFT_L3;
|
||
|
return p+4;
|
||
|
}
|
||
|
return varint_get_big(p, res);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Decode at most u32 value from a byte sequence p into u32 res.
|
||
|
* The invalid combination is not detected.
|
||
|
* Returns pointer to the next position after the sequence.
|
||
|
* If the stored number cannot fit into u32, fill res by 0xffffffff and returns p.
|
||
|
**/
|
||
|
static inline const byte *varint_get32(const byte *p, u32 *res)
|
||
|
{
|
||
|
byte h = ~*p;
|
||
|
|
||
|
if (h & 0x80) {
|
||
|
*res = p[0];
|
||
|
return p+1;
|
||
|
}
|
||
|
if (h & 0x40) {
|
||
|
*res = (p[0] & 0x3f)<<8 | p[1];
|
||
|
*res += VARINT_SHIFT_L1;
|
||
|
return p+2;
|
||
|
}
|
||
|
if (h & 0x20) {
|
||
|
*res = (p[0] & 0x1f)<<16 | p[1]<<8 | p[2];
|
||
|
*res += VARINT_SHIFT_L2;
|
||
|
return p+3;
|
||
|
}
|
||
|
if (h & 0x10) {
|
||
|
*res = (p[0] & 0xf)<<24 | p[1]<<16 | p[2]<<8 | p[3];
|
||
|
*res += VARINT_SHIFT_L3;
|
||
|
return p+4;
|
||
|
}
|
||
|
u64 r = (u64)(p[0] & 7)<<32 | (u64)p[1]<<24 | (u64)p[2]<<16 | (u64)p[3]<<8 | (u64)p[4];
|
||
|
r += VARINT_SHIFT_L4;
|
||
|
if (r > 0xffffffff) {
|
||
|
*res = 0xffffffff;
|
||
|
return p;
|
||
|
}
|
||
|
*res = r;
|
||
|
return p+5;
|
||
|
}
|
||
|
|
||
|
/** Does the byte sequence p code the invalid sequence? **/
|
||
|
static inline int varint_invalid(const byte *p)
|
||
|
{
|
||
|
return p[0] == 0xff && p[1] == 0xff;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Store the invalid sequence.
|
||
|
* Returns always 2 (2 bytes were used, to be consistent with varint_put).
|
||
|
**/
|
||
|
static inline uint varint_put_invalid(byte *p)
|
||
|
{
|
||
|
p[0] = p[1] = 0xff;
|
||
|
return 2;
|
||
|
}
|
||
|
|
||
|
/** Compute the length of encoding in bytes from the first byte hdr of the encoding. **/
|
||
|
static inline uint varint_len(const byte hdr)
|
||
|
{
|
||
|
byte b = ~hdr;
|
||
|
|
||
|
uint l = 0;
|
||
|
if (!b)
|
||
|
l = -1;
|
||
|
else {
|
||
|
if (b & 0xf0) { l += 4; b &= 0xf0; }
|
||
|
if (b & 0xcc) { l += 2; b &= 0xcc; }
|
||
|
if (b & 0xaa) l++;
|
||
|
}
|
||
|
return 8 - l;
|
||
|
}
|
||
|
|
||
|
/** Compute the number of bytes needed to store the value u. **/
|
||
|
static inline uint varint_space(u64 u)
|
||
|
{
|
||
|
if (u < VARINT_SHIFT_L1)
|
||
|
return 1;
|
||
|
if (u < VARINT_SHIFT_L2)
|
||
|
return 2;
|
||
|
if (u < VARINT_SHIFT_L3)
|
||
|
return 3;
|
||
|
if (u < VARINT_SHIFT_L4)
|
||
|
return 4;
|
||
|
if (u < VARINT_SHIFT_L5)
|
||
|
return 5;
|
||
|
if (u < VARINT_SHIFT_L6)
|
||
|
return 6;
|
||
|
if (u < VARINT_SHIFT_L7)
|
||
|
return 7;
|
||
|
if (u < VARINT_SHIFT_L8)
|
||
|
return 8;
|
||
|
return 9;
|
||
|
}
|
||
|
|
||
|
#endif
|