You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
478 lines
12 KiB
478 lines
12 KiB
/*
 *	LiZaRd -- Fast compression method based on Lempel-Ziv 77
 *
 *	(c) 2004, Robert Spalek <robert@ucw.cz>
 *
 *	This software may be freely distributed and used according to the terms
 *	of the GNU Lesser General Public License.
 *
 *	The file format is based on LZO1X and
 *	the compression method is based on zlib.
 */
|
|
|
|
#include <ucw/lib.h>
|
|
#include <ucw/lizard.h>
|
|
|
|
#include <string.h>
|
|
|
|
/* Handle of a hash record: an index into the record array, 0 means "none". */
typedef u16 hash_ptr_t;

/*
 * One node of a collision chain.  The position of the corresponding string
 * in the original text is not stored here; it is implied by the index of
 * the node and recovered by locate_string().
 */
struct hash_record {
  hash_ptr_t next;	/* following node of the chain; 0 terminates the chain */
  hash_ptr_t prev;	/* high bit clear: index of the preceding node;
			 * high bit set: this node is the first one of its
			 * chain and the low bits hold the hash value (hashf) */
};

#define	HASH_SIZE		(1<<14)		/* number of slots of the hash-table */
#define	HASH_RECORDS		(1<<15)		/* size of the record array; index 0 is unused, so one record less is usable */
#define	CHAIN_MAX_TESTS		8		/* examine at most this many nodes of a collision chain */
#define	CHAIN_GOOD_MATCH	32		/* a match of at least this length is good enough => stop searching */
|
|
|
|
static inline uint
|
|
hashf(const byte *string)
|
|
/* 0..HASH_SIZE-1 */
|
|
{
|
|
return string[0] ^ (string[1]<<3) ^ (string[2]<<6);
|
|
}
|
|
|
|
static inline byte *
|
|
locate_string(const byte *string, int record_id, int head)
|
|
/* The strings are recorded into the hash-table regularly, hence there is no
|
|
* need to store the pointer there. */
|
|
{
|
|
string += record_id - head;
|
|
if (record_id >= head)
|
|
string -= HASH_RECORDS-1;
|
|
return (byte *)string;
|
|
}
|
|
|
|
static inline uint
|
|
find_match(uint record_id, struct hash_record *hash_rec, const byte *string, const byte *string_end, byte **best_ptr, uint head)
|
|
/* hash_tab[hash] == record_id points to the head of the double-linked
|
|
* link-list of strings with the same hash. The records are statically
|
|
* stored in circular array hash_rec (with the 1st entry unused), and the
|
|
* pointers are just 16-bit indices. The strings in every collision chain
|
|
* are ordered by age. */
|
|
{
|
|
uint count = CHAIN_MAX_TESTS;
|
|
uint best_len = 0;
|
|
while (record_id && count-- > 0)
|
|
{
|
|
byte *record_string = locate_string(string, record_id, head);
|
|
byte *cmp = record_string;
|
|
if (cmp[0] == string[0] && cmp[2] == string[2])
|
|
/* implies cmp[1] == string[1] */
|
|
{
|
|
if (cmp[3] == string[3])
|
|
{
|
|
cmp += 4;
|
|
if (*cmp++ == string[4] && *cmp++ == string[5]
|
|
&& *cmp++ == string[6] && *cmp++ == string[7])
|
|
{
|
|
const byte *str = string + 8;
|
|
while (str <= string_end && *cmp++ == *str++);
|
|
}
|
|
}
|
|
else
|
|
cmp += 4;
|
|
uint len = cmp - record_string - 1; /* cmp points 2 characters after the last match */
|
|
if (len > best_len)
|
|
{
|
|
best_len = len;
|
|
*best_ptr = record_string;
|
|
if (best_len >= CHAIN_GOOD_MATCH) /* optimization */
|
|
break;
|
|
}
|
|
}
|
|
record_id = hash_rec[record_id].next;
|
|
}
|
|
return best_len;
|
|
}
|
|
|
|
static uint
|
|
hash_string(hash_ptr_t *hash_tab, uint hash, struct hash_record *hash_rec, /*byte *string,*/ uint head, uint *to_delete)
|
|
/* We reuse hash-records stored in a circular array. First, delete the old
|
|
* one and then add the new one in front of the link-list. */
|
|
{
|
|
struct hash_record *rec = hash_rec + head;
|
|
if (*to_delete) /* unlink the original record */
|
|
{
|
|
uint prev_id = rec->prev & ((1<<15)-1);
|
|
if (rec->prev & (1<<15)) /* was a head */
|
|
hash_tab[prev_id] = 0;
|
|
else /* thanks to the ordering, this was a tail */
|
|
hash_rec[prev_id].next = 0;
|
|
}
|
|
rec->next = hash_tab[hash];
|
|
rec->prev = (1<<15) | hash;
|
|
hash_rec[rec->next].prev = head;
|
|
hash_tab[hash] = head; /* add the new record before the link-list */
|
|
|
|
if (++head >= HASH_RECORDS) /* circular buffer, reuse old records, 0 is unused */
|
|
{
|
|
head = 1;
|
|
*to_delete = 1;
|
|
}
|
|
return head;
|
|
}
|
|
|
|
static inline byte *
|
|
dump_unary_value(byte *out, uint l)
|
|
{
|
|
while (l > 255)
|
|
{
|
|
l -= 255;
|
|
*out++ = 0;
|
|
}
|
|
*out++ = l;
|
|
return out;
|
|
}
|
|
|
|
static byte *
|
|
flush_copy_command(uint bof, byte *out, const byte *start, uint len)
|
|
{
|
|
if (bof && len <= 238)
|
|
*out++ = len + 17;
|
|
else if (len < 4)
|
|
{
|
|
/* cannot happen when !!bof */
|
|
out[-2] |= len; /* invariant: lowest 2 bits 2 bytes back */
|
|
#ifdef CPU_ALLOW_UNALIGNED
|
|
* (u32*) out = * (u32*) start;
|
|
return out + len;
|
|
#else
|
|
while (len-- > 0)
|
|
*out++ = *start++;
|
|
return out;
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
/* leave 2 least significant bits of out[-2] set to 0 */
|
|
if (len <= 18)
|
|
*out++ = len - 3;
|
|
else
|
|
{
|
|
*out++ = 0;
|
|
out = dump_unary_value(out, len - 18);
|
|
}
|
|
}
|
|
memcpy(out, start, len);
|
|
return out + len;
|
|
}
|
|
|
|
int
|
|
lizard_compress(const byte *in, uint in_len, byte *out)
|
|
/* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
|
|
* LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters
|
|
* allocated after in. Returns the actual compressed length. */
|
|
{
|
|
hash_ptr_t hash_tab[HASH_SIZE];
|
|
struct hash_record hash_rec[HASH_RECORDS];
|
|
const byte *in_end = in + in_len;
|
|
byte *out_start = out;
|
|
const byte *copy_start = in;
|
|
uint head = 1; /* 0 in unused */
|
|
uint to_delete = 0, bof = 1;
|
|
bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */
|
|
while (in < in_end)
|
|
{
|
|
uint hash = hashf(in);
|
|
byte *best = NULL;
|
|
uint len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
|
|
if (len < 3)
|
|
#if 0 // TODO: now, our routine does not detect matches of length 2
|
|
if (len == 2 && (in - best->string - 1) < (1<<10))
|
|
{ /* pass-thru */ }
|
|
else
|
|
#endif
|
|
{
|
|
literal:
|
|
head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
|
|
in++; /* add a literal */
|
|
continue;
|
|
}
|
|
|
|
if (in + len > in_end) /* crop EOF */
|
|
{
|
|
len = in_end - in;
|
|
if (len < 3)
|
|
goto literal;
|
|
}
|
|
/* Record the match. */
|
|
uint copy_len = in - copy_start;
|
|
uint is_in_copy_mode = bof || copy_len >= 4;
|
|
uint shift = in - best - 1;
|
|
/* Try to use a 2-byte sequence. */
|
|
#if 0
|
|
if (len == 2)
|
|
{
|
|
if (is_in_copy_mode || !copy_len) /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
|
|
goto literal;
|
|
else
|
|
goto dump_2sequence;
|
|
} else
|
|
#endif
|
|
/* now, len >= 3 */
|
|
if (shift < (1<<11) && len <= 8)
|
|
{
|
|
shift |= (len-3 + 2)<<11;
|
|
dump_2sequence:
|
|
if (copy_len)
|
|
out = flush_copy_command(bof, out, copy_start, copy_len);
|
|
*out++ = (shift>>6) & ~3; /* shift fits into 10 bits */
|
|
*out++ = shift & 0xff;
|
|
}
|
|
else if (len == 3 && is_in_copy_mode)
|
|
{
|
|
if (shift < (1<<11) + (1<<10)) /* optimisation for length-3 matches after a copy command */
|
|
{
|
|
shift -= 1<<11;
|
|
goto dump_2sequence; /* shift has 11 bits and contains also len */
|
|
}
|
|
else /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
|
|
goto literal;
|
|
}
|
|
/* We have to use a 3-byte sequence. */
|
|
else
|
|
{
|
|
if (copy_len)
|
|
out = flush_copy_command(bof, out, copy_start, copy_len);
|
|
if (shift < (1<<14))
|
|
{
|
|
if (len <= 33)
|
|
*out++ = (1<<5) | (len-2);
|
|
else
|
|
{
|
|
*out++ = 1<<5;
|
|
out = dump_unary_value(out, len - 33);
|
|
}
|
|
}
|
|
else /* shift < (1<<15)-1 becase of HASH_RECORDS */
|
|
{
|
|
shift++; /* because shift==0 is reserved for EOF */
|
|
byte pos_bit = ((shift>>11) & (1<<3)) | (1<<4);
|
|
if (len <= 9)
|
|
*out++ = pos_bit | (len-2);
|
|
else
|
|
{
|
|
*out++ = pos_bit;
|
|
out = dump_unary_value(out, len - 9);
|
|
}
|
|
}
|
|
*out++ = (shift>>6) & ~3; /* rest of shift fits into 14 bits */
|
|
*out++ = shift & 0xff;
|
|
}
|
|
/* Update the hash-table. */
|
|
head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
|
|
for (uint i=1; i<len; i++)
|
|
head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
|
|
in += len;
|
|
copy_start = in;
|
|
bof = 0;
|
|
}
|
|
uint copy_len = in - copy_start;
|
|
if (copy_len)
|
|
out = flush_copy_command(bof, out, copy_start, copy_len);
|
|
*out++ = 17; /* add EOF */
|
|
*out++ = 0;
|
|
*out++ = 0;
|
|
return out - out_start;
|
|
}
|
|
|
|
static inline byte *
|
|
read_unary_value(const byte *in, uint *val)
|
|
{
|
|
uint l = 0;
|
|
while (!*in++)
|
|
l += 255;
|
|
l += in[-1];
|
|
*val = l;
|
|
return (byte *)in;
|
|
}
|
|
|
|
int
|
|
lizard_decompress(const byte *in, byte *out)
|
|
/* Requires out being allocated for the decompressed length must be known
|
|
* beforehand. It is desirable to lock the following memory page for
|
|
* read-only access to prevent buffer overflow. Returns the actual
|
|
* decompressed length or a negative number when an error has occurred. */
|
|
{
|
|
byte *out_start = out;
|
|
uint expect_copy_command = 1;
|
|
uint len;
|
|
if (*in > 17) /* short copy command at BOF */
|
|
{
|
|
len = *in++ - 17;
|
|
goto perform_copy_command;
|
|
}
|
|
while (1)
|
|
{
|
|
uint c = *in++;
|
|
uint pos;
|
|
if (c < 0x10)
|
|
if (expect_copy_command == 1)
|
|
{
|
|
if (!c)
|
|
{
|
|
in = read_unary_value(in, &len);
|
|
len += 18;
|
|
}
|
|
else
|
|
len = c + 3;
|
|
goto perform_copy_command;
|
|
}
|
|
else
|
|
{
|
|
pos = ((c&0xc)<<6) | *in++;
|
|
if (expect_copy_command == 2)
|
|
{
|
|
pos += 1<<11;
|
|
len = 3;
|
|
}
|
|
else
|
|
len = 2;
|
|
pos++;
|
|
}
|
|
else if (c < 0x20)
|
|
{
|
|
pos = (c&0x8)<<11;
|
|
len = c&0x7;
|
|
if (!len)
|
|
{
|
|
in = read_unary_value(in, &len);
|
|
len += 9;
|
|
}
|
|
else
|
|
len += 2;
|
|
pos |= (*in++ & 0xfc)<<6;
|
|
pos |= *in++;
|
|
if (!pos) /* EOF */
|
|
break;
|
|
/* do NOT pos++ */
|
|
}
|
|
else if (c < 0x40)
|
|
{
|
|
len = c&0x1f;
|
|
if (!len)
|
|
{
|
|
in = read_unary_value(in, &len);
|
|
len += 33;
|
|
}
|
|
else
|
|
len += 2;
|
|
pos = (*in++ & 0xfc)<<6;
|
|
pos |= *in++;
|
|
pos++;
|
|
}
|
|
else /* high bits encode the length */
|
|
{
|
|
len = ((c&0xe0)>>5) -2 +3;
|
|
pos = (c&0x1c)<<6;
|
|
pos |= *in++;
|
|
pos++;
|
|
}
|
|
/* take from the sliding window */
|
|
if (len <= pos)
|
|
{
|
|
memcpy(out, out-pos, len);
|
|
out += len;
|
|
}
|
|
else
|
|
{ /* overlapping */
|
|
for (; len-- > 0; out++)
|
|
*out = *(out-pos);
|
|
/* It's tempting to use out[-pos] above, but unfortunately it's not the same */
|
|
}
|
|
/* extract the copy-bits */
|
|
len = in[-2] & 0x3;
|
|
if (len)
|
|
{
|
|
expect_copy_command = 0;
|
|
#ifdef CPU_ALLOW_UNALIGNED
|
|
* (u32*) out = * (u32*) in;
|
|
out += len;
|
|
in += len;
|
|
#else
|
|
while (len-- > 0)
|
|
*out++ = *in++;
|
|
#endif
|
|
}
|
|
else
|
|
expect_copy_command = 1;
|
|
continue;
|
|
|
|
perform_copy_command:
|
|
expect_copy_command = 2;
|
|
memcpy(out, in, len);
|
|
out += len;
|
|
in += len;
|
|
}
|
|
|
|
return out - out_start;
|
|
}
|
|
|
|
/*

Description of the LZO1X format :
=================================

The meaning of the commands depends on the current mode. It can be either
the compressed mode or the copy mode. In some cases, the compressed mode
also distinguishes whether we just left the copy mode or not.

Beginning of file:
------------------

Start in copy mode. If the first byte is 00010001, it means probably EOF (empty file),
so switch to the compressed mode. If it is bigger, subtract 17 and copy this number of
the following characters to the output and switch to the compressed mode.
If it is smaller, interpret it as a regular copy mode command.

Compressed mode:
----------------

Read the first byte of the sequence and determine the type of bit encoding by
looking at the most significant bits. The sequence is always at least 2 bytes
long. Decode sequences of these types until the EOF or END marker is read.

  length L = length of the text taken from the sliding window

    If L=0, then count the number Z of the following zero bytes and add Z*255
    to the value of the following non-zero byte. This allows setting L
    arbitrarily high.

  position p = relative position of the beginning of the text

    Exception: 00010001 00000000 00000000 means EOF

  copying C = length 1..3 of copied characters or END=0

    C following characters will be copied from the compressed text to the
    output. The number C is always stored in the 2 least significant bits
    (marked CC) of the second last byte of the sequence.

    If END is read, the algorithm switches to the copy mode.

  pattern                               length          position

  0000ppCC             pppppppp         2               10 bits         [default interpretation]
  0000ppCC             pppppppp         3               10 bits + 2048  [just after return from copy mode]
  0001pLLL L* ppppppCC pppppppp         3..9 + extend   15 bits         [pos 0 interpreted as EOF]
  001LLLLL L* ppppppCC pppppppp         3..33 + extend  14 bits
  LLLpppCC             pppppppp         3..8            11 bits         [LLL >= 010]

Copy mode:
----------

Read the first byte and, if the most significant bits are 0000, perform the
following command, otherwise switch to the compressed mode (and evaluate the
command there).

  pattern                               length          position

  0000LLLL L*                           4..18 + extend  N/A

    Copy L characters from the compressed text to the output. The overhead for
    incompressible strings is only roughly 1/256 + epsilon.

*/
|
|
|