#include "oozle/include/huffman.h"
#include "oozle/include/decompress.h"
int32_t
Huff_ReadCodeLengthsOld (BitReader *bits, uint8_t *syms,
uint32_t *code_prefix)
{
if (BitReader_ReadBitNoRefill (bits))
{
int32_t n, sym = 0, codelen, num_symbols = 0;
int32_t avg_bits_x4 = 32;
int32_t forced_bits = BitReader_ReadBitsNoRefill (bits, 2);
uint32_t thres_for_valid_gamma_bits = 1 << (31 - (20u >> forced_bits));
if (BitReader_ReadBit (bits))
goto SKIP_INITIAL_ZEROS;
do
{
if (!(bits->bits & 0xff000000))
return -1;
sym += BitReader_ReadBitsNoRefill (
bits, 2 * (CountLeadingZeros (bits->bits) + 1))
- 2 + 1;
if (sym >= 256)
break;
SKIP_INITIAL_ZEROS:
BitReader_Refill (bits);
if (!(bits->bits & 0xff000000))
return -1;
n = BitReader_ReadBitsNoRefill (
bits, 2 * (CountLeadingZeros (bits->bits) + 1))
- 2 + 1;
if (sym + n > 256)
return -1;
BitReader_Refill (bits);
num_symbols += n;
do
{
if (bits->bits < thres_for_valid_gamma_bits)
return -1;
int32_t lz = CountLeadingZeros (bits->bits);
int32_t v
= BitReader_ReadBitsNoRefill (bits, lz + forced_bits + 1)
+ ((lz - 1) << forced_bits);
codelen
= (-(int32_t)(v & 1) ^ (v >> 1)) + ((avg_bits_x4 + 2) >> 2);
if (codelen < 1 || codelen > 11)
return -1;
avg_bits_x4 = codelen + ((3 * avg_bits_x4 + 2) >> 2);
BitReader_Refill (bits);
syms[code_prefix[codelen]++] = sym++;
}
while (--n);
}
while (sym != 256);
return (sym == 256) && (num_symbols >= 2) ? num_symbols : -1;
}
else
{
int32_t num_symbols = BitReader_ReadBitsNoRefill (bits, 8);
if (num_symbols == 0)
return -1;
if (num_symbols == 1)
{
syms[0] = BitReader_ReadBitsNoRefill (bits, 8);
}
else
{
int32_t codelen_bits = BitReader_ReadBitsNoRefill (bits, 3);
if (codelen_bits > 4)
return -1;
for (int32_t i = 0; i < num_symbols; i++)
{
BitReader_Refill (bits);
int32_t sym = BitReader_ReadBitsNoRefill (bits, 8);
int32_t codelen
= BitReader_ReadBitsNoRefillZero (bits, codelen_bits) + 1;
if (codelen > 11)
return -1;
syms[code_prefix[codelen]++] = sym;
}
}
return num_symbols;
}
}
int32_t
Huff_ConvertToRanges (HuffRange *range, int32_t num_symbols, int32_t P,
const uint8_t *symlen, BitReader *bits)
{
int32_t num_ranges = P >> 1, v, sym_idx = 0;
if (P & 1)
{
BitReader_Refill (bits);
v = *symlen++;
if (v >= 8)
return -1;
sym_idx = BitReader_ReadBitsNoRefill (bits, v + 1) + (1 << (v + 1)) - 1;
}
int32_t syms_used = 0;
for (int32_t i = 0; i < num_ranges; i++)
{
BitReader_Refill (bits);
v = symlen[0];
if (v >= 9)
return -1;
int32_t num = BitReader_ReadBitsNoRefillZero (bits, v) + (1 << v);
v = symlen[1];
if (v >= 8)
return -1;
int32_t space
= BitReader_ReadBitsNoRefill (bits, v + 1) + (1 << (v + 1)) - 1;
range[i].symbol = sym_idx;
range[i].num = num;
syms_used += num;
sym_idx += num + space;
symlen += 2;
}
if (sym_idx >= 256 || syms_used >= num_symbols
|| sym_idx + num_symbols - syms_used > 256)
return -1;
range[num_ranges].symbol = sym_idx;
range[num_ranges].num = num_symbols - syms_used;
return num_ranges + 1;
}
int32_t
Huff_ReadCodeLengthsNew (BitReader *bits, uint8_t *syms,
uint32_t *code_prefix)
{
int32_t forced_bits = BitReader_ReadBitsNoRefill (bits, 2);
int32_t num_symbols = BitReader_ReadBitsNoRefill (bits, 8) + 1;
int32_t fluff = BitReader_ReadFluff (bits, num_symbols);
uint8_t code_len[512];
BitReader2 br2;
br2.bitpos = (bits->bitpos - 24) & 7;
br2.p_end = bits->p_end;
br2.p = bits->p - (unsigned)((24 - bits->bitpos + 7) >> 3);
if (!DecodeGolombRiceLengths (code_len, num_symbols + fluff, &br2))
return -1;
memset (code_len + (num_symbols + fluff), 0, 16);
if (!DecodeGolombRiceBits (code_len, num_symbols, forced_bits, &br2))
return -1;
bits->bitpos = 24;
bits->p = br2.p;
bits->bits = 0;
BitReader_Refill (bits);
bits->bits <<= br2.bitpos;
bits->bitpos += br2.bitpos;
if (1)
{
uint32_t running_sum = 0x1e;
int32_t maxlen = 11;
for (int32_t i = 0; i < num_symbols; i++)
{
int32_t v = code_len[i];
v = -(int32_t)(v & 1) ^ (v >> 1);
code_len[i] = v + (running_sum >> 2) + 1;
if (code_len[i] < 1 || code_len[i] > 11)
return -1;
running_sum += v;
}
}
else
{
__m128i bak = _mm_loadu_si128 ((__m128i *)&code_len[num_symbols]);
_mm_storeu_si128 ((__m128i *)&code_len[num_symbols], _mm_set1_epi32 (0));
__m128i avg = _mm_set1_epi8 (0x1e);
__m128i ones = _mm_set1_epi8 (1);
__m128i max_codeword_len = _mm_set1_epi8 (10);
for (uint32_t i = 0; i < num_symbols; i += 16)
{
__m128i v = _mm_loadu_si128 ((__m128i *)&code_len[i]), t;
avg = _mm_unpackhi_epi8 (avg, avg);
avg = _mm_unpackhi_epi8 (avg, avg);
avg = _mm_shuffle_epi32 (avg, 255);
v = _mm_xor_si128 (
_mm_sub_epi8 (_mm_set1_epi8 (0), _mm_and_si128 (v, ones)),
_mm_and_si128 (_mm_srli_epi16 (v, 1), _mm_set1_epi8 (0x7f)));
t = _mm_add_epi8 (_mm_slli_si128 (v, 1), v);
t = _mm_add_epi8 (_mm_slli_si128 (t, 2), t);
t = _mm_add_epi8 (_mm_slli_si128 (t, 4), t);
t = _mm_add_epi8 (_mm_slli_si128 (t, 8), t);
__m128i u = _mm_and_si128 (
_mm_srli_epi16 (_mm_add_epi8 (_mm_slli_si128 (t, 1), avg), 2u),
_mm_set1_epi8 (0x3f));
v = _mm_add_epi8 (v, u);
avg = _mm_add_epi8 (avg, t);
max_codeword_len = _mm_max_epu8 (max_codeword_len, v);
_mm_storeu_si128 ((__m128i *)&code_len[i],
_mm_add_epi8 (v, _mm_set1_epi8 (1)));
}
_mm_storeu_si128 ((__m128i *)&code_len[num_symbols], bak);
if (_mm_movemask_epi8 (
_mm_cmpeq_epi8 (max_codeword_len, _mm_set1_epi8 (10)))
!= 0xffff)
return -1; }
HuffRange range[128];
int32_t ranges = Huff_ConvertToRanges (range, num_symbols, fluff,
&code_len[num_symbols], bits);
if (ranges <= 0)
return -1;
uint8_t *cp = code_len;
for (int32_t i = 0; i < ranges; i++)
{
int32_t sym = range[i].symbol;
int32_t n = range[i].num;
do
{
syms[code_prefix[*cp++]++] = sym++;
}
while (--n);
}
return num_symbols;
}
bool
Huff_MakeLut (const uint32_t *prefix_org, const uint32_t *prefix_cur,
NewHuffLut *hufflut, uint8_t *syms)
{
uint32_t currslot = 0;
for (uint32_t i = 1; i < 11; i++)
{
uint32_t start = prefix_org[i];
uint32_t count = prefix_cur[i] - start;
if (count)
{
uint32_t stepsize = 1 << (11 - i);
uint32_t num_to_set = count << (11 - i);
if (currslot + num_to_set > 2048)
return false;
FillByteOverflow16 (&hufflut->bits2len[currslot], i, num_to_set);
uint8_t *p = &hufflut->bits2sym[currslot];
for (uint32_t j = 0; j != count; j++, p += stepsize)
FillByteOverflow16 (p, syms[start + j], stepsize);
currslot += num_to_set;
}
}
if (prefix_cur[11] - prefix_org[11] != 0)
{
uint32_t num_to_set = prefix_cur[11] - prefix_org[11];
if (currslot + num_to_set > 2048)
return false;
FillByteOverflow16 (&hufflut->bits2len[currslot], 11, num_to_set);
memcpy (&hufflut->bits2sym[currslot], &syms[prefix_org[11]], num_to_set);
currslot += num_to_set;
}
return currslot == 2048;
}