#include <algorithm>
#include <queue>
#include "Defines.h"
#include "Huffman.h"
#include "BitStuffer2.h"
using namespace std;
USING_NAMESPACE_LERC
bool Huffman::ComputeCodes(const vector<int>& histo)
{
if (histo.empty() || histo.size() >= m_maxHistoSize)
return false;
priority_queue<Node, vector<Node>, less<Node> > pq;
int numNodes = 0;
int size = (int)histo.size();
for (int i = 0; i < size; i++) if (histo[i] > 0)
pq.push(Node((short)i, histo[i]));
if (pq.size() < 2) return false;
while (pq.size() > 1) {
Node* child0 = new Node(pq.top());
numNodes++;
pq.pop();
Node* child1 = new Node(pq.top());
numNodes++;
pq.pop();
pq.push(Node(child0, child1));
}
m_codeTable.resize(size);
std::fill(m_codeTable.begin(), m_codeTable.end(),
std::pair<unsigned short, unsigned int>((short)0, 0));
if (!pq.top().TreeToLUT(0, 0, m_codeTable)) return false;
Node nodeNonConst = pq.top();
nodeNonConst.FreeTree(numNodes);
if (numNodes != 0) return false;
if (!ConvertCodesToCanonical())
return false;
return true;
}
bool Huffman::ComputeCompressedSize(const std::vector<int>& histo, int& numBytes, double& avgBpp) const
{
if (histo.empty() || histo.size() >= m_maxHistoSize)
return false;
numBytes = 0;
if (!ComputeNumBytesCodeTable(numBytes)) return false;
int numBits = 0, numElem = 0;
int size = (int)histo.size();
for (int i = 0; i < size; i++)
if (histo[i] > 0)
{
numBits += histo[i] * m_codeTable[i].first;
numElem += histo[i];
}
if (numElem == 0)
return false;
int numUInts = ((((numBits + 7) >> 3) + 3) >> 2) + 1; numBytes += 4 * numUInts; avgBpp = 8 * numBytes / (double)numElem;
return true;
}
bool Huffman::SetCodes(const vector<pair<unsigned short, unsigned int> >& codeTable)
{
if (codeTable.empty() || codeTable.size() >= m_maxHistoSize)
return false;
m_codeTable = codeTable;
return true;
}
bool Huffman::WriteCodeTable(Byte** ppByte, int lerc2Version) const
{
if (!ppByte)
return false;
int i0, i1, maxLen;
if (!GetRange(i0, i1, maxLen))
return false;
int size = (int)m_codeTable.size();
vector<unsigned int> dataVec(i1 - i0, 0);
for (int i = i0; i < i1; i++)
{
int k = GetIndexWrapAround(i, size);
dataVec[i - i0] = m_codeTable[k].first;
}
vector<int> intVec;
intVec.push_back(4); intVec.push_back(size);
intVec.push_back(i0); intVec.push_back(i1);
Byte* ptr = *ppByte;
size_t len = intVec.size() * sizeof(int);
memcpy(ptr, &intVec[0], len);
ptr += len;
BitStuffer2 bitStuffer2;
if (!bitStuffer2.EncodeSimple(&ptr, dataVec, lerc2Version)) return false;
if (!BitStuffCodes(&ptr, i0, i1)) return false;
*ppByte = ptr;
return true;
}
bool Huffman::ReadCodeTable(const Byte** ppByte, size_t& nBytesRemainingInOut, int lerc2Version)
{
if (!ppByte || !(*ppByte))
return false;
const Byte* ptr = *ppByte;
size_t nBytesRemaining = nBytesRemainingInOut;
vector<int> intVec(4, 0);
size_t len = intVec.size() * sizeof(int);
if (nBytesRemaining < len)
return false;
memcpy(&intVec[0], ptr, len);
ptr += len;
nBytesRemaining -= len;
int version = intVec[0];
if (version < 2) return false;
const int size = intVec[1];
const int i0 = intVec[2];
const int i1 = intVec[3];
if (i0 >= i1 || i0 < 0 || size < 0 || size > (int)m_maxHistoSize)
return false;
if (GetIndexWrapAround(i0, size) >= size || GetIndexWrapAround(i1 - 1, size) >= size)
return false;
try
{
vector<unsigned int> dataVec(i1 - i0, 0);
BitStuffer2 bitStuffer2;
if (!bitStuffer2.Decode(&ptr, nBytesRemaining, dataVec, dataVec.size(), lerc2Version)) return false;
if (dataVec.size() != static_cast<size_t>(i1 - i0))
return false;
m_codeTable.resize(size);
std::fill(m_codeTable.begin(), m_codeTable.end(),
std::pair<unsigned short, unsigned int>((short)0, 0));
for (int i = i0; i < i1; i++)
{
int k = GetIndexWrapAround(i, size);
m_codeTable[k].first = (unsigned short)dataVec[i - i0];
}
if (!BitUnStuffCodes(&ptr, nBytesRemaining, i0, i1)) return false;
*ppByte = ptr;
nBytesRemainingInOut = nBytesRemaining;
return true;
}
catch (std::exception&)
{
return false;
}
}
bool Huffman::BuildTreeFromCodes(int& numBitsLUT)
{
int i0 = 0, i1 = 0, maxLen = 0;
if (!GetRange(i0, i1, maxLen))
return false;
int size = (int)m_codeTable.size();
int minNumZeroBits = 32;
bool bNeedTree = maxLen > m_maxNumBitsLUT;
numBitsLUT = min(maxLen, m_maxNumBitsLUT);
int sizeLUT = 1 << numBitsLUT;
m_decodeLUT.clear();
m_decodeLUT.assign((size_t)sizeLUT, pair<short, short>((short)-1, (short)-1));
for (int i = i0; i < i1; i++)
{
int k = GetIndexWrapAround(i, size);
int len = m_codeTable[k].first;
if (len == 0)
continue;
unsigned int code = m_codeTable[k].second;
if (len <= numBitsLUT)
{
code <<= (numBitsLUT - len);
unsigned int numEntries = 1 << (numBitsLUT - len);
pair<short, short> entry((short)len, (short)k);
for (unsigned int j = 0; j < numEntries; j++)
m_decodeLUT[code | j] = entry; }
else {
int shift = 1;
while (code >>= 1) shift++; minNumZeroBits = min(minNumZeroBits, len - shift);
}
}
m_numBitsToSkipInTree = bNeedTree? minNumZeroBits : 0;
if (!bNeedTree) return true;
ClearTree();
Node emptyNode((short)-1, 0);
m_root = new Node(emptyNode);
for (int i = i0; i < i1; i++)
{
int k = GetIndexWrapAround(i, size);
int len = m_codeTable[k].first;
if (len > 0 && len > numBitsLUT) {
unsigned int code = m_codeTable[k].second;
Node* node = m_root;
int j = len - m_numBitsToSkipInTree;
while (--j >= 0) {
if (code & (1 << j))
{
if (!node->child1)
node->child1 = new Node(emptyNode);
node = node->child1;
}
else
{
if (!node->child0)
node->child0 = new Node(emptyNode);
node = node->child0;
}
if (j == 0) node->value = (short)k; }
}
}
return true;
}
void Huffman::Clear()
{
m_codeTable.clear();
m_decodeLUT.clear();
ClearTree();
}
void Huffman::ClearTree()
{
if (m_root)
{
int n = 0;
m_root->FreeTree(n);
delete m_root;
m_root = nullptr;
}
}
bool Huffman::ComputeNumBytesCodeTable(int& numBytes) const
{
int i0, i1, maxLen;
if (!GetRange(i0, i1, maxLen))
return false;
int size = (int)m_codeTable.size();
int sum = 0;
for (int i = i0; i < i1; i++)
{
int k = GetIndexWrapAround(i, size);
sum += m_codeTable[k].first;
}
numBytes = 4 * sizeof(int);
BitStuffer2 bitStuffer2;
numBytes += bitStuffer2.ComputeNumBytesNeededSimple((unsigned int)(i1 - i0), (unsigned int)maxLen); int numUInts = (((sum + 7) >> 3) + 3) >> 2;
numBytes += 4 * numUInts;
return true;
}
bool Huffman::GetRange(int& i0, int& i1, int& maxCodeLength) const
{
if (m_codeTable.empty() || m_codeTable.size() >= m_maxHistoSize)
return false;
int size = (int)m_codeTable.size();
{
int i = 0;
while (i < size && m_codeTable[i].first == 0) i++;
i0 = i;
i = size - 1;
while (i >= 0 && m_codeTable[i].first == 0) i--;
i1 = i + 1; }
if (i1 <= i0)
return false;
pair<int, int> segm(0, 0);
int j = 0;
while (j < size) {
while (j < size && m_codeTable[j].first > 0) j++;
int k0 = j;
while (j < size && m_codeTable[j].first == 0) j++;
int k1 = j;
if (k1 - k0 > segm.second)
segm = pair<int, int>(k0, k1 - k0);
}
if (size - segm.second < i1 - i0)
{
i0 = segm.first + segm.second;
i1 = segm.first + size; }
if (i1 <= i0)
return false;
int maxLen = 0;
for (int i = i0; i < i1; i++)
{
int k = GetIndexWrapAround(i, size);
int len = m_codeTable[k].first;
maxLen = max(maxLen, len);
}
if (maxLen <= 0 || maxLen > 32)
return false;
maxCodeLength = maxLen;
return true;
}
bool Huffman::BitStuffCodes(Byte** ppByte, int i0, int i1) const
{
if (!ppByte)
return false;
int size = (int)m_codeTable.size();
int bitPos = 0;
for (int i = i0; i < i1; i++)
{
int k = GetIndexWrapAround(i, size);
int len = m_codeTable[k].first;
if (len > 0)
{
unsigned int val = m_codeTable[k].second;
if (!Huffman::PushValue(ppByte, bitPos, val, len))
return false;
}
}
size_t numUInts = (bitPos > 0 ? 1 : 0);
*ppByte += numUInts * sizeof(unsigned int);
return true;
}
bool Huffman::BitUnStuffCodes(const Byte** ppByte, size_t& nBytesRemainingInOut, int i0, int i1)
{
if (!ppByte || !(*ppByte))
return false;
size_t nBytesRemaining = nBytesRemainingInOut;
const Byte* ptr0 = *ppByte;
const Byte* ptr = ptr0;
const size_t s4 = sizeof(unsigned int);
int size = (int)m_codeTable.size();
int bitPos = 0;
for (int i = i0; i < i1; i++)
{
int k = GetIndexWrapAround(i, size);
int len = m_codeTable[k].first;
if (len > 0)
{
if (nBytesRemaining < s4 || len > 32)
return false;
unsigned int temp(0);
memcpy(&temp, ptr, s4);
m_codeTable[k].second = (temp << bitPos) >> (32 - len);
if (32 - bitPos >= len)
{
bitPos += len;
if (bitPos == 32)
{
bitPos = 0;
ptr += s4;
nBytesRemaining -= s4;
}
}
else
{
bitPos += len - 32;
ptr += s4;
nBytesRemaining -= s4;
if (nBytesRemaining < s4)
return false;
memcpy(&temp, ptr, s4);
m_codeTable[k].second |= temp >> (32 - bitPos); }
}
}
size_t len = (ptr - ptr0) + (bitPos > 0 ? s4 : 0);
if (nBytesRemainingInOut < len)
return false;
*ppByte += len;
nBytesRemainingInOut -= len;
if (nBytesRemaining != nBytesRemainingInOut
&& nBytesRemaining != nBytesRemainingInOut + s4) return false;
return true;
}
bool Huffman::ConvertCodesToCanonical()
{
unsigned int tableSize = (unsigned int)m_codeTable.size();
vector<pair<int, unsigned int>> sortVec(tableSize, pair<int, unsigned int>(0, 0));
for (unsigned int i = 0; i < tableSize; i++)
if (m_codeTable[i].first > 0)
sortVec[i] = pair<int, unsigned int>(m_codeTable[i].first * tableSize - i, i);
std::sort(sortVec.begin(), sortVec.end(),
[](const pair<int, unsigned int>& p0,
const pair<int, unsigned int>& p1) { return p0.first > p1.first; });
unsigned int index = sortVec[0].second;
unsigned short codeLen = m_codeTable[index].first; unsigned int i = 0, codeCanonical = 0;
while (i < tableSize && sortVec[i].first > 0)
{
index = sortVec[i++].second;
short delta = codeLen - m_codeTable[index].first; codeCanonical >>= delta;
codeLen -= delta;
m_codeTable[index].second = codeCanonical++;
}
return true;
}