use crate::Reader;
use thiserror::Error;
type HuffmanResult<T> = Result<T, HuffmanError>;
#[derive(Debug, Error)]
pub enum HuffmanError {
#[error(transparent)]
IoError(#[from] std::io::Error),
#[error("code len out of bounds")]
CodeLenOutOfBounds,
#[error("bad termination code")]
BadTerm,
#[error("expected HUFF magic header")]
InvalidHuffHeader,
#[error("expected CDIC magic header")]
InvalidCDICHeader,
#[error("the provided index to huffman dictionary was out of bounds")]
InvalidDictionaryIndex,
}
type HuffmanDictionary = Vec<Option<(Vec<u8>, bool)>>;
type CodeDictionary = [(u8, bool, u32); 256];
type MinCodesMapping = [u32; 33];
type MaxCodesMapping = [u32; 33];
#[derive(Debug)]
struct HuffmanDecoder {
dictionary: HuffmanDictionary,
code_dict: CodeDictionary,
min_codes: MinCodesMapping,
max_codes: MaxCodesMapping,
}
impl Default for HuffmanDecoder {
fn default() -> Self {
Self {
dictionary: vec![],
code_dict: [(0, false, 0); 256],
min_codes: [0; 33],
max_codes: [u32::MAX; 33],
}
}
}
impl HuffmanDecoder {
fn load_code_dictionary<R: std::io::Read>(
&mut self,
reader: &mut Reader<R>,
offset: usize,
) -> HuffmanResult<()> {
reader.set_position(offset)?;
for code in self.code_dict.iter_mut() {
let v = reader.read_u32_be()?;
let (code_len, term, mut max_code) = ((v & 0x1F) as u8, (v & 0x80) == 0x80, v >> 8);
if code_len == 0 {
return Err(HuffmanError::CodeLenOutOfBounds);
}
if code_len <= 8 && !term {
return Err(HuffmanError::BadTerm);
}
max_code = ((max_code + 1) << (32u8.saturating_sub(code_len))).saturating_sub(1);
*code = (code_len, term, max_code);
}
Ok(())
}
fn load_min_max_codes<R: std::io::Read>(
&mut self,
reader: &mut Reader<R>,
offset: usize,
) -> HuffmanResult<()> {
reader.set_position(offset)?;
for code_len in 1..=32 {
self.min_codes[code_len] = reader.read_u32_be()? << (32 - code_len);
self.max_codes[code_len] =
((reader.read_u32_be()? + 1) << (32 - code_len)).saturating_sub(1);
}
Ok(())
}
fn load_huff(&mut self, huff: &[u8]) -> HuffmanResult<()> {
let mut r = Reader::new(std::io::Cursor::new(huff));
if &r.read_u32_be()?.to_be_bytes() != b"HUFF" || r.read_u32_be()? != 0x18 {
return Err(HuffmanError::InvalidHuffHeader);
}
let cache_offset = r.read_u32_be()?;
let base_offset = r.read_u32_be()?;
self.load_code_dictionary(&mut r, cache_offset as usize)?;
self.load_min_max_codes(&mut r, base_offset as usize)?;
Ok(())
}
fn load_cdic_record(&mut self, cdic: &[u8]) -> HuffmanResult<()> {
let mut r = Reader::new(std::io::Cursor::new(cdic));
if &r.read_u32_be()?.to_be_bytes() != b"CDIC" || r.read_u32_be()? != 0x10 {
return Err(HuffmanError::InvalidCDICHeader);
}
let num_phrases = r.read_u32_be()?;
let bits = r.read_u32_be()?;
let n = (1 << bits).min(num_phrases - self.dictionary.len() as u32);
let mut offsets = Vec::with_capacity(n as usize);
for _ in 0..n {
offsets.push(r.read_u16_be()?);
}
for offset in offsets {
r.set_position(16 + offset as usize)?;
let num_bytes = r.read_u16_be()?;
let bytes = r.read_vec_header((num_bytes & 0x7FFF) as usize)?;
self.dictionary
.push(Some((bytes, (num_bytes & 0x8000) == 0x8000)));
}
Ok(())
}
fn load_cdic_records(&mut self, records: &[&[u8]]) -> HuffmanResult<()> {
for cdic in records {
self.load_cdic_record(cdic)?;
}
Ok(())
}
fn unpack(&mut self, data: &[u8]) -> HuffmanResult<Vec<u8>> {
let mut bits_left = data.len() * 8;
let mut r = Reader::new(std::io::Cursor::new(&data));
let mut x = r.read_u64_be()?;
let mut n = 32i8;
let mut unpacked = vec![];
loop {
if n <= 0 {
if bits_left < 32 {
for _ in 0..bits_left / 8 {
x = (x << 8) | u64::from(r.read_u8()?);
}
x <<= 32 - bits_left;
} else {
x = (x << 32) | u64::from(r.read_u32_be()?);
}
n += 32;
}
let code = (x >> n) as u32;
let (code_len, term, mut max_code) = self.code_dict[(code >> 24) as usize];
let mut code_len = code_len as usize;
if !term {
code_len += self.min_codes[code_len..]
.iter()
.position(|&min_code| code >= min_code)
.unwrap();
max_code = self.max_codes[code_len];
}
let index = ((max_code - code) >> (32 - code_len)) as usize;
println!(
"max_code: {}, code: {}, code_len: {}, index: {}, dict_len: {}",
max_code,
code,
code_len,
index,
self.dictionary.len()
);
let (mut slice, flag) = std::mem::take(
self.dictionary
.get_mut(index)
.ok_or(HuffmanError::InvalidDictionaryIndex)?,
)
.ok_or(HuffmanError::InvalidDictionaryIndex)?;
if !flag {
slice = self.unpack(&slice)?;
}
unpacked.extend_from_slice(&slice);
self.dictionary[index] = Some((slice, true));
n -= code_len as i8;
bits_left = match bits_left.checked_sub(code_len) {
None | Some(0) => break,
Some(i) => i,
};
}
println!("unpacked: {:?}", unpacked);
Ok(unpacked)
}
fn unpack_sections(&mut self, sections: &[&[u8]]) -> HuffmanResult<Vec<Vec<u8>>> {
let mut output = vec![];
for section in sections {
output.push(self.unpack(section)?);
}
Ok(output)
}
fn init(huffs: &[&[u8]]) -> HuffmanResult<Self> {
let mut decoder = Self::default();
decoder.load_huff(huffs[0])?;
decoder.load_cdic_records(&huffs[1..])?;
eprintln!("{:#?}", decoder);
Ok(decoder)
}
}
pub fn decompress(huffs: &[&[u8]], sections: &[&[u8]]) -> HuffmanResult<Vec<Vec<u8>>> {
let mut decoder = HuffmanDecoder::init(huffs)?;
decoder.unpack_sections(sections)
}