use std::io;
#[derive(Clone)]
enum DictEntry {
Leaf(Vec<u8>),
Node(Vec<u8>),
Unpacked(Vec<u8>),
}
pub struct HuffCdicReader {
dict1: Vec<(u8, bool, u32)>,
mincode: Vec<u32>,
maxcode: Vec<u32>,
dictionary: Vec<DictEntry>,
}
impl HuffCdicReader {
pub fn new(huff: &[u8], cdics: &[&[u8]]) -> io::Result<Self> {
let mut reader = Self {
dict1: Vec::with_capacity(256),
mincode: Vec::with_capacity(33),
maxcode: Vec::with_capacity(33),
dictionary: Vec::new(),
};
reader.load_huff(huff)?;
for cdic in cdics {
reader.load_cdic(cdic)?;
}
Ok(reader)
}
fn load_huff(&mut self, huff: &[u8]) -> io::Result<()> {
if huff.len() < 24 || &huff[0..8] != b"HUFF\x00\x00\x00\x18" {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Invalid HUFF header",
));
}
let off1 = u32::from_be_bytes([huff[8], huff[9], huff[10], huff[11]]) as usize;
let off2 = u32::from_be_bytes([huff[12], huff[13], huff[14], huff[15]]) as usize;
if huff.len() < off1 + 256 * 4 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"HUFF dict1 truncated",
));
}
for i in 0..256 {
let pos = off1 + i * 4;
let v = u32::from_be_bytes([huff[pos], huff[pos + 1], huff[pos + 2], huff[pos + 3]]);
let codelen = (v & 0x1f) as u8;
let term = (v & 0x80) != 0;
let maxcode_raw = v >> 8;
let maxcode = if codelen > 0 {
((maxcode_raw + 1) << (32 - codelen)).wrapping_sub(1)
} else {
0
};
self.dict1.push((codelen, term, maxcode));
}
if huff.len() < off2 + 64 * 4 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"HUFF dict2 truncated",
));
}
self.mincode.push(0);
self.maxcode.push(0);
for i in 0..32 {
let pos = off2 + i * 8;
let mincode_raw =
u32::from_be_bytes([huff[pos], huff[pos + 1], huff[pos + 2], huff[pos + 3]]);
let maxcode_raw =
u32::from_be_bytes([huff[pos + 4], huff[pos + 5], huff[pos + 6], huff[pos + 7]]);
let codelen = i + 1;
self.mincode.push(mincode_raw << (32 - codelen));
self.maxcode
.push(((maxcode_raw + 1) << (32 - codelen)).wrapping_sub(1));
}
Ok(())
}
fn load_cdic(&mut self, cdic: &[u8]) -> io::Result<()> {
if cdic.len() < 16 || &cdic[0..8] != b"CDIC\x00\x00\x00\x10" {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Invalid CDIC header",
));
}
let phrases = u32::from_be_bytes([cdic[8], cdic[9], cdic[10], cdic[11]]) as usize;
let bits = u32::from_be_bytes([cdic[12], cdic[13], cdic[14], cdic[15]]) as usize;
let n = std::cmp::min(1 << bits, phrases.saturating_sub(self.dictionary.len()));
if cdic.len() < 16 + n * 2 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"CDIC offset table truncated",
));
}
for i in 0..n {
let off_pos = 16 + i * 2;
let off = u16::from_be_bytes([cdic[off_pos], cdic[off_pos + 1]]) as usize;
if 16 + off + 2 > cdic.len() {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"CDIC entry truncated",
));
}
let blen = u16::from_be_bytes([cdic[16 + off], cdic[16 + off + 1]]);
let slice_len = (blen & 0x7fff) as usize;
let is_leaf = (blen & 0x8000) != 0;
let slice_start = 16 + off + 2;
let slice_end = std::cmp::min(slice_start + slice_len, cdic.len());
let slice = cdic[slice_start..slice_end].to_vec();
if is_leaf {
self.dictionary.push(DictEntry::Leaf(slice));
} else {
self.dictionary.push(DictEntry::Node(slice));
}
}
Ok(())
}
pub fn decompress(&mut self, data: &[u8]) -> io::Result<Vec<u8>> {
let mut result = Vec::new();
self.unpack_into(data, &mut result)?;
Ok(result)
}
fn unpack_into(&mut self, data: &[u8], output: &mut Vec<u8>) -> io::Result<()> {
let bitsleft = data.len() * 8;
let mut bits_remaining = bitsleft as i64;
let mut padded = data.to_vec();
padded.extend_from_slice(&[0u8; 8]);
let mut pos = 0usize;
let mut x = read_u64_be(&padded, pos);
let mut n: i32 = 32;
while bits_remaining > 0 {
if n <= 0 {
pos += 4;
x = read_u64_be(&padded, pos);
n += 32;
}
let code = ((x >> n) & 0xFFFFFFFF) as u32;
let (mut codelen, term, mut maxcode) = self.dict1[(code >> 24) as usize];
if !term {
while codelen < 32 && code < self.mincode[codelen as usize] {
codelen += 1;
}
if codelen < 33 {
maxcode = self.maxcode[codelen as usize];
}
}
n -= codelen as i32;
bits_remaining -= codelen as i64;
if bits_remaining < 0 {
break;
}
let r = if codelen > 0 {
(maxcode.wrapping_sub(code) >> (32 - codelen)) as usize
} else {
0
};
if r >= self.dictionary.len() {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!(
"Dictionary index {} out of bounds (len {})",
r,
self.dictionary.len()
),
));
}
match &self.dictionary[r] {
DictEntry::Leaf(slice) => {
output.extend_from_slice(slice);
}
DictEntry::Node(slice) => {
let slice_copy = slice.clone();
let mut unpacked = Vec::new();
self.unpack_into(&slice_copy, &mut unpacked)?;
output.extend_from_slice(&unpacked);
self.dictionary[r] = DictEntry::Unpacked(unpacked);
}
DictEntry::Unpacked(slice) => {
output.extend_from_slice(slice);
}
}
}
Ok(())
}
}
fn read_u64_be(data: &[u8], pos: usize) -> u64 {
if pos + 8 <= data.len() {
u64::from_be_bytes([
data[pos],
data[pos + 1],
data[pos + 2],
data[pos + 3],
data[pos + 4],
data[pos + 5],
data[pos + 6],
data[pos + 7],
])
} else {
0
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_read_u64_be() {
let data = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01];
assert_eq!(read_u64_be(&data, 0), 1);
let data2 = [0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
assert_eq!(read_u64_be(&data2, 0), 0x0100000000000000);
}
}