use std::io;
#[derive(Clone)]
enum DictEntry {
Leaf(Vec<u8>),
Node(Vec<u8>),
Unpacked(Vec<u8>),
}
pub struct HuffCdicReader {
dict1: Vec<(u8, bool, u32)>,
mincode: Vec<u32>,
maxcode: Vec<u32>,
dictionary: Vec<DictEntry>,
}
impl HuffCdicReader {
pub fn new(huff: &[u8], cdics: &[&[u8]]) -> io::Result<Self> {
let mut reader = Self {
dict1: Vec::with_capacity(256),
mincode: Vec::with_capacity(33),
maxcode: Vec::with_capacity(33),
dictionary: Vec::new(),
};
reader.load_huff(huff)?;
for cdic in cdics {
reader.load_cdic(cdic)?;
}
Ok(reader)
}
fn load_huff(&mut self, huff: &[u8]) -> io::Result<()> {
if huff.len() < 24 || &huff[0..8] != b"HUFF\x00\x00\x00\x18" {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Invalid HUFF header",
));
}
let off1 = u32::from_be_bytes([huff[8], huff[9], huff[10], huff[11]]) as usize;
let off2 = u32::from_be_bytes([huff[12], huff[13], huff[14], huff[15]]) as usize;
if huff.len() < off1 + 256 * 4 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"HUFF dict1 truncated",
));
}
for i in 0..256 {
let pos = off1 + i * 4;
let v = u32::from_be_bytes([huff[pos], huff[pos + 1], huff[pos + 2], huff[pos + 3]]);
let codelen = (v & 0x1f) as u8;
let term = (v & 0x80) != 0;
let maxcode_raw = v >> 8;
let maxcode = if codelen > 0 {
((maxcode_raw + 1) << (32 - codelen)).wrapping_sub(1)
} else {
0
};
self.dict1.push((codelen, term, maxcode));
}
if huff.len() < off2 + 64 * 4 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"HUFF dict2 truncated",
));
}
self.mincode.push(0);
self.maxcode.push(0);
for i in 0..32 {
let pos = off2 + i * 8;
let mincode_raw =
u32::from_be_bytes([huff[pos], huff[pos + 1], huff[pos + 2], huff[pos + 3]]);
let maxcode_raw =
u32::from_be_bytes([huff[pos + 4], huff[pos + 5], huff[pos + 6], huff[pos + 7]]);
let codelen = i + 1;
self.mincode.push(mincode_raw << (32 - codelen));
self.maxcode
.push(((maxcode_raw + 1) << (32 - codelen)).wrapping_sub(1));
}
Ok(())
}
fn load_cdic(&mut self, cdic: &[u8]) -> io::Result<()> {
if cdic.len() < 16 || &cdic[0..8] != b"CDIC\x00\x00\x00\x10" {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Invalid CDIC header",
));
}
let phrases = u32::from_be_bytes([cdic[8], cdic[9], cdic[10], cdic[11]]) as usize;
let bits = u32::from_be_bytes([cdic[12], cdic[13], cdic[14], cdic[15]]) as usize;
let n = std::cmp::min(1 << bits, phrases.saturating_sub(self.dictionary.len()));
if cdic.len() < 16 + n * 2 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"CDIC offset table truncated",
));
}
for i in 0..n {
let off_pos = 16 + i * 2;
let off = u16::from_be_bytes([cdic[off_pos], cdic[off_pos + 1]]) as usize;
if 16 + off + 2 > cdic.len() {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"CDIC entry truncated",
));
}
let blen = u16::from_be_bytes([cdic[16 + off], cdic[16 + off + 1]]);
let slice_len = (blen & 0x7fff) as usize;
let is_leaf = (blen & 0x8000) != 0;
let slice_start = 16 + off + 2;
let slice_end = std::cmp::min(slice_start + slice_len, cdic.len());
let slice = cdic[slice_start..slice_end].to_vec();
if is_leaf {
self.dictionary.push(DictEntry::Leaf(slice));
} else {
self.dictionary.push(DictEntry::Node(slice));
}
}
Ok(())
}
pub fn decompress(&mut self, data: &[u8]) -> io::Result<Vec<u8>> {
let mut result = Vec::new();
self.unpack_into(data, &mut result)?;
Ok(result)
}
fn unpack_into(&mut self, data: &[u8], output: &mut Vec<u8>) -> io::Result<()> {
let bitsleft = data.len() * 8;
let mut bits_remaining = bitsleft as i64;
let mut padded = data.to_vec();
padded.extend_from_slice(&[0u8; 8]);
let mut pos = 0usize;
let mut x = read_u64_be(&padded, pos);
let mut n: i32 = 32;
while bits_remaining > 0 {
if n <= 0 {
pos += 4;
x = read_u64_be(&padded, pos);
n += 32;
}
let code = ((x >> n) & 0xFFFFFFFF) as u32;
let (mut codelen, term, mut maxcode) = self.dict1[(code >> 24) as usize];
if !term {
while codelen < 32 && code < self.mincode[codelen as usize] {
codelen += 1;
}
if codelen < 33 {
maxcode = self.maxcode[codelen as usize];
}
}
n -= codelen as i32;
bits_remaining -= codelen as i64;
if bits_remaining < 0 {
break;
}
let r = if codelen > 0 {
(maxcode.wrapping_sub(code) >> (32 - codelen)) as usize
} else {
0
};
if r >= self.dictionary.len() {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!(
"Dictionary index {} out of bounds (len {})",
r,
self.dictionary.len()
),
));
}
match &self.dictionary[r] {
DictEntry::Leaf(slice) => {
output.extend_from_slice(slice);
}
DictEntry::Node(slice) => {
let slice_copy = slice.clone();
let mut unpacked = Vec::new();
self.unpack_into(&slice_copy, &mut unpacked)?;
output.extend_from_slice(&unpacked);
self.dictionary[r] = DictEntry::Unpacked(unpacked);
}
DictEntry::Unpacked(slice) => {
output.extend_from_slice(slice);
}
}
}
Ok(())
}
}
fn read_u64_be(data: &[u8], pos: usize) -> u64 {
if pos + 8 <= data.len() {
u64::from_be_bytes([
data[pos],
data[pos + 1],
data[pos + 2],
data[pos + 3],
data[pos + 4],
data[pos + 5],
data[pos + 6],
data[pos + 7],
])
} else {
0
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_read_u64_be() {
let data = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01];
assert_eq!(read_u64_be(&data, 0), 1);
let data2 = [0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
assert_eq!(read_u64_be(&data2, 0), 0x0100000000000000);
}
fn make_huff() -> Vec<u8> {
let off1: u32 = 24;
let off2: u32 = 24 + 256 * 4;
let mut huff = vec![0u8; 24];
huff[0..8].copy_from_slice(b"HUFF\x00\x00\x00\x18");
huff[8..12].copy_from_slice(&off1.to_be_bytes());
huff[12..16].copy_from_slice(&off2.to_be_bytes());
let entry = 0x0000FF88u32;
for _ in 0..256 {
huff.extend_from_slice(&entry.to_be_bytes());
}
for i in 0..32u32 {
if i + 1 == 8 {
huff.extend_from_slice(&0u32.to_be_bytes()); huff.extend_from_slice(&255u32.to_be_bytes()); } else {
huff.extend_from_slice(&[0u8; 8]); }
}
huff
}
fn make_cdic() -> Vec<u8> {
let num_phrases: u32 = 4;
let bits: u32 = 2;
let mut cdic = vec![0u8; 16];
cdic[0..8].copy_from_slice(b"CDIC\x00\x00\x00\x10");
cdic[8..12].copy_from_slice(&num_phrases.to_be_bytes());
cdic[12..16].copy_from_slice(&bits.to_be_bytes());
let offset_table_size = 4 * 2; for i in 0..4u16 {
let offset = offset_table_size as u16 + i * 3;
cdic.extend_from_slice(&offset.to_be_bytes());
}
for i in 0..4u8 {
cdic.extend_from_slice(&0x8001u16.to_be_bytes()); cdic.push(i);
}
cdic
}
#[test]
fn test_load_huff_valid() {
let huff = make_huff();
let mut reader = HuffCdicReader {
dict1: Vec::new(),
mincode: Vec::new(),
maxcode: Vec::new(),
dictionary: Vec::new(),
};
assert!(reader.load_huff(&huff).is_ok());
assert_eq!(reader.dict1.len(), 256);
}
#[test]
fn test_load_huff_rejects_bad_magic() {
let mut reader = HuffCdicReader {
dict1: Vec::new(),
mincode: Vec::new(),
maxcode: Vec::new(),
dictionary: Vec::new(),
};
assert!(reader.load_huff(b"\xFF\xD8\xFF\xE0JFIF").is_err());
}
#[test]
fn test_load_cdic_valid() {
let cdic = make_cdic();
let mut reader = HuffCdicReader {
dict1: Vec::new(),
mincode: Vec::new(),
maxcode: Vec::new(),
dictionary: Vec::new(),
};
assert!(reader.load_cdic(&cdic).is_ok());
assert_eq!(reader.dictionary.len(), 4);
}
#[test]
fn test_load_cdic_rejects_jpeg_bytes() {
let jpeg = b"\xFF\xD8\xFF\xE0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00";
let mut reader = HuffCdicReader {
dict1: Vec::new(),
mincode: Vec::new(),
maxcode: Vec::new(),
dictionary: Vec::new(),
};
let err = reader.load_cdic(jpeg).unwrap_err();
assert_eq!(err.kind(), io::ErrorKind::InvalidData);
assert!(
err.to_string().contains("Invalid CDIC header"),
"Expected 'Invalid CDIC header', got: {}",
err
);
}
#[test]
fn test_reader_new_with_valid_cdics() {
let huff = make_huff();
let cdic = make_cdic();
let result = HuffCdicReader::new(&huff, &[cdic.as_slice()]);
assert!(result.is_ok());
}
#[test]
fn test_reader_new_with_poison_record_fails() {
let huff = make_huff();
let cdic = make_cdic();
let jpeg = b"\xFF\xD8\xFF\xE0\x00\x10JFIF\x00\x01\x01\x00";
match HuffCdicReader::new(&huff, &[cdic.as_slice(), jpeg.as_slice()]) {
Err(e) => assert!(
e.to_string().contains("Invalid CDIC header"),
"Expected 'Invalid CDIC header', got: {e}"
),
Ok(_) => panic!("Should fail when JPEG bytes are passed as CDIC"),
}
}
}