use super::distilled_table::OFFSET_DECODE_TABLE;
use crate::error::{ArchiveError, Result};
use bitstream_io::{BitRead, BitReader, LittleEndian};
const EOF_SYMBOL: u16 = 0x100;
const MAX_NODES: usize = 0x274;
#[derive(Clone, Copy, Debug)]
enum HuffmanNode {
Leaf(u16),
Branch(usize, usize),
}
struct HuffmanTree {
nodes: Vec<HuffmanNode>,
}
impl HuffmanTree {
fn build(node_data: &[i32], num_nodes: usize) -> Result<Self> {
let mut tree = Self {
nodes: Vec::with_capacity(num_nodes * 2),
};
if num_nodes >= 2 {
tree.build_subtree(node_data, num_nodes - 2, num_nodes, 0)?;
}
Ok(tree)
}
fn build_subtree(&mut self, node_data: &[i32], node_idx: usize, num_nodes: usize, depth: usize) -> Result<usize> {
if depth > 64 {
return Err(ArchiveError::decompression_failed("Distilled", "tree depth exceeded"));
}
let tree_idx = self.nodes.len();
if node_idx >= num_nodes {
let symbol = (node_idx - num_nodes) as u16;
self.nodes.push(HuffmanNode::Leaf(symbol));
} else {
if node_idx + 1 >= node_data.len() {
return Err(ArchiveError::decompression_failed("Distilled", format!("invalid node index: {}", node_idx)));
}
let left_child = node_data[node_idx] as usize;
let right_child = node_data[node_idx + 1] as usize;
self.nodes.push(HuffmanNode::Branch(0, 0));
let left_idx = self.build_subtree(node_data, left_child, num_nodes, depth + 1)?;
let right_idx = self.build_subtree(node_data, right_child, num_nodes, depth + 1)?;
self.nodes[tree_idx] = HuffmanNode::Branch(left_idx, right_idx);
}
Ok(tree_idx)
}
fn decode<R: BitRead>(&self, reader: &mut R) -> Result<u16> {
if self.nodes.is_empty() {
return Err(ArchiveError::decompression_failed("Distilled", "empty tree"));
}
let mut idx = 0;
let max_iterations = self.nodes.len() + 1;
let mut iterations = 0;
loop {
iterations += 1;
if iterations > max_iterations {
return Err(ArchiveError::decompression_failed(
"Distilled",
"tree traversal loop detected (corrupt data or wrong password)",
));
}
if idx >= self.nodes.len() {
return Err(ArchiveError::decompression_failed("Distilled", "invalid tree traversal"));
}
match self.nodes[idx] {
HuffmanNode::Leaf(sym) => return Ok(sym),
HuffmanNode::Branch(left, right) => {
let bit = reader
.read_bit()
.map_err(|e| ArchiveError::decompression_failed("Distilled", format!("read error: {e}")))?;
idx = if bit { right } else { left };
}
}
}
}
}
#[inline]
fn extra_bits_for_position(pos: usize) -> u32 {
if pos >= 0x0FC4 {
7
} else if pos >= 0x07C4 {
6
} else if pos >= 0x03C4 {
5
} else if pos >= 0x01C4 {
4
} else if pos >= 0x00C4 {
3
} else if pos >= 0x0044 {
2
} else if pos >= 0x0004 {
1
} else {
0
}
}
fn decode_offset_symbol<R: BitRead>(reader: &mut R) -> Result<u8> {
let mut code: u16 = 0;
let mut len: u8 = 0;
loop {
let bit = reader
.read_bit()
.map_err(|e| ArchiveError::decompression_failed("Distilled", format!("read error: {e}")))?;
code |= (bit as u16) << len;
len += 1;
if len <= 8 {
let mask = (1u16 << len) - 1;
let idx = (code & mask) as usize;
let sym = OFFSET_DECODE_TABLE[len as usize][idx];
if sym >= 0 {
return Ok(sym as u8);
}
}
if len > 8 {
return Err(ArchiveError::decompression_failed(
"Distilled",
format!("no matching offset code: {code:#x} len={len}"),
));
}
}
}
pub fn decompress(input: &[u8]) -> Result<Vec<u8>> {
if input.len() < 3 {
return Ok(Vec::new());
}
let num_nodes = u16::from_le_bytes([input[0], input[1]]) as usize;
let code_length = input[2];
if !(2..=MAX_NODES).contains(&num_nodes) {
return Err(ArchiveError::decompression_failed("Distilled", format!("invalid node count: {num_nodes}")));
}
if code_length == 0 || code_length > 16 {
return Err(ArchiveError::decompression_failed("Distilled", format!("invalid code length: {code_length}")));
}
let mut reader = BitReader::endian(&input[3..], LittleEndian);
let mut node_data = Vec::with_capacity(num_nodes);
for _ in 0..num_nodes {
let node: u32 = reader
.read_var(code_length as u32)
.map_err(|e| ArchiveError::decompression_failed("Distilled", format!("read error: {e}")))?;
node_data.push(node as i32);
}
let tree = HuffmanTree::build(&node_data, num_nodes)?;
let mut output = Vec::new();
while let Ok(sym) = tree.decode(&mut reader) {
if sym < EOF_SYMBOL {
output.push(sym as u8);
} else if sym == EOF_SYMBOL {
break;
} else {
let length = (sym as usize).saturating_sub(0x101) + 3;
let offset_base = decode_offset_symbol(&mut reader)? as usize;
let extra_bits = extra_bits_for_position(output.len());
let extra: u32 = if extra_bits > 0 {
reader
.read_var(extra_bits)
.map_err(|e| ArchiveError::decompression_failed("Distilled", format!("read error: {e}")))?
} else {
0
};
let offset = ((offset_base as u32) << extra_bits) + extra + 1;
let start = output.len() as isize - offset as isize;
for i in 0..length {
let byte = if start + (i as isize) < 0 {
0x20 } else {
output[(start + (i as isize)) as usize]
};
output.push(byte);
}
}
}
Ok(output)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_empty_input() {
assert_eq!(decompress(&[]).unwrap(), Vec::<u8>::new());
}
#[test]
fn test_extra_bits_thresholds() {
assert_eq!(extra_bits_for_position(0), 0);
assert_eq!(extra_bits_for_position(3), 0);
assert_eq!(extra_bits_for_position(4), 1);
assert_eq!(extra_bits_for_position(0x43), 1);
assert_eq!(extra_bits_for_position(0x44), 2);
assert_eq!(extra_bits_for_position(0xC3), 2);
assert_eq!(extra_bits_for_position(0xC4), 3);
assert_eq!(extra_bits_for_position(0x1C3), 3);
assert_eq!(extra_bits_for_position(0x1C4), 4);
assert_eq!(extra_bits_for_position(0x3C3), 4);
assert_eq!(extra_bits_for_position(0x3C4), 5);
assert_eq!(extra_bits_for_position(0x7C3), 5);
assert_eq!(extra_bits_for_position(0x7C4), 6);
assert_eq!(extra_bits_for_position(0xFC3), 6);
assert_eq!(extra_bits_for_position(0xFC4), 7);
assert_eq!(extra_bits_for_position(0xFFFF), 7);
}
}