use bitstream_io::{BitRead, BitReader, Endianness};
use std::io::{self};
#[derive(thiserror::Error, Debug)]
pub enum InitializationError {
#[error("Prefix already exists")]
DuplicatedPrefix,
#[error("Invalid repeat position")]
InvalidRepeatPosition,
#[error("Invalid repeating code")]
InvaildRepeatingCode,
}
impl From<InitializationError> for io::Error {
fn from(value: InitializationError) -> Self {
io::Error::other(value)
}
}
#[derive(thiserror::Error, Debug)]
pub enum DecompressionError {
#[error("Invalid prefix code when getting next symbol [length]")]
InvalidPrefixCodeLength,
#[error("Invalid prefix code when getting next symbol [code]")]
InvalidPrefixCodeCode,
}
impl From<DecompressionError> for io::Error {
fn from(value: DecompressionError) -> Self {
io::Error::other(value)
}
}
#[derive(Clone, Debug)]
struct HuffmanTreeNode {
left: i32,
right: i32,
}
#[derive(Default, Clone)]
struct HuffmanTableRow {
length: i32,
value: i32,
}
#[derive(Clone)]
pub struct HuffmanDecoder {
min_length: usize,
max_length: usize,
tree: Vec<HuffmanTreeNode>,
table: Option<Vec<HuffmanTableRow>>,
table_size: usize,
}
impl Default for HuffmanDecoder {
fn default() -> Self {
Self::new()
}
}
impl HuffmanDecoder {
pub fn new() -> Self {
let mut me = Self {
min_length: usize::MAX,
max_length: usize::MIN,
tree: Vec::new(),
table: None,
table_size: 0,
};
me.new_node();
me
}
pub fn initialize(
lengths: &[isize],
max_code_length: isize,
zeros: bool,
) -> Result<Self, InitializationError> {
let mut me = Self::new();
let mut code = 0;
let mut unhandled_symbols = lengths.len();
for length in 1..=max_code_length {
for (i, cur_len) in lengths.iter().enumerate() {
if *cur_len != length {
continue;
}
me.add_value(
i as i32,
if zeros { code } else { !code },
length as usize,
length as usize,
)?;
code += 1;
unhandled_symbols -= 1;
if unhandled_symbols == 0 {
break;
}
}
code <<= 1;
}
Ok(me)
}
pub fn add_value(
&mut self,
value: i32,
code: u32,
length: usize,
repeat_pos: usize,
) -> Result<(), InitializationError> {
self.max_length = self.max_length.max(length);
self.min_length = self.min_length.min(length);
let repeat_pos = length as isize - 1 - repeat_pos as isize;
let mut last_node = 0;
let codest = (((repeat_pos - 1) as u32) >> 1) & 3;
if repeat_pos == 0 || (repeat_pos >= 0 && (codest == 0 || codest == 3)) {
return Err(InitializationError::InvalidRepeatPosition);
}
let mut bitpos = length as isize - 1;
loop {
if bitpos < 0 {
break;
}
let bit = ((code >> bitpos) & 1) != 0;
if self.is_leaf_node(last_node) {
return Err(InitializationError::DuplicatedPrefix);
};
if bitpos == repeat_pos {
if !self.is_open_branch(last_node, bit) {
return Err(InitializationError::InvaildRepeatingCode);
};
let repeat_node = self.new_node();
let next_node = self.new_node();
self.set_branch(last_node, bit, repeat_node);
self.set_branch(repeat_node, bit, repeat_node);
self.set_branch(repeat_node, !bit, next_node);
last_node = next_node;
bitpos += 1;
} else {
if self.is_open_branch(last_node, bit) {
let new_node = self.new_node();
self.set_branch(last_node, bit, new_node);
}
last_node = self.branch(last_node, bit);
}
bitpos -= 1;
}
if !self.is_empty_node(last_node) {
return Err(InitializationError::DuplicatedPrefix);
}
self.set_leaf_value(last_node, value);
Ok(())
}
pub(crate) fn new_node(&mut self) -> i32 {
self.tree.push(HuffmanTreeNode {
left: -1,
right: -2,
});
self.tree.len() as i32 - 1
}
pub(crate) fn set_leaf_value(&mut self, node: i32, value: i32) {
self.set_branch(node, false, value);
self.set_branch(node, true, value);
}
fn leaf_value(&self, node: i32) -> i32 {
assert!(self.branch(node, false) == self.branch(node, true));
self.branch(node, false)
}
fn is_empty_node(&self, node: i32) -> bool {
self.branch(node, false) == -1 && self.branch(node, true) == -2
}
fn is_leaf_node(&self, node: i32) -> bool {
self.branch(node, false) == self.branch(node, true)
}
fn is_open_branch(&self, node: i32, right_branch: bool) -> bool {
if right_branch {
self.tree[node as usize].right < 0
} else {
self.tree[node as usize].left < 0
}
}
pub(crate) fn set_branch(&mut self, node: i32, right_branch: bool, value: i32) {
if right_branch {
self.tree[node as usize].right = value;
} else {
self.tree[node as usize].left = value;
}
}
fn branch(&self, node: i32, right_branch: bool) -> i32 {
if right_branch {
self.tree[node as usize].right
} else {
self.tree[node as usize].left
}
}
fn is_invalid_node(&self, node: i32) -> bool {
node < 0
}
pub fn add_value_lf(
&mut self,
value: i32,
code: u32,
length: usize,
repeat_pos: usize,
) -> Result<(), InitializationError> {
self.add_value(value, reverse_n(code, length), length, repeat_pos)
}
#[inline]
pub fn next_symbol<R: io::Read + io::Seek, E: Endianness>(
&mut self,
input: &mut bitstream_io::BitReader<R, E>,
) -> io::Result<i32> {
let Some(table) = self.table.as_ref() else {
panic!("Huffman: search table not built");
};
let bits: u32 = input.peek_word(self.table_size as u32)?;
let HuffmanTableRow { length, value } = table[bits as usize];
if length <= 0 {
return Err(DecompressionError::InvalidPrefixCodeLength)?;
}
if length <= self.table_size as i32 {
input.skip(length as u32)?;
return Ok(value);
}
input.skip(self.table_size as u32)?;
let mut node = value;
loop {
if self.is_leaf_node(node) {
break;
}
let bit = input.read_bit()?;
if self.is_open_branch(node, bit) {
return Err(DecompressionError::InvalidPrefixCodeCode)?;
}
node = self.branch(node, bit);
}
Ok(self.leaf_value(node))
}
fn make_table_recursive_le(&mut self, node: i32, table: &mut [HuffmanTableRow], depth: i32) {
let curr_table_size = 1 << (self.table_size as i32 - depth);
let curr_stride = 1 << depth;
if self.is_invalid_node(node) {
for i in 0..curr_table_size {
table[i * curr_stride].length = -1;
}
} else if self.is_leaf_node(node) {
for i in 0..curr_table_size {
table[i * curr_stride].length = depth;
table[i * curr_stride].value = self.leaf_value(node);
}
} else if depth == self.table_size as i32 {
table[0].length = self.table_size as i32 + 1;
table[0].value = node;
} else {
self.make_table_recursive_le(self.branch(node, false), table, depth + 1);
let size = table.len();
self.make_table_recursive_le(
self.branch(node, true),
&mut table[curr_stride..size],
depth + 1,
);
}
}
fn make_table_recursive_be(&mut self, node: i32, table: &mut [HuffmanTableRow], depth: i32) {
let curr_table_size = 1 << (self.table_size as i32 - depth);
if self.is_invalid_node(node) {
table
.iter_mut()
.take(curr_table_size)
.for_each(|i| i.length = -1);
} else if self.is_leaf_node(node) {
table.iter_mut().take(curr_table_size).for_each(|i| {
i.length = depth;
i.value = self.leaf_value(node);
});
} else if depth == self.table_size as i32 {
table[0].length = self.table_size as i32 + 1;
table[0].value = node;
} else {
self.make_table_recursive_be(self.branch(node, false), table, depth + 1);
let size = table.len();
self.make_table_recursive_be(
self.branch(node, true),
&mut table[(curr_table_size / 2)..size],
depth + 1,
);
}
}
pub fn make_table(&mut self, little_endian: bool) {
self.table_size = if self.max_length < self.min_length || self.max_length >= 10 {
10
} else {
self.max_length
};
let mut table = vec![Default::default(); 1 << self.table_size];
if little_endian {
self.make_table_recursive_le(0, &mut table, 0)
} else {
self.make_table_recursive_be(0, &mut table, 0)
}
self.table = Some(table);
}
}
#[inline]
fn reverse(value: u32) -> u32 {
let mut val = value;
val = ((val >> 1) & 0x55555555) | ((val & 0x55555555) << 1);
val = ((val >> 2) & 0x33333333) | ((val & 0x33333333) << 2);
val = ((val >> 4) & 0x0F0F0F0F) | ((val & 0x0F0F0F0F) << 4);
val = ((val >> 8) & 0x00FF00FF) | ((val & 0x00FF00FF) << 8);
val.rotate_left(16)
}
#[inline]
fn reverse_n(value: u32, length: usize) -> u32 {
reverse(value) >> (32 - length)
}
pub trait ReadWord {
fn peek_word(&mut self, bits: u32) -> io::Result<u32>;
fn read_word(&mut self, bits: u32) -> io::Result<u32>;
}
impl<R: io::Read + io::Seek, E: Endianness> ReadWord for BitReader<R, E> {
fn peek_word(&mut self, bits: u32) -> io::Result<u32> {
let start = self.position_in_bits().unwrap();
match self.read_var::<u32>(bits) {
Ok(result) => {
self.seek_bits(io::SeekFrom::Current(-(bits as i64)))?;
Ok(result)
}
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
let end = self.seek_bits(io::SeekFrom::End(0)).unwrap();
self.seek_bits(io::SeekFrom::Start(start)).unwrap();
if start < end {
log::debug!("Adjusting to {} bit peek", (end - start));
let available_bits = end - start;
let result = self.read_var::<u32>(available_bits as u32)?
<< (bits as u64 - available_bits);
self.seek_bits(io::SeekFrom::Current(-(available_bits as i64)))?;
return Ok(result);
}
Err(e)
}
Err(e) => Err(e),
}
}
#[inline]
fn read_word(&mut self, bits: u32) -> io::Result<u32> {
let start = self.position_in_bits().unwrap();
match self.read_var::<u32>(bits) {
Ok(result) => Ok(result),
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
let end = self.seek_bits(io::SeekFrom::End(0)).unwrap();
if start < end {
log::debug!("Adjusting to {} bit read", (end - start));
let available_bits = end - start;
self.seek_bits(io::SeekFrom::Start(start)).unwrap();
return Ok(self.read_var::<u32>(available_bits as u32)?
<< (bits as u64 - available_bits));
}
Err(e)
}
Err(e) => Err(e),
}
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn successful_initialization() {
assert!(
HuffmanDecoder::initialize(
&[
3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
],
8,
true,
)
.is_ok()
)
}
}