use std::io::{self, Seek as _};
use bitstream_io::Endianness;
use bitstream_io::{BigEndian, BitRead, BitReader};
const FIXED_CODE_LENGTHS: [usize; 257] = [
3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 9, 9, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 13, 13, 12,
];
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("Invalid huffman tree")]
InvalidTree,
#[error("The input stream ended too soon")]
InsufficentSize,
#[error("Invalid stream")]
InvalidStream,
#[error(transparent)]
Io(#[from] io::Error),
}
impl From<Error> for io::Error {
fn from(val: Error) -> Self {
match val {
Error::Io(error) => error,
me => io::Error::other(me),
}
}
}
pub struct HuffmanReader<R: io::Read + io::Seek> {
inner: bitstream_io::BitReader<R, BigEndian>,
uncompressed_size: u64,
offset: u64,
tree: FixedHuffmanDecoder,
translations: [u8; 257],
current_block_size: usize,
current_block_offset: usize,
packbits_operation: packbits_rle::Operation,
packbits_cursor: io::Cursor<Vec<u8>>,
}
impl<R: io::Read + io::Seek> HuffmanReader<R> {
pub fn try_from(inner: R, uncompressed_size: u64) -> Result<Self, Error> {
let inner = bitstream_io::BitReader::new(inner);
let tree = Self::build_decoder();
Ok(Self {
inner,
uncompressed_size,
offset: 0,
tree,
translations: [0u8; 257],
current_block_size: 0,
current_block_offset: 0,
packbits_operation: Default::default(),
packbits_cursor: io::Cursor::new(Vec::new()),
})
}
fn build_decoder() -> FixedHuffmanDecoder {
let mut tree = FixedHuffmanDecoder::new();
let mut last_length = 0;
let mut last_code = 0;
FIXED_CODE_LENGTHS
.iter()
.enumerate()
.for_each(|(i, length)| {
let mut thiscode;
if last_length == 0 {
thiscode = 0;
} else if *length < last_length {
thiscode = (last_code >> (last_length - length)) + 1;
} else {
thiscode = last_code + 1;
if *length > last_length {
thiscode <<= length - last_length;
}
}
last_length = *length;
last_code = thiscode;
tree.add_value(i as i32, thiscode as u32, *length, *length)
.unwrap();
});
tree.make_table(false);
tree
}
pub fn open_block(&mut self) -> io::Result<()> {
assert!(
self.inner.position_in_bits()? % 8 == 0,
"Should be opening at byte boundary",
);
let mut block_size: i32 = self.inner.read_var(32)?;
if block_size > 0 {
if block_size < 4 {
return Err(Error::InvalidStream)?;
}
block_size -= 4;
let uncompressed_block_size: u32 = self.inner.read_var(32)?;
self.current_block_offset = 0;
self.current_block_size = uncompressed_block_size as usize;
let bytes_read = self.read_huffman_symbols_for_block()?;
let mut chunk = vec![0u8; block_size as usize - bytes_read - 4];
self.inner.read_bytes(&mut chunk)?;
let chunk_reader = io::Cursor::new(chunk);
let mut bitstream: bitstream_io::BitReader<_, BigEndian> =
bitstream_io::BitReader::new(chunk_reader);
let mut buffer = vec![0u8; self.current_block_size + 1];
for (idx, byte) in buffer.iter_mut().enumerate() {
match self.tree.next_symbol(&mut bitstream)? {
v if (0..256).contains(&v) => {
self.current_block_offset += 1;
*byte = self.translations[v as usize];
}
_ => {
log::info!(
"Stop code at byte {idx} (expected {})",
self.current_block_size
);
buffer.truncate(idx);
break;
}
}
}
self.packbits_cursor = io::Cursor::new(buffer);
Ok(())
} else {
self.current_block_offset = 0;
self.current_block_size = (-block_size) as usize;
if self.current_block_size < 4 {
return Err(Error::InvalidStream)?;
}
self.current_block_size -= 4;
let mut buffer = vec![0u8; self.current_block_size];
self.inner.read_bytes(&mut buffer)?;
self.packbits_cursor = io::Cursor::new(buffer);
Ok(())
}
}
fn next_byte(&mut self) -> io::Result<Option<u8>> {
if self.stream_position()? >= self.stream_len()? {
return Ok(None);
}
match self.packbits_operation.advance(&mut self.packbits_cursor) {
Ok((byte, next_state)) => {
self.packbits_operation = next_state;
Ok(Some(byte))
}
Err(packbits_rle::OperationError::InsufficientInput(command)) => {
self.open_block()?;
let (byte, next_state) = command.execute(&mut self.packbits_cursor)?;
self.packbits_operation = next_state;
Ok(Some(byte))
}
Err(packbits_rle::OperationError::UnexpectedEof) => {
self.open_block()?;
let (byte, next_state) =
packbits_rle::Operation::default().advance(&mut self.packbits_cursor)?;
self.packbits_operation = next_state;
Ok(Some(byte))
}
Err(e) => Err(e)?,
}
}
fn read_huffman_symbols_for_block(&mut self) -> io::Result<usize> {
assert!(
self.inner.position_in_bits()? % 8 == 0,
"Should be reading symbols at byte boundary",
);
let symbol_count: u16 = self.inner.read_var(16)?;
if symbol_count >= 256 {
return Err(Error::InvalidStream)?;
}
self.inner
.read_bytes(&mut self.translations[0..(symbol_count as usize)])?;
Ok(symbol_count as usize + 2)
}
pub fn into_inner(self) -> R {
self.inner.into_reader()
}
}
impl<R: io::Read + io::Seek> io::Read for HuffmanReader<R> {
#[inline]
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
for (idx, byte) in buf.iter_mut().enumerate() {
match self.next_byte()? {
Some(value) => {
self.offset += 1;
*byte = value;
}
None => return Ok(idx),
}
}
Ok(buf.len())
}
}
impl<R: io::Read + io::Seek> io::Seek for HuffmanReader<R> {
fn seek(&mut self, _: io::SeekFrom) -> io::Result<u64> {
todo!()
}
#[inline]
fn stream_len(&mut self) -> io::Result<u64> {
Ok(self.uncompressed_size)
}
#[inline]
fn stream_position(&mut self) -> io::Result<u64> {
Ok(self.offset)
}
}
#[derive(thiserror::Error, Debug)]
pub enum InitializationError {
#[error("Prefix already exists")]
DuplicatedPrefix,
#[error("Invalid repeat position")]
InvalidRepeatPosition,
#[error("Invalid repeating code")]
InvaildRepeatingCode,
}
impl From<InitializationError> for io::Error {
fn from(value: InitializationError) -> Self {
io::Error::other(value)
}
}
#[derive(thiserror::Error, Debug)]
pub enum DecompressionError {
#[error("Invalid prefix code when getting next symbol [length]")]
InvalidPrefixCodeLength,
#[error("Invalid prefix code when getting next symbol [code]")]
InvalidPrefixCodeCode,
}
impl From<DecompressionError> for io::Error {
fn from(value: DecompressionError) -> Self {
io::Error::other(value)
}
}
#[derive(Clone, Debug)]
struct HuffmanTreeNode {
left: i32,
right: i32,
}
#[derive(Default, Clone)]
struct HuffmanTableRow {
length: i32,
value: i32,
}
#[derive(Clone)]
pub struct FixedHuffmanDecoder {
min_length: usize,
max_length: usize,
tree: Vec<HuffmanTreeNode>,
table: Option<Vec<HuffmanTableRow>>,
table_size: usize,
}
impl Default for FixedHuffmanDecoder {
fn default() -> Self {
Self::new()
}
}
impl FixedHuffmanDecoder {
pub fn new() -> Self {
let mut me = Self {
min_length: usize::MAX,
max_length: usize::MIN,
tree: Vec::new(),
table: None,
table_size: 0,
};
me.new_node();
me
}
pub fn initialize(
lengths: &[isize],
max_code_length: isize,
zeros: bool,
) -> Result<Self, InitializationError> {
let mut me = Self::new();
let mut code = 0;
let mut unhandled_symbols = lengths.len();
for length in 1..=max_code_length {
for (i, cur_len) in lengths.iter().enumerate() {
if *cur_len != length {
continue;
}
me.add_value(
i as i32,
if zeros { code } else { !code },
length as usize,
length as usize,
)?;
code += 1;
unhandled_symbols -= 1;
if unhandled_symbols == 0 {
break;
}
}
code <<= 1;
}
Ok(me)
}
pub fn add_value(
&mut self,
value: i32,
code: u32,
length: usize,
repeat_pos: usize,
) -> Result<(), InitializationError> {
self.max_length = self.max_length.max(length);
self.min_length = self.min_length.min(length);
let repeat_pos = length as isize - 1 - repeat_pos as isize;
let mut last_node = 0;
let codest = (((repeat_pos - 1) as u32) >> 1) & 3;
if repeat_pos == 0 || (repeat_pos >= 0 && (codest == 0 || codest == 3)) {
return Err(InitializationError::InvalidRepeatPosition);
}
let mut bitpos = length as isize - 1;
loop {
if bitpos < 0 {
break;
}
let bit = ((code >> bitpos) & 1) != 0;
if self.is_leaf_node(last_node) {
return Err(InitializationError::DuplicatedPrefix);
};
if bitpos == repeat_pos {
if !self.is_open_branch(last_node, bit) {
return Err(InitializationError::InvaildRepeatingCode);
};
let repeat_node = self.new_node();
let next_node = self.new_node();
self.set_branch(last_node, bit, repeat_node);
self.set_branch(repeat_node, bit, repeat_node);
self.set_branch(repeat_node, !bit, next_node);
last_node = next_node;
bitpos += 1;
} else {
if self.is_open_branch(last_node, bit) {
let new_node = self.new_node();
self.set_branch(last_node, bit, new_node);
}
last_node = self.branch(last_node, bit);
}
bitpos -= 1;
}
if !self.is_empty_node(last_node) {
return Err(InitializationError::DuplicatedPrefix);
}
self.set_leaf_value(last_node, value);
Ok(())
}
pub(crate) fn new_node(&mut self) -> i32 {
self.tree.push(HuffmanTreeNode {
left: -1,
right: -2,
});
self.tree.len() as i32 - 1
}
pub(crate) fn set_leaf_value(&mut self, node: i32, value: i32) {
self.set_branch(node, false, value);
self.set_branch(node, true, value);
}
fn leaf_value(&self, node: i32) -> i32 {
assert!(self.branch(node, false) == self.branch(node, true));
self.branch(node, false)
}
fn is_empty_node(&self, node: i32) -> bool {
self.branch(node, false) == -1 && self.branch(node, true) == -2
}
fn is_leaf_node(&self, node: i32) -> bool {
self.branch(node, false) == self.branch(node, true)
}
fn is_open_branch(&self, node: i32, right_branch: bool) -> bool {
if right_branch {
self.tree[node as usize].right < 0
} else {
self.tree[node as usize].left < 0
}
}
pub(crate) fn set_branch(&mut self, node: i32, right_branch: bool, value: i32) {
if right_branch {
self.tree[node as usize].right = value;
} else {
self.tree[node as usize].left = value;
}
}
fn branch(&self, node: i32, right_branch: bool) -> i32 {
if right_branch {
self.tree[node as usize].right
} else {
self.tree[node as usize].left
}
}
fn is_invalid_node(&self, node: i32) -> bool {
node < 0
}
pub fn add_value_lf(
&mut self,
value: i32,
code: u32,
length: usize,
repeat_pos: usize,
) -> Result<(), InitializationError> {
self.add_value(value, reverse_n(code, length), length, repeat_pos)
}
#[inline]
pub fn next_symbol<R: io::Read + io::Seek, E: Endianness>(
&mut self,
input: &mut bitstream_io::BitReader<R, E>,
) -> io::Result<i32> {
let Some(_) = self.table.as_ref() else {
panic!("Huffman: search table not built");
};
let mut node = 0;
loop {
if self.is_leaf_node(node) {
break;
}
let bit = input.read_bit()?;
if self.is_open_branch(node, bit) {
return Err(DecompressionError::InvalidPrefixCodeCode)?;
}
node = self.branch(node, bit);
}
Ok(self.leaf_value(node))
}
fn make_table_recursive_le(&mut self, node: i32, table: &mut [HuffmanTableRow], depth: i32) {
let curr_table_size = 1 << (self.table_size as i32 - depth);
let curr_stride = 1 << depth;
if self.is_invalid_node(node) {
for i in 0..curr_table_size {
table[i * curr_stride].length = -1;
}
} else if self.is_leaf_node(node) {
for i in 0..curr_table_size {
table[i * curr_stride].length = depth;
table[i * curr_stride].value = self.leaf_value(node);
}
} else if depth == self.table_size as i32 {
table[0].length = self.table_size as i32 + 1;
table[0].value = node;
} else {
self.make_table_recursive_le(self.branch(node, false), table, depth + 1);
let size = table.len();
self.make_table_recursive_le(
self.branch(node, true),
&mut table[curr_stride..size],
depth + 1,
);
}
}
fn make_table_recursive_be(&mut self, node: i32, table: &mut [HuffmanTableRow], depth: i32) {
let curr_table_size = 1 << (self.table_size as i32 - depth);
if self.is_invalid_node(node) {
table
.iter_mut()
.take(curr_table_size)
.for_each(|i| i.length = -1);
} else if self.is_leaf_node(node) {
table.iter_mut().take(curr_table_size).for_each(|i| {
i.length = depth;
i.value = self.leaf_value(node);
});
} else if depth == self.table_size as i32 {
table[0].length = self.table_size as i32 + 1;
table[0].value = node;
} else {
self.make_table_recursive_be(self.branch(node, false), table, depth + 1);
let size = table.len();
self.make_table_recursive_be(
self.branch(node, true),
&mut table[(curr_table_size / 2)..size],
depth + 1,
);
}
}
pub fn make_table(&mut self, little_endian: bool) {
self.table_size = if self.max_length < self.min_length || self.max_length >= 10 {
10
} else {
self.max_length
};
let mut table = vec![Default::default(); 1 << self.table_size];
if little_endian {
self.make_table_recursive_le(0, &mut table, 0)
} else {
self.make_table_recursive_be(0, &mut table, 0)
}
self.table = Some(table);
}
}
#[inline]
fn reverse(value: u32) -> u32 {
let mut val = value;
val = ((val >> 1) & 0x55555555) | ((val & 0x55555555) << 1);
val = ((val >> 2) & 0x33333333) | ((val & 0x33333333) << 2);
val = ((val >> 4) & 0x0F0F0F0F) | ((val & 0x0F0F0F0F) << 4);
val = ((val >> 8) & 0x00FF00FF) | ((val & 0x00FF00FF) << 8);
val.rotate_left(16)
}
#[inline]
fn reverse_n(value: u32, length: usize) -> u32 {
reverse(value) >> (32 - length)
}
pub trait ReadWord {
fn peek_word(&mut self, bits: u32) -> io::Result<u32>;
fn read_word(&mut self, bits: u32) -> io::Result<u32>;
}
impl<R: io::Read + io::Seek, E: Endianness> ReadWord for BitReader<R, E> {
fn peek_word(&mut self, bits: u32) -> io::Result<u32> {
let start = self.position_in_bits().unwrap();
match self.read_var::<u32>(bits) {
Ok(result) => {
self.seek_bits(io::SeekFrom::Current(-(bits as i64)))?;
Ok(result)
}
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
let end = self.seek_bits(io::SeekFrom::End(0)).unwrap();
self.seek_bits(io::SeekFrom::Start(start)).unwrap();
if start < end {
let available_bits = end - start;
let result = self.read_var::<u32>(available_bits as u32)?
<< (bits as u64 - available_bits);
self.seek_bits(io::SeekFrom::Current(-(available_bits as i64)))?;
return Ok(result);
}
Err(e)
}
Err(e) => Err(e),
}
}
fn read_word(&mut self, bits: u32) -> io::Result<u32> {
let start = self.position_in_bits().unwrap();
match self.read_var::<u32>(bits) {
Ok(result) => Ok(result),
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
let end = self.seek_bits(io::SeekFrom::End(0)).unwrap();
if start < end {
let available_bits = end - start;
self.seek_bits(io::SeekFrom::Start(start)).unwrap();
return Ok(self.read_var::<u32>(available_bits as u32)?
<< (bits as u64 - available_bits));
}
Err(e)
}
Err(e) => Err(e),
}
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn successful_initialization() {
assert!(
FixedHuffmanDecoder::initialize(
&[
3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
],
8,
true,
)
.is_ok()
)
}
}