#![forbid(unsafe_code)]
#![cfg_attr(not(any(feature = "std", test)), no_std)]
extern crate alloc;
use alloc::vec;
use alloc::vec::Vec;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
TruncatedTable,
BadMatchOffset,
}
impl core::fmt::Display for Error {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
Error::TruncatedTable => f.write_str("xpress-huffman: truncated Huffman table"),
Error::BadMatchOffset => {
f.write_str("xpress-huffman: match offset before output start")
}
}
}
}
#[cfg(feature = "std")]
impl std::error::Error for Error {}
const TABLE_LEN: usize = 256;
const BLOCK_SIZE: usize = 1 << 16;
pub fn decompress(compressed: &[u8], decompressed_size: usize) -> Result<Vec<u8>, Error> {
let mut dst: Vec<u8> = Vec::with_capacity(decompressed_size);
let mut bs = BitStream::new(compressed);
while bs.pos < compressed.len() && dst.len() < decompressed_size {
let table = compressed
.get(bs.pos..bs.pos + TABLE_LEN)
.ok_or(Error::TruncatedTable)?;
let tree = build_tree(table);
bs.pos += TABLE_LEN;
bs.init();
let mut produced_in_block = 0usize;
while produced_in_block < BLOCK_SIZE
&& bs.pos < compressed.len()
&& dst.len() < decompressed_size
{
let symbol = bs.decode(&tree);
if symbol < 256 {
dst.push(symbol as u8);
produced_in_block += 1;
continue;
}
let symbol = symbol - 256;
let offset_bits = u32::from(symbol >> 4);
let offset = (1usize << offset_bits) + bs.lookup(offset_bits) as usize;
let length = bs.match_length(u32::from(symbol & 0x0F));
bs.skip(offset_bits as i32);
if offset == 0 || offset > dst.len() {
return Err(Error::BadMatchOffset);
}
let mut remaining = length as usize;
while remaining > 0 {
let from = dst.len() - offset;
let n = remaining.min(offset);
for k in 0..n {
let b = dst[from + k];
dst.push(b);
}
remaining -= n;
}
produced_in_block += length as usize;
}
}
Ok(dst)
}
#[derive(Clone, Copy)]
struct Node {
children: [usize; 2],
is_leaf: bool,
symbol: u16,
}
const NONE: usize = usize::MAX;
fn build_tree(buf: &[u8]) -> Vec<Node> {
let mut nodes = vec![
Node {
children: [NONE, NONE],
is_leaf: false,
symbol: 0,
};
1024
];
let mut symbols: Vec<(u8, u16)> = Vec::with_capacity(512);
for (i, &c) in buf.iter().enumerate().take(TABLE_LEN) {
symbols.push((c & 0x0F, (i * 2) as u16));
symbols.push((c >> 4, (i * 2 + 1) as u16));
}
symbols.sort_unstable();
let start = symbols.iter().take_while(|(len, _)| *len == 0).count();
let mut mask: u32 = 0;
let mut bits: u32 = 1;
let mut tree_index = 1usize;
for &(length, symbol) in symbols.iter().take(512).skip(start) {
let length = u32::from(length);
{
let node = &mut nodes[tree_index];
node.symbol = symbol;
node.is_leaf = true;
}
mask = mask.wrapping_shl(length.wrapping_sub(bits));
bits = length;
tree_index = add_leaf(&mut nodes, tree_index, mask, bits);
mask = mask.wrapping_add(1);
}
nodes
}
fn add_leaf(nodes: &mut [Node], idx: usize, mask: u32, bits: u32) -> usize {
let mut cur = 0usize;
let mut i = idx + 1;
let mut bits = bits;
while bits > 1 {
bits -= 1;
let childidx = ((mask >> bits) & 1) as usize;
if nodes[cur].children[childidx] == NONE {
nodes[cur].children[childidx] = i;
nodes[i].is_leaf = false;
i += 1;
}
cur = nodes[cur].children[childidx];
}
nodes[cur].children[(mask & 1) as usize] = idx;
i
}
struct BitStream<'a> {
data: &'a [u8],
pos: usize,
mask: u32,
bits: i32,
}
impl<'a> BitStream<'a> {
fn new(data: &'a [u8]) -> Self {
Self {
data,
pos: 0,
mask: 0,
bits: 0,
}
}
fn read16(&mut self) -> u32 {
let avail = self.data.len().saturating_sub(self.pos);
let v = match avail {
0 => 0,
1 => u32::from(self.data[self.pos]) << 8,
_ => u32::from(u16::from_le_bytes([
self.data[self.pos],
self.data[self.pos + 1],
])),
};
self.pos += avail.min(2);
v
}
fn init(&mut self) {
self.mask = (self.read16() << 16).wrapping_add(self.read16());
self.bits = 32;
}
fn lookup(&self, n: u32) -> u32 {
if n == 0 {
0
} else {
self.mask >> (32 - n)
}
}
fn skip(&mut self, n: i32) {
self.mask = self.mask.wrapping_shl(n as u32);
self.bits -= n;
if self.bits < 16 {
self.mask = self.mask.wrapping_add(self.read16() << (16 - self.bits));
self.bits += 16;
}
}
fn read_byte(&mut self) -> u8 {
let b = self.data.get(self.pos).copied().unwrap_or(0);
self.pos += 1;
b
}
fn match_length(&mut self, nibble: u32) -> u32 {
let mut length = nibble;
if length == 15 {
length = u32::from(self.read_byte()) + 15;
if length == 270 {
length = self.read16();
}
}
length + 3
}
fn decode(&mut self, nodes: &[Node]) -> u16 {
let mut node = 0usize;
while !nodes[node].is_leaf {
let bit = self.lookup(1) as usize;
self.skip(1);
let next = nodes[node].children[bit];
if next == NONE {
return 0; }
node = next;
}
nodes[node].symbol
}
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
const COMPRESSED: &[u8] = include_bytes!("../tests/data/am_delta.xhuff");
const EXPECTED: &[u8] = include_bytes!("../tests/data/am_delta.expected");
const AUDIODG: &[u8] = include_bytes!("../tests/data/audiodg.xhuff");
const AUDIODG_EXPECTED: &[u8] = include_bytes!("../tests/data/audiodg.expected");
#[test]
fn decompresses_real_xpress_huffman_vector() {
let out = decompress(COMPRESSED, EXPECTED.len()).unwrap();
assert_eq!(out.len(), EXPECTED.len());
assert_eq!(out, EXPECTED);
}
#[test]
fn decompresses_larger_real_vector() {
let out = decompress(AUDIODG, AUDIODG_EXPECTED.len()).unwrap();
assert_eq!(out, AUDIODG_EXPECTED);
}
fn table_first_symbol_is_match() -> [u8; TABLE_LEN] {
let mut t = [0u8; TABLE_LEN];
t[128] = 0x01;
t
}
#[test]
fn match_before_any_output_errors() {
let mut input = table_first_symbol_is_match().to_vec();
input.extend_from_slice(&[0, 0, 0, 0, 0, 0]);
assert_eq!(decompress(&input, 100), Err(Error::BadMatchOffset));
}
#[test]
fn handles_init_at_exact_eof() {
let mut input = table_first_symbol_is_match().to_vec();
input.extend_from_slice(&[0, 0]);
assert_eq!(decompress(&input, 100), Ok(Vec::new()));
}
#[test]
fn handles_init_with_one_trailing_byte() {
let mut input = table_first_symbol_is_match().to_vec();
input.extend_from_slice(&[0, 0, 0]);
assert_eq!(decompress(&input, 100), Ok(Vec::new()));
}
#[test]
fn stops_at_requested_size() {
let out = decompress(COMPRESSED, 16).unwrap();
assert_eq!(out.len(), 16);
assert_eq!(out, &EXPECTED[..16]);
}
#[test]
fn empty_input_yields_empty() {
assert_eq!(decompress(&[], 0).unwrap(), Vec::<u8>::new());
assert_eq!(decompress(&[], 100).unwrap(), Vec::<u8>::new());
}
#[test]
fn truncated_table_errors() {
assert_eq!(decompress(&[0u8; 10], 100), Err(Error::TruncatedTable));
}
#[test]
fn error_is_display() {
assert!(!format!("{}", Error::TruncatedTable).is_empty());
assert!(!format!("{}", Error::BadMatchOffset).is_empty());
}
#[test]
fn match_length_short() {
let mut bs = BitStream::new(&[]);
assert_eq!(bs.match_length(7), 10);
}
#[test]
fn match_length_one_byte_extension() {
let mut bs = BitStream::new(&[10]);
assert_eq!(bs.match_length(15), 28);
}
#[test]
fn match_length_max_run_reads_16bit() {
let mut bs = BitStream::new(&[255, 0x34, 0x12]);
assert_eq!(bs.match_length(15), 0x1234 + 3);
}
}