#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CanonicalHuffman<T> {
counts: T,
symbols: T,
}
#[derive(Clone, Debug)]
pub struct Decoder<'a, T> {
codebook: &'a CanonicalHuffman<T>,
code: u32, bits: usize, index: usize, first: u32, }
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DecodeResult {
Incomplete,
Invalid,
Ok(u8),
}
#[cfg(test)]
impl CanonicalHuffman<Vec<u8>> {
pub fn new_from_packed_lengths(packed: &[u8]) -> Option<Self> {
let mut lengths = [0; 256];
let mut symbol = 0;
for b in packed.iter() {
let len = *b & 0b1111;
let count = (*b >> 4) + 1;
for _ in 0..count {
lengths[symbol] = len;
symbol += 1;
}
}
Self::new_from_lengths(&lengths[..symbol])
}
pub fn new_from_lengths(lengths: &[u8]) -> Option<Self> {
let max_len = (*lengths.iter().max().unwrap_or(&0) + 1) as usize;
let mut counts = vec![0; max_len];
for len in lengths.iter() {
counts[*len as usize] += 1;
}
if counts[0] as usize == lengths.len() {
return Some(CanonicalHuffman {
counts,
symbols: vec![],
});
}
let mut symbols_left = 1;
for len in 1..max_len {
symbols_left <<= 1;
if symbols_left < counts[len] {
return None;
}
symbols_left -= counts[len];
}
let mut offsets = vec![0; max_len];
offsets[1] = 0;
for len in 1..(max_len - 1) {
offsets[len + 1] = offsets[len] + counts[len] as usize;
}
let mut symbols = vec![0; lengths.len()];
for symbol in 0..lengths.len() {
if lengths[symbol] > 0 {
symbols[offsets[lengths[symbol] as usize]] = symbol as u8;
offsets[lengths[symbol] as usize] += 1;
}
}
Some(CanonicalHuffman { counts, symbols })
}
pub fn as_ref(&self) -> CanonicalHuffman<&[u8]> {
CanonicalHuffman {
counts: &self.counts,
symbols: &self.symbols,
}
}
}
impl<'a> CanonicalHuffman<&'a [u8]> {
pub const unsafe fn new(counts: &'a [u8], symbols: &'a [u8]) -> Self {
CanonicalHuffman { counts, symbols }
}
}
impl<T> CanonicalHuffman<T>
where
T: std::convert::AsRef<[u8]>,
{
pub fn decoder(&self) -> Decoder<T> {
Decoder {
codebook: self,
code: 0,
bits: 0,
index: 0,
first: 0,
}
}
}
impl<'a, T> Decoder<'a, T>
where
T: std::convert::AsRef<[u8]>,
{
pub fn feed(&mut self, bit: bool) -> DecodeResult {
self.code |= bit as u32;
self.bits += 1;
if self.bits >= self.codebook.counts.as_ref().len() {
return DecodeResult::Invalid;
}
let count = self.codebook.counts.as_ref()[self.bits] as u32;
if self.code < self.first + count {
let i = self.index + (self.code - self.first) as usize;
DecodeResult::Ok(self.codebook.symbols.as_ref()[i])
} else {
self.index += count as usize;
self.first += count;
self.first <<= 1;
self.code <<= 1;
DecodeResult::Incomplete
}
}
}
#[cfg(test)]
mod tests {
use super::CanonicalHuffman;
use super::DecodeResult;
fn decodeiter<'a, T, I>(
table: &CanonicalHuffman<T>,
bits: I,
) -> Option<u8>
where
T: std::convert::AsRef<[u8]>,
I: IntoIterator<Item = &'a bool>,
{
let mut d = table.decoder();
for b in bits {
match d.feed(*b) {
DecodeResult::Incomplete => continue,
DecodeResult::Invalid => return None,
DecodeResult::Ok(c) => return Some(c),
}
}
None
}
#[test]
fn constructors() {
let a =
CanonicalHuffman::new_from_packed_lengths(&[2, 1, 19]).unwrap();
let b = CanonicalHuffman::new_from_lengths(&[2, 1, 3, 3]).unwrap();
let c =
unsafe { CanonicalHuffman::new(&[0, 1, 1, 2], &[1, 0, 2, 3]) };
assert_eq!(a, b);
assert_eq!(a.as_ref(), c);
assert_eq!(b.as_ref(), c);
}
#[test]
fn oversubscribed() {
let a = CanonicalHuffman::new_from_lengths(&[1, 2, 2, 3]);
assert_eq!(a, None);
}
#[test]
fn decode() {
let a = CanonicalHuffman::new_from_lengths(&[2, 1, 3, 3]).unwrap();
assert_eq!(decodeiter(&a, &[true, false]), Some(0));
assert_eq!(decodeiter(&a, &[false]), Some(1));
assert_eq!(decodeiter(&a, &[true, true, false]), Some(2));
assert_eq!(decodeiter(&a, &[true, true, true]), Some(3));
}
#[test]
fn undersubscribed() {
let a = CanonicalHuffman::new_from_lengths(&[1, 3]).unwrap();
assert_eq!(decodeiter(&a, &[false]), Some(0));
assert_eq!(decodeiter(&a, &[true, false, false]), Some(1));
assert_eq!(decodeiter(&a, &[true, true, true]), None);
assert_eq!(decodeiter(&a, &[true, true]), None);
let mut d = a.decoder();
assert_eq!(d.feed(true), DecodeResult::Incomplete);
assert_eq!(d.feed(true), DecodeResult::Incomplete);
assert_eq!(d.feed(true), DecodeResult::Incomplete);
assert_eq!(d.feed(true), DecodeResult::Invalid);
let mut d = a.decoder();
assert_eq!(d.feed(true), DecodeResult::Incomplete);
assert_eq!(d.feed(false), DecodeResult::Incomplete);
assert_eq!(d.feed(true), DecodeResult::Incomplete);
assert_eq!(d.feed(true), DecodeResult::Invalid);
}
#[test]
fn incomplete() {
let a = CanonicalHuffman::new_from_lengths(&[0, 1, 1]).unwrap();
assert_eq!(decodeiter(&a, &[false]), Some(1));
assert_eq!(decodeiter(&a, &[true]), Some(2));
}
#[test]
fn empty() {
let a = CanonicalHuffman::new_from_lengths(&[]).unwrap();
assert_eq!(a.decoder().feed(false), DecodeResult::Invalid);
assert_eq!(a.decoder().feed(true), DecodeResult::Invalid);
let a = CanonicalHuffman::new_from_lengths(&[0, 0]).unwrap();
assert_eq!(a.decoder().feed(false), DecodeResult::Invalid);
assert_eq!(a.decoder().feed(true), DecodeResult::Invalid);
}
}