use std::collections::BTreeMap;
use bitvec::{slice::BitSlice, store::BitStore, order::Msb0, fields::BitField};
use anyhow::*;
pub struct Codec<'a> {
buffer: &'a BitSlice<Msb0, u8>,
index: usize,
}
impl Codec<'_> {
pub fn new<'a>(input: &'a [u8]) -> Codec<'a> {
Codec { buffer: BitSlice::from_slice(input), index: 0 }
}
pub fn look_bits<T: BitStore>(&self, n: usize) -> T {
assert!(n <= T::BITS as usize);
if n == 0 { return T::FALSE }
if self.index + n > self.buffer.len() {
self.buffer[self.index..].load_be::<T>() << (self.index + n - self.buffer.len()) as u8
} else {
self.buffer[self.index..self.index+n].load_be()
}
}
pub fn read_bits<T: BitStore>(&mut self, n: usize) -> Result<T, Error> {
assert!(n <= T::BITS as usize);
if self.index + n > self.buffer.len() {
bail!("read out of index {} < {}", self.index+n, self.buffer.len());
}
if n == 0 { return Ok(T::FALSE) }
let result = self.buffer[self.index..self.index+n].load_be();
self.index += n;
Ok(result)
}
pub fn skip_bits(&mut self, n: usize) {
self.index += n;
}
pub fn current(&self) -> usize {
self.index
}
pub fn len(&self) -> usize {
self.buffer.len()
}
pub fn is_complete(&self) -> bool {
self.index + 7 >= self.buffer.len() && self.index <= self.buffer.len()
}
pub fn get_huffman(&mut self) -> Result<Huffman<u32>, Error> {
let symbol_count = self.read_bits::<u32>(Huffman::<u32>::MAX_SYMBOL_COUNT_BIT)?;
if symbol_count == 0 {
return Huffman::new(BTreeMap::new())
}
let mut tmp_symbol_depth = BTreeMap::new();
let tmp_symbol_count = self.read_bits::<usize>(5)?;
ensure!(tmp_symbol_count <= Key::SHUFFLE.len(),
"tmp_symbol_count {} > {}", tmp_symbol_count, Key::SHUFFLE.len());
for i in 0..tmp_symbol_count {
let value = self.read_bits::<usize>(3)?;
if value != 0 {
tmp_symbol_depth.insert(Key::SHUFFLE[i], value);
}
}
let key = Huffman::new(tmp_symbol_depth).context("get key huffman")?;
let mut symbol_depth = BTreeMap::new();
let mut i = 0;
let mut last = None;
while i < symbol_count {
let (len, d) = match key.next(self).context("get key content")? {
Depth(d) => (1u32, d),
ShortZero => (self.read_bits::<u32>(3)? + 3, 0),
LongZero => (self.read_bits::<u32>(7)? + 11, 0),
ShortRepeat => (self.read_bits::<u32>(2)? + 3, last.ok_or_else(|| anyhow!("short repeat no last"))?),
LongRepeat => (self.read_bits::<u32>(6)? + 7, last.ok_or_else(|| anyhow!("long repeat no last"))?),
};
last = Some(d);
for j in 0..len as u32 {
if d != 0 {
symbol_depth.insert(i+j, d);
}
}
i += len as u32;
}
Huffman::new(symbol_depth)
}
}
#[test]
fn test_read_bits() {
let input = [0b1100_1010u8, 0b0110_1101, 0b1101_1001];
let mut codec = Codec::new(&input);
assert_eq!(codec.read_bits::<u8>(3).unwrap(), 0b110);
assert_eq!(codec.index, 3);
assert_eq!(codec.read_bits::<u32>(17).unwrap(), 0b1010_0110_1101_1101);
assert_eq!(codec.index, 20);
assert_eq!(codec.read_bits::<u8>(0).unwrap(), 0);
assert_eq!(codec.index, 20);
assert_eq!(Huffman::<()>::MAX_SYMBOL_COUNT, 1 << (Huffman::<()>::MAX_SYMBOL_COUNT_BIT - 1));
}
pub struct Huffman<T> {
depth_count: [usize; Key::MAX_DEPTH+1],
symbol_depth: BTreeMap<T, usize>,
symbols: BTreeMap<T, u32>,
symbol_rev: BTreeMap<(usize, u32), T>,
max_depth: usize,
}
impl<T: std::fmt::Debug> std::fmt::Debug for Huffman<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Huffman")
.field("symbol_count", &self.symbols.len())
.field("max_depth", &self.max_depth)
.field("symbol_depth", &self.symbol_depth)
.field("depth_count", &self.depth_count)
.finish()
}
}
impl<T: Ord+Copy> Huffman<T> {
pub fn new(symbol_depth: BTreeMap<T, usize>) -> Result<Self, Error> {
let mut depth_count = [0; Key::MAX_DEPTH+1];
for &depth in symbol_depth.values() {
depth_count[depth] += 1;
}
let mut max_depth = 0;
let mut depth_bound= [0; Key::MAX_DEPTH+1];
let mut available = 0;
for (depth, &n) in depth_count.iter().enumerate() {
if n != 0 {
max_depth = depth;
}
available <<= 1;
if depth != 0 {
available += n as u32;
}
depth_bound[depth] = available;
}
ensure!(
1<<max_depth == depth_bound[max_depth] || (max_depth <= 1 && depth_bound[max_depth] == max_depth as u32),
"depth_bound error: {:?} {:?}", depth_count, depth_bound);
let mut depth_current = [0; Key::MAX_DEPTH+1];
for i in 1..=Key::MAX_DEPTH {
depth_current[i] = depth_bound[i-1]*2;
}
let symbols: BTreeMap<_, _> = symbol_depth.iter().filter_map(|(&key, &depth)| {
if depth == 0 { return None }
let result = depth_current[depth];
depth_current[depth] += 1;
Some((key, result))
}).collect();
let symbol_rev = symbols.iter().map(|(&k, &v)| ((symbol_depth[&k], v), k)).collect();
Ok(Self {
depth_count, symbol_depth, max_depth,
symbols, symbol_rev,
})
}
}
impl<T: Ord+Copy> Huffman<T> {
pub fn next(&self, codec: &mut Codec<'_>) -> Result<T, Error> {
ensure!(codec.current() < codec.len(), "stream end {} >= {}", codec.current(), codec.len());
let k = codec.look_bits::<u32>(self.max_depth);
for i in 1..=self.max_depth {
let t = k >> (self.max_depth - i) as u8;
if let Some(sym) = self.symbol_rev.get(&(i, t)) {
codec.index += i;
return Ok(*sym)
}
}
bail!("incomplete huffman tree no match");
}
}
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub enum Key {
Depth(usize),
ShortZero , LongZero ,
ShortRepeat , LongRepeat ,
}
use Key::*;
impl Key {
pub const MAX_DEPTH: usize = 16;
pub const SHUFFLE: [Key; Self::MAX_DEPTH+5] = [
ShortZero, LongZero, ShortRepeat, LongRepeat,
Depth(0), Depth(8), Depth(7), Depth(9),
Depth(6), Depth(10), Depth(5), Depth(11),
Depth(4), Depth(12), Depth(3), Depth(13),
Depth(2), Depth(14), Depth(1), Depth(15), Depth(16)];
}
impl<T> Huffman<T> {
pub const MAX_SYMBOL_COUNT: usize = 8192;
pub const MAX_SYMBOL_COUNT_BIT: usize = 14;
}
#[test]
fn test_huffman() {
let input = [0b0100_0000u8];
let mut codec = Codec::new(&input);
let huffman = Huffman::new(BTreeMap::<bool,_>::new()).expect("zero huffman");
assert!(huffman.next(&mut codec).is_err());
let mut codec = Codec::new(&input);
let mut depth = BTreeMap::new();
depth.insert(0xff, 1);
let huffman = Huffman::new(depth).expect("zero huffman");
assert_eq!(huffman.next(&mut codec).unwrap(), 0xff);
assert!(huffman.next(&mut codec).is_err());
let mut codec = Codec::new(&input);
let mut depth = BTreeMap::new();
depth.insert(0x01, 1);
depth.insert(0xff, 1);
let huffman = Huffman::new(depth).expect("zero huffman");
assert_eq!(huffman.next(&mut codec).unwrap(), 0x01);
assert_eq!(huffman.next(&mut codec).unwrap(), 0xff);
}