#![doc(html_root_url = "https://docs.rs/huffman-compress/0.5.0")]
#![deny(missing_docs)]
#![deny(warnings)]
#![deny(missing_debug_implementations)]
extern crate bit_vec;
extern crate num_traits;
#[cfg(test)]
#[macro_use]
extern crate quickcheck;
use std::borrow::Borrow;
use std::cmp;
use std::cmp::Reverse;
use std::collections::{btree_map, BTreeMap, BinaryHeap};
use std::error::Error;
use std::fmt;
use std::iter::{FromIterator, Take};
use bit_vec::BitVec;
use num_traits::ops::saturating::Saturating;
#[derive(Debug, Clone)]
pub struct Tree<K> {
root: usize,
arena: Vec<Node<K>>,
}
#[derive(Debug, Clone)]
struct Node<K> {
parent: Option<usize>,
data: NodeData<K>,
}
#[derive(Debug, Clone)]
enum NodeData<K> {
Leaf { symbol: K },
Branch { left: usize, right: usize },
}
impl<K: Clone> Tree<K> {
pub fn unbounded_decoder<I>(&self, iterable: I) -> UnboundedDecoder<K, I>
where
I: IntoIterator<Item = bool>,
{
UnboundedDecoder {
tree: self,
iter: iterable.into_iter(),
}
}
pub fn decoder<I>(&self, iterable: I, num_symbols: usize) -> Decoder<K, I>
where
I: IntoIterator<Item = bool>,
{
self.unbounded_decoder(iterable).take(num_symbols)
}
}
pub type Decoder<'a, K, I> = Take<UnboundedDecoder<'a, K, I>>;
#[derive(Debug)]
pub struct UnboundedDecoder<'a, K: 'a, I: IntoIterator<Item = bool>> {
tree: &'a Tree<K>,
iter: I::IntoIter,
}
impl<'a, K: Clone, I: IntoIterator<Item = bool>> Iterator for UnboundedDecoder<'a, K, I> {
type Item = K;
fn next(&mut self) -> Option<K> {
let mut node = match self.tree.arena.get(self.tree.root) {
Some(root) => root,
None => return None,
};
loop {
match node.data {
NodeData::Leaf { ref symbol } => return Some(symbol.clone()),
NodeData::Branch { left, right } => {
node = match self.iter.next() {
Some(true) => &self.tree.arena[left],
Some(false) => &self.tree.arena[right],
None => return None,
};
}
}
}
}
}
#[derive(Clone, Debug)]
pub struct Book<K> {
book: BTreeMap<K, BitVec>,
}
impl<K: Ord + Clone> Book<K> {
pub fn into_inner(self) -> BTreeMap<K, BitVec> {
self.book
}
pub fn symbols(&self) -> btree_map::Keys<K, BitVec> {
self.book.keys()
}
pub fn iter(&self) -> btree_map::Iter<K, BitVec> {
self.book.iter()
}
pub fn len(&self) -> usize {
self.book.len()
}
pub fn is_empty(&self) -> bool {
self.book.is_empty()
}
pub fn get<Q: ?Sized>(&self, k: &Q) -> Option<&BitVec>
where
K: Borrow<Q>,
Q: Ord,
{
self.book.get(k)
}
pub fn contains_symbol<Q: ?Sized>(&self, k: &Q) -> bool
where
K: Borrow<Q>,
Q: Ord,
{
self.book.contains_key(k)
}
pub fn encode<Q: ?Sized>(&self, buffer: &mut BitVec, k: &Q) -> Result<(), EncodeError>
where
K: Borrow<Q>,
Q: Ord,
{
match self.book.get(k) {
Some(code) => buffer.extend(code),
None => return Err(EncodeError {}),
}
Ok(())
}
fn new() -> Book<K> {
Book {
book: BTreeMap::new(),
}
}
fn build(&mut self, arena: &[Node<K>], node: &Node<K>, word: BitVec) {
match node.data {
NodeData::Leaf { ref symbol } => {
self.book.insert(symbol.clone(), word);
}
NodeData::Branch { left, right } => {
let mut left_word = word.clone();
left_word.push(true);
self.build(arena, &arena[left], left_word);
let mut right_word = word;
right_word.push(false);
self.build(arena, &arena[right], right_word);
}
}
}
}
#[derive(Debug, Clone)]
pub struct EncodeError;
impl fmt::Display for EncodeError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.description().fmt(f)
}
}
impl Error for EncodeError {
fn description(&self) -> &str {
"encode error: tried to encode an unknown symbol"
}
}
#[derive(Debug, Clone)]
pub struct CodeBuilder<K: Ord + Clone, W: Saturating + Ord> {
heap: BinaryHeap<HeapData<K, W>>,
arena: Vec<Node<K>>,
}
impl<K: Ord + Clone, W: Saturating + Ord> CodeBuilder<K, W> {
pub fn new() -> CodeBuilder<K, W> {
CodeBuilder {
heap: BinaryHeap::new(),
arena: Vec::new(),
}
}
pub fn with_capacity(capacity: usize) -> CodeBuilder<K, W> {
CodeBuilder {
heap: BinaryHeap::with_capacity(capacity),
arena: Vec::with_capacity(2 * capacity),
}
}
pub fn push(&mut self, symbol: K, weight: W) {
self.heap.push(HeapData {
weight: Reverse(weight),
symbol: symbol.clone(),
id: self.arena.len(),
});
self.arena.push(Node {
parent: None,
data: NodeData::Leaf { symbol },
});
}
pub fn finish(mut self) -> (Book<K>, Tree<K>) {
let mut book = Book::new();
let root = loop {
let left = match self.heap.pop() {
Some(left) => left,
None => return (book, Tree { root: 0, arena: self.arena }),
};
let right = match self.heap.pop() {
Some(right) => right,
None => break left,
};
let id = self.arena.len();
self.arena[left.id].parent = Some(id);
self.arena[right.id].parent = Some(id);
self.heap.push(HeapData {
weight: Reverse(left.weight.0.saturating_add(right.weight.0)),
symbol: cmp::min(left.symbol, right.symbol),
id,
});
self.arena.push(Node {
parent: None,
data: NodeData::Branch {
left: left.id,
right: right.id,
},
});
};
book.build(&self.arena, &self.arena[root.id], BitVec::new());
(book, Tree { root: root.id, arena: self.arena })
}
}
impl<K: Ord + Clone, W: Saturating + Ord> Default for CodeBuilder<K, W> {
fn default() -> CodeBuilder<K, W> {
CodeBuilder::new()
}
}
impl<K: Ord + Clone, W: Saturating + Ord> FromIterator<(K, W)> for CodeBuilder<K, W> {
fn from_iter<T>(weights: T) -> CodeBuilder<K, W>
where
T: IntoIterator<Item = (K, W)>,
{
let iter = weights.into_iter();
let (size_hint, _) = iter.size_hint();
let mut code = CodeBuilder::with_capacity(size_hint);
code.extend(iter);
code
}
}
impl<K: Ord + Clone, W: Saturating + Ord> Extend<(K, W)> for CodeBuilder<K, W> {
fn extend<T>(&mut self, weights: T)
where
T: IntoIterator<Item = (K, W)>,
{
for (symbol, weight) in weights {
self.push(symbol, weight);
}
}
}
impl<'a, K: Ord + Clone, W: Saturating + Ord + Clone> FromIterator<(&'a K, &'a W)> for CodeBuilder<K, W> {
fn from_iter<T>(weights: T) -> CodeBuilder<K, W>
where
T: IntoIterator<Item = (&'a K, &'a W)>,
{
CodeBuilder::from_iter(weights.into_iter().map(|(k, v)| (k.clone(), v.clone())))
}
}
impl<'a, K: Ord + Clone, W: Saturating + Ord + Clone> Extend<(&'a K, &'a W)> for CodeBuilder<K, W> {
fn extend<T>(&mut self, weights: T)
where
T: IntoIterator<Item = (&'a K, &'a W)>,
{
self.extend(weights.into_iter().map(|(k, v)| (k.clone(), v.clone())));
}
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug)]
struct HeapData<K, W> {
weight: Reverse<W>,
symbol: K,
id: usize,
}
impl<K: Clone, W: Clone> Clone for HeapData<K, W> {
fn clone(&self) -> HeapData<K, W> {
HeapData {
weight: Reverse(self.weight.0.clone()),
symbol: self.symbol.clone(),
id: self.id,
}
}
}
pub fn codebook<'a, I, K, W>(weights: I) -> (Book<K>, Tree<K>)
where
I: IntoIterator<Item = (&'a K, &'a W)>,
K: 'a + Ord + Clone,
W: 'a + Saturating + Ord + Clone,
{
CodeBuilder::from_iter(weights).finish()
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::HashMap;
#[test]
fn test_uniform() {
let mut sample = HashMap::new();
sample.insert(1, 1);
sample.insert(2, 1);
sample.insert(3, 1);
sample.insert(4, 1);
sample.insert(5, 1);
let (book, tree) = CodeBuilder::from_iter(sample).finish();
let mut buffer = BitVec::new();
book.encode(&mut buffer, &1).unwrap();
book.encode(&mut buffer, &2).unwrap();
book.encode(&mut buffer, &3).unwrap();
book.encode(&mut buffer, &4).unwrap();
book.encode(&mut buffer, &5).unwrap();
let mut decoder = tree.unbounded_decoder(buffer);
assert_eq!(decoder.next(), Some(1));
assert_eq!(decoder.next(), Some(2));
assert_eq!(decoder.next(), Some(3));
assert_eq!(decoder.next(), Some(4));
assert_eq!(decoder.next(), Some(5));
assert_eq!(decoder.next(), None);
}
#[test]
fn test_uniform_from_static() {
const WEIGHTS: &[(&char, &usize)] = &[
(&'a', &1),
(&'b', &1),
(&'c', &1),
(&'d', &1),
];
let (book, tree) = codebook(WEIGHTS.iter().cloned());
let mut buffer = BitVec::new();
book.encode(&mut buffer, &'a').unwrap();
book.encode(&mut buffer, &'b').unwrap();
book.encode(&mut buffer, &'c').unwrap();
book.encode(&mut buffer, &'d').unwrap();
let mut decoder = tree.unbounded_decoder(buffer);
assert_eq!(decoder.next(), Some('a'));
assert_eq!(decoder.next(), Some('b'));
assert_eq!(decoder.next(), Some('c'));
assert_eq!(decoder.next(), Some('d'));
assert_eq!(decoder.next(), None);
}
#[test]
fn test_empty() {
let (book, tree) = CodeBuilder::<&str, i32>::new().finish();
let mut buffer = BitVec::new();
assert!(book.encode(&mut buffer, "hello").is_err());
let mut decoder = tree.unbounded_decoder(buffer);
assert_eq!(decoder.next(), None);
}
#[test]
fn test_single() {
let mut builder = CodeBuilder::new();
builder.push("hello", 1);
let (book, tree) = builder.finish();
let mut buffer = BitVec::new();
book.encode(&mut buffer, "hello").unwrap();
let mut decoder = tree.unbounded_decoder(buffer);
assert_eq!(decoder.next(), Some("hello"));
assert_eq!(decoder.next(), Some("hello"));
}
quickcheck! {
fn efficient_order(ag: u32, at: u32, cg: u32, ct: u32, tg: u32) -> bool {
let mut builder = CodeBuilder::new();
builder.push("CG", cg);
builder.push("AG", ag);
builder.push("AT", at);
builder.push("CT", ct);
builder.push("TG", tg);
let (book, _) = builder.finish();
let len = |symbol| {
book.get(symbol).map_or(0, |code| code.len())
};
at >= ct || len("CT") <= len("AT")
}
fn encode_decode_bytes(symbols: Vec<u8>) -> bool {
let mut counts = [0; 256];
for symbol in &symbols {
counts[usize::from(*symbol)] += 1;
}
let (book, tree) = counts.iter()
.enumerate()
.map(|(k, v)| (k as u8, *v))
.collect::<CodeBuilder<_, _>>()
.finish();
let mut buffer = BitVec::new();
for symbol in &symbols {
book.encode(&mut buffer, symbol).unwrap();
}
tree.unbounded_decoder(&buffer).eq(symbols)
}
}
}