#![forbid(unsafe_code)]
#![deny(missing_docs)]
#![deny(rustdoc::missing_doc_code_examples)]
use std::collections::BinaryHeap;
use bit_vec::BitVec;
pub const MAX_SYMBOLS: usize = u8::MAX as usize + 1;
const TREE_SIZE: usize = MAX_SYMBOLS * 2 - 1;
#[derive(Debug, Clone, Copy)]
struct Node {
pub index: usize,
pub count: usize,
pub parent: Option<usize>,
pub left: Option<usize>,
pub right: Option<usize>,
}
impl Node {
const fn new(index: usize, count: usize) -> Self {
Self {
index,
count,
parent: None,
left: None,
right: None,
}
}
}
impl PartialEq for Node {
fn eq(&self, other: &Self) -> bool {
other.count.eq(&self.count)
}
}
impl Eq for Node {}
impl PartialOrd for Node {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(other.count.cmp(&self.count))
}
}
impl Ord for Node {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
other.count.cmp(&self.count)
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Huffman {
tree: [Node; TREE_SIZE],
root_index: usize,
}
impl Huffman {
pub fn new(frequency_table: &[usize; MAX_SYMBOLS]) -> Self {
let mut tree = std::array::from_fn(|i| Node::new(i, 0));
let root_index = Self::build_tree(&mut tree, frequency_table);
Self { tree, root_index }
}
pub fn new_from_data(data: &[u8]) -> Self {
let mut table = [0; MAX_SYMBOLS];
Self::calculate_freq_table(&mut table, data);
Self::new(&table)
}
pub fn calculate_freq_table(table: &mut [usize; MAX_SYMBOLS], data: &[u8]) {
table.fill(0);
for i in data {
table[*i as usize] += 1;
}
}
fn build_tree(tree: &mut [Node; TREE_SIZE], table: &[usize; MAX_SYMBOLS]) -> usize {
let mut priority_queue: BinaryHeap<Node> = table
.iter()
.enumerate()
.map(|(index, v)| Node::new(index, *v))
.collect();
let mut tree_index = 256;
while priority_queue.len() > 1 {
let node1 = priority_queue.pop().unwrap();
let node2 = priority_queue.pop().unwrap();
tree[node1.index] = node1;
tree[node2.index] = node2;
tree[node1.index].parent = Some(tree_index);
tree[node2.index].parent = Some(tree_index);
let parent = Node {
count: node1.count + node2.count,
left: Some(node1.index),
right: Some(node2.index),
parent: None,
index: tree_index,
};
tree[tree_index] = parent;
tree_index += 1;
priority_queue.push(parent);
}
priority_queue.pop().unwrap().index
}
fn traverse(&self, bits: &mut BitVec, index: usize, child_index: Option<usize>) {
if let Some(parent) = self.tree[index].parent {
self.traverse(bits, parent, Some(index));
}
if let Some(child_index) = child_index {
if Some(child_index) == self.tree[index].left {
bits.push(true);
} else if Some(child_index) == self.tree[index].right {
bits.push(false);
}
}
}
pub fn compress(&self, data: &[u8]) -> Vec<u8> {
if data.is_empty() {
return Vec::new();
}
let mut bits = BitVec::new();
for b in data.iter() {
self.traverse(&mut bits, *b as usize, None)
}
bits.to_bytes()
}
pub fn decompress(&self, data: &[u8]) -> Vec<u8> {
if data.is_empty() {
return Vec::new();
}
let bits = BitVec::from_bytes(data);
let mut decompressed = Vec::with_capacity(bits.len() * 2);
let byte_count = self.tree[self.root_index].count;
let mut bits_iter = bits.iter();
for _ in 0..byte_count {
let mut index = self.root_index;
while self.tree[index].left.is_some() || self.tree[index].right.is_some() {
let bit = bits_iter.next().expect("missing data");
if bit {
index = self.tree[index].left.expect("should have left index");
} else {
index = self.tree[index].right.expect("should have right index")
}
}
decompressed.push(index as u8);
}
decompressed
}
}
#[cfg(test)]
mod tests {
use super::*;
use proptest::prelude::*;
#[test]
fn compress_decompress() {
let payload = b"so much words wow many compression";
let huffman = Huffman::new_from_data(payload);
let compressed = huffman.compress(payload);
let decompressed = huffman.decompress(&compressed);
assert!(compressed.len() < payload.len());
assert_eq!(&payload[..], decompressed)
}
#[test]
fn compress_decompress_lorem_ipsum() {
let payload = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
let huffman = Huffman::new_from_data(payload);
let compressed = huffman.compress(payload);
let decompressed = huffman.decompress(&compressed);
assert!(compressed.len() < payload.len());
assert_eq!(&payload[..], decompressed)
}
#[test]
fn create_freq_table() {
let mut table = [0; MAX_SYMBOLS];
Huffman::calculate_freq_table(&mut table, &[0u8]);
assert_eq!(table[0], 1);
}
#[test]
fn payload_size_0() {
let payload = &[];
let huffman = Huffman::new_from_data(payload);
let compressed = huffman.compress(payload);
let decompressed = huffman.decompress(&compressed);
assert_eq!(&payload[..], decompressed)
}
#[test]
fn payload_size_1() {
let payload = &[0u8];
let huffman = Huffman::new_from_data(payload);
let compressed = huffman.compress(payload);
let decompressed = huffman.decompress(&compressed);
assert_eq!(&payload[..], decompressed)
}
proptest! {
#[test]
fn proptest_compress_decompress(data: Vec<u8>) {
let huffman = Huffman::new_from_data(&data);
let compressed = huffman.compress(&data);
let decompressed = huffman.decompress(&compressed);
prop_assert!(compressed.len() <= data.len());
prop_assert_eq!(data, decompressed);
}
#[test]
fn proptest_freq_table(data: Vec<u8>) {
let mut table = [0; MAX_SYMBOLS];
Huffman::calculate_freq_table(&mut table, &data);
for b in data {
prop_assert!(table.get(b as usize).is_some());
prop_assert!(table[b as usize] > 0);
}
}
}
}