use crate::alloc::boxed::Box;
use crate::alloc::vec::Vec;
pub struct BitWriter {
buffer: u8,
bits_count: u8,
output: Vec<u8>,
}
impl BitWriter {
pub fn new() -> Self {
Self {
buffer: 0,
bits_count: 0,
output: Vec::new(),
}
}
pub fn write_bit(&mut self, bit: bool) {
if bit {
self.buffer |= 1 << (7 - self.bits_count);
}
self.bits_count += 1;
if self.bits_count == 8 {
self.output.push(self.buffer);
self.buffer = 0;
self.bits_count = 0;
}
}
pub fn write_bits(&mut self, value: u32, count: u8) {
for i in (0..count).rev() {
self.write_bit(((value >> i) & 1) != 0);
}
}
pub fn flush(&mut self) {
if self.bits_count > 0 {
self.output.push(self.buffer);
self.buffer = 0;
self.bits_count = 0;
}
}
pub fn into_vec(mut self) -> Vec<u8> {
self.flush();
self.output
}
}
pub struct BitReader<'a> {
data: &'a [u8],
byte_pos: usize,
bits_count: u8,
}
impl<'a> BitReader<'a> {
pub fn new(data: &'a [u8]) -> Self {
Self {
data,
byte_pos: 0,
bits_count: 0,
}
}
pub fn read_bit(&mut self) -> Option<bool> {
if self.byte_pos >= self.data.len() {
return None;
}
let bit = (self.data[self.byte_pos] >> (7 - self.bits_count)) & 1 != 0;
self.bits_count += 1;
if self.bits_count == 8 {
self.byte_pos += 1;
self.bits_count = 0;
}
Some(bit)
}
pub fn read_bits(&mut self, count: u8) -> Option<u32> {
let mut value = 0u32;
for _ in 0..count {
value = (value << 1) | if self.read_bit()? { 1 } else { 0 };
}
Some(value)
}
}
#[derive(Debug)]
enum HuffmanNode {
Leaf(u8),
Internal(Box<HuffmanNode>, Box<HuffmanNode>),
}
pub struct StaticHuffman;
impl StaticHuffman {
pub fn encode(data: &[u8]) -> Vec<u8> {
if data.is_empty() {
return Vec::new();
}
let mut freqs = [0u32; 256];
for &b in data {
freqs[b as usize] += 1;
}
let root = Self::build_tree(&freqs);
let mut codes = [None; 256];
Self::generate_codes(&root, 0, 0, &mut codes);
let mut writer = BitWriter::new();
for &f in &freqs {
writer.write_bits(f, 32);
}
for &b in data {
let (code, len) = codes[b as usize].unwrap();
writer.write_bits(code, len);
}
writer.into_vec()
}
pub fn decode(compressed: &[u8], original_len: usize) -> Result<Vec<u8>, &'static str> {
if compressed.is_empty() {
return Ok(Vec::new());
}
let mut reader = BitReader::new(compressed);
let mut freqs = [0u32; 256];
for i in 0..256 {
freqs[i] = reader.read_bits(32).ok_or("Failed to read freq table")?;
}
let root = Self::build_tree(&freqs);
let mut decoded = Vec::with_capacity(original_len);
for _ in 0..original_len {
let mut current = &root;
while let HuffmanNode::Internal(left, right) = current {
let bit = reader.read_bit().ok_or("Unexpected end of bitstream")?;
current = if bit { right } else { left };
}
if let HuffmanNode::Leaf(b) = current {
decoded.push(*b);
}
}
Ok(decoded)
}
fn build_tree(freqs: &[u32; 256]) -> HuffmanNode {
use core::cmp::Ordering;
#[derive(Debug)]
struct NodeWrapper {
node: HuffmanNode,
freq: u32,
}
impl PartialEq for NodeWrapper {
fn eq(&self, other: &Self) -> bool {
self.freq == other.freq && self.node_min_symbol() == other.node_min_symbol()
}
}
impl Eq for NodeWrapper {}
impl PartialOrd for NodeWrapper {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for NodeWrapper {
fn cmp(&self, other: &Self) -> Ordering {
match other.freq.cmp(&self.freq) {
Ordering::Equal => other.node_min_symbol().cmp(&self.node_min_symbol()),
ord => ord,
}
}
}
impl NodeWrapper {
fn node_min_symbol(&self) -> u8 {
match &self.node {
HuffmanNode::Leaf(s) => *s,
HuffmanNode::Internal(l, r) => {
fn get_min(n: &HuffmanNode) -> u8 {
match n {
HuffmanNode::Leaf(s) => *s,
HuffmanNode::Internal(l, r) => {
core::cmp::min(get_min(l), get_min(r))
}
}
}
core::cmp::min(get_min(l), get_min(r))
}
}
}
}
let mut heap = Vec::new();
for (i, &f) in freqs.iter().enumerate() {
if f > 0 {
heap.push(NodeWrapper {
node: HuffmanNode::Leaf(i as u8),
freq: f,
});
}
}
if heap.is_empty() {
return HuffmanNode::Leaf(0);
}
heap.sort_unstable();
while heap.len() > 1 {
let left = heap.pop().unwrap();
let right = heap.pop().unwrap();
let combined_freq = left.freq + right.freq;
let new_node = NodeWrapper {
node: HuffmanNode::Internal(Box::new(left.node), Box::new(right.node)),
freq: combined_freq,
};
let pos = heap.binary_search(&new_node).unwrap_or_else(|e| e);
heap.insert(pos, new_node);
}
heap.pop().unwrap().node
}
fn generate_codes(
node: &HuffmanNode,
code: u32,
len: u8,
codes: &mut [Option<(u32, u8)>; 256],
) {
match node {
HuffmanNode::Leaf(b) => {
codes[*b as usize] = Some((code, len.max(1))); }
HuffmanNode::Internal(left, right) => {
Self::generate_codes(left, code << 1, len + 1, codes);
Self::generate_codes(right, (code << 1) | 1, len + 1, codes);
}
}
}
}
const MAX_SYMBOLS: usize = 256;
const NYT: usize = 256; const MAX_NODES: usize = 513;
#[derive(Clone, Debug)]
struct Node {
parent: isize,
left: isize,
right: isize,
weight: u32,
symbol: usize,
}
pub struct AdaptiveHuffman;
impl AdaptiveHuffman {
pub fn encode(data: &[u8]) -> Vec<u8> {
let mut writer = BitWriter::new();
let mut tree = AdaptiveTree::new();
for &b in data {
let symbol = b as usize;
tree.encode_symbol(symbol, &mut writer);
tree.update(symbol);
}
writer.into_vec()
}
pub fn decode(compressed: &[u8], original_len: usize) -> Result<Vec<u8>, &'static str> {
let mut reader = BitReader::new(compressed);
let mut tree = AdaptiveTree::new();
let mut decoded = Vec::with_capacity(original_len);
for _ in 0..original_len {
let symbol = tree.decode_symbol(&mut reader)?;
decoded.push(symbol as u8);
tree.update(symbol);
}
Ok(decoded)
}
}
struct AdaptiveTree {
nodes: Vec<Node>,
symbol_to_node: [isize; MAX_SYMBOLS + 1],
nyt_node: isize,
}
impl AdaptiveTree {
fn new() -> Self {
let mut nodes = Vec::with_capacity(MAX_NODES);
nodes.push(Node {
parent: -1,
left: -1,
right: -1,
weight: 0,
symbol: NYT,
});
let mut symbol_to_node = [-1isize; MAX_SYMBOLS + 1];
symbol_to_node[NYT] = 0;
Self {
nodes,
symbol_to_node,
nyt_node: 0,
}
}
fn encode_symbol(&self, symbol: usize, writer: &mut BitWriter) {
let node_idx = self.symbol_to_node[symbol];
if node_idx != -1 {
self.write_code(node_idx, writer);
} else {
self.write_code(self.nyt_node, writer);
writer.write_bits(symbol as u32, 8);
}
}
fn decode_symbol(&self, reader: &mut BitReader) -> Result<usize, &'static str> {
if self.nodes.is_empty() {
return Err("Empty tree");
}
let mut root = 0;
for (i, node) in self.nodes.iter().enumerate() {
if node.parent == -1 {
root = i as isize;
break;
}
}
let mut curr = root;
while self.nodes[curr as usize].left != -1 {
let bit = reader.read_bit().ok_or("Bitstream ended early")?;
curr = if bit {
self.nodes[curr as usize].right
} else {
self.nodes[curr as usize].left
};
}
let symbol = self.nodes[curr as usize].symbol;
if symbol == NYT {
let raw = reader
.read_bits(8)
.ok_or("Failed to read raw byte after NYT")?;
Ok(raw as usize)
} else {
Ok(symbol)
}
}
fn write_code(&self, mut node_idx: isize, writer: &mut BitWriter) {
let mut bits = Vec::new();
while self.nodes[node_idx as usize].parent != -1 {
let parent_idx = self.nodes[node_idx as usize].parent;
bits.push(self.nodes[parent_idx as usize].right == node_idx);
node_idx = parent_idx;
}
for &bit in bits.iter().rev() {
writer.write_bit(bit);
}
}
fn update(&mut self, symbol: usize) {
let mut curr_idx = self.symbol_to_node[symbol];
if curr_idx == -1 {
let old_nyt = self.nyt_node;
let new_nyt_idx = self.nodes.len() as isize;
self.nodes.push(Node {
parent: old_nyt,
left: -1,
right: -1,
weight: 0,
symbol: NYT,
});
let symbol_node_idx = self.nodes.len() as isize;
self.nodes.push(Node {
parent: old_nyt,
left: -1,
right: -1,
weight: 0,
symbol,
});
self.nodes[old_nyt as usize].left = new_nyt_idx;
self.nodes[old_nyt as usize].right = symbol_node_idx;
self.nodes[old_nyt as usize].symbol = 0;
self.symbol_to_node[symbol] = symbol_node_idx;
self.symbol_to_node[NYT] = new_nyt_idx;
self.nyt_node = new_nyt_idx;
curr_idx = symbol_node_idx;
}
while curr_idx != -1 {
let mut highest_idx = curr_idx;
let target_weight = self.nodes[curr_idx as usize].weight;
for i in (curr_idx as usize + 1)..self.nodes.len() {
if self.nodes[i].weight == target_weight {
highest_idx = i as isize;
}
}
if highest_idx != curr_idx && highest_idx != self.nodes[curr_idx as usize].parent {
self.swap_nodes(curr_idx, highest_idx);
curr_idx = highest_idx;
}
self.nodes[curr_idx as usize].weight += 1;
curr_idx = self.nodes[curr_idx as usize].parent;
}
}
fn swap_nodes(&mut self, i: isize, j: isize) {
let a = i as usize;
let b = j as usize;
if a == b {
return;
}
let p_a = self.nodes[a].parent;
let p_b = self.nodes[b].parent;
if p_a != -1 {
if self.nodes[p_a as usize].left == i {
self.nodes[p_a as usize].left = j;
} else {
self.nodes[p_a as usize].right = j;
}
}
if p_b != -1 {
if self.nodes[p_b as usize].left == j {
self.nodes[p_b as usize].left = i;
} else {
self.nodes[p_b as usize].right = i;
}
}
self.nodes.swap(a, b);
let fix_children = |tree: &mut AdaptiveTree, idx: usize| {
let left = tree.nodes[idx].left;
let right = tree.nodes[idx].right;
if left != -1 {
tree.nodes[left as usize].parent = idx as isize;
tree.nodes[right as usize].parent = idx as isize;
} else {
tree.symbol_to_node[tree.nodes[idx].symbol] = idx as isize;
if tree.nodes[idx].symbol == NYT {
tree.nyt_node = idx as isize;
}
}
};
fix_children(self, a);
fix_children(self, b);
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_static_huffman_roundtrip() {
let data = b"Huffman 1952: Minimum redundancy codes. This is a lossless test.";
let compressed = StaticHuffman::encode(data);
let decompressed = StaticHuffman::decode(&compressed, data.len()).unwrap();
assert_eq!(data.as_slice(), decompressed.as_slice());
}
#[test]
fn test_static_huffman_ratio() {
let data = b"aaaaabbbbbcccccddddd"; let compressed = StaticHuffman::encode(data);
assert!(compressed.len() > 0);
}
#[test]
fn test_adaptive_huffman_roundtrip() {
let data = b"Adaptive Huffman FGK Algorithm Test. aaaaaaaaaaa bbbbbbbbbbb";
let compressed = AdaptiveHuffman::encode(data);
let decompressed = AdaptiveHuffman::decode(&compressed, data.len()).unwrap();
assert_eq!(data.as_slice(), decompressed.as_slice());
}
}