use zipora::{
BlobStore, DictionaryBuilder, DictionaryCompressor, EntropyStats, HuffmanBlobStore,
HuffmanEncoder, HuffmanTree, MemoryBlobStore, RansDecoder, Rans64Encoder,
entropy::{ParallelX1},
};
/// End-to-end demonstration of zipora's entropy-coding toolkit:
/// entropy analysis, Huffman coding, rANS, dictionary compression,
/// and the Huffman-backed blob store.
///
/// Each part prints its results; failures in individual parts are
/// reported and the demo continues, so the function only returns
/// `Err` if something outside the demoed APIs fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("Entropy Coding Demo for zipora");
    println!("==============================\n");

    // ---- PART 1: Shannon-entropy analysis of a small text sample ----
    println!("PART 1: Entropy Analysis");
    println!("------------------------");
    let sample_data = b"hello world! this is a sample text for entropy analysis. hello world!";
    let entropy = EntropyStats::calculate_entropy(sample_data);
    println!("Sample text: \"{}\"", String::from_utf8_lossy(sample_data));
    println!("Text length: {} bytes", sample_data.len());
    println!("Calculated entropy: {:.3} bits per symbol", entropy);
    // 8 bits/byte is the uncompressed baseline, so (1 - H/8) is the
    // best-case fraction of space an ideal entropy coder could save.
    println!(
        "Theoretical compression limit: {:.1}%",
        (1.0 - entropy / 8.0) * 100.0
    );
    println!();

    // ---- PART 2: Huffman coding ----
    println!("PART 2: Huffman Coding");
    println!("----------------------");
    match HuffmanTree::from_data(sample_data) {
        Ok(tree) => {
            println!("[OK] Built Huffman tree successfully");
            println!("     Maximum code length: {} bits", tree.max_code_length());
            // Show the variable-length bit codes assigned to a few symbols.
            for &byte in b"hello " {
                if let Some(code) = tree.get_code(byte) {
                    let code_str: String =
                        code.iter().map(|&b| if b { '1' } else { '0' }).collect();
                    println!("     '{}' -> {}", byte as char, code_str);
                }
            }
        }
        Err(e) => {
            println!("[FAIL] Huffman tree construction failed: {}", e);
        }
    }
    match HuffmanEncoder::new(sample_data) {
        Ok(encoder) => {
            let ratio = encoder.estimate_compression_ratio(sample_data);
            println!("     Estimated compression ratio: {:.3}", ratio);
            println!("     Estimated space savings: {:.1}%", (1.0 - ratio) * 100.0);
        }
        Err(e) => {
            println!("[FAIL] Huffman encoder creation failed: {}", e);
        }
    }
    println!();

    // ---- PART 3: rANS (range Asymmetric Numeral Systems) ----
    println!("PART 3: rANS (Range Asymmetric Numeral Systems)");
    println!("-----------------------------------------------");
    // rANS needs a per-symbol frequency table over the full byte alphabet.
    let mut frequencies = [0u32; 256];
    for &byte in sample_data {
        frequencies[byte as usize] += 1;
    }
    match Rans64Encoder::<ParallelX1>::new(&frequencies) {
        Ok(encoder) => {
            println!("[OK] Created rANS encoder successfully");
            println!("     Total frequency: {}", encoder.total_freq());
            // Each symbol occupies the half-open range [start, start+freq)
            // within the cumulative frequency space.
            for &byte in b"hello" {
                let symbol = encoder.get_symbol(byte);
                println!(
                    "     '{}' -> start: {}, freq: {}",
                    byte as char, symbol.start, symbol.freq
                );
            }
            // Constructed only to demonstrate the paired decoder API;
            // leading underscore silences the unused-variable warning.
            let _decoder = RansDecoder::new(&encoder);
            println!("     Created corresponding rANS decoder");
        }
        Err(e) => {
            println!("[FAIL] rANS encoder creation failed: {}", e);
        }
    }
    println!();

    // ---- PART 4: Dictionary-based (LZ-style) compression ----
    println!("PART 4: Dictionary-Based Compression");
    println!("------------------------------------");
    let dict_data = b"the quick brown fox jumps over the lazy dog. the quick brown fox.";
    let builder = DictionaryBuilder::new()
        .min_match_length(3)
        .max_match_length(20)
        .max_entries(100);
    let dictionary = builder.build(dict_data);
    println!("[OK] Built dictionary with {} entries", dictionary.len());
    if !dictionary.is_empty() {
        let compressor = DictionaryCompressor::new(dictionary);
        let ratio = compressor.estimate_compression_ratio(dict_data);
        println!("     Estimated compression ratio: {:.3}", ratio);
        // A ratio >= 1.0 means the dictionary overhead outweighs the matches.
        if ratio < 1.0 {
            println!("     Estimated space savings: {:.1}%", (1.0 - ratio) * 100.0);
        } else {
            println!("     No compression benefit expected (ratio >= 1.0)");
        }
    }
    println!();

    // ---- PART 5: Huffman-compressed blob store ----
    println!("PART 5: Entropy Blob Store Integration");
    println!("--------------------------------------");
    let inner_store = MemoryBlobStore::new();
    let mut huffman_store = HuffmanBlobStore::new(inner_store);
    // The store learns its code table from training data before any puts.
    huffman_store.add_training_data(sample_data);
    match huffman_store.build_tree() {
        Ok(()) => {
            println!("[OK] Built Huffman tree for blob store");
            let test_data = b"this is test data for the huffman blob store";
            match huffman_store.put(test_data) {
                Ok(id) => {
                    println!("     Stored data with ID: {}", id);
                    println!("     Store contains {} items", huffman_store.len());
                    let stats = huffman_store.compression_stats();
                    println!("     Compressions performed: {}", stats.compressions);
                    if stats.compressions > 0 {
                        println!(
                            "     Average compression time: {:.1} µs",
                            stats.avg_compression_time_us()
                        );
                    }
                }
                Err(e) => {
                    println!("[FAIL] Failed to store data: {}", e);
                }
            }
        }
        Err(e) => {
            println!("[FAIL] Failed to build Huffman tree: {}", e);
        }
    }
    println!();

    // ---- PART 6: Qualitative comparison of the algorithms ----
    println!("PART 6: Performance Analysis");
    println!("----------------------------");
    println!("Entropy coding algorithms comparison:");
    println!("- Huffman Coding:");
    println!("  - Optimal for known symbol probabilities");
    println!("  - Prefix-free codes, good compression ratio");
    println!("  - Fast decoding, moderate encoding speed");
    println!();
    println!("- rANS (range Asymmetric Numeral Systems):");
    println!("  - Near-optimal compression (close to entropy limit)");
    println!("  - Better than Huffman for most data types");
    println!("  - More complex implementation");
    println!();
    println!("- Dictionary Compression:");
    println!("  - Excellent for data with repeated patterns");
    println!("  - LZ-style compression, finds substring matches");
    println!("  - Good for text and structured data");
    println!();
    println!("Compression effectiveness depends on data characteristics:");
    println!("- High entropy (random) data: Limited compression possible");
    println!("- Biased distributions: Huffman/rANS work well");
    println!("- Repeated patterns: Dictionary compression excels");
    println!("- Mixed data: Combination approaches often best");
    println!();
    println!("[OK] Entropy coding demonstration completed successfully!");
    Ok(())
}