pub mod bit_ops;
pub mod context;
pub mod dictionary;
pub mod fse;
pub mod huffman;
pub mod parallel;
pub mod rans;
pub mod simd_huffman;
pub use bit_ops::{BitOps, BitOpsConfig, EntropyBitOps, BitOpsStats};
pub use context::{EntropyContext, EntropyContextConfig, ContextBuffer, EntropyResult, ContextStats};
pub use dictionary::{DictionaryBuilder, DictionaryCompressor, OptimizedDictionaryCompressor};
pub use fse::{
FseEncoder, FseDecoder, FseConfig, FseTable,
fse_compress, fse_decompress, fse_zip, fse_unzip,
fse_compress_with_config, fse_decompress_with_config,
HardwareCapabilities, FastDivision, EntropyNormalizer
};
/// Alias that exposes [`FseEncoder`] under the `EnhancedFseEncoder` name.
pub type EnhancedFseEncoder = FseEncoder;
/// Alias that exposes [`FseConfig`] under the `EnhancedFseConfig` name.
pub type EnhancedFseConfig = FseConfig;
pub use huffman::{
HuffmanDecoder, HuffmanEncoder, HuffmanTree,
ContextualHuffmanEncoder, ContextualHuffmanDecoder, HuffmanOrder
};
pub use simd_huffman::{
SimdHuffmanEncoder, SimdHuffmanConfig, HuffmanSimdTier
};
pub use rans::{
Rans64Decoder as RansDecoder, Rans64Encoder, Rans64State as RansState,
Rans64Symbol as RansSymbol, AdaptiveRans64Encoder as AdaptiveRansEncoder,
ParallelX1, ParallelX2, ParallelX4, ParallelX8
};
pub use parallel::{
ParallelVariant, ParallelX2Variant, ParallelX4Variant, ParallelX8Variant,
ParallelConfig, ParallelHuffmanEncoder, ParallelHuffmanDecoder,
AdaptiveParallelEncoder, ParallelBenchmark, BenchmarkResult
};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Identifies an entropy coding algorithm.
///
/// The `Fse` and `KFse` variants only exist when the crate is built with the
/// `zstd` feature. [`EntropyAlgorithm::Auto`] is the [`Default`] value.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum EntropyAlgorithm {
    /// Huffman coding.
    Huffman,
    /// rANS coding.
    Rans,
    /// FSE coding (requires the `zstd` feature).
    #[cfg(feature = "zstd")]
    Fse,
    /// kFSE coding (requires the `zstd` feature).
    #[cfg(feature = "zstd")]
    KFse,
    /// Dictionary-based compression.
    Dictionary,
    /// Automatic selection; this is the default variant.
    #[default]
    Auto,
}
impl EntropyAlgorithm {
    /// Returns the display name of the algorithm (for example `"rANS"`).
    pub fn name(self) -> &'static str {
        match self {
            EntropyAlgorithm::Huffman => "Huffman",
            EntropyAlgorithm::Rans => "rANS",
            #[cfg(feature = "zstd")]
            EntropyAlgorithm::Fse => "FSE",
            #[cfg(feature = "zstd")]
            EntropyAlgorithm::KFse => "kFSE",
            EntropyAlgorithm::Dictionary => "Dictionary",
            EntropyAlgorithm::Auto => "Auto",
        }
    }

    /// Reports whether the algorithm is available in the current build.
    ///
    /// Every variant that exists in the build returns `true`; feature-gated
    /// variants are simply absent when the `zstd` feature is disabled. The
    /// match is kept exhaustive so adding a variant forces a decision here.
    pub fn is_available(self) -> bool {
        match self {
            EntropyAlgorithm::Huffman => true,
            EntropyAlgorithm::Rans => true,
            #[cfg(feature = "zstd")]
            EntropyAlgorithm::Fse => true,
            #[cfg(feature = "zstd")]
            EntropyAlgorithm::KFse => true,
            EntropyAlgorithm::Dictionary => true,
            EntropyAlgorithm::Auto => true,
        }
    }

    /// Lists the algorithms compiled into this build.
    ///
    /// The order is `Huffman`, `Rans`, `Dictionary`, `Auto`, followed by
    /// `Fse` and `KFse` when the `zstd` feature is enabled.
    pub fn available_algorithms() -> Vec<Self> {
        let mut algorithms = vec![
            Self::Huffman,
            Self::Rans,
            Self::Dictionary,
            Self::Auto,
        ];
        #[cfg(feature = "zstd")]
        {
            algorithms.push(Self::Fse);
            algorithms.push(Self::KFse);
        }
        algorithms
    }

    /// Picks an algorithm for `data` using simple byte statistics.
    ///
    /// Rules are checked in this order:
    /// 1. empty input selects `Huffman`;
    /// 2. at most 51 distinct byte values (repetitiveness above 0.8) selects
    ///    `Dictionary`;
    /// 3. with the `zstd` feature, entropy below 4.0 bits and more than 1024
    ///    bytes selects `Fse`;
    /// 4. entropy within `4.0..=6.0` bits and more than 256 bytes selects
    ///    `Rans`;
    /// 5. anything else selects `Huffman`.
    pub fn select_for_data(data: &[u8]) -> Self {
        if data.is_empty() {
            return Self::Huffman;
        }

        let entropy = EntropyStats::calculate_entropy(data);
        let size = data.len();

        // Only the number of distinct byte values matters, so a fixed
        // presence table is enough: no heap allocation, no hashing, and no
        // per-byte counter that could overflow on very large inputs.
        let mut seen = [false; 256];
        for &byte in data {
            seen[usize::from(byte)] = true;
        }
        let unique_chars = seen.iter().filter(|&&hit| hit).count();
        let repetitiveness = 1.0 - (unique_chars as f64 / 256.0);

        if repetitiveness > 0.8 {
            return Self::Dictionary;
        }

        // Attributes are not accepted directly on `if` expressions, hence
        // the feature-gated block.
        #[cfg(feature = "zstd")]
        {
            if entropy < 4.0 && size > 1024 {
                return Self::Fse;
            }
        }

        if (4.0..=6.0).contains(&entropy) && size > 256 {
            return Self::Rans;
        }

        Self::Huffman
    }
}
/// Configuration values for entropy coding.
///
/// Ready-made presets are provided by `EntropyConfig::fast`,
/// `EntropyConfig::balanced` and `EntropyConfig::high_compression`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct EntropyConfig {
    /// Algorithm to use; the default configuration sets `EntropyAlgorithm::Auto`.
    pub algorithm: EntropyAlgorithm,
    /// Optional FSE settings. This field only exists when the `zstd` feature
    /// is enabled.
    #[cfg(feature = "zstd")]
    pub fse_config: Option<FseConfig>,
    /// Compression level. The presets in this file use 1 (`fast`), 3 (default)
    /// and 19 (`high_compression`).
    /// NOTE(review): the accepted range is not visible here — confirm.
    pub compression_level: i32,
    /// Adaptive-mode flag; `true` in the default configuration.
    /// NOTE(review): how consumers interpret it is not visible in this file.
    pub adaptive: bool,
    /// Dictionary size; 0 in the default configuration and `32 * 1024` in the
    /// `high_compression` preset. Presumably measured in bytes — TODO confirm.
    pub dict_size: usize,
    /// Fast-decode flag; `false` by default and `true` in the `fast` preset.
    pub fast_decode: bool,
}
impl Default for EntropyConfig {
fn default() -> Self {
Self {
algorithm: EntropyAlgorithm::Auto,
#[cfg(feature = "zstd")]
fse_config: Some(FseConfig::default()),
compression_level: 3,
adaptive: true,
dict_size: 0,
fast_decode: false,
}
}
}
impl EntropyConfig {
pub fn fast() -> Self {
Self {
algorithm: EntropyAlgorithm::Huffman,
compression_level: 1,
fast_decode: true,
#[cfg(feature = "zstd")]
fse_config: Some(FseConfig::fast_compression()),
..Default::default()
}
}
pub fn high_compression() -> Self {
Self {
#[cfg(feature = "zstd")]
algorithm: EntropyAlgorithm::Fse,
#[cfg(not(feature = "zstd"))]
algorithm: EntropyAlgorithm::Rans,
compression_level: 19,
adaptive: true,
dict_size: 32 * 1024,
#[cfg(feature = "zstd")]
fse_config: Some(FseConfig::high_compression()),
..Default::default()
}
}
pub fn balanced() -> Self {
Self::default()
}
}
/// Summary statistics for one entropy-coding run.
///
/// All derived fields are computed once by [`EntropyStats::new`].
#[derive(Debug, Clone, PartialEq)]
pub struct EntropyStats {
    /// Number of input symbols (bytes).
    pub input_size: usize,
    /// Size of the encoded output in bytes.
    pub output_size: usize,
    /// `output_size / input_size`; `0.0` when the input is empty.
    pub compression_ratio: f64,
    /// Output bits spent per input symbol; `0.0` when the input is empty.
    pub bits_per_symbol: f64,
    /// Entropy value supplied by the caller, in bits per symbol.
    pub entropy: f64,
    /// `entropy / bits_per_symbol`; `0.0` when `bits_per_symbol` is not positive.
    pub efficiency: f64,
}

impl EntropyStats {
    /// Builds the statistics for a run that turned `input_size` bytes into
    /// `output_size` bytes, given the `entropy` of the input.
    ///
    /// An empty input yields `0.0` for the ratio, the bits per symbol and the
    /// efficiency instead of dividing by zero.
    pub fn new(input_size: usize, output_size: usize, entropy: f64) -> Self {
        let (compression_ratio, bits_per_symbol) = if input_size > 0 {
            let input = input_size as f64;
            let output = output_size as f64;
            // Scale to bits in floating point: `output_size * 8` as an integer
            // overflows `usize` for large sizes (notably on 32-bit targets).
            (output / input, output * 8.0 / input)
        } else {
            (0.0, 0.0)
        };

        let efficiency = if bits_per_symbol > 0.0 {
            entropy / bits_per_symbol
        } else {
            0.0
        };

        Self {
            input_size,
            output_size,
            compression_ratio,
            bits_per_symbol,
            entropy,
            efficiency,
        }
    }

    /// Returns the space saved as a percentage: `(1 - compression_ratio) * 100`.
    pub fn space_savings(&self) -> f64 {
        (1.0 - self.compression_ratio) * 100.0
    }

    /// Computes the Shannon entropy of `data` in bits per byte.
    ///
    /// Returns `0.0` for empty input. The result lies between `0.0` (a single
    /// repeated byte) and `8.0` (all 256 byte values equally frequent).
    pub fn calculate_entropy(data: &[u8]) -> f64 {
        if data.is_empty() {
            return 0.0;
        }

        // `usize` counters cannot overflow: no count can exceed `data.len()`.
        let mut frequencies = [0usize; 256];
        for &byte in data {
            frequencies[usize::from(byte)] += 1;
        }

        let total = data.len() as f64;
        frequencies
            .iter()
            .filter(|&&count| count > 0)
            .fold(0.0, |entropy, &count| {
                let p = count as f64 / total;
                entropy - p * p.log2()
            })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used for approximate `f64` comparisons.
    const TOLERANCE: f64 = 0.001;

    /// Returns `true` when `actual` is within [`TOLERANCE`] of `expected`.
    fn close_to(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < TOLERANCE
    }

    /// Derived fields follow from the sizes and entropy given to `new`.
    #[test]
    fn test_entropy_stats_calculation() {
        let summary = EntropyStats::new(1000, 600, 4.5);

        assert_eq!(summary.input_size, 1000);
        assert_eq!(summary.output_size, 600);
        assert!(close_to(summary.compression_ratio, 0.6));
        assert!(close_to(summary.bits_per_symbol, 4.8));
        assert!(close_to(summary.efficiency, 0.9375));
        assert!(close_to(summary.space_savings(), 40.0));
    }

    /// Entropy is 8 bits for uniform bytes and 0 for constant or empty data.
    #[test]
    fn test_entropy_calculation() {
        let every_byte: Vec<u8> = (0..=u8::MAX).collect();
        assert!(close_to(EntropyStats::calculate_entropy(&every_byte), 8.0));

        let constant = [42u8; 100];
        assert!(EntropyStats::calculate_entropy(&constant) < TOLERANCE);

        let nothing: &[u8] = &[];
        assert_eq!(EntropyStats::calculate_entropy(nothing), 0.0);
    }

    /// Zero-sized inputs and outputs must not divide by zero.
    #[test]
    fn test_entropy_stats_edge_cases() {
        let empty_run = EntropyStats::new(0, 0, 0.0);
        assert_eq!(empty_run.compression_ratio, 0.0);
        assert_eq!(empty_run.bits_per_symbol, 0.0);
        assert_eq!(empty_run.efficiency, 0.0);

        let no_output = EntropyStats::new(100, 0, 4.0);
        assert_eq!(no_output.compression_ratio, 0.0);
        assert_eq!(no_output.space_savings(), 100.0);
    }
}