use crate::{
analyzer::{CompressionOptions, SizeEstimationParameters},
utils::analyze_utils::{calculate_file_entropy, get_zstd_compressed_size},
};
use lossless_transform_utils::match_estimator::estimate_num_lz_matches_fast;
pub mod compare_groups;
pub mod split_comparison;
pub mod stats;
/// Compression-related measurements taken over a single group of bytes.
///
/// Values are produced by [`GroupComparisonMetrics::from_bytes`]; two instances
/// can be compared field-by-field via [`GroupDifference::from_metrics`].
#[derive(Debug, Clone, Copy, Default, PartialEq)]
pub struct GroupComparisonMetrics {
    /// Estimated number of LZ matches found in the data.
    pub lz_matches: u64,
    /// Entropy value computed for the data.
    pub entropy: f64,
    /// Size predicted by the configured size-estimator function.
    pub estimated_size: u64,
    /// Size reported by the zstd compression helper.
    pub zstd_size: u64,
    /// Length of the input data, in bytes.
    pub original_size: u64,
}
/// Signed, per-field difference between two [`GroupComparisonMetrics`].
///
/// As built by [`GroupDifference::from_metrics`], every field holds
/// `comparison - baseline`, so a negative value means the comparison group's
/// figure is lower than the baseline's.
#[derive(Debug, Clone, Copy, Default, PartialEq)]
pub struct GroupDifference {
    /// Change in the estimated LZ match count.
    pub lz_matches: i64,
    /// Change in entropy.
    pub entropy: f64,
    /// Change in the estimator-predicted size.
    pub estimated_size: i64,
    /// Change in the zstd-reported size.
    pub zstd_size: i64,
    /// Change in the original (input) size.
    pub original_size: i64,
}
impl GroupComparisonMetrics {
    /// Measures `bytes` and collects the results into a metrics record.
    ///
    /// # Arguments
    ///
    /// * `bytes` - The data to analyse.
    /// * `group_name` - Name forwarded to the size estimator as
    ///   `SizeEstimationParameters::name`.
    /// * `compression_options` - Supplies the size-estimator function, its
    ///   LZ-match and entropy multipliers, and the zstd compression level.
    ///
    /// # Returns
    ///
    /// A [`GroupComparisonMetrics`] holding the entropy, the estimated LZ match
    /// count, the estimator's predicted size, the zstd-reported size and the
    /// input length.
    pub fn from_bytes(
        bytes: &[u8],
        group_name: &str,
        compression_options: CompressionOptions,
    ) -> Self {
        let input_len = bytes.len();

        let byte_entropy = calculate_file_entropy(bytes);
        let match_count = estimate_num_lz_matches_fast(bytes) as u64;

        // The estimator receives the same measurements that are stored in the
        // result, plus the tuning multipliers from the options.
        let estimator_input = SizeEstimationParameters {
            name: group_name,
            data: Some(bytes),
            data_len: input_len,
            num_lz_matches: match_count as usize,
            entropy: byte_entropy,
            lz_match_multiplier: compression_options.lz_match_multiplier,
            entropy_multiplier: compression_options.entropy_multiplier,
        };
        let predicted_size = (compression_options.size_estimator_fn)(estimator_input) as u64;

        let compressed_size =
            get_zstd_compressed_size(bytes, compression_options.zstd_compression_level);

        Self {
            lz_matches: match_count,
            entropy: byte_entropy,
            estimated_size: predicted_size,
            zstd_size: compressed_size,
            original_size: input_len as u64,
        }
    }
}
impl GroupDifference {
    /// Computes the per-field difference `comparison - baseline`.
    ///
    /// # Arguments
    ///
    /// * `baseline` - The reference metrics.
    /// * `comparison` - The metrics being compared against the baseline.
    ///
    /// # Returns
    ///
    /// A [`GroupDifference`] whose fields are positive where `comparison`
    /// exceeds `baseline` and negative where it is lower.
    ///
    /// Integer differences that do not fit in an `i64` saturate at
    /// `i64::MAX` / `i64::MIN` rather than wrapping or panicking.
    pub fn from_metrics(
        baseline: &GroupComparisonMetrics,
        comparison: &GroupComparisonMetrics,
    ) -> Self {
        // Signed `comparison - baseline` for unsigned inputs. The subtraction is
        // done in `u64` (larger minus smaller, so it cannot underflow) and only
        // then converted, which avoids the wrap-around of a direct `u64 as i64`
        // cast for values above `i64::MAX`.
        fn signed_delta(comparison: u64, baseline: u64) -> i64 {
            const MAX_MAGNITUDE: u64 = i64::MAX as u64;
            if comparison >= baseline {
                let gain = comparison - baseline;
                if gain > MAX_MAGNITUDE {
                    i64::MAX
                } else {
                    gain as i64
                }
            } else {
                let loss = baseline - comparison;
                // A loss of exactly 2^63 is `i64::MIN`; anything larger saturates there too.
                if loss > MAX_MAGNITUDE {
                    i64::MIN
                } else {
                    -(loss as i64)
                }
            }
        }

        GroupDifference {
            lz_matches: signed_delta(comparison.lz_matches, baseline.lz_matches),
            entropy: comparison.entropy - baseline.entropy,
            estimated_size: signed_delta(comparison.estimated_size, baseline.estimated_size),
            zstd_size: signed_delta(comparison.zstd_size, baseline.zstd_size),
            original_size: signed_delta(comparison.original_size, baseline.original_size),
        }
    }
}