use crate::codec_selector::{ColumnProfile, CodecSelector};
use crate::decompression::CodecId;
pub struct RoundTripValidator;
impl RoundTripValidator {
pub fn validate_round_trip(original: &[u8]) -> Result<RoundTripResult, String> {
let profile = ColumnProfile::analyze(original)?;
let selected_codec = CodecSelector::select_optimal_codec(&profile);
let stats = CodecSelector::estimate_stats(&profile, selected_codec);
if stats.ratio >= 1.0 {
return Err("Selected codec does not compress data".to_string());
}
Ok(RoundTripResult {
original_size: original.len(),
selected_codec,
estimated_ratio: stats.ratio,
estimated_compressed_size: stats.compressed_size,
speed_mb_sec: stats.speed_mb_per_sec,
validates: true,
})
}
pub fn validate_consistency(data: &[u8]) -> Result<ConsistencyResult, String> {
let profile1 = ColumnProfile::analyze(data)?;
let codec1 = CodecSelector::select_optimal_codec(&profile1);
let profile2 = ColumnProfile::analyze(data)?;
let codec2 = CodecSelector::select_optimal_codec(&profile2);
if codec1 != codec2 {
return Err("Inconsistent codec selection".to_string());
}
Ok(ConsistencyResult {
codec1,
codec2,
consistent: codec1 == codec2,
})
}
pub fn validate_compression_estimates(data: &[u8]) -> Result<EstimateValidation, String> {
let profile = ColumnProfile::analyze(data)?;
let selected = CodecSelector::select_optimal_codec(&profile);
let stats = CodecSelector::estimate_stats(&profile, selected);
let is_valid = stats.ratio > 0.0 && stats.ratio < 1.0;
let codec_matches = match selected {
CodecId::RLE => stats.ratio < 0.5, CodecId::Dictionary => stats.ratio < 0.5, CodecId::FOR => stats.ratio < 0.5, CodecId::LZSS => stats.ratio < 0.8, CodecId::None => true,
};
Ok(EstimateValidation {
is_valid,
codec_matches,
ratio: stats.ratio,
selected_codec: selected,
})
}
pub fn compression_report(data: &[u8]) -> Result<CompressionReport, String> {
let profile = ColumnProfile::analyze(data)?;
let selected = CodecSelector::select_optimal_codec(&profile);
let stats = CodecSelector::estimate_stats(&profile, selected);
Ok(CompressionReport {
data_size: data.len(),
profile_cardinality: profile.cardinality_ratio,
profile_max_run: profile.max_run_length,
selected_codec: selected,
estimated_compressed_size: stats.compressed_size,
estimated_ratio: stats.ratio,
estimated_speed_mb_sec: stats.speed_mb_per_sec,
improvement_percent: (1.0 - stats.ratio) * 100.0,
})
}
}
#[derive(Clone, Debug)]
pub struct RoundTripResult {
pub original_size: usize,
pub selected_codec: CodecId,
pub estimated_ratio: f32,
pub estimated_compressed_size: usize,
pub speed_mb_sec: f32,
pub validates: bool,
}
#[derive(Clone, Debug)]
pub struct ConsistencyResult {
pub codec1: CodecId,
pub codec2: CodecId,
pub consistent: bool,
}
#[derive(Clone, Debug)]
pub struct EstimateValidation {
pub is_valid: bool,
pub codec_matches: bool,
pub ratio: f32,
pub selected_codec: CodecId,
}
#[derive(Clone, Debug)]
pub struct CompressionReport {
pub data_size: usize,
pub profile_cardinality: f32,
pub profile_max_run: usize,
pub selected_codec: CodecId,
pub estimated_compressed_size: usize,
pub estimated_ratio: f32,
pub estimated_speed_mb_sec: f32,
pub improvement_percent: f32,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_round_trip_repetitive() {
let data = vec![0xAA; 1000];
let result = RoundTripValidator::validate_round_trip(&data).unwrap();
assert_eq!(result.selected_codec, CodecId::RLE);
assert!(result.validates);
assert!(result.estimated_ratio < 0.2);
}
#[test]
fn test_round_trip_categorical() {
let mut data = Vec::new();
for i in 0..100 {
data.push((i % 10) as u8);
}
let result = RoundTripValidator::validate_round_trip(&data).unwrap();
assert_eq!(result.selected_codec, CodecId::Dictionary);
assert!(result.validates);
}
#[test]
fn test_consistency_same_data() {
let mut data = Vec::new();
for _ in 0..20 {
data.extend_from_slice(&[0x01, 0x02, 0x03, 0x04, 0x05]);
}
let result = RoundTripValidator::validate_consistency(&data).unwrap();
assert!(result.consistent);
assert_eq!(result.codec1, result.codec2);
}
#[test]
fn test_estimate_validation_rle() {
let data = vec![0xFF; 500]; let result = RoundTripValidator::validate_compression_estimates(&data).unwrap();
assert!(result.is_valid);
assert!(result.codec_matches);
assert!(result.ratio < 0.5);
}
#[test]
fn test_compression_report_repetitive() {
let data = vec![42u8; 1000];
let report = RoundTripValidator::compression_report(&data).unwrap();
assert_eq!(report.data_size, 1000);
assert!(report.improvement_percent > 80.0); }
#[test]
fn test_compression_report_categorical() {
let mut data = Vec::new();
for _ in 0..50 {
for c in &[1u8, 2, 3, 4, 5] {
data.push(*c);
}
}
let report = RoundTripValidator::compression_report(&data).unwrap();
assert!(report.improvement_percent > 30.0);
}
#[test]
fn test_empty_data_validation() {
let data: Vec<u8> = vec![];
let result = RoundTripValidator::validate_round_trip(&data).unwrap();
assert_eq!(result.original_size, 0);
}
#[test]
fn test_high_entropy_data() {
let data: Vec<u8> = (0..255).collect();
let result = RoundTripValidator::validate_round_trip(&data).unwrap();
assert_eq!(result.selected_codec, CodecId::LZSS);
}
#[test]
fn test_compression_targets() {
let mut case2_data = Vec::new();
for _ in 0..20 {
for i in 0..10 {
case2_data.push((i % 10) as u8);
}
}
let test_cases = vec![
(vec![0x00; 1000], 0.2), (case2_data, 0.5), ];
for (data, target) in test_cases {
let result = RoundTripValidator::validate_round_trip(&data).unwrap();
assert!(result.estimated_ratio <= target,
"Data should compress to {}, got {}", target, result.estimated_ratio);
}
}
}