use super::{
find_optimal_coefficients_for_metrics_parallel, BruteForceComparisonMetrics, BruteForceConfig,
OptimizationResult,
};
use crate::results::analysis_results::AnalysisResults;
/// Optimization outcome for a single custom comparison.
///
/// Holds the best coefficients found for the comparison's baseline as well as
/// for each of its comparison groups.
#[derive(Debug, Clone)]
pub struct CustomComparisonOptimizationResult {
    /// Best coefficients found for the baseline metrics of the comparison.
    pub baseline: OptimizationResult,
    /// Best coefficients found for each comparison group, in group order.
    pub comparisons: Box<[OptimizationResult]>,
}
/// Finds the best coefficients for every custom comparison.
///
/// The list of custom comparisons (names and order) is taken from the first
/// element of `individual_results`. For each of them, the metrics stored at the
/// same comparison index in every element are gathered and optimized, once for
/// the baseline and once per comparison group.
///
/// # Arguments
///
/// * `individual_results` - The analysis results to optimize over. They are
///   only read here; the mutable borrow is kept so existing callers keep
///   compiling.
/// * `config` - Brute-force settings. [`BruteForceConfig::default`] is used
///   when `None` is passed.
///
/// # Returns
///
/// One `(comparison name, optimization result)` pair per custom comparison of
/// the first element. The vector is empty when `individual_results` is empty
/// or when its first element has no custom comparisons.
///
/// # Panics
///
/// Panics if any element has fewer custom comparisons, or fewer groups within
/// a comparison, than the first element, because the metrics are looked up by
/// index.
pub fn find_optimal_custom_result_coefficients(
    individual_results: &mut [AnalysisResults],
    config: Option<&BruteForceConfig>,
) -> Vec<(String, CustomComparisonOptimizationResult)> {
    // Nothing below mutates the results, so work with a shared view.
    let individual_results: &[AnalysisResults] = individual_results;

    // Without any results there is nothing to optimize; indexing `[0]` here
    // would panic instead.
    let first = match individual_results.first() {
        Some(first) => first,
        None => return Vec::new(),
    };

    let default_config = BruteForceConfig::default();
    let config = config.unwrap_or(&default_config);

    first
        .custom_comparisons
        .iter()
        .enumerate()
        .map(|(comparison_idx, comparison)| {
            (
                comparison.name.clone(),
                find_optimal_custom_result_coefficients_for_comparison(
                    comparison_idx,
                    config,
                    individual_results,
                ),
            )
        })
        .collect()
}
#[allow(clippy::needless_range_loop)]
fn find_optimal_custom_result_coefficients_for_comparison(
comparison_idx: usize,
config: &BruteForceConfig,
original_results: &[AnalysisResults], ) -> CustomComparisonOptimizationResult {
let first_result = &original_results[0].custom_comparisons[comparison_idx];
let num_comparisons = first_result.group_metrics.len();
let baseline_metrics = extract_baseline_metrics(comparison_idx, original_results);
let baseline_best = find_optimal_coefficients_for_metrics_parallel(&baseline_metrics, config);
let mut comparison_bests = Vec::with_capacity(num_comparisons);
for group_idx in 0..num_comparisons {
let group_metrics =
extract_comparison_group_metrics(comparison_idx, group_idx, original_results);
let group_best = find_optimal_coefficients_for_metrics_parallel(&group_metrics, config);
comparison_bests.push(group_best);
}
CustomComparisonOptimizationResult {
baseline: baseline_best,
comparisons: comparison_bests.into_boxed_slice(),
}
}
/// Gathers the baseline metrics of the custom comparison at `comparison_idx`
/// from every analysis result, converted into [`BruteForceComparisonMetrics`].
///
/// The output has one entry per element of `original_results`, in the same
/// order.
///
/// # Panics
///
/// Panics if any element has no custom comparison at `comparison_idx`.
fn extract_baseline_metrics(
    comparison_idx: usize,
    original_results: &[AnalysisResults],
) -> Box<[BruteForceComparisonMetrics]> {
    let mut collected: Vec<BruteForceComparisonMetrics> =
        Vec::with_capacity(original_results.len());
    for analysis in original_results {
        let comparison = &analysis.custom_comparisons[comparison_idx];
        collected.push(comparison.baseline_metrics.into());
    }
    collected.into_boxed_slice()
}
/// Gathers the metrics of group `group_idx` within the custom comparison at
/// `comparison_idx` from every analysis result, converted into
/// [`BruteForceComparisonMetrics`].
///
/// The output has one entry per element of `original_results`, in the same
/// order.
///
/// # Panics
///
/// Panics if any element has no custom comparison at `comparison_idx`, or if
/// that comparison has no group at `group_idx`.
fn extract_comparison_group_metrics(
    comparison_idx: usize,
    group_idx: usize,
    original_results: &[AnalysisResults],
) -> Box<[BruteForceComparisonMetrics]> {
    let mut collected: Vec<BruteForceComparisonMetrics> =
        Vec::with_capacity(original_results.len());
    for analysis in original_results {
        let comparison = &analysis.custom_comparisons[comparison_idx];
        collected.push(comparison.group_metrics[group_idx].into());
    }
    collected.into_boxed_slice()
}
/// Writes the optimization results to `writer` as a text table.
///
/// A three-line header is written first. Every comparison then produces one
/// `BASE` row carrying its name, followed by one row per comparison group that
/// is labelled with the group's index and has an empty name column.
/// Multipliers are printed with three decimal places.
///
/// # Errors
///
/// Returns the first I/O error reported by `writer`.
pub fn print_optimization_results<W: std::io::Write>(
    writer: &mut W,
    results: &[(String, CustomComparisonOptimizationResult)],
) -> std::io::Result<()> {
    // Title, column captions and separator, each emitted on its own line.
    const HEADER_LINES: [&str; 3] = [
        "\n=== Custom Comparison Parameter Optimization Results ===",
        "Comparison Name | Group | LZ Multiplier | Entropy Multiplier |",
        "----------------|-------|---------------|--------------------|",
    ];
    for line in HEADER_LINES.iter() {
        writeln!(writer, "{}", line)?;
    }

    for (name, result) in results {
        let baseline = &result.baseline;
        writeln!(
            writer,
            "{name:<16}|{group:<7}|{lz:<15.3}|{entropy:<20.3}|",
            name = name,
            group = "BASE",
            lz = baseline.lz_match_multiplier,
            entropy = baseline.entropy_multiplier
        )?;

        // Group rows leave the name column blank so they read as children of
        // the baseline row above them.
        for (group_idx, group) in result.comparisons.iter().enumerate() {
            writeln!(
                writer,
                "{name:<16}|{group:<7}|{lz:<15.3}|{entropy:<20.3}|",
                name = "",
                group = group_idx,
                lz = group.lz_match_multiplier,
                entropy = group.entropy_multiplier
            )?;
        }
    }

    Ok(())
}
#[cfg(test)]
mod tests {
    use ahash::AHashMap;

    use super::*;
    use crate::{
        brute_force::calculate_error_for_bruteforce_metrics,
        comparison::{
            compare_groups::GroupComparisonResult, GroupComparisonMetrics, GroupDifference,
        },
        schema::Metadata,
    };

    /// Builds a [`GroupComparisonResult`] with the given baseline metrics and
    /// `comparison_group_count` groups that all share the same metrics.
    ///
    /// Groups are named `group_0`, `group_1`, ... and `estimated_size` is
    /// always zero.
    #[allow(clippy::too_many_arguments)]
    fn create_mock_group_comparison_result(
        name: &str,
        baseline_lz_matches: u64,
        baseline_entropy: f64,
        baseline_zstd_size: u64,
        baseline_original_size: u64,
        comparison_group_count: usize,
        comparison_lz_matches: u64,
        comparison_entropy: f64,
        comparison_zstd_size: u64,
        comparison_original_size: u64,
    ) -> GroupComparisonResult {
        let baseline_metrics = GroupComparisonMetrics {
            lz_matches: baseline_lz_matches,
            entropy: baseline_entropy,
            estimated_size: 0,
            zstd_size: baseline_zstd_size,
            original_size: baseline_original_size,
        };

        // Every group carries the same values, so build them once and repeat.
        let shared_group_metrics = GroupComparisonMetrics {
            lz_matches: comparison_lz_matches,
            entropy: comparison_entropy,
            estimated_size: 0,
            zstd_size: comparison_zstd_size,
            original_size: comparison_original_size,
        };

        let group_names: Vec<_> = (0..comparison_group_count)
            .map(|idx| format!("group_{}", idx))
            .collect();
        let group_metrics = vec![shared_group_metrics; comparison_group_count];
        let differences: Vec<_> = group_metrics
            .iter()
            .map(|metrics| GroupDifference::from_metrics(&baseline_metrics, metrics))
            .collect();

        GroupComparisonResult {
            name: name.to_string(),
            description: "Test comparison".to_string(),
            baseline_metrics,
            group_names,
            group_metrics,
            differences,
        }
    }

    /// Builds an [`AnalysisResults`] whose only populated part is a single
    /// custom comparison created from the given values.
    #[allow(clippy::too_many_arguments)]
    fn create_mock_analysis_results_with_custom(
        comparison_name: &str,
        baseline_lz_matches: u64,
        baseline_entropy: f64,
        baseline_zstd_size: u64,
        baseline_original_size: u64,
        comparison_0_group_count: usize,
        comparison_0_lz_matches: u64,
        comparison_0_entropy: f64,
        comparison_0_zstd_size: u64,
        comparison_0_original_size: u64,
    ) -> AnalysisResults {
        AnalysisResults {
            schema_metadata: Metadata {
                name: "Test Schema".to_string(),
                description: "Test Schema Description".to_string(),
            },
            file_entropy: 0.0,
            file_lz_matches: 0,
            zstd_file_size: 0,
            original_size: 0,
            per_field: AHashMap::new(),
            split_comparisons: Vec::new(),
            custom_comparisons: vec![create_mock_group_comparison_result(
                comparison_name,
                baseline_lz_matches,
                baseline_entropy,
                baseline_zstd_size,
                baseline_original_size,
                comparison_0_group_count,
                comparison_0_lz_matches,
                comparison_0_entropy,
                comparison_0_zstd_size,
                comparison_0_original_size,
            )],
        }
    }

    /// Asserts that both multipliers of `result` lie inside the inclusive
    /// bounds configured in `config`.
    fn assert_multipliers_within_bounds(result: &OptimizationResult, config: &BruteForceConfig) {
        assert!(result.lz_match_multiplier >= config.min_lz_multiplier);
        assert!(result.lz_match_multiplier <= config.max_lz_multiplier);
        assert!(result.entropy_multiplier >= config.min_entropy_multiplier);
        assert!(result.entropy_multiplier <= config.max_entropy_multiplier);
    }

    #[test]
    fn can_find_optimal_custom_result_coefficients() {
        let config = BruteForceConfig::default();
        let mut original_results = vec![create_mock_analysis_results_with_custom(
            "test_comparison",
            100,  // baseline LZ matches
            1.0,  // baseline entropy
            110,  // baseline zstd size
            1000, // baseline original size
            2,    // number of comparison groups
            210,  // comparison LZ matches
            1.6,  // comparison entropy
            230,  // comparison zstd size
            1000, // comparison original size
        )];

        let optimal_results =
            find_optimal_custom_result_coefficients(&mut original_results, Some(&config));

        assert_eq!(optimal_results.len(), 1);
        let (name, optimized) = &optimal_results[0];
        assert_eq!(name.as_str(), "test_comparison");

        // Every reported multiplier must stay inside the searched range.
        assert_multipliers_within_bounds(&optimized.baseline, &config);
        assert_eq!(optimized.comparisons.len(), 2);
        for group_result in optimized.comparisons.iter() {
            assert_multipliers_within_bounds(group_result, &config);
        }

        // The chosen coefficients must also keep the error small.
        let baseline_metrics = extract_baseline_metrics(0, &original_results);
        let baseline_error = calculate_error_for_bruteforce_metrics(
            &baseline_metrics,
            optimized.baseline.lz_match_multiplier,
            optimized.baseline.entropy_multiplier,
        );
        assert!(
            baseline_error < 5.0,
            "Baseline error {} should be less than 5.0",
            baseline_error
        );

        for (i, group_result) in optimized.comparisons.iter().enumerate() {
            let group_metrics = extract_comparison_group_metrics(0, i, &original_results);
            let group_error = calculate_error_for_bruteforce_metrics(
                &group_metrics,
                group_result.lz_match_multiplier,
                group_result.entropy_multiplier,
            );
            assert!(
                group_error < 5.0,
                "Comparison group {} error {} should be less than 5.0",
                i,
                group_error
            );
        }
    }

    #[test]
    fn handles_empty_custom_results() {
        // A default result has no custom comparisons, so nothing is optimized.
        let mut original_results = vec![AnalysisResults::default()];

        let optimal_results = find_optimal_custom_result_coefficients(&mut original_results, None);

        assert!(optimal_results.is_empty());
    }
}