// struct_compression_analyzer/brute_force/brute_force_split.rs

1use super::{
2    find_optimal_coefficients_for_metrics_parallel, BruteForceComparisonMetrics, BruteForceConfig,
3    OptimizationResult,
4};
5use crate::results::analysis_results::AnalysisResults;
6
7/// Result of a brute force optimization on a split comparison.
8#[derive(Debug, Clone, Copy)]
9pub struct SplitComparisonOptimizationResult {
10    /// Optimal parameters for group 1
11    pub group_1: OptimizationResult,
12    /// Optimal parameters for group 2
13    pub group_2: OptimizationResult,
14}
15
16/// Finds the optimal values for `lz_match_multiplier` and `entropy_multiplier` for all split
17/// results within a given slice of [`AnalysisResults`].
18///
19/// # Arguments
20///
21/// * `individual_results` - Mutable reference to the slice of [`AnalysisResults`].
22///   This is where we pull the data from, and where we will update the results.
23/// * `config` - Configuration for the optimization process (optional, uses default if [`None`])
24pub fn find_optimal_split_result_coefficients(
25    individual_results: &mut [AnalysisResults],
26    config: Option<&BruteForceConfig>,
27) -> Vec<(String, SplitComparisonOptimizationResult)> {
28    let default_config = BruteForceConfig::default();
29    let config = config.unwrap_or(&default_config);
30
31    let mut results: Vec<(String, SplitComparisonOptimizationResult)> = Vec::new();
32
33    for (comparison_idx, comparison) in individual_results[0].split_comparisons.iter().enumerate() {
34        results.push((
35            comparison.name.clone(),
36            find_optimal_split_result_coefficients_for_comparison(
37                comparison_idx,
38                config,
39                individual_results,
40            ),
41        ));
42    }
43
44    results
45}
46
47/// This function finds the optimal coefficients for both groups in a split comparison
48fn find_optimal_split_result_coefficients_for_comparison(
49    comparison_idx: usize,
50    config: &BruteForceConfig,
51    original_results: &[AnalysisResults], // guaranteed non-empty
52) -> SplitComparisonOptimizationResult {
53    // Find optimal coefficients for group 1
54    let group1_metrics = extract_group1_metrics(comparison_idx, original_results);
55    let group1_best = find_optimal_coefficients_for_metrics_parallel(&group1_metrics, config);
56
57    // Find optimal coefficients for group 2
58    let group2_metrics = extract_group2_metrics(comparison_idx, original_results);
59    let group2_best = find_optimal_coefficients_for_metrics_parallel(&group2_metrics, config);
60
61    SplitComparisonOptimizationResult {
62        group_1: group1_best,
63        group_2: group2_best,
64    }
65}
66
67/// Extracts all the group 1 metrics from each [`AnalysisResults`], at a given comparison index.
68/// Returns a boxed slice of all metrics.
69fn extract_group1_metrics(
70    comparison_idx: usize,
71    original_results: &[AnalysisResults], // guaranteed non-empty
72) -> Box<[BruteForceComparisonMetrics]> {
73    original_results
74        .iter()
75        .map(|result| {
76            result.split_comparisons[comparison_idx]
77                .group1_metrics
78                .into()
79        })
80        .collect()
81}
82
83/// Extracts all the group 2 metrics from each [`AnalysisResults`], at a given comparison index.
84/// Returns a boxed slice of all metrics.
85fn extract_group2_metrics(
86    comparison_idx: usize,
87    original_results: &[AnalysisResults], // guaranteed non-empty
88) -> Box<[BruteForceComparisonMetrics]> {
89    original_results
90        .iter()
91        .map(|result| {
92            result.split_comparisons[comparison_idx]
93                .group2_metrics
94                .into()
95        })
96        .collect()
97}
98
99/// Print optimization results in a user-friendly format.
100///
101/// # Arguments
102///
103/// * `writer` - The writer to print results to
104/// * `results` - Vector of (comparison name, OptimizationResult) tuples
105pub fn print_optimization_results<W: std::io::Write>(
106    writer: &mut W,
107    results: &[(String, SplitComparisonOptimizationResult)],
108) -> std::io::Result<()> {
109    writeln!(
110        writer,
111        "=== Split Comparison Parameter Optimization Results ==="
112    )?;
113    writeln!(
114        writer,
115        "Comparison Name               | Group | LZ Multiplier | Entropy Multiplier |"
116    )?;
117    writeln!(
118        writer,
119        "------------------------------|-------|---------------|--------------------|"
120    )?;
121
122    for (name, result) in results {
123        writeln!(
124            writer,
125            "{:<30}|{:<7}|{:<15.4}|{:<20.4}|",
126            name, "G1", result.group_1.lz_match_multiplier, result.group_1.entropy_multiplier
127        )?;
128        writeln!(
129            writer,
130            "{:<30}|{:<7}|{:<15.4}|{:<20.4}|",
131            "", "G2", result.group_2.lz_match_multiplier, result.group_2.entropy_multiplier
132        )?;
133    }
134
135    Ok(())
136}
137
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        brute_force::calculate_error_for_bruteforce_metrics,
        comparison::{
            split_comparison::SplitComparisonResult, GroupComparisonMetrics, GroupDifference,
        },
        results::analysis_results::AnalysisResults,
    };

    /// Builds the metrics of a single group; `estimated_size` is left at zero because the
    /// optimization does not use it.
    fn mock_group_metrics(
        lz_matches: u64,
        entropy: f64,
        zstd_size: u64,
        original_size: u64,
    ) -> GroupComparisonMetrics {
        GroupComparisonMetrics {
            lz_matches,
            entropy,
            estimated_size: 0,
            zstd_size,
            original_size,
        }
    }

    /// Wraps two groups into an [`AnalysisResults`] holding exactly one split comparison.
    fn mock_analysis_results(
        group1_metrics: GroupComparisonMetrics,
        group2_metrics: GroupComparisonMetrics,
    ) -> AnalysisResults {
        let difference = GroupDifference::from_metrics(&group1_metrics, &group2_metrics);

        AnalysisResults {
            split_comparisons: vec![SplitComparisonResult {
                name: "test_comparison".to_string(),
                description: "Test comparison for optimization".to_string(),
                group1_metrics,
                group2_metrics,
                difference,
                baseline_comparison_metrics: vec![],
                split_comparison_metrics: vec![],
            }],
            ..Default::default()
        }
    }

    /// Asserts that both multipliers lie inside the ranges configured in `config`.
    fn assert_within_configured_ranges(best: &OptimizationResult, config: &BruteForceConfig) {
        assert!(best.lz_match_multiplier >= config.min_lz_multiplier);
        assert!(best.lz_match_multiplier <= config.max_lz_multiplier);
        assert!(best.entropy_multiplier >= config.min_entropy_multiplier);
        assert!(best.entropy_multiplier <= config.max_entropy_multiplier);
    }

    #[test]
    fn can_find_optimal_split_result_coefficients() {
        let config = BruteForceConfig::default();

        // Two analysis results that share the same (single) split comparison.
        let original_results = vec![
            mock_analysis_results(
                mock_group_metrics(100, 1.0, 110, 1000), // Group 1
                mock_group_metrics(200, 1.5, 220, 1000), // Group 2
            ),
            mock_analysis_results(
                mock_group_metrics(110, 1.1, 120, 1000), // Group 1
                mock_group_metrics(210, 1.6, 230, 1000), // Group 2
            ),
        ];

        // Optimize the first (and only) comparison.
        let best =
            find_optimal_split_result_coefficients_for_comparison(0, &config, &original_results);

        // The optimum of each group must stay inside the configured search ranges.
        assert_within_configured_ranges(&best.group_1, &config);
        assert_within_configured_ranges(&best.group_2, &config);

        // The remaining error must be below 5 for both groups (known correct assumption).
        let group1_metrics = extract_group1_metrics(0, &original_results);
        let group1_error = calculate_error_for_bruteforce_metrics(
            &group1_metrics,
            best.group_1.lz_match_multiplier,
            best.group_1.entropy_multiplier,
        );
        assert!(group1_error < 5.0);

        let group2_metrics = extract_group2_metrics(0, &original_results);
        let group2_error = calculate_error_for_bruteforce_metrics(
            &group2_metrics,
            best.group_2.lz_match_multiplier,
            best.group_2.entropy_multiplier,
        );
        assert!(group2_error < 5.0);
    }

    #[test]
    fn handles_empty_split_results() {
        // Run the optimization without any analysis results at all.
        let config = BruteForceConfig::default();
        let no_results: Vec<AnalysisResults> = vec![];

        let best = find_optimal_split_result_coefficients_for_comparison(0, &config, &no_results);

        // With no input, both groups are expected to report the configured minimums.
        assert_eq!(best.group_1.lz_match_multiplier, config.min_lz_multiplier);
        assert_eq!(
            best.group_1.entropy_multiplier,
            config.min_entropy_multiplier
        );
        assert_eq!(best.group_2.lz_match_multiplier, config.min_lz_multiplier);
        assert_eq!(
            best.group_2.entropy_multiplier,
            config.min_entropy_multiplier
        );
    }
}