struct_compression_analyzer/brute_force/
brute_force_split.rs1use super::{
2 find_optimal_coefficients_for_metrics_parallel, BruteForceComparisonMetrics, BruteForceConfig,
3 OptimizationResult,
4};
5use crate::results::analysis_results::AnalysisResults;
6
7#[derive(Debug, Clone, Copy)]
9pub struct SplitComparisonOptimizationResult {
10 pub group_1: OptimizationResult,
12 pub group_2: OptimizationResult,
14}
15
16pub fn find_optimal_split_result_coefficients(
25 individual_results: &mut [AnalysisResults],
26 config: Option<&BruteForceConfig>,
27) -> Vec<(String, SplitComparisonOptimizationResult)> {
28 let default_config = BruteForceConfig::default();
29 let config = config.unwrap_or(&default_config);
30
31 let mut results: Vec<(String, SplitComparisonOptimizationResult)> = Vec::new();
32
33 for (comparison_idx, comparison) in individual_results[0].split_comparisons.iter().enumerate() {
34 results.push((
35 comparison.name.clone(),
36 find_optimal_split_result_coefficients_for_comparison(
37 comparison_idx,
38 config,
39 individual_results,
40 ),
41 ));
42 }
43
44 results
45}
46
47fn find_optimal_split_result_coefficients_for_comparison(
49 comparison_idx: usize,
50 config: &BruteForceConfig,
51 original_results: &[AnalysisResults], ) -> SplitComparisonOptimizationResult {
53 let group1_metrics = extract_group1_metrics(comparison_idx, original_results);
55 let group1_best = find_optimal_coefficients_for_metrics_parallel(&group1_metrics, config);
56
57 let group2_metrics = extract_group2_metrics(comparison_idx, original_results);
59 let group2_best = find_optimal_coefficients_for_metrics_parallel(&group2_metrics, config);
60
61 SplitComparisonOptimizationResult {
62 group_1: group1_best,
63 group_2: group2_best,
64 }
65}
66
67fn extract_group1_metrics(
70 comparison_idx: usize,
71 original_results: &[AnalysisResults], ) -> Box<[BruteForceComparisonMetrics]> {
73 original_results
74 .iter()
75 .map(|result| {
76 result.split_comparisons[comparison_idx]
77 .group1_metrics
78 .into()
79 })
80 .collect()
81}
82
83fn extract_group2_metrics(
86 comparison_idx: usize,
87 original_results: &[AnalysisResults], ) -> Box<[BruteForceComparisonMetrics]> {
89 original_results
90 .iter()
91 .map(|result| {
92 result.split_comparisons[comparison_idx]
93 .group2_metrics
94 .into()
95 })
96 .collect()
97}
98
99pub fn print_optimization_results<W: std::io::Write>(
106 writer: &mut W,
107 results: &[(String, SplitComparisonOptimizationResult)],
108) -> std::io::Result<()> {
109 writeln!(
110 writer,
111 "=== Split Comparison Parameter Optimization Results ==="
112 )?;
113 writeln!(
114 writer,
115 "Comparison Name | Group | LZ Multiplier | Entropy Multiplier |"
116 )?;
117 writeln!(
118 writer,
119 "------------------------------|-------|---------------|--------------------|"
120 )?;
121
122 for (name, result) in results {
123 writeln!(
124 writer,
125 "{:<30}|{:<7}|{:<15.4}|{:<20.4}|",
126 name, "G1", result.group_1.lz_match_multiplier, result.group_1.entropy_multiplier
127 )?;
128 writeln!(
129 writer,
130 "{:<30}|{:<7}|{:<15.4}|{:<20.4}|",
131 "", "G2", result.group_2.lz_match_multiplier, result.group_2.entropy_multiplier
132 )?;
133 }
134
135 Ok(())
136}
137
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        brute_force::calculate_error_for_bruteforce_metrics,
        comparison::{
            split_comparison::SplitComparisonResult, GroupComparisonMetrics, GroupDifference,
        },
        results::analysis_results::AnalysisResults,
    };

    /// Builds the metrics of one group; `estimated_size` is always set to zero.
    fn mock_group_metrics(
        lz_matches: u64,
        entropy: f64,
        zstd_size: u64,
        original_size: u64,
    ) -> GroupComparisonMetrics {
        GroupComparisonMetrics {
            lz_matches,
            entropy,
            estimated_size: 0,
            zstd_size,
            original_size,
        }
    }

    /// Asserts that both multipliers of `best` lie inside the ranges of `config`.
    fn assert_within_search_bounds(best: &OptimizationResult, config: &BruteForceConfig) {
        assert!(best.lz_match_multiplier >= config.min_lz_multiplier);
        assert!(best.lz_match_multiplier <= config.max_lz_multiplier);
        assert!(best.entropy_multiplier >= config.min_entropy_multiplier);
        assert!(best.entropy_multiplier <= config.max_entropy_multiplier);
    }

    /// Builds an `AnalysisResults` holding a single split comparison named
    /// `test_comparison`, whose two groups carry the given metrics.
    // The eight scalars map one-to-one onto the two groups' metric fields, so
    // the argument-count lint is silenced instead of adding a wrapper type.
    #[allow(clippy::too_many_arguments)]
    fn create_mock_analysis_results(
        group1_lz_matches: u64,
        group1_entropy: f64,
        group1_zstd_size: u64,
        group1_original_size: u64,
        group2_lz_matches: u64,
        group2_entropy: f64,
        group2_zstd_size: u64,
        group2_original_size: u64,
    ) -> AnalysisResults {
        let group1_metrics = mock_group_metrics(
            group1_lz_matches,
            group1_entropy,
            group1_zstd_size,
            group1_original_size,
        );
        let group2_metrics = mock_group_metrics(
            group2_lz_matches,
            group2_entropy,
            group2_zstd_size,
            group2_original_size,
        );

        // The difference is computed from borrows before the metrics are
        // moved into the comparison below.
        let difference = GroupDifference::from_metrics(&group1_metrics, &group2_metrics);

        AnalysisResults {
            split_comparisons: vec![SplitComparisonResult {
                name: String::from("test_comparison"),
                description: String::from("Test comparison for optimization"),
                group1_metrics,
                group2_metrics,
                difference,
                baseline_comparison_metrics: Vec::new(),
                split_comparison_metrics: Vec::new(),
            }],
            ..Default::default()
        }
    }

    /// Optimizing two mock results must stay inside the configured search
    /// ranges and reach an error below 5.0 for both groups.
    #[test]
    fn can_find_optimal_split_result_coefficients() {
        let config = BruteForceConfig::default();

        let original_results = [
            create_mock_analysis_results(100, 1.0, 110, 1000, 200, 1.5, 220, 1000),
            create_mock_analysis_results(110, 1.1, 120, 1000, 210, 1.6, 230, 1000),
        ];

        let result =
            find_optimal_split_result_coefficients_for_comparison(0, &config, &original_results);

        assert_within_search_bounds(&result.group_1, &config);
        assert_within_search_bounds(&result.group_2, &config);

        let group1_error = calculate_error_for_bruteforce_metrics(
            &extract_group1_metrics(0, &original_results),
            result.group_1.lz_match_multiplier,
            result.group_1.entropy_multiplier,
        );
        assert!(group1_error < 5.0);

        let group2_error = calculate_error_for_bruteforce_metrics(
            &extract_group2_metrics(0, &original_results),
            result.group_2.lz_match_multiplier,
            result.group_2.entropy_multiplier,
        );
        assert!(group2_error < 5.0);
    }

    /// With no analysis results, both groups are expected to report the
    /// configured minimum multipliers.
    #[test]
    fn handles_empty_split_results() {
        let config = BruteForceConfig::default();
        let empty_results: [AnalysisResults; 0] = [];

        let result =
            find_optimal_split_result_coefficients_for_comparison(0, &config, &empty_results);

        assert_eq!(result.group_1.lz_match_multiplier, config.min_lz_multiplier);
        assert_eq!(
            result.group_1.entropy_multiplier,
            config.min_entropy_multiplier
        );
        assert_eq!(result.group_2.lz_match_multiplier, config.min_lz_multiplier);
        assert_eq!(
            result.group_2.entropy_multiplier,
            config.min_entropy_multiplier
        );
    }
}