struct_compression_analyzer/brute_force/
mod.rs

1//! Brute force optimization for LZ match and entropy multiplier parameters.
2//!
3//! This module provides functionality to find optimal values for the
4//! [`lz_match_multiplier`] and [`entropy_multiplier`] parameters used in the
5//! [`size_estimate`] function.
6//!
7//! It exposes two main optimization approaches:
8//!
9//! *   **Split comparisons:** Optimizes parameters for two groups being compared directly using the
10//!     [`find_optimal_split_result_coefficients`] function. Results are returned as
11//!     [`SplitComparisonOptimizationResult`].
12//!
13//! *   **Custom comparisons:** Optimizes parameters for custom groups with a variable number of
14//!     comparisons against a baseline group using the [`find_optimal_custom_result_coefficients`]
15//!     function. Results are returned as [`CustomComparisonOptimizationResult`].
16//!
//! The main entry point for using this module is the [`optimize_and_apply_coefficients`] function,
//! which performs the optimization and applies the resulting coefficients to a slice of
//! [`AnalysisResults`] objects in place.
20//!
21//! [`size_estimate`]: crate::utils::analyze_utils::size_estimate
22//! [`lz_match_multiplier`]: crate::analyzer::SizeEstimationParameters::lz_match_multiplier
23//! [`entropy_multiplier`]: crate::analyzer::SizeEstimationParameters::entropy_multiplier
24//! [`find_optimal_split_result_coefficients`]: crate::brute_force::find_optimal_split_result_coefficients
25//! [`find_optimal_custom_result_coefficients`]: crate::brute_force::find_optimal_custom_result_coefficients
26//! [`SplitComparisonOptimizationResult`]: crate::brute_force::SplitComparisonOptimizationResult
27//! [`CustomComparisonOptimizationResult`]: crate::brute_force::CustomComparisonOptimizationResult
28//! [`optimize_and_apply_coefficients`]: crate::brute_force::optimize_and_apply_coefficients
29//! [`MergedAnalysisResults`]: crate::results::merged_analysis_results::MergedAnalysisResults
30
31pub mod brute_force_custom;
32pub mod brute_force_split;
33use crate::analyzer::SizeEstimationParameters;
34use crate::comparison::{GroupComparisonMetrics, GroupDifference};
35use crate::results::analysis_results::AnalysisResults;
36use crate::utils::analyze_utils::size_estimate;
37use brute_force_custom::{
38    find_optimal_custom_result_coefficients, CustomComparisonOptimizationResult,
39};
40use brute_force_split::{
41    find_optimal_split_result_coefficients, SplitComparisonOptimizationResult,
42};
43use rayon::prelude::*;
44
/// Search grid used by the brute force optimizer.
///
/// Each multiplier is swept from its minimum up to (and including, when the
/// stepping lands on it) its maximum, advancing by the matching step size.
#[derive(Clone, Debug)]
pub struct BruteForceConfig {
    /// First LZ match multiplier value that is tested.
    pub min_lz_multiplier: f64,
    /// Upper bound of the LZ match multiplier sweep.
    pub max_lz_multiplier: f64,
    /// Increment between two consecutive LZ match multiplier values.
    pub lz_step_size: f64,
    /// First entropy multiplier value that is tested.
    pub min_entropy_multiplier: f64,
    /// Upper bound of the entropy multiplier sweep.
    pub max_entropy_multiplier: f64,
    /// Increment between two consecutive entropy multiplier values.
    pub entropy_step_size: f64,
}
61
62impl Default for BruteForceConfig {
63    fn default() -> Self {
64        Self {
65            min_lz_multiplier: 0.0001,
66            max_lz_multiplier: 1.0,
67            lz_step_size: 0.0001,
68            min_entropy_multiplier: 1.0,
69            max_entropy_multiplier: 1.75,
70            entropy_step_size: 0.001,
71        }
72    }
73}
74
/// Pair of coefficients selected by a brute force optimization run.
///
/// The [`Default`] value has both multipliers set to `0.0`.
#[derive(Clone, Copy, Debug, Default)]
pub struct OptimizationResult {
    /// LZ match multiplier chosen by the optimizer.
    pub lz_match_multiplier: f64,
    /// Entropy multiplier chosen by the optimizer.
    pub entropy_multiplier: f64,
}
83
84/// Calculates the error for a given set of LZ match and entropy multipliers.
85///
86/// # Arguments
87///
88/// * `num_lz_matches` - The number of LZ matches in the input
89/// * `entropy` - The estimated entropy of the input
90/// * `zstd_size` - The ZSTD compressed size of the input
91/// * `original_size` - The original size of the input
92/// * `lz_match_multiplier` - The current LZ match multiplier
93/// * `entropy_multiplier` - The current entropy multiplier
94///
95/// # Returns
96///
97/// The error for the tested parameters (difference between estimated and actual size).
98#[inline(always)]
99pub(crate) fn calculate_error(
100    // Compression Estimator Params
101    num_lz_matches: u64,
102    entropy: f64,
103    // Actual Compression Stats
104    zstd_size: u64,
105    original_size: u64,
106    // Coefficients to Test
107    lz_match_multiplier: f64,
108    entropy_multiplier: f64,
109) -> f64 {
110    // Calculate estimated size with current coefficients
111    let estimated_size = size_estimate(SizeEstimationParameters {
112        name: "",
113        data_len: original_size as usize,
114        data: None,
115        num_lz_matches: num_lz_matches as usize,
116        entropy,
117        lz_match_multiplier,
118        entropy_multiplier,
119    });
120
121    // Calculate error (difference between estimated and actual size)
122    let error = ((estimated_size as f64) - (zstd_size as f64)).abs();
123
124    // If the ratios are on the opposite side of 1.0
125    // (i.e.) estimate thinks its worse, when its better, impose a 'killing'
126    // penalty by giving it max error.
127    let zstd_is_bigger = zstd_size > original_size;
128    let estimate_is_bigger = estimated_size as u64 > original_size;
129    if zstd_is_bigger != estimate_is_bigger {
130        return f32::MAX as f64;
131    }
132
133    error
134}
135
136/// Optimizes and applies coefficients to a slice of [`AnalysisResults`] objects.
137///
138/// This function:
139/// 1. Finds optimal coefficients for all split comparisons
140/// 2. Finds optimal coefficients for all custom comparisons
141/// 3. Updates estimated sizes in both the merged results and original analysis results
142///
143/// # Arguments
144///
145/// * `merged_results` - The merged analysis results to optimize and update
146/// * `config` - Optional configuration for the brute force optimization process
147///
148/// # Returns
149///
150/// A tuple of optimization result vectors for split and custom comparisons
151#[allow(clippy::type_complexity)]
152pub fn optimize_and_apply_coefficients(
153    merged_results: &mut [AnalysisResults],
154    config: Option<&BruteForceConfig>,
155) -> (
156    Vec<(String, SplitComparisonOptimizationResult)>,
157    Vec<(String, CustomComparisonOptimizationResult)>,
158) {
159    // Find optimal coefficients for split comparisons
160    let split_optimization_results = find_optimal_split_result_coefficients(merged_results, config);
161
162    // Find optimal coefficients for custom comparisons
163    let custom_optimization_results =
164        find_optimal_custom_result_coefficients(merged_results, config);
165
166    // Update the merged results with the optimized coefficients
167    apply_optimized_coefficients(
168        merged_results,
169        &split_optimization_results,
170        &custom_optimization_results,
171    );
172
173    (split_optimization_results, custom_optimization_results)
174}
175
176/// Applies the optimized coefficients to the merged results and original files.
177///
178/// # Arguments
179///
180/// * `individual_results` - The analysis results to update
181/// * `split_optimization_results` - The optimization results for split comparisons
182/// * `custom_optimization_results` - The optimization results for custom comparisons
183pub fn apply_optimized_coefficients(
184    individual_results: &mut [AnalysisResults],
185    split_optimization_results: &[(String, SplitComparisonOptimizationResult)],
186    custom_optimization_results: &[(String, CustomComparisonOptimizationResult)],
187) {
188    // Update split comparisons in merged results
189    for (split_idx, comparison) in individual_results[0]
190        .split_comparisons
191        .iter_mut()
192        .enumerate()
193    {
194        let optimization_result = &split_optimization_results[split_idx].1;
195
196        // Update group 1 metrics
197        update_group_metrics(
198            &mut comparison.group1_metrics,
199            optimization_result.group_1.lz_match_multiplier,
200            optimization_result.group_1.entropy_multiplier,
201        );
202
203        // Update group 2 metrics
204        update_group_metrics(
205            &mut comparison.group2_metrics,
206            optimization_result.group_2.lz_match_multiplier,
207            optimization_result.group_2.entropy_multiplier,
208        );
209
210        // Update group difference
211        update_group_difference(
212            &comparison.group1_metrics,
213            &comparison.group2_metrics,
214            &mut comparison.difference,
215        );
216    }
217
218    // Update custom comparisons in merged results
219    for (custom_idx, comparison) in individual_results[0]
220        .custom_comparisons
221        .iter_mut()
222        .enumerate()
223    {
224        let optimization_result = &custom_optimization_results[custom_idx].1;
225
226        // Update baseline metrics
227        update_group_metrics(
228            &mut comparison.baseline_metrics,
229            optimization_result.baseline.lz_match_multiplier,
230            optimization_result.baseline.entropy_multiplier,
231        );
232
233        // Update comparison group metrics
234        for (group_idx, group_metrics) in comparison.group_metrics.iter_mut().enumerate() {
235            update_group_metrics(
236                group_metrics,
237                optimization_result.comparisons[group_idx].lz_match_multiplier,
238                optimization_result.comparisons[group_idx].entropy_multiplier,
239            );
240        }
241
242        // Update group differences
243        for (group_idx, difference) in comparison.differences.iter_mut().enumerate() {
244            update_group_difference(
245                &comparison.baseline_metrics,
246                &comparison.group_metrics[group_idx],
247                difference,
248            );
249        }
250    }
251
252    // Update each original analysis result
253    for result in individual_results {
254        // Update split comparisons in original results
255        for (split_idx, comparison) in result.split_comparisons.iter_mut().enumerate() {
256            let optimization_result = &split_optimization_results[split_idx].1;
257
258            // Update group 1 metrics
259            update_group_metrics(
260                &mut comparison.group1_metrics,
261                optimization_result.group_1.lz_match_multiplier,
262                optimization_result.group_1.entropy_multiplier,
263            );
264
265            // Update group 2 metrics
266            update_group_metrics(
267                &mut comparison.group2_metrics,
268                optimization_result.group_2.lz_match_multiplier,
269                optimization_result.group_2.entropy_multiplier,
270            );
271
272            // Update group difference
273            update_group_difference(
274                &comparison.group1_metrics,
275                &comparison.group2_metrics,
276                &mut comparison.difference,
277            );
278        }
279
280        // Update custom comparisons in original results
281        for (custom_idx, comparison) in result.custom_comparisons.iter_mut().enumerate() {
282            let optimization_result = &custom_optimization_results[custom_idx].1;
283
284            // Update baseline metrics
285            update_group_metrics(
286                &mut comparison.baseline_metrics,
287                optimization_result.baseline.lz_match_multiplier,
288                optimization_result.baseline.entropy_multiplier,
289            );
290
291            // Update comparison group metrics
292            for (group_idx, group_metrics) in comparison.group_metrics.iter_mut().enumerate() {
293                update_group_metrics(
294                    group_metrics,
295                    optimization_result.comparisons[group_idx].lz_match_multiplier,
296                    optimization_result.comparisons[group_idx].entropy_multiplier,
297                );
298            }
299
300            // Update group differences
301            for (group_idx, difference) in comparison.differences.iter_mut().enumerate() {
302                update_group_difference(
303                    &comparison.baseline_metrics,
304                    &comparison.group_metrics[group_idx],
305                    difference,
306                );
307            }
308        }
309    }
310}
311
312/// Updates a [`GroupComparisonMetrics`] struct with new coefficient values.
313///
314/// # Arguments
315///
316/// * `metrics` - The metrics to update
317/// * `lz_match_multiplier` - The new LZ match multiplier
318/// * `entropy_multiplier` - The new entropy multiplier
319fn update_group_metrics(
320    metrics: &mut GroupComparisonMetrics,
321    lz_match_multiplier: f64,
322    entropy_multiplier: f64,
323) {
324    // Recalculate estimated size with the optimized parameters
325    let estimated_size = size_estimate(SizeEstimationParameters {
326        name: "",
327        data_len: metrics.original_size as usize,
328        data: None,
329        num_lz_matches: metrics.lz_matches as usize,
330        entropy: metrics.entropy,
331        lz_match_multiplier,
332        entropy_multiplier,
333    });
334
335    // Update the estimated size
336    metrics.estimated_size = estimated_size as u64;
337}
338
339/// Updates a [`GroupDifference`] struct with recalculated values.
340fn update_group_difference(
341    group1_metrics: &GroupComparisonMetrics,
342    group2_metrics: &GroupComparisonMetrics,
343    difference: &mut GroupDifference,
344) {
345    difference.estimated_size =
346        group2_metrics.estimated_size as i64 - group1_metrics.estimated_size as i64;
347}
348
349/// Prints formatted optimization results for both split and custom comparisons.
350///
351/// # Arguments
352///
353/// * `writer` - The writer to print results to
354/// * `split_results` - Optimization results for split comparisons
355/// * `custom_results` - Optimization results for custom comparisons
356pub fn print_all_optimization_results<W: std::io::Write>(
357    writer: &mut W,
358    split_results: &[(String, SplitComparisonOptimizationResult)],
359    custom_results: &[(String, CustomComparisonOptimizationResult)],
360) -> std::io::Result<()> {
361    brute_force_split::print_optimization_results(writer, split_results)?;
362    brute_force_custom::print_optimization_results(writer, custom_results)?;
363    Ok(())
364}
365
/// Slimmed-down counterpart of [`GroupComparisonMetrics`] that keeps only the
/// fields read while brute forcing.
#[derive(Debug, Default, Clone, Copy, PartialEq)]
pub(crate) struct BruteForceComparisonMetrics {
    /// Total number of LZ matches.
    pub lz_matches: u64,
    /// Entropy of the input data set.
    pub entropy: f64,
    /// Size after compression with zstd.
    pub zstd_size: u64,
    /// Size of the original (uncompressed) data.
    pub original_size: u64,
}
379
380impl From<GroupComparisonMetrics> for BruteForceComparisonMetrics {
381    fn from(value: GroupComparisonMetrics) -> Self {
382        BruteForceComparisonMetrics {
383            lz_matches: value.lz_matches,
384            entropy: value.entropy,
385            zstd_size: value.zstd_size,
386            original_size: value.original_size,
387        }
388    }
389}
390
391/// Finds the optimal coefficients (lz_match_multiplier and entropy_multiplier) for a given
392/// set of metrics by running a brute force optimization. This runs in parallel on all threads.
393///
394/// # Arguments
395///
396/// * `metrics` - The metrics to find optimal coefficients for
397/// * `config` - Configuration for the optimization process
398///
399/// # Returns
400///
401/// The optimal [`OptimizationResult`] containing the best coefficients
402pub(crate) fn find_optimal_coefficients_for_metrics_parallel(
403    metrics: &[BruteForceComparisonMetrics],
404    config: &BruteForceConfig,
405) -> OptimizationResult {
406    // Determine how to split the lz range
407    let num_chunks = rayon::current_num_threads();
408    let lz_range = config.max_lz_multiplier - config.min_lz_multiplier;
409    let chunk_size = lz_range / num_chunks as f64;
410
411    // Create chunks for parallel processing
412    let mut chunks = Vec::with_capacity(num_chunks);
413    for x in 0..num_chunks {
414        let start = config.min_lz_multiplier + (x as f64 * chunk_size);
415        let end = if x == num_chunks - 1 {
416            config.max_lz_multiplier
417        } else {
418            config.min_lz_multiplier + ((x + 1) as f64 * chunk_size)
419        };
420
421        chunks.push((start, end));
422    }
423
424    // Process chunks in parallel
425    let results: Vec<_> = chunks
426        .par_iter()
427        .map(|(start, end)| {
428            find_optimal_coefficients_for_metrics(
429                metrics,
430                &BruteForceConfig {
431                    min_lz_multiplier: *start,
432                    max_lz_multiplier: *end,
433                    min_entropy_multiplier: config.min_entropy_multiplier,
434                    max_entropy_multiplier: config.max_entropy_multiplier,
435                    entropy_step_size: config.entropy_step_size,
436                    lz_step_size: config.lz_step_size,
437                },
438            )
439        })
440        .collect();
441
442    // Find the overall best result using a simple for loop
443    let mut best_result = OptimizationResult::default();
444    let mut min_error = f64::MAX;
445    for (result, error) in results {
446        if error < min_error {
447            min_error = error;
448            best_result = result;
449        }
450    }
451
452    best_result
453}
454
455/// Finds the optimal coefficients (lz_match_multiplier and entropy_multiplier) for a given
456/// set of metrics by running a brute force optimization.
457///
458/// # Arguments
459///
460/// * `metrics` - The metrics to find optimal coefficients for
461/// * `config` - Configuration for the optimization process
462///
463/// # Returns
464///
465/// The optimal [`OptimizationResult`] containing the best coefficients,
466/// and the minimum error found for this best result.
467pub(crate) fn find_optimal_coefficients_for_metrics(
468    metrics: &[BruteForceComparisonMetrics],
469    config: &BruteForceConfig,
470) -> (OptimizationResult, f64) {
471    let mut best_result = OptimizationResult::default();
472    let mut min_error = f64::MAX;
473
474    let mut lz_multiplier = config.min_lz_multiplier;
475    while lz_multiplier <= config.max_lz_multiplier {
476        let mut entropy_multiplier = config.min_entropy_multiplier;
477        while entropy_multiplier <= config.max_entropy_multiplier {
478            // Calculate the error with the given coefficients
479            let error =
480                calculate_error_for_bruteforce_metrics(metrics, lz_multiplier, entropy_multiplier);
481
482            // Update if better than current best
483            if error < min_error {
484                best_result = OptimizationResult {
485                    lz_match_multiplier: lz_multiplier,
486                    entropy_multiplier,
487                };
488
489                min_error = error;
490            }
491
492            entropy_multiplier += config.entropy_step_size;
493        }
494
495        lz_multiplier += config.lz_step_size;
496    }
497
498    (best_result, min_error)
499}
500
501/// Calculates the error for a given set of metrics with specified coefficients.
502/// This returns the sum of all errors for all results in the metrics slice.
503///
504/// # Arguments
505///
506/// * `metrics` - The metrics to calculate the error for
507/// * `lz_match_multiplier` - The LZ match multiplier to test
508/// * `entropy_multiplier` - The entropy multiplier to test
509///
510/// # Returns
511///
512/// The sum of all errors for the given metrics with the specified coefficients
513#[inline(always)]
514pub(crate) fn calculate_error_for_bruteforce_metrics(
515    metrics: &[BruteForceComparisonMetrics],
516    lz_match_multiplier: f64,
517    entropy_multiplier: f64,
518) -> f64 {
519    let mut total_error = 0.0f64;
520
521    for result in metrics {
522        total_error += calculate_error(
523            result.lz_matches,
524            result.entropy,
525            result.zstd_size,
526            result.original_size,
527            lz_match_multiplier,
528            entropy_multiplier,
529        );
530    }
531
532    total_error
533}
534
/// Smoke tests for the optimization entry point and the metric update helpers.
///
/// The assertions are intentionally coarse: they mostly verify that estimated
/// sizes are rewritten, not that specific coefficients are selected.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        comparison::{
            compare_groups::GroupComparisonResult, split_comparison::SplitComparisonResult,
        },
        results::analysis_results::AnalysisResults,
        schema::Metadata,
    };
    use ahash::AHashMap;

    // Names and descriptions used by the fixtures
    const TEST_NAME_SPLIT: &str = "Test Split";
    const TEST_DESC_SPLIT: &str = "Test Split Description";
    const TEST_NAME_CUSTOM: &str = "Test Custom";
    const TEST_DESC_CUSTOM: &str = "Test Custom Description";
    const TEST_GROUP_NAME: &str = "Test Group";
    const TEST_SCHEMA_NAME: &str = "Test Schema";
    const TEST_SCHEMA_DESC: &str = "Test Schema Description";

    // Metrics of the first group (also used as the custom baseline)
    const GROUP1_LZ_MATCHES: u64 = 100;
    const GROUP1_ENTROPY: f64 = 5.0;
    const GROUP1_ESTIMATED_SIZE: u64 = 1000;
    const GROUP1_ZSTD_SIZE: u64 = 800;
    const GROUP1_ORIGINAL_SIZE: u64 = 2000;

    // Metrics of the second group (also used as the custom comparison group)
    const GROUP2_LZ_MATCHES: u64 = 150;
    const GROUP2_ENTROPY: f64 = 4.0;
    const GROUP2_ESTIMATED_SIZE: u64 = 900;
    const GROUP2_ZSTD_SIZE: u64 = 700;
    const GROUP2_ORIGINAL_SIZE: u64 = 1800;

    // Differences between the two groups (second minus first)
    const DIFF_LZ_MATCHES: i64 = 50;
    const DIFF_ENTROPY: f64 = -1.0;
    const DIFF_ESTIMATED_SIZE: i64 = -100;
    const DIFF_ZSTD_SIZE: i64 = -100;
    const DIFF_ORIGINAL_SIZE: i64 = -200;

    // A deliberately tiny search grid so the tests run quickly
    const TEST_MIN_LZ: f64 = 0.01;
    const TEST_MAX_LZ: f64 = 0.05;
    const TEST_LZ_STEP: f64 = 0.02;
    const TEST_MIN_ENTROPY: f64 = 1.0;
    const TEST_MAX_ENTROPY: f64 = 1.1;
    const TEST_ENTROPY_STEP: f64 = 0.05;

    /// Metrics built from the `GROUP1_*` constants.
    fn first_group_metrics() -> GroupComparisonMetrics {
        GroupComparisonMetrics {
            lz_matches: GROUP1_LZ_MATCHES,
            entropy: GROUP1_ENTROPY,
            estimated_size: GROUP1_ESTIMATED_SIZE,
            zstd_size: GROUP1_ZSTD_SIZE,
            original_size: GROUP1_ORIGINAL_SIZE,
        }
    }

    /// Metrics built from the `GROUP2_*` constants.
    fn second_group_metrics() -> GroupComparisonMetrics {
        GroupComparisonMetrics {
            lz_matches: GROUP2_LZ_MATCHES,
            entropy: GROUP2_ENTROPY,
            estimated_size: GROUP2_ESTIMATED_SIZE,
            zstd_size: GROUP2_ZSTD_SIZE,
            original_size: GROUP2_ORIGINAL_SIZE,
        }
    }

    /// Difference built from the `DIFF_*` constants.
    fn preset_difference() -> GroupDifference {
        GroupDifference {
            lz_matches: DIFF_LZ_MATCHES,
            entropy: DIFF_ENTROPY,
            estimated_size: DIFF_ESTIMATED_SIZE,
            zstd_size: DIFF_ZSTD_SIZE,
            original_size: DIFF_ORIGINAL_SIZE,
        }
    }

    /// Creates a simple mock AnalysisResults with one split comparison and one
    /// custom comparison (single comparison group).
    fn create_mock_results() -> AnalysisResults {
        let split_comparison = SplitComparisonResult {
            name: TEST_NAME_SPLIT.to_string(),
            description: TEST_DESC_SPLIT.to_string(),
            group1_metrics: first_group_metrics(),
            group2_metrics: second_group_metrics(),
            difference: preset_difference(),
            baseline_comparison_metrics: Vec::new(),
            split_comparison_metrics: Vec::new(),
        };

        let custom_comparison = GroupComparisonResult {
            name: TEST_NAME_CUSTOM.to_string(),
            description: TEST_DESC_CUSTOM.to_string(),
            baseline_metrics: first_group_metrics(),
            group_metrics: vec![second_group_metrics()],
            group_names: vec![TEST_GROUP_NAME.to_string()],
            differences: vec![preset_difference()],
        };

        AnalysisResults {
            schema_metadata: Metadata {
                name: TEST_SCHEMA_NAME.to_string(),
                description: TEST_SCHEMA_DESC.to_string(),
            },
            file_entropy: GROUP1_ENTROPY,
            file_lz_matches: GROUP1_LZ_MATCHES,
            zstd_file_size: GROUP1_ZSTD_SIZE,
            original_size: GROUP1_ORIGINAL_SIZE,
            per_field: AHashMap::new(),
            split_comparisons: vec![split_comparison],
            custom_comparisons: vec![custom_comparison],
        }
    }

    #[test]
    fn can_optimize_and_apply_coefficients() {
        // Narrow search grid for quick testing
        let config = BruteForceConfig {
            min_lz_multiplier: TEST_MIN_LZ,
            max_lz_multiplier: TEST_MAX_LZ,
            lz_step_size: TEST_LZ_STEP,
            min_entropy_multiplier: TEST_MIN_ENTROPY,
            max_entropy_multiplier: TEST_MAX_ENTROPY,
            entropy_step_size: TEST_ENTROPY_STEP,
        };

        let mut results = vec![create_mock_results()];

        // Remember the estimated sizes as they were before optimizing
        let split_before = &results[0].split_comparisons[0];
        let custom_before = &results[0].custom_comparisons[0];
        let original_split_estimated_size_g1 = split_before.group1_metrics.estimated_size;
        let original_split_estimated_size_g2 = split_before.group2_metrics.estimated_size;
        let original_custom_estimated_size_baseline =
            custom_before.baseline_metrics.estimated_size;
        let original_custom_estimated_size_group = custom_before.group_metrics[0].estimated_size;

        // Run the optimization
        let (split_results, custom_results) =
            optimize_and_apply_coefficients(&mut results, Some(&config));

        // Re-borrow: the results were mutated by the call above
        let split_after = &results[0].split_comparisons[0];
        let custom_after = &results[0].custom_comparisons[0];

        // Split comparison: a named result exists and both estimates changed
        assert!(!split_results.is_empty());
        assert_eq!(split_results[0].0, TEST_NAME_SPLIT);
        assert_ne!(
            split_after.group1_metrics.estimated_size,
            original_split_estimated_size_g1
        );
        assert_ne!(
            split_after.group2_metrics.estimated_size,
            original_split_estimated_size_g2
        );

        // Custom comparison: a named result exists and both estimates changed
        assert!(!custom_results.is_empty());
        assert_eq!(custom_results[0].0, TEST_NAME_CUSTOM);
        assert_ne!(
            custom_after.baseline_metrics.estimated_size,
            original_custom_estimated_size_baseline
        );
        assert_ne!(
            custom_after.group_metrics[0].estimated_size,
            original_custom_estimated_size_group
        );

        // The same checks again, reading straight from the results slice
        assert_ne!(
            results[0].split_comparisons[0]
                .group1_metrics
                .estimated_size,
            original_split_estimated_size_g1
        );
        assert_ne!(
            results[0].custom_comparisons[0]
                .baseline_metrics
                .estimated_size,
            original_custom_estimated_size_baseline
        );
    }

    #[test]
    fn can_update_group_metrics() {
        let mut metrics = first_group_metrics();
        let original_estimated_size = metrics.estimated_size;

        // Update with different coefficients
        update_group_metrics(&mut metrics, TEST_MIN_LZ * 2.0, TEST_MIN_ENTROPY + 0.05);

        // The estimate must have been rewritten...
        assert_ne!(metrics.estimated_size, original_estimated_size);

        // ...while every other field stays untouched
        assert_eq!(metrics.lz_matches, GROUP1_LZ_MATCHES);
        assert_eq!(metrics.entropy, GROUP1_ENTROPY);
        assert_eq!(metrics.zstd_size, GROUP1_ZSTD_SIZE);
        assert_eq!(metrics.original_size, GROUP1_ORIGINAL_SIZE);
    }

    #[test]
    fn can_calculate_group_difference() {
        let group1_metrics = first_group_metrics();
        let group2_metrics = second_group_metrics();

        // Start from an all-zero difference
        let mut difference = GroupDifference {
            lz_matches: 0,
            entropy: 0.0,
            estimated_size: 0,
            zstd_size: 0,
            original_size: 0,
        };

        update_group_difference(&group1_metrics, &group2_metrics, &mut difference);

        // Only estimated_size is checked, since that is the only field
        // update_group_difference writes.
        assert_eq!(difference.estimated_size, DIFF_ESTIMATED_SIZE);
    }
}