struct_compression_analyzer/comparison/
mod.rs

1//! Core comparison structures for storing the results of group comparisons.
2//!
//! The module is split into three specialized submodules:
4//!
5//! - [`split_comparison`]: Easy comparison of 'splitting' structs.
//!     - e.g. interleaved (RGBRGBRGB) vs. separated fields (RRRGGGBBB)
7//! - [`compare_groups`]: Comparison of more custom field transformations and analysis
8//! - [`stats`]: Additional statistics for comparing groups
9//!
10//! # Types
11//!
12//! - [`GroupComparisonMetrics`]: Collects compression metrics (LZ matches, entropy, sizes)
13//! - [`GroupDifference`]: Tracks metric differences between two field groups
14//!
15//! # Example
16//!
17//! ```no_run
18//! use struct_compression_analyzer::comparison::*;
19//! use struct_compression_analyzer::analyzer::CompressionOptions;
20//!
21//! fn calculate_example(baseline_data: &[u8], comparison_data: &[u8]) {
22//!     let options = CompressionOptions::default();
23//!     let baseline = GroupComparisonMetrics::from_bytes(&baseline_data, "name_a", options);
24//!     let comparison = GroupComparisonMetrics::from_bytes(&comparison_data, "name_b", options);
25//!
26//!     // Compare the difference
27//!     let difference = GroupDifference::from_metrics(&baseline, &comparison);
28//! }
29//! ```
30//!
31//! [`split_comparison`]: self::split_comparison
32//! [`compare_groups`]: self::compare_groups
33//! [`stats`]: self::stats
34//! [`GroupComparisonMetrics`]: GroupComparisonMetrics
35//! [`GroupDifference`]: GroupDifference
36
37use crate::{
38    analyzer::{CompressionOptions, SizeEstimationParameters},
39    utils::analyze_utils::{calculate_file_entropy, get_zstd_compressed_size},
40};
41use lossless_transform_utils::match_estimator::estimate_num_lz_matches_fast;
42
43pub mod compare_groups;
44pub mod split_comparison;
45pub mod stats;
46
/// Raw compression metrics gathered for a single group of fields.
///
/// The group may come from the [`split_comparison`] module, the [`compare_groups`]
/// module, or any other code that compares several sets of bytes.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub struct GroupComparisonMetrics {
    /// Total number of LZ matches.
    pub lz_matches: u64,
    /// Entropy of the input data set.
    pub entropy: f64,
    /// Size reported by the size estimator function.
    pub estimated_size: u64,
    /// Size after compression with zstd.
    pub zstd_size: u64,
    /// Size of the original, unprocessed data.
    pub original_size: u64,
}
63
/// The per-metric difference between two groups of fields.
/// The raw values of an individual group live in [`GroupComparisonMetrics`].
///
/// Suitable for describing the difference between splits, or between any two arbitrary
/// groups of analyzed bytes. Usually it is the difference between a result and a baseline.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub struct GroupDifference {
    /// Difference in the number of LZ matches.
    pub lz_matches: i64,
    /// Difference in entropy.
    pub entropy: f64,
    /// Difference in the size estimated by the
    /// user provided estimate function.
    pub estimated_size: i64,
    /// Difference in zstd compressed size.
    pub zstd_size: i64,
    /// Difference in original size.
    pub original_size: i64,
}
83
84impl GroupComparisonMetrics {
85    /// Calculates group comparison metrics for a given byte slice.
86    ///
87    /// This function computes various metrics such as entropy, LZ matches, estimated size,
88    /// and Zstandard compressed size, which are used for comparing different compression strategies.
89    ///
90    /// # Arguments
91    /// * `bytes` - A slice of bytes representing the data to analyze.
92    /// * `group_name` - The name of the group being analyzed.
93    /// * `compression_options` - Compression options, zstd compression level, etc.
94    ///
95    /// # Returns
96    /// A [`GroupComparisonMetrics`] struct containing the computed metrics.
97    pub fn from_bytes(
98        bytes: &[u8],
99        group_name: &str,
100        compression_options: CompressionOptions,
101    ) -> Self {
102        let entropy = calculate_file_entropy(bytes);
103        let lz_matches = estimate_num_lz_matches_fast(bytes) as u64;
104        let estimated_size = (compression_options.size_estimator_fn)(SizeEstimationParameters {
105            name: group_name,
106            data: Some(bytes),
107            data_len: bytes.len(),
108            num_lz_matches: lz_matches as usize,
109            entropy,
110            lz_match_multiplier: compression_options.lz_match_multiplier,
111            entropy_multiplier: compression_options.entropy_multiplier,
112        }) as u64;
113        let zstd_size = get_zstd_compressed_size(bytes, compression_options.zstd_compression_level);
114
115        GroupComparisonMetrics {
116            lz_matches,
117            entropy,
118            estimated_size,
119            zstd_size,
120            original_size: bytes.len() as u64,
121        }
122    }
123}
124
125impl GroupDifference {
126    /// Creates a new GroupDifference by comparing two sets of metrics
127    ///
128    /// # Arguments
129    /// * `baseline` - The baseline metrics to compare against
130    /// * `comparison` - The metrics to compare with the baseline
131    ///
132    /// # Returns
133    /// A new [`GroupDifference`] containing the calculated differences
134    pub fn from_metrics(
135        baseline: &GroupComparisonMetrics,
136        comparison: &GroupComparisonMetrics,
137    ) -> Self {
138        GroupDifference {
139            lz_matches: comparison.lz_matches as i64 - baseline.lz_matches as i64,
140            entropy: comparison.entropy - baseline.entropy,
141            estimated_size: comparison.estimated_size as i64 - baseline.estimated_size as i64,
142            zstd_size: comparison.zstd_size as i64 - baseline.zstd_size as i64,
143            original_size: comparison.original_size as i64 - baseline.original_size as i64,
144        }
145    }
146}