struct_compression_analyzer/comparison/mod.rs
1//! Core comparison structures for storing the results of group comparisons.
2//!
3//! The module is split into three specialized submodules:
4//!
5//! - [`split_comparison`]: Easy comparison of 'splitting' structs.
6//! - e.g. interleaved (RGBRGBRGB) vs. separated fields (RRRGGGBBB)
7//! - [`compare_groups`]: Comparison of more custom field transformations and analysis
8//! - [`stats`]: Additional statistics for comparing groups
9//!
10//! # Types
11//!
12//! - [`GroupComparisonMetrics`]: Collects compression metrics (LZ matches, entropy, sizes)
13//! - [`GroupDifference`]: Tracks metric differences between two field groups
14//!
15//! # Example
16//!
17//! ```no_run
18//! use struct_compression_analyzer::comparison::*;
19//! use struct_compression_analyzer::analyzer::CompressionOptions;
20//!
21//! fn calculate_example(baseline_data: &[u8], comparison_data: &[u8]) {
22//! let options = CompressionOptions::default();
23//! let baseline = GroupComparisonMetrics::from_bytes(&baseline_data, "name_a", options);
24//! let comparison = GroupComparisonMetrics::from_bytes(&comparison_data, "name_b", options);
25//!
26//! // Compare the difference
27//! let difference = GroupDifference::from_metrics(&baseline, &comparison);
28//! }
29//! ```
30//!
31//! [`split_comparison`]: self::split_comparison
32//! [`compare_groups`]: self::compare_groups
33//! [`stats`]: self::stats
34//! [`GroupComparisonMetrics`]: GroupComparisonMetrics
35//! [`GroupDifference`]: GroupDifference
36
37use crate::{
38 analyzer::{CompressionOptions, SizeEstimationParameters},
39 utils::analyze_utils::{calculate_file_entropy, get_zstd_compressed_size},
40};
41use lossless_transform_utils::match_estimator::estimate_num_lz_matches_fast;
42
43pub mod compare_groups;
44pub mod split_comparison;
45pub mod stats;
46
/// Raw compression metrics gathered for a single group of fields.
///
/// The group may originate from the [`split_comparison`] module, from the
/// [`compare_groups`] module, or from any other code that compares several sets of bytes.
#[derive(Debug, Default, Clone, Copy, PartialEq)]
pub struct GroupComparisonMetrics {
    /// Total number of LZ matches.
    pub lz_matches: u64,
    /// Entropy of the input data set.
    pub entropy: f64,
    /// Size produced by the size estimator function.
    pub estimated_size: u64,
    /// Size of the data after zstd compression.
    pub zstd_size: u64,
    /// Size of the original data.
    pub original_size: u64,
}
63
/// Signed difference between the metrics of two groups of fields.
/// The raw, per-group values live in [`GroupComparisonMetrics`].
///
/// Suitable for describing the difference between splits, or between any two arbitrary
/// groups of analyzed bytes. Usually one side is a result and the other a baseline.
#[derive(Debug, Default, Clone, Copy, PartialEq)]
pub struct GroupDifference {
    /// Difference in the number of LZ matches.
    pub lz_matches: i64,
    /// Difference in entropy.
    pub entropy: f64,
    /// Difference in the size estimated by the user
    /// provided estimate function.
    pub estimated_size: i64,
    /// Difference in zstd compressed size.
    pub zstd_size: i64,
    /// Difference in original size.
    pub original_size: i64,
}
83
84impl GroupComparisonMetrics {
85 /// Calculates group comparison metrics for a given byte slice.
86 ///
87 /// This function computes various metrics such as entropy, LZ matches, estimated size,
88 /// and Zstandard compressed size, which are used for comparing different compression strategies.
89 ///
90 /// # Arguments
91 /// * `bytes` - A slice of bytes representing the data to analyze.
92 /// * `group_name` - The name of the group being analyzed.
93 /// * `compression_options` - Compression options, zstd compression level, etc.
94 ///
95 /// # Returns
96 /// A [`GroupComparisonMetrics`] struct containing the computed metrics.
97 pub fn from_bytes(
98 bytes: &[u8],
99 group_name: &str,
100 compression_options: CompressionOptions,
101 ) -> Self {
102 let entropy = calculate_file_entropy(bytes);
103 let lz_matches = estimate_num_lz_matches_fast(bytes) as u64;
104 let estimated_size = (compression_options.size_estimator_fn)(SizeEstimationParameters {
105 name: group_name,
106 data: Some(bytes),
107 data_len: bytes.len(),
108 num_lz_matches: lz_matches as usize,
109 entropy,
110 lz_match_multiplier: compression_options.lz_match_multiplier,
111 entropy_multiplier: compression_options.entropy_multiplier,
112 }) as u64;
113 let zstd_size = get_zstd_compressed_size(bytes, compression_options.zstd_compression_level);
114
115 GroupComparisonMetrics {
116 lz_matches,
117 entropy,
118 estimated_size,
119 zstd_size,
120 original_size: bytes.len() as u64,
121 }
122 }
123}
124
125impl GroupDifference {
126 /// Creates a new GroupDifference by comparing two sets of metrics
127 ///
128 /// # Arguments
129 /// * `baseline` - The baseline metrics to compare against
130 /// * `comparison` - The metrics to compare with the baseline
131 ///
132 /// # Returns
133 /// A new [`GroupDifference`] containing the calculated differences
134 pub fn from_metrics(
135 baseline: &GroupComparisonMetrics,
136 comparison: &GroupComparisonMetrics,
137 ) -> Self {
138 GroupDifference {
139 lz_matches: comparison.lz_matches as i64 - baseline.lz_matches as i64,
140 entropy: comparison.entropy - baseline.entropy,
141 estimated_size: comparison.estimated_size as i64 - baseline.estimated_size as i64,
142 zstd_size: comparison.zstd_size as i64 - baseline.zstd_size as i64,
143 original_size: comparison.original_size as i64 - baseline.original_size as i64,
144 }
145 }
146}