struct_compression_analyzer/comparison/compare_groups/
mod.rs

1//! Processes custom field transformations and group comparisons defined in schemas.
2//!
3//! This module implements analysis of user-defined field groupings and transformations,
4//! allowing comparison of different field arrangements and bit layouts. Unlike
5//! split comparisons which handle basic field reordering, this module supports
6//! complex transformations like:
7//!
8//! - Bit padding and alignment
9//! - Field slicing and partial reads
10//! - Custom field grouping patterns
11//!
12//! # Core Types
13//!
14//! - [`GroupComparisonResult`]: Results from analyzing custom field groupings
15//! - [`GroupComparisonError`]: Errors that can occur during group analysis
16//!
17//! # Example
18//!
19//! ```yaml
20//! compare_groups:
21//!   - name: convert_7_to_8_bit
22//!     description: "Convert 7-bit colors to 8-bit by padding"
23//!     baseline:
24//!       - type: array      # Original 7-bit values
25//!         field: color7
26//!         bits: 7
27//!     comparisons:
28//!       padded_8bit:      # Padded to 8 bits
29//!         - type: struct
30//!           fields:
31//!             - type: field
32//!               field: color7
33//!               bits: 7
34//!             - type: padding
35//!               bits: 1
36//!               value: 0
37//! ```
38//!
39//! Each comparison analyzes:
40//! - Compression metrics (entropy, LZ matches)
41//! - Size comparisons (original, estimated, actual zstd)
42//! - Field-level statistics
43//!
44//! # Usage Notes
45//!
46//! - Baseline group serves as reference for comparisons
47//! - Multiple comparison groups can be defined
48//! - Field transformations are applied during analysis
49//! - Bit padding and alignment can impact compression
50//!
51//! # Submodules
52//!
53//! - [`generate_bytes`]: Core byte stream generation from schemas
54//! - [`test_helpers`]: Testing utilities (only in test builds)
55//!
56//! [`GroupComparisonResult`]: crate::comparison::compare_groups::GroupComparisonResult
57//! [`GroupComparisonError`]: crate::comparison::compare_groups::GroupComparisonError
58//! [`generate_bytes`]: crate::comparison::compare_groups::generate_bytes
59
60pub mod generate_bytes;
61#[cfg(test)]
62pub(crate) mod test_helpers;
63
64use super::{GroupComparisonMetrics, GroupDifference};
65use crate::analyzer::CompressionOptions;
66use crate::comparison::compare_groups::generate_bytes::generate_group_bytes;
67use crate::schema::Schema;
68use crate::{analyzer::AnalyzerFieldState, schema::CustomComparison};
69use ahash::AHashMap;
70use generate_bytes::GenerateBytesError;
71use thiserror::Error;
72
73/// Describes an error that occurred while computing a group comparison.
74#[derive(Error, Debug)]
75pub enum GroupComparisonError {
76    #[error("Failed to generate group bytes: {0}")]
77    BytesGeneration(#[from] GenerateBytesError),
78
79    #[error("Mismatched number of byte slices and group names. Slices {slices} != {names} Names")]
80    InvalidItemCount { slices: usize, names: usize },
81
82    #[error("Invalid comparison configuration: {0}")]
83    InvalidConfiguration(String),
84}
85
86/// Contains the result of comparing custom field groupings defined in the schema.
87#[derive(Clone)]
88pub struct GroupComparisonResult {
89    /// The name of the group comparison. (Copied from schema)
90    pub name: String,
91    /// A description of the group comparison. (Copied from schema)
92    pub description: String,
93    /// Metrics for the baseline group.
94    pub baseline_metrics: GroupComparisonMetrics,
95    /// Names of the comparison groups in order they were specified in the schema
96    pub group_names: Vec<String>,
97    /// Metrics for the comparison groups in schema order
98    pub group_metrics: Vec<GroupComparisonMetrics>,
99    /// Comparison between other groups and first (baseline) group.
100    pub differences: Vec<GroupDifference>,
101}
102
103impl GroupComparisonResult {
104    /// Creates comparison results from precomputed group bytes
105    ///
106    /// Arguments:
107    /// * `name` - The name of the comparison (copied from schema)
108    /// * `description` - The description of the comparison (copied from schema)
109    /// * `baseline_bytes` - The bytes of the baseline (original/reference) group.
110    /// * `comparison_byte_slices` - The bytes of the comparison groups.
111    /// * `group_names` - The names of the comparison groups in order they were specified in the schema.
112    /// * `compression_options` - Compression options, zstd compression level, etc.
113    pub fn from_custom_comparison<T: AsRef<[u8]>>(
114        name: String,
115        description: String,
116        baseline_bytes: &[u8],
117        comparison_byte_slices: &[T],
118        group_names: &[String],
119        compression_options: CompressionOptions,
120    ) -> Result<Self, GroupComparisonError> {
121        if comparison_byte_slices.len() != group_names.len() {
122            return Err(GroupComparisonError::InvalidItemCount {
123                slices: comparison_byte_slices.len(),
124                names: group_names.len(),
125            });
126        }
127
128        // Calculate baseline metrics
129        let baseline_name = format!("{}-baseline", name);
130        let baseline_metrics =
131            GroupComparisonMetrics::from_bytes(baseline_bytes, &baseline_name, compression_options);
132
133        // Process comparison groups
134        let mut group_metrics = Vec::with_capacity(comparison_byte_slices.len());
135        let mut differences = Vec::with_capacity(comparison_byte_slices.len());
136        let mut names = Vec::with_capacity(comparison_byte_slices.len());
137        for group_name in group_names {
138            names.push(group_name.clone());
139        }
140
141        for (comparison, group_name) in comparison_byte_slices.iter().zip(group_names.iter()) {
142            let comparison_name = format!("{}-{}", name, group_name);
143            let metrics = GroupComparisonMetrics::from_bytes(
144                comparison.as_ref(),
145                &comparison_name,
146                compression_options,
147            );
148            differences.push(GroupDifference::from_metrics(&baseline_metrics, &metrics));
149            group_metrics.push(metrics);
150        }
151
152        Ok(Self {
153            name,
154            description,
155            baseline_metrics,
156            group_names: names,
157            group_metrics,
158            differences,
159        })
160    }
161}
162
163/// Analyzes a single custom comparison defined in the [`Schema`].
164/// This is an internal API.
165///
166/// # Arguments
167///
168/// * `comparison` - The comparison to analyze
169/// * `field_stats` - Mutable reference to field statistics map
170/// * `compression_options` - Compression options, zstd compression level, etc.
171///
172/// # Returns
173///
174/// A single [`GroupComparisonResult`] containing metrics for the passed in comparison
175pub(crate) fn process_single_comparison(
176    comparison: &CustomComparison,
177    field_stats: &mut AHashMap<String, AnalyzerFieldState>,
178    compression_options: CompressionOptions,
179) -> Result<GroupComparisonResult, GroupComparisonError> {
180    // Generate baseline bytes with error context
181    let baseline_bytes = generate_group_bytes(&comparison.baseline, field_stats).map_err(|e| {
182        GroupComparisonError::InvalidConfiguration(format!(
183            "Comparison '{}' baseline error: {}. This is indicative of a configuration error.",
184            comparison.name, e
185        ))
186    })?;
187
188    // Generate comparison group bytes in schema order
189    let mut comparison_bytes = Vec::new();
190    let mut group_names = Vec::new();
191
192    for (group_name, components) in &comparison.comparisons {
193        let bytes = generate_group_bytes(components, field_stats).map_err(|e| {
194            GroupComparisonError::InvalidConfiguration(format!(
195                "Comparison '{}' group '{}' error: {}. This is indicative of a configuration error.",
196                comparison.name, group_name, e
197            ))
198        })?;
199
200        comparison_bytes.push(bytes);
201        group_names.push(group_name.clone());
202    }
203
204    // Create custom compression options for this comparison using its multipliers
205    let custom_compression_options = CompressionOptions {
206        zstd_compression_level: compression_options.zstd_compression_level,
207        size_estimator_fn: compression_options.size_estimator_fn,
208        lz_match_multiplier: compression_options.lz_match_multiplier,
209        entropy_multiplier: compression_options.entropy_multiplier,
210    };
211
212    GroupComparisonResult::from_custom_comparison(
213        comparison.name.clone(),
214        comparison.description.clone(),
215        &baseline_bytes,
216        &comparison_bytes,
217        &group_names,
218        custom_compression_options,
219    )
220}
221
222/// Analyzes all custom comparisons defined in the [`Schema`].
223/// This is an internal API.
224///
225/// # Arguments
226///
227/// * `schema` - Reference to loaded schema definition
228/// * `field_stats` - Mutable reference to field statistics map
229/// * `compression_options` - Compression options, zstd compression level, etc.
230///
231/// # Returns
232///
233/// Vector of [`GroupComparisonResult`] containing metrics for all configured comparisons
234pub(crate) fn analyze_custom_comparisons(
235    schema: &Schema,
236    field_stats: &mut AHashMap<String, AnalyzerFieldState>,
237    compression_options: CompressionOptions,
238) -> Result<Vec<GroupComparisonResult>, GroupComparisonError> {
239    schema
240        .analysis
241        .compare_groups
242        .iter()
243        .map(|comparison| {
244            // Use base compression options but pass comparison through for multipliers
245            process_single_comparison(comparison, field_stats, compression_options)
246        })
247        .collect()
248}
249
#[cfg(test)]
mod from_custom_comparison_tests {
    use super::*;
    use crate::comparison::compare_groups::test_helpers::{
        create_mock_field_states, TEST_FIELD_NAME,
    };
    use crate::schema::{BitOrder, GroupComponent, GroupComponentArray};
    use indexmap::IndexMap;

    /// Builds a one-component group: an array over `$field` with offset 0
    /// and `$bits` bits per element.
    macro_rules! array_group {
        ($field:expr, $bits:expr) => {
            vec![GroupComponent::Array(GroupComponentArray {
                field: $field.to_string(),
                offset: 0,
                bits: $bits,
                ..Default::default()
            })]
        };
    }

    /// One comparison group ("comp1") taking 4 bits per element against an 8-bit baseline.
    #[test]
    fn from_custom_comparison_basic() {
        let input_data = [0b1010_1010, 0b0101_0101];
        let mut field_stats = create_mock_field_states(
            TEST_FIELD_NAME,
            &input_data,
            8,
            BitOrder::Lsb,
            BitOrder::Lsb,
        );

        let mut comparisons = IndexMap::new();
        comparisons.insert("comp1".to_string(), array_group!(TEST_FIELD_NAME, 4));
        let comparison = CustomComparison {
            name: "test_comp".to_string(),
            description: "test comparison".to_string(),
            baseline: array_group!(TEST_FIELD_NAME, 8),
            comparisons,
        };

        let result =
            process_single_comparison(&comparison, &mut field_stats, CompressionOptions::default())
                .unwrap();

        // Note: The 'zstd' and 'estimated size' numbers are hardcoded for a sanity check only;
        //       they may legitimately change when compression parameters change.
        // Baseline metrics
        let baseline = &result.baseline_metrics;
        assert_eq!(baseline.original_size, 2); // input_data
        assert_eq!(baseline.zstd_size, 11); // Zstd has overhead
        assert_eq!(baseline.estimated_size, 0); // Arbitrary, and can change.
        assert_eq!(baseline.entropy, 1.0); // 2 different bytes == entropy of 1

        // Comparison group metrics
        assert_eq!(result.group_names, vec!["comp1"]);
        let comp_metrics = &result.group_metrics[0];
        assert_eq!(comp_metrics.original_size, 1); // Half the data.
        assert_eq!(comp_metrics.zstd_size, 10); // Zstd has overhead
        assert_eq!(comp_metrics.entropy, 0.0); // One byte == entropy of 0

        // Differences relative to the baseline
        let diff = &result.differences[0];
        assert_eq!(diff.original_size, -1);
        assert_eq!(diff.zstd_size, -1);
        assert_eq!(diff.entropy, -1.0);
    }

    /// Two comparison groups: one with half the bits, one identical to the baseline.
    #[test]
    fn from_custom_comparison_multiple_groups() {
        let input_data = [0b1111_0000];
        let mut field_stats = create_mock_field_states(
            TEST_FIELD_NAME,
            &input_data,
            8,
            BitOrder::Msb,
            BitOrder::Msb,
        );

        let mut comparisons = IndexMap::new();
        comparisons.insert("half_bits".to_string(), array_group!(TEST_FIELD_NAME, 4));
        comparisons.insert("full_bits".to_string(), array_group!(TEST_FIELD_NAME, 8));
        let comparison = CustomComparison {
            name: "multi_group".to_string(),
            description: String::new(),
            baseline: array_group!(TEST_FIELD_NAME, 8),
            comparisons,
        };

        let result =
            process_single_comparison(&comparison, &mut field_stats, CompressionOptions::default())
                .unwrap();

        // Groups are reported in insertion (schema) order.
        assert_eq!(result.group_names, vec!["half_bits", "full_bits"]);
        assert_eq!(result.differences.len(), 2);

        // Note: The 'zstd' and 'estimated size' numbers are hardcoded for a sanity check only;
        //       they may legitimately change when compression parameters change.
        // First group: its estimated-size difference must not be positive.
        let half_bits = &result.differences[0];
        assert!(half_bits.estimated_size <= 0);

        // Second group has the same layout as the baseline, so nothing differs.
        let full_bits = &result.differences[1];
        assert_eq!(full_bits.estimated_size, 0);
        assert_eq!(full_bits.original_size, 0);
        assert_eq!(full_bits.zstd_size, 0);
        assert_eq!(full_bits.entropy, 0.0);
    }

    /// A baseline that references an unknown field must yield `InvalidConfiguration`.
    #[test]
    fn invalid_configuration_error() {
        let invalid_comparison = CustomComparison {
            name: "invalid_comp".to_string(),
            description: "Invalid comparison".to_string(),
            // This field does not exist in the (empty) field statistics map.
            baseline: array_group!("nonexistent_field", 8),
            comparisons: IndexMap::new(),
        };

        let mut field_stats = AHashMap::new();
        let result = process_single_comparison(
            &invalid_comparison,
            &mut field_stats,
            CompressionOptions::default(),
        );

        match result {
            Err(GroupComparisonError::InvalidConfiguration(msg)) => {
                assert!(msg.contains("Comparison 'invalid_comp' baseline error"));
                assert!(msg.contains("Field 'nonexistent_field' not found"));
            }
            _ => panic!("expected GroupComparisonError::InvalidConfiguration"),
        }
    }

    /// Two byte slices but only one group name must be rejected.
    #[test]
    fn errors_on_mismatched_group_count() {
        let slices: [&[u8]; 2] = [&[1], &[2]];
        let names = ["group1".to_string()];

        let result = GroupComparisonResult::from_custom_comparison(
            "test".to_string(),
            "test".to_string(),
            &[],
            &slices,
            &names,
            CompressionOptions::default(),
        );

        assert!(matches!(
            result,
            Err(GroupComparisonError::InvalidItemCount {
                slices: 2,
                names: 1
            })
        ));
    }
}