struct_compression_analyzer/comparison/compare_groups/
mod.rs1pub mod generate_bytes;
61#[cfg(test)]
62pub(crate) mod test_helpers;
63
64use super::{GroupComparisonMetrics, GroupDifference};
65use crate::analyzer::CompressionOptions;
66use crate::comparison::compare_groups::generate_bytes::generate_group_bytes;
67use crate::schema::Schema;
68use crate::{analyzer::AnalyzerFieldState, schema::CustomComparison};
69use ahash::AHashMap;
70use generate_bytes::GenerateBytesError;
71use thiserror::Error;
72
73#[derive(Error, Debug)]
75pub enum GroupComparisonError {
76 #[error("Failed to generate group bytes: {0}")]
77 BytesGeneration(#[from] GenerateBytesError),
78
79 #[error("Mismatched number of byte slices and group names. Slices {slices} != {names} Names")]
80 InvalidItemCount { slices: usize, names: usize },
81
82 #[error("Invalid comparison configuration: {0}")]
83 InvalidConfiguration(String),
84}
85
86#[derive(Clone)]
88pub struct GroupComparisonResult {
89 pub name: String,
91 pub description: String,
93 pub baseline_metrics: GroupComparisonMetrics,
95 pub group_names: Vec<String>,
97 pub group_metrics: Vec<GroupComparisonMetrics>,
99 pub differences: Vec<GroupDifference>,
101}
102
103impl GroupComparisonResult {
104 pub fn from_custom_comparison<T: AsRef<[u8]>>(
114 name: String,
115 description: String,
116 baseline_bytes: &[u8],
117 comparison_byte_slices: &[T],
118 group_names: &[String],
119 compression_options: CompressionOptions,
120 ) -> Result<Self, GroupComparisonError> {
121 if comparison_byte_slices.len() != group_names.len() {
122 return Err(GroupComparisonError::InvalidItemCount {
123 slices: comparison_byte_slices.len(),
124 names: group_names.len(),
125 });
126 }
127
128 let baseline_name = format!("{}-baseline", name);
130 let baseline_metrics =
131 GroupComparisonMetrics::from_bytes(baseline_bytes, &baseline_name, compression_options);
132
133 let mut group_metrics = Vec::with_capacity(comparison_byte_slices.len());
135 let mut differences = Vec::with_capacity(comparison_byte_slices.len());
136 let mut names = Vec::with_capacity(comparison_byte_slices.len());
137 for group_name in group_names {
138 names.push(group_name.clone());
139 }
140
141 for (comparison, group_name) in comparison_byte_slices.iter().zip(group_names.iter()) {
142 let comparison_name = format!("{}-{}", name, group_name);
143 let metrics = GroupComparisonMetrics::from_bytes(
144 comparison.as_ref(),
145 &comparison_name,
146 compression_options,
147 );
148 differences.push(GroupDifference::from_metrics(&baseline_metrics, &metrics));
149 group_metrics.push(metrics);
150 }
151
152 Ok(Self {
153 name,
154 description,
155 baseline_metrics,
156 group_names: names,
157 group_metrics,
158 differences,
159 })
160 }
161}
162
163pub(crate) fn process_single_comparison(
176 comparison: &CustomComparison,
177 field_stats: &mut AHashMap<String, AnalyzerFieldState>,
178 compression_options: CompressionOptions,
179) -> Result<GroupComparisonResult, GroupComparisonError> {
180 let baseline_bytes = generate_group_bytes(&comparison.baseline, field_stats).map_err(|e| {
182 GroupComparisonError::InvalidConfiguration(format!(
183 "Comparison '{}' baseline error: {}. This is indicative of a configuration error.",
184 comparison.name, e
185 ))
186 })?;
187
188 let mut comparison_bytes = Vec::new();
190 let mut group_names = Vec::new();
191
192 for (group_name, components) in &comparison.comparisons {
193 let bytes = generate_group_bytes(components, field_stats).map_err(|e| {
194 GroupComparisonError::InvalidConfiguration(format!(
195 "Comparison '{}' group '{}' error: {}. This is indicative of a configuration error.",
196 comparison.name, group_name, e
197 ))
198 })?;
199
200 comparison_bytes.push(bytes);
201 group_names.push(group_name.clone());
202 }
203
204 let custom_compression_options = CompressionOptions {
206 zstd_compression_level: compression_options.zstd_compression_level,
207 size_estimator_fn: compression_options.size_estimator_fn,
208 lz_match_multiplier: compression_options.lz_match_multiplier,
209 entropy_multiplier: compression_options.entropy_multiplier,
210 };
211
212 GroupComparisonResult::from_custom_comparison(
213 comparison.name.clone(),
214 comparison.description.clone(),
215 &baseline_bytes,
216 &comparison_bytes,
217 &group_names,
218 custom_compression_options,
219 )
220}
221
222pub(crate) fn analyze_custom_comparisons(
235 schema: &Schema,
236 field_stats: &mut AHashMap<String, AnalyzerFieldState>,
237 compression_options: CompressionOptions,
238) -> Result<Vec<GroupComparisonResult>, GroupComparisonError> {
239 schema
240 .analysis
241 .compare_groups
242 .iter()
243 .map(|comparison| {
244 process_single_comparison(comparison, field_stats, compression_options)
246 })
247 .collect()
248}
249
#[cfg(test)]
mod from_custom_comparison_tests {
    use super::*;
    use crate::comparison::compare_groups::test_helpers::{
        create_mock_field_states, TEST_FIELD_NAME,
    };
    use crate::schema::{BitOrder, GroupComponent, GroupComponentArray};
    use indexmap::IndexMap;

    /// A single comparison group against a baseline: checks the baseline
    /// metrics, the group's metrics, and the resulting difference.
    #[test]
    fn from_custom_comparison_basic() {
        let input_data = [0b1010_1010, 0b0101_0101];
        let mut field_stats = create_mock_field_states(
            TEST_FIELD_NAME,
            &input_data,
            8,
            BitOrder::Lsb,
            BitOrder::Lsb,
        );

        let baseline = vec![GroupComponent::Array(GroupComponentArray {
            field: TEST_FIELD_NAME.to_string(),
            offset: 0,
            bits: 8,
            ..Default::default()
        })];
        let four_bit_group = vec![GroupComponent::Array(GroupComponentArray {
            field: TEST_FIELD_NAME.to_string(),
            offset: 0,
            bits: 4,
            ..Default::default()
        })];

        let mut comparisons = IndexMap::new();
        comparisons.insert("comp1".to_string(), four_bit_group);

        let comparison = CustomComparison {
            name: "test_comp".to_string(),
            description: "test comparison".to_string(),
            baseline,
            comparisons,
        };

        let result =
            process_single_comparison(&comparison, &mut field_stats, CompressionOptions::default())
                .unwrap();

        // Baseline group.
        let baseline_metrics = &result.baseline_metrics;
        assert_eq!(baseline_metrics.original_size, 2);
        assert_eq!(baseline_metrics.zstd_size, 11);
        assert_eq!(baseline_metrics.estimated_size, 0);
        assert_eq!(baseline_metrics.entropy, 1.0);

        // The single compared group.
        assert_eq!(result.group_names, vec!["comp1"]);
        let group = &result.group_metrics[0];
        assert_eq!(group.original_size, 1);
        assert_eq!(group.zstd_size, 10);
        assert_eq!(group.entropy, 0.0);

        // Differences are negative here: the group's values are below the baseline's.
        let delta = &result.differences[0];
        assert_eq!(delta.original_size, -1);
        assert_eq!(delta.zstd_size, -1);
        assert_eq!(delta.entropy, -1.0);
    }

    /// Two comparison groups: results keep insertion order, and a group
    /// configured identically to the baseline yields all-zero differences.
    #[test]
    fn from_custom_comparison_multiple_groups() {
        let input_data = [0b1111_0000];
        let mut field_stats = create_mock_field_states(
            TEST_FIELD_NAME,
            &input_data,
            8,
            BitOrder::Msb,
            BitOrder::Msb,
        );

        let baseline = vec![GroupComponent::Array(GroupComponentArray {
            field: TEST_FIELD_NAME.to_string(),
            offset: 0,
            bits: 8,
            ..Default::default()
        })];
        let half_bits = vec![GroupComponent::Array(GroupComponentArray {
            field: TEST_FIELD_NAME.to_string(),
            offset: 0,
            bits: 4,
            ..Default::default()
        })];
        let full_bits = vec![GroupComponent::Array(GroupComponentArray {
            field: TEST_FIELD_NAME.to_string(),
            offset: 0,
            bits: 8,
            ..Default::default()
        })];

        let mut comparisons = IndexMap::new();
        comparisons.insert("half_bits".to_string(), half_bits);
        comparisons.insert("full_bits".to_string(), full_bits);

        let comparison = CustomComparison {
            name: "multi_group".to_string(),
            description: String::new(),
            baseline,
            comparisons,
        };

        let result =
            process_single_comparison(&comparison, &mut field_stats, CompressionOptions::default())
                .unwrap();

        assert_eq!(result.group_names, vec!["half_bits", "full_bits"]);
        assert_eq!(result.differences.len(), 2);

        // "half_bits" must not be estimated larger than the baseline.
        let half_delta = &result.differences[0];
        assert!(half_delta.estimated_size <= 0);

        // "full_bits" mirrors the baseline configuration, so nothing differs.
        let full_delta = &result.differences[1];
        assert_eq!(full_delta.estimated_size, 0);
        assert_eq!(full_delta.original_size, 0);
        assert_eq!(full_delta.zstd_size, 0);
        assert_eq!(full_delta.entropy, 0.0);
    }

    /// A baseline referring to an unknown field is reported as
    /// `InvalidConfiguration`, with a message naming the comparison and field.
    #[test]
    fn invalid_configuration_error() {
        let baseline = vec![GroupComponent::Array(GroupComponentArray {
            field: "nonexistent_field".to_string(),
            offset: 0,
            bits: 8,
            ..Default::default()
        })];
        let invalid_comparison = CustomComparison {
            name: "invalid_comp".to_string(),
            description: "Invalid comparison".to_string(),
            baseline,
            comparisons: IndexMap::new(),
        };

        // No field state registered at all, so the lookup has to fail.
        let mut field_stats = AHashMap::new();
        let result = process_single_comparison(
            &invalid_comparison,
            &mut field_stats,
            CompressionOptions::default(),
        );

        assert!(matches!(
            result,
            Err(GroupComparisonError::InvalidConfiguration(msg))
                if msg.contains("Comparison 'invalid_comp' baseline error")
                    && msg.contains("Field 'nonexistent_field' not found")
        ));
    }

    /// Two byte slices with only one name must be rejected with
    /// `InvalidItemCount` carrying both counts.
    #[test]
    fn errors_on_mismatched_group_count() {
        let result = GroupComparisonResult::from_custom_comparison(
            "test".into(),
            "test".into(),
            &[],
            &[&[1u8], &[2u8]],
            &["group1".into()],
            CompressionOptions::default(),
        );

        assert!(matches!(
            result,
            Err(GroupComparisonError::InvalidItemCount {
                slices: 2,
                names: 1
            })
        ));
    }
}