pub mod generate_bytes;
#[cfg(test)]
pub(crate) mod test_helpers;
use super::{GroupComparisonMetrics, GroupDifference};
use crate::analyzer::CompressionOptions;
use crate::comparison::compare_groups::generate_bytes::generate_group_bytes;
use crate::schema::Schema;
use crate::{analyzer::AnalyzerFieldState, schema::CustomComparison};
use ahash::AHashMap;
use generate_bytes::GenerateBytesError;
use thiserror::Error;
/// Errors produced while building group comparison results.
#[derive(Error, Debug)]
pub enum GroupComparisonError {
/// Wraps a [`GenerateBytesError`]; the `#[from]` attribute lets `?` convert
/// such an error into this variant automatically.
#[error("Failed to generate group bytes: {0}")]
BytesGeneration(#[from] GenerateBytesError),
/// The number of comparison byte slices does not match the number of group
/// names. Returned by [`GroupComparisonResult::from_custom_comparison`]
/// before any metrics are computed.
#[error("Mismatched number of byte slices and group names. Slices {slices} != {names} Names")]
InvalidItemCount { slices: usize, names: usize },
/// Byte generation failed for the baseline or for one of the comparison
/// groups. The payload is a human-readable message that names the
/// comparison (and the group, where applicable) and embeds the underlying
/// error text.
#[error("Invalid comparison configuration: {0}")]
InvalidConfiguration(String),
}
/// Outcome of comparing one baseline group against one or more alternative
/// groups.
///
/// `group_names`, `group_metrics` and `differences` are parallel vectors:
/// the entry at index `i` of each one refers to the same comparison group.
#[derive(Clone)]
pub struct GroupComparisonResult {
/// Name of the comparison.
pub name: String,
/// Free-form description of the comparison.
pub description: String,
/// Metrics computed from the baseline bytes.
pub baseline_metrics: GroupComparisonMetrics,
/// Names of the comparison groups, in the order they were supplied.
pub group_names: Vec<String>,
/// Metrics computed from each comparison group's bytes.
pub group_metrics: Vec<GroupComparisonMetrics>,
/// Per-group difference, built with `GroupDifference::from_metrics` from the
/// baseline metrics and that group's metrics.
pub differences: Vec<GroupDifference>,
}
impl GroupComparisonResult {
    /// Builds a [`GroupComparisonResult`] from already-generated byte buffers.
    ///
    /// Metrics are computed once for the baseline and once per comparison
    /// buffer; each comparison is then diffed against the baseline.
    ///
    /// # Arguments
    ///
    /// * `name` - Name of the comparison. It also prefixes the labels handed to
    ///   `GroupComparisonMetrics::from_bytes` (`"{name}-baseline"` and
    ///   `"{name}-{group_name}"`).
    /// * `description` - Description stored verbatim in the result.
    /// * `baseline_bytes` - Bytes of the baseline group.
    /// * `comparison_byte_slices` - One byte buffer per comparison group.
    /// * `group_names` - One name per comparison group, in the same order as
    ///   `comparison_byte_slices`.
    /// * `compression_options` - Options forwarded to every metrics computation.
    ///
    /// # Errors
    ///
    /// Returns [`GroupComparisonError::InvalidItemCount`] when
    /// `comparison_byte_slices` and `group_names` have different lengths.
    pub fn from_custom_comparison<T: AsRef<[u8]>>(
        name: String,
        description: String,
        baseline_bytes: &[u8],
        comparison_byte_slices: &[T],
        group_names: &[String],
        compression_options: CompressionOptions,
    ) -> Result<Self, GroupComparisonError> {
        // Guard: both inputs are consumed pairwise below, so they must line up.
        let (slices, names) = (comparison_byte_slices.len(), group_names.len());
        if slices != names {
            return Err(GroupComparisonError::InvalidItemCount { slices, names });
        }

        let baseline_label = format!("{}-baseline", name);
        let baseline_metrics = GroupComparisonMetrics::from_bytes(
            baseline_bytes,
            &baseline_label,
            compression_options,
        );

        // Measure every group and diff it against the baseline in one pass,
        // then split the pairs into the two parallel vectors.
        let (group_metrics, differences): (Vec<_>, Vec<_>) = comparison_byte_slices
            .iter()
            .zip(group_names)
            .map(|(bytes, group_name)| {
                let label = format!("{}-{}", name, group_name);
                let metrics = GroupComparisonMetrics::from_bytes(
                    bytes.as_ref(),
                    &label,
                    compression_options,
                );
                let difference = GroupDifference::from_metrics(&baseline_metrics, &metrics);
                (metrics, difference)
            })
            .unzip();

        Ok(Self {
            name,
            description,
            baseline_metrics,
            group_names: group_names.to_vec(),
            group_metrics,
            differences,
        })
    }
}
/// Evaluates a single [`CustomComparison`] from the schema.
///
/// Generates the bytes for the baseline and for every comparison group via
/// `generate_group_bytes`, then delegates to
/// [`GroupComparisonResult::from_custom_comparison`].
///
/// # Arguments
///
/// * `comparison` - The comparison definition (name, description, baseline
///   components and named comparison groups).
/// * `field_stats` - Per-field analyzer state, passed through to
///   `generate_group_bytes`.
/// * `compression_options` - Options forwarded to the metrics computation.
///
/// # Errors
///
/// Returns [`GroupComparisonError::InvalidConfiguration`] when byte generation
/// fails for the baseline or for any group; processing stops at the first
/// failure. Errors from `from_custom_comparison` are propagated unchanged.
pub(crate) fn process_single_comparison(
    comparison: &CustomComparison,
    field_stats: &mut AHashMap<String, AnalyzerFieldState>,
    compression_options: CompressionOptions,
) -> Result<GroupComparisonResult, GroupComparisonError> {
    let baseline_bytes = match generate_group_bytes(&comparison.baseline, field_stats) {
        Ok(bytes) => bytes,
        Err(e) => {
            return Err(GroupComparisonError::InvalidConfiguration(format!(
                "Comparison '{}' baseline error: {}. This is indicative of a configuration error.",
                comparison.name, e
            )));
        }
    };

    // Generate bytes group by group, keeping the map's iteration order and
    // bailing out on the first group that cannot be generated.
    let named_bytes = comparison
        .comparisons
        .iter()
        .map(
            |(group_name, components)| match generate_group_bytes(components, field_stats) {
                Ok(bytes) => Ok((group_name.clone(), bytes)),
                Err(e) => Err(GroupComparisonError::InvalidConfiguration(format!(
                    "Comparison '{}' group '{}' error: {}. This is indicative of a configuration error.",
                    comparison.name, group_name, e
                ))),
            },
        )
        .collect::<Result<Vec<_>, GroupComparisonError>>()?;
    let (group_names, comparison_bytes): (Vec<String>, Vec<_>) = named_bytes.into_iter().unzip();

    // `CompressionOptions` is passed by value; no per-field copy is needed.
    GroupComparisonResult::from_custom_comparison(
        comparison.name.clone(),
        comparison.description.clone(),
        &baseline_bytes,
        &comparison_bytes,
        &group_names,
        compression_options,
    )
}
/// Runs every custom comparison listed under `schema.analysis.compare_groups`.
///
/// Comparisons are processed in schema order with
/// [`process_single_comparison`].
///
/// # Errors
///
/// Returns the first [`GroupComparisonError`] encountered; comparisons after
/// the failing one are not processed.
pub(crate) fn analyze_custom_comparisons(
    schema: &Schema,
    field_stats: &mut AHashMap<String, AnalyzerFieldState>,
    compression_options: CompressionOptions,
) -> Result<Vec<GroupComparisonResult>, GroupComparisonError> {
    let mut results = Vec::new();
    for comparison in schema.analysis.compare_groups.iter() {
        let result = process_single_comparison(comparison, field_stats, compression_options)?;
        results.push(result);
    }
    Ok(results)
}
#[cfg(test)]
mod from_custom_comparison_tests {
    use super::*;
    use crate::comparison::compare_groups::test_helpers::{
        create_mock_field_states, TEST_FIELD_NAME,
    };
    use crate::schema::{BitOrder, GroupComponent, GroupComponentArray};
    use indexmap::IndexMap;

    /// Single comparison group: an 8-bit baseline against a 4-bit slice of the
    /// same field. Pins the exact metrics and differences.
    #[test]
    fn from_custom_comparison_basic() {
        let input_data = [0b1010_1010, 0b0101_0101];
        let mut field_stats = create_mock_field_states(
            TEST_FIELD_NAME,
            &input_data,
            8,
            BitOrder::Lsb,
            BitOrder::Lsb,
        );

        // Array component over the test field, starting at offset 0.
        let array_component = |bits| {
            GroupComponent::Array(GroupComponentArray {
                field: TEST_FIELD_NAME.to_string(),
                offset: 0,
                bits,
                ..Default::default()
            })
        };

        let mut comparisons = IndexMap::new();
        comparisons.insert("comp1".to_string(), vec![array_component(4)]);
        let comparison = CustomComparison {
            name: "test_comp".to_string(),
            description: "test comparison".to_string(),
            baseline: vec![array_component(8)],
            comparisons,
        };

        let result =
            process_single_comparison(&comparison, &mut field_stats, CompressionOptions::default())
                .unwrap();

        // Baseline metrics.
        let baseline = &result.baseline_metrics;
        assert_eq!(baseline.original_size, 2);
        assert_eq!(baseline.zstd_size, 11);
        assert_eq!(baseline.estimated_size, 0);
        assert_eq!(baseline.entropy, 1.0);

        // Comparison group metrics.
        assert_eq!(result.group_names, vec!["comp1"]);
        let comp_metrics = &result.group_metrics[0];
        assert_eq!(comp_metrics.original_size, 1);
        assert_eq!(comp_metrics.zstd_size, 10);
        assert_eq!(comp_metrics.entropy, 0.0);

        // Differences of the comparison group relative to the baseline.
        let diff = &result.differences[0];
        assert_eq!(diff.original_size, -1);
        assert_eq!(diff.zstd_size, -1);
        assert_eq!(diff.entropy, -1.0);
    }

    /// Two comparison groups: group order is preserved, and a group identical
    /// to the baseline yields all-zero differences.
    #[test]
    fn from_custom_comparison_multiple_groups() {
        let input_data = [0b1111_0000];
        let mut field_stats = create_mock_field_states(
            TEST_FIELD_NAME,
            &input_data,
            8,
            BitOrder::Msb,
            BitOrder::Msb,
        );

        // Array component over the test field, starting at offset 0.
        let array_component = |bits| {
            GroupComponent::Array(GroupComponentArray {
                field: TEST_FIELD_NAME.to_string(),
                offset: 0,
                bits,
                ..Default::default()
            })
        };

        let mut comparisons = IndexMap::new();
        comparisons.insert("half_bits".to_string(), vec![array_component(4)]);
        comparisons.insert("full_bits".to_string(), vec![array_component(8)]);
        let comparison = CustomComparison {
            name: "multi_group".to_string(),
            description: String::new(),
            baseline: vec![array_component(8)],
            comparisons,
        };

        let result =
            process_single_comparison(&comparison, &mut field_stats, CompressionOptions::default())
                .unwrap();

        assert_eq!(result.group_names, vec!["half_bits", "full_bits"]);
        assert_eq!(result.differences.len(), 2);

        let half_bits = &result.differences[0];
        assert!(half_bits.estimated_size <= 0);

        // "full_bits" uses the same components as the baseline.
        let full_bits = &result.differences[1];
        assert_eq!(full_bits.estimated_size, 0);
        assert_eq!(full_bits.original_size, 0);
        assert_eq!(full_bits.zstd_size, 0);
        assert_eq!(full_bits.entropy, 0.0);
    }

    /// A baseline that references an unknown field is reported as
    /// `InvalidConfiguration`, with a message naming the comparison and field.
    #[test]
    fn invalid_configuration_error() {
        let baseline = vec![GroupComponent::Array(GroupComponentArray {
            field: "nonexistent_field".to_string(),
            offset: 0,
            bits: 8,
            ..Default::default()
        })];
        let invalid_comparison = CustomComparison {
            name: "invalid_comp".to_string(),
            description: "Invalid comparison".to_string(),
            baseline,
            comparisons: IndexMap::new(),
        };
        let mut field_stats = AHashMap::new();

        let outcome = process_single_comparison(
            &invalid_comparison,
            &mut field_stats,
            CompressionOptions::default(),
        );

        match outcome {
            Err(GroupComparisonError::InvalidConfiguration(msg)) => {
                assert!(msg.contains("Comparison 'invalid_comp' baseline error"));
                assert!(msg.contains("Field 'nonexistent_field' not found"));
            }
            _ => panic!("expected GroupComparisonError::InvalidConfiguration"),
        }
    }

    /// Two byte slices with only one group name must be rejected with
    /// `InvalidItemCount`.
    #[test]
    fn errors_on_mismatched_group_count() {
        let outcome = GroupComparisonResult::from_custom_comparison(
            String::from("test"),
            String::from("test"),
            &[],
            &[&[1u8], &[2u8]],
            &[String::from("group1")],
            CompressionOptions::default(),
        );

        match outcome {
            Err(GroupComparisonError::InvalidItemCount { slices, names }) => {
                assert_eq!(slices, 2);
                assert_eq!(names, 1);
            }
            _ => panic!("expected GroupComparisonError::InvalidItemCount"),
        }
    }
}