pub mod analysis_results;
pub mod merged_analysis_results;
use crate::analyzer::BitStats;
use crate::comparison::compare_groups::GroupComparisonError;
use crate::results::analysis_results::AnalysisResults;
use crate::schema::BitOrder;
use crate::utils::constants::CHILD_MARKER;
use derive_more::FromStr;
use merged_analysis_results::MergedAnalysisResults;
use rustc_hash::FxHashMap;
use std::io::{self, Write};
use thiserror::Error;
/// Errors reported when merging several [`FieldMetrics`] into one.
#[derive(Debug, Error)]
pub enum AnalysisMergeError {
/// The `bit_counts` vectors of the merged items do not all have the same length.
#[error(
"Number of bit counts did not match while merging `bit_counts`.
This indicates inconsistent input data, or merging of results that were computed differently."
)]
BitCountsDontMatch,
/// Two merged items disagree on `lenbits`.
/// The payload is `(lenbits of the first item, lenbits of the mismatching item)`.
#[error("Field length mismatch: {0} != {1}. This indicates inconsistent, different or incorrect input data.")]
FieldLengthMismatch(u32, u32),
}
/// Error type wrapping failures from group comparison.
/// NOTE(review): presumably returned by the code that computes analysis results;
/// that code is not visible in this file section — confirm.
#[derive(Debug, Error)]
pub enum ComputeAnalysisResultsError {
/// A [`GroupComparisonError`], forwarded transparently (its own message and source
/// are used). `#[from]` lets `?` convert it automatically.
#[error(transparent)]
GroupComparisonError(#[from] GroupComparisonError),
}
/// Statistics collected for a single field.
///
/// Several instances describing the same field can be combined with
/// [`FieldMetrics::try_merge_many`].
#[derive(Clone, Default)]
pub struct FieldMetrics {
/// Field name, printed as the heading of the field's statistics.
pub name: String,
/// Path of the field; segments are separated by `CHILD_MARKER`
/// (see [`FieldMetrics::parent_path`]).
pub full_path: String,
/// Nesting depth; the printers repeat the indent string this many times.
pub depth: usize,
/// Counter that is summed when metrics are merged.
/// NOTE(review): presumably the number of observed values — confirm against the analyzer.
pub count: u64,
/// Field length in bits.
pub lenbits: u32,
/// Entropy of the field; averaged when metrics are merged.
pub entropy: f64,
/// LZ match count; averaged when metrics are merged.
pub lz_matches: u64,
/// Per-bit zero/one counters. The bit-stats printer only prints them when the
/// vector length equals `lenbits`.
pub bit_counts: Vec<BitStats>,
/// Bit order associated with the field.
pub bit_order: BitOrder,
/// Map from observed value to the number of times it was seen.
pub value_counts: FxHashMap<u64, u64>,
/// Size after zstd compression; averaged when metrics are merged.
/// NOTE(review): unit is presumably bytes — confirm.
pub zstd_size: u64,
/// Size before compression; averaged when metrics are merged.
/// NOTE(review): unit is presumably bytes — confirm.
pub original_size: u64,
}
impl FieldMetrics {
    /// Merges several [`FieldMetrics`] describing the same field into a new value.
    ///
    /// * `name`, `full_path`, `depth`, `lenbits` and `bit_order` are copied from the
    ///   first item.
    /// * `count` is the sum over all items.
    /// * `entropy`, `lz_matches`, `zstd_size` and `original_size` are averaged over the
    ///   number of items (integer division for the integer fields).
    /// * `bit_counts` are summed position by position and `value_counts` key by key;
    ///   every item contributes exactly once.
    ///
    /// An empty `items` slice yields `FieldMetrics::default()`.
    ///
    /// # Errors
    ///
    /// * [`AnalysisMergeError::FieldLengthMismatch`] if any item's `lenbits` differs
    ///   from the first item's.
    /// * [`AnalysisMergeError::BitCountsDontMatch`] if any item's `bit_counts` length
    ///   differs from the first item's.
    pub fn try_merge_many(items: &[&Self]) -> Result<FieldMetrics, AnalysisMergeError> {
        let first = match items.first() {
            Some(first) => *first,
            None => return Ok(FieldMetrics::default()),
        };

        // All items must describe a field of the same bit length.
        if let Some(mismatch) = items.iter().find(|other| other.lenbits != first.lenbits) {
            return Err(AnalysisMergeError::FieldLengthMismatch(
                first.lenbits,
                mismatch.lenbits,
            ));
        }

        let total_items = items.len();
        let mut total_count = 0;
        let mut total_entropy = 0.0;
        let mut total_lz_matches = 0;
        let mut total_zstd_size = 0;
        let mut total_original_size = 0;
        for metrics in items {
            total_count += metrics.count;
            total_entropy += metrics.entropy;
            total_lz_matches += metrics.lz_matches;
            total_zstd_size += metrics.zstd_size;
            total_original_size += metrics.original_size;
        }

        let mut merged = FieldMetrics {
            name: first.name.clone(),
            full_path: first.full_path.clone(),
            depth: first.depth,
            lenbits: first.lenbits,
            bit_order: first.bit_order,
            count: total_count,
            entropy: total_entropy / total_items as f64,
            lz_matches: total_lz_matches / total_items as u64,
            zstd_size: total_zstd_size / total_items as u64,
            original_size: total_original_size / total_items as u64,
            ..Default::default()
        };
        merged.merge_bit_stats_and_value_counts(items)?;
        Ok(merged)
    }

    /// Replaces `self.bit_counts` and `self.value_counts` with the sums over `items`.
    ///
    /// The accumulators start as a copy of the first item's data, so only the
    /// remaining items are folded in; iterating over the first item again would
    /// count it twice. If `items` is empty, `self` is left untouched.
    ///
    /// # Errors
    ///
    /// [`AnalysisMergeError::BitCountsDontMatch`] if an item's `bit_counts` length
    /// differs from the first item's. `self` is not modified in that case.
    fn merge_bit_stats_and_value_counts(
        &mut self,
        items: &[&Self],
    ) -> Result<(), AnalysisMergeError> {
        let (first, rest) = match items.split_first() {
            Some(parts) => parts,
            None => return Ok(()),
        };

        let mut bit_counts = first.bit_counts.clone();
        let mut value_counts = first.value_counts.clone();

        for other in rest {
            if bit_counts.len() != other.bit_counts.len() {
                return Err(AnalysisMergeError::BitCountsDontMatch);
            }

            // Lengths are equal, so zipping visits every bit position exactly once.
            for (current, bit_stats) in bit_counts.iter_mut().zip(&other.bit_counts) {
                current.ones += bit_stats.ones;
                current.zeros += bit_stats.zeros;
            }

            for (value, count) in &other.value_counts {
                *value_counts.entry(*value).or_insert(0) += count;
            }
        }

        self.bit_counts = bit_counts;
        self.value_counts = value_counts;
        Ok(())
    }

    /// Returns the part of `full_path` before the last `CHILD_MARKER`, or `None`
    /// when `full_path` contains no `CHILD_MARKER`.
    pub fn parent_path(&self) -> Option<&str> {
        self.full_path.rsplit_once(CHILD_MARKER).map(|(p, _)| p)
    }

    /// Looks up the metrics stored under this field's parent path in
    /// `results.per_field`.
    ///
    /// Returns `optb` when this field has no parent path or when the parent path is
    /// not present in `results.per_field`.
    pub fn parent_metrics_or<'a>(
        &self,
        results: &'a AnalysisResults,
        optb: &'a FieldMetrics,
    ) -> &'a FieldMetrics {
        self.parent_path()
            .and_then(|p| results.per_field.get(p))
            .unwrap_or(optb)
    }

    /// Same as [`FieldMetrics::parent_metrics_or`], but the lookup is performed in a
    /// [`MergedAnalysisResults`].
    ///
    /// Returns `optb` when this field has no parent path or when the parent path is
    /// not present in `results.per_field`.
    pub fn parent_metrics_in_merged_or<'a>(
        &self,
        results: &'a MergedAnalysisResults,
        optb: &'a FieldMetrics,
    ) -> &'a FieldMetrics {
        self.parent_path()
            .and_then(|p| results.per_field.get(p))
            .unwrap_or(optb)
    }

    /// Returns the `(value, count)` pairs of `value_counts`, ordered by count from
    /// highest to lowest.
    ///
    /// The sort is stable, so pairs with equal counts keep the map's iteration order.
    pub fn sorted_value_counts(&self) -> Vec<(&u64, &u64)> {
        let mut counts: Vec<_> = self.value_counts.iter().collect();
        counts.sort_by(|a, b| b.1.cmp(a.1));
        counts
    }
}
/// Selects how results are printed.
///
/// Can be parsed from a string through the derived `FromStr` implementation and
/// defaults to [`PrintFormat::Detailed`].
#[derive(Debug, Clone, Copy, Default, FromStr)]
pub enum PrintFormat {
/// Detailed output (the default variant).
#[default]
Detailed,
/// Concise output.
Concise,
}
/// Expresses `child` as a percentage of `parent`.
///
/// Yields `0.0` when `parent` is zero, so callers never divide by zero.
pub(crate) fn calculate_percentage(child: f64, parent: f64) -> f64 {
    // Guard clause: a zero denominator maps to 0%.
    if parent == 0.0 {
        return 0.0;
    }
    let ratio = child / parent;
    ratio * 100.0
}
/// Writes a heading for `field` followed by up to five of its most frequent values.
///
/// The heading has the form `<indent><name> (<lenbits> bits)`. Each value line shows
/// the value and its share of the summed counts as a percentage with one decimal
/// place. Nothing beyond the heading is written when `value_counts` is empty.
///
/// # Errors
///
/// Propagates any I/O error raised by `writer`.
pub(crate) fn print_field_metrics_value_stats<W: Write>(
    writer: &mut W,
    field: &FieldMetrics,
) -> io::Result<()> {
    let pad = " ".repeat(field.depth);
    writeln!(writer, "{}{} ({} bits)", pad, field.name, field.lenbits)?;

    let ranked = field.sorted_value_counts();
    if ranked.is_empty() {
        return Ok(());
    }

    // The denominator covers every value, not only the five that get printed.
    let grand_total: u64 = ranked.iter().map(|&(_, &occurrences)| occurrences).sum();
    for &(value, &occurrences) in ranked.iter().take(5) {
        let share = (occurrences as f32 / grand_total as f32) * 100.0;
        writeln!(writer, "{} {}: {:.1}%", pad, value, share)?;
    }
    Ok(())
}
/// Writes a heading for `field` followed by one line per bit with its counters.
///
/// The heading has the form `<indent><name> (<lenbits> bits)`. Each bit line shows
/// the bit index, the zero and one counters, and the percentage of ones with one
/// decimal place (`0.0` when the bit has no observations). Only the heading is
/// written when `bit_counts` does not contain exactly `lenbits` entries.
///
/// # Errors
///
/// Propagates any I/O error raised by `writer`.
pub(crate) fn print_field_metrics_bit_stats<W: Write>(
    writer: &mut W,
    field: &FieldMetrics,
) -> io::Result<()> {
    let pad = " ".repeat(field.depth);
    writeln!(writer, "{}{} ({} bits)", pad, field.name, field.lenbits)?;

    // Per-bit lines are only emitted when there is exactly one counter per bit.
    if field.bit_counts.len() == field.lenbits as usize {
        for (bit_index, stats) in field.bit_counts.iter().enumerate() {
            let observations = stats.zeros + stats.ones;
            let ones_pct = if observations > 0 {
                (stats.ones as f64 / observations as f64) * 100.0
            } else {
                0.0
            };
            writeln!(
                writer,
                "{} Bit {}: ({}/{}) ({:.1}%)",
                pad, bit_index, stats.zeros, stats.ones, ones_pct
            )?;
        }
    }
    Ok(())
}