use crate::benchmark::types::{
AggregatedResults, AgreementStats, DisagreementExample, NormalizationComparison, ParseResult,
ParsingComparison, ShardResults, TimingInfo,
};
use crate::FerroError;
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufReader, BufWriter};
use std::path::Path;
/// Collates parsing benchmark timings into a [`ParsingComparison`] and writes it to
/// `output` as pretty-printed JSON.
///
/// Timing files found in `ferro_dir` are aggregated with
/// `AggregatedResults::from_timings`. When `mutalyzer_dir` is given, its timing files
/// are aggregated the same way and the comparison additionally carries:
///
/// * `speedup` — ferro throughput divided by mutalyzer throughput, or `None` when the
///   mutalyzer throughput is not positive;
/// * `agreement` — whatever `compute_parsing_agreement` returns for the two directories.
///
/// Missing parent directories of `output` are created. After the report is written, a
/// one-line summary is printed to stderr.
///
/// # Errors
///
/// Returns a [`FerroError`] if a directory cannot be read, a timing file cannot be
/// loaded, or the output file cannot be created, serialized, or flushed.
pub fn collate_parsing<P: AsRef<Path>>(
    ferro_dir: P,
    mutalyzer_dir: Option<P>,
    output: P,
    dataset_name: &str,
) -> Result<ParsingComparison, FerroError> {
    let ferro_dir = ferro_dir.as_ref();
    let output = output.as_ref();

    let ferro_timings = load_timing_files(ferro_dir)?;
    let ferro_results = AggregatedResults::from_timings(&ferro_timings);

    let (mutalyzer_results, speedup, agreement) = if let Some(mut_dir) = mutalyzer_dir {
        let mut_dir = mut_dir.as_ref();
        let mut_timings = load_timing_files(mut_dir)?;
        let mut_results = AggregatedResults::from_timings(&mut_timings);
        // Only report a ratio when the divisor is positive.
        let speedup = if mut_results.throughput > 0.0 {
            Some(ferro_results.throughput / mut_results.throughput)
        } else {
            None
        };
        let agreement = compute_parsing_agreement(ferro_dir, mut_dir)?;
        (Some(mut_results), speedup, agreement)
    } else {
        (None, None, None)
    };

    let comparison = ParsingComparison {
        dataset: dataset_name.to_string(),
        ferro_hgvs: ferro_results,
        mutalyzer: mutalyzer_results,
        speedup,
        agreement,
    };

    if let Some(parent) = output.parent() {
        std::fs::create_dir_all(parent).map_err(|e| FerroError::Io {
            msg: format!("Failed to create directory {}: {}", parent.display(), e),
        })?;
    }
    let file = File::create(output).map_err(|e| FerroError::Io {
        msg: format!("Failed to create {}: {}", output.display(), e),
    })?;
    let mut writer = BufWriter::new(file);
    serde_json::to_writer_pretty(&mut writer, &comparison).map_err(|e| FerroError::Io {
        msg: format!("Failed to write JSON: {}", e),
    })?;
    // Dropping a BufWriter discards any error from its final flush, so flush
    // explicitly; otherwise a truncated report could be reported as success.
    std::io::Write::flush(&mut writer).map_err(|e| FerroError::Io {
        msg: format!("Failed to flush {}: {}", output.display(), e),
    })?;

    eprintln!(
        "Collated parsing results for {}: ferro={:.4}% pass, throughput={:.0}/s",
        dataset_name,
        comparison.ferro_hgvs.pass_rate * 100.0,
        comparison.ferro_hgvs.throughput
    );
    Ok(comparison)
}
/// Collates normalization benchmark results into a [`NormalizationComparison`] and
/// writes it to `output` as pretty-printed JSON.
///
/// `ferro_results` (and `mutalyzer_results`, when given) are each a single JSON file
/// deserialized as [`ShardResults`]. Each file's `timing` is aggregated with
/// `AggregatedResults::from_timings`; when both files are present, agreement statistics
/// are computed with `compute_normalization_agreement`. The reported `sample_size` is
/// the ferro timing's `total_patterns`.
///
/// Missing parent directories of `output` are created. After the report is written, a
/// one-line summary is printed to stderr.
///
/// # Errors
///
/// Returns a [`FerroError`] if an input file cannot be opened or parsed, or the output
/// file cannot be created, serialized, or flushed.
pub fn collate_normalization<P: AsRef<Path>>(
    ferro_results: P,
    mutalyzer_results: Option<P>,
    output: P,
    dataset_name: &str,
) -> Result<NormalizationComparison, FerroError> {
    let ferro_results_path = ferro_results.as_ref();
    let output = output.as_ref();

    let ferro_data: ShardResults = load_json(ferro_results_path)?;
    let ferro_agg = AggregatedResults::from_timings(std::slice::from_ref(&ferro_data.timing));

    let (mutalyzer_agg, agreement) = if let Some(mut_path) = mutalyzer_results {
        let mut_data: ShardResults = load_json(mut_path.as_ref())?;
        let mut_agg = AggregatedResults::from_timings(std::slice::from_ref(&mut_data.timing));
        let agreement = compute_normalization_agreement(&ferro_data, &mut_data);
        (Some(mut_agg), Some(agreement))
    } else {
        (None, None)
    };

    let comparison = NormalizationComparison {
        dataset: dataset_name.to_string(),
        sample_size: ferro_data.timing.total_patterns,
        ferro_hgvs: ferro_agg,
        mutalyzer: mutalyzer_agg,
        agreement,
    };

    if let Some(parent) = output.parent() {
        std::fs::create_dir_all(parent).map_err(|e| FerroError::Io {
            msg: format!("Failed to create directory {}: {}", parent.display(), e),
        })?;
    }
    let file = File::create(output).map_err(|e| FerroError::Io {
        msg: format!("Failed to create {}: {}", output.display(), e),
    })?;
    let mut writer = BufWriter::new(file);
    serde_json::to_writer_pretty(&mut writer, &comparison).map_err(|e| FerroError::Io {
        msg: format!("Failed to write JSON: {}", e),
    })?;
    // Dropping a BufWriter discards any error from its final flush, so flush
    // explicitly; otherwise a truncated report could be reported as success.
    std::io::Write::flush(&mut writer).map_err(|e| FerroError::Io {
        msg: format!("Failed to flush {}: {}", output.display(), e),
    })?;

    eprintln!(
        "Collated normalization results for {}: ferro={:.2}% pass",
        dataset_name,
        comparison.ferro_hgvs.pass_rate * 100.0
    );
    Ok(comparison)
}
/// Loads every timing file directly inside `dir`.
///
/// A directory entry counts as a timing file when its extension is `json` and its
/// file name contains the substring `timing`. Entries are visited in whatever order
/// `std::fs::read_dir` yields them; subdirectories are not descended into.
///
/// # Errors
///
/// Fails if the directory or one of its entries cannot be read, or if any matching
/// file cannot be opened or parsed as `TimingInfo`.
fn load_timing_files<P: AsRef<Path>>(dir: P) -> Result<Vec<TimingInfo>, FerroError> {
    let root = dir.as_ref();
    let listing = std::fs::read_dir(root).map_err(|e| FerroError::Io {
        msg: format!("Failed to read directory {}: {}", root.display(), e),
    })?;

    let mut collected = Vec::new();
    for item in listing {
        let candidate = item
            .map_err(|e| FerroError::Io {
                msg: format!("Failed to read entry: {}", e),
            })?
            .path();

        let has_json_ext = candidate.extension().is_some_and(|ext| ext == "json");
        let named_timing = candidate
            .file_name()
            .and_then(|name| name.to_str())
            .is_some_and(|name| name.contains("timing"));
        if !(has_json_ext && named_timing) {
            continue;
        }

        collected.push(load_json::<TimingInfo, _>(&candidate)?);
    }
    Ok(collected)
}
/// Opens `path` and deserializes its JSON contents into a `T` through a buffered reader.
///
/// # Errors
///
/// Returns `FerroError::Io` when the file cannot be opened and `FerroError::Json` when
/// `serde_json` fails to read or parse it; both messages include the path.
fn load_json<T: serde::de::DeserializeOwned, P: AsRef<Path>>(path: P) -> Result<T, FerroError> {
    let source = path.as_ref();
    let reader = File::open(source)
        .map(BufReader::new)
        .map_err(|e| FerroError::Io {
            msg: format!("Failed to open {}: {}", source.display(), e),
        })?;
    serde_json::from_reader(reader).map_err(|e| FerroError::Json {
        msg: format!("Failed to parse {}: {}", source.display(), e),
    })
}
/// Compares per-input parse outcomes between the result shards in `ferro_dir` and
/// `mutalyzer_dir`.
///
/// Result shards are loaded with `load_results_files`. Returns `Ok(None)` when either
/// directory yields no shards. Otherwise every `ParseResult` from the shards'
/// `sample_results` and `failed_examples` is indexed by its `input` string (duplicate
/// inputs collapse to a single entry), and only inputs present on both sides are tallied:
///
/// * `both_success` / `both_fail` / `ferro_only_success` / `mutalyzer_only_success`
///   classify the pair of `success` flags;
/// * `agreements` counts both-success inputs whose `output` values are equal;
/// * `agreement_rate` is `agreements / both_success`, or `0.0` when `both_success` is 0;
/// * `disagreement_examples` holds up to 50 both-success inputs whose outputs differ.
///
/// Inputs are visited in sorted order so the retained disagreement examples are the
/// same on every run over the same data.
///
/// # Errors
///
/// Propagates the error from `load_results_files` when a directory cannot be read.
fn compute_parsing_agreement<P: AsRef<Path>>(
    ferro_dir: P,
    mutalyzer_dir: P,
) -> Result<Option<AgreementStats>, FerroError> {
    /// Upper bound on the number of disagreement examples kept in the report.
    const MAX_DISAGREEMENT_EXAMPLES: usize = 50;

    let ferro_results = load_results_files(ferro_dir.as_ref())?;
    let mutalyzer_results = load_results_files(mutalyzer_dir.as_ref())?;
    if ferro_results.is_empty() || mutalyzer_results.is_empty() {
        return Ok(None);
    }

    let ferro_map: HashMap<&str, &ParseResult> = ferro_results
        .iter()
        .flat_map(|sr| sr.sample_results.iter())
        .chain(
            ferro_results
                .iter()
                .flat_map(|sr| sr.failed_examples.iter()),
        )
        .map(|r| (r.input.as_str(), r))
        .collect();
    let mutalyzer_map: HashMap<&str, &ParseResult> = mutalyzer_results
        .iter()
        .flat_map(|sr| sr.sample_results.iter())
        .chain(
            mutalyzer_results
                .iter()
                .flat_map(|sr| sr.failed_examples.iter()),
        )
        .map(|r| (r.input.as_str(), r))
        .collect();

    // HashMap iteration order is arbitrary and differs between runs; sorting the
    // (already de-duplicated) inputs makes the capped example list reproducible.
    let mut ferro_entries: Vec<(&str, &ParseResult)> = ferro_map.into_iter().collect();
    ferro_entries.sort_unstable_by(|a, b| a.0.cmp(b.0));

    let mut both_success = 0usize;
    let mut both_fail = 0usize;
    let mut ferro_only_success = 0usize;
    let mut mutalyzer_only_success = 0usize;
    let mut agreements = 0usize;
    let mut disagreement_examples = Vec::new();

    for (input, ferro_result) in ferro_entries {
        // Inputs only one tool has a result for are not part of the comparison.
        let Some(mut_result) = mutalyzer_map.get(input) else {
            continue;
        };
        match (ferro_result.success, mut_result.success) {
            (true, true) => {
                both_success += 1;
                if ferro_result.output == mut_result.output {
                    agreements += 1;
                } else if disagreement_examples.len() < MAX_DISAGREEMENT_EXAMPLES {
                    disagreement_examples.push(DisagreementExample {
                        input: input.to_string(),
                        ferro_output: ferro_result.output.clone().unwrap_or_default(),
                        mutalyzer_output: mut_result.output.clone().unwrap_or_default(),
                    });
                }
            }
            (true, false) => ferro_only_success += 1,
            (false, true) => mutalyzer_only_success += 1,
            (false, false) => both_fail += 1,
        }
    }

    let agreement_rate = if both_success > 0 {
        agreements as f64 / both_success as f64
    } else {
        0.0
    };

    Ok(Some(AgreementStats {
        both_success,
        both_fail,
        ferro_only_success,
        mutalyzer_only_success,
        agreements,
        agreement_rate,
        disagreement_examples,
    }))
}
/// Loads every result shard directly inside `dir`.
///
/// A directory entry is treated as a candidate shard when its extension is `json` and
/// its file name does not contain the substring `timing`. Loading is best-effort:
/// a candidate that cannot be opened or parsed as `ShardResults` is skipped without
/// reporting an error.
///
/// # Errors
///
/// Fails only if the directory itself or one of its entries cannot be read.
fn load_results_files<P: AsRef<Path>>(dir: P) -> Result<Vec<ShardResults>, FerroError> {
    let root = dir.as_ref();
    let listing = std::fs::read_dir(root).map_err(|e| FerroError::Io {
        msg: format!("Failed to read directory {}: {}", root.display(), e),
    })?;

    let mut shards = Vec::new();
    for item in listing {
        let candidate = item
            .map_err(|e| FerroError::Io {
                msg: format!("Failed to read entry: {}", e),
            })?
            .path();

        let has_json_ext = candidate.extension().is_some_and(|ext| ext == "json");
        let named_timing = candidate
            .file_name()
            .and_then(|name| name.to_str())
            .is_some_and(|name| name.contains("timing"));

        if has_json_ext && !named_timing {
            // `.ok()` drops the load error on purpose: unreadable or differently
            // shaped JSON files are ignored instead of aborting the scan.
            shards.extend(load_json::<ShardResults, _>(&candidate).ok());
        }
    }
    Ok(shards)
}
/// Compares per-input outcomes between one ferro result set and one mutalyzer result set.
///
/// Every `ParseResult` from each side's `sample_results` and `failed_examples` is
/// indexed by its `input` string (duplicate inputs collapse to a single entry), and
/// only inputs present on both sides are tallied:
///
/// * `both_success` / `both_fail` / `ferro_only_success` / `mutalyzer_only_success`
///   classify the pair of `success` flags;
/// * `agreements` counts both-success inputs whose `output` values are equal;
/// * `agreement_rate` is `agreements / both_success`, or `0.0` when `both_success` is 0;
/// * `disagreement_examples` holds up to 50 both-success inputs whose outputs differ.
///
/// Inputs are visited in sorted order so the retained disagreement examples are the
/// same on every run over the same data.
fn compute_normalization_agreement(
    ferro_data: &ShardResults,
    mutalyzer_data: &ShardResults,
) -> AgreementStats {
    /// Upper bound on the number of disagreement examples kept in the report.
    const MAX_DISAGREEMENT_EXAMPLES: usize = 50;

    let ferro_map: HashMap<&str, &ParseResult> = ferro_data
        .sample_results
        .iter()
        .chain(ferro_data.failed_examples.iter())
        .map(|r| (r.input.as_str(), r))
        .collect();
    let mutalyzer_map: HashMap<&str, &ParseResult> = mutalyzer_data
        .sample_results
        .iter()
        .chain(mutalyzer_data.failed_examples.iter())
        .map(|r| (r.input.as_str(), r))
        .collect();

    // HashMap iteration order is arbitrary and differs between runs; sorting the
    // (already de-duplicated) inputs makes the capped example list reproducible.
    let mut ferro_entries: Vec<(&str, &ParseResult)> = ferro_map.into_iter().collect();
    ferro_entries.sort_unstable_by(|a, b| a.0.cmp(b.0));

    let mut both_success = 0usize;
    let mut both_fail = 0usize;
    let mut ferro_only_success = 0usize;
    let mut mutalyzer_only_success = 0usize;
    let mut agreements = 0usize;
    let mut disagreement_examples = Vec::new();

    for (input, ferro_result) in ferro_entries {
        // Inputs only one tool has a result for are not part of the comparison.
        let Some(mut_result) = mutalyzer_map.get(input) else {
            continue;
        };
        match (ferro_result.success, mut_result.success) {
            (true, true) => {
                both_success += 1;
                if ferro_result.output == mut_result.output {
                    agreements += 1;
                } else if disagreement_examples.len() < MAX_DISAGREEMENT_EXAMPLES {
                    disagreement_examples.push(DisagreementExample {
                        input: input.to_string(),
                        ferro_output: ferro_result.output.clone().unwrap_or_default(),
                        mutalyzer_output: mut_result.output.clone().unwrap_or_default(),
                    });
                }
            }
            (true, false) => ferro_only_success += 1,
            (false, true) => mutalyzer_only_success += 1,
            (false, false) => both_fail += 1,
        }
    }

    let agreement_rate = if both_success > 0 {
        agreements as f64 / both_success as f64
    } else {
        0.0
    };

    AgreementStats {
        both_success,
        both_fail,
        ferro_only_success,
        mutalyzer_only_success,
        agreements,
        agreement_rate,
        disagreement_examples,
    }
}