use super::model::{CalibrationReport, MetricKey, MetricSummary};
use crate::model::TestResultRow;
use std::collections::HashMap;
/// Accumulates raw metric observations, grouped by [`MetricKey`], until
/// `finish` turns them into per-metric summary statistics.
pub struct Aggregator {
/// Quantile handed to `percentile` in `finish` to derive each metric's
/// `recommended_min_score`.
target_tail: f64,
/// Every value pushed so far, bucketed by metric key (unsorted until `finish`).
values: HashMap<MetricKey, Vec<f64>>,
}
impl Aggregator {
    /// Creates an empty aggregator.
    ///
    /// `target_tail` is the quantile later passed to `percentile` to compute each
    /// metric's `recommended_min_score`.
    pub fn new(target_tail: f64) -> Self {
        Self {
            target_tail,
            values: HashMap::new(),
        }
    }

    /// Records one observation `v` under `key`, creating the bucket on first use.
    pub fn push(&mut self, key: MetricKey, v: f64) {
        self.values.entry(key).or_default().push(v);
    }

    /// Consumes the aggregator and builds a [`CalibrationReport`] labelled with `source`.
    ///
    /// For every key that received at least one value this computes the count, min, max,
    /// mean, population standard deviation (divides by `n`, not `n - 1`), the 10th/50th/90th
    /// percentiles, `recommended_min_score` (the `target_tail` percentile) and
    /// `recommended_max_drop` (`p50 - p10`, clamped to `[0.02, 0.10]`).
    ///
    /// Summaries are ordered by metric name, then by `test_id`, so the output does not
    /// depend on `HashMap` iteration order. A warning note is attached when any metric has
    /// fewer than 10 samples. `generated_at` is the current UTC time in RFC 3339 form.
    ///
    /// NaN inputs are not filtered: they sort to the extremes of the bucket and propagate
    /// into `mean`/`std` (and possibly `min`/`max`).
    pub fn finish(self, source: &str) -> CalibrationReport {
        let mut metrics = Vec::new();
        for (key, mut vs) in self.values {
            // `total_cmp` is a genuine total order even when NaN is present; a comparator
            // built from `partial_cmp(..).unwrap_or(Equal)` is not, which can leave the
            // slice unsorted (or panic in newer standard libraries).
            vs.sort_unstable_by(f64::total_cmp);

            let (min, max) = match (vs.first(), vs.last()) {
                (Some(&lo), Some(&hi)) => (lo, hi),
                // Empty bucket: nothing to summarize.
                _ => continue,
            };

            // Saturate instead of silently wrapping if a bucket ever exceeds `u32::MAX`.
            let n = vs.len().min(u32::MAX as usize) as u32;
            // Divide by the real length so the statistics stay correct even if `n` saturated.
            let count = vs.len() as f64;

            let sum: f64 = vs.iter().sum();
            let mean = sum / count;
            let variance: f64 = vs.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / count;
            let std = variance.sqrt();

            let p10 = percentile(&vs, 0.10);
            let p50 = percentile(&vs, 0.50);
            let p90 = percentile(&vs, 0.90);
            let recommended_min_score = percentile(&vs, self.target_tail);
            let recommended_max_drop = (p50 - p10).clamp(0.02, 0.10);

            metrics.push(MetricSummary {
                key,
                n,
                min,
                max,
                mean,
                std,
                p10,
                p50,
                p90,
                recommended_min_score,
                recommended_max_drop,
            });
        }

        // Tie-break on `test_id`: sorting by metric name alone would leave keys that share
        // a metric in whatever order the hash map happened to yield them.
        metrics.sort_by(|a, b| {
            a.key
                .metric
                .cmp(&b.key.metric)
                .then_with(|| a.key.test_id.cmp(&b.key.test_id))
        });

        let mut notes = vec![];
        if metrics.iter().any(|m| m.n < 10) {
            notes.push(
                "Warning: Low sample size (n < 10) makes percentiles unreliable.".to_string(),
            );
        }

        CalibrationReport {
            schema_version: 1,
            source: source.to_string(),
            generated_at: chrono::Utc::now().to_rfc3339(),
            metrics,
            notes,
        }
    }
}
/// Feeds every scored metric found in one test result row into `agg`.
///
/// Looks up the `"metrics"` object inside `r.details`; rows without one are ignored.
/// For each entry that carries a numeric `"score"`, the score is clamped to `[-1.0, 1.0]`
/// and pushed under a key made of the metric name with no `test_id`. Entries lacking a
/// numeric score are skipped.
pub fn ingest_row(agg: &mut Aggregator, r: &TestResultRow) {
    let metrics = match r.details.get("metrics").and_then(|m| m.as_object()) {
        Some(map) => map,
        None => return,
    };

    for (name, entry) in metrics {
        let raw = match entry.get("score").and_then(|s| s.as_f64()) {
            Some(value) => value,
            None => continue,
        };
        let key = MetricKey {
            metric: name.clone(),
            test_id: None,
        };
        agg.push(key, raw.clamp(-1.0, 1.0));
    }
}
/// Returns the element of an ascending-sorted slice at quantile `q`.
///
/// The rank is `floor(q * (len - 1))` with no interpolation between neighbours, capped at
/// the last index. An empty slice yields `0.0`. The caller is responsible for sorting.
fn percentile(sorted: &[f64], q: f64) -> f64 {
    match sorted.len().checked_sub(1) {
        // No elements at all.
        None => 0.0,
        Some(last) => {
            let span = sorted.len() as f64 - 1.0;
            // The float-to-usize cast saturates, so a negative or NaN product becomes 0.
            let rank = (q * span).floor() as usize;
            sorted[rank.min(last)]
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the floor-rank percentile on ten evenly spaced values.
    #[test]
    fn test_percentiles() {
        let data = vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0];
        // (quantile, expected element)
        let cases = [(0.10, 0.1), (0.50, 0.5), (0.90, 0.9)];
        for &(q, expected) in cases.iter() {
            assert_eq!(percentile(&data, q), expected);
        }
    }
}