use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use crate::error::{Result, SdkError};
/// One validator's verdict on one candidate.
///
/// A slice of these records is the input to [`compute`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct VerdictRecord {
/// Identifier of the validator this verdict belongs to.
pub validator_id: String,
/// Identifier of the candidate this verdict is about.
pub candidate_id: String,
/// `true` when the verdict is recorded as a miss.
pub missed: bool,
}
impl VerdictRecord {
pub fn new(
validator_id: impl Into<String>,
candidate_id: impl Into<String>,
missed: bool,
) -> Self {
Self {
validator_id: validator_id.into(),
candidate_id: candidate_id.into(),
missed,
}
}
}
/// Aggregate statistics produced by [`compute`].
// NOTE(review): `correlations` is keyed by a tuple; map formats that require
// string keys (e.g. JSON objects) may refuse to serialize it — confirm the
// intended serde target.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndependenceStats {
/// Per-validator miss rate: the fraction of that validator's candidates
/// whose verdict is a miss.
pub miss_rates: BTreeMap<String, f64>,
/// Correlation of miss indicators for each validator pair that shares at
/// least one candidate, keyed by the two validator ids.
pub correlations: BTreeMap<(String, String), f64>,
/// Smallest value of `q_i * q_j + corr * miss_std(q_i) * miss_std(q_j)`
/// over all recorded pairs; `None` when no pair shares a candidate.
pub rho_eff: Option<f64>,
}
/// Standard deviation of a Bernoulli variable with success probability `q`.
///
/// The variance `q * (1 - q)` is floored at zero before the square root, so
/// inputs outside `[0, 1]` (and NaN) yield `0.0` rather than NaN.
pub fn miss_std(q: f64) -> f64 {
    let variance = q * (1.0 - q);
    f64::sqrt(f64::max(variance, 0.0))
}
/// Derives miss rates, pairwise miss correlations and `rho_eff` from a ledger
/// of verdicts.
///
/// * Records are grouped per validator and then per candidate. If the same
///   `(validator, candidate)` pair occurs more than once, the last record wins.
/// * A validator's miss rate is its number of misses divided by its number of
///   distinct candidates.
/// * For every unordered validator pair that shares at least one candidate,
///   the phi correlation of their miss indicators over the shared candidates
///   is stored under `(vi, vj)` with `vi` sorting before `vj`. Pairs without a
///   shared candidate are skipped entirely.
/// * `rho_eff` is the minimum over recorded pairs of
///   `q_i * q_j + phi * miss_std(q_i) * miss_std(q_j)`, or `None` when no pair
///   was recorded.
///
/// # Errors
///
/// Returns [`SdkError::Domain`] when `records` is empty.
pub fn compute(records: &[VerdictRecord]) -> Result<IndependenceStats> {
    if records.is_empty() {
        return Err(SdkError::Domain("no verdict records".into()));
    }

    // validator -> (candidate -> missed); `insert` overwrites duplicates.
    let mut by_validator: BTreeMap<String, BTreeMap<String, bool>> = BTreeMap::new();
    for r in records {
        by_validator
            .entry(r.validator_id.clone())
            .or_default()
            .insert(r.candidate_id.clone(), r.missed);
    }

    // Every inner map holds at least one entry, so the division is never 0/0.
    let miss_rates: BTreeMap<String, f64> = by_validator
        .iter()
        .map(|(v, verdicts)| {
            let n = verdicts.len() as f64;
            let misses = verdicts.values().filter(|&&m| m).count() as f64;
            (v.clone(), misses / n)
        })
        .collect();

    // Borrow the per-validator maps once instead of cloning them for every
    // pair; iteration order is the map's sorted key order.
    let entries: Vec<(&String, &BTreeMap<String, bool>)> = by_validator.iter().collect();
    let mut correlations = BTreeMap::new();
    let mut rho_eff: Option<f64> = None;
    for (idx, &(vi, mi)) in entries.iter().enumerate() {
        for &(vj, mj) in &entries[idx + 1..] {
            // Verdict pairs for the candidates both validators judged.
            let joint: Vec<(bool, bool)> = mi
                .iter()
                .filter_map(|(c, &m_i)| mj.get(c).map(|&m_j| (m_i, m_j)))
                .collect();
            if joint.is_empty() {
                continue;
            }
            let kappa = pearson_phi(&joint);
            correlations.insert((vi.clone(), vj.clone()), kappa);

            // NOTE(review): the miss rates cover each validator's full
            // candidate set while `kappa` covers only shared candidates, so
            // this value is not guaranteed to stay within [0, 1] — confirm
            // whether that mix is intended.
            let qi = miss_rates[vi];
            let qj = miss_rates[vj];
            let bound = qi * qj + kappa * miss_std(qi) * miss_std(qj);
            rho_eff = Some(match rho_eff {
                Some(current) => current.min(bound),
                None => bound,
            });
        }
    }

    Ok(IndependenceStats {
        miss_rates,
        correlations,
        rho_eff,
    })
}
/// Pearson correlation of two boolean series given as paired observations
/// (the phi coefficient), with `true` mapped to 1 and `false` to 0.
///
/// Returns `0.0` when either series has (near-)zero variance, which also
/// covers an empty slice.
fn pearson_phi(pairs: &[(bool, bool)]) -> f64 {
    let indicator = |flag: bool| -> f64 {
        match flag {
            true => 1.0,
            false => 0.0,
        }
    };
    let count = pairs.len() as f64;

    // First pass: totals of each indicator, giving the two means.
    let (total_x, total_y) = pairs
        .iter()
        .fold((0.0_f64, 0.0_f64), |(sx, sy), &(x, y)| {
            (sx + indicator(x), sy + indicator(y))
        });
    let (mean_x, mean_y) = (total_x / count, total_y / count);

    // Second pass: unnormalised covariance and variances, in input order.
    let (cov, var_x, var_y) = pairs.iter().fold(
        (0.0_f64, 0.0_f64, 0.0_f64),
        |(c, vx, vy), &(x, y)| {
            let dx = indicator(x) - mean_x;
            let dy = indicator(y) - mean_y;
            (c + dx * dy, vx + dx * dx, vy + dy * dy)
        },
    );

    let degenerate = var_x <= f64::EPSILON || var_y <= f64::EPSILON;
    if degenerate {
        0.0
    } else {
        cov / (var_x.sqrt() * var_y.sqrt())
    }
}
/// Scales `weight` down by the correlation with what has already been counted.
///
/// The correlation is clamped to `[0, 1]` first, so negative correlations
/// leave the weight unchanged and correlations of one or more reduce it to
/// zero.
pub fn attenuate_weight(weight: f64, correlation_with_counted: f64) -> f64 {
    let overlap = correlation_with_counted.clamp(0.0, 1.0);
    let retained = 1.0 - overlap;
    weight * retained
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns `(validator, candidate, missed)` triples into a verdict ledger.
    fn ledger(rows: &[(&str, &str, bool)]) -> Vec<VerdictRecord> {
        rows.iter()
            .map(|&(validator, candidate, missed)| {
                VerdictRecord::new(validator, candidate, missed)
            })
            .collect()
    }

    /// Fetches the correlation stored under the `("a", "b")` key, panicking
    /// when the pair was not recorded.
    fn correlation_a_b(stats: &IndependenceStats) -> f64 {
        let key = (String::from("a"), String::from("b"));
        stats.correlations.get(&key).copied().unwrap()
    }

    #[test]
    fn miss_std_matches_bernoulli() {
        let at_half = miss_std(0.5);
        assert!((at_half - 0.5).abs() < 1e-12);
        assert_eq!(miss_std(0.0), 0.0);
        assert_eq!(miss_std(1.0), 0.0);
    }

    #[test]
    fn independent_validators_have_low_rho_eff() {
        // "a" and "b" disagree on every candidate.
        let records = ledger(&[
            ("a", "c1", true),
            ("b", "c1", false),
            ("a", "c2", false),
            ("b", "c2", true),
            ("a", "c3", true),
            ("b", "c3", false),
            ("a", "c4", false),
            ("b", "c4", true),
        ]);
        let stats = compute(&records).unwrap();
        let kappa = correlation_a_b(&stats);
        assert!(kappa < 0.0, "expected anti-correlation, got {kappa}");
        assert!(stats.rho_eff.unwrap() >= 0.0);
    }

    #[test]
    fn redundant_validators_are_attenuated() {
        // "a" and "b" agree on every candidate.
        let records = ledger(&[
            ("a", "c1", true),
            ("b", "c1", true),
            ("a", "c2", false),
            ("b", "c2", false),
            ("a", "c3", true),
            ("b", "c3", true),
        ]);
        let stats = compute(&records).unwrap();
        let kappa = correlation_a_b(&stats);
        assert!(
            (kappa - 1.0).abs() < 1e-9,
            "expected perfect correlation, got {kappa}"
        );
        assert_eq!(attenuate_weight(2.0, kappa), 0.0);
    }

    #[test]
    fn empty_ledger_is_error() {
        let outcome = compute(&[]);
        assert!(outcome.is_err());
    }
}