extern crate std;
use std::collections::BTreeMap;
use std::format;
use std::string::String;
use std::vec::Vec;
/// Measurements recorded for a single fixture, used as the input unit of the
/// cross-validation aggregators in this file.
#[derive(Clone, Debug)]
pub struct LooCvFixtureRecord {
    /// Name of the fixture; used as the label in per-fixture delta lists and
    /// in the per-fold fixture listings.
    pub fixture_name: &'static str,
    /// RSCR value measured on this fixture.
    pub rscr: f64,
    /// Clean-window FP rate measured on this fixture.
    pub clean_window_fp_rate: f64,
    /// Fault recall measured on this fixture.
    pub fault_recall: f64,
    /// Raw alert count; summed across fixtures into `total_raw_alerts`.
    pub raw_alert_count: u64,
    /// Fusion episode count; summed across fixtures into `total_episodes`.
    pub fusion_episode_count: u64,
    /// Consensus-confirmed typed episode count; summed across fixtures into
    /// the typed / consensus-confirmed totals.
    pub consensus_confirmed_typed_episodes: u64,
    /// Whether deterministic replay held for this fixture; fixtures with
    /// `true` are counted in `fixtures_with_replay_holds`.
    pub deterministic_replay_holds: bool,
}
/// Summary statistics over a set of [`LooCvFixtureRecord`]s, as produced by
/// `aggregate_loo_cv`.
///
/// Standard deviations are population standard deviations (the squared
/// deviations are divided by the number of fixtures, not by `n - 1`).
#[derive(Clone, Debug)]
pub struct LooCvAggregate {
    /// Number of fixture records that were aggregated.
    pub fixtures_observed: usize,
    /// Number of those records whose `deterministic_replay_holds` was `true`.
    pub fixtures_with_replay_holds: usize,
    /// Arithmetic mean of `rscr` across fixtures.
    pub mean_rscr: f64,
    /// Population standard deviation of `rscr` across fixtures.
    pub stddev_rscr: f64,
    /// Arithmetic mean of `clean_window_fp_rate` across fixtures.
    pub mean_clean_window_fp_rate: f64,
    /// Population standard deviation of `clean_window_fp_rate`.
    pub stddev_clean_window_fp_rate: f64,
    /// Arithmetic mean of `fault_recall` across fixtures.
    pub mean_fault_recall: f64,
    /// Population standard deviation of `fault_recall`.
    pub stddev_fault_recall: f64,
    /// Sum of `raw_alert_count` over all fixtures.
    pub total_raw_alerts: u64,
    /// Sum of `fusion_episode_count` over all fixtures.
    pub total_episodes: u64,
    /// Sum of `consensus_confirmed_typed_episodes` over all fixtures.
    pub total_typed_episodes: u64,
    /// Consensus-confirmed total. `aggregate_loo_cv` fills this with the same
    /// sum as `total_typed_episodes`, because the fixture record carries a
    /// single counter for both.
    pub total_consensus_confirmed: u64,
    /// Leave-one-out deltas keyed by metric name (`"rscr"`,
    /// `"clean_window_fp_rate"`, `"fault_recall"`). Each entry lists
    /// `(fixture_name, delta)` pairs in input order. Empty when fewer than
    /// two fixtures were aggregated.
    pub per_fixture_deltas: BTreeMap<&'static str, Vec<(&'static str, f64)>>,
}
/// Entry point for leave-one-fixture-out cross-validation.
///
/// This is a thin alias: it forwards `records` unchanged to
/// [`aggregate_loo_cv`] and returns that function's aggregate.
pub fn run_loo_cv(records: &[LooCvFixtureRecord]) -> LooCvAggregate {
    aggregate_loo_cv(records)
}
/// Reduces a slice of fixture records to cross-fixture summary statistics.
///
/// For each of the three floating-point metrics (`rscr`,
/// `clean_window_fp_rate`, `fault_recall`) this computes the arithmetic mean
/// and the population standard deviation (divisor `n`). The integer counters
/// are summed, and fixtures whose `deterministic_replay_holds` is set are
/// counted.
///
/// When at least two fixtures are present, `per_fixture_deltas` receives one
/// entry per metric holding, for every fixture in input order, the difference
/// between that fixture's value and the mean of the *other* `n - 1` fixtures.
/// That difference equals `(value - mean) * n / (n - 1)`, which is how it is
/// computed here.
///
/// An empty slice yields an all-zero aggregate with no deltas. With exactly
/// one fixture the deltas stay empty (there are no "other" fixtures).
pub fn aggregate_loo_cv(records: &[LooCvFixtureRecord]) -> LooCvAggregate {
    /// Accessor selecting one floating-point metric from a record.
    type Metric = fn(&LooCvFixtureRecord) -> f64;

    fn rscr(record: &LooCvFixtureRecord) -> f64 {
        record.rscr
    }
    fn fp_rate(record: &LooCvFixtureRecord) -> f64 {
        record.clean_window_fp_rate
    }
    fn recall(record: &LooCvFixtureRecord) -> f64 {
        record.fault_recall
    }

    /// Arithmetic mean of `metric` over `records`.
    fn mean_of(records: &[LooCvFixtureRecord], count: f64, metric: Metric) -> f64 {
        records.iter().map(metric).sum::<f64>() / count
    }

    /// Population standard deviation of `metric` around `mean`.
    fn stddev_of(records: &[LooCvFixtureRecord], count: f64, mean: f64, metric: Metric) -> f64 {
        let squared_error = records
            .iter()
            .map(|record| (metric(record) - mean).powi(2))
            .sum::<f64>();
        (squared_error / count).sqrt()
    }

    /// Leave-one-out delta of `metric` for every fixture, in input order.
    fn deltas_of(
        records: &[LooCvFixtureRecord],
        mean: f64,
        scale: f64,
        metric: Metric,
    ) -> Vec<(&'static str, f64)> {
        records
            .iter()
            .map(|record| (record.fixture_name, (metric(record) - mean) * scale))
            .collect()
    }

    if records.is_empty() {
        return LooCvAggregate {
            fixtures_observed: 0,
            fixtures_with_replay_holds: 0,
            mean_rscr: 0.0,
            stddev_rscr: 0.0,
            mean_clean_window_fp_rate: 0.0,
            stddev_clean_window_fp_rate: 0.0,
            mean_fault_recall: 0.0,
            stddev_fault_recall: 0.0,
            total_raw_alerts: 0,
            total_episodes: 0,
            total_typed_episodes: 0,
            total_consensus_confirmed: 0,
            per_fixture_deltas: BTreeMap::new(),
        };
    }

    let fixture_count = records.len();
    let count = fixture_count as f64;

    let mean_rscr = mean_of(records, count, rscr);
    let mean_clean_window_fp_rate = mean_of(records, count, fp_rate);
    let mean_fault_recall = mean_of(records, count, recall);

    let stddev_rscr = stddev_of(records, count, mean_rscr, rscr);
    let stddev_clean_window_fp_rate =
        stddev_of(records, count, mean_clean_window_fp_rate, fp_rate);
    let stddev_fault_recall = stddev_of(records, count, mean_fault_recall, recall);

    let total_raw_alerts = records.iter().map(|r| r.raw_alert_count).sum::<u64>();
    let total_episodes = records.iter().map(|r| r.fusion_episode_count).sum::<u64>();
    // The record carries a single counter for typed / consensus-confirmed
    // episodes, so both aggregate totals are fed from this one sum.
    let typed_confirmed = records
        .iter()
        .map(|r| r.consensus_confirmed_typed_episodes)
        .sum::<u64>();
    let fixtures_with_replay_holds = records
        .iter()
        .filter(|r| r.deterministic_replay_holds)
        .count();

    let mut per_fixture_deltas: BTreeMap<&'static str, Vec<(&'static str, f64)>> =
        BTreeMap::new();
    if fixture_count > 1 {
        // value - mean(others) == (value - mean(all)) * n / (n - 1)
        let scale = count / (count - 1.0);
        per_fixture_deltas.insert("rscr", deltas_of(records, mean_rscr, scale, rscr));
        per_fixture_deltas.insert(
            "clean_window_fp_rate",
            deltas_of(records, mean_clean_window_fp_rate, scale, fp_rate),
        );
        per_fixture_deltas.insert(
            "fault_recall",
            deltas_of(records, mean_fault_recall, scale, recall),
        );
    }

    LooCvAggregate {
        fixtures_observed: fixture_count,
        fixtures_with_replay_holds,
        mean_rscr,
        stddev_rscr,
        mean_clean_window_fp_rate,
        stddev_clean_window_fp_rate,
        mean_fault_recall,
        stddev_fault_recall,
        total_raw_alerts,
        total_episodes,
        total_typed_episodes: typed_confirmed,
        total_consensus_confirmed: typed_confirmed,
        per_fixture_deltas,
    }
}
/// Decides whether a refined configuration may replace the baseline.
///
/// The refined aggregate is rejected when any of the following holds:
///
/// * its mean RSCR is below `baseline mean − 0.5 · baseline stddev`;
/// * its mean clean-window FP rate is above
///   `baseline mean + 0.5 · baseline stddev`;
/// * its mean fault recall is below `baseline mean − 0.5 · baseline stddev`;
/// * fewer of its fixtures have deterministic replay holding than in the
///   baseline;
/// * one of the three metric comparisons involves a NaN (either the refined
///   mean or the baseline-derived threshold). Ordered comparisons with NaN
///   are always false, so without this check a NaN metric would slip through
///   the gate unnoticed.
///
/// Every violated condition contributes one human-readable entry to
/// `Reject::reasons`; if none is violated the verdict is `Accept`.
pub fn refinement_passes_gate(
    baseline: &LooCvAggregate,
    refined: &LooCvAggregate,
) -> RefinementGateVerdict {
    let mut reasons: Vec<String> = Vec::new();

    let rscr_floor = baseline.mean_rscr - 0.5 * baseline.stddev_rscr;
    if refined.mean_rscr.is_nan() || rscr_floor.is_nan() {
        // NaN cannot be ordered against the floor; refuse rather than accept.
        reasons.push(format!(
            "RSCR not comparable: refined mean {:.4} vs gate floor {:.4} (NaN)",
            refined.mean_rscr, rscr_floor));
    } else if refined.mean_rscr < rscr_floor {
        reasons.push(format!(
            "RSCR regressed: refined mean {:.4} < baseline mean {:.4} − 0.5·stddev = {:.4}",
            refined.mean_rscr, baseline.mean_rscr, rscr_floor));
    }

    let fp_ceiling = baseline.mean_clean_window_fp_rate
        + 0.5 * baseline.stddev_clean_window_fp_rate;
    if refined.mean_clean_window_fp_rate.is_nan() || fp_ceiling.is_nan() {
        reasons.push(format!(
            "FP rate not comparable: refined mean {:.4} vs gate ceiling {:.4} (NaN)",
            refined.mean_clean_window_fp_rate, fp_ceiling));
    } else if refined.mean_clean_window_fp_rate > fp_ceiling {
        reasons.push(format!(
            "FP rate regressed: refined mean {:.4} > baseline mean {:.4} + 0.5·stddev = {:.4}",
            refined.mean_clean_window_fp_rate, baseline.mean_clean_window_fp_rate, fp_ceiling));
    }

    let recall_floor = baseline.mean_fault_recall - 0.5 * baseline.stddev_fault_recall;
    if refined.mean_fault_recall.is_nan() || recall_floor.is_nan() {
        reasons.push(format!(
            "Fault recall not comparable: refined mean {:.4} vs gate floor {:.4} (NaN)",
            refined.mean_fault_recall, recall_floor));
    } else if refined.mean_fault_recall < recall_floor {
        reasons.push(format!(
            "Fault recall regressed: refined mean {:.4} < baseline mean {:.4} − 0.5·stddev = {:.4}",
            refined.mean_fault_recall, baseline.mean_fault_recall, recall_floor));
    }

    // Replay is compared on absolute fixture counts, not on a ratio.
    if refined.fixtures_with_replay_holds < baseline.fixtures_with_replay_holds {
        reasons.push(format!(
            "Replay-holds regressed: refined {} < baseline {}",
            refined.fixtures_with_replay_holds, baseline.fixtures_with_replay_holds));
    }

    if reasons.is_empty() {
        RefinementGateVerdict::Accept
    } else {
        RefinementGateVerdict::Reject { reasons }
    }
}
/// Outcome returned by `refinement_passes_gate`.
#[derive(Clone, Debug)]
pub enum RefinementGateVerdict {
    /// No gate condition was violated.
    Accept,
    /// At least one gate condition was violated.
    Reject {
        /// One human-readable message per violated condition.
        reasons: Vec<String>,
    },
}
/// One fold of a K-fold cross-validation run.
#[derive(Clone, Debug)]
pub struct KFoldFoldRecord {
    /// Zero-based position of this fold.
    pub fold_index: usize,
    /// Names of the fixtures that make up this fold's test set.
    pub test_fixtures: Vec<&'static str>,
    /// Aggregate statistics computed over this fold's test set only.
    pub test_aggregate: LooCvAggregate,
}
/// Result of a K-fold cross-validation run, as produced by
/// `aggregate_kfold_cv`.
///
/// The cross-fold statistics are taken over the per-fold *means*: each fold
/// contributes one value per metric regardless of how many fixtures it holds.
/// Standard deviations are population standard deviations.
#[derive(Clone, Debug)]
pub struct KFoldCvAggregate {
    /// The `k` that was requested by the caller.
    pub k: usize,
    /// Size of a full fold (`ceil(n / k)`); `0` when the input was rejected
    /// as degenerate.
    pub fixtures_per_fold: usize,
    /// The folds that were actually produced; may be fewer than `k`.
    pub folds: Vec<KFoldFoldRecord>,
    /// Mean of the per-fold mean RSCR values.
    pub cross_fold_mean_rscr: f64,
    /// Population standard deviation of the per-fold mean RSCR values.
    pub cross_fold_stddev_rscr: f64,
    /// Mean of the per-fold mean clean-window FP rates.
    pub cross_fold_mean_fp: f64,
    /// Population standard deviation of the per-fold mean FP rates.
    pub cross_fold_stddev_fp: f64,
    /// Mean of the per-fold mean fault recalls.
    pub cross_fold_mean_recall: f64,
    /// Population standard deviation of the per-fold mean fault recalls.
    pub cross_fold_stddev_recall: f64,
    /// `true` only if, in every fold, all fixtures had deterministic replay
    /// holding. `false` for the degenerate (no folds) result.
    pub all_folds_replay_holds: bool,
}
pub fn aggregate_kfold_cv(records: &[LooCvFixtureRecord], k: usize) -> KFoldCvAggregate {
let n = records.len();
if k < 2 || n < k {
return KFoldCvAggregate {
k, fixtures_per_fold: 0, folds: Vec::new(),
cross_fold_mean_rscr: 0.0, cross_fold_stddev_rscr: 0.0,
cross_fold_mean_fp: 0.0, cross_fold_stddev_fp: 0.0,
cross_fold_mean_recall: 0.0, cross_fold_stddev_recall: 0.0,
all_folds_replay_holds: false,
};
}
let fpf = (n + k - 1) / k;
let mut folds: Vec<KFoldFoldRecord> = Vec::with_capacity(k);
let mut all_replay = true;
for fold_idx in 0..k {
let start = fold_idx * fpf;
let end = ((fold_idx + 1) * fpf).min(n);
if start >= n { continue; }
let test_set: Vec<LooCvFixtureRecord> = records[start..end].to_vec();
let test_names: Vec<&'static str> = test_set.iter()
.map(|r| r.fixture_name).collect();
let agg = aggregate_loo_cv(&test_set);
if agg.fixtures_with_replay_holds != agg.fixtures_observed {
all_replay = false;
}
folds.push(KFoldFoldRecord {
fold_index: fold_idx,
test_fixtures: test_names,
test_aggregate: agg,
});
}
let kf = folds.len() as f64;
let mean_rscr = folds.iter().map(|f| f.test_aggregate.mean_rscr).sum::<f64>() / kf;
let mean_fp = folds.iter().map(|f| f.test_aggregate.mean_clean_window_fp_rate).sum::<f64>() / kf;
let mean_recall = folds.iter().map(|f| f.test_aggregate.mean_fault_recall).sum::<f64>() / kf;
let stddev_rscr = (folds.iter()
.map(|f| (f.test_aggregate.mean_rscr - mean_rscr).powi(2))
.sum::<f64>() / kf).sqrt();
let stddev_fp = (folds.iter()
.map(|f| (f.test_aggregate.mean_clean_window_fp_rate - mean_fp).powi(2))
.sum::<f64>() / kf).sqrt();
let stddev_recall = (folds.iter()
.map(|f| (f.test_aggregate.mean_fault_recall - mean_recall).powi(2))
.sum::<f64>() / kf).sqrt();
KFoldCvAggregate {
k, fixtures_per_fold: fpf,
folds,
cross_fold_mean_rscr: mean_rscr,
cross_fold_stddev_rscr: stddev_rscr,
cross_fold_mean_fp: mean_fp,
cross_fold_stddev_fp: stddev_fp,
cross_fold_mean_recall: mean_recall,
cross_fold_stddev_recall: stddev_recall,
all_folds_replay_holds: all_replay,
}
}
/// Renders a K-fold cross-validation result as a Markdown report.
///
/// The report consists of a header block (requested K, number of folds
/// produced, fold size, overall replay flag), one table row per fold, a
/// three-row cross-fold summary table, and a fixed closing paragraph that
/// quotes the total number of fixtures across all folds. Floating-point
/// values are printed with four decimals.
pub fn render_kfold_cv_md(agg: &KFoldCvAggregate) -> String {
    // Total fixtures across all folds, quoted in the closing paragraph.
    let fixtures_tested: usize = agg
        .folds
        .iter()
        .map(|fold| fold.test_aggregate.fixtures_observed)
        .sum();

    let mut md = String::new();

    // Header block.
    md += &format!("# K-fold cross-validation (K = {}) — Phase η.2\n\n", agg.k);
    md += "Source: Phase η.2 K-fold harness (`src/audit/loo_cv.rs::aggregate_kfold_cv`).\n\n";
    md += &format!("**Folds:** {}\n", agg.folds.len());
    md += &format!("**Fixtures per fold:** {} (last fold may be smaller)\n", agg.fixtures_per_fold);
    md += &format!("**All folds Theorem 9 replay holds:** {}\n\n", agg.all_folds_replay_holds);

    // One table row per fold.
    md += "## Per-fold test-set aggregates\n\n";
    md += "| Fold | Test fixtures | RSCR | FP rate | Fault recall | Replay |\n";
    md += "|-----:|---------------|-----:|--------:|-------------:|:------:|\n";
    for fold in &agg.folds {
        let stats = &fold.test_aggregate;
        let quoted_names: Vec<String> = fold
            .test_fixtures
            .iter()
            .map(|name| format!("`{}`", name))
            .collect();
        md += &format!(
            "| {} | {} | {:.4} | {:.4} | {:.4} | {} / {} |\n",
            fold.fold_index,
            quoted_names.join(", "),
            stats.mean_rscr,
            stats.mean_clean_window_fp_rate,
            stats.mean_fault_recall,
            stats.fixtures_with_replay_holds,
            stats.fixtures_observed,
        );
    }

    // Cross-fold summary table.
    md += "\n## Cross-fold aggregate\n\n";
    md += "| Metric | Cross-fold mean | Cross-fold stddev |\n";
    md += "|--------|----------------:|------------------:|\n";
    md += &format!("| RSCR | {:.4} | {:.4} |\n",
        agg.cross_fold_mean_rscr, agg.cross_fold_stddev_rscr);
    md += &format!("| Clean-window FP rate | {:.4} | {:.4} |\n",
        agg.cross_fold_mean_fp, agg.cross_fold_stddev_fp);
    md += &format!("| Fault recall | {:.4} | {:.4} |\n",
        agg.cross_fold_mean_recall, agg.cross_fold_stddev_recall);

    // Fixed interpretive paragraph.
    md += "\n## Honest empirical reading\n\n";
    md += "K-fold CV averages over multiple test-set configurations\n";
    md += "(in contrast to LO-1 which holds out one fixture at a time).\n";
    md += "With N = ";
    md += &format!("{} fixtures and K = {}, ", fixtures_tested, agg.k);
    md += "each fold tests on a multi-fixture set (lower variance per\n";
    md += "fold than LO-1, but fewer folds than LO-1 — the two views\n";
    md += "triangulate the cross-validation noise floor).\n";

    md
}
/// Renders a leave-one-fixture-out aggregate as a Markdown report.
///
/// `label` is inserted into the top-level heading. The report lists the
/// fixture and replay counts, a table of mean / stddev per metric together
/// with the gate threshold derived from them (`mean − 0.5·stddev` as a floor
/// for RSCR and fault recall, `mean + 0.5·stddev` as a ceiling for the FP
/// rate), the summed counters, and one delta table per metric.
///
/// Delta tables are emitted in the fixed order `rscr`,
/// `clean_window_fp_rate`, `fault_recall`; a metric with no entry in
/// `per_fixture_deltas` is skipped, and any other keys in the map are
/// ignored. Deltas are printed with an explicit sign.
pub fn render_loo_cv_baseline_md(agg: &LooCvAggregate, label: &str) -> String {
    const DELTA_SECTIONS: [&str; 3] = ["rscr", "clean_window_fp_rate", "fault_recall"];

    // Gate thresholds shown in the last column of the summary table.
    let rscr_floor = agg.mean_rscr - 0.5 * agg.stddev_rscr;
    let fp_ceiling = agg.mean_clean_window_fp_rate + 0.5 * agg.stddev_clean_window_fp_rate;
    let recall_floor = agg.mean_fault_recall - 0.5 * agg.stddev_fault_recall;

    let mut md = String::new();

    // Heading and fixture counts.
    md += &format!("# Leave-one-fixture-out cross-validation: {}\n\n", label);
    md += "Source: Phase ζ.2 LO-CV harness (`src/audit/loo_cv.rs`).\n\n";
    md += &format!("**Fixtures observed:** {}\n", agg.fixtures_observed);
    md += &format!("**Fixtures with deterministic replay verified:** {} / {}\n\n",
        agg.fixtures_with_replay_holds, agg.fixtures_observed);

    // Summary table.
    md += "## Cross-fixture aggregate\n\n";
    md += "| Metric | Mean | Stddev | LO-CV gate floor (mean−0.5·stddev) |\n";
    md += "|--------|-----:|-------:|----------------------------------:|\n";
    md += &format!("| RSCR | {:.4} | {:.4} | {:.4} |\n",
        agg.mean_rscr, agg.stddev_rscr, rscr_floor);
    md += &format!("| Clean-window FP rate | {:.4} | {:.4} | (ceiling: {:.4}) |\n",
        agg.mean_clean_window_fp_rate, agg.stddev_clean_window_fp_rate, fp_ceiling);
    md += &format!("| Fault recall | {:.4} | {:.4} | {:.4} |\n",
        agg.mean_fault_recall, agg.stddev_fault_recall, recall_floor);

    // Summed counters.
    md += &format!("\n**Total raw alerts:** {}\n", agg.total_raw_alerts);
    md += &format!("**Total episodes:** {}\n", agg.total_episodes);
    md += &format!("**Total typed-confirmed episodes:** {}\n\n",
        agg.total_typed_episodes);

    // Per-metric delta tables.
    md += "## Per-fixture LO-CV deltas\n\n";
    md += "Delta = (fixture metric) − (mean over other N-1 fixtures).\n";
    md += "Large positive deltas indicate the fixture pulls the mean upward;\n";
    md += "large negative deltas indicate the fixture pulls it downward.\n\n";
    for &metric in DELTA_SECTIONS.iter() {
        let rows = match agg.per_fixture_deltas.get(metric) {
            Some(rows) => rows,
            None => continue,
        };
        md += &format!("### {}\n\n", metric);
        md += "| Fixture | Delta |\n";
        md += "|---------|------:|\n";
        for (fixture, delta) in rows {
            md += &format!("| `{}` | {:+.4} |\n", fixture, delta);
        }
        md += "\n";
    }

    md
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::vec;

    /// Builds a fixture record with the given metrics; the counters are fixed
    /// (one raw alert, one episode, no typed episodes) and replay holds.
    fn record(name: &'static str, rscr: f64, fp: f64, recall: f64) -> LooCvFixtureRecord {
        LooCvFixtureRecord {
            fixture_name: name,
            rscr,
            clean_window_fp_rate: fp,
            fault_recall: recall,
            raw_alert_count: 1,
            fusion_episode_count: 1,
            consensus_confirmed_typed_episodes: 0,
            deterministic_replay_holds: true,
        }
    }

    /// Fixtures `a`, `b`, `c` with RSCR 1/2/3 and FP rate 0.10/0.20/0.30,
    /// carrying the supplied fault-recall values.
    fn abc_fixtures(recalls: [f64; 3]) -> [LooCvFixtureRecord; 3] {
        [
            record("a", 1.0, 0.10, recalls[0]),
            record("b", 2.0, 0.20, recalls[1]),
            record("c", 3.0, 0.30, recalls[2]),
        ]
    }

    #[test]
    fn empty_records_produce_zero_aggregate() {
        let aggregate = aggregate_loo_cv(&[]);
        assert_eq!(aggregate.fixtures_observed, 0);
        assert_eq!(aggregate.mean_rscr, 0.0);
    }

    #[test]
    fn aggregate_computes_mean_and_stddev() {
        let fixtures = vec![
            record("a", 1.0, 0.10, 0.5),
            record("b", 2.0, 0.20, 0.7),
            record("c", 3.0, 0.30, 0.9),
        ];
        let aggregate = aggregate_loo_cv(&fixtures);
        assert_eq!(aggregate.fixtures_observed, 3);
        assert!((aggregate.mean_rscr - 2.0).abs() < 1e-9);
        // Population stddev of {1, 2, 3} is sqrt(2/3).
        assert!((aggregate.stddev_rscr - 0.8164965809277260).abs() < 1e-9);
    }

    #[test]
    fn refinement_gate_accepts_within_tolerance() {
        let baseline = aggregate_loo_cv(&abc_fixtures([0.5, 0.7, 0.9]));
        let refined = baseline.clone();
        if let RefinementGateVerdict::Reject { reasons } =
            refinement_passes_gate(&baseline, &refined)
        {
            panic!("should accept: {:?}", reasons);
        }
    }

    #[test]
    fn refinement_gate_rejects_recall_regression() {
        // Constant recall gives zero stddev, so any drop breaches the floor.
        let baseline = aggregate_loo_cv(&abc_fixtures([0.7, 0.7, 0.7]));
        let refined = aggregate_loo_cv(&abc_fixtures([0.6, 0.6, 0.6]));
        if let RefinementGateVerdict::Reject { reasons } =
            refinement_passes_gate(&baseline, &refined)
        {
            assert!(reasons.iter().any(|r| r.contains("Fault recall")));
        } else {
            panic!("should reject");
        }
    }
}