extern crate std;
use std::format;
use std::string::String;
use std::vec::Vec;
use super::loo_cv::{LooCvFixtureRecord, aggregate_loo_cv};
/// Number of bootstrap iterations that `bootstrap_ci` passes to
/// `bootstrap_ci_with_seed`.
pub const DEFAULT_BOOTSTRAP_ITERATIONS: usize = 1_000;
/// Seed that `bootstrap_ci` passes to `bootstrap_ci_with_seed`; a fixed value
/// keeps repeated runs over the same records identical.
pub const DEFAULT_BOOTSTRAP_SEED: u64 = 0x6173_6566_656c_7900;
/// A point estimate together with the bounds of its bootstrap interval.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct BootstrapCi {
/// Value of the metric computed on the original, un-resampled records.
pub point_estimate: f64,
/// Lower bound: the sorted bootstrap sample at index `floor(len * 0.025)`.
pub ci_lower_2_5: f64,
/// Upper bound: the sorted bootstrap sample at index `floor(len * 0.975)`,
/// clamped to the last sample.
pub ci_upper_97_5: f64,
}
/// Result of one bootstrap run: the run parameters plus one interval per metric.
#[derive(Debug, Clone)]
pub struct BootstrapAggregate {
/// Number of resampling iterations performed (0 when no records were given).
pub iterations: usize,
/// Number of records drawn per iteration; equals the input record count.
pub fixtures_resampled: usize,
/// Seed the sampler was initialised with.
pub seed: u64,
/// Interval for the aggregate's `mean_rscr`.
pub rscr: BootstrapCi,
/// Interval for the aggregate's `mean_clean_window_fp_rate`.
pub clean_window_fp_rate: BootstrapCi,
/// Interval for the aggregate's `mean_fault_recall`.
pub fault_recall: BootstrapCi,
/// Interval for `total_typed_episodes` divided by the record count.
pub typed_confirmed_per_fixture: BootstrapCi,
}
/// Minimal deterministic 64-bit linear congruential generator used to pick
/// resampling indices.
struct Lcg {
    /// Current generator state; replaced on every call to `next_u64`.
    state: u64,
}

impl Lcg {
    /// Creates a generator whose initial state is `seed + 1` (wrapping).
    fn new(seed: u64) -> Self {
        Self {
            state: seed.wrapping_add(1),
        }
    }

    /// Advances the generator by one step (`state * a + c` modulo 2^64) and
    /// returns the new state.
    fn next_u64(&mut self) -> u64 {
        self.state = self
            .state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        self.state
    }

    /// Returns an index in `0..modulus`, consuming one generator step.
    ///
    /// The index is derived from the *high* bits of the raw output via a
    /// 128-bit multiply-high: `(x * modulus) >> 64`. Reducing with
    /// `x % modulus` would read the low bits, and the low `k` bits of a
    /// modulo-2^64 LCG repeat with period `2^k`. For a power-of-two `modulus`
    /// every run of `modulus` consecutive draws would then be a permutation
    /// of `0..modulus`, so a resample of that size would always reproduce the
    /// original set.
    ///
    /// # Panics
    ///
    /// Panics if `modulus` is zero, since there is no valid index to return.
    fn next_index(&mut self, modulus: usize) -> usize {
        assert!(modulus > 0, "Lcg::next_index requires a non-zero modulus");
        let scaled = u128::from(self.next_u64()) * modulus as u128;
        // `scaled >> 64` is strictly less than `modulus`, so it fits in usize.
        (scaled >> 64) as usize
    }
}
/// Runs the bootstrap over `records` using `DEFAULT_BOOTSTRAP_ITERATIONS`
/// iterations and `DEFAULT_BOOTSTRAP_SEED` as the sampler seed.
pub fn bootstrap_ci(records: &[LooCvFixtureRecord]) -> BootstrapAggregate {
bootstrap_ci_with_seed(records, DEFAULT_BOOTSTRAP_ITERATIONS, DEFAULT_BOOTSTRAP_SEED)
}
/// Runs a fixture-level bootstrap over `records` and returns one interval per
/// metric.
///
/// Each of the `iterations` rounds draws `records.len()` records with
/// replacement (indices come from an `Lcg` seeded with `seed`), aggregates the
/// draw with `aggregate_loo_cv`, and records the resulting means. The point
/// estimates come from aggregating `records` themselves.
///
/// When `records` is empty the result reports zero iterations, zero fixtures
/// and all-zero intervals; `seed` is still echoed back.
pub fn bootstrap_ci_with_seed(
    records: &[LooCvFixtureRecord],
    iterations: usize,
    seed: u64,
) -> BootstrapAggregate {
    let fixture_count = records.len();
    if fixture_count == 0 {
        let degenerate = BootstrapCi {
            point_estimate: 0.0,
            ci_lower_2_5: 0.0,
            ci_upper_97_5: 0.0,
        };
        return BootstrapAggregate {
            iterations: 0,
            fixtures_resampled: 0,
            seed,
            rscr: degenerate,
            clean_window_fp_rate: degenerate,
            fault_recall: degenerate,
            typed_confirmed_per_fixture: degenerate,
        };
    }

    let mut sampler = Lcg::new(seed);
    let mut rscr_samples: Vec<f64> = Vec::with_capacity(iterations);
    let mut fp_samples: Vec<f64> = Vec::with_capacity(iterations);
    let mut recall_samples: Vec<f64> = Vec::with_capacity(iterations);
    let mut typed_samples: Vec<f64> = Vec::with_capacity(iterations);

    // One scratch buffer is reused for every round's draw.
    let mut draw: Vec<LooCvFixtureRecord> = Vec::with_capacity(fixture_count);
    for _ in 0..iterations {
        draw.clear();
        draw.extend(
            (0..fixture_count).map(|_| records[sampler.next_index(fixture_count)].clone()),
        );

        let round = aggregate_loo_cv(&draw);
        rscr_samples.push(round.mean_rscr);
        fp_samples.push(round.mean_clean_window_fp_rate);
        recall_samples.push(round.mean_fault_recall);
        typed_samples.push(round.total_typed_episodes as f64 / fixture_count as f64);
    }

    // Point estimates are taken from the original records, not from the draws.
    let observed = aggregate_loo_cv(records);
    let observed_typed = observed.total_typed_episodes as f64 / fixture_count as f64;

    let rscr = ci_from_samples(&mut rscr_samples, observed.mean_rscr);
    let clean_window_fp_rate =
        ci_from_samples(&mut fp_samples, observed.mean_clean_window_fp_rate);
    let fault_recall = ci_from_samples(&mut recall_samples, observed.mean_fault_recall);
    let typed_confirmed_per_fixture = ci_from_samples(&mut typed_samples, observed_typed);

    BootstrapAggregate {
        iterations,
        fixtures_resampled: fixture_count,
        seed,
        rscr,
        clean_window_fp_rate,
        fault_recall,
        typed_confirmed_per_fixture,
    }
}
/// Builds an interval from bootstrap `samples` around the given `point`.
///
/// `samples` is sorted in place in ascending IEEE-754 total order, so the
/// result is deterministic even if a sample is NaN: positive NaNs sort after
/// `+inf` and negative NaNs before `-inf`. A `partial_cmp`-based comparator is
/// not a total order once a NaN is present, which leaves the sort order
/// unspecified and may make the sort panic.
///
/// The lower bound is the sample at index `floor(len * 0.025)` and the upper
/// bound the sample at index `floor(len * 0.975)`, both clamped to the last
/// index. With no samples both bounds are `0.0` and only `point` is carried
/// through.
fn ci_from_samples(samples: &mut [f64], point: f64) -> BootstrapCi {
    let n = samples.len();
    if n == 0 {
        return BootstrapCi {
            point_estimate: point,
            ci_lower_2_5: 0.0,
            ci_upper_97_5: 0.0,
        };
    }

    // Values that compare equal under `total_cmp` are bit-identical, so an
    // unstable sort cannot change the outcome.
    samples.sort_unstable_by(f64::total_cmp);

    let last = n - 1;
    let lower_idx = ((n as f64 * 0.025).floor() as usize).min(last);
    let upper_idx = ((n as f64 * 0.975).floor() as usize).min(last);
    BootstrapCi {
        point_estimate: point,
        ci_lower_2_5: samples[lower_idx],
        ci_upper_97_5: samples[upper_idx],
    }
}
/// Renders `agg` as a markdown report.
///
/// The report consists of a fixed introduction, the run parameters
/// (iterations, fixtures per iteration, seed in 16-digit hex), a table with
/// one row per metric written by `write_ci_row`, and a closing paragraph that
/// quotes the fixture count.
pub fn render_bootstrap_md(agg: &BootstrapAggregate) -> String {
    let intro = [
        "# Bootstrap confidence intervals (Phase η.1)\n\n",
        "Cross-fixture LO-CV aggregates with 95% percentile-based\n",
        "bootstrap confidence intervals. Fixture-level resampling\n",
        "with replacement; deterministic LCG sampler (no `rand`\n",
        "dependency); fixed seed for reproducibility.\n\n",
        "Source: Phase η.1 audit harness (`src/audit/bootstrap.rs`).\n\n",
    ];
    let table_head = [
        "## 95% confidence intervals\n\n",
        "| Metric | Point estimate | 95% CI lower | 95% CI upper | CI width |\n",
        "|--------|---------------:|-------------:|-------------:|---------:|\n",
    ];
    let rows = [
        ("RSCR (structural mean)", &agg.rscr),
        ("Clean-window FP rate", &agg.clean_window_fp_rate),
        ("Fault recall", &agg.fault_recall),
        ("Typed-confirmed / fixture", &agg.typed_confirmed_per_fixture),
    ];
    let reading_tail = [
        " bounded fixtures, the bootstrap CI is wide by\n",
        "construction — the empirical surface does not yet support\n",
        "tight aggregate claims. The CI width is itself the honest\n",
        "read: more fixtures (Phase II partner data) shrink the\n",
        "interval; cross-fixture variance dominates the cross-\n",
        "validation noise. The point estimates are the operator\n",
        "anchor; the CI bounds are the publication-honest range.\n",
    ];

    let mut md = String::new();
    for piece in intro.iter() {
        md.push_str(piece);
    }

    let run_params = format!(
        "**Iterations:** {} \n**Fixtures resampled per iteration:** {} \n**LCG seed:** 0x{:016x}\n\n",
        agg.iterations, agg.fixtures_resampled, agg.seed
    );
    md.push_str(&run_params);

    for piece in table_head.iter() {
        md.push_str(piece);
    }
    for &(label, ci) in rows.iter() {
        write_ci_row(&mut md, label, ci);
    }

    md.push_str("\n## Honest empirical reading\n\n");
    md.push_str("With N = ");
    md.push_str(&format!("{}", agg.fixtures_resampled));
    for piece in reading_tail.iter() {
        md.push_str(piece);
    }
    md
}
fn write_ci_row(out: &mut String, label: &str, ci: &BootstrapCi) {
let width = ci.ci_upper_97_5 - ci.ci_lower_2_5;
out.push_str(&format!(
"| {} | {:.4} | {:.4} | {:.4} | {:.4} |\n",
label, ci.point_estimate, ci.ci_lower_2_5, ci.ci_upper_97_5, width));
}
// Unit tests for the sampler and the bootstrap entry points.
#[cfg(test)]
mod tests {
use super::*;
use std::vec;
use super::super::loo_cv::LooCvFixtureRecord;
// Builds a fixture record with the given metrics; the alert and episode
// counts are zeroed and deterministic replay is marked as holding.
fn rec(name: &'static str, rscr: f64, fp: f64, recall: f64, typed: u64)
-> LooCvFixtureRecord {
LooCvFixtureRecord {
fixture_name: name,
rscr,
clean_window_fp_rate: fp,
fault_recall: recall,
raw_alert_count: 0,
fusion_episode_count: 0,
consensus_confirmed_typed_episodes: typed,
deterministic_replay_holds: true,
}
}
// Two generators built from the same seed must emit the same raw sequence.
#[test]
fn lcg_is_deterministic() {
let mut a = Lcg::new(12345);
let mut b = Lcg::new(12345);
for _ in 0..100 {
assert_eq!(a.next_u64(), b.next_u64());
}
}
// Identical records leave nothing to vary between resamples, so the RSCR
// interval must have zero width and the point estimate must equal the
// shared value.
#[test]
fn bootstrap_on_constant_data_gives_zero_width_ci() {
let records = vec![
rec("a", 5.0, 0.1, 0.9, 1),
rec("b", 5.0, 0.1, 0.9, 1),
rec("c", 5.0, 0.1, 0.9, 1),
];
let agg = bootstrap_ci(&records);
assert!((agg.rscr.ci_upper_97_5 - agg.rscr.ci_lower_2_5).abs() < 1e-9,
"CI should collapse on constant data");
assert!((agg.rscr.point_estimate - 5.0).abs() < 1e-9);
}
// Widely spread RSCR values must yield an interval wider than 1.0.
#[test]
fn bootstrap_widens_ci_with_variance() {
let records = vec![
rec("a", 1.0, 0.0, 0.5, 0),
rec("b", 5.0, 0.5, 0.7, 1),
rec("c", 50.0, 0.9, 1.0, 5),
];
let agg = bootstrap_ci(&records);
let width = agg.rscr.ci_upper_97_5 - agg.rscr.ci_lower_2_5;
assert!(width > 1.0,
"Heterogeneous data should produce non-trivial CI width; got {width}");
}
// Two runs with the default seed over the same records must report exactly
// the same RSCR bounds.
#[test]
fn bootstrap_is_deterministic() {
let records = vec![
rec("a", 1.0, 0.1, 0.9, 0),
rec("b", 5.0, 0.5, 0.7, 1),
rec("c", 25.0, 0.2, 0.5, 2),
];
let agg1 = bootstrap_ci(&records);
let agg2 = bootstrap_ci(&records);
assert_eq!(agg1.rscr.ci_lower_2_5, agg2.rscr.ci_lower_2_5);
assert_eq!(agg1.rscr.ci_upper_97_5, agg2.rscr.ci_upper_97_5);
}
// Different seeds must produce different first raw outputs.
#[test]
fn bootstrap_seed_affects_lcg_sequence() {
let mut a = Lcg::new(1);
let mut b = Lcg::new(2);
assert_ne!(a.next_u64(), b.next_u64());
}
}