use faer::Side;
use ndarray::{Array1, Array2};
use crate::faer_ndarray::{FaerCholesky, FaerEigh, fast_av};
#[derive(Clone, Debug, PartialEq)]
pub struct ConformalInterval {
pub lo: f64,
pub hi: f64,
}
#[derive(Clone, Debug)]
pub struct FullConformalSet {
pub intervals: Vec<ConformalInterval>,
pub alpha: f64,
pub n_augmented: usize,
pub boundary_margin: f64,
}
pub struct ExactGaussianFullConformal {
u: Array1<f64>,
w: Array1<f64>,
n: usize,
}
impl ExactGaussianFullConformal {
pub fn new(
x: &Array2<f64>,
y: &Array1<f64>,
prior_weights: &Array1<f64>,
s_lambda: &Array2<f64>,
x_star: &Array1<f64>,
) -> Result<Self, String> {
let n = x.nrows();
let p = x.ncols();
if y.len() != n || prior_weights.len() != n {
return Err("full conformal: row-count mismatch".to_string());
}
if s_lambda.nrows() != p || s_lambda.ncols() != p || x_star.len() != p {
return Err("full conformal: column-count mismatch".to_string());
}
if prior_weights.iter().any(|&w| (w - 1.0).abs() > 1e-12) {
return Err(
"full conformal requires unit prior weights: a reweighted training row is \
not exchangeable with the test row, so the finite-sample coverage proof \
does not apply; use the split/ALO conformal calibrator instead"
.to_string(),
);
}
let mut m = x.t().dot(x) + s_lambda;
for i in 0..p {
for j in 0..p {
m[[i, j]] += x_star[i] * x_star[j];
}
}
let chol = m
.cholesky(Side::Lower)
.map_err(|e| format!("full conformal: augmented normal matrix not SPD: {e:?}"))?;
let xty = x.t().dot(y);
let a = chol.solvevec(&xty);
let b = chol.solvevec(&x_star.to_owned());
let mut u = Array1::<f64>::zeros(n + 1);
let mut w = Array1::<f64>::zeros(n + 1);
let xa = fast_av(x, &a);
let xb = fast_av(x, &b);
for i in 0..n {
u[i] = y[i] - xa[i];
w[i] = -xb[i];
}
let mu_a_star = x_star.dot(&a);
let h_frac = x_star.dot(&b); u[n] = -mu_a_star;
w[n] = 1.0 - h_frac; if w[n] <= 0.0 {
return Err(
"full conformal: test-residual slope 1 − x_*ᵀM⁻¹x_* must be positive; \
non-SPD or numerically broken augmented system"
.to_string(),
);
}
Ok(Self { u, w, n })
}
fn dominating_count(&self, z: f64) -> usize {
let e_star = (self.u[self.n] + self.w[self.n] * z).abs();
(0..self.n)
.filter(|&i| (self.u[i] + self.w[i] * z).abs() >= e_star)
.count()
}
fn member(&self, z: f64, alpha: f64) -> bool {
let n1 = (self.n + 1) as f64;
(1.0 + self.dominating_count(z) as f64) > alpha * n1
}
fn required_dominating_count(&self, alpha: f64) -> usize {
let threshold = alpha * (self.n + 1) as f64;
for count in 0..=self.n {
if 1.0 + count as f64 > threshold {
return count;
}
}
self.n + 1
}
fn local_decision_margin(&self, z: f64, alpha: f64) -> f64 {
let required = self.required_dominating_count(alpha);
if required == 0 {
return f64::INFINITY;
}
let e_star = (self.u[self.n] + self.w[self.n] * z).abs();
let mut true_gaps = Vec::new();
let mut false_gaps = Vec::new();
for i in 0..self.n {
let e_i = (self.u[i] + self.w[i] * z).abs();
let gap = (e_i - e_star).abs();
if e_i >= e_star {
true_gaps.push(gap);
} else {
false_gaps.push(gap);
}
}
if true_gaps.len() >= required {
true_gaps.sort_by(|a, b| a.partial_cmp(b).expect("finite score gaps"));
true_gaps[true_gaps.len() - required]
} else {
let needed = required - true_gaps.len();
false_gaps.sort_by(|a, b| a.partial_cmp(b).expect("finite score gaps"));
false_gaps.get(needed - 1).copied().unwrap_or(f64::INFINITY)
}
}
fn push_finite_root(points: &mut Vec<f64>, numerator: f64, denominator: f64) {
if denominator.abs() > 0.0 {
let z = numerator / denominator;
if z.is_finite() {
points.push(z);
}
}
}
fn abs_residual_affine_at(&self, row: usize, z: f64) -> (f64, f64) {
let value = self.u[row] + self.w[row] * z;
let sign = if value >= 0.0 { 1.0 } else { -1.0 };
(sign * self.w[row], sign * self.u[row])
}
fn gap_affines_on_cell(&self, z: f64) -> Vec<(bool, f64, f64)> {
let (star_slope, star_intercept) = self.abs_residual_affine_at(self.n, z);
let mut gaps = Vec::with_capacity(self.n);
for i in 0..self.n {
let (row_slope, row_intercept) = self.abs_residual_affine_at(i, z);
let diff_slope = row_slope - star_slope;
let diff_intercept = row_intercept - star_intercept;
let diff = diff_slope * z + diff_intercept;
if diff >= 0.0 {
gaps.push((true, diff_slope, diff_intercept));
} else {
gaps.push((false, -diff_slope, -diff_intercept));
}
}
gaps
}
fn asymptotic_abs_residual_affine(&self, row: usize, direction: f64) -> (f64, f64) {
let slope_in_t = direction * self.w[row];
let sign = if slope_in_t > 0.0 {
1.0
} else if slope_in_t < 0.0 {
-1.0
} else if self.u[row] >= 0.0 {
1.0
} else {
-1.0
};
(sign * slope_in_t, sign * self.u[row])
}
fn asymptotic_decision_margin(&self, direction: f64, alpha: f64) -> f64 {
let required = self.required_dominating_count(alpha);
if required == 0 {
return f64::INFINITY;
}
let (star_slope, star_intercept) = self.asymptotic_abs_residual_affine(self.n, direction);
let mut true_gaps = Vec::new();
let mut false_gaps = Vec::new();
for i in 0..self.n {
let (row_slope, row_intercept) = self.asymptotic_abs_residual_affine(i, direction);
let diff_slope = row_slope - star_slope;
let diff_intercept = row_intercept - star_intercept;
let truth = diff_slope > 0.0 || (diff_slope == 0.0 && diff_intercept >= 0.0);
let gap = if truth {
(diff_slope, diff_intercept)
} else {
(-diff_slope, -diff_intercept)
};
if truth {
true_gaps.push(gap);
} else {
false_gaps.push(gap);
}
}
let critical = if true_gaps.len() >= required {
true_gaps.sort_by(|a, b| {
a.0.partial_cmp(&b.0)
.expect("finite asymptotic slopes")
.then_with(|| a.1.partial_cmp(&b.1).expect("finite asymptotic intercepts"))
});
true_gaps.get(true_gaps.len() - required).copied()
} else {
let needed = required - true_gaps.len();
false_gaps.sort_by(|a, b| {
a.0.partial_cmp(&b.0)
.expect("finite asymptotic slopes")
.then_with(|| a.1.partial_cmp(&b.1).expect("finite asymptotic intercepts"))
});
false_gaps.get(needed - 1).copied()
};
match critical {
Some((slope, intercept)) if slope == 0.0 => intercept.max(0.0),
Some(_) => f64::INFINITY,
None => f64::INFINITY,
}
}
fn margin_without_finite_boundaries(&self, alpha: f64, roots: &[f64]) -> f64 {
let mut points = roots.to_vec();
for row in 0..=self.n {
Self::push_finite_root(&mut points, -self.u[row], self.w[row]);
}
points.sort_by(|a, b| a.partial_cmp(b).expect("finite breakpoints"));
points.dedup_by(|a, b| *a == *b);
let mut eval_points = points.clone();
for cell in 0..=points.len() {
let lo = if cell == 0 {
f64::NEG_INFINITY
} else {
points[cell - 1]
};
let hi = if cell == points.len() {
f64::INFINITY
} else {
points[cell]
};
let z = if lo.is_finite() && hi.is_finite() {
0.5 * (lo + hi)
} else if lo.is_finite() {
lo + 1.0
} else if hi.is_finite() {
hi - 1.0
} else {
0.0
};
let gaps = self.gap_affines_on_cell(z);
let required = self.required_dominating_count(alpha);
if required == 0 {
continue;
}
let true_count = gaps.iter().filter(|g| g.0).count();
let need_truth = true_count >= required;
let relevant: Vec<(f64, f64)> = gaps
.iter()
.filter(|g| g.0 == need_truth)
.map(|g| (g.1, g.2))
.collect();
for a in 0..relevant.len() {
for b in (a + 1)..relevant.len() {
let denominator = relevant[a].0 - relevant[b].0;
if denominator.abs() > 0.0 {
let cross = (relevant[b].1 - relevant[a].1) / denominator;
if cross.is_finite() && cross > lo && cross < hi {
eval_points.push(cross);
}
}
}
}
}
eval_points.sort_by(|a, b| a.partial_cmp(b).expect("finite margin points"));
eval_points.dedup_by(|a, b| *a == *b);
let mut margin = f64::INFINITY;
if eval_points.is_empty() {
margin = margin.min(self.local_decision_margin(0.0, alpha));
} else {
for z in eval_points {
margin = margin.min(self.local_decision_margin(z, alpha));
}
}
margin = margin.min(self.asymptotic_decision_margin(1.0, alpha));
margin = margin.min(self.asymptotic_decision_margin(-1.0, alpha));
margin
}
pub fn prediction_set(&self, alpha: f64) -> FullConformalSet {
let n = self.n;
let (us, ws) = (self.u[n], self.w[n]);
let mut roots: Vec<f64> = Vec::with_capacity(2 * n);
for i in 0..n {
let d = ws - self.w[i];
Self::push_finite_root(&mut roots, self.u[i] - us, d);
let s = ws + self.w[i];
Self::push_finite_root(&mut roots, -(us + self.u[i]), s);
}
roots.sort_by(|p, q| p.partial_cmp(q).expect("finite breakpoints"));
roots.dedup_by(|p, q| *p == *q);
let mut probes: Vec<f64> = Vec::with_capacity(2 * roots.len() + 3);
if roots.is_empty() {
probes.push(0.0);
} else {
let span = (roots[roots.len() - 1] - roots[0]).max(1.0);
probes.push(roots[0] - span);
for k in 0..roots.len() {
probes.push(roots[k]);
if k + 1 < roots.len() {
probes.push(0.5 * (roots[k] + roots[k + 1]));
}
}
probes.push(roots[roots.len() - 1] + span);
}
let mut intervals: Vec<ConformalInterval> = Vec::new();
let mut open_lo: Option<f64> = None;
let gap_bounds = |idx: usize| -> (f64, f64) {
if roots.is_empty() {
return (f64::NEG_INFINITY, f64::INFINITY);
}
if idx == 0 {
return (f64::NEG_INFINITY, roots[0]);
}
if idx == probes.len() - 1 {
return (roots[roots.len() - 1], f64::INFINITY);
}
let k = (idx - 1) / 2; if idx % 2 == 1 {
(roots[k], roots[k])
} else {
(roots[k], roots[k + 1])
}
};
for (idx, &z) in probes.iter().enumerate() {
let inside = self.member(z, alpha);
let (lo, hi) = gap_bounds(idx);
if inside {
if open_lo.is_none() {
open_lo = Some(lo);
}
if idx == probes.len() - 1 {
intervals.push(ConformalInterval {
lo: open_lo.take().expect("open interval"),
hi,
});
}
} else if let Some(lo_open) = open_lo.take() {
intervals.push(ConformalInterval {
lo: lo_open,
hi: lo,
});
}
}
let mut finite_endpoints = Vec::new();
for itv in &intervals {
for endpoint in [itv.lo, itv.hi] {
if endpoint.is_finite() {
finite_endpoints.push(endpoint);
}
}
}
let boundary_margin = if finite_endpoints.is_empty() {
self.margin_without_finite_boundaries(alpha, &roots)
} else {
finite_endpoints
.into_iter()
.map(|endpoint| self.local_decision_margin(endpoint, alpha))
.fold(f64::INFINITY, f64::min)
};
FullConformalSet {
intervals,
alpha,
n_augmented: n + 1,
boundary_margin,
}
}
pub fn outer_envelope(&self, alpha: f64, mu_point: f64) -> (f64, f64) {
let set = self.prediction_set(alpha);
if set.intervals.is_empty() {
return (mu_point, mu_point);
}
let mut lo = f64::INFINITY;
let mut hi = f64::NEG_INFINITY;
for itv in &set.intervals {
lo = lo.min(itv.lo);
hi = hi.max(itv.hi);
}
(lo, hi)
}
}
pub trait SymmetricAugmentedFit {
fn scores(&mut self, z: f64) -> Result<Array1<f64>, String>;
}
impl<F> SymmetricAugmentedFit for F
where
F: FnMut(f64) -> Result<Array1<f64>, String>,
{
fn scores(&mut self, z: f64) -> Result<Array1<f64>, String> {
self(z)
}
}
#[derive(Clone, Debug)]
pub struct DiscreteCandidate {
pub z: f64,
pub p_value: f64,
pub member: bool,
}
#[derive(Clone, Debug)]
pub struct DiscreteFullConformalSet {
pub members: Vec<f64>,
pub candidates: Vec<DiscreteCandidate>,
pub alpha: f64,
pub n_augmented: usize,
pub lower_tail_unresolved: Option<f64>,
pub upper_tail_unresolved: Option<f64>,
}
pub fn discrete_full_conformal_exhaustive<M: SymmetricAugmentedFit>(
fit: &mut M,
support: &[f64],
alpha: f64,
) -> Result<DiscreteFullConformalSet, String> {
let mut set = discrete_walk(fit, support, alpha)?;
set.lower_tail_unresolved = None;
set.upper_tail_unresolved = None;
Ok(set)
}
pub fn discrete_full_conformal_window<M: SymmetricAugmentedFit>(
fit: &mut M,
window: &[f64],
alpha: f64,
) -> Result<DiscreteFullConformalSet, String> {
discrete_walk(fit, window, alpha)
}
pub fn bernoulli_full_conformal<M: SymmetricAugmentedFit>(
fit: &mut M,
alpha: f64,
) -> Result<DiscreteFullConformalSet, String> {
discrete_full_conformal_exhaustive(fit, &[0.0, 1.0], alpha)
}
fn discrete_walk<M: SymmetricAugmentedFit>(
fit: &mut M,
candidates: &[f64],
alpha: f64,
) -> Result<DiscreteFullConformalSet, String> {
if candidates.is_empty() {
return Err("discrete full conformal: empty candidate list".to_string());
}
if !(0.0..1.0).contains(&alpha) {
return Err(format!(
"discrete full conformal: alpha must be in [0, 1), got {alpha}"
));
}
if candidates.windows(2).any(|w| !(w[0] < w[1])) {
return Err("discrete full conformal: candidates must be strictly increasing".to_string());
}
let mut out = Vec::with_capacity(candidates.len());
let mut members = Vec::new();
let mut n_augmented = 0usize;
for &z in candidates {
let scores = fit.scores(z)?;
let n1 = scores.len();
if n1 < 2 {
return Err(
"discrete full conformal: fitting map must score at least two rows".to_string(),
);
}
if n_augmented == 0 {
n_augmented = n1;
} else if n_augmented != n1 {
return Err(format!(
"discrete full conformal: fitting map returned {n1} scores after returning \
{n_augmented}; the augmented row count cannot change across candidates"
));
}
if scores.iter().any(|s| !s.is_finite()) {
return Err(format!(
"discrete full conformal: non-finite nonconformity score at candidate {z}; \
refusing to rank garbage"
));
}
let e_star = scores[n1 - 1];
let count = scores.iter().take(n1 - 1).filter(|&&e| e >= e_star).count();
let p_value = (1.0 + count as f64) / (n1 as f64);
let member = p_value > alpha;
if member {
members.push(z);
}
out.push(DiscreteCandidate { z, p_value, member });
}
let lower_tail_unresolved = out.first().filter(|c| c.member).map(|c| c.z);
let upper_tail_unresolved = out.last().filter(|c| c.member).map(|c| c.z);
Ok(DiscreteFullConformalSet {
members,
candidates: out,
alpha,
n_augmented,
lower_tail_unresolved,
upper_tail_unresolved,
})
}
#[derive(Clone, Debug)]
pub enum FrozenRhoCertificate {
Certified {
score_perturbation_bound: f64,
boundary_margin: f64,
},
Refused {
score_perturbation_bound: f64,
boundary_margin: f64,
},
}
impl FrozenRhoCertificate {
pub fn decide(score_perturbation_bound: f64, boundary_margin: f64) -> Self {
if boundary_margin > 0.0 && score_perturbation_bound < boundary_margin {
FrozenRhoCertificate::Certified {
score_perturbation_bound,
boundary_margin,
}
} else {
FrozenRhoCertificate::Refused {
score_perturbation_bound,
boundary_margin,
}
}
}
}
pub struct GaussianRemlRhoResponse<'a> {
x: &'a Array2<f64>,
y: &'a Array1<f64>,
s: &'a Array2<f64>,
x_star: &'a Array1<f64>,
n: usize,
p: usize,
rank_s: usize,
xtx: Array2<f64>,
xty: Array1<f64>,
yty: f64,
}
#[derive(Clone, Debug)]
struct RemlEval {
value: f64,
grad: f64,
hess: f64,
cross: f64,
mu_rho_train: Array1<f64>,
mu_rho_test: f64,
}
#[derive(Clone, Debug)]
pub struct CertifiedFullConformal {
pub frozen_set: FullConformalSet,
pub certificate: FrozenRhoCertificate,
pub rho_frozen: f64,
pub rho_excursion: f64,
pub score_rho_lipschitz: f64,
pub rho_probe_count: usize,
pub observed_sup_drho_dz: f64,
}
impl<'a> GaussianRemlRhoResponse<'a> {
pub fn new(
x: &'a Array2<f64>,
y: &'a Array1<f64>,
s: &'a Array2<f64>,
x_star: &'a Array1<f64>,
) -> Result<Self, String> {
let n = x.nrows();
let p = x.ncols();
if y.len() != n {
return Err("gaussian reml response: row-count mismatch".to_string());
}
if s.nrows() != p || s.ncols() != p || x_star.len() != p {
return Err("gaussian reml response: column-count mismatch".to_string());
}
let (evals, _) = s.eigh(Side::Lower).map_err(|e| {
format!("gaussian reml response: penalty eigendecomposition failed: {e:?}")
})?;
let max_ev = evals.iter().cloned().fold(0.0_f64, |a, b| a.max(b.abs()));
let tol = max_ev * 1e-10 * (p.max(1) as f64);
let rank_s = evals.iter().filter(|&&e| e > tol).count();
let xtx = x.t().dot(x);
let xty = x.t().dot(y);
let yty = y.dot(y);
Ok(Self {
x,
y,
s,
x_star,
n,
p,
rank_s,
xtx,
xty,
yty,
})
}
pub fn rank_s(&self) -> usize {
self.rank_s
}
fn eval(&self, rho: f64, z: Option<f64>) -> Result<RemlEval, String> {
let p = self.p;
let n_eff = self.n + usize::from(z.is_some());
let m0 = p - self.rank_s;
if n_eff <= m0 {
return Err(format!(
"gaussian reml response: degrees of freedom n_eff−M₀ = {n_eff}−{m0} ≤ 0; \
REML criterion undefined"
));
}
let coef = (n_eff - m0) as f64;
let r = self.rank_s as f64;
let lambda = rho.exp();
let mut a = self.xtx.clone();
for i in 0..p {
for j in 0..p {
a[[i, j]] += lambda * self.s[[i, j]];
}
}
if z.is_some() {
for i in 0..p {
for j in 0..p {
a[[i, j]] += self.x_star[i] * self.x_star[j];
}
}
}
let chol = a
.cholesky(Side::Lower)
.map_err(|e| format!("gaussian reml response: A(λ) not SPD: {e:?}"))?;
let mut c = self.xty.clone();
if let Some(zv) = z {
for j in 0..p {
c[j] += self.x_star[j] * zv;
}
}
let beta = chol.solvevec(&c);
let yty_eff = self.yty + z.map_or(0.0, |zv| zv * zv);
let d = yty_eff - c.dot(&beta);
if !(d > 0.0) {
return Err(format!(
"gaussian reml response: non-positive penalized RSS D = {d}; degenerate fit"
));
}
let sbeta = self.s.dot(&beta);
let pen = lambda * beta.dot(&sbeta);
let z_mat = chol.solve_mat(self.s);
let mut tr_ainv_s = 0.0;
let mut tr_ainv_s_sq = 0.0;
for i in 0..p {
tr_ainv_s += z_mat[[i, i]];
for j in 0..p {
tr_ainv_s_sq += z_mat[[i, j]] * z_mat[[j, i]];
}
}
let v_s = chol.solvevec(&sbeta);
let quad = sbeta.dot(&v_s);
let logdet: f64 = 2.0 * chol.diag().iter().map(|d| d.ln()).sum::<f64>();
let value = coef * d.ln() + logdet - r * rho;
let grad = coef * pen / d + lambda * tr_ainv_s - r;
let pen_prime = pen - 2.0 * lambda * lambda * quad;
let hess = coef * (pen_prime * d - pen * pen) / (d * d) + lambda * tr_ainv_s
- lambda * lambda * tr_ainv_s_sq;
let xv = fast_av(self.x, &v_s);
let mu_rho_train = xv.mapv(|t| -lambda * t);
let mu_rho_test = -lambda * self.x_star.dot(&v_s);
let cross = if let Some(zv) = z {
let b = chol.solvevec(self.x_star); let pen_z = 2.0 * lambda * sbeta.dot(&b);
let r_star = zv - self.x_star.dot(&beta);
let d_z = 2.0 * r_star;
coef * (pen_z * d - pen * d_z) / (d * d)
} else {
0.0
};
Ok(RemlEval {
value,
grad,
hess,
cross,
mu_rho_train,
mu_rho_test,
})
}
pub fn reml_criterion(&self, rho: f64, z: Option<f64>) -> Result<f64, String> {
Ok(self.eval(rho, z)?.value)
}
pub fn drho_dz(&self, rho: f64, z: f64) -> Result<f64, String> {
let ev = self.eval(rho, Some(z))?;
if ev.hess.abs() < 1e-14 {
return Err(
"gaussian reml response: outer Hessian ∂²Ṽ/∂ρ² ≈ 0; dρ̂/dz singular".to_string(),
);
}
Ok(-ev.cross / ev.hess)
}
pub fn select_rho(&self, z: Option<f64>) -> Result<f64, String> {
let (lo, hi, m) = (-25.0_f64, 25.0_f64, 60usize);
let mut best = (f64::INFINITY, 0.0_f64);
for k in 0..=m {
let rho = lo + (hi - lo) * (k as f64) / (m as f64);
if let Ok(ev) = self.eval(rho, z)
&& ev.value < best.0
{
best = (ev.value, rho);
}
}
let mut rho = best.1;
for _ in 0..100 {
let ev = self.eval(rho, z)?;
if !ev.hess.is_finite() || ev.hess <= 1e-12 {
break;
}
let step = ev.grad / ev.hess;
let new_rho = (rho - step).clamp(lo - 5.0, hi + 5.0);
let delta = new_rho - rho;
rho = new_rho;
if delta.abs() < 1e-13 {
break;
}
}
Ok(rho)
}
pub fn honest_membership(&self, z: f64, alpha: f64) -> Result<bool, String> {
let rho = self.select_rho(Some(z))?;
let lambda = rho.exp();
let p = self.p;
let mut a = self.xtx.clone();
for i in 0..p {
for j in 0..p {
a[[i, j]] += lambda * self.s[[i, j]] + self.x_star[i] * self.x_star[j];
}
}
let chol = a
.cholesky(Side::Lower)
.map_err(|e| format!("gaussian reml response: honest A(λ) not SPD: {e:?}"))?;
let mut c = self.xty.clone();
for j in 0..p {
c[j] += self.x_star[j] * z;
}
let beta = chol.solvevec(&c);
let e_star = (z - self.x_star.dot(&beta)).abs();
let xb = fast_av(self.x, &beta);
let count = (0..self.n)
.filter(|&i| (self.y[i] - xb[i]).abs() >= e_star)
.count();
Ok((1.0 + count as f64) > alpha * (self.n as f64 + 1.0))
}
pub fn certified_full_conformal(&self, alpha: f64) -> Result<CertifiedFullConformal, String> {
let rho0 = self.select_rho(None)?;
let lambda0 = rho0.exp();
let mut s_lambda = Array2::<f64>::zeros((self.p, self.p));
for i in 0..self.p {
for j in 0..self.p {
s_lambda[[i, j]] = lambda0 * self.s[[i, j]];
}
}
let weights = Array1::<f64>::ones(self.n);
let engine =
ExactGaussianFullConformal::new(self.x, self.y, &weights, &s_lambda, self.x_star)?;
let frozen_set = engine.prediction_set(alpha);
let mut endpoints: Vec<f64> = Vec::new();
for itv in &frozen_set.intervals {
for ep in [itv.lo, itv.hi] {
if ep.is_finite() {
endpoints.push(ep);
}
}
}
if endpoints.is_empty() {
let score_perturbation_bound = if frozen_set.boundary_margin == f64::INFINITY {
0.0
} else {
f64::INFINITY
};
return Ok(CertifiedFullConformal {
certificate: FrozenRhoCertificate::decide(
score_perturbation_bound,
frozen_set.boundary_margin,
),
frozen_set,
rho_frozen: rho0,
rho_excursion: 0.0,
score_rho_lipschitz: 0.0,
rho_probe_count: 0,
observed_sup_drho_dz: 0.0,
});
}
endpoints.sort_by(|a, b| a.partial_cmp(b).expect("finite endpoints"));
let z_lo = *endpoints.first().expect("non-empty");
let z_hi = *endpoints.last().expect("non-empty");
let probes = 64usize;
let mut max_dev = 0.0_f64;
let mut observed_sup_drho_dz = 0.0_f64;
let mut lip = 0.0_f64;
let ev0 = self.eval(rho0, None)?;
for i in 0..self.n {
lip = lip.max(ev0.mu_rho_train[i].abs() + ev0.mu_rho_test.abs());
}
for k in 0..=probes {
let z = z_lo + (z_hi - z_lo) * (k as f64) / (probes as f64);
let rho_z = self.select_rho(Some(z))?;
max_dev = max_dev.max((rho_z - rho0).abs());
if let Ok(d) = self.drho_dz(rho_z, z) {
observed_sup_drho_dz = observed_sup_drho_dz.max(d.abs());
}
let evz = self.eval(rho_z, Some(z))?;
for i in 0..self.n {
lip = lip.max(evz.mu_rho_train[i].abs() + evz.mu_rho_test.abs());
}
}
let spacing = (z_hi - z_lo) / (probes as f64);
let rho_excursion = max_dev + observed_sup_drho_dz * spacing;
let score_perturbation_bound = lip * rho_excursion;
let certificate =
FrozenRhoCertificate::decide(score_perturbation_bound, frozen_set.boundary_margin);
Ok(CertifiedFullConformal {
frozen_set,
certificate,
rho_frozen: rho0,
rho_excursion,
score_rho_lipschitz: lip,
rho_probe_count: probes + 1,
observed_sup_drho_dz,
})
}
}
const GLM_HOMOTOPY_MAX_SUBSTEPS: usize = 1024;
const GLM_HOMOTOPY_MAX_HALVINGS: usize = 24;
const GLM_CORRECTOR_MAX_ITERS: usize = 80;
const GLM_NEWTON_MAX_ITERS: usize = 200;
const GLM_NEWTON_MAX_BACKTRACKS: usize = 60;
const GLM_CONVERGENCE_RTOL: f64 = 1e-12;
const GLM_STALL_ACCEPT_RTOL: f64 = 1e-8;
const GLM_CONTRACTION_ACCEPT: f64 = 0.5;
const GLM_ARMIJO_C1: f64 = 1e-4;
const LOGIT_THIRD_DERIV_CRITICAL_ETA: f64 = 1.316_957_896_924_816_6;
#[inline]
fn vec_norm(v: &Array1<f64>) -> f64 {
v.dot(v).sqrt()
}
#[inline]
fn softplus(eta: f64) -> f64 {
eta.max(0.0) + (-eta.abs()).exp().ln_1p()
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CanonicalGlmFamily {
BernoulliLogit,
PoissonLog,
}
impl CanonicalGlmFamily {
pub fn mean(&self, eta: f64) -> f64 {
match self {
Self::BernoulliLogit => {
if eta >= 0.0 {
1.0 / (1.0 + (-eta).exp())
} else {
let e = eta.exp();
e / (1.0 + e)
}
}
Self::PoissonLog => eta.exp(),
}
}
pub fn weight(&self, eta: f64) -> f64 {
match self {
Self::BernoulliLogit => {
let mu = self.mean(eta);
mu * (1.0 - mu)
}
Self::PoissonLog => eta.exp(),
}
}
fn nll_term(&self, eta: f64, y: f64) -> f64 {
match self {
Self::BernoulliLogit => softplus(eta) - y * eta,
Self::PoissonLog => eta.exp() - y * eta,
}
}
fn weight_abs_sup(&self, lo: f64, hi: f64) -> f64 {
match self {
Self::BernoulliLogit => {
if lo <= 0.0 && 0.0 <= hi {
0.25
} else {
self.weight(lo).max(self.weight(hi))
}
}
Self::PoissonLog => hi.exp(),
}
}
fn third_abs_sup(&self, lo: f64, hi: f64) -> f64 {
match self {
Self::BernoulliLogit => {
let t = |eta: f64| {
let mu = self.mean(eta);
(mu * (1.0 - mu) * (1.0 - 2.0 * mu)).abs()
};
let mut sup = t(lo).max(t(hi));
for c in [
-LOGIT_THIRD_DERIV_CRITICAL_ETA,
LOGIT_THIRD_DERIV_CRITICAL_ETA,
] {
if lo <= c && c <= hi {
sup = sup.max(t(c));
}
}
sup
}
Self::PoissonLog => hi.exp(),
}
}
fn validate_training_response(&self, y: f64, row: usize) -> Result<(), String> {
if !y.is_finite() {
return Err(format!("glm homotopy: non-finite response at row {row}"));
}
match self {
Self::BernoulliLogit => {
if !(0.0..=1.0).contains(&y) {
return Err(format!(
"glm homotopy: Bernoulli response must lie in [0, 1], got {y} at row {row}"
));
}
}
Self::PoissonLog => {
if y < 0.0 {
return Err(format!(
"glm homotopy: Poisson response must be non-negative, got {y} at row {row}"
));
}
}
}
Ok(())
}
fn validate_candidate(&self, z: f64) -> Result<(), String> {
if !z.is_finite() {
return Err(format!("glm homotopy: non-finite candidate {z}"));
}
match self {
Self::BernoulliLogit => {
if !(0.0..=1.0).contains(&z) {
return Err(format!(
"glm homotopy: Bernoulli candidate must lie in [0, 1], got {z}"
));
}
}
Self::PoissonLog => {
if z < 0.0 {
return Err(format!(
"glm homotopy: Poisson candidate must be non-negative, got {z}"
));
}
}
}
Ok(())
}
}
#[derive(Clone, Debug)]
pub struct GlmHomotopyCandidate {
pub z: f64,
pub p_value: f64,
pub member: bool,
pub beta: Array1<f64>,
pub beta_error_bound: f64,
pub cold_refit: bool,
}
#[derive(Clone, Debug)]
pub struct GlmHomotopyConformalSet {
pub members: Vec<f64>,
pub candidates: Vec<GlmHomotopyCandidate>,
pub alpha: f64,
pub n_augmented: usize,
pub refit_fallbacks: usize,
pub margin_refits: usize,
pub ties_unresolved: usize,
pub max_beta_error_bound: f64,
}
struct GlmCandidateVerdict {
p_value: f64,
member: bool,
decided: bool,
}
pub struct GlmHomotopyFullConformal<'a> {
family: CanonicalGlmFamily,
x: &'a Array2<f64>,
y: &'a Array1<f64>,
s_lambda: &'a Array2<f64>,
x_star: &'a Array1<f64>,
n: usize,
p: usize,
row_norm: Array1<f64>,
row_sq: Array1<f64>,
star_norm: f64,
star_sq: f64,
}
impl<'a> GlmHomotopyFullConformal<'a> {
pub fn new(
family: CanonicalGlmFamily,
x: &'a Array2<f64>,
y: &'a Array1<f64>,
prior_weights: &Array1<f64>,
s_lambda: &'a Array2<f64>,
x_star: &'a Array1<f64>,
) -> Result<Self, String> {
let n = x.nrows();
let p = x.ncols();
if y.len() != n || prior_weights.len() != n {
return Err("glm homotopy: row-count mismatch".to_string());
}
if s_lambda.nrows() != p || s_lambda.ncols() != p || x_star.len() != p {
return Err("glm homotopy: column-count mismatch".to_string());
}
if prior_weights.iter().any(|&w| (w - 1.0).abs() > 1e-12) {
return Err(
"glm homotopy full conformal requires unit prior weights: a reweighted \
training row is not exchangeable with the test row, so the finite-sample \
coverage proof does not apply; use the split/ALO conformal calibrator instead"
.to_string(),
);
}
for (i, &yi) in y.iter().enumerate() {
family.validate_training_response(yi, i)?;
}
let mut row_norm = Array1::<f64>::zeros(n);
let mut row_sq = Array1::<f64>::zeros(n);
for i in 0..n {
let sq = x.row(i).dot(&x.row(i));
row_sq[i] = sq;
row_norm[i] = sq.sqrt();
}
let star_sq = x_star.dot(x_star);
Ok(Self {
family,
x,
y,
s_lambda,
x_star,
n,
p,
row_norm,
row_sq,
star_norm: star_sq.sqrt(),
star_sq,
})
}
fn penalized_score(&self, beta: &Array1<f64>, z: f64) -> Array1<f64> {
let eta = fast_av(self.x, beta);
let mut resid = Array1::<f64>::zeros(self.n);
for i in 0..self.n {
resid[i] = self.family.mean(eta[i]) - self.y[i];
}
let mut g = self.x.t().dot(&resid) + self.s_lambda.dot(beta);
let r_star = self.family.mean(self.x_star.dot(beta)) - z;
for j in 0..self.p {
g[j] += self.x_star[j] * r_star;
}
g
}
fn penalized_nll(&self, beta: &Array1<f64>, z: f64) -> f64 {
let eta = fast_av(self.x, beta);
let mut nll = 0.0;
for i in 0..self.n {
nll += self.family.nll_term(eta[i], self.y[i]);
}
nll += self.family.nll_term(self.x_star.dot(beta), z);
nll + 0.5 * beta.dot(&self.s_lambda.dot(beta))
}
fn penalized_hessian(&self, beta: &Array1<f64>) -> Array2<f64> {
let eta = fast_av(self.x, beta);
let mut xw = self.x.to_owned();
for i in 0..self.n {
let w = self.family.weight(eta[i]);
for j in 0..self.p {
xw[[i, j]] *= w;
}
}
let mut h = self.x.t().dot(&xw) + self.s_lambda;
let w_star = self.family.weight(self.x_star.dot(beta));
for a in 0..self.p {
for b in 0..self.p {
h[[a, b]] += w_star * self.x_star[a] * self.x_star[b];
}
}
h
}
fn contraction_kappa(
&self,
eta0: &Array1<f64>,
eta0_star: f64,
shift: &Array1<f64>,
shift_star: f64,
radius: f64,
lambda_min: f64,
) -> f64 {
let mut drift = 0.0_f64;
for i in 0..self.n {
let dev = shift[i] + self.row_norm[i] * radius;
let t_sup = self.family.third_abs_sup(eta0[i] - dev, eta0[i] + dev);
drift += t_sup * dev * self.row_sq[i];
}
let dev_star = shift_star + self.star_norm * radius;
drift += self
.family
.third_abs_sup(eta0_star - dev_star, eta0_star + dev_star)
* dev_star
* self.star_sq;
drift / lambda_min
}
fn stationary_error_bound(&self, beta: &Array1<f64>, z: f64) -> f64 {
let hess = self.penalized_hessian(beta);
let Ok(eigs) = hess.eigh(Side::Lower) else {
return f64::INFINITY;
};
let lambda_min = eigs.0.iter().copied().fold(f64::INFINITY, f64::min);
if !(lambda_min > 0.0) {
return f64::INFINITY;
}
let Ok(chol) = hess.cholesky(Side::Lower) else {
return f64::INFINITY;
};
let r0 = vec_norm(&chol.solvevec(&self.penalized_score(beta, z)));
let eta0 = fast_av(self.x, beta);
let eta0_star = self.x_star.dot(beta);
let zero_shift = Array1::<f64>::zeros(self.n);
let kappa =
self.contraction_kappa(&eta0, eta0_star, &zero_shift, 0.0, 2.0 * r0, lambda_min);
if kappa.is_finite() && kappa < GLM_CONTRACTION_ACCEPT {
r0 / (1.0 - kappa)
} else {
f64::INFINITY
}
}
fn cold_fit(&self, z: f64, init: Array1<f64>) -> Result<(Array1<f64>, f64), String> {
let mut beta = init;
let mut nll = self.penalized_nll(&beta, z);
if !nll.is_finite() {
beta = Array1::<f64>::zeros(self.p);
nll = self.penalized_nll(&beta, z);
}
let mut converged = false;
for _ in 0..GLM_NEWTON_MAX_ITERS {
let g = self.penalized_score(&beta, z);
let hess = self.penalized_hessian(&beta);
let chol = hess
.cholesky(Side::Lower)
.map_err(|e| format!("glm homotopy: augmented Hessian not SPD at z={z}: {e:?}"))?;
let step = chol.solvevec(&g);
let step_norm = vec_norm(&step);
if step_norm <= GLM_CONVERGENCE_RTOL * (1.0 + vec_norm(&beta)) {
converged = true;
break;
}
let decrease = g.dot(&step);
let mut t = 1.0_f64;
let mut accepted = false;
for _ in 0..GLM_NEWTON_MAX_BACKTRACKS {
let mut cand = beta.clone();
cand.scaled_add(-t, &step);
let cand_nll = self.penalized_nll(&cand, z);
if cand_nll.is_finite() && cand_nll <= nll - GLM_ARMIJO_C1 * t * decrease {
beta = cand;
nll = cand_nll;
accepted = true;
break;
}
t *= 0.5;
}
if !accepted {
return Err(format!(
"glm homotopy: cold-fit line search failed at z={z} (step norm {step_norm})"
));
}
}
if !converged {
let g = self.penalized_score(&beta, z);
let hess = self.penalized_hessian(&beta);
let chol = hess
.cholesky(Side::Lower)
.map_err(|e| format!("glm homotopy: augmented Hessian not SPD at z={z}: {e:?}"))?;
let step_norm = vec_norm(&chol.solvevec(&g));
if step_norm > GLM_STALL_ACCEPT_RTOL * (1.0 + vec_norm(&beta)) {
return Err(format!(
"glm homotopy: cold fit did not converge at z={z} (residual {step_norm})"
));
}
}
let bound = self.stationary_error_bound(&beta, z);
Ok((beta, bound))
}
fn track(&self, beta: &mut Array1<f64>, z_from: f64, z_to: f64) -> Option<f64> {
let mut z = z_from;
let mut h = z_to - z_from;
let mut arrival_bound = f64::INFINITY;
for _ in 0..GLM_HOMOTOPY_MAX_SUBSTEPS {
let remaining = z_to - z;
if remaining <= 0.0 {
return Some(arrival_bound);
}
h = h.min(remaining);
let hess = self.penalized_hessian(beta);
let lambda_min = hess
.eigh(Side::Lower)
.ok()?
.0
.iter()
.copied()
.fold(f64::INFINITY, f64::min);
if !(lambda_min > 0.0) {
return None;
}
let chol = hess.cholesky(Side::Lower).ok()?;
let b_dir = chol.solvevec(self.x_star);
let eta0 = fast_av(self.x, beta);
let eta0_star = self.x_star.dot(beta);
let xb = fast_av(self.x, &b_dir);
let xb_star = self.x_star.dot(&b_dir);
let mut accepted = false;
for _ in 0..=GLM_HOMOTOPY_MAX_HALVINGS {
let h_eff = h.min(z_to - z);
let z_new = if h_eff >= z_to - z { z_to } else { z + h_eff };
let mut beta_pred = beta.clone();
beta_pred.scaled_add(h_eff, &b_dir);
let s0 = chol.solvevec(&self.penalized_score(&beta_pred, z_new));
let r0 = vec_norm(&s0);
let radius = 2.0 * r0;
let shift = xb.mapv(|t| (h_eff * t).abs());
let kappa = self.contraction_kappa(
&eta0,
eta0_star,
&shift,
(h_eff * xb_star).abs(),
radius,
lambda_min,
);
if kappa.is_finite() && kappa < GLM_CONTRACTION_ACCEPT {
let mut bcur = beta_pred;
let mut step = s0;
let mut r = r0;
for _ in 0..GLM_CORRECTOR_MAX_ITERS {
if r <= GLM_CONVERGENCE_RTOL * (1.0 + vec_norm(&bcur)) {
break;
}
let mut next = bcur.clone();
next.scaled_add(-1.0, &step);
let next_step = chol.solvevec(&self.penalized_score(&next, z_new));
let r_next = vec_norm(&next_step);
if !(r_next < r) {
break;
}
bcur = next;
step = next_step;
r = r_next;
}
if r <= GLM_STALL_ACCEPT_RTOL * (1.0 + vec_norm(&bcur)) {
let mut diff = bcur.clone();
diff.scaled_add(-1.0, beta);
let shift_fin = fast_av(self.x, &diff).mapv(f64::abs);
let kappa_fin = self.contraction_kappa(
&eta0,
eta0_star,
&shift_fin,
self.x_star.dot(&diff).abs(),
2.0 * r,
lambda_min,
);
if kappa_fin.is_finite() && kappa_fin < GLM_CONTRACTION_ACCEPT {
arrival_bound = r / (1.0 - kappa_fin);
*beta = bcur;
z = z_new;
h = 2.0 * h_eff;
accepted = true;
break;
}
}
}
h = 0.5 * h_eff;
if !(h > 0.0) {
return None;
}
}
if !accepted {
return None;
}
}
if z_to - z <= 0.0 {
Some(arrival_bound)
} else {
None
}
}
fn score_delta(&self, eta: f64, x_norm: f64, beta_error_bound: f64) -> f64 {
if beta_error_bound == 0.0 {
return 0.0;
}
if !beta_error_bound.is_finite() {
return f64::INFINITY;
}
let dev = x_norm * beta_error_bound;
self.family.weight_abs_sup(eta - dev, eta + dev) * dev
}
fn candidate_verdict(
&self,
z: f64,
alpha: f64,
beta: &Array1<f64>,
beta_error_bound: f64,
) -> GlmCandidateVerdict {
let eta = fast_av(self.x, beta);
let eta_star = self.x_star.dot(beta);
let e_star = (z - self.family.mean(eta_star)).abs();
let delta_star = self.score_delta(eta_star, self.star_norm, beta_error_bound);
let mut count = 0usize;
let mut count_certain = 0usize;
let mut count_possible = 0usize;
for i in 0..self.n {
let e_i = (self.y[i] - self.family.mean(eta[i])).abs();
let tol = self.score_delta(eta[i], self.row_norm[i], beta_error_bound) + delta_star;
let gap = e_i - e_star;
if gap >= 0.0 {
count += 1;
}
if gap >= tol {
count_certain += 1;
}
if gap >= -tol {
count_possible += 1;
}
}
let n1 = (self.n + 1) as f64;
let member = (1.0 + count as f64) > alpha * n1;
let member_lo = (1.0 + count_certain as f64) > alpha * n1;
let member_hi = (1.0 + count_possible as f64) > alpha * n1;
GlmCandidateVerdict {
p_value: (1.0 + count as f64) / n1,
member,
decided: member_lo == member_hi,
}
}
pub fn prediction_set(
&self,
candidates: &[f64],
alpha: f64,
) -> Result<GlmHomotopyConformalSet, String> {
if candidates.is_empty() {
return Err("glm homotopy: empty candidate list".to_string());
}
if !(0.0..1.0).contains(&alpha) {
return Err(format!(
"glm homotopy: alpha must be in [0, 1), got {alpha}"
));
}
if candidates.windows(2).any(|w| !(w[0] < w[1])) {
return Err("glm homotopy: candidates must be strictly increasing".to_string());
}
for &z in candidates {
self.family.validate_candidate(z)?;
}
let (mut beta, mut bound) = self.cold_fit(candidates[0], Array1::<f64>::zeros(self.p))?;
let mut out: Vec<GlmHomotopyCandidate> = Vec::with_capacity(candidates.len());
let mut members: Vec<f64> = Vec::new();
let mut refit_fallbacks = 0usize;
let mut margin_refits = 0usize;
let mut ties_unresolved = 0usize;
let mut max_bound = 0.0_f64;
let mut prev_z = candidates[0];
for (idx, &z) in candidates.iter().enumerate() {
let mut cold = idx == 0;
if idx > 0 {
match self.track(&mut beta, prev_z, z) {
Some(b) => bound = b,
None => {
let (refit_beta, refit_bound) = self.cold_fit(z, beta.clone())?;
beta = refit_beta;
bound = refit_bound;
refit_fallbacks += 1;
cold = true;
}
}
}
let mut verdict = self.candidate_verdict(z, alpha, &beta, bound);
if !verdict.decided && !cold {
let (refit_beta, refit_bound) = self.cold_fit(z, beta.clone())?;
beta = refit_beta;
bound = refit_bound;
cold = true;
margin_refits += 1;
verdict = self.candidate_verdict(z, alpha, &beta, bound);
}
if !verdict.decided {
ties_unresolved += 1;
}
if bound.is_finite() {
max_bound = max_bound.max(bound);
} else {
max_bound = f64::INFINITY;
}
if verdict.member {
members.push(z);
}
out.push(GlmHomotopyCandidate {
z,
p_value: verdict.p_value,
member: verdict.member,
beta: beta.clone(),
beta_error_bound: bound,
cold_refit: cold,
});
prev_z = z;
}
Ok(GlmHomotopyConformalSet {
members,
candidates: out,
alpha,
n_augmented: self.n + 1,
refit_fallbacks,
margin_refits,
ties_unresolved,
max_beta_error_bound: max_bound,
})
}
}
#[derive(Clone, Copy, Debug)]
pub struct JackknifePlusInterval {
pub lo: f64,
pub hi: f64,
pub alpha: f64,
pub n: usize,
}
impl JackknifePlusInterval {
pub fn certifies_finite(&self) -> bool {
self.lo.is_finite() && self.hi.is_finite()
}
}
pub fn jackknife_plus_interval(
loo_test_predictions: &Array1<f64>,
loo_abs_residuals: &Array1<f64>,
alpha: f64,
) -> Result<JackknifePlusInterval, String> {
let n = loo_test_predictions.len();
if n == 0 {
return Err("jackknife+: empty leave-one-out inputs".to_string());
}
if loo_abs_residuals.len() != n {
return Err(format!(
"jackknife+: {} predictions but {} residuals",
n,
loo_abs_residuals.len()
));
}
if !(alpha.is_finite() && alpha > 0.0 && alpha < 1.0) {
return Err(format!("jackknife+: alpha must be in (0, 1), got {alpha}"));
}
for (i, (&m, &r)) in loo_test_predictions
.iter()
.zip(loo_abs_residuals.iter())
.enumerate()
{
if !m.is_finite() {
return Err(format!(
"jackknife+: non-finite LOO prediction at index {i}"
));
}
if !(r.is_finite() && r >= 0.0) {
return Err(format!(
"jackknife+: LOO residual at index {i} must be finite and non-negative, got {r}"
));
}
}
let n1 = (n + 1) as f64;
let rank_hi = (n1 * (1.0 - alpha)).ceil() as usize;
let rank_lo = (n1 * alpha).floor() as usize;
let hi = if rank_hi > n {
f64::INFINITY
} else {
let mut upper: Vec<f64> = (0..n)
.map(|i| loo_test_predictions[i] + loo_abs_residuals[i])
.collect();
upper.sort_by(|a, b| a.partial_cmp(b).expect("finite jackknife+ endpoints"));
upper[rank_hi - 1]
};
let lo = if rank_lo < 1 {
f64::NEG_INFINITY
} else {
let mut lower: Vec<f64> = (0..n)
.map(|i| loo_test_predictions[i] - loo_abs_residuals[i])
.collect();
lower.sort_by(|a, b| a.partial_cmp(b).expect("finite jackknife+ endpoints"));
lower[rank_lo - 1]
};
Ok(JackknifePlusInterval { lo, hi, alpha, n })
}
pub fn gaussian_jackknife_plus(
x: &Array2<f64>,
y: &Array1<f64>,
prior_weights: &Array1<f64>,
s_lambda: &Array2<f64>,
x_star: &Array1<f64>,
alpha: f64,
) -> Result<JackknifePlusInterval, String> {
let n = x.nrows();
let p = x.ncols();
if y.len() != n || prior_weights.len() != n {
return Err("gaussian jackknife+: row-count mismatch".to_string());
}
if s_lambda.nrows() != p || s_lambda.ncols() != p || x_star.len() != p {
return Err("gaussian jackknife+: column-count mismatch".to_string());
}
if prior_weights.iter().any(|&w| (w - 1.0).abs() > 1e-12) {
return Err(
"gaussian jackknife+ requires unit prior weights: a reweighted training row \
is not exchangeable with the test row, so the finite-sample coverage proof \
does not apply"
.to_string(),
);
}
let m = x.t().dot(x) + s_lambda;
let chol = m
.cholesky(Side::Lower)
.map_err(|e| format!("gaussian jackknife+: normal matrix not SPD: {e:?}"))?;
let beta = chol.solvevec(&x.t().dot(y));
let mu = fast_av(x, &beta);
let mu_star = x_star.dot(&beta);
let b = chol.solvevec(x_star);
let xt = x.t().as_standard_layout().into_owned();
let minv_xt = chol.solve_mat(&xt);
let mut loo_preds = Array1::<f64>::zeros(n);
let mut loo_resids = Array1::<f64>::zeros(n);
for i in 0..n {
let h_i = x.row(i).dot(&minv_xt.column(i));
let one_minus_h = 1.0 - h_i;
if !(one_minus_h > 1e-10) {
return Err(format!(
"gaussian jackknife+: leverage hᵢ = {h_i} at row {i} leaves no leave-one-out \
information (1 − hᵢ ≤ 1e-10); the rank-one downdate is exact only for hᵢ < 1"
));
}
let r_i = y[i] - mu[i];
let c_i = x.row(i).dot(&b); loo_resids[i] = (r_i / one_minus_h).abs();
loo_preds[i] = mu_star - c_i * r_i / one_minus_h;
}
jackknife_plus_interval(&loo_preds, &loo_resids, alpha)
}
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct GaussianJackknifePlusStats {
beta: Array1<f64>,
minv_xt: Array2<f64>,
signed_loo: Array1<f64>,
abs_loo: Array1<f64>,
}
impl GaussianJackknifePlusStats {
pub fn new(
x: &Array2<f64>,
y: &Array1<f64>,
prior_weights: &Array1<f64>,
s_lambda: &Array2<f64>,
) -> Result<Self, String> {
let n = x.nrows();
let p = x.ncols();
if y.len() != n || prior_weights.len() != n {
return Err("gaussian jackknife+ stats: row-count mismatch".to_string());
}
if s_lambda.nrows() != p || s_lambda.ncols() != p {
return Err("gaussian jackknife+ stats: column-count mismatch".to_string());
}
if prior_weights.iter().any(|&w| (w - 1.0).abs() > 1e-12) {
return Err(
"gaussian jackknife+ requires unit prior weights: a reweighted training row \
is not exchangeable with the test row, so the finite-sample coverage proof \
does not apply"
.to_string(),
);
}
let m = x.t().dot(x) + s_lambda;
Self::from_design_and_normal_matrix(x, y, &m)
}
pub fn from_design_unit_weight_normal_matrix(
x: &Array2<f64>,
y: &Array1<f64>,
prior_weights: &Array1<f64>,
m: &Array2<f64>,
) -> Result<Self, String> {
let n = x.nrows();
if y.len() != n || prior_weights.len() != n {
return Err("gaussian jackknife+ stats: row-count mismatch".to_string());
}
if prior_weights.iter().any(|&w| (w - 1.0).abs() > 1e-12) {
return Err(
"gaussian jackknife+ requires unit prior weights: a reweighted training row \
is not exchangeable with the test row, so the finite-sample coverage proof \
does not apply"
.to_string(),
);
}
Self::from_design_and_normal_matrix(x, y, m)
}
fn from_design_and_normal_matrix(
x: &Array2<f64>,
y: &Array1<f64>,
m: &Array2<f64>,
) -> Result<Self, String> {
let n = x.nrows();
let p = x.ncols();
if y.len() != n {
return Err("gaussian jackknife+ stats: row-count mismatch".to_string());
}
if m.nrows() != p || m.ncols() != p {
return Err("gaussian jackknife+ stats: normal-matrix shape mismatch".to_string());
}
let chol = m
.cholesky(Side::Lower)
.map_err(|e| format!("gaussian jackknife+ stats: normal matrix not SPD: {e:?}"))?;
let beta = chol.solvevec(&x.t().dot(y));
let mu = fast_av(x, &beta);
let xt = x.t().as_standard_layout().into_owned();
let minv_xt = chol.solve_mat(&xt);
let mut signed_loo = Array1::<f64>::zeros(n);
let mut abs_loo = Array1::<f64>::zeros(n);
for i in 0..n {
let h_i = x.row(i).dot(&minv_xt.column(i));
let one_minus_h = 1.0 - h_i;
if !(one_minus_h > 1e-10) {
return Err(format!(
"gaussian jackknife+ stats: leverage hᵢ = {h_i} at row {i} leaves no \
leave-one-out information (1 − hᵢ ≤ 1e-10); the rank-one downdate is \
exact only for hᵢ < 1"
));
}
let c_i = (y[i] - mu[i]) / one_minus_h;
signed_loo[i] = c_i;
abs_loo[i] = c_i.abs();
}
Ok(Self {
beta,
minv_xt,
signed_loo,
abs_loo,
})
}
pub fn n(&self) -> usize {
self.abs_loo.len()
}
pub fn p(&self) -> usize {
self.beta.len()
}
pub fn beta(&self) -> &Array1<f64> {
&self.beta
}
pub fn interval(
&self,
x_star: &Array1<f64>,
alpha: f64,
) -> Result<JackknifePlusInterval, String> {
let p = self.beta.len();
if x_star.len() != p {
return Err(format!(
"gaussian jackknife+ stats: x_* has {} entries but the fit has {p} coefficients",
x_star.len()
));
}
let n = self.abs_loo.len();
let mu_star = x_star.dot(&self.beta);
let mut loo_preds = Array1::<f64>::zeros(n);
for i in 0..n {
let c = x_star.dot(&self.minv_xt.column(i));
loo_preds[i] = mu_star - c * self.signed_loo[i];
}
jackknife_plus_interval(&loo_preds, &self.abs_loo, alpha)
}
}
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct ExactFullConformalSubstrate {
x: Array2<f64>,
y: Array1<f64>,
s_lambda: Array2<f64>,
}
#[derive(Clone, Debug)]
pub struct ExactFullConformalInterval {
pub lo: f64,
pub hi: f64,
pub set: FullConformalSet,
pub frozen_rho_certified: bool,
}
impl ExactFullConformalSubstrate {
pub fn from_design_unit_weight_normal_matrix(
x: &Array2<f64>,
y: &Array1<f64>,
prior_weights: &Array1<f64>,
m: &Array2<f64>,
) -> Result<Self, String> {
let n = x.nrows();
let p = x.ncols();
if y.len() != n || prior_weights.len() != n {
return Err("exact full conformal substrate: row-count mismatch".to_string());
}
if m.nrows() != p || m.ncols() != p {
return Err("exact full conformal substrate: normal-matrix shape mismatch".to_string());
}
if prior_weights.iter().any(|&w| (w - 1.0).abs() > 1e-12) {
return Err(
"exact full conformal requires unit prior weights: a reweighted training row \
is not exchangeable with the test row, so the finite-sample coverage proof \
does not apply"
.to_string(),
);
}
let s_lambda = m - &x.t().dot(x);
Ok(Self {
x: x.clone(),
y: y.clone(),
s_lambda,
})
}
pub fn p(&self) -> usize {
self.x.ncols()
}
pub fn n(&self) -> usize {
self.x.nrows()
}
pub fn interval(
&self,
x_star: &Array1<f64>,
alpha: f64,
) -> Result<ExactFullConformalInterval, String> {
if x_star.len() != self.p() {
return Err(format!(
"exact full conformal: x_* has {} entries but the fit has {} coefficients",
x_star.len(),
self.p()
));
}
let weights = Array1::<f64>::ones(self.n());
let engine =
ExactGaussianFullConformal::new(&self.x, &self.y, &weights, &self.s_lambda, x_star)?;
let set = engine.prediction_set(alpha);
let frozen_rho_certified =
GaussianRemlRhoResponse::new(&self.x, &self.y, &self.s_lambda, x_star)
.and_then(|response| response.certified_full_conformal(alpha))
.map(|certified| {
matches!(
certified.certificate,
FrozenRhoCertificate::Certified { .. }
)
})
.unwrap_or(false);
let (lo, hi) = if set.intervals.is_empty() {
let m = &self.x.t().dot(&self.x) + &self.s_lambda;
let chol = m.cholesky(Side::Lower).map_err(|e| {
format!("exact full conformal: frozen normal matrix not SPD: {e:?}")
})?;
let beta = chol.solvevec(&self.x.t().dot(&self.y));
let mu_point = x_star.dot(&beta);
(mu_point, mu_point)
} else {
let mut lo = f64::INFINITY;
let mut hi = f64::NEG_INFINITY;
for itv in &set.intervals {
lo = lo.min(itv.lo);
hi = hi.max(itv.hi);
}
(lo, hi)
};
Ok(ExactFullConformalInterval {
lo,
hi,
set,
frozen_rho_certified,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
use ndarray::{Array1, Array2};
#[test]
fn exact_set_matches_brute_force_refits() {
let n = 24usize;
let p = 5usize;
let mut x = Array2::<f64>::zeros((n, p));
let mut y = Array1::<f64>::zeros(n);
for i in 0..n {
let t = i as f64 / (n as f64 - 1.0);
for j in 0..p {
x[[i, j]] = (t * (j as f64 + 1.0) * std::f64::consts::PI).sin();
}
y[i] = 1.2 * (2.0 * std::f64::consts::PI * t).sin()
+ 0.3 * (17.0 * (i as f64) + 0.5).sin();
}
let mut s_lambda = Array2::<f64>::eye(p);
s_lambda *= 0.7;
let weights = Array1::<f64>::ones(n);
let mut x_star = Array1::<f64>::zeros(p);
for j in 0..p {
x_star[j] = (0.37 * (j as f64 + 1.0) * std::f64::consts::PI).sin();
}
let engine =
ExactGaussianFullConformal::new(&x, &y, &weights, &s_lambda, &x_star).expect("engine");
let alpha = 0.2;
let set = engine.prediction_set(alpha);
assert!(!set.intervals.is_empty(), "set should be non-empty");
let m_base = x.t().dot(&x) + &s_lambda;
let oracle = |z: f64| -> bool {
let mut m = m_base.clone();
for i in 0..p {
for j in 0..p {
m[[i, j]] += x_star[i] * x_star[j];
}
}
let chol = m.cholesky(Side::Lower).expect("oracle chol");
let mut rhs = x.t().dot(&y);
for j in 0..p {
rhs[j] += x_star[j] * z;
}
let beta = chol.solvevec(&rhs);
let e_star = (z - x_star.dot(&beta)).abs();
let count = (0..n)
.filter(|&i| {
let mu_i: f64 = x.row(i).dot(&beta);
(y[i] - mu_i).abs() >= e_star
})
.count();
(1.0 + count as f64) > alpha * (n as f64 + 1.0)
};
let z_lo = set.intervals.first().map(|i| i.lo).unwrap_or(-5.0) - 2.0;
let z_hi = set.intervals.last().map(|i| i.hi).unwrap_or(5.0) + 2.0;
let z_lo = if z_lo.is_finite() { z_lo } else { -50.0 };
let z_hi = if z_hi.is_finite() { z_hi } else { 50.0 };
let grid = 4001usize;
for g in 0..grid {
let z = z_lo + (z_hi - z_lo) * g as f64 / (grid as f64 - 1.0);
let in_set = set.intervals.iter().any(|itv| z >= itv.lo && z <= itv.hi);
assert_eq!(
in_set,
oracle(z),
"breakpoint scan disagrees with brute-force refit at z={z}"
);
}
let chol = m_base.cholesky(Side::Lower).expect("chol");
let beta_unaug = chol.solvevec(&x.t().dot(&y));
let mu_star = x_star.dot(&beta_unaug);
assert!(
set.intervals
.iter()
.any(|itv| mu_star >= itv.lo && mu_star <= itv.hi),
"point prediction should be inside its own conformal set"
);
assert!(set.boundary_margin >= 0.0);
}
#[test]
fn boundary_tie_has_zero_margin_and_refuses() {
let x = Array2::from_shape_vec((1, 1), vec![0.0]).expect("x");
let y = Array1::from_vec(vec![0.0]);
let weights = Array1::ones(1);
let s_lambda = Array2::from_shape_vec((1, 1), vec![1.0]).expect("s");
let x_star = Array1::from_vec(vec![1.0]);
let engine =
ExactGaussianFullConformal::new(&x, &y, &weights, &s_lambda, &x_star).expect("engine");
let set = engine.prediction_set(0.5);
assert_eq!(set.intervals.len(), 1);
assert_eq!(set.intervals[0].lo, 0.0);
assert_eq!(set.intervals[0].hi, 0.0);
assert_eq!(set.boundary_margin, 0.0);
assert!(matches!(
FrozenRhoCertificate::decide(0.0, set.boundary_margin),
FrozenRhoCertificate::Refused { .. }
));
}
#[test]
fn identically_tied_all_real_set_has_zero_margin_and_refuses() {
let x = Array2::from_shape_vec((1, 1), vec![1.0]).expect("x");
let y = Array1::from_vec(vec![0.0]);
let weights = Array1::ones(1);
let s_lambda = Array2::from_shape_vec((1, 1), vec![0.0]).expect("s");
let x_star = Array1::from_vec(vec![1.0]);
let engine =
ExactGaussianFullConformal::new(&x, &y, &weights, &s_lambda, &x_star).expect("engine");
let set = engine.prediction_set(0.5);
assert_eq!(set.intervals.len(), 1);
assert_eq!(set.intervals[0].lo, f64::NEG_INFINITY);
assert_eq!(set.intervals[0].hi, f64::INFINITY);
assert_eq!(set.boundary_margin, 0.0);
assert!(matches!(
FrozenRhoCertificate::decide(0.0, set.boundary_margin),
FrozenRhoCertificate::Refused { .. }
));
}
#[test]
fn strictly_separated_all_real_margin_can_accept() {
let engine = ExactGaussianFullConformal {
u: Array1::from_vec(vec![1.0, 1.0, 0.0]),
w: Array1::from_vec(vec![1.0, -1.0, 0.1]),
n: 2,
};
let set = engine.prediction_set(0.5);
assert_eq!(set.intervals.len(), 1);
assert_eq!(set.intervals[0].lo, f64::NEG_INFINITY);
assert_eq!(set.intervals[0].hi, f64::INFINITY);
assert!(set.boundary_margin > 0.5, "margin={}", set.boundary_margin);
assert!(matches!(
FrozenRhoCertificate::decide(0.5, set.boundary_margin),
FrozenRhoCertificate::Certified { .. }
));
}
fn bernoulli_intercept_scores(train: &[f64], z: f64, lambda: f64) -> Array1<f64> {
let n1 = train.len() + 1;
let sum_y: f64 = train.iter().sum::<f64>() + z;
let mut eta = 0.0_f64;
for _ in 0..200 {
let mu = 1.0 / (1.0 + (-eta).exp());
let g = sum_y - (n1 as f64) * mu - lambda * eta;
let h = -(n1 as f64) * mu * (1.0 - mu) - lambda;
let step = g / h;
eta -= step;
if step.abs() < 1e-14 {
break;
}
}
let mu = 1.0 / (1.0 + (-eta).exp());
let mut scores = Array1::<f64>::zeros(n1);
for (i, &yi) in train.iter().enumerate() {
scores[i] = (yi - mu).abs();
}
scores[n1 - 1] = (z - mu).abs();
scores
}
#[test]
fn bernoulli_full_conformal_exact_coverage_by_total_enumeration() {
let n = 7usize;
let lambda = 0.5_f64;
for &theta in &[0.2_f64, 0.5, 0.8] {
for &alpha in &[0.10_f64, 0.25] {
let mut coverage = 0.0_f64;
let mut any_strict_subset = false;
for mask in 0u32..(1u32 << n) {
let train: Vec<f64> = (0..n).map(|i| f64::from((mask >> i) & 1)).collect();
let p_train: f64 = train
.iter()
.map(|&y| if y > 0.5 { theta } else { 1.0 - theta })
.product();
let mut map = |z: f64| -> Result<Array1<f64>, String> {
Ok(bernoulli_intercept_scores(&train, z, lambda))
};
let set = bernoulli_full_conformal(&mut map, alpha).expect("bernoulli set");
assert!(set.lower_tail_unresolved.is_none());
assert!(set.upper_tail_unresolved.is_none());
let holds_zero = set.members.contains(&0.0);
let holds_one = set.members.contains(&1.0);
if !(holds_zero && holds_one) {
any_strict_subset = true;
}
coverage += p_train
* ((1.0 - theta) * f64::from(u8::from(holds_zero))
+ theta * f64::from(u8::from(holds_one)));
}
assert!(
coverage >= 1.0 - alpha - 1e-12,
"exact full-conformal coverage must be ≥ 1−α for every θ: \
θ={theta} α={alpha} coverage={coverage}"
);
if alpha == 0.25 {
assert!(
any_strict_subset,
"θ={theta} α={alpha}: the set must be informative (a strict \
subset of the support on at least one dataset), otherwise \
the coverage bound is satisfied vacuously"
);
}
}
}
let train = vec![0.0; n];
let mut map = |z: f64| -> Result<Array1<f64>, String> {
Ok(bernoulli_intercept_scores(&train, z, lambda))
};
let set = bernoulli_full_conformal(&mut map, 0.25).expect("set");
assert_eq!(
set.members,
vec![0.0],
"all-zeros training data at α=0.25 must yield the set {{0}}"
);
}
#[test]
fn windowed_discrete_tail_flags_are_honest() {
let train = [3.0_f64, 4.0, 5.0, 4.0, 3.0, 5.0, 4.0];
let mut map = |z: f64| -> Result<Array1<f64>, String> {
let n1 = train.len() + 1;
let mean = (train.iter().sum::<f64>() + z) / n1 as f64;
let mut s = Array1::<f64>::zeros(n1);
for (i, &yi) in train.iter().enumerate() {
s[i] = (yi - mean).abs();
}
s[n1 - 1] = (z - mean).abs();
Ok(s)
};
let alpha = 0.2;
let wide: Vec<f64> = (0..=12).map(|k| k as f64).collect();
let set = discrete_full_conformal_window(&mut map, &wide, alpha).expect("wide");
assert!(!set.members.is_empty(), "wide window must retain the bulk");
assert!(set.lower_tail_unresolved.is_none());
assert!(set.upper_tail_unresolved.is_none());
let lo_member = *set.members.first().expect("non-empty");
let hi_member = *set.members.last().expect("non-empty");
let cut: Vec<f64> = (0..=(hi_member as i64 - 1)).map(|k| k as f64).collect();
let cut_set = discrete_full_conformal_window(&mut map, &cut, alpha).expect("cut");
assert_eq!(
cut_set.upper_tail_unresolved,
Some(cut[cut.len() - 1]),
"a window whose top edge is retained must report the upper tail unresolved"
);
assert!(
lo_member > 0.0 || cut_set.lower_tail_unresolved.is_some(),
"lower flag must mirror the same contract"
);
let exhaustive =
discrete_full_conformal_exhaustive(&mut map, &wide, alpha).expect("exhaustive");
assert!(exhaustive.lower_tail_unresolved.is_none());
assert!(exhaustive.upper_tail_unresolved.is_none());
assert!(discrete_full_conformal_window(&mut map, &[2.0, 1.0], alpha).is_err());
let mut bad_map = {
let mut flip = false;
move |z: f64| -> Result<Array1<f64>, String> {
if !z.is_finite() {
return Err("bad-map fixture received non-finite candidate".to_string());
}
flip = !flip;
Ok(Array1::<f64>::zeros(if flip { 5 } else { 4 }))
}
};
assert!(discrete_full_conformal_window(&mut bad_map, &[0.0, 1.0], alpha).is_err());
}
fn gauss_reml_fixture(n: usize, p: usize) -> (Array2<f64>, Array1<f64>, Array2<f64>) {
use std::f64::consts::PI;
let mut x = Array2::<f64>::zeros((n, p));
let mut y = Array1::<f64>::zeros(n);
for i in 0..n {
let t = i as f64 / (n as f64 - 1.0);
for j in 0..p {
x[[i, j]] = (j as f64 * PI * t).cos();
}
y[i] = (2.0 * PI * t).sin() + 0.05 * (13.0 * i as f64 + 0.7).sin();
}
let mut s = Array2::<f64>::zeros((p, p));
for j in 0..p {
s[[j, j]] = if j < 2 { 0.0 } else { (j as f64).powi(4) };
}
(x, y, s)
}
fn cosine_row(p: usize, t: f64) -> Array1<f64> {
use std::f64::consts::PI;
let mut r = Array1::<f64>::zeros(p);
for j in 0..p {
r[j] = (j as f64 * PI * t).cos();
}
r
}
#[test]
fn gaussian_reml_rho_derivatives_match_finite_difference() {
let (x, y, s) = gauss_reml_fixture(40, 8);
let x_star = cosine_row(8, 0.5);
let resp = GaussianRemlRhoResponse::new(&x, &y, &s, &x_star).expect("response");
assert_eq!(
resp.rank_s(),
6,
"quartic penalty with two zeros has rank p−2"
);
let rho = 0.4_f64;
let z = 0.3_f64;
let ev = resp.eval(rho, Some(z)).expect("eval");
let v = |r: f64, zz: f64| resp.reml_criterion(r, Some(zz)).expect("v");
let h = 1e-4_f64;
let g_fd = (v(rho + h, z) - v(rho - h, z)) / (2.0 * h);
assert!(
(ev.grad - g_fd).abs() <= 1e-4 * (1.0 + ev.grad.abs()),
"G mismatch: analytic={} fd={}",
ev.grad,
g_fd
);
let hess_fd = (v(rho + h, z) - 2.0 * v(rho, z) + v(rho - h, z)) / (h * h);
assert!(
(ev.hess - hess_fd).abs() <= 1e-3 * (1.0 + ev.hess.abs()),
"∂²Ṽ/∂ρ² mismatch: analytic={} fd={}",
ev.hess,
hess_fd
);
let k = 1e-4_f64;
let cross_fd = (v(rho + h, z + k) - v(rho + h, z - k) - v(rho - h, z + k)
+ v(rho - h, z - k))
/ (4.0 * h * k);
assert!(
(ev.cross - cross_fd).abs() <= 1e-3 * (1.0 + ev.cross.abs()),
"∂²Ṽ/∂ρ∂z mismatch: analytic={} fd={}",
ev.cross,
cross_fd
);
let ev0 = resp.eval(rho, None).expect("eval0");
assert_eq!(ev0.cross, 0.0);
let v0 = |r: f64| resp.reml_criterion(r, None).expect("v0");
let g0_fd = (v0(rho + h) - v0(rho - h)) / (2.0 * h);
assert!((ev0.grad - g0_fd).abs() <= 1e-4 * (1.0 + ev0.grad.abs()));
}
#[test]
fn gaussian_reml_smoothing_response_matches_reselection() {
let (x, y, s) = gauss_reml_fixture(45, 8);
let x_star = cosine_row(8, 0.42);
let resp = GaussianRemlRhoResponse::new(&x, &y, &s, &x_star).expect("response");
for &z in &[0.15_f64, 0.4, 0.75] {
let rho_z = resp.select_rho(Some(z)).expect("select");
let g = resp.eval(rho_z, Some(z)).expect("eval").grad;
assert!(g.abs() < 1e-6, "select_rho not stationary: G={g} at z={z}");
let analytic = resp.drho_dz(rho_z, z).expect("drho");
let hh = 2e-3_f64;
let fd = (resp.select_rho(Some(z + hh)).expect("u")
- resp.select_rho(Some(z - hh)).expect("d"))
/ (2.0 * hh);
assert!(
(analytic - fd).abs() <= 1e-3 + 5e-2 * analytic.abs(),
"dρ̂/dz IFT vs re-selection FD mismatch at z={z}: analytic={analytic} fd={fd}"
);
}
}
#[test]
fn frozen_rho_grid_check_is_conditional_when_it_accepts() {
let mut soundness_checks = 0usize;
for &(n, p) in &[(45usize, 8usize), (90, 6)] {
let (x, y, s) = gauss_reml_fixture(n, p);
for &t_star in &[0.4_f64, 0.5, 0.6] {
let x_star = cosine_row(p, t_star);
let resp = GaussianRemlRhoResponse::new(&x, &y, &s, &x_star).expect("response");
for &alpha in &[0.15_f64, 0.25] {
let cert = resp.certified_full_conformal(alpha).expect("cert");
if !matches!(cert.certificate, FrozenRhoCertificate::Certified { .. }) {
continue;
}
assert_eq!(cert.rho_probe_count, 65);
assert!(cert.observed_sup_drho_dz >= 0.0);
if soundness_checks >= 4 {
continue;
}
let frozen = &cert.frozen_set;
if frozen.intervals.is_empty() {
continue;
}
soundness_checks += 1;
let lo = frozen.intervals.first().unwrap().lo;
let hi = frozen.intervals.last().unwrap().hi;
let span = (hi - lo).max(1.0);
let z_lo = if lo.is_finite() {
lo - 0.5 * span
} else {
-12.0
};
let z_hi = if hi.is_finite() {
hi + 0.5 * span
} else {
12.0
};
let grid = 200usize;
for g in 0..=grid {
let z = z_lo + (z_hi - z_lo) * (g as f64) / (grid as f64);
let in_frozen = frozen
.intervals
.iter()
.any(|itv| z >= itv.lo && z <= itv.hi);
let honest = resp.honest_membership(z, alpha).expect("honest");
assert_eq!(
in_frozen, honest,
"conditionally accepted set disagrees with the honest ρ-re-selecting set \
at z={z} (n={n}, t*={t_star}, α={alpha}, excursion={}, lip={})",
cert.rho_excursion, cert.score_rho_lipschitz
);
}
}
}
}
}
#[test]
fn frozen_rho_certificate_refuses_under_large_smoothing_response() {
let (x, y, s) = gauss_reml_fixture(12, 6);
let x_star = cosine_row(6, 1.9); let resp = GaussianRemlRhoResponse::new(&x, &y, &s, &x_star).expect("response");
let cert = resp.certified_full_conformal(0.2).expect("cert");
assert!(
matches!(cert.certificate, FrozenRhoCertificate::Refused { .. }),
"high-leverage small-n problem should refuse; got {:?} (excursion={}, margin via set)",
cert.certificate,
cert.rho_excursion
);
assert!(cert.rho_excursion >= 0.0);
}
fn oracle_glm_refit(
x: &Array2<f64>,
y: &Array1<f64>,
s: &Array2<f64>,
x_star: &Array1<f64>,
z: f64,
mean: &dyn Fn(f64) -> f64,
weight: &dyn Fn(f64) -> f64,
nll_term: &dyn Fn(f64, f64) -> f64,
) -> Array1<f64> {
let n = x.nrows();
let p = x.ncols();
let pen_nll = |b: &Array1<f64>| -> f64 {
let mut acc = 0.0;
for i in 0..n {
acc += nll_term(x.row(i).dot(b), y[i]);
}
acc += nll_term(x_star.dot(b), z);
acc + 0.5 * b.dot(&s.dot(b))
};
let mut beta = Array1::<f64>::zeros(p);
let mut cur = pen_nll(&beta);
for _ in 0..400 {
let mut g = s.dot(&beta);
let mut h = s.clone();
for i in 0..n {
let eta = x.row(i).dot(&beta);
let r = mean(eta) - y[i];
let w = weight(eta);
for a in 0..p {
g[a] += x[[i, a]] * r;
for b in 0..p {
h[[a, b]] += w * x[[i, a]] * x[[i, b]];
}
}
}
let eta_s = x_star.dot(&beta);
let r_s = mean(eta_s) - z;
let w_s = weight(eta_s);
for a in 0..p {
g[a] += x_star[a] * r_s;
for b in 0..p {
h[[a, b]] += w_s * x_star[a] * x_star[b];
}
}
let chol = h.cholesky(Side::Lower).expect("oracle chol");
let step = chol.solvevec(&g);
if vec_norm(&step) <= 1e-13 * (1.0 + vec_norm(&beta)) {
break;
}
let mut t = 1.0_f64;
loop {
let mut cand = beta.clone();
cand.scaled_add(-t, &step);
let cand_nll = pen_nll(&cand);
if cand_nll.is_finite() && cand_nll <= cur {
beta = cand;
cur = cand_nll;
break;
}
t *= 0.5;
assert!(t > 1e-18, "oracle line search failed at z={z}");
}
}
beta
}
fn oracle_glm_membership(
x: &Array2<f64>,
y: &Array1<f64>,
x_star: &Array1<f64>,
z: f64,
alpha: f64,
beta: &Array1<f64>,
mean: &dyn Fn(f64) -> f64,
) -> bool {
let n = x.nrows();
let e_star = (z - mean(x_star.dot(beta))).abs();
let count = (0..n)
.filter(|&i| (y[i] - mean(x.row(i).dot(beta))).abs() >= e_star)
.count();
(1.0 + count as f64) > alpha * (n as f64 + 1.0)
}
#[test]
fn glm_homotopy_tracks_exact_refit_path_within_certified_bound() {
use std::f64::consts::PI;
let n = 16usize;
let p = 3usize;
let mut x = Array2::<f64>::zeros((n, p));
let mut y = Array1::<f64>::zeros(n);
for i in 0..n {
let t = i as f64 / (n as f64 - 1.0);
for j in 0..p {
x[[i, j]] = (j as f64 * PI * t).cos();
}
y[i] = (1.0 + (2.0 * PI * t).sin()).exp().round();
}
let mut s = Array2::<f64>::eye(p);
s *= 1.5;
let weights = Array1::<f64>::ones(n);
let x_star = cosine_row(p, 0.37);
let alpha = 0.2;
let eng = GlmHomotopyFullConformal::new(
CanonicalGlmFamily::PoissonLog,
&x,
&y,
&weights,
&s,
&x_star,
)
.expect("poisson engine");
let candidates: Vec<f64> = (0..=6).map(|k| k as f64).collect();
let set = eng.prediction_set(&candidates, alpha).expect("poisson set");
assert_eq!(set.candidates.len(), candidates.len());
assert_eq!(set.n_augmented, n + 1);
assert!(
set.candidates.iter().skip(1).any(|c| !c.cold_refit),
"the homotopy never tracked a single transition on a benign Poisson fixture \
— the certified predictor–corrector path is vacuous"
);
let mean_p = |eta: f64| eta.exp();
let weight_p = |eta: f64| eta.exp();
let nll_p = |eta: f64, yv: f64| eta.exp() - yv * eta;
for c in &set.candidates {
let beta_ref = oracle_glm_refit(&x, &y, &s, &x_star, c.z, &mean_p, &weight_p, &nll_p);
let mut diff = c.beta.clone();
diff.scaled_add(-1.0, &beta_ref);
let err = vec_norm(&diff);
assert!(
c.beta_error_bound.is_finite(),
"certified bound must be finite on a benign fixture (z={})",
c.z
);
assert!(
err <= c.beta_error_bound + 1e-7,
"tracked β̂({}) is {err} from the oracle refit, exceeding the certified \
corrector bound {} (+ oracle tolerance)",
c.z,
c.beta_error_bound
);
assert!(
c.beta_error_bound < 1e-6,
"certified bound {} at z={} is uselessly loose on a benign fixture",
c.beta_error_bound,
c.z
);
let member_ref = oracle_glm_membership(&x, &y, &x_star, c.z, alpha, &beta_ref, &mean_p);
assert_eq!(
c.member, member_ref,
"homotopy membership disagrees with the oracle refit at z={}",
c.z
);
}
assert_eq!(
set.members.len(),
set.candidates.iter().filter(|c| c.member).count()
);
let mut yb = Array1::<f64>::zeros(n);
for i in 0..n {
let t = i as f64 / (n as f64 - 1.0);
yb[i] = f64::from(u8::from((2.0 * PI * t).sin() > -0.2));
}
let engb = GlmHomotopyFullConformal::new(
CanonicalGlmFamily::BernoulliLogit,
&x,
&yb,
&weights,
&s,
&x_star,
)
.expect("bernoulli engine");
let setb = engb
.prediction_set(&[0.0, 1.0], alpha)
.expect("bernoulli set");
let mean_b = |eta: f64| 1.0 / (1.0 + (-eta).exp());
let weight_b = |eta: f64| {
let mu = 1.0 / (1.0 + (-eta).exp());
mu * (1.0 - mu)
};
let nll_b = |eta: f64, yv: f64| eta.max(0.0) + (-eta.abs()).exp().ln_1p() - yv * eta;
assert!(
!setb.candidates[1].cold_refit,
"the logistic third derivative is globally ≤ 1/(6√3); tracking 0→1 must certify"
);
for c in &setb.candidates {
let beta_ref = oracle_glm_refit(&x, &yb, &s, &x_star, c.z, &mean_b, &weight_b, &nll_b);
let mut diff = c.beta.clone();
diff.scaled_add(-1.0, &beta_ref);
assert!(
vec_norm(&diff) <= c.beta_error_bound + 1e-7,
"Bernoulli tracked path off the refit at z={} beyond the certified bound",
c.z
);
let member_ref =
oracle_glm_membership(&x, &yb, &x_star, c.z, alpha, &beta_ref, &mean_b);
assert_eq!(c.member, member_ref);
}
}
#[test]
fn glm_homotopy_certificate_refuses_and_falls_back_on_third_order_explosion() {
use std::f64::consts::PI;
let n = 16usize;
let p = 3usize;
let mut x = Array2::<f64>::zeros((n, p));
let mut y = Array1::<f64>::zeros(n);
for i in 0..n {
let t = i as f64 / (n as f64 - 1.0);
for j in 0..p {
x[[i, j]] = (j as f64 * PI * t).cos();
}
y[i] = (1.0 + 2.0 * t).round();
}
let mut s = Array2::<f64>::eye(p);
s *= 0.5;
let weights = Array1::<f64>::ones(n);
let mut x_star = cosine_row(p, 0.31);
x_star.mapv_inplace(|v| 6.0 * v);
let eng = GlmHomotopyFullConformal::new(
CanonicalGlmFamily::PoissonLog,
&x,
&y,
&weights,
&s,
&x_star,
)
.expect("engine");
let alpha = 0.2;
let set = eng
.prediction_set(&[1.0, 2000.0], alpha)
.expect("set under extreme jump");
assert!(
set.refit_fallbacks >= 1,
"a 1 → 2000 Poisson candidate jump at ‖x_*‖ = {} must exhaust the certified \
step budget (b‴ = eʸ explodes along the path) and fall back to a cold refit; \
got {} fallbacks",
x_star.dot(&x_star).sqrt(),
set.refit_fallbacks
);
assert!(
set.candidates[1].cold_refit,
"the candidate decided through the fallback must be marked cold"
);
let mean_p = |eta: f64| eta.exp();
let weight_p = |eta: f64| eta.exp();
let nll_p = |eta: f64, yv: f64| eta.exp() - yv * eta;
for c in &set.candidates {
let beta_ref = oracle_glm_refit(&x, &y, &s, &x_star, c.z, &mean_p, &weight_p, &nll_p);
let mut diff = c.beta.clone();
diff.scaled_add(-1.0, &beta_ref);
assert!(
vec_norm(&diff) <= c.beta_error_bound + 1e-6,
"fallback coefficients at z={} drifted {} from the oracle refit (bound {})",
c.z,
vec_norm(&diff),
c.beta_error_bound
);
let member_ref = oracle_glm_membership(&x, &y, &x_star, c.z, alpha, &beta_ref, &mean_p);
assert_eq!(
c.member, member_ref,
"fallback membership at z={} disagrees with the oracle refit",
c.z
);
}
}
#[test]
fn jackknife_plus_matches_brute_force_loo_refits() {
use std::f64::consts::PI;
let n = 20usize;
let p = 4usize;
let mut x = Array2::<f64>::zeros((n, p));
let mut y = Array1::<f64>::zeros(n);
for i in 0..n {
let t = i as f64 / (n as f64 - 1.0);
for j in 0..p {
x[[i, j]] = (j as f64 * PI * t).cos();
}
y[i] = (2.0 * PI * t).sin() + 0.15 * (11.0 * i as f64 + 0.3).sin();
}
let mut s = Array2::<f64>::eye(p);
s *= 0.7;
let weights = Array1::<f64>::ones(n);
let x_star = cosine_row(p, 0.43);
let alpha = 0.2;
let jk = gaussian_jackknife_plus(&x, &y, &weights, &s, &x_star, alpha).expect("jk+");
let mut lower_vals: Vec<f64> = Vec::with_capacity(n);
let mut upper_vals: Vec<f64> = Vec::with_capacity(n);
for i in 0..n {
let mut m = s.clone();
let mut rhs = Array1::<f64>::zeros(p);
for r in 0..n {
if r == i {
continue;
}
for a in 0..p {
rhs[a] += x[[r, a]] * y[r];
for b in 0..p {
m[[a, b]] += x[[r, a]] * x[[r, b]];
}
}
}
let chol = m.cholesky(Side::Lower).expect("loo chol");
let beta = chol.solvevec(&rhs);
let mu_star = x_star.dot(&beta);
let resid = (y[i] - x.row(i).dot(&beta)).abs();
lower_vals.push(mu_star - resid);
upper_vals.push(mu_star + resid);
}
lower_vals.sort_by(|a, b| a.partial_cmp(b).expect("finite"));
upper_vals.sort_by(|a, b| a.partial_cmp(b).expect("finite"));
let rank_hi = ((n as f64 + 1.0) * (1.0 - alpha)).ceil() as usize;
let rank_lo = ((n as f64 + 1.0) * alpha).floor() as usize;
assert!(
rank_lo >= 1 && rank_hi <= n,
"fixture sized to certify finite endpoints"
);
let lo_bf = lower_vals[rank_lo - 1];
let hi_bf = upper_vals[rank_hi - 1];
assert!(
(jk.lo - lo_bf).abs() <= 1e-8 * (1.0 + lo_bf.abs()),
"jackknife+ lower endpoint {} disagrees with brute-force LOO refits {}",
jk.lo,
lo_bf
);
assert!(
(jk.hi - hi_bf).abs() <= 1e-8 * (1.0 + hi_bf.abs()),
"jackknife+ upper endpoint {} disagrees with brute-force LOO refits {}",
jk.hi,
hi_bf
);
assert!(jk.certifies_finite());
assert!(jk.lo < jk.hi);
assert_eq!(jk.n, n);
let tight = gaussian_jackknife_plus(&x, &y, &weights, &s, &x_star, 0.04).expect("tight");
assert!(tight.hi.is_infinite() && tight.hi > 0.0);
assert!(tight.lo.is_infinite() && tight.lo < 0.0);
assert!(!tight.certifies_finite());
let one_pred = Array1::<f64>::from(vec![1.0]);
let one_res = Array1::<f64>::from(vec![0.5]);
let tiny = jackknife_plus_interval(&one_pred, &one_res, 0.2).expect("tiny");
assert!(tiny.hi.is_infinite() && tiny.lo.is_infinite());
}
#[test]
fn jackknife_plus_stats_replay_matches_single_shot() {
use std::f64::consts::PI;
let n = 18usize;
let p = 4usize;
let mut x = Array2::<f64>::zeros((n, p));
let mut y = Array1::<f64>::zeros(n);
for i in 0..n {
let t = i as f64 / (n as f64 - 1.0);
for j in 0..p {
x[[i, j]] = (j as f64 * PI * t).cos();
}
y[i] = (2.0 * PI * t).sin() + 0.2 * (7.0 * i as f64 + 0.9).sin();
}
let mut s = Array2::<f64>::eye(p);
s *= 0.55;
let weights = Array1::<f64>::ones(n);
let alpha = 0.1;
let stats = GaussianJackknifePlusStats::new(&x, &y, &weights, &s).expect("stats");
assert_eq!(stats.n(), n);
assert_eq!(stats.p(), p);
for k in 0..7 {
let x_star = cosine_row(p, 0.13 + 0.11 * k as f64);
let single =
gaussian_jackknife_plus(&x, &y, &weights, &s, &x_star, alpha).expect("single");
let replay = stats.interval(&x_star, alpha).expect("replay");
assert!(
(single.lo - replay.lo).abs() <= 1e-12 * (1.0 + single.lo.abs()),
"stats replay lower {} != single-shot {} at test point {k}",
replay.lo,
single.lo
);
assert!(
(single.hi - replay.hi).abs() <= 1e-12 * (1.0 + single.hi.abs()),
"stats replay upper {} != single-shot {} at test point {k}",
replay.hi,
single.hi
);
assert_eq!(single.n, replay.n);
}
let mut bad_w = Array1::<f64>::ones(n);
bad_w[3] = 2.0;
assert!(GaussianJackknifePlusStats::new(&x, &y, &bad_w, &s).is_err());
}
#[test]
fn jackknife_plus_empirical_coverage_smoke() {
use std::f64::consts::PI;
let n = 40usize;
let p = 4usize;
let alpha = 0.1; let s = {
let mut s = Array2::<f64>::eye(p);
s *= 0.4;
s
};
let weights = Array1::<f64>::ones(n);
let mut state: u64 = 0x9e37_79b9_7f4a_7c15;
let unif = |state: &mut u64| {
*state = state
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
((*state >> 11) as f64) / ((1u64 << 53) as f64)
};
let normal = |state: &mut u64| {
let u1 = unif(state).max(1e-12);
let u2 = unif(state);
(-2.0 * u1.ln()).sqrt() * (2.0 * PI * u2).cos()
};
let beta_true = [0.8_f64, -0.5, 0.3, 0.15];
let sigma = 0.5_f64;
let design_row = |z: f64| {
let mut r = Array1::<f64>::zeros(p);
for j in 0..p {
r[j] = (j as f64 * PI * z).cos();
}
r
};
let trials = 200usize;
let mut covered = 0usize;
for _ in 0..trials {
let mut x = Array2::<f64>::zeros((n, p));
let mut yv = Array1::<f64>::zeros(n);
for i in 0..n {
let z = unif(&mut state);
let row = design_row(z);
let mut eta = 0.0;
for j in 0..p {
x[[i, j]] = row[j];
eta += beta_true[j] * row[j];
}
yv[i] = eta + sigma * normal(&mut state);
}
let stats = match GaussianJackknifePlusStats::new(&x, &yv, &weights, &s) {
Ok(s) => s,
Err(_) => continue,
};
let z_star = unif(&mut state);
let x_star = design_row(z_star);
let mut eta_star = 0.0;
for j in 0..p {
eta_star += beta_true[j] * x_star[j];
}
let y_star = eta_star + sigma * normal(&mut state);
let itv = stats.interval(&x_star, alpha).expect("coverage interval");
assert!(
itv.certifies_finite(),
"coverage trial produced infinite width"
);
if y_star >= itv.lo && y_star <= itv.hi {
covered += 1;
}
}
let rate = covered as f64 / trials as f64;
assert!(
rate >= 0.74,
"jackknife+ empirical coverage {rate} fell below the 1−2α target with slack"
);
}
}