use super::*;
use gam_problem::{InverseLink, LikelihoodSpec, ResponseFamily, StandardLink};
use gam_terms::smooth::BlockwisePenalty;
use ndarray::{Array1, Array2};
use rand::rngs::StdRng;
use rand::{RngExt, SeedableRng};
const N: usize = 300;
const P: usize = 160;
const NOISE_SD: f64 = 0.02;
const Z95: f64 = 1.959964;
fn erf_approx(x: f64) -> f64 {
let sign = if x < 0.0 { -1.0 } else { 1.0 };
let x = x.abs();
let t = 1.0 / (1.0 + 0.3275911 * x);
let y = 1.0
- (((((1.061405429 * t - 1.453152027) * t) + 1.421413741) * t - 0.284496736) * t
+ 0.254829592)
* t
* (-x * x).exp();
sign * y
}
fn normal_cdf(z: f64) -> f64 {
0.5 * (1.0 + erf_approx(z / std::f64::consts::SQRT_2))
}
fn ks_vs_uniform(mut u: Vec<f64>) -> f64 {
u.sort_by(|a, b| a.partial_cmp(b).unwrap());
let n = u.len() as f64;
let mut d = 0.0f64;
for (i, &ui) in u.iter().enumerate() {
let lo = i as f64 / n;
let hi = (i as f64 + 1.0) / n;
d = d.max((ui - lo).abs()).max((hi - ui).abs());
}
d
}
fn box_muller(rng: &mut StdRng) -> f64 {
let u1: f64 = rng.random::<f64>().max(1e-12);
let u2: f64 = rng.random::<f64>();
(-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos()
}
fn quad_form(cov: &Array2<f64>, x: &Array2<f64>, row: usize, p: usize) -> f64 {
let mut acc = 0.0;
for a in 0..p {
let xa = x[[row, a]];
if xa == 0.0 {
continue;
}
let mut inner = 0.0;
for b in 0..p {
inner += cov[[a, b]] * x[[row, b]];
}
acc += xa * inner;
}
acc
}
#[test]
fn gaussian_observation_interval_calibrated_high_edf_1765() {
let mut rng = StdRng::seed_from_u64(13);
let mut x = Array2::<f64>::zeros((N, P));
let mut mean_true = Array1::<f64>::zeros(N);
for i in 0..N {
let t = (i as f64) / ((N - 1) as f64);
x[[i, 0]] = 1.0;
for j in 1..P {
let freq = ((j + 1) / 2) as f64;
let arg = std::f64::consts::PI * freq * t;
x[[i, j]] = if j % 2 == 1 { arg.sin() } else { arg.cos() };
}
let mut m = 0.0;
for k in 1..=8 {
m += (1.0 / k as f64) * (std::f64::consts::PI * k as f64 * t).sin();
}
mean_true[i] = m;
}
let mut y = Array1::<f64>::zeros(N);
for i in 0..N {
y[i] = mean_true[i] + box_muller(&mut rng) * NOISE_SD;
}
let mut s = Array2::<f64>::zeros((P, P));
for j in 0..P {
s[[j, j]] = 1.0;
}
let weights = Array1::<f64>::ones(N);
let offset = Array1::<f64>::zeros(N);
let penalty = BlockwisePenalty::new(0..P, s.clone());
let opts = FitOptions {
compute_inference: true,
max_iter: 200,
tol: 1e-11,
nullspace_dims: vec![0],
..FitOptions::default()
};
let fit = fit_gam(
x.clone(),
y.view(),
weights.view(),
offset.view(),
&[penalty],
LikelihoodSpec::new(
ResponseFamily::Gaussian,
InverseLink::Standard(StandardLink::Identity),
),
&opts,
)
.expect("Gaussian high-EDF fit");
let edf = fit.edf_total().expect("edf_total with inference");
let sigma = fit.standard_deviation;
let fitted: Array1<f64> = x.dot(&fit.beta);
let rss: f64 = y
.iter()
.zip(fitted.iter())
.map(|(&yi, &fi)| (yi - fi).powi(2))
.sum();
let nf = N as f64;
let edf_ratio = edf / nf;
assert!(
edf_ratio > 0.15,
"fixture must reach the high-EDF regime for #1765, got edf/n={edf_ratio:.3} (edf={edf:.1})"
);
let sig2_edf = rss / (nf - edf);
let sig2_mle = rss / nf;
let rel_scale = (sigma * sigma - sig2_edf).abs() / sig2_edf.max(1e-18);
assert!(
rel_scale < 5e-3,
"σ̂²={:.10} must equal RSS/(n-edf)={sig2_edf:.10} (rel={rel_scale:.4}), not the MLE \
RSS/n={sig2_mle:.10} (#1765 scale)",
sigma * sigma
);
assert!(
(sig2_edf - sig2_mle) / sig2_edf > 0.10,
"residual-df vs MLE scale gap too small to test (edf/n={edf_ratio:.3})"
);
let vp = fit
.beta_covariance_corrected()
.expect("smoothing-corrected covariance Vp");
let mut hits_full = 0usize;
let mut hits_noise_only = 0usize;
let mut hits_mle = 0usize;
let mut mean_etavar = 0.0;
let mut pit_full = Vec::with_capacity(N);
for i in 0..N {
let y_new = mean_true[i] + box_muller(&mut rng) * NOISE_SD;
let etavar = quad_form(vp, &x, i, P);
mean_etavar += etavar;
let mu = fitted[i];
let sd_full = (etavar + sigma * sigma).max(0.0).sqrt();
let sd_noise_only = sigma;
let sd_mle = (etavar * (sig2_mle / (sigma * sigma)) + sig2_mle).max(0.0).sqrt();
let dev = (y_new - mu).abs();
if dev <= Z95 * sd_full {
hits_full += 1;
}
if dev <= Z95 * sd_noise_only {
hits_noise_only += 1;
}
if dev <= Z95 * sd_mle {
hits_mle += 1;
}
pit_full.push(normal_cdf((y_new - mu) / sd_full));
}
mean_etavar /= nf;
let cov_full = hits_full as f64 / nf;
let cov_noise_only = hits_noise_only as f64 / nf;
let cov_mle = hits_mle as f64 / nf;
let ks_full = ks_vs_uniform(pit_full);
assert!(
mean_etavar > 0.15 * sigma * sigma,
"mean-posterior variance {mean_etavar:.8} should be a real share of σ²={:.8} at high EDF",
sigma * sigma
);
assert!(
(0.925..=0.975).contains(&cov_full),
"full observation-band coverage {cov_full:.3} off nominal 0.95 (edf/n={edf_ratio:.3}, \
σ̂={sigma:.6}); the high-EDF Gaussian observation interval is mis-calibrated (#1765)"
);
assert!(
ks_full < 0.07,
"full-band PIT KS {ks_full:.4} too large (issue reported ~0.18); the predictive scale \
at high EDF is mis-estimated (#1765)"
);
assert!(
cov_noise_only < cov_full - 0.02,
"dropping Var(μ̂) should undercover at high EDF: noise-only coverage {cov_noise_only:.3} \
vs full {cov_full:.3} (#1765)"
);
assert!(
cov_mle < cov_full - 0.02,
"the MLE-collapsed scale should undercover: MLE coverage {cov_mle:.3} vs full \
{cov_full:.3} (#1765)"
);
}