use ndarray::{Array1, Array2, ArrayView1};
pub const PSI_GRAM_CERT_RTOL: f64 = 1.0e-9;
pub const PSI_GRAM_SPOT_RTOL: f64 = 1.0e-10;
pub const PSI_GRAM_NODE_LADDER: [usize; 1] = [513];
pub const PSI_GRAM_SPOT_POINTS: usize = 3;
pub const PSI_GRAM_SKIP_RANK_RTOL: f64 = 1.0e-10;
pub const PSI_GRAM_SKIP_PROJ_ATOL: f64 = 1.0e-7;
pub struct PsiGramTensor {
psi_lo: f64,
psi_hi: f64,
grad_psi_lo: f64,
grad_psi_hi: f64,
n_coeff: usize,
k: usize,
gram: Vec<Array2<f64>>,
rhs: Vec<Array1<f64>>,
zt_w_z: f64,
}
enum BuildOutcome {
EvalFailed(String),
TailNotCertified,
Candidate(PsiGramTensor),
}
fn cheb_t(x: f64, n: usize) -> Vec<f64> {
let mut t = vec![0.0; n];
if n > 0 {
t[0] = 1.0;
}
if n > 1 {
t[1] = x;
}
for d in 2..n {
t[d] = 2.0 * x * t[d - 1] - t[d - 2];
}
t
}
fn cheb_t_prime(x: f64, n: usize) -> Vec<f64> {
let mut u = vec![0.0; n.max(1)];
if !u.is_empty() {
u[0] = 1.0;
}
if n > 1 {
u[1] = 2.0 * x;
}
for d in 2..n {
u[d] = 2.0 * x * u[d - 1] - u[d - 2];
}
let mut tp = vec![0.0; n];
for d in 1..n {
tp[d] = d as f64 * u[d - 1];
}
tp
}
fn cheb_t_double_prime(x: f64, n: usize) -> Vec<f64> {
let mut t = vec![0.0; n];
let mut tp = vec![0.0; n];
let mut tpp = vec![0.0; n];
if n > 0 {
t[0] = 1.0; }
if n > 1 {
t[1] = x; tp[1] = 1.0;
}
for d in 2..n {
t[d] = 2.0 * x * t[d - 1] - t[d - 2];
tp[d] = 2.0 * t[d - 1] + 2.0 * x * tp[d - 1] - tp[d - 2];
tpp[d] = 4.0 * tp[d - 1] + 2.0 * x * tpp[d - 1] - tpp[d - 2];
}
tpp
}
fn kahan_scaled_add_array2(
out: &mut Array2<f64>,
comp: &mut Array2<f64>,
scale: f64,
x: &Array2<f64>,
) {
for ((slot, c), &value) in out.iter_mut().zip(comp.iter_mut()).zip(x.iter()) {
let y = scale * value - *c;
let t = *slot + y;
*c = (t - *slot) - y;
*slot = t;
}
}
fn kahan_scaled_add_array1(
out: &mut Array1<f64>,
comp: &mut Array1<f64>,
scale: f64,
x: &Array1<f64>,
) {
for ((slot, c), &value) in out.iter_mut().zip(comp.iter_mut()).zip(x.iter()) {
let y = scale * value - *c;
let t = *slot + y;
*c = (t - *slot) - y;
*slot = t;
}
}
fn subspace_spectral_distance(m: &Array2<f64>) -> Option<f64> {
use crate::faer_ndarray::FaerEigh;
if m.iter().any(|v| !v.is_finite()) {
return None;
}
let msym = 0.5 * (m + &m.t());
let (evals, _evecs) = msym.eigh(faer::Side::Lower).ok()?;
Some(evals.iter().fold(0.0_f64, |acc, &v| acc.max(v.abs())))
}
impl PsiGramTensor {
pub fn build(
mut eval_design: impl FnMut(f64) -> Result<Array2<f64>, String>,
weights: ArrayView1<'_, f64>,
z: ArrayView1<'_, f64>,
psi_lo: f64,
psi_hi: f64,
) -> Result<Self, String> {
if !(psi_lo.is_finite() && psi_hi.is_finite()) || psi_hi <= psi_lo {
return Err(format!(
"ψ window must be finite with psi_hi > psi_lo (got [{psi_lo}, {psi_hi}])"
));
}
let mut last_uncertified: Option<usize> = None;
for &m in PSI_GRAM_NODE_LADDER.iter() {
match Self::build_at(&mut eval_design, weights, z, psi_lo, psi_hi, m) {
BuildOutcome::EvalFailed(why) => {
return Err(format!(
"exact design evaluation failed at ladder rung m={m}: {why}"
));
}
BuildOutcome::TailNotCertified => {
last_uncertified = Some(m);
continue;
}
BuildOutcome::Candidate(mut candidate) => {
if candidate.spot_check(&mut eval_design, weights) {
candidate.grad_psi_lo = psi_lo;
candidate.grad_psi_hi = psi_hi;
return Ok(candidate);
}
last_uncertified = Some(m);
}
}
}
let top_rung = PSI_GRAM_NODE_LADDER.last().copied().unwrap_or(0);
Err(match last_uncertified {
Some(m) => format!(
"Chebyshev series did not certify within the node ladder (reached rung \
m={m}, top rung {top_rung}): the design is not analytic enough in ψ over \
[{psi_lo}, {psi_hi}] (a kink or non-finite curvature), so the n-free \
tensor is refused and the exact per-trial path must be used"
),
None => "empty Chebyshev node ladder".to_string(),
})
}
fn build_at(
eval_design: &mut impl FnMut(f64) -> Result<Array2<f64>, String>,
weights: ArrayView1<'_, f64>,
z: ArrayView1<'_, f64>,
psi_lo: f64,
psi_hi: f64,
m: usize,
) -> BuildOutcome {
let mut nodes_x = vec![0.0_f64; m];
let mut designs: Vec<Array2<f64>> = Vec::with_capacity(m);
for (i, x_slot) in nodes_x.iter_mut().enumerate() {
let x = (std::f64::consts::PI * (2 * i + 1) as f64 / (2 * m) as f64).cos();
*x_slot = x;
let psi = 0.5 * (psi_lo + psi_hi) + 0.5 * (psi_hi - psi_lo) * x;
let design = match eval_design(psi) {
Ok(design) => design,
Err(why) => {
return BuildOutcome::EvalFailed(format!(
"design evaluation refused at node ψ={psi:.6}: {why}"
));
}
};
if design.iter().any(|v| !v.is_finite()) {
return BuildOutcome::EvalFailed(format!(
"design at node ψ={psi:.6} contains a non-finite entry"
));
}
designs.push(design);
}
let (n, k) = designs[0].dim();
if designs.iter().any(|d| d.dim() != (n, k)) {
return BuildOutcome::EvalFailed(format!(
"design dimensions vary across ψ nodes (first node is {n}×{k})"
));
}
if weights.len() != n || z.len() != n || n == 0 || k == 0 {
return BuildOutcome::EvalFailed(format!(
"incompatible build inputs: design {n}×{k}, weights.len()={}, z.len()={}",
weights.len(),
z.len()
));
}
let t_at_nodes: Vec<Vec<f64>> = nodes_x.iter().map(|&x| cheb_t(x, m)).collect();
let mut coeff_slabs: Vec<Array2<f64>> = Vec::with_capacity(m);
for d in 0..m {
let gamma = if d == 0 { 1.0 } else { 2.0 };
let mut slab = Array2::<f64>::zeros((n, k));
let mut slab_comp = Array2::<f64>::zeros((n, k));
for (i, design) in designs.iter().enumerate() {
let wgt = gamma / m as f64 * t_at_nodes[i][d];
kahan_scaled_add_array2(&mut slab, &mut slab_comp, wgt, design);
}
coeff_slabs.push(slab);
}
let mut col_scale = vec![0.0_f64; k];
for slab in &coeff_slabs {
for (j, scale) in col_scale.iter_mut().enumerate() {
for i in 0..n {
*scale = scale.max(slab[[i, j]].abs());
}
}
}
let tail_start = m - (m / 4).max(1);
for slab in coeff_slabs.iter().skip(tail_start) {
for (j, &scale) in col_scale.iter().enumerate() {
let bound = PSI_GRAM_CERT_RTOL * scale.max(1e-300);
for i in 0..n {
if slab[[i, j]].abs() > bound {
return BuildOutcome::TailNotCertified;
}
}
}
}
let mut wz = Array1::<f64>::zeros(z.len());
let mut zt_w_z = 0.0_f64;
let mut zt_w_z_comp = 0.0_f64;
for ((slot, &w), &zv) in wz.iter_mut().zip(weights.iter()).zip(z.iter()) {
*slot = w * zv;
let add = w * zv * zv;
let y = add - zt_w_z_comp;
let t = zt_w_z + y;
zt_w_z_comp = (t - zt_w_z) - y;
zt_w_z = t;
}
let mut node_grams: Vec<Array2<f64>> = Vec::with_capacity(m);
let mut node_rhs: Vec<Array1<f64>> = Vec::with_capacity(m);
for design in &designs {
let mut wd = design.clone();
for (mut row, &w) in wd.outer_iter_mut().zip(weights.iter()) {
row.mapv_inplace(|v| v * w);
}
node_grams.push(design.t().dot(&wd));
node_rhs.push(design.t().dot(&wz));
}
let mut gram: Vec<Array2<f64>> = (0..m).map(|_| Array2::<f64>::zeros((k, k))).collect();
let mut gram_comp: Vec<Array2<f64>> =
(0..m).map(|_| Array2::<f64>::zeros((k, k))).collect();
let mut rhs: Vec<Array1<f64>> = (0..m).map(|_| Array1::<f64>::zeros(k)).collect();
let mut rhs_comp: Vec<Array1<f64>> = (0..m).map(|_| Array1::<f64>::zeros(k)).collect();
for d in 0..m {
let gamma = if d == 0 { 1.0 } else { 2.0 };
for i in 0..m {
let wgt = gamma / m as f64 * t_at_nodes[i][d];
kahan_scaled_add_array2(&mut gram[d], &mut gram_comp[d], wgt, &node_grams[i]);
kahan_scaled_add_array1(&mut rhs[d], &mut rhs_comp[d], wgt, &node_rhs[i]);
}
}
drop(node_grams);
drop(node_rhs);
drop(designs);
drop(coeff_slabs);
BuildOutcome::Candidate(Self {
psi_lo,
psi_hi,
grad_psi_lo: psi_lo,
grad_psi_hi: psi_hi,
n_coeff: m,
k,
gram,
rhs,
zt_w_z,
})
}
fn spot_check(
&self,
eval_design: &mut impl FnMut(f64) -> Result<Array2<f64>, String>,
weights: ArrayView1<'_, f64>,
) -> bool {
for s in 0..PSI_GRAM_SPOT_POINTS {
let frac = ((s as f64 + 1.0) * 0.618_033_988_749_894_9).fract();
let psi = self.psi_lo + frac * (self.psi_hi - self.psi_lo);
let Ok(design) = eval_design(psi) else {
return false;
};
let mut wd = design.clone();
for (mut row, &w) in wd.outer_iter_mut().zip(weights.iter()) {
row.mapv_inplace(|v| v * w);
}
let exact = design.t().dot(&wd);
let assembled = self.gram_at(psi);
let scale = exact
.iter()
.fold(0.0_f64, |acc, &v| acc.max(v.abs()))
.max(1e-300);
for (a, b) in assembled.iter().zip(exact.iter()) {
if (a - b).abs() > PSI_GRAM_SPOT_RTOL * scale {
return false;
}
}
}
true
}
fn range_projector(&self, psi: f64, rank_rtol: f64) -> Option<(Array2<f64>, usize)> {
use crate::faer_ndarray::FaerEigh;
let g = self.gram_at(psi);
if g.iter().any(|v| !v.is_finite()) {
return None;
}
let gsym = 0.5 * (&g + &g.t());
let (evals, evecs) = gsym.eigh(faer::Side::Lower).ok()?;
let lambda_max = evals.iter().cloned().fold(0.0_f64, f64::max);
if !(lambda_max > 0.0) {
return None;
}
let cutoff = rank_rtol * lambda_max;
let mut proj = Array2::<f64>::zeros((self.k, self.k));
let mut rank = 0usize;
for (col, &lam) in evals.iter().enumerate() {
if lam > cutoff {
let u = evecs.column(col);
for a in 0..self.k {
for b in 0..self.k {
proj[[a, b]] += u[a] * u[b];
}
}
rank += 1;
}
}
Some((proj, rank))
}
pub fn reduced_basis_equal(&self, psi_ref: f64, psi_new: f64) -> bool {
if !(self.contains(psi_ref) && self.contains(psi_new)) {
return false;
}
if psi_ref == psi_new {
return true;
}
let Some((p_ref, r_ref)) = self.range_projector(psi_ref, PSI_GRAM_SKIP_RANK_RTOL) else {
return false;
};
let Some((p_new, r_new)) = self.range_projector(psi_new, PSI_GRAM_SKIP_RANK_RTOL) else {
return false;
};
if r_ref != r_new {
return false;
}
let diff = &p_ref - &p_new;
subspace_spectral_distance(&diff)
.map(|d| d <= PSI_GRAM_SKIP_PROJ_ATOL)
.unwrap_or(false)
}
pub fn contains(&self, psi: f64) -> bool {
psi.is_finite() && psi >= self.psi_lo && psi <= self.psi_hi
}
pub fn contains_for_gradient(&self, psi: f64) -> bool {
psi.is_finite()
&& self.grad_psi_lo.is_finite()
&& self.grad_psi_hi.is_finite()
&& psi >= self.grad_psi_lo
&& psi <= self.grad_psi_hi
}
fn mapped(&self, psi: f64) -> f64 {
(2.0 * psi - (self.psi_lo + self.psi_hi)) / (self.psi_hi - self.psi_lo)
}
pub fn gram_at(&self, psi: f64) -> Array2<f64> {
let x = self.mapped(psi);
let t = cheb_t(x, self.gram.len());
let mut out = Array2::<f64>::zeros((self.k, self.k));
let mut comp = Array2::<f64>::zeros((self.k, self.k));
for (d, td) in t.iter().enumerate() {
kahan_scaled_add_array2(&mut out, &mut comp, *td, &self.gram[d]);
}
out
}
pub fn rhs_at(&self, psi: f64) -> Array1<f64> {
let x = self.mapped(psi);
let t = cheb_t(x, self.n_coeff);
let mut out = Array1::<f64>::zeros(self.k);
let mut comp = Array1::<f64>::zeros(self.k);
for (d, td) in t.iter().enumerate() {
kahan_scaled_add_array1(&mut out, &mut comp, *td, &self.rhs[d]);
}
out
}
pub fn dgram_dpsi(&self, psi: f64) -> Array2<f64> {
let x = self.mapped(psi);
let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
let tp = cheb_t_prime(x, self.gram.len());
let mut out = Array2::<f64>::zeros((self.k, self.k));
let mut comp = Array2::<f64>::zeros((self.k, self.k));
for (d, tpd) in tp.iter().enumerate() {
kahan_scaled_add_array2(&mut out, &mut comp, *tpd * dx_dpsi, &self.gram[d]);
}
out
}
pub fn drhs_dpsi(&self, psi: f64) -> Array1<f64> {
let x = self.mapped(psi);
let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
let tp = cheb_t_prime(x, self.n_coeff);
let mut out = Array1::<f64>::zeros(self.k);
let mut comp = Array1::<f64>::zeros(self.k);
for (d, tpd) in tp.iter().enumerate() {
kahan_scaled_add_array1(&mut out, &mut comp, *tpd * dx_dpsi, &self.rhs[d]);
}
out
}
pub fn d2gram_dpsi2(&self, psi: f64) -> Array2<f64> {
let x = self.mapped(psi);
let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
let dx_dpsi_sq = dx_dpsi * dx_dpsi;
let tpp = cheb_t_double_prime(x, self.gram.len());
let mut out = Array2::<f64>::zeros((self.k, self.k));
let mut comp = Array2::<f64>::zeros((self.k, self.k));
for (d, tppd) in tpp.iter().enumerate() {
kahan_scaled_add_array2(&mut out, &mut comp, *tppd * dx_dpsi_sq, &self.gram[d]);
}
out
}
pub fn d2rhs_dpsi2(&self, psi: f64) -> Array1<f64> {
let x = self.mapped(psi);
let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
let dx_dpsi_sq = dx_dpsi * dx_dpsi;
let tpp = cheb_t_double_prime(x, self.n_coeff);
let mut out = Array1::<f64>::zeros(self.k);
let mut comp = Array1::<f64>::zeros(self.k);
for (d, tppd) in tpp.iter().enumerate() {
kahan_scaled_add_array1(&mut out, &mut comp, *tppd * dx_dpsi_sq, &self.rhs[d]);
}
out
}
pub fn gaussian_fixed_cache_at(&self, psi: f64) -> crate::pirls::GaussianFixedCache {
crate::pirls::GaussianFixedCache {
xtwx_orig: self.gram_at(psi),
xtwy_orig: self.rhs_at(psi),
centered_weighted_y_sq: self.zt_w_z,
row_prediction_is_stale: true,
xtwx_sparse_orig: None,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn synth_design(psi: f64, n: usize, k: usize) -> Result<Array2<f64>, String> {
let mut x = Array2::<f64>::zeros((n, k));
for i in 0..n {
for j in 0..k {
let r = 0.05 + (i as f64 + 1.0) * (j as f64 + 1.0) / (n as f64 * k as f64) * 3.0;
if j == k - 1 {
x[[i, j]] = r * r * r;
} else {
let s = r * psi.exp();
x[[i, j]] = (1.0 + s) * (-s).exp();
}
}
}
Ok(x)
}
fn exact_gram(psi: f64, n: usize, k: usize, w: &Array1<f64>) -> Array2<f64> {
let design = synth_design(psi, n, k).unwrap();
let mut wd = design.clone();
for (mut row, &wi) in wd.outer_iter_mut().zip(w.iter()) {
row.mapv_inplace(|v| v * wi);
}
design.t().dot(&wd)
}
#[test]
fn psi_gram_tensor_matches_exact_gram_and_fd_gradient() {
let (n, k) = (160usize, 7usize);
let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.5 * ((i % 3) as f64)));
let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.37).sin()));
let (psi_lo, psi_hi) = (-1.2_f64, 1.0_f64);
let tensor = PsiGramTensor::build(
|psi| synth_design(psi, n, k),
w.view(),
z.view(),
psi_lo,
psi_hi,
)
.expect("analytic synthetic design must certify");
for &psi in &[-1.1, -0.63, 0.0, 0.41, 0.97] {
assert!(tensor.contains(psi));
let exact = exact_gram(psi, n, k, &w);
let fast = tensor.gram_at(psi);
let scale = exact.iter().fold(0.0_f64, |a, &v| a.max(v.abs()));
for (a, b) in fast.iter().zip(exact.iter()) {
assert!(
(a - b).abs() <= 1e-9 * scale,
"gram mismatch at psi={psi}: fast={a}, exact={b}"
);
}
let design = synth_design(psi, n, k).unwrap();
let mut wz = Array1::<f64>::zeros(n);
for ((slot, &wi), &zi) in wz.iter_mut().zip(w.iter()).zip(z.iter()) {
*slot = wi * zi;
}
let exact_rhs = design.t().dot(&wz);
let fast_rhs = tensor.rhs_at(psi);
let rscale = exact_rhs.iter().fold(0.0_f64, |a, &v| a.max(v.abs()));
for (a, b) in fast_rhs.iter().zip(exact_rhs.iter()) {
assert!(
(a - b).abs() <= 1e-9 * rscale,
"rhs mismatch at psi={psi}: fast={a}, exact={b}"
);
}
let h = 1e-5;
let g_plus = exact_gram(psi + h, n, k, &w);
let g_minus = exact_gram(psi - h, n, k, &w);
let dg = tensor.dgram_dpsi(psi);
let dscale = dg.iter().fold(0.0_f64, |a, &v| a.max(v.abs())).max(1e-12);
for ((a, p), m_) in dg.iter().zip(g_plus.iter()).zip(g_minus.iter()) {
let fd = (p - m_) / (2.0 * h);
assert!(
(a - fd).abs() <= 1e-5 * dscale,
"dgram/dpsi mismatch at psi={psi}: analytic={a}, fd={fd}"
);
}
}
assert!(!tensor.contains(psi_hi + 0.5));
assert!(!tensor.contains(psi_lo - 0.5));
for &psi in &[-0.9, 0.2, 0.8] {
let cache = tensor.gaussian_fixed_cache_at(psi);
let design = synth_design(psi, n, k).unwrap();
let mut wd = design.clone();
for (mut row, &wi) in wd.outer_iter_mut().zip(w.iter()) {
row.mapv_inplace(|v| v * wi);
}
let exact_gram = design.t().dot(&wd);
let exact_rhs = wd.t().dot(&z);
let exact_ztwz: f64 = w.iter().zip(z.iter()).map(|(&wi, &zi)| wi * zi * zi).sum();
assert!(
(cache.centered_weighted_y_sq - exact_ztwz).abs()
<= 1e-12 * exact_ztwz.abs().max(1e-300),
"zᵀWz drift: cache={}, exact={exact_ztwz}",
cache.centered_weighted_y_sq
);
let solve = |g: &Array2<f64>, r: &Array1<f64>| -> Array1<f64> {
let mut a = g.clone();
for i in 0..k {
a[[i, i]] += 1.0;
}
let mut aug = Array2::<f64>::zeros((k, k + 1));
aug.slice_mut(ndarray::s![.., ..k]).assign(&a);
aug.slice_mut(ndarray::s![.., k]).assign(r);
for col in 0..k {
let piv = (col..k)
.max_by(|&p, &q| aug[[p, col]].abs().total_cmp(&aug[[q, col]].abs()))
.unwrap();
if piv != col {
for j in 0..=k {
let tmp = aug[[col, j]];
aug[[col, j]] = aug[[piv, j]];
aug[[piv, j]] = tmp;
}
}
let p = aug[[col, col]];
for row in 0..k {
if row == col {
continue;
}
let f = aug[[row, col]] / p;
for j in col..=k {
aug[[row, j]] -= f * aug[[col, j]];
}
}
}
Array1::from_iter((0..k).map(|i| aug[[i, k]] / aug[[i, i]]))
};
let beta_fast = solve(&cache.xtwx_orig, &cache.xtwy_orig);
let beta_exact = solve(&exact_gram, &exact_rhs);
let bscale = beta_exact
.iter()
.fold(0.0_f64, |a, &v| a.max(v.abs()))
.max(1e-300);
for (a, b) in beta_fast.iter().zip(beta_exact.iter()) {
assert!(
(a - b).abs() <= 1e-8 * bscale,
"penalized solve drift at psi={psi}: fast={a}, exact={b}"
);
}
}
}
#[test]
fn psi_gram_tensor_trial_accessors_touch_no_data_rows() {
use std::cell::Cell;
let (n, k) = (256usize, 6usize);
let w = Array1::from_iter((0..n).map(|i| 0.8 + 0.4 * ((i % 4) as f64) / 3.0));
let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.21).sin() + 0.2));
let (psi_lo, psi_hi) = (-1.1_f64, 0.95_f64);
let calls = Cell::new(0usize);
let tensor = PsiGramTensor::build(
|psi| {
calls.set(calls.get() + 1);
synth_design(psi, n, k)
},
w.view(),
z.view(),
psi_lo,
psi_hi,
)
.expect("analytic synthetic design must certify");
let build_calls = calls.get();
assert!(
build_calls > 0,
"build must have streamed the design at least once (sanity)"
);
let m = 64usize;
let lo = psi_lo + 0.05;
let hi = psi_hi - 0.05;
for i in 0..m {
let psi = lo + (hi - lo) * (i as f64) / (m as f64 - 1.0);
assert!(tensor.contains(psi));
let _g = tensor.gram_at(psi);
let _r = tensor.rhs_at(psi);
let _dg = tensor.dgram_dpsi(psi);
let _dr = tensor.drhs_dpsi(psi);
let _d2g = tensor.d2gram_dpsi2(psi);
let _d2r = tensor.d2rhs_dpsi2(psi);
let _cache = tensor.gaussian_fixed_cache_at(psi);
}
assert_eq!(
calls.get(),
build_calls,
"n-independence VIOLATED: a per-trial accessor re-streamed the n×k \
design ({} extra eval_design calls across {m} ψ-trials). The certified \
κ/ψ outer loop must serve value + gradient + Hessian curvature + the \
inner-solver cache from k-space sufficient statistics only, with NO \
per-trial n-row work.",
calls.get() - build_calls
);
}
#[test]
fn psi_gram_tensor_second_derivative_matches_fd_of_gradient() {
let (n, k) = (160usize, 7usize);
let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.5 * ((i % 3) as f64)));
let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.37).sin()));
let (psi_lo, psi_hi) = (-1.2_f64, 1.0_f64);
let tensor = PsiGramTensor::build(
|psi| synth_design(psi, n, k),
w.view(),
z.view(),
psi_lo,
psi_hi,
)
.expect("analytic synthetic design must certify");
let h = 1e-5;
for &psi in &[-1.0, -0.5, 0.0, 0.4, 0.9] {
let dg_plus = tensor.dgram_dpsi(psi + h);
let dg_minus = tensor.dgram_dpsi(psi - h);
let d2g = tensor.d2gram_dpsi2(psi);
let gscale = d2g.iter().fold(0.0_f64, |a, &v| a.max(v.abs())).max(1e-9);
for ((a, p), m_) in d2g.iter().zip(dg_plus.iter()).zip(dg_minus.iter()) {
let fd = (p - m_) / (2.0 * h);
assert!(
(a - fd).abs() <= 1e-4 * gscale,
"d2gram/dpsi2 mismatch at psi={psi}: analytic={a}, fd={fd}"
);
}
let dr_plus = tensor.drhs_dpsi(psi + h);
let dr_minus = tensor.drhs_dpsi(psi - h);
let d2r = tensor.d2rhs_dpsi2(psi);
let rscale = d2r.iter().fold(0.0_f64, |a, &v| a.max(v.abs())).max(1e-9);
for ((a, p), m_) in d2r.iter().zip(dr_plus.iter()).zip(dr_minus.iter()) {
let fd = (p - m_) / (2.0 * h);
assert!(
(a - fd).abs() <= 1e-4 * rscale,
"d2rhs/dpsi2 mismatch at psi={psi}: analytic={a}, fd={fd}"
);
}
}
}
fn profile_deviance(
g: &Array2<f64>,
r: &Array1<f64>,
c: f64,
s_ridge: &Array2<f64>,
lambda: f64,
k: usize,
) -> (f64, Array1<f64>) {
let mut a = g.clone();
a.scaled_add(lambda, s_ridge);
let mut aug = Array2::<f64>::zeros((k, k + 1));
aug.slice_mut(ndarray::s![.., ..k]).assign(&a);
aug.slice_mut(ndarray::s![.., k]).assign(r);
for col in 0..k {
let piv = (col..k)
.max_by(|&p, &q| aug[[p, col]].abs().total_cmp(&aug[[q, col]].abs()))
.unwrap();
if piv != col {
for j in 0..=k {
let tmp = aug[[col, j]];
aug[[col, j]] = aug[[piv, j]];
aug[[piv, j]] = tmp;
}
}
let p = aug[[col, col]];
for row in 0..k {
if row == col {
continue;
}
let f = aug[[row, col]] / p;
for j in col..=k {
aug[[row, j]] -= f * aug[[col, j]];
}
}
}
let beta = Array1::from_iter((0..k).map(|i| aug[[i, k]] / aug[[i, i]]));
let deviance = c - beta.dot(r);
(deviance, beta)
}
#[test]
fn psi_gram_tensor_bit_tight_hessian_and_kappa_optimum() {
let (n, k) = (200usize, 6usize);
let w = Array1::from_iter((0..n).map(|i| 0.7 + 0.6 * (((i * 7) % 5) as f64) / 4.0));
let z = Array1::from_iter((0..n).map(|i| {
let t = (i as f64) / (n as f64 - 1.0);
(3.0 * t).sin() + 0.3 * (7.0 * t).cos()
}));
let (psi_lo, psi_hi) = (-1.0_f64, 0.9_f64);
let s_ridge = Array2::<f64>::eye(k);
let lambda = 0.5_f64;
let tensor = PsiGramTensor::build(
|psi| synth_design(psi, n, k),
w.view(),
z.view(),
psi_lo,
psi_hi,
)
.expect("analytic synthetic design must certify");
let exact_ztwz: f64 = w.iter().zip(z.iter()).map(|(&wi, &zi)| wi * zi * zi).sum();
let exact_deviance = |psi: f64| -> f64 {
let design = synth_design(psi, n, k).unwrap();
let mut wd = design.clone();
for (mut row, &wi) in wd.outer_iter_mut().zip(w.iter()) {
row.mapv_inplace(|v| v * wi);
}
let g = design.t().dot(&wd);
let r = wd.t().dot(&z);
profile_deviance(&g, &r, exact_ztwz, &s_ridge, lambda, k).0
};
let fast_deviance = |psi: f64| -> f64 {
let g = tensor.gram_at(psi);
let r = tensor.rhs_at(psi);
profile_deviance(&g, &r, exact_ztwz, &s_ridge, lambda, k).0
};
let m = 81usize;
let lo = psi_lo + 0.06;
let hi = psi_hi - 0.06;
let grid: Vec<f64> = (0..m)
.map(|i| lo + (hi - lo) * (i as f64) / (m as f64 - 1.0))
.collect();
let mut worst_value_rel = 0.0_f64;
let (mut fast_argmin, mut fast_min) = (f64::NAN, f64::INFINITY);
let (mut exact_argmin, mut exact_min) = (f64::NAN, f64::INFINITY);
for &psi in &grid {
let de = exact_deviance(psi);
let df = fast_deviance(psi);
let rel = (de - df).abs() / de.abs().max(1e-300);
worst_value_rel = worst_value_rel.max(rel);
if df < fast_min {
fast_min = df;
fast_argmin = psi;
}
if de < exact_min {
exact_min = de;
exact_argmin = psi;
}
}
assert!(
worst_value_rel <= 1e-9,
"penalized profile deviance: fast n-free assembly diverged from exact \
streamed by rel {worst_value_rel:.3e} (> 1e-9) somewhere on the ψ grid"
);
assert_eq!(
fast_argmin.to_bits(),
exact_argmin.to_bits(),
"κ-optimum mismatch: fast argmin ψ={fast_argmin}, exact argmin ψ={exact_argmin} \
— the n-free objective located a different optimum"
);
assert!(
fast_argmin > lo + 1e-9 && fast_argmin < hi - 1e-9,
"κ-optimum landed on the grid edge ψ={fast_argmin}; the fixture must have \
an INTERIOR minimum for this to test the outer search, not a boundary"
);
let opt_rel = (exact_min - fast_min).abs() / exact_min.abs().max(1e-300);
assert!(
opt_rel <= 1e-9,
"κ-optimum objective value drift at ψ={fast_argmin}: fast={fast_min}, \
exact={exact_min}, rel={opt_rel:.3e}"
);
let solve_a = |g: &Array2<f64>, r: &Array1<f64>| -> Array1<f64> {
profile_deviance(g, r, exact_ztwz, &s_ridge, lambda, k).1
};
let analytic_grad = |psi: f64| -> f64 {
let g = tensor.gram_at(psi);
let r = tensor.rhs_at(psi);
let beta = solve_a(&g, &r);
let dg = tensor.dgram_dpsi(psi);
let dr = tensor.drhs_dpsi(psi);
-2.0 * beta.dot(&dr) + beta.dot(&dg.dot(&beta))
};
let h_grad = 1e-6_f64;
let h_curv = 2e-4_f64;
let mut worst_grad_rel = 0.0_f64;
let mut worst_hess_rel = 0.0_f64;
let mut tested = 0usize;
for &psi in &grid {
if !tensor.contains_for_gradient(psi - 2.0 * h_curv)
|| !tensor.contains_for_gradient(psi + 2.0 * h_curv)
{
continue;
}
tested += 1;
let exact_g1 =
(exact_deviance(psi + h_grad) - exact_deviance(psi - h_grad)) / (2.0 * h_grad);
let ag = analytic_grad(psi);
let gscale = exact_g1.abs().max(1e-6);
worst_grad_rel = worst_grad_rel.max((exact_g1 - ag).abs() / gscale);
let analytic_h2 =
(analytic_grad(psi + h_curv) - analytic_grad(psi - h_curv)) / (2.0 * h_curv);
let exact_h2 = (exact_deviance(psi + h_curv) - 2.0 * exact_deviance(psi)
+ exact_deviance(psi - h_curv))
/ (h_curv * h_curv);
let hscale = exact_h2.abs().max(1e-3);
worst_hess_rel = worst_hess_rel.max((analytic_h2 - exact_h2).abs() / hscale);
}
assert!(
tested > 0,
"no ψ on the grid lay inside the certified gradient sub-window"
);
assert!(
worst_grad_rel <= 1e-5,
"ψ-gradient mismatch: the tensor's analytic n-free objective gradient diverged \
from the exact streamed objective by rel {worst_grad_rel:.3e} (> 1e-5)"
);
assert!(
worst_hess_rel <= 1e-3,
"ψ-curvature (Hessian) mismatch: fast n-free objective curvature diverged \
from the exact streamed objective by rel {worst_hess_rel:.3e} (> 1e-3) — \
the outer Newton step would read a different curvature than the truth"
);
eprintln!(
"[psi-gram-bittight] n={n} k={k} grid={m} grad-tested={tested} \
worst |ΔD|/D={worst_value_rel:.2e} worst |ΔD'|/D'={worst_grad_rel:.2e} \
worst |ΔD''|/D''={worst_hess_rel:.2e} κ-opt ψ={fast_argmin:.6} (interior, bit-identical)"
);
}
#[test]
fn psi_gram_tensor_refuses_non_analytic_design() {
let (n, k) = (40usize, 3usize);
let w = Array1::from_elem(n, 1.0);
let z = Array1::from_elem(n, 0.5);
let tensor = PsiGramTensor::build(
|psi| {
let mut x = Array2::<f64>::zeros((n, k));
for i in 0..n {
for j in 0..k {
x[[i, j]] = psi.abs() + (i + j) as f64 / (n + k) as f64;
}
}
Ok(x)
},
w.view(),
z.view(),
-1.0,
1.0,
);
assert!(
tensor.is_err(),
"kinked design must fail the tail-decay/spot-check certificates"
);
}
#[test]
fn reduced_basis_witness_reflexive_and_gauge_invariant() {
let (n, k) = (160usize, 6usize);
let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.3 * ((i % 5) as f64)));
let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.29).sin()));
let (psi_lo, psi_hi) = (-1.0_f64, 0.8_f64);
let tensor = PsiGramTensor::build(
|psi| synth_design(psi, n, k),
w.view(),
z.view(),
psi_lo,
psi_hi,
)
.expect("analytic synthetic design must certify");
for &psi in &[-0.9, -0.2, 0.0, 0.5, 0.79] {
assert!(
tensor.reduced_basis_equal(psi, psi),
"witness must be reflexive at psi={psi}"
);
}
let grid: Vec<f64> = (0..=12).map(|i| psi_lo + 0.05 + 0.06 * i as f64).collect();
for &a in &grid {
for &b in &grid {
assert!(
tensor.reduced_basis_equal(a, b),
"full-rank design: range is ψ-invariant (identity projector), \
so the skip witness must certify (ψ_ref={a}, ψ_new={b})"
);
}
}
assert!(!tensor.reduced_basis_equal(psi_lo - 0.5, 0.0));
assert!(!tensor.reduced_basis_equal(0.0, psi_hi + 0.5));
}
#[test]
fn reduced_basis_witness_refuses_across_subspace_change() {
let (n, k) = (200usize, 3usize);
let base = |i: usize, j: usize| -> f64 {
let t = (i as f64 + 0.5) / n as f64;
match j {
0 => 1.0,
1 => (2.0 * std::f64::consts::PI * t).sin(),
_ => (4.0 * std::f64::consts::PI * t).cos(),
}
};
let alpha = 10.0_f64;
let design = move |psi: f64| -> Result<Array2<f64>, String> {
let eps = (alpha * psi).exp();
let mut x = Array2::<f64>::zeros((n, k));
for i in 0..n {
x[[i, 0]] = base(i, 0);
x[[i, 1]] = base(i, 1);
x[[i, 2]] = eps * base(i, 2);
}
Ok(x)
};
let w = Array1::from_elem(n, 1.0);
let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.13).sin()));
let (psi_lo, psi_hi) = (-1.6_f64, -0.8_f64);
let tensor = PsiGramTensor::build(design, w.view(), z.view(), psi_lo, psi_hi)
.expect("smooth ε(ψ) design must still certify (analytic, no kink)");
let rank_at = |psi: f64| -> usize {
tensor
.range_projector(psi, PSI_GRAM_SKIP_RANK_RTOL)
.map(|(_, r)| r)
.unwrap_or(0)
};
let lo_rank = rank_at(psi_lo + 0.02);
let hi_rank = rank_at(psi_hi - 0.02);
assert_eq!(
lo_rank, 2,
"low-ψ end must be rank-2 (third column below cutoff)"
);
assert_eq!(
hi_rank, 3,
"high-ψ end must be rank-3 (third column above cutoff)"
);
let psi_low_a = psi_lo + 0.05;
let psi_low_b = psi_lo + 0.10;
assert_eq!(rank_at(psi_low_a), 2);
assert_eq!(rank_at(psi_low_b), 2);
assert!(
tensor.reduced_basis_equal(psi_low_a, psi_low_b),
"two low-ψ trials share the rank-2 reduced basis → skip is sound"
);
let psi_high_a = psi_hi - 0.05;
let psi_high_b = psi_hi - 0.10;
assert_eq!(rank_at(psi_high_a), 3);
assert_eq!(rank_at(psi_high_b), 3);
assert!(
tensor.reduced_basis_equal(psi_high_a, psi_high_b),
"two high-ψ trials share the rank-3 reduced basis → skip is sound"
);
assert!(
!tensor.reduced_basis_equal(psi_low_a, psi_high_a),
"witness must REFUSE a skip that straddles the reduced-basis (rank) \
change — freezing the low-ψ rank-2 basis and re-keying the high-ψ \
rank-3 Gram is the exact ~7.8e-2 β̂ regression #1264 guards"
);
assert!(
!tensor.reduced_basis_equal(psi_high_a, psi_low_a),
"witness must refuse symmetrically (high pin, low trial)"
);
}
#[test]
fn reduced_basis_witness_certifies_across_pure_rotation_1033() {
let (n, k) = (240usize, 3usize);
let p0 = |i: usize| -> f64 {
let t = (i as f64 + 0.5) / n as f64;
(2.0 * std::f64::consts::PI * t).sin()
};
let p1 = |i: usize| -> f64 {
let t = (i as f64 + 0.5) / n as f64;
(2.0 * std::f64::consts::PI * t).cos()
};
let design = move |psi: f64| -> Result<Array2<f64>, String> {
let phi = 0.7 * psi; let (c, s) = (phi.cos(), phi.sin());
let mut x = Array2::<f64>::zeros((n, k));
for i in 0..n {
let (a, b) = (p0(i), p1(i));
x[[i, 0]] = c * a - s * b;
x[[i, 1]] = s * a + c * b;
}
Ok(x)
};
let w = Array1::from_elem(n, 1.0);
let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.17).cos()));
let (psi_lo, psi_hi) = (-1.0_f64, 1.0_f64);
let tensor = PsiGramTensor::build(design, w.view(), z.view(), psi_lo, psi_hi)
.expect("smooth rotation design must certify (analytic, no kink)");
let rank_at = |psi: f64| -> usize {
tensor
.range_projector(psi, PSI_GRAM_SKIP_RANK_RTOL)
.map(|(_, r)| r)
.unwrap_or(0)
};
for &psi in &[-0.95, -0.4, 0.0, 0.4, 0.95] {
assert_eq!(rank_at(psi), 2, "rotation keeps rank 2 at psi={psi}");
}
let grid: Vec<f64> = (0..=10).map(|i| psi_lo + 0.05 + 0.09 * i as f64).collect();
for &a in &grid {
for &b in &grid {
assert!(
tensor.reduced_basis_equal(a, b),
"pure in-plane rotation preserves the range subspace → the \
subspace-distance skip witness must certify (#1033) \
(ψ_ref={a}, ψ_new={b})"
);
}
}
}
}