use ndarray::{Array1, Array2, ArrayView1};
pub const PSI_GRAM_CERT_RTOL: f64 = 1.0e-9;
pub const PSI_GRAM_SPOT_RTOL: f64 = 1.0e-10;
pub const PSI_GRAM_GRAD_SPOT_RTOL: f64 = 1.0e-6;
pub const PSI_GRAM_GRAD_SCAN_POINTS: usize = 64;
pub const PSI_GRAM_NODE_LADDER: [usize; 5] = [9, 17, 33, 65, 129];
pub const PSI_GRAM_SPOT_POINTS: usize = 3;
pub const PSI_GRAM_SKIP_RRQR_MARGIN_MIN: f64 = 16.0;
pub const PSI_GRAM_SKIP_SCAN_POINTS: usize = 64;
pub struct PsiGramTensor {
psi_lo: f64,
psi_hi: f64,
grad_psi_lo: f64,
grad_psi_hi: f64,
skip_psi_lo: f64,
skip_psi_hi: f64,
n_rows: usize,
n_coeff: usize,
k: usize,
gram: Vec<Array2<f64>>,
rhs: Vec<Array1<f64>>,
zt_w_z: f64,
}
enum BuildOutcome {
EvalFailed,
TailNotCertified,
Candidate(PsiGramTensor),
}
fn cheb_t(x: f64, n: usize) -> Vec<f64> {
let mut t = vec![0.0; n];
if n > 0 {
t[0] = 1.0;
}
if n > 1 {
t[1] = x;
}
for d in 2..n {
t[d] = 2.0 * x * t[d - 1] - t[d - 2];
}
t
}
fn cheb_t_prime(x: f64, n: usize) -> Vec<f64> {
let mut u = vec![0.0; n.max(1)];
if !u.is_empty() {
u[0] = 1.0;
}
if n > 1 {
u[1] = 2.0 * x;
}
for d in 2..n {
u[d] = 2.0 * x * u[d - 1] - u[d - 2];
}
let mut tp = vec![0.0; n];
for d in 1..n {
tp[d] = d as f64 * u[d - 1];
}
tp
}
fn cheb_t_double_prime(x: f64, n: usize) -> Vec<f64> {
let mut t = vec![0.0; n];
let mut tp = vec![0.0; n];
let mut tpp = vec![0.0; n];
if n > 0 {
t[0] = 1.0; }
if n > 1 {
t[1] = x; tp[1] = 1.0;
}
for d in 2..n {
t[d] = 2.0 * x * t[d - 1] - t[d - 2];
tp[d] = 2.0 * t[d - 1] + 2.0 * x * tp[d - 1] - tp[d - 2];
tpp[d] = 4.0 * tp[d - 1] + 2.0 * x * tpp[d - 1] - tpp[d - 2];
}
tpp
}
impl PsiGramTensor {
pub fn build(
mut eval_design: impl FnMut(f64) -> Result<Array2<f64>, String>,
weights: ArrayView1<'_, f64>,
z: ArrayView1<'_, f64>,
psi_lo: f64,
psi_hi: f64,
) -> Option<Self> {
if !(psi_lo.is_finite() && psi_hi.is_finite()) || psi_hi <= psi_lo {
return None;
}
for &m in PSI_GRAM_NODE_LADDER.iter() {
match Self::build_at(&mut eval_design, weights, z, psi_lo, psi_hi, m) {
BuildOutcome::EvalFailed => return None,
BuildOutcome::TailNotCertified => continue,
BuildOutcome::Candidate(mut candidate) => {
if candidate.spot_check(&mut eval_design, weights) {
candidate.certify_gradient_window(&mut eval_design, weights);
candidate.compute_skip_window();
return Some(candidate);
}
}
}
}
None
}
fn build_at(
eval_design: &mut impl FnMut(f64) -> Result<Array2<f64>, String>,
weights: ArrayView1<'_, f64>,
z: ArrayView1<'_, f64>,
psi_lo: f64,
psi_hi: f64,
m: usize,
) -> BuildOutcome {
let mut nodes_x = vec![0.0_f64; m];
let mut designs: Vec<Array2<f64>> = Vec::with_capacity(m);
for (i, x_slot) in nodes_x.iter_mut().enumerate() {
let x = (std::f64::consts::PI * (2 * i + 1) as f64 / (2 * m) as f64).cos();
*x_slot = x;
let psi = 0.5 * (psi_lo + psi_hi) + 0.5 * (psi_hi - psi_lo) * x;
let Ok(design) = eval_design(psi) else {
return BuildOutcome::EvalFailed;
};
if design.iter().any(|v| !v.is_finite()) {
return BuildOutcome::EvalFailed;
}
designs.push(design);
}
let (n, k) = designs[0].dim();
if designs.iter().any(|d| d.dim() != (n, k))
|| weights.len() != n
|| z.len() != n
|| n == 0
|| k == 0
{
return BuildOutcome::EvalFailed;
}
let t_at_nodes: Vec<Vec<f64>> = nodes_x.iter().map(|&x| cheb_t(x, m)).collect();
let mut coeff_slabs: Vec<Array2<f64>> = Vec::with_capacity(m);
for d in 0..m {
let gamma = if d == 0 { 1.0 } else { 2.0 };
let mut slab = Array2::<f64>::zeros((n, k));
for (i, design) in designs.iter().enumerate() {
let wgt = gamma / m as f64 * t_at_nodes[i][d];
slab.scaled_add(wgt, design);
}
coeff_slabs.push(slab);
}
drop(designs);
let mut col_scale = vec![0.0_f64; k];
for slab in &coeff_slabs {
for (j, scale) in col_scale.iter_mut().enumerate() {
for i in 0..n {
*scale = scale.max(slab[[i, j]].abs());
}
}
}
let tail_start = m - (m / 4).max(1);
for slab in coeff_slabs.iter().skip(tail_start) {
for (j, &scale) in col_scale.iter().enumerate() {
let bound = PSI_GRAM_CERT_RTOL * scale.max(1e-300);
for i in 0..n {
if slab[[i, j]].abs() > bound {
return BuildOutcome::TailNotCertified;
}
}
}
}
let mut weighted: Vec<Array2<f64>> = Vec::with_capacity(m);
for slab in &coeff_slabs {
let mut ws = slab.clone();
for (mut row, &w) in ws.outer_iter_mut().zip(weights.iter()) {
row.mapv_inplace(|v| v * w);
}
weighted.push(ws);
}
let mut wz = Array1::<f64>::zeros(z.len());
let mut zt_w_z = 0.0_f64;
for ((slot, &w), &zv) in wz.iter_mut().zip(weights.iter()).zip(z.iter()) {
*slot = w * zv;
zt_w_z += w * zv * zv;
}
let mut gram: Vec<Array2<f64>> = Vec::with_capacity(m * m);
let mut rhs = Vec::with_capacity(m);
for d in 0..m {
for e in 0..m {
if e < d {
let g: Array2<f64> = gram[e * m + d].t().to_owned();
gram.push(g);
} else {
gram.push(coeff_slabs[d].t().dot(&weighted[e]));
}
}
rhs.push(coeff_slabs[d].t().dot(&wz));
}
BuildOutcome::Candidate(Self {
psi_lo,
psi_hi,
grad_psi_lo: psi_lo,
grad_psi_hi: psi_hi,
skip_psi_lo: psi_lo,
skip_psi_hi: psi_hi,
n_rows: n,
n_coeff: m,
k,
gram,
rhs,
zt_w_z,
})
}
fn spot_check(
&self,
eval_design: &mut impl FnMut(f64) -> Result<Array2<f64>, String>,
weights: ArrayView1<'_, f64>,
) -> bool {
for s in 0..PSI_GRAM_SPOT_POINTS {
let frac = ((s as f64 + 1.0) * 0.618_033_988_749_894_9).fract();
let psi = self.psi_lo + frac * (self.psi_hi - self.psi_lo);
let Ok(design) = eval_design(psi) else {
return false;
};
let mut wd = design.clone();
for (mut row, &w) in wd.outer_iter_mut().zip(weights.iter()) {
row.mapv_inplace(|v| v * w);
}
let exact = design.t().dot(&wd);
let assembled = self.gram_at(psi);
let scale = exact
.iter()
.fold(0.0_f64, |acc, &v| acc.max(v.abs()))
.max(1e-300);
for (a, b) in assembled.iter().zip(exact.iter()) {
if (a - b).abs() > PSI_GRAM_SPOT_RTOL * scale {
return false;
}
}
}
true
}
fn certify_gradient_window(
&mut self,
eval_design: &mut impl FnMut(f64) -> Result<Array2<f64>, String>,
weights: ArrayView1<'_, f64>,
) {
let span = self.psi_hi - self.psi_lo;
let fd4 = |psi: f64,
h: f64,
eval: &mut dyn FnMut(f64) -> Result<Array2<f64>, String>|
-> Option<Array2<f64>> {
let weighted_gram = |p: f64,
eval: &mut dyn FnMut(f64) -> Result<Array2<f64>, String>|
-> Option<Array2<f64>> {
let design = eval(p).ok()?;
let mut wd = design.clone();
for (mut row, &w) in wd.outer_iter_mut().zip(weights.iter()) {
row.mapv_inplace(|v| v * w);
}
Some(design.t().dot(&wd))
};
let g_m2 = weighted_gram(psi - 2.0 * h, eval)?;
let g_m1 = weighted_gram(psi - h, eval)?;
let g_p1 = weighted_gram(psi + h, eval)?;
let g_p2 = weighted_gram(psi + 2.0 * h, eval)?;
Some((g_m2 - 8.0 * &g_m1 + 8.0 * &g_p1 - g_p2) / (12.0 * h))
};
const FD_CONVERGED_RTOL: f64 = 1e-9; let h = (span * 2e-4).max(1e-6);
let exact_dgram = move |psi: f64,
eval: &mut dyn FnMut(f64) -> Result<Array2<f64>, String>|
-> Option<Array2<f64>> {
let fd_h = fd4(psi, h, eval)?; let fd_h2 = fd4(psi, 0.5 * h, eval)?; let scale = fd_h2 .iter()
.fold(0.0_f64, |acc, &v| acc.max(v.abs()))
.max(1e-300);
let converged = fd_h .iter()
.zip(fd_h2.iter()) .all(|(a, b)| (a - b).abs() <= FD_CONVERGED_RTOL * scale); if !converged {
return None;
}
Some((16.0 * &fd_h2 - &fd_h) / 15.0) };
let certifies = |me: &Self,
psi: f64,
eval: &mut dyn FnMut(f64) -> Result<Array2<f64>, String>|
-> bool {
if psi - 2.0 * h <= me.psi_lo || psi + 2.0 * h >= me.psi_hi {
return false;
}
let Some(exact) = exact_dgram(psi, eval) else {
return false;
};
let analytic = me.dgram_dpsi(psi);
let scale = exact
.iter()
.fold(0.0_f64, |acc, &v| acc.max(v.abs()))
.max(1e-300);
analytic
.iter()
.zip(exact.iter())
.all(|(a, b)| (a - b).abs() <= PSI_GRAM_GRAD_SPOT_RTOL * scale)
};
let n = PSI_GRAM_GRAD_SCAN_POINTS;
let mut lo = self.psi_hi;
let mut hi = self.psi_lo;
let mut found = false;
for i in 0..=n {
let psi = self.psi_lo + span * (i as f64) / (n as f64);
if certifies(self, psi, eval_design) {
lo = psi;
found = true;
break;
}
}
for i in (0..=n).rev() {
let psi = self.psi_lo + span * (i as f64) / (n as f64);
if certifies(self, psi, eval_design) {
hi = psi;
break;
}
}
if found && hi > lo {
self.grad_psi_lo = lo;
self.grad_psi_hi = hi;
} else {
self.grad_psi_lo = f64::NAN;
self.grad_psi_hi = f64::NAN;
}
}
fn compute_skip_window(&mut self) {
#[derive(Clone, Eq, PartialEq)]
struct Frame {
rank: usize,
permutation: Vec<usize>,
}
let frame_at = |me: &Self, psi: f64| -> Option<Frame> {
let g = me.gram_at(psi);
if g.iter().any(|v| !v.is_finite()) {
return None;
}
let rrqr = crate::linalg::faer_ndarray::rrqr_from_gram_with_permutation(
&g,
me.n_rows,
crate::linalg::faer_ndarray::default_rrqr_rank_alpha(),
)
.ok()?;
if !rrqr.verdict_margin.is_finite()
|| rrqr.verdict_margin < PSI_GRAM_SKIP_RRQR_MARGIN_MIN
{
return None;
}
Some(Frame {
rank: rrqr.rank,
permutation: rrqr.column_permutation,
})
};
let span = self.psi_hi - self.psi_lo;
if !(span.is_finite() && span > 0.0) {
self.skip_psi_lo = f64::NAN;
self.skip_psi_hi = f64::NAN;
return;
}
let n = PSI_GRAM_SKIP_SCAN_POINTS;
let probes: Vec<(f64, Option<Frame>)> = (0..=n)
.map(|i| {
let psi = self.psi_lo + span * (i as f64) / (n as f64);
(psi, frame_at(self, psi))
})
.collect();
let mut best_start = None;
let mut best_end = None;
let mut best_len = 0usize;
let mut start = 0usize;
while start < probes.len() {
let Some(frame) = probes[start].1.as_ref() else {
start += 1;
continue;
};
let mut end = start;
while end + 1 < probes.len() && probes[end + 1].1.as_ref() == Some(frame) {
end += 1;
}
let len = end - start + 1;
if len > best_len {
best_len = len;
best_start = Some(start);
best_end = Some(end);
}
start = end + 1;
}
if let (Some(start), Some(end)) = (best_start, best_end)
&& best_len >= 3
{
self.skip_psi_lo = probes[start].0;
self.skip_psi_hi = probes[end].0;
} else {
self.skip_psi_lo = f64::NAN;
self.skip_psi_hi = f64::NAN;
}
}
pub fn contains(&self, psi: f64) -> bool {
psi.is_finite() && psi >= self.psi_lo && psi <= self.psi_hi
}
pub fn contains_for_skip(&self, psi: f64) -> bool {
psi.is_finite()
&& self.skip_psi_lo.is_finite()
&& self.skip_psi_hi.is_finite()
&& psi >= self.skip_psi_lo
&& psi <= self.skip_psi_hi
}
pub fn contains_for_gradient(&self, psi: f64) -> bool {
psi.is_finite()
&& self.grad_psi_lo.is_finite()
&& self.grad_psi_hi.is_finite()
&& psi >= self.grad_psi_lo
&& psi <= self.grad_psi_hi
}
fn mapped(&self, psi: f64) -> f64 {
(2.0 * psi - (self.psi_lo + self.psi_hi)) / (self.psi_hi - self.psi_lo)
}
pub fn gram_at(&self, psi: f64) -> Array2<f64> {
let x = self.mapped(psi);
let t = cheb_t(x, self.n_coeff);
let mut out = Array2::<f64>::zeros((self.k, self.k));
for d in 0..self.n_coeff {
for e in 0..self.n_coeff {
out.scaled_add(t[d] * t[e], &self.gram[d * self.n_coeff + e]);
}
}
out
}
pub fn rhs_at(&self, psi: f64) -> Array1<f64> {
let x = self.mapped(psi);
let t = cheb_t(x, self.n_coeff);
let mut out = Array1::<f64>::zeros(self.k);
for (d, td) in t.iter().enumerate() {
out.scaled_add(*td, &self.rhs[d]);
}
out
}
pub fn dgram_dpsi(&self, psi: f64) -> Array2<f64> {
let x = self.mapped(psi);
let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
let t = cheb_t(x, self.n_coeff);
let tp = cheb_t_prime(x, self.n_coeff);
let mut out = Array2::<f64>::zeros((self.k, self.k));
for d in 0..self.n_coeff {
for e in 0..self.n_coeff {
out.scaled_add(
(tp[d] * t[e] + t[d] * tp[e]) * dx_dpsi,
&self.gram[d * self.n_coeff + e],
);
}
}
out
}
pub fn drhs_dpsi(&self, psi: f64) -> Array1<f64> {
let x = self.mapped(psi);
let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
let tp = cheb_t_prime(x, self.n_coeff);
let mut out = Array1::<f64>::zeros(self.k);
for (d, tpd) in tp.iter().enumerate() {
out.scaled_add(*tpd * dx_dpsi, &self.rhs[d]);
}
out
}
pub fn d2gram_dpsi2(&self, psi: f64) -> Array2<f64> {
let x = self.mapped(psi);
let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
let dx_dpsi_sq = dx_dpsi * dx_dpsi;
let t = cheb_t(x, self.n_coeff);
let tp = cheb_t_prime(x, self.n_coeff);
let tpp = cheb_t_double_prime(x, self.n_coeff);
let mut out = Array2::<f64>::zeros((self.k, self.k));
for d in 0..self.n_coeff {
for e in 0..self.n_coeff {
let coef = (tpp[d] * t[e] + 2.0 * tp[d] * tp[e] + t[d] * tpp[e]) * dx_dpsi_sq;
out.scaled_add(coef, &self.gram[d * self.n_coeff + e]);
}
}
out
}
pub fn d2rhs_dpsi2(&self, psi: f64) -> Array1<f64> {
let x = self.mapped(psi);
let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
let dx_dpsi_sq = dx_dpsi * dx_dpsi;
let tpp = cheb_t_double_prime(x, self.n_coeff);
let mut out = Array1::<f64>::zeros(self.k);
for (d, tppd) in tpp.iter().enumerate() {
out.scaled_add(*tppd * dx_dpsi_sq, &self.rhs[d]);
}
out
}
pub fn gaussian_fixed_cache_at(&self, psi: f64) -> crate::pirls::GaussianFixedCache {
crate::pirls::GaussianFixedCache {
xtwx_orig: self.gram_at(psi),
xtwy_orig: self.rhs_at(psi),
centered_weighted_y_sq: self.zt_w_z,
xtwx_sparse_orig: None,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn synth_design(psi: f64, n: usize, k: usize) -> Result<Array2<f64>, String> {
let mut x = Array2::<f64>::zeros((n, k));
for i in 0..n {
for j in 0..k {
let r = 0.05 + (i as f64 + 1.0) * (j as f64 + 1.0) / (n as f64 * k as f64) * 3.0;
if j == k - 1 {
x[[i, j]] = r * r * r;
} else {
let s = r * psi.exp();
x[[i, j]] = (1.0 + s) * (-s).exp();
}
}
}
Ok(x)
}
fn exact_gram(psi: f64, n: usize, k: usize, w: &Array1<f64>) -> Array2<f64> {
let design = synth_design(psi, n, k).unwrap();
let mut wd = design.clone();
for (mut row, &wi) in wd.outer_iter_mut().zip(w.iter()) {
row.mapv_inplace(|v| v * wi);
}
design.t().dot(&wd)
}
#[test]
fn psi_gram_tensor_matches_exact_gram_and_fd_gradient() {
let (n, k) = (160usize, 7usize);
let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.5 * ((i % 3) as f64)));
let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.37).sin()));
let (psi_lo, psi_hi) = (-1.2_f64, 1.0_f64);
let tensor = PsiGramTensor::build(
|psi| synth_design(psi, n, k),
w.view(),
z.view(),
psi_lo,
psi_hi,
)
.expect("analytic synthetic design must certify");
for &psi in &[-1.1, -0.63, 0.0, 0.41, 0.97] {
assert!(tensor.contains(psi));
let exact = exact_gram(psi, n, k, &w);
let fast = tensor.gram_at(psi);
let scale = exact.iter().fold(0.0_f64, |a, &v| a.max(v.abs()));
for (a, b) in fast.iter().zip(exact.iter()) {
assert!(
(a - b).abs() <= 1e-9 * scale,
"gram mismatch at psi={psi}: fast={a}, exact={b}"
);
}
let design = synth_design(psi, n, k).unwrap();
let mut wz = Array1::<f64>::zeros(n);
for ((slot, &wi), &zi) in wz.iter_mut().zip(w.iter()).zip(z.iter()) {
*slot = wi * zi;
}
let exact_rhs = design.t().dot(&wz);
let fast_rhs = tensor.rhs_at(psi);
let rscale = exact_rhs.iter().fold(0.0_f64, |a, &v| a.max(v.abs()));
for (a, b) in fast_rhs.iter().zip(exact_rhs.iter()) {
assert!(
(a - b).abs() <= 1e-9 * rscale,
"rhs mismatch at psi={psi}: fast={a}, exact={b}"
);
}
let h = 1e-5;
let g_plus = exact_gram(psi + h, n, k, &w);
let g_minus = exact_gram(psi - h, n, k, &w);
let dg = tensor.dgram_dpsi(psi);
let dscale = dg.iter().fold(0.0_f64, |a, &v| a.max(v.abs())).max(1e-12);
for ((a, p), m_) in dg.iter().zip(g_plus.iter()).zip(g_minus.iter()) {
let fd = (p - m_) / (2.0 * h);
assert!(
(a - fd).abs() <= 1e-5 * dscale,
"dgram/dpsi mismatch at psi={psi}: analytic={a}, fd={fd}"
);
}
}
assert!(!tensor.contains(psi_hi + 0.5));
assert!(!tensor.contains(psi_lo - 0.5));
for &psi in &[-0.9, 0.2, 0.8] {
let cache = tensor.gaussian_fixed_cache_at(psi);
let design = synth_design(psi, n, k).unwrap();
let mut wd = design.clone();
for (mut row, &wi) in wd.outer_iter_mut().zip(w.iter()) {
row.mapv_inplace(|v| v * wi);
}
let exact_gram = design.t().dot(&wd);
let exact_rhs = wd.t().dot(&z);
let exact_ztwz: f64 = w.iter().zip(z.iter()).map(|(&wi, &zi)| wi * zi * zi).sum();
assert!(
(cache.centered_weighted_y_sq - exact_ztwz).abs()
<= 1e-12 * exact_ztwz.abs().max(1e-300),
"zᵀWz drift: cache={}, exact={exact_ztwz}",
cache.centered_weighted_y_sq
);
let solve = |g: &Array2<f64>, r: &Array1<f64>| -> Array1<f64> {
let mut a = g.clone();
for i in 0..k {
a[[i, i]] += 1.0;
}
let mut aug = Array2::<f64>::zeros((k, k + 1));
aug.slice_mut(ndarray::s![.., ..k]).assign(&a);
aug.slice_mut(ndarray::s![.., k]).assign(r);
for col in 0..k {
let piv = (col..k)
.max_by(|&p, &q| aug[[p, col]].abs().total_cmp(&aug[[q, col]].abs()))
.unwrap();
if piv != col {
for j in 0..=k {
let tmp = aug[[col, j]];
aug[[col, j]] = aug[[piv, j]];
aug[[piv, j]] = tmp;
}
}
let p = aug[[col, col]];
for row in 0..k {
if row == col {
continue;
}
let f = aug[[row, col]] / p;
for j in col..=k {
aug[[row, j]] -= f * aug[[col, j]];
}
}
}
Array1::from_iter((0..k).map(|i| aug[[i, k]] / aug[[i, i]]))
};
let beta_fast = solve(&cache.xtwx_orig, &cache.xtwy_orig);
let beta_exact = solve(&exact_gram, &exact_rhs);
let bscale = beta_exact
.iter()
.fold(0.0_f64, |a, &v| a.max(v.abs()))
.max(1e-300);
for (a, b) in beta_fast.iter().zip(beta_exact.iter()) {
assert!(
(a - b).abs() <= 1e-8 * bscale,
"penalized solve drift at psi={psi}: fast={a}, exact={b}"
);
}
}
}
#[test]
fn psi_gram_tensor_trial_accessors_touch_no_data_rows() {
use std::cell::Cell;
let (n, k) = (256usize, 6usize);
let w = Array1::from_iter((0..n).map(|i| 0.8 + 0.4 * ((i % 4) as f64) / 3.0));
let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.21).sin() + 0.2));
let (psi_lo, psi_hi) = (-1.1_f64, 0.95_f64);
let calls = Cell::new(0usize);
let tensor = PsiGramTensor::build(
|psi| {
calls.set(calls.get() + 1);
synth_design(psi, n, k)
},
w.view(),
z.view(),
psi_lo,
psi_hi,
)
.expect("analytic synthetic design must certify");
let build_calls = calls.get();
assert!(
build_calls > 0,
"build must have streamed the design at least once (sanity)"
);
let m = 64usize;
let lo = psi_lo + 0.05;
let hi = psi_hi - 0.05;
for i in 0..m {
let psi = lo + (hi - lo) * (i as f64) / (m as f64 - 1.0);
assert!(tensor.contains(psi));
let _g = tensor.gram_at(psi);
let _r = tensor.rhs_at(psi);
let _dg = tensor.dgram_dpsi(psi);
let _dr = tensor.drhs_dpsi(psi);
let _d2g = tensor.d2gram_dpsi2(psi);
let _d2r = tensor.d2rhs_dpsi2(psi);
let _cache = tensor.gaussian_fixed_cache_at(psi);
}
assert_eq!(
calls.get(),
build_calls,
"n-independence VIOLATED: a per-trial accessor re-streamed the n×k \
design ({} extra eval_design calls across {m} ψ-trials). The certified \
κ/ψ outer loop must serve value + gradient + Hessian curvature + the \
inner-solver cache from k-space sufficient statistics only, with NO \
per-trial n-row work.",
calls.get() - build_calls
);
}
#[test]
fn psi_gram_tensor_second_derivative_matches_fd_of_gradient() {
let (n, k) = (160usize, 7usize);
let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.5 * ((i % 3) as f64)));
let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.37).sin()));
let (psi_lo, psi_hi) = (-1.2_f64, 1.0_f64);
let tensor = PsiGramTensor::build(
|psi| synth_design(psi, n, k),
w.view(),
z.view(),
psi_lo,
psi_hi,
)
.expect("analytic synthetic design must certify");
let h = 1e-5;
for &psi in &[-1.0, -0.5, 0.0, 0.4, 0.9] {
let dg_plus = tensor.dgram_dpsi(psi + h);
let dg_minus = tensor.dgram_dpsi(psi - h);
let d2g = tensor.d2gram_dpsi2(psi);
let gscale = d2g.iter().fold(0.0_f64, |a, &v| a.max(v.abs())).max(1e-9);
for ((a, p), m_) in d2g.iter().zip(dg_plus.iter()).zip(dg_minus.iter()) {
let fd = (p - m_) / (2.0 * h);
assert!(
(a - fd).abs() <= 1e-4 * gscale,
"d2gram/dpsi2 mismatch at psi={psi}: analytic={a}, fd={fd}"
);
}
let dr_plus = tensor.drhs_dpsi(psi + h);
let dr_minus = tensor.drhs_dpsi(psi - h);
let d2r = tensor.d2rhs_dpsi2(psi);
let rscale = d2r.iter().fold(0.0_f64, |a, &v| a.max(v.abs())).max(1e-9);
for ((a, p), m_) in d2r.iter().zip(dr_plus.iter()).zip(dr_minus.iter()) {
let fd = (p - m_) / (2.0 * h);
assert!(
(a - fd).abs() <= 1e-4 * rscale,
"d2rhs/dpsi2 mismatch at psi={psi}: analytic={a}, fd={fd}"
);
}
}
}
fn profile_deviance(
g: &Array2<f64>,
r: &Array1<f64>,
c: f64,
s_ridge: &Array2<f64>,
lambda: f64,
k: usize,
) -> (f64, Array1<f64>) {
let mut a = g.clone();
a.scaled_add(lambda, s_ridge);
let mut aug = Array2::<f64>::zeros((k, k + 1));
aug.slice_mut(ndarray::s![.., ..k]).assign(&a);
aug.slice_mut(ndarray::s![.., k]).assign(r);
for col in 0..k {
let piv = (col..k)
.max_by(|&p, &q| aug[[p, col]].abs().total_cmp(&aug[[q, col]].abs()))
.unwrap();
if piv != col {
for j in 0..=k {
let tmp = aug[[col, j]];
aug[[col, j]] = aug[[piv, j]];
aug[[piv, j]] = tmp;
}
}
let p = aug[[col, col]];
for row in 0..k {
if row == col {
continue;
}
let f = aug[[row, col]] / p;
for j in col..=k {
aug[[row, j]] -= f * aug[[col, j]];
}
}
}
let beta = Array1::from_iter((0..k).map(|i| aug[[i, k]] / aug[[i, i]]));
let deviance = c - beta.dot(r);
(deviance, beta)
}
#[test]
fn psi_gram_tensor_bit_tight_hessian_and_kappa_optimum() {
let (n, k) = (200usize, 6usize);
let w = Array1::from_iter((0..n).map(|i| 0.7 + 0.6 * (((i * 7) % 5) as f64) / 4.0));
let z = Array1::from_iter((0..n).map(|i| {
let t = (i as f64) / (n as f64 - 1.0);
(3.0 * t).sin() + 0.3 * (7.0 * t).cos()
}));
let (psi_lo, psi_hi) = (-1.0_f64, 0.9_f64);
let s_ridge = Array2::<f64>::eye(k);
let lambda = 0.5_f64;
let tensor = PsiGramTensor::build(
|psi| synth_design(psi, n, k),
w.view(),
z.view(),
psi_lo,
psi_hi,
)
.expect("analytic synthetic design must certify");
let exact_ztwz: f64 = w.iter().zip(z.iter()).map(|(&wi, &zi)| wi * zi * zi).sum();
let exact_deviance = |psi: f64| -> f64 {
let design = synth_design(psi, n, k).unwrap();
let mut wd = design.clone();
for (mut row, &wi) in wd.outer_iter_mut().zip(w.iter()) {
row.mapv_inplace(|v| v * wi);
}
let g = design.t().dot(&wd);
let r = wd.t().dot(&z);
profile_deviance(&g, &r, exact_ztwz, &s_ridge, lambda, k).0
};
let fast_deviance = |psi: f64| -> f64 {
let g = tensor.gram_at(psi);
let r = tensor.rhs_at(psi);
profile_deviance(&g, &r, exact_ztwz, &s_ridge, lambda, k).0
};
let m = 81usize;
let lo = psi_lo + 0.06;
let hi = psi_hi - 0.06;
let grid: Vec<f64> = (0..m)
.map(|i| lo + (hi - lo) * (i as f64) / (m as f64 - 1.0))
.collect();
let mut worst_value_rel = 0.0_f64;
let (mut fast_argmin, mut fast_min) = (f64::NAN, f64::INFINITY);
let (mut exact_argmin, mut exact_min) = (f64::NAN, f64::INFINITY);
for &psi in &grid {
let de = exact_deviance(psi);
let df = fast_deviance(psi);
let rel = (de - df).abs() / de.abs().max(1e-300);
worst_value_rel = worst_value_rel.max(rel);
if df < fast_min {
fast_min = df;
fast_argmin = psi;
}
if de < exact_min {
exact_min = de;
exact_argmin = psi;
}
}
assert!(
worst_value_rel <= 1e-9,
"penalized profile deviance: fast n-free assembly diverged from exact \
streamed by rel {worst_value_rel:.3e} (> 1e-9) somewhere on the ψ grid"
);
assert_eq!(
fast_argmin.to_bits(),
exact_argmin.to_bits(),
"κ-optimum mismatch: fast argmin ψ={fast_argmin}, exact argmin ψ={exact_argmin} \
— the n-free objective located a different optimum"
);
assert!(
fast_argmin > lo + 1e-9 && fast_argmin < hi - 1e-9,
"κ-optimum landed on the grid edge ψ={fast_argmin}; the fixture must have \
an INTERIOR minimum for this to test the outer search, not a boundary"
);
let opt_rel = (exact_min - fast_min).abs() / exact_min.abs().max(1e-300);
assert!(
opt_rel <= 1e-9,
"κ-optimum objective value drift at ψ={fast_argmin}: fast={fast_min}, \
exact={exact_min}, rel={opt_rel:.3e}"
);
let solve_a = |g: &Array2<f64>, r: &Array1<f64>| -> Array1<f64> {
profile_deviance(g, r, exact_ztwz, &s_ridge, lambda, k).1
};
let analytic_grad = |psi: f64| -> f64 {
let g = tensor.gram_at(psi);
let r = tensor.rhs_at(psi);
let beta = solve_a(&g, &r);
let dg = tensor.dgram_dpsi(psi);
let dr = tensor.drhs_dpsi(psi);
-2.0 * beta.dot(&dr) + beta.dot(&dg.dot(&beta))
};
let h_grad = 1e-6_f64;
let h_curv = 2e-4_f64;
let mut worst_grad_rel = 0.0_f64;
let mut worst_hess_rel = 0.0_f64;
let mut tested = 0usize;
for &psi in &grid {
if !tensor.contains_for_gradient(psi - 2.0 * h_curv)
|| !tensor.contains_for_gradient(psi + 2.0 * h_curv)
{
continue;
}
tested += 1;
let exact_g1 =
(exact_deviance(psi + h_grad) - exact_deviance(psi - h_grad)) / (2.0 * h_grad);
let ag = analytic_grad(psi);
let gscale = exact_g1.abs().max(1e-6);
worst_grad_rel = worst_grad_rel.max((exact_g1 - ag).abs() / gscale);
let analytic_h2 =
(analytic_grad(psi + h_curv) - analytic_grad(psi - h_curv)) / (2.0 * h_curv);
let exact_h2 = (exact_deviance(psi + h_curv) - 2.0 * exact_deviance(psi)
+ exact_deviance(psi - h_curv))
/ (h_curv * h_curv);
let hscale = exact_h2.abs().max(1e-3);
worst_hess_rel = worst_hess_rel.max((analytic_h2 - exact_h2).abs() / hscale);
}
assert!(
tested > 0,
"no ψ on the grid lay inside the certified gradient sub-window"
);
assert!(
worst_grad_rel <= 1e-5,
"ψ-gradient mismatch: the tensor's analytic n-free objective gradient diverged \
from the exact streamed objective by rel {worst_grad_rel:.3e} (> 1e-5)"
);
assert!(
worst_hess_rel <= 1e-3,
"ψ-curvature (Hessian) mismatch: fast n-free objective curvature diverged \
from the exact streamed objective by rel {worst_hess_rel:.3e} (> 1e-3) — \
the outer Newton step would read a different curvature than the truth"
);
eprintln!(
"[psi-gram-bittight] n={n} k={k} grid={m} grad-tested={tested} \
worst |ΔD|/D={worst_value_rel:.2e} worst |ΔD'|/D'={worst_grad_rel:.2e} \
worst |ΔD''|/D''={worst_hess_rel:.2e} κ-opt ψ={fast_argmin:.6} (interior, bit-identical)"
);
}
#[test]
fn psi_gram_tensor_refuses_non_analytic_design() {
let (n, k) = (40usize, 3usize);
let w = Array1::from_elem(n, 1.0);
let z = Array1::from_elem(n, 0.5);
let tensor = PsiGramTensor::build(
|psi| {
let mut x = Array2::<f64>::zeros((n, k));
for i in 0..n {
for j in 0..k {
x[[i, j]] = psi.abs() + (i + j) as f64 / (n + k) as f64;
}
}
Ok(x)
},
w.view(),
z.view(),
-1.0,
1.0,
);
assert!(
tensor.is_none(),
"kinked design must fail the tail-decay/spot-check certificates"
);
}
}