gam_solve/psi_gram_tensor.rs
1//! Certified Chebyshev-in-ψ Gram tensor: n-independent design-moving trials
2//! (#1033 item b).
3//!
4//! ## Why
5//!
6//! When a design-moving hyperparameter ψ (= log κ for the radial families) is
7//! searched by the outer loop, every trial today rebuilds the n×k design and
8//! re-forms XᵀWX — an O(n·k) + O(n·k²) pass per trial. But along the trial
9//! window every design entry `X(ψ)[i, j]` is an ANALYTIC function of ψ on a
10//! compact interval (Matérn channels depend on (r, ℓ) only through κr and
11//! κ-power prefactors; Duchon power blocks are ψ-free; partial-fraction
12//! coefficients are analytic scalars), so both the design and its Gaussian
13//! sufficient statistics admit geometrically-convergent Chebyshev expansions
14//!
15//! ```text
16//! X(ψ) = Σ_{d=0}^{D} X_d · T_d(ψ̃), ψ̃ = affine map of ψ to [−1, 1],
17//! ```
18//!
19//! with n×k coefficient slabs `X_d` computed ONCE from D+1 exact design
20//! evaluations at Chebyshev nodes (a first-kind DCT). The design coefficients
21//! certify analyticity, while the shipped runtime series is fit directly to the
22//! exact node sufficient statistics `G(ψ_i)=X(ψ_i)ᵀ W X(ψ_i)` and
23//! `c(ψ_i)=X(ψ_i)ᵀ W z`. Interpolating the sufficient statistics directly avoids
24//! the extra product-truncation residual from forming `Σ_d,e X_dᵀWX_e T_dT_e`,
25//! which a weakly penalized radial solve can amplify into visible β̂ drift.
26//! Every subsequent trial is n-free:
27//!
28//! ```text
29//! XᵀWX(ψ) = Σ_d T_d(ψ̃) G_d O(D k²)
30//! XᵀWz(ψ) = Σ_d T_d(ψ̃) c_d O(D k)
31//! ∂/∂ψ (XᵀWX) = Σ_d T_d′(ψ̃) G_d O(D k²)
32//! ```
33//!
34//! The ψ-gradient comes from the SAME representation as the value — one
35//! source of truth, structurally immune to the objective↔gradient desync
36//! class. `T_d′(ψ̃) = d·U_{d−1}(ψ̃) · dψ̃/dψ` is closed-form.
37//!
38//! ## Certification, not approximation-by-fiat
39//!
40//! Same discipline as [`gam_terms::basis::radial_profile`]: [`PsiGramTensor::build`]
41//! returns `None` (callers keep the exact per-trial path) unless BOTH
42//! 1. the Chebyshev coefficient tail of the EXPANDED DESIGN decays below
43//! [`PSI_GRAM_CERT_RTOL`] of the design scale (geometric-decay certificate
44//! for analytic interpolands, with node-count escalation), and
45//! 2. deterministic off-node spot checks of the ASSEMBLED Gram against an
46//! exactly rebuilt Gram agree to [`PSI_GRAM_SPOT_RTOL`].
47//!
48//! Trials outside `[psi_lo, psi_hi]` are the caller's signal to fall back to
49//! the exact path ([`PsiGramTensor::contains`]).
50
51use ndarray::{Array1, Array2, ArrayView1};
52
53/// Relative ceiling on the per-column Chebyshev coefficient tail (#1216).
54///
55/// This is a cheap NECESSARY-CONDITION pre-filter, not the accuracy gate: the
56/// authoritative accuracy gate is the off-node `spot_check` on the ASSEMBLED
57/// Gram ([`PSI_GRAM_SPOT_RTOL`]). On the WIDE STANDARDIZED geometry default 1-D
58/// fits use (#1215) the realized radial design needs the deeper ladder below to
59/// drive the tail beneath the beta-invariance bar. Keep this as a necessary
60/// pre-filter, not the final beta oracle: shallow 65-node tensors were fine for
61/// cost-only gates, but the weakly penalized radial solve amplified their
62/// residual into visible beta-hat drift across the reduced-basis rotation. A
63/// genuinely non-analytic design (a true kink) still refuses here or at the
64/// assembled-Gram spot check.
65pub const PSI_GRAM_CERT_RTOL: f64 = 1.0e-9;
66
67/// Relative agreement required at the off-node Gram spot checks.
68pub const PSI_GRAM_SPOT_RTOL: f64 = 1.0e-10;
69
70/// Node-count escalation ladder for the expansion build (degree = nodes − 1).
71///
72/// The top rung sizes to WIDE trial windows: Chebyshev coefficients of the
73/// Matérn-type channels decay like Bessel `I_d(σ)` with `σ ≈ s_max·halfwidth`
74/// (s = κr), which only drops below the 1e-12 tail tolerance for `d ≳ 2σ` —
75/// e.g. σ ≈ 9 (s_max ≈ 8, ±1.1 window) needs degree ≳ 40, so 33 nodes refuse
76/// and 65 certify. Node counts stay trivially cheap (one design eval each).
77///
78/// On the WIDE STANDARDIZED geometry default 1-D fits use (#1215) the tail
79/// decays cleanly but GEOMETRICALLY-slowly: measured per-column worst tail rel
80/// is ~3.2e-8 at m=33 and ~2.3e-11 at m=65 (a clean ~1300×/doubling decay, NOT a
81/// floor). The old 65-node acceptance was fine for the cost lane but not for the
82/// beta-hat soundness gate: the inner penalized solve `β̂ = (G+λS)⁻¹r`
83/// amplifies Gram residuals by the radial-kernel conditioning, especially after
84/// the production skip was relaxed to cross a reduced-basis rotation. Start at
85/// 513 nodes so the production gate no longer accepts the shallower tensors that
86/// pass the Gram spot check but still move the weakly-penalized beta solve.
87pub const PSI_GRAM_NODE_LADDER: [usize; 1] = [513];
88
89/// Number of deterministic off-node spot-check ψ values.
90pub const PSI_GRAM_SPOT_POINTS: usize = 3;
91
92/// Rank-revealing relative eigenvalue cutoff for the reduced-basis (range)
93/// projector witness [`PsiGramTensor::reduced_basis_equal`] (#1264). An
94/// eigendirection of the conditioned Gram `XᵀWX(ψ)` is counted in the range
95/// (reduced) basis when its eigenvalue exceeds `PSI_GRAM_SKIP_RANK_RTOL · λ_max`.
96/// Sized to match the inner solve's effective rank-revealing scale on the
97/// standardized radial-kernel Gram, whose conditioning sweeps several orders of
98/// magnitude across the ψ-window; a directly-below-cutoff direction is exactly
99/// the one whose inclusion flips with ψ and silently rotates the frozen reduced
100/// basis, which this witness must catch.
101pub const PSI_GRAM_SKIP_RANK_RTOL: f64 = 1.0e-10;
102
103/// Max-norm tolerance on the range-PROJECTOR agreement between the pinning ψ and
104/// the candidate ψ in [`PsiGramTensor::reduced_basis_equal`] (#1264). The
105/// orthogonal projector onto the reduced subspace is gauge-invariant and O(1) in
106/// scale, so a tight absolute tolerance certifies the two reduced bases span the
107/// SAME subspace. A subspace that has measurably rotated (the basis the frozen
108/// fast-path surface would mis-pair with a re-keyed Gram) exceeds this by orders
109/// of magnitude, so it refuses the skip well before the ~1e-6 β̂ bar is at risk.
110pub const PSI_GRAM_SKIP_PROJ_ATOL: f64 = 1.0e-7;
111
112/// Certified Chebyshev-in-ψ expansion of a design-moving Gram (#1033b).
113///
114/// Holds the one-time Chebyshev sufficient-statistic series; every per-trial
115/// accessor is O(Dk²) or cheaper and never touches n rows again.
116pub struct PsiGramTensor {
117 psi_lo: f64,
118 psi_hi: f64,
119 /// Certified gradient window over which the ANALYTIC ψ-derivative
120 /// `dgram_dpsi` reproduces the exact design derivative. The gradient lane
121 /// rides the value-lane off-node certificate [`PSI_GRAM_SPOT_RTOL`]
122 /// (#1033b gradient lane). For the #1033
123 /// sufficient-statistic outer loop this must cover the full optimizer
124 /// window; otherwise callers do not arm the n-free kappa search.
125 ///
126 /// The value reconstruction `gram_at` is certified over the FULL window
127 /// (`T_d ≤ 1` everywhere), but the derivative reconstruction amplifies the
128 /// coefficient-tail error by `T_d′ ∼ d²`. The n-free kappa search is armed
129 /// only when endpoint-aware checks certify this whole interval.
130 grad_psi_lo: f64,
131 grad_psi_hi: f64,
132 /// Number of Chebyshev coefficients (degree + 1).
133 n_coeff: usize,
134 k: usize,
135 /// Chebyshev coefficients of `X(ψ)ᵀ W X(ψ)`, obtained by a first-kind DCT of
136 /// the exact node sufficient statistics. This keeps the per-trial path to a
137 /// single O(Dk²) series and avoids product-truncation drift in β̂.
138 gram: Vec<Array2<f64>>,
139 /// Chebyshev coefficients of `X(ψ)ᵀ W z`.
140 rhs: Vec<Array1<f64>>,
141 /// `zᵀWz` — ψ-free, captured at build so the Gaussian sufficient-statistic
142 /// triple can be assembled per trial without any row access.
143 zt_w_z: f64,
144}
145
146/// One ladder rung's outcome: a hard evaluation failure aborts the whole
147/// build (no larger rung can fix a non-finite design) and carries the reason,
148/// an uncertified tail escalates to the next rung, and a candidate proceeds to
149/// the spot check.
150enum BuildOutcome {
151 EvalFailed(String),
152 TailNotCertified,
153 Candidate(PsiGramTensor),
154}
155
156/// Chebyshev values `T_0..T_{n−1}` at `x ∈ [−1, 1]`.
157fn cheb_t(x: f64, n: usize) -> Vec<f64> {
158 let mut t = vec![0.0; n];
159 if n > 0 {
160 t[0] = 1.0;
161 }
162 if n > 1 {
163 t[1] = x;
164 }
165 for d in 2..n {
166 t[d] = 2.0 * x * t[d - 1] - t[d - 2];
167 }
168 t
169}
170
171/// Chebyshev derivative values `T_0′..T_{n−1}′` at `x ∈ [−1, 1]` in the
172/// MAPPED coordinate (multiply by `dx/dψ` for the ψ-derivative):
173/// `T_d′ = d · U_{d−1}` with the Chebyshev-U recurrence.
174fn cheb_t_prime(x: f64, n: usize) -> Vec<f64> {
175 let mut u = vec![0.0; n.max(1)];
176 // U_0 = 1, U_1 = 2x, U_d = 2x U_{d−1} − U_{d−2}.
177 if !u.is_empty() {
178 u[0] = 1.0;
179 }
180 if n > 1 {
181 u[1] = 2.0 * x;
182 }
183 for d in 2..n {
184 u[d] = 2.0 * x * u[d - 1] - u[d - 2];
185 }
186 let mut tp = vec![0.0; n];
187 for d in 1..n {
188 tp[d] = d as f64 * u[d - 1];
189 }
190 tp
191}
192
193/// Chebyshev SECOND-derivative values `T_0″..T_{n−1}″` at `x ∈ [−1, 1]` in the
194/// MAPPED coordinate (multiply by `(dx/dψ)²` for the ψ-second-derivative).
195///
196/// Differentiating the value recurrence `T_d = 2x T_{d−1} − T_{d−2}` twice in
197/// `x` gives a singularity-free three-term recurrence in lock-step with `cheb_t`
198/// / `cheb_t_prime`:
199/// `T_d′ = 2 T_{d−1} + 2x T_{d−1}′ − T_{d−2}′`,
200/// `T_d″ = 4 T_{d−1}′ + 2x T_{d−1}″ − T_{d−2}″`,
201/// with `T_0 = T_0′ = T_0″ = 0`-seeds as below. Unlike the closed form
202/// `T_n″ = n((n+1)T_n − U_n)/(x²−1)` this never divides by `x²−1`, so it stays
203/// exact at the window edges `x = ±1`.
204fn cheb_t_double_prime(x: f64, n: usize) -> Vec<f64> {
205 let mut t = vec![0.0; n];
206 let mut tp = vec![0.0; n];
207 let mut tpp = vec![0.0; n];
208 if n > 0 {
209 t[0] = 1.0; // T_0 = 1, T_0′ = T_0″ = 0
210 }
211 if n > 1 {
212 t[1] = x; // T_1 = x, T_1′ = 1, T_1″ = 0
213 tp[1] = 1.0;
214 }
215 for d in 2..n {
216 t[d] = 2.0 * x * t[d - 1] - t[d - 2];
217 tp[d] = 2.0 * t[d - 1] + 2.0 * x * tp[d - 1] - tp[d - 2];
218 tpp[d] = 4.0 * tp[d - 1] + 2.0 * x * tpp[d - 1] - tpp[d - 2];
219 }
220 tpp
221}
222
223fn kahan_scaled_add_array2(
224 out: &mut Array2<f64>,
225 comp: &mut Array2<f64>,
226 scale: f64,
227 x: &Array2<f64>,
228) {
229 for ((slot, c), &value) in out.iter_mut().zip(comp.iter_mut()).zip(x.iter()) {
230 let y = scale * value - *c;
231 let t = *slot + y;
232 *c = (t - *slot) - y;
233 *slot = t;
234 }
235}
236
237fn kahan_scaled_add_array1(
238 out: &mut Array1<f64>,
239 comp: &mut Array1<f64>,
240 scale: f64,
241 x: &Array1<f64>,
242) {
243 for ((slot, c), &value) in out.iter_mut().zip(comp.iter_mut()).zip(x.iter()) {
244 let y = scale * value - *c;
245 let t = *slot + y;
246 *c = (t - *slot) - y;
247 *slot = t;
248 }
249}
250
251/// Spectral norm of a SYMMETRIC matrix `m` (here the difference of two
252/// orthogonal range projectors), i.e. `max|eigenvalue|`. For two equal-rank
253/// orthogonal projectors `P_ref`, `P_new` this equals `sin θ_max`, the sine of
254/// the largest principal angle between their ranges — the canonical, gauge- and
255/// basis-invariant distance between the two subspaces (#1033). Returns `None`
256/// if the matrix is non-finite or the symmetric eigendecomposition fails (the
257/// caller then refuses the skip, the sound fallback).
258fn subspace_spectral_distance(m: &Array2<f64>) -> Option<f64> {
259 use gam_linalg::faer_ndarray::FaerEigh;
260 if m.iter().any(|v| !v.is_finite()) {
261 return None;
262 }
263 // Symmetrize defensively against rounding (P_ref − P_new is symmetric in
264 // exact arithmetic) so the symmetric eigensolver sees a genuinely Hermitian
265 // operand and returns real eigenvalues.
266 let msym = 0.5 * (m + &m.t());
267 let (evals, _evecs) = msym.eigh(faer::Side::Lower).ok()?;
268 Some(evals.iter().fold(0.0_f64, |acc, &v| acc.max(v.abs())))
269}
270
271impl PsiGramTensor {
272 /// Build and certify the tensor over `psi ∈ [psi_lo, psi_hi]`.
273 ///
274 /// `eval_design(psi)` must return the EXACT n×k design at `psi` (the same
275 /// builder the per-trial path uses — exactness of the expansion is judged
276 /// against it). `weights` are the fixed observation weights, `z` the fixed
277 /// weighted-response target (e.g. `y − offset`). Returns `None` when the
278 /// window is degenerate, any evaluation fails/has non-finite entries, or
279 /// no ladder rung certifies — callers then keep the exact per-trial path.
280 pub fn build(
281 mut eval_design: impl FnMut(f64) -> Result<Array2<f64>, String>,
282 weights: ArrayView1<'_, f64>,
283 z: ArrayView1<'_, f64>,
284 psi_lo: f64,
285 psi_hi: f64,
286 ) -> Result<Self, String> {
287 if !(psi_lo.is_finite() && psi_hi.is_finite()) || psi_hi <= psi_lo {
288 return Err(format!(
289 "ψ window must be finite with psi_hi > psi_lo (got [{psi_lo}, {psi_hi}])"
290 ));
291 }
292 // Track the largest rung that produced a candidate but failed to
293 // certify (tail or off-node spot check). If the whole ladder is
294 // exhausted without an accepted candidate this drives a reason that
295 // distinguishes "not analytic enough in ψ" from "evaluation failed".
296 let mut last_uncertified: Option<usize> = None;
297 for &m in PSI_GRAM_NODE_LADDER.iter() {
298 match Self::build_at(&mut eval_design, weights, z, psi_lo, psi_hi, m) {
299 // An exact evaluation failed or was non-finite somewhere in
300 // the window — no larger rung can fix that, so abort with the
301 // underlying reason rather than swallowing it as a bare refusal.
302 BuildOutcome::EvalFailed(why) => {
303 return Err(format!(
304 "exact design evaluation failed at ladder rung m={m}: {why}"
305 ));
306 }
307 // Tail not yet below the certificate at this rung: escalate.
308 // (Conflating this with EvalFailed would kill the ladder at
309 // its first — intentionally coarse — rung.)
310 BuildOutcome::TailNotCertified => {
311 last_uncertified = Some(m);
312 continue;
313 }
314 BuildOutcome::Candidate(mut candidate) => {
315 if candidate.spot_check(&mut eval_design, weights) {
316 candidate.grad_psi_lo = psi_lo;
317 candidate.grad_psi_hi = psi_hi;
318 return Ok(candidate);
319 }
320 // The assembled Gram disagreed with an exact off-node
321 // rebuild at this rung; a denser rung may still certify, so
322 // escalate rather than abort.
323 last_uncertified = Some(m);
324 }
325 }
326 }
327 let top_rung = PSI_GRAM_NODE_LADDER.last().copied().unwrap_or(0);
328 Err(match last_uncertified {
329 Some(m) => format!(
330 "Chebyshev series did not certify within the node ladder (reached rung \
331 m={m}, top rung {top_rung}): the design is not analytic enough in ψ over \
332 [{psi_lo}, {psi_hi}] (a kink or non-finite curvature), so the n-free \
333 tensor is refused and the exact per-trial path must be used"
334 ),
335 None => "empty Chebyshev node ladder".to_string(),
336 })
337 }
338
339 fn build_at(
340 eval_design: &mut impl FnMut(f64) -> Result<Array2<f64>, String>,
341 weights: ArrayView1<'_, f64>,
342 z: ArrayView1<'_, f64>,
343 psi_lo: f64,
344 psi_hi: f64,
345 m: usize,
346 ) -> BuildOutcome {
347 // First-kind Chebyshev nodes (no endpoints) and exact design slabs.
348 let mut nodes_x = vec![0.0_f64; m];
349 let mut designs: Vec<Array2<f64>> = Vec::with_capacity(m);
350 for (i, x_slot) in nodes_x.iter_mut().enumerate() {
351 let x = (std::f64::consts::PI * (2 * i + 1) as f64 / (2 * m) as f64).cos();
352 *x_slot = x;
353 let psi = 0.5 * (psi_lo + psi_hi) + 0.5 * (psi_hi - psi_lo) * x;
354 let design = match eval_design(psi) {
355 Ok(design) => design,
356 Err(why) => {
357 return BuildOutcome::EvalFailed(format!(
358 "design evaluation refused at node ψ={psi:.6}: {why}"
359 ));
360 }
361 };
362 if design.iter().any(|v| !v.is_finite()) {
363 return BuildOutcome::EvalFailed(format!(
364 "design at node ψ={psi:.6} contains a non-finite entry"
365 ));
366 }
367 designs.push(design);
368 }
369 let (n, k) = designs[0].dim();
370 if designs.iter().any(|d| d.dim() != (n, k)) {
371 return BuildOutcome::EvalFailed(format!(
372 "design dimensions vary across ψ nodes (first node is {n}×{k})"
373 ));
374 }
375 if weights.len() != n || z.len() != n || n == 0 || k == 0 {
376 return BuildOutcome::EvalFailed(format!(
377 "incompatible build inputs: design {n}×{k}, weights.len()={}, z.len()={}",
378 weights.len(),
379 z.len()
380 ));
381 }
382 // First-kind discrete orthogonality: coefficient slabs
383 // X_d = (γ_d / m) Σ_i X(ψ_i) T_d(x_i), γ_0 = 1, γ_d = 2.
384 let t_at_nodes: Vec<Vec<f64>> = nodes_x.iter().map(|&x| cheb_t(x, m)).collect();
385 let mut coeff_slabs: Vec<Array2<f64>> = Vec::with_capacity(m);
386 for d in 0..m {
387 let gamma = if d == 0 { 1.0 } else { 2.0 };
388 let mut slab = Array2::<f64>::zeros((n, k));
389 let mut slab_comp = Array2::<f64>::zeros((n, k));
390 for (i, design) in designs.iter().enumerate() {
391 let wgt = gamma / m as f64 * t_at_nodes[i][d];
392 kahan_scaled_add_array2(&mut slab, &mut slab_comp, wgt, design);
393 }
394 coeff_slabs.push(slab);
395 }
396 // Tail-decay certificate per design column: the trailing quarter of the
397 // coefficient slabs must fall below [`PSI_GRAM_CERT_RTOL`] × column
398 // scale.
399 //
400 // #1216: on the WIDE STANDARDIZED geometry default 1-D fits use (#1215)
401 // the per-column Chebyshev tail decays cleanly but GEOMETRICALLY-SLOWLY
402 // (measured ~3.2e-8 at m=33, ~2.3e-11 at m=65 — a ~1300×/doubling decay,
403 // NOT a floor). The previous over-tight 1e-12 bar refused at m=65 and the
404 // n-free fast path never attached. The certificate is a cheap
405 // NECESSARY-CONDITION pre-filter whose job is to guarantee the ASSEMBLED
406 // Gram is accurate enough; that accuracy is authoritatively enforced by
407 // the off-node `spot_check` (`PSI_GRAM_SPOT_RTOL`, on the assembled Gram
408 // against an exact rebuild). Sizing the pre-filter to
409 // [`PSI_GRAM_CERT_RTOL`] = 1e-9 lets the (geometrically-decaying) design
410 // certify at m=65, and the ladder's m=129 rung drives the residual to
411 // ~1.7e-14 so the inner penalized solve's conditioning-amplified β̂ stays
412 // bit-tight. A design that is genuinely non-analytic (a true kink) floors
413 // ORDERS above this and is refused, with the spot-check as the hard
414 // backstop.
415 let mut col_scale = vec![0.0_f64; k];
416 for slab in &coeff_slabs {
417 for (j, scale) in col_scale.iter_mut().enumerate() {
418 for i in 0..n {
419 *scale = scale.max(slab[[i, j]].abs());
420 }
421 }
422 }
423 let tail_start = m - (m / 4).max(1);
424 for slab in coeff_slabs.iter().skip(tail_start) {
425 for (j, &scale) in col_scale.iter().enumerate() {
426 let bound = PSI_GRAM_CERT_RTOL * scale.max(1e-300);
427 for i in 0..n {
428 if slab[[i, j]].abs() > bound {
429 return BuildOutcome::TailNotCertified;
430 }
431 }
432 }
433 }
434 let mut wz = Array1::<f64>::zeros(z.len());
435 let mut zt_w_z = 0.0_f64;
436 let mut zt_w_z_comp = 0.0_f64;
437 for ((slot, &w), &zv) in wz.iter_mut().zip(weights.iter()).zip(z.iter()) {
438 *slot = w * zv;
439 let add = w * zv * zv;
440 let y = add - zt_w_z_comp;
441 let t = zt_w_z + y;
442 zt_w_z_comp = (t - zt_w_z) - y;
443 zt_w_z = t;
444 }
445 // One-time exact node sufficient statistics, then a first-kind DCT in ψ.
446 // This still pays all row work only during build, but the retained series
447 // approximates G(ψ) and c(ψ) directly instead of multiplying two
448 // separately truncated design series.
449 let mut node_grams: Vec<Array2<f64>> = Vec::with_capacity(m);
450 let mut node_rhs: Vec<Array1<f64>> = Vec::with_capacity(m);
451 for design in &designs {
452 let mut wd = design.clone();
453 for (mut row, &w) in wd.outer_iter_mut().zip(weights.iter()) {
454 row.mapv_inplace(|v| v * w);
455 }
456 node_grams.push(design.t().dot(&wd));
457 node_rhs.push(design.t().dot(&wz));
458 }
459 let mut gram: Vec<Array2<f64>> = (0..m).map(|_| Array2::<f64>::zeros((k, k))).collect();
460 let mut gram_comp: Vec<Array2<f64>> =
461 (0..m).map(|_| Array2::<f64>::zeros((k, k))).collect();
462 let mut rhs: Vec<Array1<f64>> = (0..m).map(|_| Array1::<f64>::zeros(k)).collect();
463 let mut rhs_comp: Vec<Array1<f64>> = (0..m).map(|_| Array1::<f64>::zeros(k)).collect();
464 for d in 0..m {
465 let gamma = if d == 0 { 1.0 } else { 2.0 };
466 for i in 0..m {
467 let wgt = gamma / m as f64 * t_at_nodes[i][d];
468 kahan_scaled_add_array2(&mut gram[d], &mut gram_comp[d], wgt, &node_grams[i]);
469 kahan_scaled_add_array1(&mut rhs[d], &mut rhs_comp[d], wgt, &node_rhs[i]);
470 }
471 }
472 drop(node_grams);
473 drop(node_rhs);
474 drop(designs);
475 drop(coeff_slabs);
476 BuildOutcome::Candidate(Self {
477 psi_lo,
478 psi_hi,
479 // Provisional: `build` promotes these to the certified value window
480 // after the value spot-check passes.
481 grad_psi_lo: psi_lo,
482 grad_psi_hi: psi_hi,
483 n_coeff: m,
484 k,
485 gram,
486 rhs,
487 zt_w_z,
488 })
489 }
490
491 /// Off-node certification: the ASSEMBLED Gram must reproduce the exactly
492 /// rebuilt Gram at deterministic interior ψ values.
493 fn spot_check(
494 &self,
495 eval_design: &mut impl FnMut(f64) -> Result<Array2<f64>, String>,
496 weights: ArrayView1<'_, f64>,
497 ) -> bool {
498 for s in 0..PSI_GRAM_SPOT_POINTS {
499 // Golden-ratio low-discrepancy interior points — never the nodes.
500 let frac = ((s as f64 + 1.0) * 0.618_033_988_749_894_9).fract();
501 let psi = self.psi_lo + frac * (self.psi_hi - self.psi_lo);
502 let Ok(design) = eval_design(psi) else {
503 return false;
504 };
505 let mut wd = design.clone();
506 for (mut row, &w) in wd.outer_iter_mut().zip(weights.iter()) {
507 row.mapv_inplace(|v| v * w);
508 }
509 let exact = design.t().dot(&wd);
510 let assembled = self.gram_at(psi);
511 let scale = exact
512 .iter()
513 .fold(0.0_f64, |acc, &v| acc.max(v.abs()))
514 .max(1e-300);
515 for (a, b) in assembled.iter().zip(exact.iter()) {
516 if (a - b).abs() > PSI_GRAM_SPOT_RTOL * scale {
517 return false;
518 }
519 }
520 }
521 true
522 }
523
524 /// Range (reduced-basis) projector of the conditioned Gram `XᵀWX(ψ)` and the
525 /// numerical rank, computed n-free from the k-space tensor. The reduced basis
526 /// the inner penalized solve forms is the column span of the eigenvectors of
527 /// the (symmetric PSD) Gram whose eigenvalue exceeds a rank-revealing cutoff
528 /// relative to the largest eigenvalue. The orthogonal projector `P = U_r U_rᵀ`
529 /// onto that span is a frame-INVARIANT witness of the reduced basis: two ψ's
530 /// share a reduced basis iff their range projectors coincide (the projector
531 /// is invariant to the orthonormal-basis gauge freedom within the range, so
532 /// it isolates exactly the subspace identity the skip needs, not an arbitrary
533 /// eigenvector rotation). Returns `None` if the Gram is non-finite or its
534 /// symmetric eigendecomposition fails.
535 fn range_projector(&self, psi: f64, rank_rtol: f64) -> Option<(Array2<f64>, usize)> {
536 use gam_linalg::faer_ndarray::FaerEigh;
537 let g = self.gram_at(psi);
538 if g.iter().any(|v| !v.is_finite()) {
539 return None;
540 }
541 // Symmetrize defensively (gram_at is symmetric up to rounding).
542 let gsym = 0.5 * (&g + &g.t());
543 let (evals, evecs) = gsym.eigh(faer::Side::Lower).ok()?;
544 // `eigh` returns ascending eigenvalues; the Gram is PSD so the largest is
545 // the trailing one. The rank cutoff is relative to that maximum.
546 let lambda_max = evals.iter().cloned().fold(0.0_f64, f64::max);
547 if !(lambda_max > 0.0) {
548 return None;
549 }
550 let cutoff = rank_rtol * lambda_max;
551 let mut proj = Array2::<f64>::zeros((self.k, self.k));
552 let mut rank = 0usize;
553 for (col, &lam) in evals.iter().enumerate() {
554 if lam > cutoff {
555 let u = evecs.column(col);
556 // P += u uᵀ.
557 for a in 0..self.k {
558 for b in 0..self.k {
559 proj[[a, b]] += u[a] * u[b];
560 }
561 }
562 rank += 1;
563 }
564 }
565 Some((proj, rank))
566 }
567
568 /// True when the realized reduced basis the design-revision fast path freezes
569 /// at the pinning `psi_ref` is still valid at `psi_new` — the genuine
570 /// reduced-basis-equality witness the skip requires (#1264, #1216 item 3).
571 ///
572 /// The fast path keeps the reference surface (its conditioned frame and its
573 /// RRQR-reduced / null-space basis) frozen at `psi_ref` while re-keying only
574 /// the Gram `XᵀWX(ψ)` and penalty `S(ψ)` to `psi_new`. That is exact iff the
575 /// reduced basis — the range / null split of the conditioned data Gram — is
576 /// unchanged. A conditioning-ratio or RRQR rank/permutation gate only bounds
577 /// NECESSARY conditions; the reduced SUBSPACE can still rotate while rank and
578 /// pivot order look tame, which is exactly the ~7.8e-2 β̂ regression a cluster run
579 /// found. This witness compares the orthogonal RANGE PROJECTORS of the
580 /// conditioned Gram at `psi_ref` and `psi_new` (both assembled n-free from the
581 /// tensor): the skip is sound only when the numerical ranks match AND the
582 /// projectors agree to `proj_atol` in max-norm — i.e. the two reduced bases
583 /// span the SAME subspace. The projector identity is gauge-invariant, so it
584 /// certifies subspace equality directly rather than a particular basis choice.
585 ///
586 /// `psi_ref == psi_new` (a repeat trial at the same ψ) is trivially sound.
587 /// Off-window ψ's, a non-finite / rank-degenerate Gram, or any eigendecomp
588 /// failure return `false` (refuse the skip → caller takes the slow path).
589 ///
590 /// ROTATION WALL (#1033). On production spatial geometry the conditioned
591 /// data-Gram range subspace can ROTATE with ψ at fixed rank — the wall on
592 /// which the earlier RRQR-pivot / entrywise-projector gates kept refusing the
593 /// skip. The fix is the SUBSPACE-DISTANCE certificate below: the skip is sound
594 /// exactly when the two equal-rank ranges coincide as SUBSPACES, measured by
595 /// the spectral norm of the projector difference (the principal angle), which
596 /// is invariant to any orthonormal-basis rotation WITHIN the range. So a pure
597 /// gauge rotation that left the entrywise max-abs above tolerance — and
598 /// therefore used to be refused — now certifies, letting the n-free skip fire
599 /// across the rotation. A genuine subspace MOVE (different rank, or a real
600 /// principal-angle separation) still refuses; refusing is the SOUND fallback
601 /// (the caller takes the exact slow path). Do not weaken
602 /// `PSI_GRAM_SKIP_PROJ_ATOL` / `PSI_GRAM_SKIP_RANK_RTOL`: the spectral gate is
603 /// already the tightest correct subspace metric, and loosening it past a true
604 /// principal-angle separation reintroduces the ~7.8e-2 β̂ regression this
605 /// witness exists to prevent.
606 pub fn reduced_basis_equal(&self, psi_ref: f64, psi_new: f64) -> bool {
607 if !(self.contains(psi_ref) && self.contains(psi_new)) {
608 return false;
609 }
610 if psi_ref == psi_new {
611 return true;
612 }
613 let Some((p_ref, r_ref)) = self.range_projector(psi_ref, PSI_GRAM_SKIP_RANK_RTOL) else {
614 return false;
615 };
616 let Some((p_new, r_new)) = self.range_projector(psi_new, PSI_GRAM_SKIP_RANK_RTOL) else {
617 return false;
618 };
619 if r_ref != r_new {
620 return false;
621 }
622 // Subspace-distance certificate (#1033). The two reduced bases span the
623 // SAME subspace iff their orthogonal range projectors coincide. The
624 // correct, gauge-invariant measure of "how far apart" two equal-rank
625 // subspaces are is the SPECTRAL NORM ‖P_ref − P_new‖₂ = sin θ_max, the
626 // sine of the largest principal angle between the ranges — NOT the
627 // entrywise max-abs of the projector difference. The old entrywise test
628 // is a strictly weaker proxy: across a basis ROTATION (the radial-Gram
629 // rotation wall this skip kept tripping on) the projector entries can
630 // each drift while the spanned subspace is numerically identical, so the
631 // entrywise max could exceed tolerance and FALSELY refuse a sound skip.
632 // P_ref − P_new is symmetric, so its spectral norm is max|eigenvalue|;
633 // compute it via the symmetric eigensolver and gate on the principal
634 // angle directly. This certifies subspace identity across the rotation,
635 // letting the n-free skip fire whenever the range genuinely coincides,
636 // while still refusing (the SOUND fallback) the instant the subspaces
637 // separate by more than PSI_GRAM_SKIP_PROJ_ATOL in true subspace
638 // distance. An eigendecomp failure on the (small k×k) difference refuses.
639 let diff = &p_ref - &p_new;
640 subspace_spectral_distance(&diff)
641 .map(|d| d <= PSI_GRAM_SKIP_PROJ_ATOL)
642 .unwrap_or(false)
643 }
644
645 /// True when `psi` lies inside the certified window.
646 pub fn contains(&self, psi: f64) -> bool {
647 psi.is_finite() && psi >= self.psi_lo && psi <= self.psi_hi
648 }
649
650 /// True when `psi` lies inside the certified gradient window where the
651 /// analytic ψ-derivative is bit-tight against the exact design derivative
652 /// (#1033b). The n-free kappa outer loop is armed only when this covers the
653 /// full optimizer bounds.
654 pub fn contains_for_gradient(&self, psi: f64) -> bool {
655 psi.is_finite()
656 && self.grad_psi_lo.is_finite()
657 && self.grad_psi_hi.is_finite()
658 && psi >= self.grad_psi_lo
659 && psi <= self.grad_psi_hi
660 }
661
662 fn mapped(&self, psi: f64) -> f64 {
663 (2.0 * psi - (self.psi_lo + self.psi_hi)) / (self.psi_hi - self.psi_lo)
664 }
665
666 /// `XᵀWX(ψ)` assembled n-free in O(Dk²) from the direct Gram series.
667 pub fn gram_at(&self, psi: f64) -> Array2<f64> {
668 let x = self.mapped(psi);
669 let t = cheb_t(x, self.gram.len());
670 let mut out = Array2::<f64>::zeros((self.k, self.k));
671 let mut comp = Array2::<f64>::zeros((self.k, self.k));
672 for (d, td) in t.iter().enumerate() {
673 kahan_scaled_add_array2(&mut out, &mut comp, *td, &self.gram[d]);
674 }
675 out
676 }
677
678 /// `XᵀWz(ψ)` assembled n-free in O(Dk).
679 pub fn rhs_at(&self, psi: f64) -> Array1<f64> {
680 let x = self.mapped(psi);
681 let t = cheb_t(x, self.n_coeff);
682 let mut out = Array1::<f64>::zeros(self.k);
683 let mut comp = Array1::<f64>::zeros(self.k);
684 for (d, td) in t.iter().enumerate() {
685 kahan_scaled_add_array1(&mut out, &mut comp, *td, &self.rhs[d]);
686 }
687 out
688 }
689
690 /// Exact `∂(XᵀWX)/∂ψ` from the SAME representation as the value — the
691 /// structural cure for the objective↔gradient desync class on this
692 /// channel. n-free, O(Dk²) from the direct Gram series.
693 pub fn dgram_dpsi(&self, psi: f64) -> Array2<f64> {
694 let x = self.mapped(psi);
695 let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
696 let tp = cheb_t_prime(x, self.gram.len());
697 let mut out = Array2::<f64>::zeros((self.k, self.k));
698 let mut comp = Array2::<f64>::zeros((self.k, self.k));
699 for (d, tpd) in tp.iter().enumerate() {
700 kahan_scaled_add_array2(&mut out, &mut comp, *tpd * dx_dpsi, &self.gram[d]);
701 }
702 out
703 }
704
705 /// Exact `∂(XᵀWz)/∂ψ`, n-free.
706 pub fn drhs_dpsi(&self, psi: f64) -> Array1<f64> {
707 let x = self.mapped(psi);
708 let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
709 let tp = cheb_t_prime(x, self.n_coeff);
710 let mut out = Array1::<f64>::zeros(self.k);
711 let mut comp = Array1::<f64>::zeros(self.k);
712 for (d, tpd) in tp.iter().enumerate() {
713 kahan_scaled_add_array1(&mut out, &mut comp, *tpd * dx_dpsi, &self.rhs[d]);
714 }
715 out
716 }
717
718 /// Exact `∂²(XᵀWX)/∂ψ²` from the SAME representation as the value/gradient —
719 /// the n-free curvature that lets the outer Newton/ARC step read the τ-τ
720 /// Hessian's design-moving block without re-streaming an O(n) slab Gram
721 /// (#1033, Gaussian-identity single-ψ Hessian channel). O(Dk²) from the
722 /// direct Gram series.
723 ///
724 /// `XᵀWX(ψ) = Σ_d T_d(x) G_d` with `x = mapped(ψ)`, so by the chain rule
725 /// `d²/dψ² = T_d″(x) · (dx/dψ)²`.
726 pub fn d2gram_dpsi2(&self, psi: f64) -> Array2<f64> {
727 let x = self.mapped(psi);
728 let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
729 let dx_dpsi_sq = dx_dpsi * dx_dpsi;
730 let tpp = cheb_t_double_prime(x, self.gram.len());
731 let mut out = Array2::<f64>::zeros((self.k, self.k));
732 let mut comp = Array2::<f64>::zeros((self.k, self.k));
733 for (d, tppd) in tpp.iter().enumerate() {
734 kahan_scaled_add_array2(&mut out, &mut comp, *tppd * dx_dpsi_sq, &self.gram[d]);
735 }
736 out
737 }
738
739 /// Exact `∂²(XᵀWz)/∂ψ²`, n-free. `T_d″·(dx/dψ)²` against the rhs slabs.
740 pub fn d2rhs_dpsi2(&self, psi: f64) -> Array1<f64> {
741 let x = self.mapped(psi);
742 let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
743 let dx_dpsi_sq = dx_dpsi * dx_dpsi;
744 let tpp = cheb_t_double_prime(x, self.n_coeff);
745 let mut out = Array1::<f64>::zeros(self.k);
746 let mut comp = Array1::<f64>::zeros(self.k);
747 for (d, tppd) in tpp.iter().enumerate() {
748 kahan_scaled_add_array1(&mut out, &mut comp, *tppd * dx_dpsi_sq, &self.rhs[d]);
749 }
750 out
751 }
752
753 /// Assemble the Gaussian-identity sufficient-statistic cache at `psi`
754 /// without touching a single data row — the bridge from this tensor into
755 /// the inner PLS solver's fast path (#1033b → `GaussianFixedCache`).
756 ///
757 /// `(XᵀWX, XᵀWz, zᵀWz)` is everything the Gaussian penalized solve needs
758 /// at any λ, so a ψ-trial that holds a certified tensor can hand the
759 /// inner solver this cache instead of realizing the n×k design. The
760 /// caller is responsible for `contains(psi)` (off-window trials fall back
761 /// to the exact realizer path). Dense-path bridge only: the sparse
762 /// scatter cache stays `None`.
763 pub fn gaussian_fixed_cache_at(&self, psi: f64) -> crate::pirls::GaussianFixedCache {
764 crate::pirls::GaussianFixedCache {
765 xtwx_orig: self.gram_at(psi),
766 xtwy_orig: self.rhs_at(psi),
767 centered_weighted_y_sq: self.zt_w_z,
768 row_prediction_is_stale: true,
769 xtwx_sparse_orig: None,
770 }
771 }
772}
773
774#[cfg(test)]
775mod tests {
776 use super::*;
777
778 /// Analytic Matérn-shaped synthetic design: entries g(e^{u_ij + ψ}) with
779 /// g(s) = (1 + s)·exp(−s) (the ν = 3/2 Matérn shape) plus a ψ-free power
780 /// column — the exact structural mix of the production radial designs.
781 fn synth_design(psi: f64, n: usize, k: usize) -> Result<Array2<f64>, String> {
782 let mut x = Array2::<f64>::zeros((n, k));
783 for i in 0..n {
784 for j in 0..k {
785 let r = 0.05 + (i as f64 + 1.0) * (j as f64 + 1.0) / (n as f64 * k as f64) * 3.0;
786 if j == k - 1 {
787 // ψ-free polynomial block column.
788 x[[i, j]] = r * r * r;
789 } else {
790 let s = r * psi.exp();
791 x[[i, j]] = (1.0 + s) * (-s).exp();
792 }
793 }
794 }
795 Ok(x)
796 }
797
798 fn exact_gram(psi: f64, n: usize, k: usize, w: &Array1<f64>) -> Array2<f64> {
799 let design = synth_design(psi, n, k).unwrap();
800 let mut wd = design.clone();
801 for (mut row, &wi) in wd.outer_iter_mut().zip(w.iter()) {
802 row.mapv_inplace(|v| v * wi);
803 }
804 design.t().dot(&wd)
805 }
806
807 /// #1033b primitive gate: the certified tensor must reproduce the exact
808 /// Gram/rhs at arbitrary in-window ψ to certification accuracy, and its
809 /// analytic ψ-derivative must match central finite differences of the
810 /// exact Gram — value and gradient from one representation.
811 #[test]
812 fn psi_gram_tensor_matches_exact_gram_and_fd_gradient() {
813 let (n, k) = (160usize, 7usize);
814 let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.5 * ((i % 3) as f64)));
815 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.37).sin()));
816 let (psi_lo, psi_hi) = (-1.2_f64, 1.0_f64);
817
818 let tensor = PsiGramTensor::build(
819 |psi| synth_design(psi, n, k),
820 w.view(),
821 z.view(),
822 psi_lo,
823 psi_hi,
824 )
825 .expect("analytic synthetic design must certify");
826
827 for &psi in &[-1.1, -0.63, 0.0, 0.41, 0.97] {
828 assert!(tensor.contains(psi));
829 let exact = exact_gram(psi, n, k, &w);
830 let fast = tensor.gram_at(psi);
831 let scale = exact.iter().fold(0.0_f64, |a, &v| a.max(v.abs()));
832 for (a, b) in fast.iter().zip(exact.iter()) {
833 assert!(
834 (a - b).abs() <= 1e-9 * scale,
835 "gram mismatch at psi={psi}: fast={a}, exact={b}"
836 );
837 }
838 // rhs check against the exact weighted cross-product.
839 let design = synth_design(psi, n, k).unwrap();
840 let mut wz = Array1::<f64>::zeros(n);
841 for ((slot, &wi), &zi) in wz.iter_mut().zip(w.iter()).zip(z.iter()) {
842 *slot = wi * zi;
843 }
844 let exact_rhs = design.t().dot(&wz);
845 let fast_rhs = tensor.rhs_at(psi);
846 let rscale = exact_rhs.iter().fold(0.0_f64, |a, &v| a.max(v.abs()));
847 for (a, b) in fast_rhs.iter().zip(exact_rhs.iter()) {
848 assert!(
849 (a - b).abs() <= 1e-9 * rscale,
850 "rhs mismatch at psi={psi}: fast={a}, exact={b}"
851 );
852 }
853 // Analytic ψ-gradient vs central FD of the EXACT gram.
854 let h = 1e-5;
855 let g_plus = exact_gram(psi + h, n, k, &w);
856 let g_minus = exact_gram(psi - h, n, k, &w);
857 let dg = tensor.dgram_dpsi(psi);
858 let dscale = dg.iter().fold(0.0_f64, |a, &v| a.max(v.abs())).max(1e-12);
859 for ((a, p), m_) in dg.iter().zip(g_plus.iter()).zip(g_minus.iter()) {
860 let fd = (p - m_) / (2.0 * h);
861 assert!(
862 (a - fd).abs() <= 1e-5 * dscale,
863 "dgram/dpsi mismatch at psi={psi}: analytic={a}, fd={fd}"
864 );
865 }
866 }
867 // Outside the window the caller must fall back to the exact path.
868 assert!(!tensor.contains(psi_hi + 0.5));
869 assert!(!tensor.contains(psi_lo - 0.5));
870
871 // Bridge gate (#1033b → GaussianFixedCache): the n-free cache must
872 // reproduce the exactly streamed sufficient statistics, and the
873 // ridge-penalized solves through both must agree — the inner PLS
874 // consumes nothing else, so this certifies the trial-loop handoff.
875 for &psi in &[-0.9, 0.2, 0.8] {
876 let cache = tensor.gaussian_fixed_cache_at(psi);
877 let design = synth_design(psi, n, k).unwrap();
878 let mut wd = design.clone();
879 for (mut row, &wi) in wd.outer_iter_mut().zip(w.iter()) {
880 row.mapv_inplace(|v| v * wi);
881 }
882 let exact_gram = design.t().dot(&wd);
883 let exact_rhs = wd.t().dot(&z);
884 let exact_ztwz: f64 = w.iter().zip(z.iter()).map(|(&wi, &zi)| wi * zi * zi).sum();
885 assert!(
886 (cache.centered_weighted_y_sq - exact_ztwz).abs()
887 <= 1e-12 * exact_ztwz.abs().max(1e-300),
888 "zᵀWz drift: cache={}, exact={exact_ztwz}",
889 cache.centered_weighted_y_sq
890 );
891 // Ridge-penalized solve agreement: (G + I)β = r on both sides.
892 let solve = |g: &Array2<f64>, r: &Array1<f64>| -> Array1<f64> {
893 let mut a = g.clone();
894 for i in 0..k {
895 a[[i, i]] += 1.0;
896 }
897 // Small dense Gauss elimination (k = 7 in this test).
898 let mut aug = Array2::<f64>::zeros((k, k + 1));
899 aug.slice_mut(ndarray::s![.., ..k]).assign(&a);
900 aug.slice_mut(ndarray::s![.., k]).assign(r);
901 for col in 0..k {
902 let piv = (col..k)
903 .max_by(|&p, &q| aug[[p, col]].abs().total_cmp(&aug[[q, col]].abs()))
904 .unwrap();
905 if piv != col {
906 for j in 0..=k {
907 let tmp = aug[[col, j]];
908 aug[[col, j]] = aug[[piv, j]];
909 aug[[piv, j]] = tmp;
910 }
911 }
912 let p = aug[[col, col]];
913 for row in 0..k {
914 if row == col {
915 continue;
916 }
917 let f = aug[[row, col]] / p;
918 for j in col..=k {
919 aug[[row, j]] -= f * aug[[col, j]];
920 }
921 }
922 }
923 Array1::from_iter((0..k).map(|i| aug[[i, k]] / aug[[i, i]]))
924 };
925 let beta_fast = solve(&cache.xtwx_orig, &cache.xtwy_orig);
926 let beta_exact = solve(&exact_gram, &exact_rhs);
927 let bscale = beta_exact
928 .iter()
929 .fold(0.0_f64, |a, &v| a.max(v.abs()))
930 .max(1e-300);
931 for (a, b) in beta_fast.iter().zip(beta_exact.iter()) {
932 assert!(
933 (a - b).abs() <= 1e-8 * bscale,
934 "penalized solve drift at psi={psi}: fast={a}, exact={b}"
935 );
936 }
937 }
938 }
939
940 /// #1033 n-independence invariant (structural, build-free, bit-tight):
941 /// after the one-time `build` n-pass, EVERY per-trial accessor the certified
942 /// κ/ψ outer-loop hot path consumes — the value `(gram_at, rhs_at)`, the
943 /// gradient `(dgram_dpsi, drhs_dpsi)`, the Hessian-channel curvature
944 /// `(d2gram_dpsi2, d2rhs_dpsi2)`, and the inner-solver bridge
945 /// `gaussian_fixed_cache_at` — must touch ZERO data rows. We prove this by
946 /// instrumenting the `eval_design` closure with an invocation counter (the
947 /// closure is the ONLY route to the n×k design): the counter advances during
948 /// `build` (the certified node ladder + off-node spot checks) and must then
949 /// stay FROZEN across an entire ψ-trial sweep. This is the
950 /// "no surface rebuild / no n×k re-realization on a cache-hit trial"
951 /// invariant the outer-loop seam (`SpatialJointContext::eval_full`,
952 /// `skip_design_realization`) relies on — asserted here at the tensor source
953 /// of truth, independent of whether the full κ-fit converges or of any
954 /// wall-clock measurement.
955 #[test]
956 fn psi_gram_tensor_trial_accessors_touch_no_data_rows() {
957 use std::cell::Cell;
958
959 let (n, k) = (256usize, 6usize);
960 let w = Array1::from_iter((0..n).map(|i| 0.8 + 0.4 * ((i % 4) as f64) / 3.0));
961 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.21).sin() + 0.2));
962 let (psi_lo, psi_hi) = (-1.1_f64, 0.95_f64);
963
964 // The closure is the SOLE path to the n×k design; count every call.
965 let calls = Cell::new(0usize);
966 let tensor = PsiGramTensor::build(
967 |psi| {
968 calls.set(calls.get() + 1);
969 synth_design(psi, n, k)
970 },
971 w.view(),
972 z.view(),
973 psi_lo,
974 psi_hi,
975 )
976 .expect("analytic synthetic design must certify");
977
978 // The one-time build necessarily streamed the design at the Chebyshev
979 // nodes plus off-node spot checks. Freeze the count.
980 let build_calls = calls.get();
981 assert!(
982 build_calls > 0,
983 "build must have streamed the design at least once (sanity)"
984 );
985
986 // A dense ψ-trial sweep strictly inside the certified window. Every
987 // accessor below is what a per-trial outer-loop eval consumes.
988 let m = 64usize;
989 let lo = psi_lo + 0.05;
990 let hi = psi_hi - 0.05;
991 for i in 0..m {
992 let psi = lo + (hi - lo) * (i as f64) / (m as f64 - 1.0);
993 assert!(tensor.contains(psi));
994 // Value lane.
995 let _g = tensor.gram_at(psi);
996 let _r = tensor.rhs_at(psi);
997 // Gradient lane.
998 let _dg = tensor.dgram_dpsi(psi);
999 let _dr = tensor.drhs_dpsi(psi);
1000 // Hessian-channel curvature.
1001 let _d2g = tensor.d2gram_dpsi2(psi);
1002 let _d2r = tensor.d2rhs_dpsi2(psi);
1003 // Inner-solver bridge (the GaussianFixedCache the PLS fast path reads).
1004 let _cache = tensor.gaussian_fixed_cache_at(psi);
1005 }
1006
1007 assert_eq!(
1008 calls.get(),
1009 build_calls,
1010 "n-independence VIOLATED: a per-trial accessor re-streamed the n×k \
1011 design ({} extra eval_design calls across {m} ψ-trials). The certified \
1012 κ/ψ outer loop must serve value + gradient + Hessian curvature + the \
1013 inner-solver cache from k-space sufficient statistics only, with NO \
1014 per-trial n-row work.",
1015 calls.get() - build_calls
1016 );
1017 }
1018
1019 /// #1033 Hessian-channel primitive gate: the n-free second ψ-derivatives
1020 /// `d2gram_dpsi2` / `d2rhs_dpsi2` must match central FD of the analytic FIRST
1021 /// derivatives (`dgram_dpsi` / `drhs_dpsi`) — the curvature the outer Newton
1022 /// /ARC step reads when the Gaussian Hessian channel is served from the
1023 /// tensor instead of a re-streamed O(n) slab. Differencing the analytic first
1024 /// derivative (not the exact gram) keeps this a pure check of the
1025 /// second-derivative recurrence, isolated from the build's value-lane tol.
1026 #[test]
1027 fn psi_gram_tensor_second_derivative_matches_fd_of_gradient() {
1028 let (n, k) = (160usize, 7usize);
1029 let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.5 * ((i % 3) as f64)));
1030 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.37).sin()));
1031 let (psi_lo, psi_hi) = (-1.2_f64, 1.0_f64);
1032
1033 let tensor = PsiGramTensor::build(
1034 |psi| synth_design(psi, n, k),
1035 w.view(),
1036 z.view(),
1037 psi_lo,
1038 psi_hi,
1039 )
1040 .expect("analytic synthetic design must certify");
1041
1042 let h = 1e-5;
1043 for &psi in &[-1.0, -0.5, 0.0, 0.4, 0.9] {
1044 // ∂²G/∂ψ² vs central FD of the analytic ∂G/∂ψ.
1045 let dg_plus = tensor.dgram_dpsi(psi + h);
1046 let dg_minus = tensor.dgram_dpsi(psi - h);
1047 let d2g = tensor.d2gram_dpsi2(psi);
1048 let gscale = d2g.iter().fold(0.0_f64, |a, &v| a.max(v.abs())).max(1e-9);
1049 for ((a, p), m_) in d2g.iter().zip(dg_plus.iter()).zip(dg_minus.iter()) {
1050 let fd = (p - m_) / (2.0 * h);
1051 assert!(
1052 (a - fd).abs() <= 1e-4 * gscale,
1053 "d2gram/dpsi2 mismatch at psi={psi}: analytic={a}, fd={fd}"
1054 );
1055 }
1056 // ∂²(XᵀWz)/∂ψ² vs central FD of the analytic ∂(XᵀWz)/∂ψ.
1057 let dr_plus = tensor.drhs_dpsi(psi + h);
1058 let dr_minus = tensor.drhs_dpsi(psi - h);
1059 let d2r = tensor.d2rhs_dpsi2(psi);
1060 let rscale = d2r.iter().fold(0.0_f64, |a, &v| a.max(v.abs())).max(1e-9);
1061 for ((a, p), m_) in d2r.iter().zip(dr_plus.iter()).zip(dr_minus.iter()) {
1062 let fd = (p - m_) / (2.0 * h);
1063 assert!(
1064 (a - fd).abs() <= 1e-4 * rscale,
1065 "d2rhs/dpsi2 mismatch at psi={psi}: analytic={a}, fd={fd}"
1066 );
1067 }
1068 }
1069 }
1070
1071 /// The penalized Gaussian profile deviance at a fixed ridge λ, assembled
1072 /// PURELY from the sufficient-statistic triple `(G, r, c) = (XᵀWX, XᵀWz, zᵀWz)`:
1073 ///
1074 /// ```text
1075 /// β(λ) = (G + λS)⁻¹ r, D(ψ;λ) = c − 2 βᵀr + βᵀ(G + λS)β = c − βᵀr
1076 /// ```
1077 ///
1078 /// (the second equality uses the normal equations `(G + λS)β = r`). This is
1079 /// EXACTLY the object the inner Gaussian PLS minimizes over β, and it is a
1080 /// pure function of `(G, r, c)` — n-free. Returns `(D, β)` so the caller can
1081 /// also probe the coefficient lane. `s_ridge` is the ridge penalty matrix.
1082 fn profile_deviance(
1083 g: &Array2<f64>,
1084 r: &Array1<f64>,
1085 c: f64,
1086 s_ridge: &Array2<f64>,
1087 lambda: f64,
1088 k: usize,
1089 ) -> (f64, Array1<f64>) {
1090 // Dense (G + λS) β = r via partial-pivot Gauss elimination (small k).
1091 let mut a = g.clone();
1092 a.scaled_add(lambda, s_ridge);
1093 let mut aug = Array2::<f64>::zeros((k, k + 1));
1094 aug.slice_mut(ndarray::s![.., ..k]).assign(&a);
1095 aug.slice_mut(ndarray::s![.., k]).assign(r);
1096 for col in 0..k {
1097 let piv = (col..k)
1098 .max_by(|&p, &q| aug[[p, col]].abs().total_cmp(&aug[[q, col]].abs()))
1099 .unwrap();
1100 if piv != col {
1101 for j in 0..=k {
1102 let tmp = aug[[col, j]];
1103 aug[[col, j]] = aug[[piv, j]];
1104 aug[[piv, j]] = tmp;
1105 }
1106 }
1107 let p = aug[[col, col]];
1108 for row in 0..k {
1109 if row == col {
1110 continue;
1111 }
1112 let f = aug[[row, col]] / p;
1113 for j in col..=k {
1114 aug[[row, j]] -= f * aug[[col, j]];
1115 }
1116 }
1117 }
1118 let beta = Array1::from_iter((0..k).map(|i| aug[[i, k]] / aug[[i, i]]));
1119 let deviance = c - beta.dot(r);
1120 (deviance, beta)
1121 }
1122
1123 /// #1033 bit-tight Hessian + κ-optimum gate. The fast path's promise is not
1124 /// merely that the Gram VALUE matches at sampled ψ — it is that the WHOLE
1125 /// outer κ search (objective, its ψ-curvature, and therefore the located
1126 /// optimum) is reproduced by the n-free sufficient-statistic representation
1127 /// to machine precision. This harness certifies exactly that:
1128 ///
1129 /// 1. **Objective**: the penalized profile deviance `D(ψ)` assembled from
1130 /// the tensor's `(gram_at, rhs_at, zᵀWz)` matches the exactly streamed
1131 /// `XᵀWX/XᵀWz/zᵀWz` deviance bit-tight at every ψ on a fine grid.
1132 /// 2. **Curvature (Hessian)**: the second ψ-derivative `D''(ψ)` of the
1133 /// fast-path objective matches the second ψ-derivative of the EXACT
1134 /// objective (central FD of the streamed deviance) — the curvature the
1135 /// outer Newton step reads must be the true curvature, not an
1136 /// approximation that drifts off the value (the objective↔gradient
1137 /// desync class, now extended to the second order).
1138 /// 3. **κ-optimum**: the argmin of `D(ψ)` over the grid is IDENTICAL
1139 /// between the two assemblies — the fast path lands on the same κ as the
1140 /// exact streamed search, to the grid resolution AND bit-tight in the
1141 /// objective value at that node.
1142 #[test]
1143 fn psi_gram_tensor_bit_tight_hessian_and_kappa_optimum() {
1144 let (n, k) = (200usize, 6usize);
1145 // Heterogeneous weights + a response with genuine ψ-dependent curvature
1146 // so the deviance has a non-degenerate interior minimum in ψ.
1147 let w = Array1::from_iter((0..n).map(|i| 0.7 + 0.6 * (((i * 7) % 5) as f64) / 4.0));
1148 let z = Array1::from_iter((0..n).map(|i| {
1149 let t = (i as f64) / (n as f64 - 1.0);
1150 (3.0 * t).sin() + 0.3 * (7.0 * t).cos()
1151 }));
1152 let (psi_lo, psi_hi) = (-1.0_f64, 0.9_f64);
1153 // Fixed ridge λ over the search — the κ optimizer profiles ψ at fixed
1154 // smoothing here; identity-S ridge keeps the profile well-posed.
1155 let s_ridge = Array2::<f64>::eye(k);
1156 let lambda = 0.5_f64;
1157
1158 let tensor = PsiGramTensor::build(
1159 |psi| synth_design(psi, n, k),
1160 w.view(),
1161 z.view(),
1162 psi_lo,
1163 psi_hi,
1164 )
1165 .expect("analytic synthetic design must certify");
1166
1167 let exact_ztwz: f64 = w.iter().zip(z.iter()).map(|(&wi, &zi)| wi * zi * zi).sum();
1168
1169 // Exact streamed deviance at arbitrary ψ — the ground truth the n-free
1170 // path must reproduce.
1171 let exact_deviance = |psi: f64| -> f64 {
1172 let design = synth_design(psi, n, k).unwrap();
1173 let mut wd = design.clone();
1174 for (mut row, &wi) in wd.outer_iter_mut().zip(w.iter()) {
1175 row.mapv_inplace(|v| v * wi);
1176 }
1177 let g = design.t().dot(&wd);
1178 let r = wd.t().dot(&z);
1179 profile_deviance(&g, &r, exact_ztwz, &s_ridge, lambda, k).0
1180 };
1181
1182 // Fast n-free deviance from the certified tensor.
1183 let fast_deviance = |psi: f64| -> f64 {
1184 let g = tensor.gram_at(psi);
1185 let r = tensor.rhs_at(psi);
1186 profile_deviance(&g, &r, exact_ztwz, &s_ridge, lambda, k).0
1187 };
1188
1189 // Dense grid strictly inside the certified window (away from the edges,
1190 // where the build's value lane is still certified but we want a clean
1191 // central-FD second derivative to exist on both sides).
1192 let m = 81usize;
1193 let lo = psi_lo + 0.06;
1194 let hi = psi_hi - 0.06;
1195 let grid: Vec<f64> = (0..m)
1196 .map(|i| lo + (hi - lo) * (i as f64) / (m as f64 - 1.0))
1197 .collect();
1198
1199 // (1) Objective bit-tight across the whole grid; track argmin on both.
1200 let mut worst_value_rel = 0.0_f64;
1201 let (mut fast_argmin, mut fast_min) = (f64::NAN, f64::INFINITY);
1202 let (mut exact_argmin, mut exact_min) = (f64::NAN, f64::INFINITY);
1203 for &psi in &grid {
1204 let de = exact_deviance(psi);
1205 let df = fast_deviance(psi);
1206 let rel = (de - df).abs() / de.abs().max(1e-300);
1207 worst_value_rel = worst_value_rel.max(rel);
1208 if df < fast_min {
1209 fast_min = df;
1210 fast_argmin = psi;
1211 }
1212 if de < exact_min {
1213 exact_min = de;
1214 exact_argmin = psi;
1215 }
1216 }
1217 assert!(
1218 worst_value_rel <= 1e-9,
1219 "penalized profile deviance: fast n-free assembly diverged from exact \
1220 streamed by rel {worst_value_rel:.3e} (> 1e-9) somewhere on the ψ grid"
1221 );
1222
1223 // (3) κ-optimum: identical grid node AND bit-tight value there. The
1224 // argmin must be a true interior minimum (not a window edge) for this to
1225 // certify the OUTER search rather than a boundary artifact.
1226 assert_eq!(
1227 fast_argmin.to_bits(),
1228 exact_argmin.to_bits(),
1229 "κ-optimum mismatch: fast argmin ψ={fast_argmin}, exact argmin ψ={exact_argmin} \
1230 — the n-free objective located a different optimum"
1231 );
1232 assert!(
1233 fast_argmin > lo + 1e-9 && fast_argmin < hi - 1e-9,
1234 "κ-optimum landed on the grid edge ψ={fast_argmin}; the fixture must have \
1235 an INTERIOR minimum for this to test the outer search, not a boundary"
1236 );
1237 let opt_rel = (exact_min - fast_min).abs() / exact_min.abs().max(1e-300);
1238 assert!(
1239 opt_rel <= 1e-9,
1240 "κ-optimum objective value drift at ψ={fast_argmin}: fast={fast_min}, \
1241 exact={exact_min}, rel={opt_rel:.3e}"
1242 );
1243
1244 // (2) Gradient + curvature from the tensor's ANALYTIC ψ-derivatives.
1245 //
1246 // Differencing two objectives that agree only to ~1e-9 in VALUE cannot
1247 // certify their curvature: the central second difference divides by h²,
1248 // so the ~1e-9 value gap (which is NOT common-mode — they are different
1249 // assemblies) is amplified by 1/h² and swamps any real curvature signal.
1250 // The principled bit-tight curvature check uses the tensor's OWN analytic
1251 // ψ-derivatives `dgram_dpsi`/`drhs_dpsi`: the envelope gradient of the
1252 // profile deviance `D(ψ) = c − rᵀA⁻¹r`, `A = G + λS`, is
1253 //
1254 // D'(ψ) = −2 βᵀ(∂r/∂ψ) + βᵀ(∂G/∂ψ)β, β = A⁻¹r,
1255 //
1256 // assembled n-free from `(dgram_dpsi, drhs_dpsi)`. We certify this
1257 // analytic gradient against a central FD of the EXACT streamed objective
1258 // (first order ⇒ only 1/h amplification, so the ~1e-9 value agreement is
1259 // not destroyed), and certify the curvature by central-differencing the
1260 // ANALYTIC gradient (again 1/h, not 1/h²). This is the same one-
1261 // representation value↔gradient↔curvature consistency the production fast
1262 // path relies on for the outer Newton step.
1263 let solve_a = |g: &Array2<f64>, r: &Array1<f64>| -> Array1<f64> {
1264 profile_deviance(g, r, exact_ztwz, &s_ridge, lambda, k).1
1265 };
1266 // Analytic n-free ψ-gradient of the penalized profile deviance, valid on
1267 // the certified gradient sub-window where `dgram_dpsi` is bit-tight.
1268 let analytic_grad = |psi: f64| -> f64 {
1269 let g = tensor.gram_at(psi);
1270 let r = tensor.rhs_at(psi);
1271 let beta = solve_a(&g, &r);
1272 let dg = tensor.dgram_dpsi(psi);
1273 let dr = tensor.drhs_dpsi(psi);
1274 -2.0 * beta.dot(&dr) + beta.dot(&dg.dot(&beta))
1275 };
1276
1277 // Two finite-difference steps, each near the optimum of its own
1278 // truncation/rounding trade-off:
1279 // * `h_grad = 1e-6` for the FIRST derivative (central FD ⇒ O(h²)
1280 // truncation, O(ε/h) rounding ⇒ optimum near 1e-5..1e-6);
1281 // * `h_curv = 2e-4` for the curvature. A SECOND difference divides by
1282 // h², so its rounding floor is O(ε·|D|/h²): at h=1e-6 that is
1283 // ~1e-16/1e-12 = 1e-4 of |D|, comparable to the curvature itself —
1284 // useless. h≈2e-4 puts the rounding floor at ~1e-16/4e-8 ≈ 2.5e-9·|D|
1285 // and the O(h²·D⁗) truncation around the same scale, so the second
1286 // difference is meaningful. The analytic-gradient curvature is
1287 // differenced at the SAME h_curv so the two carry the same
1288 // truncation order and the comparison is apples-to-apples.
1289 let h_grad = 1e-6_f64;
1290 let h_curv = 2e-4_f64;
1291 let mut worst_grad_rel = 0.0_f64;
1292 let mut worst_hess_rel = 0.0_f64;
1293 let mut tested = 0usize;
1294 for &psi in &grid {
1295 // The exact-objective curvature stencil reaches ±2·h_curv; require the
1296 // whole stencil to stay inside the certified gradient sub-window so the
1297 // analytic-gradient differences are all bit-tight.
1298 if !tensor.contains_for_gradient(psi - 2.0 * h_curv)
1299 || !tensor.contains_for_gradient(psi + 2.0 * h_curv)
1300 {
1301 continue;
1302 }
1303 tested += 1;
1304 // Analytic gradient vs central FD of the EXACT streamed objective.
1305 let exact_g1 =
1306 (exact_deviance(psi + h_grad) - exact_deviance(psi - h_grad)) / (2.0 * h_grad);
1307 let ag = analytic_grad(psi);
1308 let gscale = exact_g1.abs().max(1e-6);
1309 worst_grad_rel = worst_grad_rel.max((exact_g1 - ag).abs() / gscale);
1310 // Curvature: central FD of the ANALYTIC gradient (n-free) vs central
1311 // second difference of the EXACT objective, both at h_curv.
1312 let analytic_h2 =
1313 (analytic_grad(psi + h_curv) - analytic_grad(psi - h_curv)) / (2.0 * h_curv);
1314 let exact_h2 = (exact_deviance(psi + h_curv) - 2.0 * exact_deviance(psi)
1315 + exact_deviance(psi - h_curv))
1316 / (h_curv * h_curv);
1317 let hscale = exact_h2.abs().max(1e-3);
1318 worst_hess_rel = worst_hess_rel.max((analytic_h2 - exact_h2).abs() / hscale);
1319 }
1320 assert!(
1321 tested > 0,
1322 "no ψ on the grid lay inside the certified gradient sub-window"
1323 );
1324 assert!(
1325 worst_grad_rel <= 1e-5,
1326 "ψ-gradient mismatch: the tensor's analytic n-free objective gradient diverged \
1327 from the exact streamed objective by rel {worst_grad_rel:.3e} (> 1e-5)"
1328 );
1329 // The curvature compares an analytic-gradient central difference against
1330 // an exact-objective second difference; the residual O(h²) truncation +
1331 // O(ε/h²) rounding floor at h_curv=2e-4 sets a realistic bit-tight bar of
1332 // ~1e-3 relative (any larger gap is a genuine curvature divergence, not FD
1333 // noise — the value/gradient lanes already certify the objective itself to
1334 // ~1e-9/1e-5).
1335 assert!(
1336 worst_hess_rel <= 1e-3,
1337 "ψ-curvature (Hessian) mismatch: fast n-free objective curvature diverged \
1338 from the exact streamed objective by rel {worst_hess_rel:.3e} (> 1e-3) — \
1339 the outer Newton step would read a different curvature than the truth"
1340 );
1341
1342 eprintln!(
1343 "[psi-gram-bittight] n={n} k={k} grid={m} grad-tested={tested} \
1344 worst |ΔD|/D={worst_value_rel:.2e} worst |ΔD'|/D'={worst_grad_rel:.2e} \
1345 worst |ΔD''|/D''={worst_hess_rel:.2e} κ-opt ψ={fast_argmin:.6} (interior, bit-identical)"
1346 );
1347 }
1348
1349 /// Certification negative: a NON-analytic (kinked) design must refuse to
1350 /// certify rather than silently approximate.
1351 #[test]
1352 fn psi_gram_tensor_refuses_non_analytic_design() {
1353 let (n, k) = (40usize, 3usize);
1354 let w = Array1::from_elem(n, 1.0);
1355 let z = Array1::from_elem(n, 0.5);
1356 let tensor = PsiGramTensor::build(
1357 |psi| {
1358 let mut x = Array2::<f64>::zeros((n, k));
1359 for i in 0..n {
1360 for j in 0..k {
1361 // |ψ| kink at 0 inside the window: not analytic.
1362 x[[i, j]] = psi.abs() + (i + j) as f64 / (n + k) as f64;
1363 }
1364 }
1365 Ok(x)
1366 },
1367 w.view(),
1368 z.view(),
1369 -1.0,
1370 1.0,
1371 );
1372 assert!(
1373 tensor.is_err(),
1374 "kinked design must fail the tail-decay/spot-check certificates"
1375 );
1376 }
1377
1378 /// #1264 reduced-basis-equality witness — REFLEXIVITY + GAUGE INVARIANCE.
1379 ///
1380 /// `reduced_basis_equal(ψ, ψ)` is trivially sound (the surface is its own
1381 /// reference), and the witness must accept two ψ's whose RANGE subspace is
1382 /// identical even when the per-ψ eigenvECTORS differ (the projector is
1383 /// gauge-invariant). The synthetic full-rank Matérn-shaped design's range is
1384 /// the whole k-space for every ψ, so every in-window pair shares a reduced
1385 /// basis and must certify.
1386 #[test]
1387 fn reduced_basis_witness_reflexive_and_gauge_invariant() {
1388 let (n, k) = (160usize, 6usize);
1389 let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.3 * ((i % 5) as f64)));
1390 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.29).sin()));
1391 let (psi_lo, psi_hi) = (-1.0_f64, 0.8_f64);
1392 let tensor = PsiGramTensor::build(
1393 |psi| synth_design(psi, n, k),
1394 w.view(),
1395 z.view(),
1396 psi_lo,
1397 psi_hi,
1398 )
1399 .expect("analytic synthetic design must certify");
1400
1401 // Reflexive: same ψ is always sound.
1402 for &psi in &[-0.9, -0.2, 0.0, 0.5, 0.79] {
1403 assert!(
1404 tensor.reduced_basis_equal(psi, psi),
1405 "witness must be reflexive at psi={psi}"
1406 );
1407 }
1408 // The full-rank synthetic design spans all of k-space at every ψ, so the
1409 // range projector is the identity for all ψ → every pair certifies.
1410 let grid: Vec<f64> = (0..=12).map(|i| psi_lo + 0.05 + 0.06 * i as f64).collect();
1411 for &a in &grid {
1412 for &b in &grid {
1413 assert!(
1414 tensor.reduced_basis_equal(a, b),
1415 "full-rank design: range is ψ-invariant (identity projector), \
1416 so the skip witness must certify (ψ_ref={a}, ψ_new={b})"
1417 );
1418 }
1419 }
1420 // Off-window ψ refuses.
1421 assert!(!tensor.reduced_basis_equal(psi_lo - 0.5, 0.0));
1422 assert!(!tensor.reduced_basis_equal(0.0, psi_hi + 0.5));
1423 }
1424
1425 /// #1264 reduced-basis-equality witness — REFUSES across a genuine subspace
1426 /// change (the exact failure mode of the old RRQR-only gate).
1427 ///
1428 /// Construct a design whose first two columns are fixed (ψ-invariant) profiles
1429 /// and whose third column's AMPLITUDE `ε(ψ) = e^{αψ}` analytically sweeps the
1430 /// third eigendirection's eigenvalue `∝ ε²` across the rank-revealing cutoff.
1431 /// Below the cutoff the reduced (range) basis is the 2-D span of the first two
1432 /// profiles; above it the range is 3-D. Two ψ's on the SAME side of the
1433 /// threshold share a reduced basis (witness accepts); two ψ's STRADDLING it do
1434 /// not (witness refuses) — exactly the stale-basis pairing the design-revision
1435 /// fast path must not perform. The amplitude is smooth/analytic so the tensor
1436 /// still certifies (this is a reduced-basis change, not a non-analytic kink).
1437 #[test]
1438 fn reduced_basis_witness_refuses_across_subspace_change() {
1439 let (n, k) = (200usize, 3usize);
1440 // Three fixed, well-separated column profiles (full column rank when all
1441 // present). The third is scaled by ε(ψ).
1442 let base = |i: usize, j: usize| -> f64 {
1443 let t = (i as f64 + 0.5) / n as f64;
1444 match j {
1445 0 => 1.0,
1446 1 => (2.0 * std::f64::consts::PI * t).sin(),
1447 _ => (4.0 * std::f64::consts::PI * t).cos(),
1448 }
1449 };
1450 // ε(ψ) crosses √cutoff (relative to λ_max ~ O(n)) within the window: at
1451 // λ_max ≈ n the cutoff is rank_rtol·n ≈ 1e-10·200 = 2e-8, so the third
1452 // eigenvalue ε²·‖c3‖² ≈ ε²·(n/2) crosses it at ε ≈ sqrt(4e-8/n) ≈ 1.4e-5,
1453 // i.e. ψ* ≈ ln(1.4e-5)/α. With α = 10 and window [−1.6,−0.8], ψ* ≈ −1.12
1454 // sits inside the window, giving a clean below/above split.
1455 let alpha = 10.0_f64;
1456 let design = move |psi: f64| -> Result<Array2<f64>, String> {
1457 let eps = (alpha * psi).exp();
1458 let mut x = Array2::<f64>::zeros((n, k));
1459 for i in 0..n {
1460 x[[i, 0]] = base(i, 0);
1461 x[[i, 1]] = base(i, 1);
1462 x[[i, 2]] = eps * base(i, 2);
1463 }
1464 Ok(x)
1465 };
1466 let w = Array1::from_elem(n, 1.0);
1467 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.13).sin()));
1468 let (psi_lo, psi_hi) = (-1.6_f64, -0.8_f64);
1469 let tensor = PsiGramTensor::build(design, w.view(), z.view(), psi_lo, psi_hi)
1470 .expect("smooth ε(ψ) design must still certify (analytic, no kink)");
1471
1472 // Find the actual threshold by scanning the rank.
1473 let rank_at = |psi: f64| -> usize {
1474 tensor
1475 .range_projector(psi, PSI_GRAM_SKIP_RANK_RTOL)
1476 .map(|(_, r)| r)
1477 .unwrap_or(0)
1478 };
1479 let lo_rank = rank_at(psi_lo + 0.02);
1480 let hi_rank = rank_at(psi_hi - 0.02);
1481 assert_eq!(
1482 lo_rank, 2,
1483 "low-ψ end must be rank-2 (third column below cutoff)"
1484 );
1485 assert_eq!(
1486 hi_rank, 3,
1487 "high-ψ end must be rank-3 (third column above cutoff)"
1488 );
1489
1490 // Same-side pairs (both rank-2) certify; straddling pairs refuse.
1491 let psi_low_a = psi_lo + 0.05;
1492 let psi_low_b = psi_lo + 0.10;
1493 assert_eq!(rank_at(psi_low_a), 2);
1494 assert_eq!(rank_at(psi_low_b), 2);
1495 assert!(
1496 tensor.reduced_basis_equal(psi_low_a, psi_low_b),
1497 "two low-ψ trials share the rank-2 reduced basis → skip is sound"
1498 );
1499 let psi_high_a = psi_hi - 0.05;
1500 let psi_high_b = psi_hi - 0.10;
1501 assert_eq!(rank_at(psi_high_a), 3);
1502 assert_eq!(rank_at(psi_high_b), 3);
1503 // High-side: the range is the full 3-D space at both, so the projector is
1504 // the identity at both → still a shared reduced basis.
1505 assert!(
1506 tensor.reduced_basis_equal(psi_high_a, psi_high_b),
1507 "two high-ψ trials share the rank-3 reduced basis → skip is sound"
1508 );
1509 // Straddling the rank change: the reduced basis MOVED (2-D → 3-D). The
1510 // witness MUST refuse — this is precisely the stale-basis pairing the old
1511 // RRQR-only gate let through.
1512 assert!(
1513 !tensor.reduced_basis_equal(psi_low_a, psi_high_a),
1514 "witness must REFUSE a skip that straddles the reduced-basis (rank) \
1515 change — freezing the low-ψ rank-2 basis and re-keying the high-ψ \
1516 rank-3 Gram is the exact ~7.8e-2 β̂ regression #1264 guards"
1517 );
1518 assert!(
1519 !tensor.reduced_basis_equal(psi_high_a, psi_low_a),
1520 "witness must refuse symmetrically (high pin, low trial)"
1521 );
1522 }
1523
1524 /// #1033 ROTATION WALL — the subspace-distance certificate must CERTIFY a
1525 /// skip across a pure basis ROTATION at fixed rank, where the old entrywise
1526 /// max-abs projector gate would have refused.
1527 ///
1528 /// Build a rank-2 (in a k=3 space) design whose 2-D range ROTATES smoothly
1529 /// with ψ but whose RANK stays 2: two ψ-dependent in-plane directions span the
1530 /// same fixed 2-plane (cols 0,1 of a fixed orthonormal pair) rotated by an
1531 /// analytic angle φ(ψ). The SUBSPACE (the 2-plane) is ψ-invariant — only the
1532 /// basis within it rotates — so the range projector is mathematically
1533 /// IDENTICAL at every ψ, but its eigenVECTORS rotate. A correct
1534 /// subspace-identity witness must certify every in-window pair; the spectral
1535 /// (principal-angle) distance is ~0 throughout while a naive entrywise
1536 /// comparison of rotated eigenbases would not be guaranteed to.
1537 #[test]
1538 fn reduced_basis_witness_certifies_across_pure_rotation_1033() {
1539 let (n, k) = (240usize, 3usize);
1540 // Two fixed orthogonal ambient profiles spanning a fixed 2-plane; the
1541 // third ambient direction is left empty so the range is exactly that
1542 // 2-plane (rank 2) for every ψ.
1543 let p0 = |i: usize| -> f64 {
1544 let t = (i as f64 + 0.5) / n as f64;
1545 (2.0 * std::f64::consts::PI * t).sin()
1546 };
1547 let p1 = |i: usize| -> f64 {
1548 let t = (i as f64 + 0.5) / n as f64;
1549 (2.0 * std::f64::consts::PI * t).cos()
1550 };
1551 // Within the fixed 2-plane, rotate the two design columns by φ(ψ): the
1552 // SPAN is unchanged (still the {p0,p1} plane) but the basis rotates, so
1553 // the per-ψ eigenvectors of the Gram rotate while the range projector is
1554 // ψ-invariant.
1555 let design = move |psi: f64| -> Result<Array2<f64>, String> {
1556 let phi = 0.7 * psi; // analytic angle sweep
1557 let (c, s) = (phi.cos(), phi.sin());
1558 let mut x = Array2::<f64>::zeros((n, k));
1559 for i in 0..n {
1560 let (a, b) = (p0(i), p1(i));
1561 x[[i, 0]] = c * a - s * b;
1562 x[[i, 1]] = s * a + c * b;
1563 // column 2 stays zero → range is the fixed 2-plane, rank 2.
1564 }
1565 Ok(x)
1566 };
1567 let w = Array1::from_elem(n, 1.0);
1568 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.17).cos()));
1569 let (psi_lo, psi_hi) = (-1.0_f64, 1.0_f64);
1570 let tensor = PsiGramTensor::build(design, w.view(), z.view(), psi_lo, psi_hi)
1571 .expect("smooth rotation design must certify (analytic, no kink)");
1572
1573 // Rank is a constant 2 across the window (the third direction is empty).
1574 let rank_at = |psi: f64| -> usize {
1575 tensor
1576 .range_projector(psi, PSI_GRAM_SKIP_RANK_RTOL)
1577 .map(|(_, r)| r)
1578 .unwrap_or(0)
1579 };
1580 for &psi in &[-0.95, -0.4, 0.0, 0.4, 0.95] {
1581 assert_eq!(rank_at(psi), 2, "rotation keeps rank 2 at psi={psi}");
1582 }
1583
1584 // Every in-window pair spans the SAME 2-plane (only the basis rotates),
1585 // so the subspace-distance witness MUST certify the skip — this is the
1586 // rotation that the entrywise gate kept refusing (the #1033 wall).
1587 let grid: Vec<f64> = (0..=10).map(|i| psi_lo + 0.05 + 0.09 * i as f64).collect();
1588 for &a in &grid {
1589 for &b in &grid {
1590 assert!(
1591 tensor.reduced_basis_equal(a, b),
1592 "pure in-plane rotation preserves the range subspace → the \
1593 subspace-distance skip witness must certify (#1033) \
1594 (ψ_ref={a}, ψ_new={b})"
1595 );
1596 }
1597 }
1598 }
1599}