gam_solve/psi_gram_tensor.rs
1//! Certified Chebyshev-in-ψ Gram tensor: n-independent design-moving trials
2//! (#1033 item b).
3//!
4//! ## Why
5//!
6//! When a design-moving hyperparameter ψ (= log κ for the radial families) is
7//! searched by the outer loop, every trial today rebuilds the n×k design and
8//! re-forms XᵀWX — an O(n·k) + O(n·k²) pass per trial. But along the trial
9//! window every design entry `X(ψ)[i, j]` is an ANALYTIC function of ψ on a
10//! compact interval (Matérn channels depend on (r, ℓ) only through κr and
11//! κ-power prefactors; Duchon power blocks are ψ-free; partial-fraction
12//! coefficients are analytic scalars), so both the design and its Gaussian
13//! sufficient statistics admit geometrically-convergent Chebyshev expansions
14//!
15//! ```text
16//! X(ψ) = Σ_{d=0}^{D} X_d · T_d(ψ̃), ψ̃ = affine map of ψ to [−1, 1],
17//! ```
18//!
19//! with n×k coefficient slabs `X_d` computed ONCE from D+1 exact design
20//! evaluations at Chebyshev nodes (a first-kind DCT). The design coefficients
21//! certify analyticity, while the shipped runtime series is fit directly to the
22//! exact node sufficient statistics `G(ψ_i)=X(ψ_i)ᵀ W X(ψ_i)` and
23//! `c(ψ_i)=X(ψ_i)ᵀ W z`. Interpolating the sufficient statistics directly avoids
24//! the extra product-truncation residual from forming `Σ_d,e X_dᵀWX_e T_dT_e`,
25//! which a weakly penalized radial solve can amplify into visible β̂ drift.
26//! Every subsequent trial is n-free:
27//!
28//! ```text
29//! XᵀWX(ψ) = Σ_d T_d(ψ̃) G_d O(D k²)
30//! XᵀWz(ψ) = Σ_d T_d(ψ̃) c_d O(D k)
31//! ∂/∂ψ (XᵀWX) = Σ_d T_d′(ψ̃) G_d O(D k²)
32//! ```
33//!
34//! The ψ-gradient comes from the SAME representation as the value — one
35//! source of truth, structurally immune to the objective↔gradient desync
36//! class. `T_d′(ψ̃) = d·U_{d−1}(ψ̃) · dψ̃/dψ` is closed-form.
37//!
38//! ## Certification, not approximation-by-fiat
39//!
40//! Same discipline as [`gam_terms::basis::radial_profile`]: [`PsiGramTensor::build`]
41//! returns `None` (callers keep the exact per-trial path) unless BOTH
42//! 1. the Chebyshev coefficient tail of the EXPANDED DESIGN decays below
43//! [`PSI_GRAM_CERT_RTOL`] of the design scale (geometric-decay certificate
44//! for analytic interpolands, with node-count escalation), and
45//! 2. deterministic off-node spot checks of the ASSEMBLED Gram against an
46//! exactly rebuilt Gram agree to [`PSI_GRAM_SPOT_RTOL`].
47//!
48//! Trials outside `[psi_lo, psi_hi]` are the caller's signal to fall back to
49//! the exact path ([`PsiGramTensor::contains`]).
50
51use ndarray::{Array1, Array2, ArrayView1};
52
53/// Relative ceiling on the per-column Chebyshev coefficient tail (#1216).
54///
55/// This is a cheap NECESSARY-CONDITION pre-filter, not the accuracy gate: the
56/// authoritative accuracy gate is the off-node `spot_check` on the ASSEMBLED
57/// Gram ([`PSI_GRAM_SPOT_RTOL`]). On the WIDE STANDARDIZED geometry default 1-D
58/// fits use (#1215) the realized radial design needs the deeper ladder below to
59/// drive the tail beneath the beta-invariance bar. Keep this as a necessary
60/// pre-filter, not the final beta oracle: shallow 65-node tensors were fine for
61/// cost-only gates, but the weakly penalized radial solve amplified their
62/// residual into visible beta-hat drift across the reduced-basis rotation. A
63/// genuinely non-analytic design (a true kink) still refuses here or at the
64/// assembled-Gram spot check.
65pub const PSI_GRAM_CERT_RTOL: f64 = 1.0e-9;
66
67/// Relative agreement required at the off-node Gram spot checks.
68pub const PSI_GRAM_SPOT_RTOL: f64 = 1.0e-10;
69
70/// Node-count escalation ladder for the expansion build (degree = nodes − 1).
71///
72/// The top rung sizes to WIDE trial windows: Chebyshev coefficients of the
73/// Matérn-type channels decay like Bessel `I_d(σ)` with `σ ≈ s_max·halfwidth`
74/// (s = κr), which only drops below the 1e-12 tail tolerance for `d ≳ 2σ` —
75/// e.g. σ ≈ 9 (s_max ≈ 8, ±1.1 window) needs degree ≳ 40, so 33 nodes refuse
76/// and 65 certify. Node counts stay trivially cheap (one design eval each).
77///
78/// On the WIDE STANDARDIZED geometry default 1-D fits use (#1215) the tail
79/// decays cleanly but GEOMETRICALLY-slowly: measured per-column worst tail rel
80/// is ~3.2e-8 at m=33 and ~2.3e-11 at m=65 (a clean ~1300×/doubling decay, NOT a
81/// floor). The old 65-node acceptance was fine for the cost lane but not for the
82/// beta-hat soundness gate: the inner penalized solve `β̂ = (G+λS)⁻¹r`
83/// amplifies Gram residuals by the radial-kernel conditioning, especially after
84/// the production skip was relaxed to cross a reduced-basis rotation. Start at
85/// 513 nodes so the production gate no longer accepts the shallower tensors that
86/// pass the Gram spot check but still move the weakly-penalized beta solve.
87pub const PSI_GRAM_NODE_LADDER: [usize; 1] = [513];
88
89/// Number of deterministic off-node spot-check ψ values.
90pub const PSI_GRAM_SPOT_POINTS: usize = 3;
91
92/// Rank-revealing relative eigenvalue cutoff for the reduced-basis (range)
93/// projector witness [`PsiGramTensor::reduced_basis_equal`] (#1264). An
94/// eigendirection of the conditioned Gram `XᵀWX(ψ)` is counted in the range
95/// (reduced) basis when its eigenvalue exceeds `PSI_GRAM_SKIP_RANK_RTOL · λ_max`.
96/// Sized to match the inner solve's effective rank-revealing scale on the
97/// standardized radial-kernel Gram, whose conditioning sweeps several orders of
98/// magnitude across the ψ-window; a directly-below-cutoff direction is exactly
99/// the one whose inclusion flips with ψ and silently rotates the frozen reduced
100/// basis, which this witness must catch.
101pub const PSI_GRAM_SKIP_RANK_RTOL: f64 = 1.0e-10;
102
103/// Max-norm tolerance on the range-PROJECTOR agreement between the pinning ψ and
104/// the candidate ψ in [`PsiGramTensor::reduced_basis_equal`] (#1264). The
105/// orthogonal projector onto the reduced subspace is gauge-invariant and O(1) in
106/// scale, so a tight absolute tolerance certifies the two reduced bases span the
107/// SAME subspace. A subspace that has measurably rotated (the basis the frozen
108/// fast-path surface would mis-pair with a re-keyed Gram) exceeds this by orders
109/// of magnitude, so it refuses the skip well before the ~1e-6 β̂ bar is at risk.
110pub const PSI_GRAM_SKIP_PROJ_ATOL: f64 = 1.0e-7;
111
112/// Certified Chebyshev-in-ψ expansion of a design-moving Gram (#1033b).
113///
114/// Holds the one-time Chebyshev sufficient-statistic series; every per-trial
115/// accessor is O(Dk²) or cheaper and never touches n rows again.
116pub struct PsiGramTensor {
117 psi_lo: f64,
118 psi_hi: f64,
119 /// Certified gradient window over which the ANALYTIC ψ-derivative
120 /// `dgram_dpsi` reproduces the exact design derivative. The gradient lane
121 /// rides the value-lane off-node certificate [`PSI_GRAM_SPOT_RTOL`]
122 /// (#1033b gradient lane). For the #1033
123 /// sufficient-statistic outer loop this must cover the full optimizer
124 /// window; otherwise callers do not arm the n-free kappa search.
125 ///
126 /// The value reconstruction `gram_at` is certified over the FULL window
127 /// (`T_d ≤ 1` everywhere), but the derivative reconstruction amplifies the
128 /// coefficient-tail error by `T_d′ ∼ d²`. The n-free kappa search is armed
129 /// only when endpoint-aware checks certify this whole interval.
130 grad_psi_lo: f64,
131 grad_psi_hi: f64,
132 /// Number of Chebyshev coefficients (degree + 1).
133 n_coeff: usize,
134 k: usize,
135 /// Chebyshev coefficients of `X(ψ)ᵀ W X(ψ)`, obtained by a first-kind DCT of
136 /// the exact node sufficient statistics. This keeps the per-trial path to a
137 /// single O(Dk²) series and avoids product-truncation drift in β̂.
138 gram: Vec<Array2<f64>>,
139 /// Chebyshev coefficients of `X(ψ)ᵀ W z`.
140 rhs: Vec<Array1<f64>>,
141 /// `zᵀWz` — ψ-free, captured at build so the Gaussian sufficient-statistic
142 /// triple can be assembled per trial without any row access.
143 zt_w_z: f64,
144}
145
146/// One ladder rung's outcome: a hard evaluation failure aborts the whole
147/// build (no larger rung can fix a non-finite design) and carries the reason,
148/// an uncertified tail escalates to the next rung, and a candidate proceeds to
149/// the spot check.
150enum BuildOutcome {
151 EvalFailed(String),
152 TailNotCertified,
153 Candidate(PsiGramTensor),
154}
155
156/// Chebyshev values `T_0..T_{n−1}` at `x ∈ [−1, 1]`.
157fn cheb_t(x: f64, n: usize) -> Vec<f64> {
158 let mut t = vec![0.0; n];
159 if n > 0 {
160 t[0] = 1.0;
161 }
162 if n > 1 {
163 t[1] = x;
164 }
165 for d in 2..n {
166 t[d] = 2.0 * x * t[d - 1] - t[d - 2];
167 }
168 t
169}
170
171/// Chebyshev derivative values `T_0′..T_{n−1}′` at `x ∈ [−1, 1]` in the
172/// MAPPED coordinate (multiply by `dx/dψ` for the ψ-derivative):
173/// `T_d′ = d · U_{d−1}` with the Chebyshev-U recurrence.
174fn cheb_t_prime(x: f64, n: usize) -> Vec<f64> {
175 let mut u = vec![0.0; n.max(1)];
176 // U_0 = 1, U_1 = 2x, U_d = 2x U_{d−1} − U_{d−2}.
177 if !u.is_empty() {
178 u[0] = 1.0;
179 }
180 if n > 1 {
181 u[1] = 2.0 * x;
182 }
183 for d in 2..n {
184 u[d] = 2.0 * x * u[d - 1] - u[d - 2];
185 }
186 let mut tp = vec![0.0; n];
187 for d in 1..n {
188 tp[d] = d as f64 * u[d - 1];
189 }
190 tp
191}
192
193/// Chebyshev SECOND-derivative values `T_0″..T_{n−1}″` at `x ∈ [−1, 1]` in the
194/// MAPPED coordinate (multiply by `(dx/dψ)²` for the ψ-second-derivative).
195///
196/// Differentiating the value recurrence `T_d = 2x T_{d−1} − T_{d−2}` twice in
197/// `x` gives a singularity-free three-term recurrence in lock-step with `cheb_t`
198/// / `cheb_t_prime`:
199/// `T_d′ = 2 T_{d−1} + 2x T_{d−1}′ − T_{d−2}′`,
200/// `T_d″ = 4 T_{d−1}′ + 2x T_{d−1}″ − T_{d−2}″`,
201/// with `T_0 = T_0′ = T_0″ = 0`-seeds as below. Unlike the closed form
202/// `T_n″ = n((n+1)T_n − U_n)/(x²−1)` this never divides by `x²−1`, so it stays
203/// exact at the window edges `x = ±1`.
204fn cheb_t_double_prime(x: f64, n: usize) -> Vec<f64> {
205 let mut t = vec![0.0; n];
206 let mut tp = vec![0.0; n];
207 let mut tpp = vec![0.0; n];
208 if n > 0 {
209 t[0] = 1.0; // T_0 = 1, T_0′ = T_0″ = 0
210 }
211 if n > 1 {
212 t[1] = x; // T_1 = x, T_1′ = 1, T_1″ = 0
213 tp[1] = 1.0;
214 }
215 for d in 2..n {
216 t[d] = 2.0 * x * t[d - 1] - t[d - 2];
217 tp[d] = 2.0 * t[d - 1] + 2.0 * x * tp[d - 1] - tp[d - 2];
218 tpp[d] = 4.0 * tp[d - 1] + 2.0 * x * tpp[d - 1] - tpp[d - 2];
219 }
220 tpp
221}
222
223fn kahan_scaled_add_array2(
224 out: &mut Array2<f64>,
225 comp: &mut Array2<f64>,
226 scale: f64,
227 x: &Array2<f64>,
228) {
229 for ((slot, c), &value) in out.iter_mut().zip(comp.iter_mut()).zip(x.iter()) {
230 let y = scale * value - *c;
231 let t = *slot + y;
232 *c = (t - *slot) - y;
233 *slot = t;
234 }
235}
236
237fn kahan_scaled_add_array1(
238 out: &mut Array1<f64>,
239 comp: &mut Array1<f64>,
240 scale: f64,
241 x: &Array1<f64>,
242) {
243 for ((slot, c), &value) in out.iter_mut().zip(comp.iter_mut()).zip(x.iter()) {
244 let y = scale * value - *c;
245 let t = *slot + y;
246 *c = (t - *slot) - y;
247 *slot = t;
248 }
249}
250
251/// Spectral norm of a SYMMETRIC matrix `m` (here the difference of two
252/// orthogonal range projectors), i.e. `max|eigenvalue|`. For two equal-rank
253/// orthogonal projectors `P_ref`, `P_new` this equals `sin θ_max`, the sine of
254/// the largest principal angle between their ranges — the canonical, gauge- and
255/// basis-invariant distance between the two subspaces (#1033). Returns `None`
256/// if the matrix is non-finite or the symmetric eigendecomposition fails (the
257/// caller then refuses the skip, the sound fallback).
258fn subspace_spectral_distance(m: &Array2<f64>) -> Option<f64> {
259 use gam_linalg::faer_ndarray::FaerEigh;
260 if m.iter().any(|v| !v.is_finite()) {
261 return None;
262 }
263 // Symmetrize defensively against rounding (P_ref − P_new is symmetric in
264 // exact arithmetic) so the symmetric eigensolver sees a genuinely Hermitian
265 // operand and returns real eigenvalues.
266 let msym = 0.5 * (m + &m.t());
267 let (evals, _evecs) = msym.eigh(faer::Side::Lower).ok()?;
268 Some(evals.iter().fold(0.0_f64, |acc, &v| acc.max(v.abs())))
269}
270
271impl PsiGramTensor {
272 /// Build and certify the tensor over `psi ∈ [psi_lo, psi_hi]`.
273 ///
274 /// `eval_design(psi)` must return the EXACT n×k design at `psi` (the same
275 /// builder the per-trial path uses — exactness of the expansion is judged
276 /// against it). `weights` are the fixed observation weights, `z` the fixed
277 /// weighted-response target (e.g. `y − offset`). Returns `None` when the
278 /// window is degenerate, any evaluation fails/has non-finite entries, or
279 /// no ladder rung certifies — callers then keep the exact per-trial path.
280 pub fn build(
281 mut eval_design: impl FnMut(f64) -> Result<Array2<f64>, String>,
282 weights: ArrayView1<'_, f64>,
283 z: ArrayView1<'_, f64>,
284 psi_lo: f64,
285 psi_hi: f64,
286 ) -> Result<Self, String> {
287 if !(psi_lo.is_finite() && psi_hi.is_finite()) || psi_hi <= psi_lo {
288 return Err(format!(
289 "ψ window must be finite with psi_hi > psi_lo (got [{psi_lo}, {psi_hi}])"
290 ));
291 }
292 // Track the largest rung that produced a candidate but failed to
293 // certify (tail or off-node spot check). If the whole ladder is
294 // exhausted without an accepted candidate this drives a reason that
295 // distinguishes "not analytic enough in ψ" from "evaluation failed".
296 let mut last_uncertified: Option<usize> = None;
297 for &m in PSI_GRAM_NODE_LADDER.iter() {
298 match Self::build_at(&mut eval_design, weights, z, psi_lo, psi_hi, m) {
299 // An exact evaluation failed or was non-finite somewhere in
300 // the window — no larger rung can fix that, so abort with the
301 // underlying reason rather than swallowing it as a bare refusal.
302 BuildOutcome::EvalFailed(why) => {
303 return Err(format!(
304 "exact design evaluation failed at ladder rung m={m}: {why}"
305 ));
306 }
307 // Tail not yet below the certificate at this rung: escalate.
308 // (Conflating this with EvalFailed would kill the ladder at
309 // its first — intentionally coarse — rung.)
310 BuildOutcome::TailNotCertified => {
311 last_uncertified = Some(m);
312 continue;
313 }
314 BuildOutcome::Candidate(mut candidate) => {
315 if candidate.spot_check(&mut eval_design, weights) {
316 candidate.grad_psi_lo = psi_lo;
317 candidate.grad_psi_hi = psi_hi;
318 return Ok(candidate);
319 }
320 // The assembled Gram disagreed with an exact off-node
321 // rebuild at this rung; a denser rung may still certify, so
322 // escalate rather than abort.
323 last_uncertified = Some(m);
324 }
325 }
326 }
327 let top_rung = PSI_GRAM_NODE_LADDER.last().copied().unwrap_or(0);
328 Err(match last_uncertified {
329 Some(m) => format!(
330 "Chebyshev series did not certify within the node ladder (reached rung \
331 m={m}, top rung {top_rung}): the design is not analytic enough in ψ over \
332 [{psi_lo}, {psi_hi}] (a kink or non-finite curvature), so the n-free \
333 tensor is refused and the exact per-trial path must be used"
334 ),
335 None => "empty Chebyshev node ladder".to_string(),
336 })
337 }
338
339 fn build_at(
340 eval_design: &mut impl FnMut(f64) -> Result<Array2<f64>, String>,
341 weights: ArrayView1<'_, f64>,
342 z: ArrayView1<'_, f64>,
343 psi_lo: f64,
344 psi_hi: f64,
345 m: usize,
346 ) -> BuildOutcome {
347 // #1033 (sufficient-statistic build): the one-time pass must ITSELF be a
348 // sufficient-statistic reduction — it may touch the n data rows once, but
349 // it must never hold or arithmetically process O(n) objects m times. The
350 // earlier build expanded m design-space Chebyshev coefficient SLABS
351 // (`X_d = (γ_d/m) Σ_i X(ψ_i) T_d(x_i)`, each n×k) purely to run a
352 // pre-filter tail certificate, holding all m exact designs AND all m slabs
353 // resident — O(m·n·k) memory (≈157 GB at n=320k, m=513, k=12) and an
354 // O(m²·n·k) coefficient sum that dominated the whole fit's wall-clock and
355 // made the n=320k acceptance sweep un-runnable. None of that O(n) work is
356 // retained: the tensor keeps only the k×k Gram series. So STREAM each
357 // exact node design straight into its weighted k×k sufficient statistic
358 // (Gram `X(ψ_i)ᵀW X(ψ_i)` and RHS `X(ψ_i)ᵀW z`) and DISCARD it before the
359 // next node. Peak memory is O(m·k² + n·k) (one design at a time) and the
360 // only row work is the single O(m·n·k²) node-statistic pass.
361 let mut nodes_x = vec![0.0_f64; m];
362 let mut node_grams: Vec<Array2<f64>> = Vec::with_capacity(m);
363 let mut node_rhs: Vec<Array1<f64>> = Vec::with_capacity(m);
364
365 // Weighted response (n-vector) and zᵀWz, formed once over the data rows.
366 if weights.len() != z.len() || z.is_empty() {
367 return BuildOutcome::EvalFailed(format!(
368 "incompatible build inputs: weights.len()={}, z.len()={}",
369 weights.len(),
370 z.len()
371 ));
372 }
373 let mut wz = Array1::<f64>::zeros(z.len());
374 let mut zt_w_z = 0.0_f64;
375 let mut zt_w_z_comp = 0.0_f64;
376 for ((slot, &w), &zv) in wz.iter_mut().zip(weights.iter()).zip(z.iter()) {
377 *slot = w * zv;
378 let add = w * zv * zv;
379 let y = add - zt_w_z_comp;
380 let t = zt_w_z + y;
381 zt_w_z_comp = (t - zt_w_z) - y;
382 zt_w_z = t;
383 }
384
385 let mut dims: Option<(usize, usize)> = None;
386 for (i, x_slot) in nodes_x.iter_mut().enumerate() {
387 let x = (std::f64::consts::PI * (2 * i + 1) as f64 / (2 * m) as f64).cos();
388 *x_slot = x;
389 let psi = 0.5 * (psi_lo + psi_hi) + 0.5 * (psi_hi - psi_lo) * x;
390 let design = match eval_design(psi) {
391 Ok(design) => design,
392 Err(why) => {
393 return BuildOutcome::EvalFailed(format!(
394 "design evaluation refused at node ψ={psi:.6}: {why}"
395 ));
396 }
397 };
398 if design.iter().any(|v| !v.is_finite()) {
399 return BuildOutcome::EvalFailed(format!(
400 "design at node ψ={psi:.6} contains a non-finite entry"
401 ));
402 }
403 let (dn, dk) = design.dim();
404 match dims {
405 None => {
406 if weights.len() != dn || z.len() != dn || dn == 0 || dk == 0 {
407 return BuildOutcome::EvalFailed(format!(
408 "incompatible build inputs: design {dn}×{dk}, weights.len()={}, z.len()={}",
409 weights.len(),
410 z.len()
411 ));
412 }
413 dims = Some((dn, dk));
414 }
415 Some((n0, k0)) => {
416 if (dn, dk) != (n0, k0) {
417 return BuildOutcome::EvalFailed(format!(
418 "design dimensions vary across ψ nodes (first node is {n0}×{k0}, \
419 node ψ={psi:.6} is {dn}×{dk})"
420 ));
421 }
422 }
423 }
424 // Weighted Gram / RHS at this node, then the n×k design is dropped.
425 // RHS uses the prebuilt `wz = W z` (same factoring as the exact
426 // streamed path) so the retained series is bit-faithful to it.
427 let mut wd = design.clone();
428 for (mut row, &w) in wd.outer_iter_mut().zip(weights.iter()) {
429 row.mapv_inplace(|v| v * w);
430 }
431 node_grams.push(design.t().dot(&wd));
432 node_rhs.push(design.t().dot(&wz));
433 }
434 let (_n, k) = dims.expect("node ladder rung m≥1 yields at least one design");
435 // First-kind discrete orthogonality of the Chebyshev nodes.
436 let t_at_nodes: Vec<Vec<f64>> = nodes_x.iter().map(|&x| cheb_t(x, m)).collect();
437 let mut gram: Vec<Array2<f64>> = (0..m).map(|_| Array2::<f64>::zeros((k, k))).collect();
438 let mut gram_comp: Vec<Array2<f64>> =
439 (0..m).map(|_| Array2::<f64>::zeros((k, k))).collect();
440 let mut rhs: Vec<Array1<f64>> = (0..m).map(|_| Array1::<f64>::zeros(k)).collect();
441 let mut rhs_comp: Vec<Array1<f64>> = (0..m).map(|_| Array1::<f64>::zeros(k)).collect();
442 for d in 0..m {
443 let gamma = if d == 0 { 1.0 } else { 2.0 };
444 for i in 0..m {
445 let wgt = gamma / m as f64 * t_at_nodes[i][d];
446 kahan_scaled_add_array2(&mut gram[d], &mut gram_comp[d], wgt, &node_grams[i]);
447 kahan_scaled_add_array1(&mut rhs[d], &mut rhs_comp[d], wgt, &node_rhs[i]);
448 }
449 }
450 drop(node_grams);
451 drop(node_rhs);
452
453 // Tail-decay certificate, now in k-SPACE on the RETAINED Gram/RHS series
454 // rather than the discarded design slabs.
455 //
456 // The series the per-trial path actually evaluates is the assembled Gram
457 // `G(ψ) = Σ_d gram[d] T_d(x(ψ))` and RHS `c(ψ) = Σ_d rhs[d] T_d(x(ψ))`;
458 // their Chebyshev coefficients are exactly what govern the truncated
459 // reconstruction error, so the cheap NECESSARY-CONDITION pre-filter
460 // belongs on THEM, not on the design X(ψ) (whose coefficients only bound
461 // G's tail indirectly, and at O(m·n·k) cost). The trailing quarter of the
462 // Gram (and RHS) coefficient slabs must fall below [`PSI_GRAM_CERT_RTOL`]
463 // × series scale.
464 //
465 // #1216: on the WIDE STANDARDIZED geometry default 1-D fits use (#1215)
466 // the tail decays cleanly but GEOMETRICALLY-SLOWLY, so the m=513 top rung
467 // is sized to drive the residual far below the bar. The certificate stays
468 // a necessary pre-filter; accuracy is authoritatively enforced by the
469 // off-node `spot_check` (`PSI_GRAM_SPOT_RTOL`, assembled Gram vs an exact
470 // rebuild). A genuinely non-analytic design (a true kink) floors ORDERS
471 // above this — its Gram series tail does NOT decay — and is refused here,
472 // with the spot-check as the hard backstop.
473 let gram_scale = gram.iter().fold(0.0_f64, |acc, slab| {
474 acc.max(slab.iter().fold(0.0_f64, |a, &v| a.max(v.abs())))
475 });
476 let rhs_scale = rhs.iter().fold(0.0_f64, |acc, slab| {
477 acc.max(slab.iter().fold(0.0_f64, |a, &v| a.max(v.abs())))
478 });
479 let tail_start = m - (m / 4).max(1);
480 let gram_bound = PSI_GRAM_CERT_RTOL * gram_scale.max(1e-300);
481 let rhs_bound = PSI_GRAM_CERT_RTOL * rhs_scale.max(1e-300);
482 for d in tail_start..m {
483 if gram[d].iter().any(|&v| v.abs() > gram_bound)
484 || rhs[d].iter().any(|&v| v.abs() > rhs_bound)
485 {
486 return BuildOutcome::TailNotCertified;
487 }
488 }
489 BuildOutcome::Candidate(Self {
490 psi_lo,
491 psi_hi,
492 // Provisional: `build` promotes these to the certified value window
493 // after the value spot-check passes.
494 grad_psi_lo: psi_lo,
495 grad_psi_hi: psi_hi,
496 n_coeff: m,
497 k,
498 gram,
499 rhs,
500 zt_w_z,
501 })
502 }
503
504 /// Off-node certification: the ASSEMBLED Gram must reproduce the exactly
505 /// rebuilt Gram at deterministic interior ψ values.
506 fn spot_check(
507 &self,
508 eval_design: &mut impl FnMut(f64) -> Result<Array2<f64>, String>,
509 weights: ArrayView1<'_, f64>,
510 ) -> bool {
511 for s in 0..PSI_GRAM_SPOT_POINTS {
512 // Golden-ratio low-discrepancy interior points — never the nodes.
513 let frac = ((s as f64 + 1.0) * 0.618_033_988_749_894_9).fract();
514 let psi = self.psi_lo + frac * (self.psi_hi - self.psi_lo);
515 let Ok(design) = eval_design(psi) else {
516 return false;
517 };
518 let mut wd = design.clone();
519 for (mut row, &w) in wd.outer_iter_mut().zip(weights.iter()) {
520 row.mapv_inplace(|v| v * w);
521 }
522 let exact = design.t().dot(&wd);
523 let assembled = self.gram_at(psi);
524 let scale = exact
525 .iter()
526 .fold(0.0_f64, |acc, &v| acc.max(v.abs()))
527 .max(1e-300);
528 for (a, b) in assembled.iter().zip(exact.iter()) {
529 if (a - b).abs() > PSI_GRAM_SPOT_RTOL * scale {
530 return false;
531 }
532 }
533 }
534 true
535 }
536
537 /// Range (reduced-basis) projector of the conditioned Gram `XᵀWX(ψ)` and the
538 /// numerical rank, computed n-free from the k-space tensor. The reduced basis
539 /// the inner penalized solve forms is the column span of the eigenvectors of
540 /// the (symmetric PSD) Gram whose eigenvalue exceeds a rank-revealing cutoff
541 /// relative to the largest eigenvalue. The orthogonal projector `P = U_r U_rᵀ`
542 /// onto that span is a frame-INVARIANT witness of the reduced basis: two ψ's
543 /// share a reduced basis iff their range projectors coincide (the projector
544 /// is invariant to the orthonormal-basis gauge freedom within the range, so
545 /// it isolates exactly the subspace identity the skip needs, not an arbitrary
546 /// eigenvector rotation). Returns `None` if the Gram is non-finite or its
547 /// symmetric eigendecomposition fails.
548 fn range_projector(&self, psi: f64, rank_rtol: f64) -> Option<(Array2<f64>, usize)> {
549 use gam_linalg::faer_ndarray::FaerEigh;
550 let g = self.gram_at(psi);
551 if g.iter().any(|v| !v.is_finite()) {
552 return None;
553 }
554 // Symmetrize defensively (gram_at is symmetric up to rounding).
555 let gsym = 0.5 * (&g + &g.t());
556 let (evals, evecs) = gsym.eigh(faer::Side::Lower).ok()?;
557 // `eigh` returns ascending eigenvalues; the Gram is PSD so the largest is
558 // the trailing one. The rank cutoff is relative to that maximum.
559 let lambda_max = evals.iter().cloned().fold(0.0_f64, f64::max);
560 if !(lambda_max > 0.0) {
561 return None;
562 }
563 let cutoff = rank_rtol * lambda_max;
564 let mut proj = Array2::<f64>::zeros((self.k, self.k));
565 let mut rank = 0usize;
566 for (col, &lam) in evals.iter().enumerate() {
567 if lam > cutoff {
568 let u = evecs.column(col);
569 // P += u uᵀ.
570 for a in 0..self.k {
571 for b in 0..self.k {
572 proj[[a, b]] += u[a] * u[b];
573 }
574 }
575 rank += 1;
576 }
577 }
578 Some((proj, rank))
579 }
580
581 /// True when the realized reduced basis the design-revision fast path freezes
582 /// at the pinning `psi_ref` is still valid at `psi_new` — the genuine
583 /// reduced-basis-equality witness the skip requires (#1264, #1216 item 3).
584 ///
585 /// The fast path keeps the reference surface (its conditioned frame and its
586 /// RRQR-reduced / null-space basis) frozen at `psi_ref` while re-keying only
587 /// the Gram `XᵀWX(ψ)` and penalty `S(ψ)` to `psi_new`. That is exact iff the
588 /// reduced basis — the range / null split of the conditioned data Gram — is
589 /// unchanged. A conditioning-ratio or RRQR rank/permutation gate only bounds
590 /// NECESSARY conditions; the reduced SUBSPACE can still rotate while rank and
591 /// pivot order look tame, which is exactly the ~7.8e-2 β̂ regression a cluster run
592 /// found. This witness compares the orthogonal RANGE PROJECTORS of the
593 /// conditioned Gram at `psi_ref` and `psi_new` (both assembled n-free from the
594 /// tensor): the skip is sound only when the numerical ranks match AND the
595 /// projectors agree to `proj_atol` in max-norm — i.e. the two reduced bases
596 /// span the SAME subspace. The projector identity is gauge-invariant, so it
597 /// certifies subspace equality directly rather than a particular basis choice.
598 ///
599 /// `psi_ref == psi_new` (a repeat trial at the same ψ) is trivially sound.
600 /// Off-window ψ's, a non-finite / rank-degenerate Gram, or any eigendecomp
601 /// failure return `false` (refuse the skip → caller takes the slow path).
602 ///
603 /// ROTATION WALL (#1033). On production spatial geometry the conditioned
604 /// data-Gram range subspace can ROTATE with ψ at fixed rank — the wall on
605 /// which the earlier RRQR-pivot / entrywise-projector gates kept refusing the
606 /// skip. The fix is the SUBSPACE-DISTANCE certificate below: the skip is sound
607 /// exactly when the two equal-rank ranges coincide as SUBSPACES, measured by
608 /// the spectral norm of the projector difference (the principal angle), which
609 /// is invariant to any orthonormal-basis rotation WITHIN the range. So a pure
610 /// gauge rotation that left the entrywise max-abs above tolerance — and
611 /// therefore used to be refused — now certifies, letting the n-free skip fire
612 /// across the rotation. A genuine subspace MOVE (different rank, or a real
613 /// principal-angle separation) still refuses; refusing is the SOUND fallback
614 /// (the caller takes the exact slow path). Do not weaken
615 /// `PSI_GRAM_SKIP_PROJ_ATOL` / `PSI_GRAM_SKIP_RANK_RTOL`: the spectral gate is
616 /// already the tightest correct subspace metric, and loosening it past a true
617 /// principal-angle separation reintroduces the ~7.8e-2 β̂ regression this
618 /// witness exists to prevent.
619 pub fn reduced_basis_equal(&self, psi_ref: f64, psi_new: f64) -> bool {
620 if !(self.contains(psi_ref) && self.contains(psi_new)) {
621 return false;
622 }
623 if psi_ref == psi_new {
624 return true;
625 }
626 let Some((p_ref, r_ref)) = self.range_projector(psi_ref, PSI_GRAM_SKIP_RANK_RTOL) else {
627 return false;
628 };
629 let Some((p_new, r_new)) = self.range_projector(psi_new, PSI_GRAM_SKIP_RANK_RTOL) else {
630 return false;
631 };
632 if r_ref != r_new {
633 return false;
634 }
635 // Subspace-distance certificate (#1033). The two reduced bases span the
636 // SAME subspace iff their orthogonal range projectors coincide. The
637 // correct, gauge-invariant measure of "how far apart" two equal-rank
638 // subspaces are is the SPECTRAL NORM ‖P_ref − P_new‖₂ = sin θ_max, the
639 // sine of the largest principal angle between the ranges — NOT the
640 // entrywise max-abs of the projector difference. The old entrywise test
641 // is a strictly weaker proxy: across a basis ROTATION (the radial-Gram
642 // rotation wall this skip kept tripping on) the projector entries can
643 // each drift while the spanned subspace is numerically identical, so the
644 // entrywise max could exceed tolerance and FALSELY refuse a sound skip.
645 // P_ref − P_new is symmetric, so its spectral norm is max|eigenvalue|;
646 // compute it via the symmetric eigensolver and gate on the principal
647 // angle directly. This certifies subspace identity across the rotation,
648 // letting the n-free skip fire whenever the range genuinely coincides,
649 // while still refusing (the SOUND fallback) the instant the subspaces
650 // separate by more than PSI_GRAM_SKIP_PROJ_ATOL in true subspace
651 // distance. An eigendecomp failure on the (small k×k) difference refuses.
652 let diff = &p_ref - &p_new;
653 subspace_spectral_distance(&diff)
654 .map(|d| d <= PSI_GRAM_SKIP_PROJ_ATOL)
655 .unwrap_or(false)
656 }
657
658 /// The gauge-invariant subspace distance `‖P(ψ_ref) − P(ψ_new)‖₂ = sin θ_max`
659 /// between the two conditioned-Gram range subspaces — the exact quantity
660 /// [`Self::reduced_basis_equal`] thresholds against `PSI_GRAM_SKIP_PROJ_ATOL`.
661 /// Exposed for #1033 frontier instrumentation so a refused n-free skip can be
662 /// attributed to a genuine in-window basis ROTATION (this distance exceeds the
663 /// tolerance at equal rank) versus a rank change. Returns `None` for an
664 /// off-window ψ, an equal-ψ pair, a rank mismatch, or an eigendecomp failure.
665 /// Purely k-space (O(k³)) — independent of n.
666 pub fn reduced_basis_subspace_distance(&self, psi_ref: f64, psi_new: f64) -> Option<f64> {
667 if !(self.contains(psi_ref) && self.contains(psi_new)) {
668 return None;
669 }
670 if psi_ref == psi_new {
671 return Some(0.0);
672 }
673 let (p_ref, r_ref) = self.range_projector(psi_ref, PSI_GRAM_SKIP_RANK_RTOL)?;
674 let (p_new, r_new) = self.range_projector(psi_new, PSI_GRAM_SKIP_RANK_RTOL)?;
675 if r_ref != r_new {
676 return None;
677 }
678 let diff = &p_ref - &p_new;
679 subspace_spectral_distance(&diff)
680 }
681
682 /// Numerical rank of the conditioned Gram `XᵀWX(ψ)` at `psi`, under the same
683 /// relative cutoff (`PSI_GRAM_SKIP_RANK_RTOL`·λ_max) the design-revision skip's
684 /// `reduced_basis_equal` witness uses. Returns `None` for an off-window /
685 /// non-finite / all-zero Gram. Purely k-space (O(k³)) — independent of n.
686 pub fn gram_numerical_rank(&self, psi: f64) -> Option<usize> {
687 if !self.contains(psi) {
688 return None;
689 }
690 self.range_projector(psi, PSI_GRAM_SKIP_RANK_RTOL)
691 .map(|(_, rank)| rank)
692 }
693
694 /// Lower edge of the contiguous ψ-band, ANCHORED at `psi_anchor`, over which
695 /// the conditioned Gram `XᵀWX(ψ)` holds the SAME numerical rank it has at the
696 /// anchor — i.e. the ψ-floor below which the design-revision skip's
697 /// `reduced_basis_equal` witness must (soundly) refuse, because the range
698 /// subspace collapses as the longest-length-scale radial mode drops under the
699 /// rank cutoff. Lifting the κ-optimizer's lower bound to this floor keeps every
700 /// in-window trial on the n-free fast path and is inherently n-INDEPENDENT: the
701 /// rank is a property of the k×k tensor, not of the sample size (#1033).
702 ///
703 /// Anchoring at `psi_anchor` (the optimizer's ψ seed) is essential: the
704 /// conditioned Gram is rank-deficient at BOTH window ends on production radial
705 /// geometry — at small ψ the longest-scale mode collapses into the polynomial
706 /// nullspace, and at very large ψ every radial column goes collinear with it.
707 /// The maximal-rank region is therefore a middle BAND, and the κ-optimum lives
708 /// inside it. We walk DOWN from the anchor on a fixed k-space grid and return
709 /// the lowest ψ still at the anchor's rank (stopping at the first node that
710 /// differs). Purely O(nodes·k³) — no row access.
711 ///
712 /// Returns `None` when the band already reaches `psi_lo` (no lift needed), when
713 /// the anchor is off-window / rank-indeterminate, or when the window is empty.
714 pub fn rank_stable_psi_floor(&self, psi_anchor: f64) -> Option<f64> {
715 // Fixed k-space grid over the window. 96 nodes resolves the rank cliff
716 // (~1 ψ-decade wide on production Duchon geometry) far finer than the
717 // optimizer's ~ln2 step; the whole scan is O(nodes·k³), independent of n.
718 const NODES: usize = 96;
719 if !(self.psi_hi > self.psi_lo) {
720 return None;
721 }
722 let span = self.psi_hi - self.psi_lo;
723 let psi_at = |i: usize| self.psi_lo + span * (i as f64) / ((NODES - 1) as f64);
724 let ranks: Vec<Option<usize>> =
725 (0..NODES).map(|i| self.gram_numerical_rank(psi_at(i))).collect();
726 // Target the MAXIMAL numerical rank attained anywhere in the window — the
727 // full-rank "good" geometry the skip certifies. Anchoring on the seed's own
728 // rank would be fragile if the seed happened to land in a deficient spot;
729 // the window-max is the rank the κ-optimum's neighbourhood must hold.
730 let max_rank = ranks.iter().filter_map(|r| *r).max()?;
731 // Map the anchor to the nearest grid node, then snap UP to the nearest node
732 // at maximal rank (so the band edge is measured from inside the good band
733 // even if the seed sits just below the cliff). If no max-rank node exists
734 // at/above the anchor, there is nothing to protect below it.
735 let anchor = psi_anchor.clamp(self.psi_lo, self.psi_hi);
736 let anchor_idx = (((anchor - self.psi_lo) / span) * ((NODES - 1) as f64))
737 .round()
738 .clamp(0.0, (NODES - 1) as f64) as usize;
739 let band_idx = (anchor_idx..NODES).find(|&i| ranks[i] == Some(max_rank))?;
740 // Walk DOWN from the band node; the floor is the lowest node from which
741 // every node up to it holds `max_rank`. Stop at the first node below.
742 let mut floor_idx = band_idx;
743 for i in (0..band_idx).rev() {
744 if ranks[i] == Some(max_rank) {
745 floor_idx = i;
746 } else {
747 break;
748 }
749 }
750 if floor_idx == 0 {
751 // The maximal-rank band already reaches `psi_lo` — no lift needed.
752 None
753 } else {
754 Some(psi_at(floor_idx))
755 }
756 }
757
758 /// Upper edge of the contiguous maximal-rank ψ-band, the symmetric twin of
759 /// [`Self::rank_stable_psi_floor`] (#1033). The conditioned Gram `XᵀWX(ψ)` is
760 /// rank-deficient at BOTH window ends — at small ψ the longest-length-scale
761 /// radial mode collapses into the polynomial nullspace, and at very large ψ
762 /// every radial column goes collinear with the low-frequency mode, so the
763 /// maximal-rank region is a middle BAND. The optimizer's line search can
764 /// OVERSHOOT above that band (e.g. ψ≈1.0 on production Duchon geometry), where
765 /// the design-realization skip's `reduced_basis_equal` witness must soundly
766 /// refuse (the range subspace dropped a dimension) → an O(n) `reset_surface`,
767 /// AND the pinning ψ recorded at that reset is itself rank-deficient, so the
768 /// NEXT in-band trial mismatches its reference and resets a SECOND time. Both
769 /// resets vanish once the optimizer's UPPER bound is clamped down to this
770 /// n-free k-space ceiling, keeping every trial inside the maximal-rank band.
771 ///
772 /// Walks UP from the anchor on the same fixed k-space grid as the floor and
773 /// returns the highest ψ still at the window's maximal numerical rank
774 /// (stopping at the first node above that differs). Purely O(nodes·k³) — no
775 /// row access, inherently n-INDEPENDENT (rank is a property of the k×k tensor).
776 ///
777 /// Returns `None` when the band already reaches `psi_hi` (no clamp needed),
778 /// when the anchor is off-window / rank-indeterminate, or when the window is
779 /// empty.
780 pub fn rank_stable_psi_ceiling(&self, psi_anchor: f64) -> Option<f64> {
781 // Same grid + max-rank target + anchor→band snap as `rank_stable_psi_floor`
782 // so the floor and ceiling bracket the SAME contiguous maximal-rank band.
783 const NODES: usize = 96;
784 if !(self.psi_hi > self.psi_lo) {
785 return None;
786 }
787 let span = self.psi_hi - self.psi_lo;
788 let psi_at = |i: usize| self.psi_lo + span * (i as f64) / ((NODES - 1) as f64);
789 let ranks: Vec<Option<usize>> =
790 (0..NODES).map(|i| self.gram_numerical_rank(psi_at(i))).collect();
791 let max_rank = ranks.iter().filter_map(|r| *r).max()?;
792 let anchor = psi_anchor.clamp(self.psi_lo, self.psi_hi);
793 let anchor_idx = (((anchor - self.psi_lo) / span) * ((NODES - 1) as f64))
794 .round()
795 .clamp(0.0, (NODES - 1) as f64) as usize;
796 // Snap to the nearest max-rank node at/below the anchor (the mirror of the
797 // floor's snap-UP), so the band edge is measured from inside the good band.
798 let band_idx = (0..=anchor_idx).rev().find(|&i| ranks[i] == Some(max_rank))?;
799 // Walk UP from the band node; the ceiling is the highest node from which
800 // every node down to it holds `max_rank`. Stop at the first node above.
801 let mut ceil_idx = band_idx;
802 for i in (band_idx + 1)..NODES {
803 if ranks[i] == Some(max_rank) {
804 ceil_idx = i;
805 } else {
806 break;
807 }
808 }
809 if ceil_idx == NODES - 1 {
810 // The maximal-rank band already reaches `psi_hi` — no clamp needed.
811 None
812 } else {
813 Some(psi_at(ceil_idx))
814 }
815 }
816
817 /// True when `psi` lies inside the certified window.
818 pub fn contains(&self, psi: f64) -> bool {
819 psi.is_finite() && psi >= self.psi_lo && psi <= self.psi_hi
820 }
821
822 /// The certified value window `[psi_lo, psi_hi]` (#1033 instrumentation).
823 pub fn psi_window(&self) -> (f64, f64) {
824 (self.psi_lo, self.psi_hi)
825 }
826
827 /// True when `psi` lies inside the certified gradient window where the
828 /// analytic ψ-derivative is bit-tight against the exact design derivative
829 /// (#1033b). The n-free kappa outer loop is armed only when this covers the
830 /// full optimizer bounds.
831 pub fn contains_for_gradient(&self, psi: f64) -> bool {
832 psi.is_finite()
833 && self.grad_psi_lo.is_finite()
834 && self.grad_psi_hi.is_finite()
835 && psi >= self.grad_psi_lo
836 && psi <= self.grad_psi_hi
837 }
838
839 fn mapped(&self, psi: f64) -> f64 {
840 (2.0 * psi - (self.psi_lo + self.psi_hi)) / (self.psi_hi - self.psi_lo)
841 }
842
843 /// `XᵀWX(ψ)` assembled n-free in O(Dk²) from the direct Gram series.
844 pub fn gram_at(&self, psi: f64) -> Array2<f64> {
845 let x = self.mapped(psi);
846 let t = cheb_t(x, self.gram.len());
847 let mut out = Array2::<f64>::zeros((self.k, self.k));
848 let mut comp = Array2::<f64>::zeros((self.k, self.k));
849 for (d, td) in t.iter().enumerate() {
850 kahan_scaled_add_array2(&mut out, &mut comp, *td, &self.gram[d]);
851 }
852 out
853 }
854
855 /// `XᵀWz(ψ)` assembled n-free in O(Dk).
856 pub fn rhs_at(&self, psi: f64) -> Array1<f64> {
857 let x = self.mapped(psi);
858 let t = cheb_t(x, self.n_coeff);
859 let mut out = Array1::<f64>::zeros(self.k);
860 let mut comp = Array1::<f64>::zeros(self.k);
861 for (d, td) in t.iter().enumerate() {
862 kahan_scaled_add_array1(&mut out, &mut comp, *td, &self.rhs[d]);
863 }
864 out
865 }
866
867 /// Exact `∂(XᵀWX)/∂ψ` from the SAME representation as the value — the
868 /// structural cure for the objective↔gradient desync class on this
869 /// channel. n-free, O(Dk²) from the direct Gram series.
870 pub fn dgram_dpsi(&self, psi: f64) -> Array2<f64> {
871 let x = self.mapped(psi);
872 let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
873 let tp = cheb_t_prime(x, self.gram.len());
874 let mut out = Array2::<f64>::zeros((self.k, self.k));
875 let mut comp = Array2::<f64>::zeros((self.k, self.k));
876 for (d, tpd) in tp.iter().enumerate() {
877 kahan_scaled_add_array2(&mut out, &mut comp, *tpd * dx_dpsi, &self.gram[d]);
878 }
879 out
880 }
881
882 /// Exact `∂(XᵀWz)/∂ψ`, n-free.
883 pub fn drhs_dpsi(&self, psi: f64) -> Array1<f64> {
884 let x = self.mapped(psi);
885 let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
886 let tp = cheb_t_prime(x, self.n_coeff);
887 let mut out = Array1::<f64>::zeros(self.k);
888 let mut comp = Array1::<f64>::zeros(self.k);
889 for (d, tpd) in tp.iter().enumerate() {
890 kahan_scaled_add_array1(&mut out, &mut comp, *tpd * dx_dpsi, &self.rhs[d]);
891 }
892 out
893 }
894
895 /// Exact `∂²(XᵀWX)/∂ψ²` from the SAME representation as the value/gradient —
896 /// the n-free curvature that lets the outer Newton/ARC step read the τ-τ
897 /// Hessian's design-moving block without re-streaming an O(n) slab Gram
898 /// (#1033, Gaussian-identity single-ψ Hessian channel). O(Dk²) from the
899 /// direct Gram series.
900 ///
901 /// `XᵀWX(ψ) = Σ_d T_d(x) G_d` with `x = mapped(ψ)`, so by the chain rule
902 /// `d²/dψ² = T_d″(x) · (dx/dψ)²`.
903 pub fn d2gram_dpsi2(&self, psi: f64) -> Array2<f64> {
904 let x = self.mapped(psi);
905 let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
906 let dx_dpsi_sq = dx_dpsi * dx_dpsi;
907 let tpp = cheb_t_double_prime(x, self.gram.len());
908 let mut out = Array2::<f64>::zeros((self.k, self.k));
909 let mut comp = Array2::<f64>::zeros((self.k, self.k));
910 for (d, tppd) in tpp.iter().enumerate() {
911 kahan_scaled_add_array2(&mut out, &mut comp, *tppd * dx_dpsi_sq, &self.gram[d]);
912 }
913 out
914 }
915
916 /// Exact `∂²(XᵀWz)/∂ψ²`, n-free. `T_d″·(dx/dψ)²` against the rhs slabs.
917 pub fn d2rhs_dpsi2(&self, psi: f64) -> Array1<f64> {
918 let x = self.mapped(psi);
919 let dx_dpsi = 2.0 / (self.psi_hi - self.psi_lo);
920 let dx_dpsi_sq = dx_dpsi * dx_dpsi;
921 let tpp = cheb_t_double_prime(x, self.n_coeff);
922 let mut out = Array1::<f64>::zeros(self.k);
923 let mut comp = Array1::<f64>::zeros(self.k);
924 for (d, tppd) in tpp.iter().enumerate() {
925 kahan_scaled_add_array1(&mut out, &mut comp, *tppd * dx_dpsi_sq, &self.rhs[d]);
926 }
927 out
928 }
929
930 /// Assemble the Gaussian-identity sufficient-statistic cache at `psi`
931 /// without touching a single data row — the bridge from this tensor into
932 /// the inner PLS solver's fast path (#1033b → `GaussianFixedCache`).
933 ///
934 /// `(XᵀWX, XᵀWz, zᵀWz)` is everything the Gaussian penalized solve needs
935 /// at any λ, so a ψ-trial that holds a certified tensor can hand the
936 /// inner solver this cache instead of realizing the n×k design. The
937 /// caller is responsible for `contains(psi)` (off-window trials fall back
938 /// to the exact realizer path). Dense-path bridge only: the sparse
939 /// scatter cache stays `None`.
940 pub fn gaussian_fixed_cache_at(&self, psi: f64) -> crate::pirls::GaussianFixedCache {
941 crate::pirls::GaussianFixedCache {
942 xtwx_orig: self.gram_at(psi),
943 xtwy_orig: self.rhs_at(psi),
944 centered_weighted_y_sq: self.zt_w_z,
945 row_prediction_is_stale: true,
946 xtwx_sparse_orig: None,
947 }
948 }
949}
950
951#[cfg(test)]
952mod tests {
953 use super::*;
954
955 /// Analytic Matérn-shaped synthetic design: entries g(e^{u_ij + ψ}) with
956 /// g(s) = (1 + s)·exp(−s) (the ν = 3/2 Matérn shape) plus a ψ-free power
957 /// column — the exact structural mix of the production radial designs.
958 fn synth_design(psi: f64, n: usize, k: usize) -> Result<Array2<f64>, String> {
959 let mut x = Array2::<f64>::zeros((n, k));
960 for i in 0..n {
961 for j in 0..k {
962 let r = 0.05 + (i as f64 + 1.0) * (j as f64 + 1.0) / (n as f64 * k as f64) * 3.0;
963 if j == k - 1 {
964 // ψ-free polynomial block column.
965 x[[i, j]] = r * r * r;
966 } else {
967 let s = r * psi.exp();
968 x[[i, j]] = (1.0 + s) * (-s).exp();
969 }
970 }
971 }
972 Ok(x)
973 }
974
975 /// A genuinely FULL-RANK, well-conditioned, ψ-dependent synthetic design for
976 /// the gauge-invariance witness test. Unlike `synth_design` (whose Matérn-like
977 /// `(1+s)e^{-s}` columns over a narrow `r`-range collapse to a numerical rank
978 /// of 3–4 of `k=6` and whose near-null subspace *rotates* across the window —
979 /// so `reduced_basis_equal` correctly refuses), this builds `k` near-orthogonal
980 /// Fourier/Chebyshev-flavoured base columns and applies a mild, sign-varying
981 /// per-column amplitude `e^{c_j·ψ}`. The base columns are linearly independent
982 /// with a Gram condition number `≈3`, so the weighted Gram is full column rank
983 /// (numerical rank `= k`) at *every* ψ in the window — its range is the whole
984 /// k-space and the orthogonal range projector is the identity for all ψ. The
985 /// amplitude modulation still genuinely *rotates the eigenvectors* with ψ, so
986 /// the witness must certify (identical range subspace) despite a per-ψ
987 /// eigenvector gauge that differs — exactly the gauge invariance under test.
988 /// The amplitudes are entire in ψ, so the Chebyshev tensor still certifies.
989 fn synth_full_rank_design(psi: f64, n: usize, k: usize) -> Result<Array2<f64>, String> {
990 use std::f64::consts::PI;
991 assert!(k >= 2 && k % 2 == 0, "helper assumes an even k ≥ 2");
992 // ψ-analytic Givens angle: rotates each adjacent column plane by θ(ψ). A
993 // rotation is orthogonal, so it preserves the COLUMN SPACE and the Gram
994 // SPECTRUM (rank = k, condition number constant in ψ) while genuinely
995 // turning the eigenvECTORS — the precise setting in which the range
996 // projector is ψ-invariant (identity at full rank) but the per-ψ gauge
997 // differs. cos/sin are entire, so the Chebyshev tensor still certifies.
998 let theta = 0.6 * psi;
999 let (c, s) = (theta.cos(), theta.sin());
1000 let mut x = Array2::<f64>::zeros((n, k));
1001 for i in 0..n {
1002 let t = (i as f64 + 0.5) / n as f64;
1003 // Distinctly-scaled near-orthogonal base columns → distinct, separated
1004 // eigenvalues so each eigenvector is well-defined (no degenerate plane
1005 // that would make the rotation gauge-ambiguous).
1006 let mut b = vec![0.0_f64; k];
1007 for (j, slot) in b.iter_mut().enumerate() {
1008 let base = if j % 2 == 0 {
1009 ((j as f64) * PI * t).cos()
1010 } else {
1011 (((j + 1) as f64) * PI * t).sin()
1012 };
1013 *slot = (1.0 + 0.5 * j as f64) * base;
1014 }
1015 // Apply the Givens rotation to every adjacent (2m, 2m+1) plane,
1016 // including the dominant top plane, so the LEADING eigenvector rotates
1017 // too (a rotation confined to the small-eigenvalue planes would leave
1018 // the leading eigenvector fixed and make the gauge check vacuous).
1019 let mut row = b.clone();
1020 let mut p = 0;
1021 while p + 1 < k {
1022 let (bp, bq) = (b[p], b[p + 1]);
1023 row[p] = c * bp - s * bq;
1024 row[p + 1] = s * bp + c * bq;
1025 p += 2;
1026 }
1027 for (j, &v) in row.iter().enumerate() {
1028 x[[i, j]] = v;
1029 }
1030 }
1031 Ok(x)
1032 }
1033
1034 fn exact_gram(psi: f64, n: usize, k: usize, w: &Array1<f64>) -> Array2<f64> {
1035 let design = synth_design(psi, n, k).unwrap();
1036 let mut wd = design.clone();
1037 for (mut row, &wi) in wd.outer_iter_mut().zip(w.iter()) {
1038 row.mapv_inplace(|v| v * wi);
1039 }
1040 design.t().dot(&wd)
1041 }
1042
1043 /// #1033b primitive gate: the certified tensor must reproduce the exact
1044 /// Gram/rhs at arbitrary in-window ψ to certification accuracy, and its
1045 /// analytic ψ-derivative must match central finite differences of the
1046 /// exact Gram — value and gradient from one representation.
1047 #[test]
1048 fn psi_gram_tensor_matches_exact_gram_and_fd_gradient() {
1049 let (n, k) = (160usize, 7usize);
1050 let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.5 * ((i % 3) as f64)));
1051 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.37).sin()));
1052 let (psi_lo, psi_hi) = (-1.2_f64, 1.0_f64);
1053
1054 let tensor = PsiGramTensor::build(
1055 |psi| synth_design(psi, n, k),
1056 w.view(),
1057 z.view(),
1058 psi_lo,
1059 psi_hi,
1060 )
1061 .expect("analytic synthetic design must certify");
1062
1063 for &psi in &[-1.1, -0.63, 0.0, 0.41, 0.97] {
1064 assert!(tensor.contains(psi));
1065 let exact = exact_gram(psi, n, k, &w);
1066 let fast = tensor.gram_at(psi);
1067 let scale = exact.iter().fold(0.0_f64, |a, &v| a.max(v.abs()));
1068 for (a, b) in fast.iter().zip(exact.iter()) {
1069 assert!(
1070 (a - b).abs() <= 1e-9 * scale,
1071 "gram mismatch at psi={psi}: fast={a}, exact={b}"
1072 );
1073 }
1074 // rhs check against the exact weighted cross-product.
1075 let design = synth_design(psi, n, k).unwrap();
1076 let mut wz = Array1::<f64>::zeros(n);
1077 for ((slot, &wi), &zi) in wz.iter_mut().zip(w.iter()).zip(z.iter()) {
1078 *slot = wi * zi;
1079 }
1080 let exact_rhs = design.t().dot(&wz);
1081 let fast_rhs = tensor.rhs_at(psi);
1082 let rscale = exact_rhs.iter().fold(0.0_f64, |a, &v| a.max(v.abs()));
1083 for (a, b) in fast_rhs.iter().zip(exact_rhs.iter()) {
1084 assert!(
1085 (a - b).abs() <= 1e-9 * rscale,
1086 "rhs mismatch at psi={psi}: fast={a}, exact={b}"
1087 );
1088 }
1089 // Analytic ψ-gradient vs central FD of the EXACT gram.
1090 let h = 1e-5;
1091 let g_plus = exact_gram(psi + h, n, k, &w);
1092 let g_minus = exact_gram(psi - h, n, k, &w);
1093 let dg = tensor.dgram_dpsi(psi);
1094 let dscale = dg.iter().fold(0.0_f64, |a, &v| a.max(v.abs())).max(1e-12);
1095 for ((a, p), m_) in dg.iter().zip(g_plus.iter()).zip(g_minus.iter()) {
1096 let fd = (p - m_) / (2.0 * h);
1097 assert!(
1098 (a - fd).abs() <= 1e-5 * dscale,
1099 "dgram/dpsi mismatch at psi={psi}: analytic={a}, fd={fd}"
1100 );
1101 }
1102 }
1103 // Outside the window the caller must fall back to the exact path.
1104 assert!(!tensor.contains(psi_hi + 0.5));
1105 assert!(!tensor.contains(psi_lo - 0.5));
1106
1107 // Bridge gate (#1033b → GaussianFixedCache): the n-free cache must
1108 // reproduce the exactly streamed sufficient statistics, and the
1109 // ridge-penalized solves through both must agree — the inner PLS
1110 // consumes nothing else, so this certifies the trial-loop handoff.
1111 for &psi in &[-0.9, 0.2, 0.8] {
1112 let cache = tensor.gaussian_fixed_cache_at(psi);
1113 let design = synth_design(psi, n, k).unwrap();
1114 let mut wd = design.clone();
1115 for (mut row, &wi) in wd.outer_iter_mut().zip(w.iter()) {
1116 row.mapv_inplace(|v| v * wi);
1117 }
1118 let exact_gram = design.t().dot(&wd);
1119 let exact_rhs = wd.t().dot(&z);
1120 let exact_ztwz: f64 = w.iter().zip(z.iter()).map(|(&wi, &zi)| wi * zi * zi).sum();
1121 assert!(
1122 (cache.centered_weighted_y_sq - exact_ztwz).abs()
1123 <= 1e-12 * exact_ztwz.abs().max(1e-300),
1124 "zᵀWz drift: cache={}, exact={exact_ztwz}",
1125 cache.centered_weighted_y_sq
1126 );
1127 // Ridge-penalized solve agreement: (G + I)β = r on both sides.
1128 let solve = |g: &Array2<f64>, r: &Array1<f64>| -> Array1<f64> {
1129 let mut a = g.clone();
1130 for i in 0..k {
1131 a[[i, i]] += 1.0;
1132 }
1133 // Small dense Gauss elimination (k = 7 in this test).
1134 let mut aug = Array2::<f64>::zeros((k, k + 1));
1135 aug.slice_mut(ndarray::s![.., ..k]).assign(&a);
1136 aug.slice_mut(ndarray::s![.., k]).assign(r);
1137 for col in 0..k {
1138 let piv = (col..k)
1139 .max_by(|&p, &q| aug[[p, col]].abs().total_cmp(&aug[[q, col]].abs()))
1140 .unwrap();
1141 if piv != col {
1142 for j in 0..=k {
1143 let tmp = aug[[col, j]];
1144 aug[[col, j]] = aug[[piv, j]];
1145 aug[[piv, j]] = tmp;
1146 }
1147 }
1148 let p = aug[[col, col]];
1149 for row in 0..k {
1150 if row == col {
1151 continue;
1152 }
1153 let f = aug[[row, col]] / p;
1154 for j in col..=k {
1155 aug[[row, j]] -= f * aug[[col, j]];
1156 }
1157 }
1158 }
1159 Array1::from_iter((0..k).map(|i| aug[[i, k]] / aug[[i, i]]))
1160 };
1161 let beta_fast = solve(&cache.xtwx_orig, &cache.xtwy_orig);
1162 let beta_exact = solve(&exact_gram, &exact_rhs);
1163 let bscale = beta_exact
1164 .iter()
1165 .fold(0.0_f64, |a, &v| a.max(v.abs()))
1166 .max(1e-300);
1167 for (a, b) in beta_fast.iter().zip(beta_exact.iter()) {
1168 assert!(
1169 (a - b).abs() <= 1e-8 * bscale,
1170 "penalized solve drift at psi={psi}: fast={a}, exact={b}"
1171 );
1172 }
1173 }
1174 }
1175
1176 /// #1033 rank-stable κ-floor: the conditioned radial Gram goes numerically
1177 /// rank-deficient at the LARGE-length-scale (small-ψ) window edge — the
1178 /// `synth_design` Matérn columns over a narrow `r`-range collapse toward the
1179 /// polynomial nullspace there. `rank_stable_psi_floor` must (a) detect that the
1180 /// maximal-rank band does NOT reach `psi_lo` and return a floor strictly inside
1181 /// the window, (b) report that floor as the lower edge of the maximal-rank band
1182 /// containing the seed, and (c) be a pure k-space property — IDENTICAL whether
1183 /// the tensor was built from n=200 or n=4000 rows (the n-independence the κ
1184 /// outer loop relies on). A design that is full-rank across the whole window
1185 /// must return `None` (no lift needed).
1186 #[test]
1187 fn rank_stable_psi_floor_is_inside_window_and_n_independent() {
1188 let k = 7usize;
1189 // Window spanning the small-ψ rank cliff. Kept moderate so the Chebyshev
1190 // ladder certifies at a low rung (the build is the only n-pass; a wide
1191 // window forces high rungs = many design realizations = slow test).
1192 let (psi_lo, psi_hi) = (-1.6_f64, 1.0_f64);
1193 let build_at = |n: usize| {
1194 let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.5 * ((i % 3) as f64)));
1195 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.37).sin()));
1196 PsiGramTensor::build(|psi| synth_design(psi, n, k), w.view(), z.view(), psi_lo, psi_hi)
1197 .expect("analytic synthetic design must certify")
1198 };
1199
1200 let t_small = build_at(120);
1201 // Seed at the well-conditioned (small-length-scale) window end — the
1202 // κ-optimum's neighbourhood, guaranteed to be at the window-maximal rank
1203 // (the synthetic radial design's rank rises toward large ψ). The floor is
1204 // the lower edge of the maximal-rank band reaching this seed.
1205 let seed = psi_hi;
1206 let floor_small = t_small.rank_stable_psi_floor(seed);
1207
1208 // (a) the band does not reach psi_lo → a floor is returned, strictly inside.
1209 let floor = floor_small.expect("collapsing-rank design must lift the floor off psi_lo");
1210 assert!(
1211 floor > psi_lo && floor <= seed,
1212 "floor {floor} must lie in (psi_lo {psi_lo}, seed {seed}]"
1213 );
1214
1215 // (b) below the floor the Gram is rank-deficient relative to the seed; at/
1216 // above the floor it holds the window-maximal rank. Verify the rank at the
1217 // floor equals the rank at the seed, and the rank just below the floor is
1218 // strictly lower (the floor is a genuine rank edge, not an interior node).
1219 let rank_at = |psi: f64| t_small.gram_numerical_rank(psi).unwrap();
1220 let max_rank = rank_at(seed);
1221 assert_eq!(
1222 rank_at(floor),
1223 max_rank,
1224 "the floor must sit at the window-maximal rank"
1225 );
1226 let probe_below = floor - 0.25;
1227 if t_small.contains(probe_below) {
1228 assert!(
1229 rank_at(probe_below) < max_rank,
1230 "rank just below the floor ({}) must drop under the band rank {max_rank}",
1231 rank_at(probe_below)
1232 );
1233 }
1234
1235 // (c) n-independence: the floor from a 5× larger build must match to grid
1236 // resolution. The tensor is certified to the same Chebyshev tolerance, so
1237 // the k-space rank structure — hence the floor — is the same object.
1238 let t_big = build_at(1000);
1239 let floor_big = t_big
1240 .rank_stable_psi_floor(seed)
1241 .expect("the rank cliff is an n-free property; the big build must also lift");
1242 let grid_step = (psi_hi - psi_lo) / 95.0; // NODES - 1 = 95
1243 assert!(
1244 (floor_small.unwrap() - floor_big).abs() <= 1.5 * grid_step,
1245 "rank-stable floor must be n-independent: n=200 → {}, n=4000 → {floor_big} \
1246 (grid step {grid_step})",
1247 floor_small.unwrap()
1248 );
1249
1250 // A genuinely full-rank, well-conditioned design across the window needs no
1251 // lift → None. `synth_full_rank_design` requires an even k.
1252 let n = 200usize;
1253 let kk = 6usize;
1254 let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.5 * ((i % 3) as f64)));
1255 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.29).cos()));
1256 let full = PsiGramTensor::build(
1257 |psi| synth_full_rank_design(psi, n, kk),
1258 w.view(),
1259 z.view(),
1260 psi_lo,
1261 psi_hi,
1262 )
1263 .expect("full-rank design must certify");
1264 assert!(
1265 full.rank_stable_psi_floor(seed).is_none(),
1266 "a window-wide full-rank design must not lift the floor"
1267 );
1268 }
1269
1270 /// #1033 (rank-stable κ-CEILING): the symmetric twin of the floor test. The
1271 /// `synth_design` radial columns `(1+s)e^{-s}` with `s = r·e^ψ` collapse at the
1272 /// HIGH ψ edge — every column decays toward zero as `s→∞`, so the conditioned
1273 /// Gram drops rank near `psi_hi`. `rank_stable_psi_ceiling` must (a) detect that
1274 /// the maximal-rank band does NOT reach `psi_hi` and return a ceiling strictly
1275 /// inside the window, (b) report that ceiling as the upper edge of the band
1276 /// containing the seed (rank at the ceiling = window-maximal, rank just above it
1277 /// strictly lower), and (c) be a pure k-space property — IDENTICAL whether built
1278 /// from few or many rows (the n-independence the κ outer loop relies on). A
1279 /// design full-rank up to `psi_hi` must return `None` (no clamp needed). This is
1280 /// the regression guard for the n=16000 fast-ladder resets: the κ line search
1281 /// overshot above the band to a rank-deficient ψ and tripped two O(n) resets.
1282 #[test]
1283 fn rank_stable_psi_ceiling_is_inside_window_and_n_independent() {
1284 let k = 7usize;
1285 // `synth_design`'s radial Gram rank RISES with ψ (the columns separate as
1286 // s = r·e^ψ grows), so it collapses at the LOW edge — the floor's setting.
1287 // To exercise the CEILING we feed the ψ-REFLECTED design `synth_design(-ψ)`,
1288 // whose rank instead collapses at the HIGH edge (rank 7→3 as ψ→psi_hi),
1289 // exactly the high-edge degeneracy the κ-ceiling guards against. Seed at a
1290 // window-maximal-rank node (located by a coarse scan, not assumed at an
1291 // edge); the ceiling is the upper edge of the maximal-rank band.
1292 let (psi_lo, psi_hi) = (-2.6_f64, 1.0_f64);
1293 let build_at = |n: usize| {
1294 let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.5 * ((i % 3) as f64)));
1295 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.41).cos()));
1296 PsiGramTensor::build(|psi| synth_design(-psi, n, k), w.view(), z.view(), psi_lo, psi_hi)
1297 .expect("analytic synthetic design must certify")
1298 };
1299
1300 let t_small = build_at(120);
1301 let rank_at = |psi: f64| t_small.gram_numerical_rank(psi).unwrap();
1302 let scan: Vec<(f64, usize)> = (0..96)
1303 .map(|i| {
1304 let p = psi_lo + (psi_hi - psi_lo) * (i as f64) / 95.0;
1305 (p, rank_at(p))
1306 })
1307 .collect();
1308 let window_max_rank = scan.iter().map(|&(_, r)| r).max().unwrap();
1309 let seed = scan
1310 .iter()
1311 .find(|&&(_, r)| r == window_max_rank)
1312 .map(|&(p, _)| p)
1313 .expect("some node must hold the window-maximal rank");
1314 let ceil_small = t_small.rank_stable_psi_ceiling(seed);
1315
1316 // (a) the band does not reach psi_hi → a ceiling is returned, strictly inside.
1317 let ceiling =
1318 ceil_small.expect("high-edge-collapsing design must clamp the ceiling off psi_hi");
1319 assert!(
1320 ceiling < psi_hi && ceiling >= seed,
1321 "ceiling {ceiling} must lie in [seed {seed}, psi_hi {psi_hi})"
1322 );
1323
1324 // (b) at/below the ceiling the Gram holds the window-maximal rank; above it
1325 // the rank drops. The ceiling is a genuine rank edge, not an interior node.
1326 let max_rank = window_max_rank;
1327 assert_eq!(
1328 rank_at(seed),
1329 max_rank,
1330 "the seed must sit at the window-maximal rank"
1331 );
1332 assert_eq!(
1333 rank_at(ceiling),
1334 max_rank,
1335 "the ceiling must sit at the window-maximal rank"
1336 );
1337 let probe_above = ceiling + 0.25;
1338 if t_small.contains(probe_above) {
1339 assert!(
1340 rank_at(probe_above) < max_rank,
1341 "rank just above the ceiling ({}) must drop under the band rank {max_rank}",
1342 rank_at(probe_above)
1343 );
1344 }
1345
1346 // (c) n-independence: the ceiling from a larger build matches to grid
1347 // resolution — the rank cliff is an n-free k-space property.
1348 let t_big = build_at(1000);
1349 let ceil_big = t_big
1350 .rank_stable_psi_ceiling(seed)
1351 .expect("the rank cliff is an n-free property; the big build must also clamp");
1352 let grid_step = (psi_hi - psi_lo) / 95.0; // NODES - 1 = 95
1353 assert!(
1354 (ceil_small.unwrap() - ceil_big).abs() <= 1.5 * grid_step,
1355 "rank-stable ceiling must be n-independent: n=120 → {}, n=1000 → {ceil_big} \
1356 (grid step {grid_step})",
1357 ceil_small.unwrap()
1358 );
1359
1360 // A genuinely full-rank design across the window needs no clamp → None.
1361 let n = 200usize;
1362 let kk = 6usize;
1363 let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.5 * ((i % 3) as f64)));
1364 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.23).sin()));
1365 let full = PsiGramTensor::build(
1366 |psi| synth_full_rank_design(psi, n, kk),
1367 w.view(),
1368 z.view(),
1369 psi_lo,
1370 psi_hi,
1371 )
1372 .expect("full-rank design must certify");
1373 assert!(
1374 full.rank_stable_psi_ceiling(seed).is_none(),
1375 "a window-wide full-rank design must not clamp the ceiling"
1376 );
1377 }
1378
1379 /// #1033 n-independence invariant (structural, build-free, bit-tight):
1380 /// after the one-time `build` n-pass, EVERY per-trial accessor the certified
1381 /// κ/ψ outer-loop hot path consumes — the value `(gram_at, rhs_at)`, the
1382 /// gradient `(dgram_dpsi, drhs_dpsi)`, the Hessian-channel curvature
1383 /// `(d2gram_dpsi2, d2rhs_dpsi2)`, and the inner-solver bridge
1384 /// `gaussian_fixed_cache_at` — must touch ZERO data rows. We prove this by
1385 /// instrumenting the `eval_design` closure with an invocation counter (the
1386 /// closure is the ONLY route to the n×k design): the counter advances during
1387 /// `build` (the certified node ladder + off-node spot checks) and must then
1388 /// stay FROZEN across an entire ψ-trial sweep. This is the
1389 /// "no surface rebuild / no n×k re-realization on a cache-hit trial"
1390 /// invariant the outer-loop seam (`SpatialJointContext::eval_full`,
1391 /// `skip_design_realization`) relies on — asserted here at the tensor source
1392 /// of truth, independent of whether the full κ-fit converges or of any
1393 /// wall-clock measurement.
1394 #[test]
1395 fn psi_gram_tensor_trial_accessors_touch_no_data_rows() {
1396 use std::cell::Cell;
1397
1398 let (n, k) = (256usize, 6usize);
1399 let w = Array1::from_iter((0..n).map(|i| 0.8 + 0.4 * ((i % 4) as f64) / 3.0));
1400 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.21).sin() + 0.2));
1401 let (psi_lo, psi_hi) = (-1.1_f64, 0.95_f64);
1402
1403 // The closure is the SOLE path to the n×k design; count every call.
1404 let calls = Cell::new(0usize);
1405 let tensor = PsiGramTensor::build(
1406 |psi| {
1407 calls.set(calls.get() + 1);
1408 synth_design(psi, n, k)
1409 },
1410 w.view(),
1411 z.view(),
1412 psi_lo,
1413 psi_hi,
1414 )
1415 .expect("analytic synthetic design must certify");
1416
1417 // The one-time build necessarily streamed the design at the Chebyshev
1418 // nodes plus off-node spot checks. Freeze the count.
1419 let build_calls = calls.get();
1420 assert!(
1421 build_calls > 0,
1422 "build must have streamed the design at least once (sanity)"
1423 );
1424
1425 // A dense ψ-trial sweep strictly inside the certified window. Every
1426 // accessor below is what a per-trial outer-loop eval consumes.
1427 let m = 64usize;
1428 let lo = psi_lo + 0.05;
1429 let hi = psi_hi - 0.05;
1430 for i in 0..m {
1431 let psi = lo + (hi - lo) * (i as f64) / (m as f64 - 1.0);
1432 assert!(tensor.contains(psi));
1433 // Value lane.
1434 let _g = tensor.gram_at(psi);
1435 let _r = tensor.rhs_at(psi);
1436 // Gradient lane.
1437 let _dg = tensor.dgram_dpsi(psi);
1438 let _dr = tensor.drhs_dpsi(psi);
1439 // Hessian-channel curvature.
1440 let _d2g = tensor.d2gram_dpsi2(psi);
1441 let _d2r = tensor.d2rhs_dpsi2(psi);
1442 // Inner-solver bridge (the GaussianFixedCache the PLS fast path reads).
1443 let _cache = tensor.gaussian_fixed_cache_at(psi);
1444 }
1445
1446 assert_eq!(
1447 calls.get(),
1448 build_calls,
1449 "n-independence VIOLATED: a per-trial accessor re-streamed the n×k \
1450 design ({} extra eval_design calls across {m} ψ-trials). The certified \
1451 κ/ψ outer loop must serve value + gradient + Hessian curvature + the \
1452 inner-solver cache from k-space sufficient statistics only, with NO \
1453 per-trial n-row work.",
1454 calls.get() - build_calls
1455 );
1456 }
1457
1458 /// #1033 Hessian-channel primitive gate: the n-free second ψ-derivatives
1459 /// `d2gram_dpsi2` / `d2rhs_dpsi2` must match central FD of the analytic FIRST
1460 /// derivatives (`dgram_dpsi` / `drhs_dpsi`) — the curvature the outer Newton
1461 /// /ARC step reads when the Gaussian Hessian channel is served from the
1462 /// tensor instead of a re-streamed O(n) slab. Differencing the analytic first
1463 /// derivative (not the exact gram) keeps this a pure check of the
1464 /// second-derivative recurrence, isolated from the build's value-lane tol.
1465 #[test]
1466 fn psi_gram_tensor_second_derivative_matches_fd_of_gradient() {
1467 let (n, k) = (160usize, 7usize);
1468 let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.5 * ((i % 3) as f64)));
1469 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.37).sin()));
1470 let (psi_lo, psi_hi) = (-1.2_f64, 1.0_f64);
1471
1472 let tensor = PsiGramTensor::build(
1473 |psi| synth_design(psi, n, k),
1474 w.view(),
1475 z.view(),
1476 psi_lo,
1477 psi_hi,
1478 )
1479 .expect("analytic synthetic design must certify");
1480
1481 let h = 1e-5;
1482 for &psi in &[-1.0, -0.5, 0.0, 0.4, 0.9] {
1483 // ∂²G/∂ψ² vs central FD of the analytic ∂G/∂ψ.
1484 let dg_plus = tensor.dgram_dpsi(psi + h);
1485 let dg_minus = tensor.dgram_dpsi(psi - h);
1486 let d2g = tensor.d2gram_dpsi2(psi);
1487 let gscale = d2g.iter().fold(0.0_f64, |a, &v| a.max(v.abs())).max(1e-9);
1488 for ((a, p), m_) in d2g.iter().zip(dg_plus.iter()).zip(dg_minus.iter()) {
1489 let fd = (p - m_) / (2.0 * h);
1490 assert!(
1491 (a - fd).abs() <= 1e-4 * gscale,
1492 "d2gram/dpsi2 mismatch at psi={psi}: analytic={a}, fd={fd}"
1493 );
1494 }
1495 // ∂²(XᵀWz)/∂ψ² vs central FD of the analytic ∂(XᵀWz)/∂ψ.
1496 let dr_plus = tensor.drhs_dpsi(psi + h);
1497 let dr_minus = tensor.drhs_dpsi(psi - h);
1498 let d2r = tensor.d2rhs_dpsi2(psi);
1499 let rscale = d2r.iter().fold(0.0_f64, |a, &v| a.max(v.abs())).max(1e-9);
1500 for ((a, p), m_) in d2r.iter().zip(dr_plus.iter()).zip(dr_minus.iter()) {
1501 let fd = (p - m_) / (2.0 * h);
1502 assert!(
1503 (a - fd).abs() <= 1e-4 * rscale,
1504 "d2rhs/dpsi2 mismatch at psi={psi}: analytic={a}, fd={fd}"
1505 );
1506 }
1507 }
1508 }
1509
1510 /// The penalized Gaussian profile deviance at a fixed ridge λ, assembled
1511 /// PURELY from the sufficient-statistic triple `(G, r, c) = (XᵀWX, XᵀWz, zᵀWz)`:
1512 ///
1513 /// ```text
1514 /// β(λ) = (G + λS)⁻¹ r, D(ψ;λ) = c − 2 βᵀr + βᵀ(G + λS)β = c − βᵀr
1515 /// ```
1516 ///
1517 /// (the second equality uses the normal equations `(G + λS)β = r`). This is
1518 /// EXACTLY the object the inner Gaussian PLS minimizes over β, and it is a
1519 /// pure function of `(G, r, c)` — n-free. Returns `(D, β)` so the caller can
1520 /// also probe the coefficient lane. `s_ridge` is the ridge penalty matrix.
1521 fn profile_deviance(
1522 g: &Array2<f64>,
1523 r: &Array1<f64>,
1524 c: f64,
1525 s_ridge: &Array2<f64>,
1526 lambda: f64,
1527 k: usize,
1528 ) -> (f64, Array1<f64>) {
1529 // Dense (G + λS) β = r via partial-pivot Gauss elimination (small k).
1530 let mut a = g.clone();
1531 a.scaled_add(lambda, s_ridge);
1532 let mut aug = Array2::<f64>::zeros((k, k + 1));
1533 aug.slice_mut(ndarray::s![.., ..k]).assign(&a);
1534 aug.slice_mut(ndarray::s![.., k]).assign(r);
1535 for col in 0..k {
1536 let piv = (col..k)
1537 .max_by(|&p, &q| aug[[p, col]].abs().total_cmp(&aug[[q, col]].abs()))
1538 .unwrap();
1539 if piv != col {
1540 for j in 0..=k {
1541 let tmp = aug[[col, j]];
1542 aug[[col, j]] = aug[[piv, j]];
1543 aug[[piv, j]] = tmp;
1544 }
1545 }
1546 let p = aug[[col, col]];
1547 for row in 0..k {
1548 if row == col {
1549 continue;
1550 }
1551 let f = aug[[row, col]] / p;
1552 for j in col..=k {
1553 aug[[row, j]] -= f * aug[[col, j]];
1554 }
1555 }
1556 }
1557 let beta = Array1::from_iter((0..k).map(|i| aug[[i, k]] / aug[[i, i]]));
1558 let deviance = c - beta.dot(r);
1559 (deviance, beta)
1560 }
1561
1562 /// #1033 bit-tight Hessian + κ-optimum gate. The fast path's promise is not
1563 /// merely that the Gram VALUE matches at sampled ψ — it is that the WHOLE
1564 /// outer κ search (objective, its ψ-curvature, and therefore the located
1565 /// optimum) is reproduced by the n-free sufficient-statistic representation
1566 /// to machine precision. This harness certifies exactly that:
1567 ///
1568 /// 1. **Objective**: the penalized profile deviance `D(ψ)` assembled from
1569 /// the tensor's `(gram_at, rhs_at, zᵀWz)` matches the exactly streamed
1570 /// `XᵀWX/XᵀWz/zᵀWz` deviance bit-tight at every ψ on a fine grid.
1571 /// 2. **Curvature (Hessian)**: the second ψ-derivative `D''(ψ)` of the
1572 /// fast-path objective matches the second ψ-derivative of the EXACT
1573 /// objective (central FD of the streamed deviance) — the curvature the
1574 /// outer Newton step reads must be the true curvature, not an
1575 /// approximation that drifts off the value (the objective↔gradient
1576 /// desync class, now extended to the second order).
1577 /// 3. **κ-optimum**: the argmin of `D(ψ)` over the grid is IDENTICAL
1578 /// between the two assemblies — the fast path lands on the same κ as the
1579 /// exact streamed search, to the grid resolution AND bit-tight in the
1580 /// objective value at that node.
1581 #[test]
1582 fn psi_gram_tensor_bit_tight_hessian_and_kappa_optimum() {
1583 let (n, k) = (200usize, 6usize);
1584 // Heterogeneous weights + a response with genuine ψ-dependent curvature
1585 // so the deviance has a non-degenerate interior minimum in ψ.
1586 let w = Array1::from_iter((0..n).map(|i| 0.7 + 0.6 * (((i * 7) % 5) as f64) / 4.0));
1587 let z = Array1::from_iter((0..n).map(|i| {
1588 let t = (i as f64) / (n as f64 - 1.0);
1589 (3.0 * t).sin() + 0.3 * (7.0 * t).cos()
1590 }));
1591 let (psi_lo, psi_hi) = (-1.0_f64, 0.9_f64);
1592 // Fixed ridge λ over the search — the κ optimizer profiles ψ at fixed
1593 // smoothing here; identity-S ridge keeps the profile well-posed.
1594 let s_ridge = Array2::<f64>::eye(k);
1595 let lambda = 0.5_f64;
1596
1597 let tensor = PsiGramTensor::build(
1598 |psi| synth_design(psi, n, k),
1599 w.view(),
1600 z.view(),
1601 psi_lo,
1602 psi_hi,
1603 )
1604 .expect("analytic synthetic design must certify");
1605
1606 let exact_ztwz: f64 = w.iter().zip(z.iter()).map(|(&wi, &zi)| wi * zi * zi).sum();
1607
1608 // Exact streamed deviance at arbitrary ψ — the ground truth the n-free
1609 // path must reproduce.
1610 let exact_deviance = |psi: f64| -> f64 {
1611 let design = synth_design(psi, n, k).unwrap();
1612 let mut wd = design.clone();
1613 for (mut row, &wi) in wd.outer_iter_mut().zip(w.iter()) {
1614 row.mapv_inplace(|v| v * wi);
1615 }
1616 let g = design.t().dot(&wd);
1617 let r = wd.t().dot(&z);
1618 profile_deviance(&g, &r, exact_ztwz, &s_ridge, lambda, k).0
1619 };
1620
1621 // Fast n-free deviance from the certified tensor.
1622 let fast_deviance = |psi: f64| -> f64 {
1623 let g = tensor.gram_at(psi);
1624 let r = tensor.rhs_at(psi);
1625 profile_deviance(&g, &r, exact_ztwz, &s_ridge, lambda, k).0
1626 };
1627
1628 // Dense grid strictly inside the certified window (away from the edges,
1629 // where the build's value lane is still certified but we want a clean
1630 // central-FD second derivative to exist on both sides).
1631 let m = 81usize;
1632 let lo = psi_lo + 0.06;
1633 let hi = psi_hi - 0.06;
1634 let grid: Vec<f64> = (0..m)
1635 .map(|i| lo + (hi - lo) * (i as f64) / (m as f64 - 1.0))
1636 .collect();
1637
1638 // (1) Objective bit-tight across the whole grid; track argmin on both.
1639 let mut worst_value_rel = 0.0_f64;
1640 let (mut fast_argmin, mut fast_min) = (f64::NAN, f64::INFINITY);
1641 let (mut exact_argmin, mut exact_min) = (f64::NAN, f64::INFINITY);
1642 for &psi in &grid {
1643 let de = exact_deviance(psi);
1644 let df = fast_deviance(psi);
1645 let rel = (de - df).abs() / de.abs().max(1e-300);
1646 worst_value_rel = worst_value_rel.max(rel);
1647 if df < fast_min {
1648 fast_min = df;
1649 fast_argmin = psi;
1650 }
1651 if de < exact_min {
1652 exact_min = de;
1653 exact_argmin = psi;
1654 }
1655 }
1656 assert!(
1657 worst_value_rel <= 1e-9,
1658 "penalized profile deviance: fast n-free assembly diverged from exact \
1659 streamed by rel {worst_value_rel:.3e} (> 1e-9) somewhere on the ψ grid"
1660 );
1661
1662 // (3) κ-optimum: identical grid node AND bit-tight value there. The
1663 // argmin must be a true interior minimum (not a window edge) for this to
1664 // certify the OUTER search rather than a boundary artifact.
1665 assert_eq!(
1666 fast_argmin.to_bits(),
1667 exact_argmin.to_bits(),
1668 "κ-optimum mismatch: fast argmin ψ={fast_argmin}, exact argmin ψ={exact_argmin} \
1669 — the n-free objective located a different optimum"
1670 );
1671 assert!(
1672 fast_argmin > lo + 1e-9 && fast_argmin < hi - 1e-9,
1673 "κ-optimum landed on the grid edge ψ={fast_argmin}; the fixture must have \
1674 an INTERIOR minimum for this to test the outer search, not a boundary"
1675 );
1676 let opt_rel = (exact_min - fast_min).abs() / exact_min.abs().max(1e-300);
1677 assert!(
1678 opt_rel <= 1e-9,
1679 "κ-optimum objective value drift at ψ={fast_argmin}: fast={fast_min}, \
1680 exact={exact_min}, rel={opt_rel:.3e}"
1681 );
1682
1683 // (2) Gradient + curvature from the tensor's ANALYTIC ψ-derivatives.
1684 //
1685 // Differencing two objectives that agree only to ~1e-9 in VALUE cannot
1686 // certify their curvature: the central second difference divides by h²,
1687 // so the ~1e-9 value gap (which is NOT common-mode — they are different
1688 // assemblies) is amplified by 1/h² and swamps any real curvature signal.
1689 // The principled bit-tight curvature check uses the tensor's OWN analytic
1690 // ψ-derivatives `dgram_dpsi`/`drhs_dpsi`: the envelope gradient of the
1691 // profile deviance `D(ψ) = c − rᵀA⁻¹r`, `A = G + λS`, is
1692 //
1693 // D'(ψ) = −2 βᵀ(∂r/∂ψ) + βᵀ(∂G/∂ψ)β, β = A⁻¹r,
1694 //
1695 // assembled n-free from `(dgram_dpsi, drhs_dpsi)`. We certify this
1696 // analytic gradient against a central FD of the EXACT streamed objective
1697 // (first order ⇒ only 1/h amplification, so the ~1e-9 value agreement is
1698 // not destroyed), and certify the curvature by central-differencing the
1699 // ANALYTIC gradient (again 1/h, not 1/h²). This is the same one-
1700 // representation value↔gradient↔curvature consistency the production fast
1701 // path relies on for the outer Newton step.
1702 let solve_a = |g: &Array2<f64>, r: &Array1<f64>| -> Array1<f64> {
1703 profile_deviance(g, r, exact_ztwz, &s_ridge, lambda, k).1
1704 };
1705 // Analytic n-free ψ-gradient of the penalized profile deviance, valid on
1706 // the certified gradient sub-window where `dgram_dpsi` is bit-tight.
1707 let analytic_grad = |psi: f64| -> f64 {
1708 let g = tensor.gram_at(psi);
1709 let r = tensor.rhs_at(psi);
1710 let beta = solve_a(&g, &r);
1711 let dg = tensor.dgram_dpsi(psi);
1712 let dr = tensor.drhs_dpsi(psi);
1713 -2.0 * beta.dot(&dr) + beta.dot(&dg.dot(&beta))
1714 };
1715
1716 // Two finite-difference steps, each near the optimum of its own
1717 // truncation/rounding trade-off:
1718 // * `h_grad = 1e-6` for the FIRST derivative (central FD ⇒ O(h²)
1719 // truncation, O(ε/h) rounding ⇒ optimum near 1e-5..1e-6);
1720 // * `h_curv = 2e-4` for the curvature. A SECOND difference divides by
1721 // h², so its rounding floor is O(ε·|D|/h²): at h=1e-6 that is
1722 // ~1e-16/1e-12 = 1e-4 of |D|, comparable to the curvature itself —
1723 // useless. h≈2e-4 puts the rounding floor at ~1e-16/4e-8 ≈ 2.5e-9·|D|
1724 // and the O(h²·D⁗) truncation around the same scale, so the second
1725 // difference is meaningful. The analytic-gradient curvature is
1726 // differenced at the SAME h_curv so the two carry the same
1727 // truncation order and the comparison is apples-to-apples.
1728 let h_grad = 1e-6_f64;
1729 let h_curv = 2e-4_f64;
1730 let mut worst_grad_rel = 0.0_f64;
1731 let mut worst_hess_rel = 0.0_f64;
1732 let mut tested = 0usize;
1733 for &psi in &grid {
1734 // The exact-objective curvature stencil reaches ±2·h_curv; require the
1735 // whole stencil to stay inside the certified gradient sub-window so the
1736 // analytic-gradient differences are all bit-tight.
1737 if !tensor.contains_for_gradient(psi - 2.0 * h_curv)
1738 || !tensor.contains_for_gradient(psi + 2.0 * h_curv)
1739 {
1740 continue;
1741 }
1742 tested += 1;
1743 // Analytic gradient vs central FD of the EXACT streamed objective.
1744 let exact_g1 =
1745 (exact_deviance(psi + h_grad) - exact_deviance(psi - h_grad)) / (2.0 * h_grad);
1746 let ag = analytic_grad(psi);
1747 let gscale = exact_g1.abs().max(1e-6);
1748 worst_grad_rel = worst_grad_rel.max((exact_g1 - ag).abs() / gscale);
1749 // Curvature: central FD of the ANALYTIC gradient (n-free) vs central
1750 // second difference of the EXACT objective, both at h_curv.
1751 let analytic_h2 =
1752 (analytic_grad(psi + h_curv) - analytic_grad(psi - h_curv)) / (2.0 * h_curv);
1753 let exact_h2 = (exact_deviance(psi + h_curv) - 2.0 * exact_deviance(psi)
1754 + exact_deviance(psi - h_curv))
1755 / (h_curv * h_curv);
1756 let hscale = exact_h2.abs().max(1e-3);
1757 worst_hess_rel = worst_hess_rel.max((analytic_h2 - exact_h2).abs() / hscale);
1758 }
1759 assert!(
1760 tested > 0,
1761 "no ψ on the grid lay inside the certified gradient sub-window"
1762 );
1763 assert!(
1764 worst_grad_rel <= 1e-5,
1765 "ψ-gradient mismatch: the tensor's analytic n-free objective gradient diverged \
1766 from the exact streamed objective by rel {worst_grad_rel:.3e} (> 1e-5)"
1767 );
1768 // The curvature compares an analytic-gradient central difference against
1769 // an exact-objective second difference; the residual O(h²) truncation +
1770 // O(ε/h²) rounding floor at h_curv=2e-4 sets a realistic bit-tight bar of
1771 // ~1e-3 relative (any larger gap is a genuine curvature divergence, not FD
1772 // noise — the value/gradient lanes already certify the objective itself to
1773 // ~1e-9/1e-5).
1774 assert!(
1775 worst_hess_rel <= 1e-3,
1776 "ψ-curvature (Hessian) mismatch: fast n-free objective curvature diverged \
1777 from the exact streamed objective by rel {worst_hess_rel:.3e} (> 1e-3) — \
1778 the outer Newton step would read a different curvature than the truth"
1779 );
1780
1781 eprintln!(
1782 "[psi-gram-bittight] n={n} k={k} grid={m} grad-tested={tested} \
1783 worst |ΔD|/D={worst_value_rel:.2e} worst |ΔD'|/D'={worst_grad_rel:.2e} \
1784 worst |ΔD''|/D''={worst_hess_rel:.2e} κ-opt ψ={fast_argmin:.6} (interior, bit-identical)"
1785 );
1786 }
1787
1788 /// Certification negative: a NON-analytic (kinked) design must refuse to
1789 /// certify rather than silently approximate.
1790 #[test]
1791 fn psi_gram_tensor_refuses_non_analytic_design() {
1792 let (n, k) = (40usize, 3usize);
1793 let w = Array1::from_elem(n, 1.0);
1794 let z = Array1::from_elem(n, 0.5);
1795 let tensor = PsiGramTensor::build(
1796 |psi| {
1797 let mut x = Array2::<f64>::zeros((n, k));
1798 for i in 0..n {
1799 for j in 0..k {
1800 // |ψ| kink at 0 inside the window: not analytic.
1801 x[[i, j]] = psi.abs() + (i + j) as f64 / (n + k) as f64;
1802 }
1803 }
1804 Ok(x)
1805 },
1806 w.view(),
1807 z.view(),
1808 -1.0,
1809 1.0,
1810 );
1811 assert!(
1812 tensor.is_err(),
1813 "kinked design must fail the tail-decay/spot-check certificates"
1814 );
1815 }
1816
1817 /// #1264 reduced-basis-equality witness — REFLEXIVITY + GAUGE INVARIANCE.
1818 ///
1819 /// `reduced_basis_equal(ψ, ψ)` is trivially sound (the surface is its own
1820 /// reference), and the witness must accept two ψ's whose RANGE subspace is
1821 /// identical even when the per-ψ eigenvECTORS differ (the projector is
1822 /// gauge-invariant). The synthetic full-rank Matérn-shaped design's range is
1823 /// the whole k-space for every ψ, so every in-window pair shares a reduced
1824 /// basis and must certify.
1825 #[test]
1826 fn reduced_basis_witness_reflexive_and_gauge_invariant() {
1827 let (n, k) = (160usize, 6usize);
1828 let w = Array1::from_iter((0..n).map(|i| 1.0 + 0.3 * ((i % 5) as f64)));
1829 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.29).sin()));
1830 let (psi_lo, psi_hi) = (-1.0_f64, 0.8_f64);
1831 // Use the genuinely full-rank, well-conditioned design: its weighted Gram
1832 // has numerical rank `= k` at every ψ (range = whole k-space, identity
1833 // range projector), so the gauge-invariance premise actually holds. The
1834 // narrow-`r` `synth_design` does NOT satisfy this — its Gram is rank 3–4 of
1835 // 6 with a near-null subspace that ROTATES across the window, on which the
1836 // witness *correctly* refuses (refusing a rotating reduced basis is the
1837 // sound fallback the production skip gate exists for). See
1838 // `synth_full_rank_design`.
1839 let tensor = PsiGramTensor::build(
1840 |psi| synth_full_rank_design(psi, n, k),
1841 w.view(),
1842 z.view(),
1843 psi_lo,
1844 psi_hi,
1845 )
1846 .expect("analytic full-rank synthetic design must certify");
1847
1848 // PREMISE CHECK: the design is full column rank (numerical rank = k) and
1849 // the range projector is the identity at every grid ψ, so the test really
1850 // is exercising gauge invariance over a ψ-invariant subspace — not riding a
1851 // rank-deficient fixture the witness would (correctly) refuse.
1852 let grid: Vec<f64> = (0..=12).map(|i| psi_lo + 0.05 + 0.06 * i as f64).collect();
1853 let identity = Array2::<f64>::eye(k);
1854 for &psi in &grid {
1855 let (proj, rank) = tensor
1856 .range_projector(psi, PSI_GRAM_SKIP_RANK_RTOL)
1857 .expect("full-rank Gram must yield a range projector");
1858 assert_eq!(
1859 rank, k,
1860 "full-rank design must have numerical rank k={k} at psi={psi} \
1861 (got {rank}) — otherwise the gauge-invariance premise is vacuous"
1862 );
1863 let proj_dev = (&proj - &identity)
1864 .iter()
1865 .fold(0.0_f64, |acc, &v| acc.max(v.abs()));
1866 assert!(
1867 proj_dev <= 1e-8,
1868 "range projector must be the identity at psi={psi} \
1869 (max|P−I|={proj_dev:.2e})"
1870 );
1871 }
1872
1873 // GAUGE-INVARIANCE CHECK: the per-ψ eigenvectors genuinely rotate across
1874 // the window (so the witness is exercised against a moving gauge, not a
1875 // static one), yet the spanned subspace is identical. Confirm the rotation
1876 // is real by checking the leading eigenvector turns measurably end-to-end.
1877 let leading_evec = |psi: f64| -> Array1<f64> {
1878 use gam_linalg::faer_ndarray::FaerEigh;
1879 let g = tensor.gram_at(psi);
1880 let gsym = 0.5 * (&g + &g.t());
1881 let (evals, evecs) = gsym.eigh(faer::Side::Lower).unwrap();
1882 // `eigh` returns ascending eigenvalues; the leading one is the last.
1883 let top = evals.len() - 1;
1884 evecs.column(top).to_owned()
1885 };
1886 let v_lo = leading_evec(grid[0]);
1887 let v_hi = leading_evec(*grid.last().unwrap());
1888 let cos_angle = v_lo.dot(&v_hi).abs()
1889 / (v_lo.dot(&v_lo).sqrt() * v_hi.dot(&v_hi).sqrt()).max(1e-300);
1890 assert!(
1891 cos_angle <= 0.999,
1892 "the design's eigenvectors must rotate with ψ for the gauge-invariance \
1893 test to be non-trivial (|cos∠(v_lo,v_hi)|={cos_angle:.6} — too close to 1)"
1894 );
1895
1896 // Reflexive: same ψ is always sound.
1897 for &psi in &[-0.9, -0.2, 0.0, 0.5, 0.79] {
1898 assert!(
1899 tensor.reduced_basis_equal(psi, psi),
1900 "witness must be reflexive at psi={psi}"
1901 );
1902 }
1903 // The full-rank synthetic design spans all of k-space at every ψ, so the
1904 // range projector is the identity for all ψ → every pair certifies despite
1905 // the eigenvector rotation just verified (gauge invariance).
1906 for &a in &grid {
1907 for &b in &grid {
1908 assert!(
1909 tensor.reduced_basis_equal(a, b),
1910 "full-rank design: range is ψ-invariant (identity projector), \
1911 so the skip witness must certify (ψ_ref={a}, ψ_new={b})"
1912 );
1913 }
1914 }
1915 // Off-window ψ refuses.
1916 assert!(!tensor.reduced_basis_equal(psi_lo - 0.5, 0.0));
1917 assert!(!tensor.reduced_basis_equal(0.0, psi_hi + 0.5));
1918 }
1919
1920 /// #1264 reduced-basis-equality witness — REFUSES across a genuine subspace
1921 /// change (the exact failure mode of the old RRQR-only gate).
1922 ///
1923 /// Construct a design whose first two columns are fixed (ψ-invariant) profiles
1924 /// and whose third column's AMPLITUDE `ε(ψ) = e^{αψ}` analytically sweeps the
1925 /// third eigendirection's eigenvalue `∝ ε²` across the rank-revealing cutoff.
1926 /// Below the cutoff the reduced (range) basis is the 2-D span of the first two
1927 /// profiles; above it the range is 3-D. Two ψ's on the SAME side of the
1928 /// threshold share a reduced basis (witness accepts); two ψ's STRADDLING it do
1929 /// not (witness refuses) — exactly the stale-basis pairing the design-revision
1930 /// fast path must not perform. The amplitude is smooth/analytic so the tensor
1931 /// still certifies (this is a reduced-basis change, not a non-analytic kink).
1932 #[test]
1933 fn reduced_basis_witness_refuses_across_subspace_change() {
1934 let (n, k) = (200usize, 3usize);
1935 // Three fixed, well-separated column profiles (full column rank when all
1936 // present). The third is scaled by ε(ψ).
1937 let base = |i: usize, j: usize| -> f64 {
1938 let t = (i as f64 + 0.5) / n as f64;
1939 match j {
1940 0 => 1.0,
1941 1 => (2.0 * std::f64::consts::PI * t).sin(),
1942 _ => (4.0 * std::f64::consts::PI * t).cos(),
1943 }
1944 };
1945 // ε(ψ) crosses √cutoff (relative to λ_max ~ O(n)) within the window: at
1946 // λ_max ≈ n the cutoff is rank_rtol·n ≈ 1e-10·200 = 2e-8, so the third
1947 // eigenvalue ε²·‖c3‖² ≈ ε²·(n/2) crosses it at ε ≈ sqrt(4e-8/n) ≈ 1.4e-5,
1948 // i.e. ψ* ≈ ln(1.4e-5)/α. With α = 10 and window [−1.6,−0.8], ψ* ≈ −1.12
1949 // sits inside the window, giving a clean below/above split.
1950 let alpha = 10.0_f64;
1951 let design = move |psi: f64| -> Result<Array2<f64>, String> {
1952 let eps = (alpha * psi).exp();
1953 let mut x = Array2::<f64>::zeros((n, k));
1954 for i in 0..n {
1955 x[[i, 0]] = base(i, 0);
1956 x[[i, 1]] = base(i, 1);
1957 x[[i, 2]] = eps * base(i, 2);
1958 }
1959 Ok(x)
1960 };
1961 let w = Array1::from_elem(n, 1.0);
1962 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.13).sin()));
1963 let (psi_lo, psi_hi) = (-1.6_f64, -0.8_f64);
1964 let tensor = PsiGramTensor::build(design, w.view(), z.view(), psi_lo, psi_hi)
1965 .expect("smooth ε(ψ) design must still certify (analytic, no kink)");
1966
1967 // Find the actual threshold by scanning the rank.
1968 let rank_at = |psi: f64| -> usize {
1969 tensor
1970 .range_projector(psi, PSI_GRAM_SKIP_RANK_RTOL)
1971 .map(|(_, r)| r)
1972 .unwrap_or(0)
1973 };
1974 let lo_rank = rank_at(psi_lo + 0.02);
1975 let hi_rank = rank_at(psi_hi - 0.02);
1976 assert_eq!(
1977 lo_rank, 2,
1978 "low-ψ end must be rank-2 (third column below cutoff)"
1979 );
1980 assert_eq!(
1981 hi_rank, 3,
1982 "high-ψ end must be rank-3 (third column above cutoff)"
1983 );
1984
1985 // Same-side pairs (both rank-2) certify; straddling pairs refuse.
1986 let psi_low_a = psi_lo + 0.05;
1987 let psi_low_b = psi_lo + 0.10;
1988 assert_eq!(rank_at(psi_low_a), 2);
1989 assert_eq!(rank_at(psi_low_b), 2);
1990 assert!(
1991 tensor.reduced_basis_equal(psi_low_a, psi_low_b),
1992 "two low-ψ trials share the rank-2 reduced basis → skip is sound"
1993 );
1994 let psi_high_a = psi_hi - 0.05;
1995 let psi_high_b = psi_hi - 0.10;
1996 assert_eq!(rank_at(psi_high_a), 3);
1997 assert_eq!(rank_at(psi_high_b), 3);
1998 // High-side: the range is the full 3-D space at both, so the projector is
1999 // the identity at both → still a shared reduced basis.
2000 assert!(
2001 tensor.reduced_basis_equal(psi_high_a, psi_high_b),
2002 "two high-ψ trials share the rank-3 reduced basis → skip is sound"
2003 );
2004 // Straddling the rank change: the reduced basis MOVED (2-D → 3-D). The
2005 // witness MUST refuse — this is precisely the stale-basis pairing the old
2006 // RRQR-only gate let through.
2007 assert!(
2008 !tensor.reduced_basis_equal(psi_low_a, psi_high_a),
2009 "witness must REFUSE a skip that straddles the reduced-basis (rank) \
2010 change — freezing the low-ψ rank-2 basis and re-keying the high-ψ \
2011 rank-3 Gram is the exact ~7.8e-2 β̂ regression #1264 guards"
2012 );
2013 assert!(
2014 !tensor.reduced_basis_equal(psi_high_a, psi_low_a),
2015 "witness must refuse symmetrically (high pin, low trial)"
2016 );
2017 }
2018
2019 /// #1033 ROTATION WALL — the subspace-distance certificate must CERTIFY a
2020 /// skip across a pure basis ROTATION at fixed rank, where the old entrywise
2021 /// max-abs projector gate would have refused.
2022 ///
2023 /// Build a rank-2 (in a k=3 space) design whose 2-D range ROTATES smoothly
2024 /// with ψ but whose RANK stays 2: two ψ-dependent in-plane directions span the
2025 /// same fixed 2-plane (cols 0,1 of a fixed orthonormal pair) rotated by an
2026 /// analytic angle φ(ψ). The SUBSPACE (the 2-plane) is ψ-invariant — only the
2027 /// basis within it rotates — so the range projector is mathematically
2028 /// IDENTICAL at every ψ, but its eigenVECTORS rotate. A correct
2029 /// subspace-identity witness must certify every in-window pair; the spectral
2030 /// (principal-angle) distance is ~0 throughout while a naive entrywise
2031 /// comparison of rotated eigenbases would not be guaranteed to.
2032 #[test]
2033 fn reduced_basis_witness_certifies_across_pure_rotation_1033() {
2034 let (n, k) = (240usize, 3usize);
2035 // Two fixed orthogonal ambient profiles spanning a fixed 2-plane; the
2036 // third ambient direction is left empty so the range is exactly that
2037 // 2-plane (rank 2) for every ψ.
2038 let p0 = |i: usize| -> f64 {
2039 let t = (i as f64 + 0.5) / n as f64;
2040 (2.0 * std::f64::consts::PI * t).sin()
2041 };
2042 let p1 = |i: usize| -> f64 {
2043 let t = (i as f64 + 0.5) / n as f64;
2044 (2.0 * std::f64::consts::PI * t).cos()
2045 };
2046 // Within the fixed 2-plane, rotate the two design columns by φ(ψ): the
2047 // SPAN is unchanged (still the {p0,p1} plane) but the basis rotates, so
2048 // the per-ψ eigenvectors of the Gram rotate while the range projector is
2049 // ψ-invariant.
2050 let design = move |psi: f64| -> Result<Array2<f64>, String> {
2051 let phi = 0.7 * psi; // analytic angle sweep
2052 let (c, s) = (phi.cos(), phi.sin());
2053 let mut x = Array2::<f64>::zeros((n, k));
2054 for i in 0..n {
2055 let (a, b) = (p0(i), p1(i));
2056 x[[i, 0]] = c * a - s * b;
2057 x[[i, 1]] = s * a + c * b;
2058 // column 2 stays zero → range is the fixed 2-plane, rank 2.
2059 }
2060 Ok(x)
2061 };
2062 let w = Array1::from_elem(n, 1.0);
2063 let z = Array1::from_iter((0..n).map(|i| ((i as f64) * 0.17).cos()));
2064 let (psi_lo, psi_hi) = (-1.0_f64, 1.0_f64);
2065 let tensor = PsiGramTensor::build(design, w.view(), z.view(), psi_lo, psi_hi)
2066 .expect("smooth rotation design must certify (analytic, no kink)");
2067
2068 // Rank is a constant 2 across the window (the third direction is empty).
2069 let rank_at = |psi: f64| -> usize {
2070 tensor
2071 .range_projector(psi, PSI_GRAM_SKIP_RANK_RTOL)
2072 .map(|(_, r)| r)
2073 .unwrap_or(0)
2074 };
2075 for &psi in &[-0.95, -0.4, 0.0, 0.4, 0.95] {
2076 assert_eq!(rank_at(psi), 2, "rotation keeps rank 2 at psi={psi}");
2077 }
2078
2079 // Every in-window pair spans the SAME 2-plane (only the basis rotates),
2080 // so the subspace-distance witness MUST certify the skip — this is the
2081 // rotation that the entrywise gate kept refusing (the #1033 wall).
2082 let grid: Vec<f64> = (0..=10).map(|i| psi_lo + 0.05 + 0.09 * i as f64).collect();
2083 for &a in &grid {
2084 for &b in &grid {
2085 assert!(
2086 tensor.reduced_basis_equal(a, b),
2087 "pure in-plane rotation preserves the range subspace → the \
2088 subspace-distance skip witness must certify (#1033) \
2089 (ψ_ref={a}, ψ_new={b})"
2090 );
2091 }
2092 }
2093 }
2094}