gam_solve/sensitivity.rs
1//! ONE sensitivity operator (#935): every "how does the fit move?"
2//! question is the same solve.
3//!
4//! At a penalized optimum the stationarity condition `g(β̂; t) = 0` makes
5//! every sensitivity of the fit one object — the factored fitted curvature
6//! applied to a perturbation of the score:
7//!
8//! ```text
9//! ∂β̂/∂t = −H⁻¹ · ∂g/∂t
10//! ```
11//!
12//! for ANY perturbation channel `t`: smoothing parameters (the REML outer
13//! gradient), case weights (ALO / leave-one-out / Cook's distance),
14//! responses (data attribution). One identity, read off in whichever
15//! direction a diagnostic needs it.
16//!
17//! Before this, the tree computed `H⁻¹·` in independent dialects with
18//! independent factorizations — `AloFactoredHessian` (runtime.rs), an
19//! `ift_dbeta_drho_from_solver` solve-closure and a separate coned variant
20//! (evidence.rs), and the projected pseudo-inverse of the rank-deficient
21//! LAML kernel (unified.rs) — so each site had to answer on its own the
22//! question that actually causes bugs: **which inverse is "H⁻¹"?** The
23//! large-scale fix 0dc469bd and the #901 layer-2 investigation are both
24//! incidents of two sites answering differently.
25//!
26//! [`FitSensitivity`] is the single answer. It is built once at the optimum
27//! from whichever factored form the solver already has — a faer Cholesky
28//! factor, a raw lower-triangular (arrow-Schur) factor, or the projected
29//! pseudo-inverse `U · M⁻¹ · Uᵀ` (the #752/#901 intrinsic-quotient
30//! convention) — and every consumer asks it, never a factor directly.
31//! Consumers therefore cannot disagree about the inverse, and every
32//! batching/cone improvement made inside [`FitSensitivity::apply_multi`] is
33//! inherited by all of them at once.
34//!
35//! The channels, each a one-line restatement of the identity above:
36//!
37//! - [`mode_response`](FitSensitivity::mode_response) — `−H⁻¹ ∂g/∂t`, the
38//! REML outer gradient's `∂β̂/∂ρ` (evidence `ift_dbeta_drho`).
39//! - [`mode_response_coned`](FitSensitivity::mode_response_coned) — the same
40//! response confined to its cone of influence (#779); the lazy/local form
41//! the smoothing-correction IFT uses.
42//! - [`leverage_block`](FitSensitivity::leverage_block) — `H⁻¹Xᵀ`, whose
43//! column `i` is at once ALO's per-row solve and the case/response channel.
44//! - [`case_deletion`](FitSensitivity::case_deletion) — dfbetas + Cook's
45//! distance, the leave-one-out channel, one scaled column of `H⁻¹Xᵀ` each.
46//!
47//! What is deliberately NOT folded in: the matrix-free `hop.solve_multi`
48//! (PCG/GPU), the constrained kernel `K_T = K_S − K_S Aᵀ(A K_S Aᵀ)⁻¹A K_S`,
49//! and `alo.rs`'s zero-copy `StableSolver` loop. Those are distinct inverse
50//! *representations*, not duplicate spellings of the same factored inverse —
51//! routing them through here would regress performance and couple unrelated
52//! concerns rather than remove the bug class.
53
54use ndarray::{Array1, Array2, ArrayView1, ArrayView2};
55
56use gam_linalg::faer_ndarray::FaerCholeskyFactor;
57
58/// The fitted curvature in whichever factored form the solver produced —
59/// the SINGLE place that knows how to invert it.
60pub enum FittedInverse<'a> {
61 /// Cholesky factor of the (stabilized) penalized Hessian: the
62 /// full-rank convention (PIRLS / ALO path).
63 FaerCholesky(&'a FaerCholeskyFactor),
64 /// Raw lower-triangular Cholesky factor `L` with `H = L·Lᵀ` (the
65 /// arrow-Schur reduced factor in evidence.rs).
66 LowerTriangular(&'a Array2<f64>),
67 /// Projected (pseudo-)inverse `U · M⁻¹ · Uᵀ` over a column basis `U`
68 /// (p × r) with reduced inverse `M⁻¹` (r × r) — the rank-deficient
69 /// LAML convention (#752/0dc469bd/#901): the inverse acts on
70 /// range(U) and annihilates its complement.
71 Projected {
72 basis: &'a Array2<f64>,
73 reduced_inverse: &'a Array2<f64>,
74 },
75}
76
77/// The one sensitivity operator built at the optimum. See module docs.
78pub struct FitSensitivity<'a> {
79 inverse: FittedInverse<'a>,
80 dim: usize,
81}
82
83impl<'a> FitSensitivity<'a> {
84 pub fn from_faer_cholesky(factor: &'a FaerCholeskyFactor, dim: usize) -> Self {
85 Self {
86 inverse: FittedInverse::FaerCholesky(factor),
87 dim,
88 }
89 }
90
91 pub fn from_lower_triangular(factor: &'a Array2<f64>) -> Self {
92 let dim = factor.nrows();
93 Self {
94 inverse: FittedInverse::LowerTriangular(factor),
95 dim,
96 }
97 }
98
99 pub fn from_projected(basis: &'a Array2<f64>, reduced_inverse: &'a Array2<f64>) -> Self {
100 let dim = basis.nrows();
101 Self {
102 inverse: FittedInverse::Projected {
103 basis,
104 reduced_inverse,
105 },
106 dim,
107 }
108 }
109
110 /// Coefficient dimension `p` the operator acts on.
111 pub fn dim(&self) -> usize {
112 self.dim
113 }
114
115 /// `H⁻¹ · rhs` (pseudo-inverse action for the projected variant).
116 pub fn apply(&self, rhs: &Array1<f64>) -> Array1<f64> {
117 assert_eq!(rhs.len(), self.dim, "FitSensitivity rhs dimension");
118 match &self.inverse {
119 FittedInverse::FaerCholesky(factor) => factor.solvevec(rhs),
120 FittedInverse::LowerTriangular(factor) => {
121 gam_linalg::triangular::cholesky_solve_vector(factor.view(), rhs.view())
122 }
123 FittedInverse::Projected {
124 basis,
125 reduced_inverse,
126 } => {
127 // `U · (M⁻¹ · (Uᵀ · a))` via faer SIMD contractions — the
128 // single spelling of the projected (rank-deficient LAML)
129 // inverse, shared with `PenaltySubspaceTrace`.
130 let proj = gam_linalg::faer_ndarray::fast_atv(basis, rhs);
131 let reduced = reduced_inverse.dot(&proj);
132 gam_linalg::faer_ndarray::fast_av(basis, &reduced)
133 }
134 }
135 }
136
137 /// `H⁻¹ · RHS` for a (p × m) block of right-hand sides — the batched
138 /// form every multi-channel consumer should use (outer ρ-pair solves,
139 /// ALO's `H⁻¹Xᵀ` leverage block) so the factor is traversed once per
140 /// block instead of once per column.
141 pub fn apply_multi(&self, rhs: ArrayView2<'_, f64>) -> Array2<f64> {
142 assert_eq!(rhs.nrows(), self.dim, "FitSensitivity RHS dimension");
143 match &self.inverse {
144 FittedInverse::FaerCholesky(factor) => {
145 let mut out = rhs.to_owned();
146 factor.solve_mat_in_place(&mut out);
147 out
148 }
149 FittedInverse::LowerTriangular(factor) => {
150 gam_linalg::triangular::cholesky_solve_matrix(*factor, rhs)
151 }
152 FittedInverse::Projected {
153 basis,
154 reduced_inverse,
155 } => {
156 let reduced = gam_linalg::faer_ndarray::fast_atb(basis, &rhs.to_owned());
157 gam_linalg::faer_ndarray::fast_ab(basis, &reduced_inverse.dot(&reduced))
158 }
159 }
160 }
161
162 /// The IFT mode response `∂β̂/∂t = −H⁻¹ · ∂g/∂t` for a (p × m) block
163 /// of score perturbations — THE object of #935.
164 ///
165 /// Returns `None` if any solved entry is non-finite (the factored
166 /// curvature was unusable for this channel); callers must not
167 /// substitute an approximation, matching the contract of the deleted
168 /// `ift_dbeta_drho_from_solver`.
169 pub fn mode_response(&self, dg_dt: ArrayView2<'_, f64>) -> Option<Array2<f64>> {
170 if dg_dt.nrows() != self.dim {
171 return None;
172 }
173 let mut out = self.apply_multi(dg_dt);
174 if out.iter().any(|value| !value.is_finite()) {
175 return None;
176 }
177 out.mapv_inplace(|value| -value);
178 Some(out)
179 }
180
181 /// Cone-of-influence mode response (#779), the lazy/local form of
182 /// [`Self::mode_response`]. Each perturbation column `∂g/∂t_a` is
183 /// structurally supported only within `col_supports[a]`, so its response
184 /// `−H⁻¹ ∂g/∂t_a` is exactly zero outside the coupling component of
185 /// `hessian` containing that support. Columns whose support is empty (a
186 /// structurally inactive channel) are skipped with no solve; the active
187 /// columns are solved as ONE batched block through [`Self::apply_multi`]
188 /// — strictly better than the per-column BLAS-2 loop this replaces — and
189 /// each result confined to its cone. On a fully coupled `hessian` every
190 /// cone is the whole space and the result equals [`Self::mode_response`]
191 /// bit-for-bit.
192 ///
193 /// `hessian` must be the same curvature this operator inverts; a
194 /// dimension mismatch (or any non-finite solved entry) returns `None`
195 /// rather than silently substituting an approximation.
196 pub fn mode_response_coned(
197 &self,
198 hessian: ArrayView2<'_, f64>,
199 dg_dt: ArrayView2<'_, f64>,
200 col_supports: &[std::ops::Range<usize>],
201 ) -> Option<Array2<f64>> {
202 let p = self.dim;
203 let r = dg_dt.ncols();
204 if dg_dt.nrows() != p
205 || hessian.nrows() != p
206 || hessian.ncols() != p
207 || col_supports.len() != r
208 {
209 return None;
210 }
211 let labels = crate::evidence::coupling_components(hessian);
212 if labels.len() != p {
213 return None;
214 }
215
216 // Active columns + their cones; structurally inactive columns (empty
217 // support → empty cone) contribute an identically-zero sensitivity
218 // and are skipped entirely (no solve).
219 let mut active: Vec<(usize, Vec<usize>)> = Vec::new();
220 for a in 0..r {
221 let sr = &col_supports[a];
222 let support: Vec<usize> = (sr.start..sr.end)
223 .filter(|idx| *idx < p)
224 .filter(|idx| dg_dt[[*idx, a]] != 0.0)
225 .collect();
226 let cone = crate::evidence::cone_of_influence(&labels, &support);
227 if !cone.is_empty() {
228 active.push((a, cone));
229 }
230 }
231
232 let mut out = Array2::<f64>::zeros((p, r));
233 if active.is_empty() {
234 return Some(out);
235 }
236 // One batched solve over only the active columns.
237 let mut rhs = Array2::<f64>::zeros((p, active.len()));
238 for (j, (a, _)) in active.iter().enumerate() {
239 rhs.column_mut(j).assign(&dg_dt.column(*a));
240 }
241 let solved = self.apply_multi(rhs.view());
242 if solved.iter().any(|value| !value.is_finite()) {
243 return None;
244 }
245 for (j, (a, cone)) in active.iter().enumerate() {
246 for &row in cone {
247 out[[row, *a]] = -solved[[row, j]];
248 }
249 }
250 Some(out)
251 }
252
253 /// `H⁻¹Xᵀ` (p × n) — the shared leverage/case-sensitivity block: its
254 /// column i is simultaneously ALO's per-observation solve, the case-
255 /// weight channel `∂g/∂w_i ∝ x_i`, and the response channel
256 /// `∂g/∂y_i ∝ x_i`. One blocked solve serves all three diagnostics.
257 pub fn leverage_block(&self, design: &Array2<f64>) -> Array2<f64> {
258 assert_eq!(design.ncols(), self.dim, "FitSensitivity design width");
259 self.apply_multi(design.t())
260 }
261
262 /// Data attribution `∂β̂/∂y` (p × n) — how each fitted coefficient
263 /// responds to each response value, the `t = y` channel of the one
264 /// identity `∂β̂/∂t = −H⁻¹ ∂g/∂t`.
265 ///
266 /// The response enters the penalized score only through the working
267 /// residual, so `∂g/∂y_i = −w_i x_i` and therefore
268 ///
269 /// ```text
270 /// ∂β̂/∂y_i = w_i · H⁻¹ x_i,
271 /// ```
272 /// i.e. column `i` of [`Self::leverage_block`] scaled by the working
273 /// weight `w_i`. Contracting back through the design recovers the
274 /// smoother/hat matrix `A = X (∂β̂/∂y) = X H⁻¹ Xᵀ W`, whose diagonal is
275 /// the leverage already reported elsewhere. For a Gaussian penalized fit
276 /// `β̂ = H⁻¹ Xᵀ y`, so this Jacobian is exact (and weight-free); for a GLM
277 /// it is the one-step attribution at the fitted working weights.
278 ///
279 /// Returns `None` on a shape mismatch.
280 pub fn response_jacobian(
281 &self,
282 design: &Array2<f64>,
283 working_weights: ArrayView1<'_, f64>,
284 ) -> Option<Array2<f64>> {
285 let n = design.nrows();
286 if design.ncols() != self.dim || working_weights.len() != n {
287 return None;
288 }
289 // Column i is H⁻¹ x_i; scale it by w_i to get ∂β̂/∂y_i.
290 let mut dbeta_dy = self.leverage_block(design);
291 for i in 0..n {
292 let w_i = working_weights[i];
293 dbeta_dy.column_mut(i).mapv_inplace(|v| w_i * v);
294 }
295 Some(dbeta_dy)
296 }
297
298 /// Case-deletion influence (dfbetas + Cook's distance) for every
299 /// observation, built from the one sensitivity operator — the
300 /// "leave-one-out" channel #935 was designed to unify.
301 ///
302 /// Deleting observation `i` perturbs the penalized score by exactly its
303 /// own contribution, so the IFT mode response gives the coefficient
304 /// change in closed form (the penalized Sherman–Morrison identity):
305 ///
306 /// ```text
307 /// β̂ − β̂₍ᵢ₎ = (w_i r_i / (1 − h_ii)) · H⁻¹ x_i
308 /// ```
309 ///
310 /// where `x_i` is row `i` of `design`, `w_i = working_weights[i]` the
311 /// IRLS working weight, `r_i = working_residual[i]` the working residual
312 /// `z_i − x_iᵀβ̂`, and `h_ii = w_i x_iᵀ H⁻¹ x_i` the leverage. Column `i`
313 /// of [`Self::leverage_block`] **is** `H⁻¹ x_i`, so each dfbeta is one
314 /// scaled column — no per-observation refit, no second factorization.
315 /// For a Gaussian penalized fit the identity is exact; for a GLM it is
316 /// the standard one-step (ALO) approximation, consistent with the
317 /// leverage already reported by `AloDiagnostics`.
318 ///
319 /// Cook's distance uses the metric the fit actually moves in,
320 /// `H = XᵀWX + S`:
321 ///
322 /// ```text
323 /// D_i = (β̂−β̂₍ᵢ₎)ᵀ H (β̂−β̂₍ᵢ₎) / (p · φ)
324 /// = scale_i² · (x_iᵀ H⁻¹ x_i) / (p · φ), scale_i = w_i r_i / (1 − h_ii),
325 /// ```
326 ///
327 /// the second form following from `(H⁻¹x_i)ᵀ H (H⁻¹x_i) = x_iᵀ H⁻¹ x_i`,
328 /// so the single quadratic form `x_iᵀ H⁻¹ x_i` gates the leverage, the
329 /// deletion denominator, and Cook's distance alike — no separate `H` apply.
330 ///
331 /// This is an *opt-in* diagnostic: `dfbeta` is `n × p` and is never
332 /// materialized on the default fit path (it would be ruinous at large-scale
333 /// scale). Returns `None` on a shape mismatch or if any leverage reaches
334 /// `1` (a point the deletion identity cannot resolve).
335 pub fn case_deletion(
336 &self,
337 design: &Array2<f64>,
338 working_weights: ArrayView1<'_, f64>,
339 working_residual: ArrayView1<'_, f64>,
340 phi: f64,
341 ) -> Option<CaseDeletionInfluence> {
342 let n = design.nrows();
343 let p = design.ncols();
344 if p != self.dim
345 || working_weights.len() != n
346 || working_residual.len() != n
347 || !(phi.is_finite() && phi > 0.0)
348 || p == 0
349 {
350 return None;
351 }
352 // Column i of H⁻¹Xᵀ is H⁻¹ x_i — one blocked solve for all n.
353 let h_inv_xt = self.leverage_block(design);
354
355 let mut dfbeta = Array2::<f64>::zeros((n, p));
356 let mut leverage = Array1::<f64>::zeros(n);
357 let mut cooks = Array1::<f64>::zeros(n);
358 let p_phi = p as f64 * phi;
359 for i in 0..n {
360 // hinv_xi = H⁻¹x_i is column i of the leverage block; the single
361 // quadratic form x_iᵀ H⁻¹ x_i gates everything below.
362 let hinv_xi = h_inv_xt.column(i);
363 let xhx = design.row(i).dot(&hinv_xi);
364 let h_ii = working_weights[i] * xhx;
365 let denom = 1.0 - h_ii;
366 // Leverage 1 pins the row to its own fit: the closed-form
367 // deletion is singular there, so we refuse rather than emit ∞.
368 if !denom.is_finite() || denom.abs() < f64::EPSILON {
369 return None;
370 }
371 // β̂ − β̂₍ᵢ₎ = scale · H⁻¹x_i — one scaled column, no refit.
372 let scale = working_weights[i] * working_residual[i] / denom;
373 dfbeta.row_mut(i).assign(&(&hinv_xi * scale));
374 leverage[i] = h_ii;
375 cooks[i] = scale * scale * xhx / p_phi;
376 }
377 Some(CaseDeletionInfluence {
378 dfbeta,
379 leverage,
380 cooks_distance: cooks,
381 })
382 }
383}
384
385/// Exact (Gaussian) / one-step (GLM) case-deletion influence produced by
386/// [`FitSensitivity::case_deletion`]. See that method for the identities.
387pub struct CaseDeletionInfluence {
388 /// `dfbeta[[i, j]]` = change in coefficient `j` when observation `i` is
389 /// left out, `β̂_j − β̂₍ᵢ₎_j`.
390 pub dfbeta: Array2<f64>,
391 /// Leverage (hat value) `h_ii = w_i x_iᵀ H⁻¹ x_i` per observation.
392 pub leverage: Array1<f64>,
393 /// Cook's distance per observation.
394 pub cooks_distance: Array1<f64>,
395}
396
397#[cfg(test)]
398mod tests {
399 use super::*;
400 use gam_linalg::faer_ndarray::FaerCholesky;
401 use faer::Side;
402 use ndarray::array;
403
404 /// Textbook 3×3 lower-Cholesky, written out so the LowerTriangular
405 /// variant is tested against an independently constructed factor.
406 fn lower_cholesky_3x3(h: &Array2<f64>) -> Array2<f64> {
407 let mut l = Array2::<f64>::zeros((3, 3));
408 for i in 0..3 {
409 for j in 0..=i {
410 let mut acc = h[[i, j]];
411 for k in 0..j {
412 acc -= l[[i, k]] * l[[j, k]];
413 }
414 if i == j {
415 l[[i, j]] = acc.sqrt();
416 } else {
417 l[[i, j]] = acc / l[[j, j]];
418 }
419 }
420 }
421 l
422 }
423
424 /// All three factored forms of the SAME matrix must agree on every
425 /// entry point — this is the "no site can pick a different inverse"
426 /// guarantee, checked directly.
427 #[test]
428 fn all_variants_agree_on_the_same_curvature() {
429 let h = array![[4.0, 1.0, 0.5], [1.0, 3.0, 0.2], [0.5, 0.2, 2.0]];
430 let faer = h.cholesky(Side::Lower).expect("SPD factor");
431 let lower = lower_cholesky_3x3(&h);
432 // Projected variant with full basis = the plain inverse.
433 let eye = Array2::eye(3);
434 let h_inv = {
435 let mut out: Array2<f64> = Array2::eye(3);
436 faer.solve_mat_in_place(&mut out);
437 out
438 };
439 let s_faer = FitSensitivity::from_faer_cholesky(&faer, 3);
440 let s_tri = FitSensitivity::from_lower_triangular(&lower);
441 let s_proj = FitSensitivity::from_projected(&eye, &h_inv);
442
443 let rhs = array![0.7, -1.2, 0.4];
444 let block = array![[0.7, 1.0], [-1.2, 0.0], [0.4, -2.0]];
445 let a = s_faer.apply(&rhs);
446 let b = s_tri.apply(&rhs);
447 let c = s_proj.apply(&rhs);
448 for i in 0..3 {
449 assert!((a[i] - b[i]).abs() <= 1e-12, "faer vs triangular [{i}]");
450 assert!((a[i] - c[i]).abs() <= 1e-12, "faer vs projected [{i}]");
451 }
452 let ma = s_faer.apply_multi(block.view());
453 let mb = s_tri.apply_multi(block.view());
454 let mc = s_proj.apply_multi(block.view());
455 let resp = s_faer.mode_response(block.view()).expect("finite");
456 for i in 0..3 {
457 for j in 0..2 {
458 assert!((ma[[i, j]] - mb[[i, j]]).abs() <= 1e-12);
459 assert!((ma[[i, j]] - mc[[i, j]]).abs() <= 1e-12);
460 assert!(
461 (resp[[i, j]] + ma[[i, j]]).abs() <= 1e-15,
462 "mode response sign"
463 );
464 }
465 }
466 // H · apply(rhs) must reproduce rhs (it is a true inverse here).
467 let back = h.dot(&a);
468 for i in 0..3 {
469 assert!((back[i] - rhs[i]).abs() <= 1e-12, "inverse residual [{i}]");
470 }
471 }
472
473 /// Case-deletion dfbetas from the operator must equal the EXACT
474 /// leave-one-out refit of a ridge-penalized Gaussian fit — brute-force
475 /// dropping each row and re-solving — to machine precision. This is the
476 /// penalized Sherman–Morrison identity, analytic ground truth, no
477 /// external tool. Cook's distance is checked against its own definition
478 /// `(β−β₍ᵢ₎)ᵀ H (β−β₍ᵢ₎)/(p·φ)` using the exact refit.
479 #[test]
480 fn case_deletion_matches_exact_loo_refit() {
481 // Small over-determined ridge problem: H = XᵀX + S, β = H⁻¹ Xᵀ y,
482 // working weights w_i = 1 and working residual r_i = y_i − x_iᵀβ
483 // (the Gaussian identity-link IRLS reduction).
484 let x = array![
485 [1.0, 0.2, -0.5],
486 [1.0, -1.1, 0.3],
487 [1.0, 0.7, 1.4],
488 [1.0, 2.0, -0.8],
489 [1.0, -0.4, 0.9],
490 [1.0, 1.3, 0.1],
491 ];
492 let y = array![0.5, -1.2, 2.1, 0.3, -0.7, 1.0];
493 let n = x.nrows();
494 let p = x.ncols();
495 // Penalty S (a mild ridge on the two slopes, intercept unpenalized).
496 let mut s = Array2::<f64>::zeros((p, p));
497 s[[1, 1]] = 0.4;
498 s[[2, 2]] = 0.4;
499
500 let xtx = x.t().dot(&x);
501 let h = &xtx + &s;
502 let h_inv = {
503 let f = h.cholesky(Side::Lower).expect("SPD");
504 let mut out: Array2<f64> = Array2::eye(p);
505 f.solve_mat_in_place(&mut out);
506 out
507 };
508 let xty = x.t().dot(&y);
509 let beta = h_inv.dot(&xty);
510
511 let w = Array1::<f64>::ones(n);
512 let resid = &y - &x.dot(&beta);
513
514 let faer = h.cholesky(Side::Lower).expect("SPD");
515 let op = FitSensitivity::from_faer_cholesky(&faer, p);
516 let infl = op
517 .case_deletion(&x, w.view(), resid.view(), 1.0)
518 .expect("case deletion");
519
520 for i in 0..n {
521 // Exact refit with row i removed: H₍ᵢ₎ = H − x_i x_iᵀ (penalty S
522 // is a prior, unchanged by data deletion), rhs = Xᵀy − x_i y_i.
523 let x_i = x.row(i).to_owned();
524 let mut h_del = h.clone();
525 for a in 0..p {
526 for b in 0..p {
527 h_del[[a, b]] -= x_i[a] * x_i[b];
528 }
529 }
530 let rhs_del = &xty - &(&x_i * y[i]);
531 let h_del_inv = {
532 let f = h_del.cholesky(Side::Lower).expect("SPD deleted");
533 let mut out: Array2<f64> = Array2::eye(p);
534 f.solve_mat_in_place(&mut out);
535 out
536 };
537 let beta_del = h_del_inv.dot(&rhs_del);
538 let exact_dfbeta = &beta - &beta_del;
539
540 for j in 0..p {
541 assert!(
542 (infl.dfbeta[[i, j]] - exact_dfbeta[j]).abs() < 1e-9,
543 "dfbeta[{i},{j}]: operator {} vs exact refit {}",
544 infl.dfbeta[[i, j]],
545 exact_dfbeta[j]
546 );
547 }
548 // Cook's distance against its definition with the exact refit.
549 let cook_exact = exact_dfbeta.dot(&h.dot(&exact_dfbeta)) / (p as f64 * 1.0);
550 assert!(
551 (infl.cooks_distance[i] - cook_exact).abs() < 1e-9,
552 "cooks[{i}]: {} vs {}",
553 infl.cooks_distance[i],
554 cook_exact
555 );
556 // Leverage must match the hat value x_iᵀ H⁻¹ x_i (w_i = 1).
557 let h_ii = x_i.dot(&h_inv.dot(&x_i));
558 assert!((infl.leverage[i] - h_ii).abs() < 1e-12, "leverage[{i}]");
559 }
560 }
561
562 /// Data attribution `∂β̂/∂y` must equal the actual change in the fitted
563 /// coefficients when a response value is perturbed and the ridge fit is
564 /// re-solved — exact for the Gaussian penalized model (`β̂ = H⁻¹Xᵀy`).
565 #[test]
566 fn response_jacobian_matches_refit_perturbation() {
567 let x = array![
568 [1.0, 0.2, -0.5],
569 [1.0, -1.1, 0.3],
570 [1.0, 0.7, 1.4],
571 [1.0, 2.0, -0.8],
572 [1.0, -0.4, 0.9],
573 ];
574 let y = array![0.5, -1.2, 2.1, 0.3, -0.7];
575 let n = x.nrows();
576 let p = x.ncols();
577 let mut s = Array2::<f64>::zeros((p, p));
578 s[[1, 1]] = 0.4;
579 s[[2, 2]] = 0.4;
580 let h = &x.t().dot(&x) + &s;
581 let faer = h.cholesky(Side::Lower).expect("SPD");
582
583 let solve = |rhs: &Array1<f64>| faer.solvevec(rhs);
584 let beta = solve(&x.t().dot(&y));
585
586 let op = FitSensitivity::from_faer_cholesky(&faer, p);
587 let w = Array1::<f64>::ones(n);
588 let dbeta_dy = op.response_jacobian(&x, w.view()).expect("jacobian");
589
590 // Exact (linear) refit: bump y_j, re-solve, compare (β'−β)/ε to col j.
591 let eps = 1e-6;
592 for j in 0..n {
593 let mut yp = y.clone();
594 yp[j] += eps;
595 let beta_p = solve(&x.t().dot(&yp));
596 for c in 0..p {
597 let fd = (beta_p[c] - beta[c]) / eps;
598 assert!(
599 (dbeta_dy[[c, j]] - fd).abs() < 1e-7,
600 "∂β̂_{c}/∂y_{j}: analytic {} vs refit {}",
601 dbeta_dy[[c, j]],
602 fd
603 );
604 }
605 }
606 }
607
608 #[test]
609 fn mode_response_refuses_non_finite_channels() {
610 let h = array![[2.0, 0.0], [0.0, 1.0]];
611 let faer = h.cholesky(Side::Lower).expect("SPD factor");
612 let s = FitSensitivity::from_faer_cholesky(&faer, 2);
613 let bad = array![[1.0], [f64::NAN]];
614 assert!(s.mode_response(bad.view()).is_none());
615 let wrong_dim = array![[1.0], [0.0], [0.0]];
616 assert!(s.mode_response(wrong_dim.view()).is_none());
617 }
618}