gam_solve/
sensitivity.rs

1//! ONE sensitivity operator (#935): every "how does the fit move?"
2//! question is the same solve.
3//!
4//! At a penalized optimum the stationarity condition `g(β̂; t) = 0` makes
5//! every sensitivity of the fit one object — the factored fitted curvature
6//! applied to a perturbation of the score:
7//!
8//! ```text
9//!   ∂β̂/∂t = −H⁻¹ · ∂g/∂t
10//! ```
11//!
12//! for ANY perturbation channel `t`: smoothing parameters (the REML outer
13//! gradient), case weights (ALO / leave-one-out / Cook's distance),
14//! responses (data attribution). One identity, read off in whichever
15//! direction a diagnostic needs it.
16//!
17//! Before this, the tree computed `H⁻¹·` in independent dialects with
18//! independent factorizations — `AloFactoredHessian` (runtime.rs), an
19//! `ift_dbeta_drho_from_solver` solve-closure and a separate coned variant
20//! (evidence.rs), and the projected pseudo-inverse of the rank-deficient
21//! LAML kernel (unified.rs) — so each site had to answer on its own the
22//! question that actually causes bugs: **which inverse is "H⁻¹"?** The
23//! large-scale fix 0dc469bd and the #901 layer-2 investigation are both
24//! incidents of two sites answering differently.
25//!
26//! [`FitSensitivity`] is the single answer. It is built once at the optimum
27//! from whichever factored form the solver already has — a faer Cholesky
28//! factor, a raw lower-triangular (arrow-Schur) factor, or the projected
29//! pseudo-inverse `U · M⁻¹ · Uᵀ` (the #752/#901 intrinsic-quotient
30//! convention) — and every consumer asks it, never a factor directly.
31//! Consumers therefore cannot disagree about the inverse, and every
32//! batching/cone improvement made inside [`FitSensitivity::apply_multi`] is
33//! inherited by all of them at once.
34//!
35//! The channels, each a one-line restatement of the identity above:
36//!
37//! - [`mode_response`](FitSensitivity::mode_response) — `−H⁻¹ ∂g/∂t`, the
38//!   REML outer gradient's `∂β̂/∂ρ` (evidence `ift_dbeta_drho`).
39//! - [`mode_response_coned`](FitSensitivity::mode_response_coned) — the same
40//!   response confined to its cone of influence (#779); the lazy/local form
41//!   the smoothing-correction IFT uses.
42//! - [`leverage_block`](FitSensitivity::leverage_block) — `H⁻¹Xᵀ`, whose
43//!   column `i` is at once ALO's per-row solve and the case/response channel.
44//! - [`case_deletion`](FitSensitivity::case_deletion) — dfbetas + Cook's
45//!   distance, the leave-one-out channel, one scaled column of `H⁻¹Xᵀ` each.
46//!
47//! What is deliberately NOT folded in: the matrix-free `hop.solve_multi`
48//! (PCG/GPU), the constrained kernel `K_T = K_S − K_S Aᵀ(A K_S Aᵀ)⁻¹A K_S`,
49//! and `alo.rs`'s zero-copy `StableSolver` loop. Those are distinct inverse
50//! *representations*, not duplicate spellings of the same factored inverse —
51//! routing them through here would regress performance and couple unrelated
52//! concerns rather than remove the bug class.
53
54use ndarray::{Array1, Array2, ArrayView1, ArrayView2};
55
56use gam_linalg::faer_ndarray::FaerCholeskyFactor;
57
58/// The fitted curvature in whichever factored form the solver produced —
59/// the SINGLE place that knows how to invert it.
60pub enum FittedInverse<'a> {
61    /// Cholesky factor of the (stabilized) penalized Hessian: the
62    /// full-rank convention (PIRLS / ALO path).
63    FaerCholesky(&'a FaerCholeskyFactor),
64    /// Raw lower-triangular Cholesky factor `L` with `H = L·Lᵀ` (the
65    /// arrow-Schur reduced factor in evidence.rs).
66    LowerTriangular(&'a Array2<f64>),
67    /// Projected (pseudo-)inverse `U · M⁻¹ · Uᵀ` over a column basis `U`
68    /// (p × r) with reduced inverse `M⁻¹` (r × r) — the rank-deficient
69    /// LAML convention (#752/0dc469bd/#901): the inverse acts on
70    /// range(U) and annihilates its complement.
71    Projected {
72        basis: &'a Array2<f64>,
73        reduced_inverse: &'a Array2<f64>,
74    },
75}
76
77/// The one sensitivity operator built at the optimum. See module docs.
78pub struct FitSensitivity<'a> {
79    inverse: FittedInverse<'a>,
80    dim: usize,
81}
82
83impl<'a> FitSensitivity<'a> {
84    pub fn from_faer_cholesky(factor: &'a FaerCholeskyFactor, dim: usize) -> Self {
85        Self {
86            inverse: FittedInverse::FaerCholesky(factor),
87            dim,
88        }
89    }
90
91    pub fn from_lower_triangular(factor: &'a Array2<f64>) -> Self {
92        let dim = factor.nrows();
93        Self {
94            inverse: FittedInverse::LowerTriangular(factor),
95            dim,
96        }
97    }
98
99    pub fn from_projected(basis: &'a Array2<f64>, reduced_inverse: &'a Array2<f64>) -> Self {
100        let dim = basis.nrows();
101        Self {
102            inverse: FittedInverse::Projected {
103                basis,
104                reduced_inverse,
105            },
106            dim,
107        }
108    }
109
110    /// Coefficient dimension `p` the operator acts on.
111    pub fn dim(&self) -> usize {
112        self.dim
113    }
114
115    /// `H⁻¹ · rhs` (pseudo-inverse action for the projected variant).
116    pub fn apply(&self, rhs: &Array1<f64>) -> Array1<f64> {
117        assert_eq!(rhs.len(), self.dim, "FitSensitivity rhs dimension");
118        match &self.inverse {
119            FittedInverse::FaerCholesky(factor) => factor.solvevec(rhs),
120            FittedInverse::LowerTriangular(factor) => {
121                gam_linalg::triangular::cholesky_solve_vector(factor.view(), rhs.view())
122            }
123            FittedInverse::Projected {
124                basis,
125                reduced_inverse,
126            } => {
127                // `U · (M⁻¹ · (Uᵀ · a))` via faer SIMD contractions — the
128                // single spelling of the projected (rank-deficient LAML)
129                // inverse, shared with `PenaltySubspaceTrace`.
130                let proj = gam_linalg::faer_ndarray::fast_atv(basis, rhs);
131                let reduced = reduced_inverse.dot(&proj);
132                gam_linalg::faer_ndarray::fast_av(basis, &reduced)
133            }
134        }
135    }
136
137    /// `H⁻¹ · RHS` for a (p × m) block of right-hand sides — the batched
138    /// form every multi-channel consumer should use (outer ρ-pair solves,
139    /// ALO's `H⁻¹Xᵀ` leverage block) so the factor is traversed once per
140    /// block instead of once per column.
141    pub fn apply_multi(&self, rhs: ArrayView2<'_, f64>) -> Array2<f64> {
142        assert_eq!(rhs.nrows(), self.dim, "FitSensitivity RHS dimension");
143        match &self.inverse {
144            FittedInverse::FaerCholesky(factor) => {
145                let mut out = rhs.to_owned();
146                factor.solve_mat_in_place(&mut out);
147                out
148            }
149            FittedInverse::LowerTriangular(factor) => {
150                gam_linalg::triangular::cholesky_solve_matrix(*factor, rhs)
151            }
152            FittedInverse::Projected {
153                basis,
154                reduced_inverse,
155            } => {
156                let reduced = gam_linalg::faer_ndarray::fast_atb(basis, &rhs.to_owned());
157                gam_linalg::faer_ndarray::fast_ab(basis, &reduced_inverse.dot(&reduced))
158            }
159        }
160    }
161
162    /// The IFT mode response `∂β̂/∂t = −H⁻¹ · ∂g/∂t` for a (p × m) block
163    /// of score perturbations — THE object of #935.
164    ///
165    /// Returns `None` if any solved entry is non-finite (the factored
166    /// curvature was unusable for this channel); callers must not
167    /// substitute an approximation, matching the contract of the deleted
168    /// `ift_dbeta_drho_from_solver`.
169    pub fn mode_response(&self, dg_dt: ArrayView2<'_, f64>) -> Option<Array2<f64>> {
170        if dg_dt.nrows() != self.dim {
171            return None;
172        }
173        let mut out = self.apply_multi(dg_dt);
174        if out.iter().any(|value| !value.is_finite()) {
175            return None;
176        }
177        out.mapv_inplace(|value| -value);
178        Some(out)
179    }
180
181    /// Cone-of-influence mode response (#779), the lazy/local form of
182    /// [`Self::mode_response`]. Each perturbation column `∂g/∂t_a` is
183    /// structurally supported only within `col_supports[a]`, so its response
184    /// `−H⁻¹ ∂g/∂t_a` is exactly zero outside the coupling component of
185    /// `hessian` containing that support. Columns whose support is empty (a
186    /// structurally inactive channel) are skipped with no solve; the active
187    /// columns are solved as ONE batched block through [`Self::apply_multi`]
188    /// — strictly better than the per-column BLAS-2 loop this replaces — and
189    /// each result confined to its cone. On a fully coupled `hessian` every
190    /// cone is the whole space and the result equals [`Self::mode_response`]
191    /// bit-for-bit.
192    ///
193    /// `hessian` must be the same curvature this operator inverts; a
194    /// dimension mismatch (or any non-finite solved entry) returns `None`
195    /// rather than silently substituting an approximation.
196    pub fn mode_response_coned(
197        &self,
198        hessian: ArrayView2<'_, f64>,
199        dg_dt: ArrayView2<'_, f64>,
200        col_supports: &[std::ops::Range<usize>],
201    ) -> Option<Array2<f64>> {
202        let p = self.dim;
203        let r = dg_dt.ncols();
204        if dg_dt.nrows() != p
205            || hessian.nrows() != p
206            || hessian.ncols() != p
207            || col_supports.len() != r
208        {
209            return None;
210        }
211        let labels = crate::evidence::coupling_components(hessian);
212        if labels.len() != p {
213            return None;
214        }
215
216        // Active columns + their cones; structurally inactive columns (empty
217        // support → empty cone) contribute an identically-zero sensitivity
218        // and are skipped entirely (no solve).
219        let mut active: Vec<(usize, Vec<usize>)> = Vec::new();
220        for a in 0..r {
221            let sr = &col_supports[a];
222            let support: Vec<usize> = (sr.start..sr.end)
223                .filter(|idx| *idx < p)
224                .filter(|idx| dg_dt[[*idx, a]] != 0.0)
225                .collect();
226            let cone = crate::evidence::cone_of_influence(&labels, &support);
227            if !cone.is_empty() {
228                active.push((a, cone));
229            }
230        }
231
232        let mut out = Array2::<f64>::zeros((p, r));
233        if active.is_empty() {
234            return Some(out);
235        }
236        // One batched solve over only the active columns.
237        let mut rhs = Array2::<f64>::zeros((p, active.len()));
238        for (j, (a, _)) in active.iter().enumerate() {
239            rhs.column_mut(j).assign(&dg_dt.column(*a));
240        }
241        let solved = self.apply_multi(rhs.view());
242        if solved.iter().any(|value| !value.is_finite()) {
243            return None;
244        }
245        for (j, (a, cone)) in active.iter().enumerate() {
246            for &row in cone {
247                out[[row, *a]] = -solved[[row, j]];
248            }
249        }
250        Some(out)
251    }
252
253    /// `H⁻¹Xᵀ` (p × n) — the shared leverage/case-sensitivity block: its
254    /// column i is simultaneously ALO's per-observation solve, the case-
255    /// weight channel `∂g/∂w_i ∝ x_i`, and the response channel
256    /// `∂g/∂y_i ∝ x_i`. One blocked solve serves all three diagnostics.
257    pub fn leverage_block(&self, design: &Array2<f64>) -> Array2<f64> {
258        assert_eq!(design.ncols(), self.dim, "FitSensitivity design width");
259        self.apply_multi(design.t())
260    }
261
262    /// Data attribution `∂β̂/∂y` (p × n) — how each fitted coefficient
263    /// responds to each response value, the `t = y` channel of the one
264    /// identity `∂β̂/∂t = −H⁻¹ ∂g/∂t`.
265    ///
266    /// The response enters the penalized score only through the working
267    /// residual, so `∂g/∂y_i = −w_i x_i` and therefore
268    ///
269    /// ```text
270    ///   ∂β̂/∂y_i = w_i · H⁻¹ x_i,
271    /// ```
272    /// i.e. column `i` of [`Self::leverage_block`] scaled by the working
273    /// weight `w_i`. Contracting back through the design recovers the
274    /// smoother/hat matrix `A = X (∂β̂/∂y) = X H⁻¹ Xᵀ W`, whose diagonal is
275    /// the leverage already reported elsewhere. For a Gaussian penalized fit
276    /// `β̂ = H⁻¹ Xᵀ y`, so this Jacobian is exact (and weight-free); for a GLM
277    /// it is the one-step attribution at the fitted working weights.
278    ///
279    /// Returns `None` on a shape mismatch.
280    pub fn response_jacobian(
281        &self,
282        design: &Array2<f64>,
283        working_weights: ArrayView1<'_, f64>,
284    ) -> Option<Array2<f64>> {
285        let n = design.nrows();
286        if design.ncols() != self.dim || working_weights.len() != n {
287            return None;
288        }
289        // Column i is H⁻¹ x_i; scale it by w_i to get ∂β̂/∂y_i.
290        let mut dbeta_dy = self.leverage_block(design);
291        for i in 0..n {
292            let w_i = working_weights[i];
293            dbeta_dy.column_mut(i).mapv_inplace(|v| w_i * v);
294        }
295        Some(dbeta_dy)
296    }
297
298    /// Case-deletion influence (dfbetas + Cook's distance) for every
299    /// observation, built from the one sensitivity operator — the
300    /// "leave-one-out" channel #935 was designed to unify.
301    ///
302    /// Deleting observation `i` perturbs the penalized score by exactly its
303    /// own contribution, so the IFT mode response gives the coefficient
304    /// change in closed form (the penalized Sherman–Morrison identity):
305    ///
306    /// ```text
307    ///   β̂ − β̂₍ᵢ₎ = (w_i r_i / (1 − h_ii)) · H⁻¹ x_i
308    /// ```
309    ///
310    /// where `x_i` is row `i` of `design`, `w_i = working_weights[i]` the
311    /// IRLS working weight, `r_i = working_residual[i]` the working residual
312    /// `z_i − x_iᵀβ̂`, and `h_ii = w_i x_iᵀ H⁻¹ x_i` the leverage. Column `i`
313    /// of [`Self::leverage_block`] **is** `H⁻¹ x_i`, so each dfbeta is one
314    /// scaled column — no per-observation refit, no second factorization.
315    /// For a Gaussian penalized fit the identity is exact; for a GLM it is
316    /// the standard one-step (ALO) approximation, consistent with the
317    /// leverage already reported by `AloDiagnostics`.
318    ///
319    /// Cook's distance uses the metric the fit actually moves in,
320    /// `H = XᵀWX + S`:
321    ///
322    /// ```text
323    ///   D_i = (β̂−β̂₍ᵢ₎)ᵀ H (β̂−β̂₍ᵢ₎) / (p · φ)
324    ///       = scale_i² · (x_iᵀ H⁻¹ x_i) / (p · φ),   scale_i = w_i r_i / (1 − h_ii),
325    /// ```
326    ///
327    /// the second form following from `(H⁻¹x_i)ᵀ H (H⁻¹x_i) = x_iᵀ H⁻¹ x_i`,
328    /// so the single quadratic form `x_iᵀ H⁻¹ x_i` gates the leverage, the
329    /// deletion denominator, and Cook's distance alike — no separate `H` apply.
330    ///
331    /// This is an *opt-in* diagnostic: `dfbeta` is `n × p` and is never
332    /// materialized on the default fit path (it would be ruinous at large-scale
333    /// scale). Returns `None` on a shape mismatch or if any leverage reaches
334    /// `1` (a point the deletion identity cannot resolve).
335    pub fn case_deletion(
336        &self,
337        design: &Array2<f64>,
338        working_weights: ArrayView1<'_, f64>,
339        working_residual: ArrayView1<'_, f64>,
340        phi: f64,
341    ) -> Option<CaseDeletionInfluence> {
342        let n = design.nrows();
343        let p = design.ncols();
344        if p != self.dim
345            || working_weights.len() != n
346            || working_residual.len() != n
347            || !(phi.is_finite() && phi > 0.0)
348            || p == 0
349        {
350            return None;
351        }
352        // Column i of H⁻¹Xᵀ is H⁻¹ x_i — one blocked solve for all n.
353        let h_inv_xt = self.leverage_block(design);
354
355        let mut dfbeta = Array2::<f64>::zeros((n, p));
356        let mut leverage = Array1::<f64>::zeros(n);
357        let mut cooks = Array1::<f64>::zeros(n);
358        let p_phi = p as f64 * phi;
359        for i in 0..n {
360            // hinv_xi = H⁻¹x_i is column i of the leverage block; the single
361            // quadratic form x_iᵀ H⁻¹ x_i gates everything below.
362            let hinv_xi = h_inv_xt.column(i);
363            let xhx = design.row(i).dot(&hinv_xi);
364            let h_ii = working_weights[i] * xhx;
365            let denom = 1.0 - h_ii;
366            // Leverage 1 pins the row to its own fit: the closed-form
367            // deletion is singular there, so we refuse rather than emit ∞.
368            if !denom.is_finite() || denom.abs() < f64::EPSILON {
369                return None;
370            }
371            // β̂ − β̂₍ᵢ₎ = scale · H⁻¹x_i — one scaled column, no refit.
372            let scale = working_weights[i] * working_residual[i] / denom;
373            dfbeta.row_mut(i).assign(&(&hinv_xi * scale));
374            leverage[i] = h_ii;
375            cooks[i] = scale * scale * xhx / p_phi;
376        }
377        Some(CaseDeletionInfluence {
378            dfbeta,
379            leverage,
380            cooks_distance: cooks,
381        })
382    }
383}
384
385/// Exact (Gaussian) / one-step (GLM) case-deletion influence produced by
386/// [`FitSensitivity::case_deletion`]. See that method for the identities.
387pub struct CaseDeletionInfluence {
388    /// `dfbeta[[i, j]]` = change in coefficient `j` when observation `i` is
389    /// left out, `β̂_j − β̂₍ᵢ₎_j`.
390    pub dfbeta: Array2<f64>,
391    /// Leverage (hat value) `h_ii = w_i x_iᵀ H⁻¹ x_i` per observation.
392    pub leverage: Array1<f64>,
393    /// Cook's distance per observation.
394    pub cooks_distance: Array1<f64>,
395}
396
397#[cfg(test)]
398mod tests {
399    use super::*;
400    use gam_linalg::faer_ndarray::FaerCholesky;
401    use faer::Side;
402    use ndarray::array;
403
404    /// Textbook 3×3 lower-Cholesky, written out so the LowerTriangular
405    /// variant is tested against an independently constructed factor.
406    fn lower_cholesky_3x3(h: &Array2<f64>) -> Array2<f64> {
407        let mut l = Array2::<f64>::zeros((3, 3));
408        for i in 0..3 {
409            for j in 0..=i {
410                let mut acc = h[[i, j]];
411                for k in 0..j {
412                    acc -= l[[i, k]] * l[[j, k]];
413                }
414                if i == j {
415                    l[[i, j]] = acc.sqrt();
416                } else {
417                    l[[i, j]] = acc / l[[j, j]];
418                }
419            }
420        }
421        l
422    }
423
424    /// All three factored forms of the SAME matrix must agree on every
425    /// entry point — this is the "no site can pick a different inverse"
426    /// guarantee, checked directly.
427    #[test]
428    fn all_variants_agree_on_the_same_curvature() {
429        let h = array![[4.0, 1.0, 0.5], [1.0, 3.0, 0.2], [0.5, 0.2, 2.0]];
430        let faer = h.cholesky(Side::Lower).expect("SPD factor");
431        let lower = lower_cholesky_3x3(&h);
432        // Projected variant with full basis = the plain inverse.
433        let eye = Array2::eye(3);
434        let h_inv = {
435            let mut out: Array2<f64> = Array2::eye(3);
436            faer.solve_mat_in_place(&mut out);
437            out
438        };
439        let s_faer = FitSensitivity::from_faer_cholesky(&faer, 3);
440        let s_tri = FitSensitivity::from_lower_triangular(&lower);
441        let s_proj = FitSensitivity::from_projected(&eye, &h_inv);
442
443        let rhs = array![0.7, -1.2, 0.4];
444        let block = array![[0.7, 1.0], [-1.2, 0.0], [0.4, -2.0]];
445        let a = s_faer.apply(&rhs);
446        let b = s_tri.apply(&rhs);
447        let c = s_proj.apply(&rhs);
448        for i in 0..3 {
449            assert!((a[i] - b[i]).abs() <= 1e-12, "faer vs triangular [{i}]");
450            assert!((a[i] - c[i]).abs() <= 1e-12, "faer vs projected [{i}]");
451        }
452        let ma = s_faer.apply_multi(block.view());
453        let mb = s_tri.apply_multi(block.view());
454        let mc = s_proj.apply_multi(block.view());
455        let resp = s_faer.mode_response(block.view()).expect("finite");
456        for i in 0..3 {
457            for j in 0..2 {
458                assert!((ma[[i, j]] - mb[[i, j]]).abs() <= 1e-12);
459                assert!((ma[[i, j]] - mc[[i, j]]).abs() <= 1e-12);
460                assert!(
461                    (resp[[i, j]] + ma[[i, j]]).abs() <= 1e-15,
462                    "mode response sign"
463                );
464            }
465        }
466        // H · apply(rhs) must reproduce rhs (it is a true inverse here).
467        let back = h.dot(&a);
468        for i in 0..3 {
469            assert!((back[i] - rhs[i]).abs() <= 1e-12, "inverse residual [{i}]");
470        }
471    }
472
473    /// Case-deletion dfbetas from the operator must equal the EXACT
474    /// leave-one-out refit of a ridge-penalized Gaussian fit — brute-force
475    /// dropping each row and re-solving — to machine precision. This is the
476    /// penalized Sherman–Morrison identity, analytic ground truth, no
477    /// external tool. Cook's distance is checked against its own definition
478    /// `(β−β₍ᵢ₎)ᵀ H (β−β₍ᵢ₎)/(p·φ)` using the exact refit.
479    #[test]
480    fn case_deletion_matches_exact_loo_refit() {
481        // Small over-determined ridge problem: H = XᵀX + S, β = H⁻¹ Xᵀ y,
482        // working weights w_i = 1 and working residual r_i = y_i − x_iᵀβ
483        // (the Gaussian identity-link IRLS reduction).
484        let x = array![
485            [1.0, 0.2, -0.5],
486            [1.0, -1.1, 0.3],
487            [1.0, 0.7, 1.4],
488            [1.0, 2.0, -0.8],
489            [1.0, -0.4, 0.9],
490            [1.0, 1.3, 0.1],
491        ];
492        let y = array![0.5, -1.2, 2.1, 0.3, -0.7, 1.0];
493        let n = x.nrows();
494        let p = x.ncols();
495        // Penalty S (a mild ridge on the two slopes, intercept unpenalized).
496        let mut s = Array2::<f64>::zeros((p, p));
497        s[[1, 1]] = 0.4;
498        s[[2, 2]] = 0.4;
499
500        let xtx = x.t().dot(&x);
501        let h = &xtx + &s;
502        let h_inv = {
503            let f = h.cholesky(Side::Lower).expect("SPD");
504            let mut out: Array2<f64> = Array2::eye(p);
505            f.solve_mat_in_place(&mut out);
506            out
507        };
508        let xty = x.t().dot(&y);
509        let beta = h_inv.dot(&xty);
510
511        let w = Array1::<f64>::ones(n);
512        let resid = &y - &x.dot(&beta);
513
514        let faer = h.cholesky(Side::Lower).expect("SPD");
515        let op = FitSensitivity::from_faer_cholesky(&faer, p);
516        let infl = op
517            .case_deletion(&x, w.view(), resid.view(), 1.0)
518            .expect("case deletion");
519
520        for i in 0..n {
521            // Exact refit with row i removed: H₍ᵢ₎ = H − x_i x_iᵀ (penalty S
522            // is a prior, unchanged by data deletion), rhs = Xᵀy − x_i y_i.
523            let x_i = x.row(i).to_owned();
524            let mut h_del = h.clone();
525            for a in 0..p {
526                for b in 0..p {
527                    h_del[[a, b]] -= x_i[a] * x_i[b];
528                }
529            }
530            let rhs_del = &xty - &(&x_i * y[i]);
531            let h_del_inv = {
532                let f = h_del.cholesky(Side::Lower).expect("SPD deleted");
533                let mut out: Array2<f64> = Array2::eye(p);
534                f.solve_mat_in_place(&mut out);
535                out
536            };
537            let beta_del = h_del_inv.dot(&rhs_del);
538            let exact_dfbeta = &beta - &beta_del;
539
540            for j in 0..p {
541                assert!(
542                    (infl.dfbeta[[i, j]] - exact_dfbeta[j]).abs() < 1e-9,
543                    "dfbeta[{i},{j}]: operator {} vs exact refit {}",
544                    infl.dfbeta[[i, j]],
545                    exact_dfbeta[j]
546                );
547            }
548            // Cook's distance against its definition with the exact refit.
549            let cook_exact = exact_dfbeta.dot(&h.dot(&exact_dfbeta)) / (p as f64 * 1.0);
550            assert!(
551                (infl.cooks_distance[i] - cook_exact).abs() < 1e-9,
552                "cooks[{i}]: {} vs {}",
553                infl.cooks_distance[i],
554                cook_exact
555            );
556            // Leverage must match the hat value x_iᵀ H⁻¹ x_i (w_i = 1).
557            let h_ii = x_i.dot(&h_inv.dot(&x_i));
558            assert!((infl.leverage[i] - h_ii).abs() < 1e-12, "leverage[{i}]");
559        }
560    }
561
562    /// Data attribution `∂β̂/∂y` must equal the actual change in the fitted
563    /// coefficients when a response value is perturbed and the ridge fit is
564    /// re-solved — exact for the Gaussian penalized model (`β̂ = H⁻¹Xᵀy`).
565    #[test]
566    fn response_jacobian_matches_refit_perturbation() {
567        let x = array![
568            [1.0, 0.2, -0.5],
569            [1.0, -1.1, 0.3],
570            [1.0, 0.7, 1.4],
571            [1.0, 2.0, -0.8],
572            [1.0, -0.4, 0.9],
573        ];
574        let y = array![0.5, -1.2, 2.1, 0.3, -0.7];
575        let n = x.nrows();
576        let p = x.ncols();
577        let mut s = Array2::<f64>::zeros((p, p));
578        s[[1, 1]] = 0.4;
579        s[[2, 2]] = 0.4;
580        let h = &x.t().dot(&x) + &s;
581        let faer = h.cholesky(Side::Lower).expect("SPD");
582
583        let solve = |rhs: &Array1<f64>| faer.solvevec(rhs);
584        let beta = solve(&x.t().dot(&y));
585
586        let op = FitSensitivity::from_faer_cholesky(&faer, p);
587        let w = Array1::<f64>::ones(n);
588        let dbeta_dy = op.response_jacobian(&x, w.view()).expect("jacobian");
589
590        // Exact (linear) refit: bump y_j, re-solve, compare (β'−β)/ε to col j.
591        let eps = 1e-6;
592        for j in 0..n {
593            let mut yp = y.clone();
594            yp[j] += eps;
595            let beta_p = solve(&x.t().dot(&yp));
596            for c in 0..p {
597                let fd = (beta_p[c] - beta[c]) / eps;
598                assert!(
599                    (dbeta_dy[[c, j]] - fd).abs() < 1e-7,
600                    "∂β̂_{c}/∂y_{j}: analytic {} vs refit {}",
601                    dbeta_dy[[c, j]],
602                    fd
603                );
604            }
605        }
606    }
607
608    #[test]
609    fn mode_response_refuses_non_finite_channels() {
610        let h = array![[2.0, 0.0], [0.0, 1.0]];
611        let faer = h.cholesky(Side::Lower).expect("SPD factor");
612        let s = FitSensitivity::from_faer_cholesky(&faer, 2);
613        let bad = array![[1.0], [f64::NAN]];
614        assert!(s.mode_response(bad.view()).is_none());
615        let wrong_dim = array![[1.0], [0.0], [0.0]];
616        assert!(s.mode_response(wrong_dim.view()).is_none());
617    }
618}
gam_solve/sensitivity.rs

gam_solve/
sensitivity.rs