gam 0.3.121

Generalized penalized likelihood engine
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
//! ONE sensitivity operator (#935): every "how does the fit move?"
//! question is the same solve.
//!
//! At a penalized optimum the stationarity condition `g(β̂; t) = 0` makes
//! every sensitivity of the fit one object — the factored fitted curvature
//! applied to a perturbation of the score:
//!
//! ```text
//!   ∂β̂/∂t = −H⁻¹ · ∂g/∂t
//! ```
//!
//! for ANY perturbation channel `t`: smoothing parameters (the REML outer
//! gradient), case weights (ALO / leave-one-out / Cook's distance),
//! responses (data attribution). One identity, read off in whichever
//! direction a diagnostic needs it.
//!
//! Before this, the tree computed `H⁻¹·` in independent dialects with
//! independent factorizations — `AloFactoredHessian` (runtime.rs), an
//! `ift_dbeta_drho_from_solver` solve-closure and a separate coned variant
//! (evidence.rs), and the projected pseudo-inverse of the rank-deficient
//! LAML kernel (unified.rs) — so each site had to answer on its own the
//! question that actually causes bugs: **which inverse is "H⁻¹"?** The
//! large-scale fix 0dc469bd and the #901 layer-2 investigation are both
//! incidents of two sites answering differently.
//!
//! [`FitSensitivity`] is the single answer. It is built once at the optimum
//! from whichever factored form the solver already has — a faer Cholesky
//! factor, a raw lower-triangular (arrow-Schur) factor, or the projected
//! pseudo-inverse `U · M⁻¹ · Uᵀ` (the #752/#901 intrinsic-quotient
//! convention) — and every consumer asks it, never a factor directly.
//! Consumers therefore cannot disagree about the inverse, and every
//! batching/cone improvement made inside [`FitSensitivity::apply_multi`] is
//! inherited by all of them at once.
//!
//! The channels, each a one-line restatement of the identity above:
//!
//! - [`mode_response`](FitSensitivity::mode_response) — `−H⁻¹ ∂g/∂t`, the
//!   REML outer gradient's `∂β̂/∂ρ` (evidence `ift_dbeta_drho`).
//! - [`mode_response_coned`](FitSensitivity::mode_response_coned) — the same
//!   response confined to its cone of influence (#779); the lazy/local form
//!   the smoothing-correction IFT uses.
//! - [`leverage_block`](FitSensitivity::leverage_block) — `H⁻¹Xᵀ`, whose
//!   column `i` is at once ALO's per-row solve and the case/response channel.
//! - [`case_deletion`](FitSensitivity::case_deletion) — dfbetas + Cook's
//!   distance, the leave-one-out channel, one scaled column of `H⁻¹Xᵀ` each.
//!
//! What is deliberately NOT folded in: the matrix-free `hop.solve_multi`
//! (PCG/GPU), the constrained kernel `K_T = K_S − K_S Aᵀ(A K_S Aᵀ)⁻¹A K_S`,
//! and `alo.rs`'s zero-copy `StableSolver` loop. Those are distinct inverse
//! *representations*, not duplicate spellings of the same factored inverse —
//! routing them through here would regress performance and couple unrelated
//! concerns rather than remove the bug class.

use ndarray::{Array1, Array2, ArrayView1, ArrayView2};

use crate::linalg::faer_ndarray::FaerCholeskyFactor;

/// The fitted curvature in whichever factored form the solver produced —
/// the SINGLE place that knows how to invert it.
pub enum FittedInverse<'a> {
    /// Cholesky factor of the (stabilized) penalized Hessian: the
    /// full-rank convention (PIRLS / ALO path).
    FaerCholesky(&'a FaerCholeskyFactor),
    /// Raw lower-triangular Cholesky factor `L` with `H = L·Lᵀ` (the
    /// arrow-Schur reduced factor in evidence.rs).
    LowerTriangular(&'a Array2<f64>),
    /// Projected (pseudo-)inverse `U · M⁻¹ · Uᵀ` over a column basis `U`
    /// (p × r) with reduced inverse `M⁻¹` (r × r) — the rank-deficient
    /// LAML convention (#752/0dc469bd/#901): the inverse acts on
    /// range(U) and annihilates its complement.
    Projected {
        basis: &'a Array2<f64>,
        reduced_inverse: &'a Array2<f64>,
    },
}

/// The one sensitivity operator built at the optimum. See module docs.
pub struct FitSensitivity<'a> {
    inverse: FittedInverse<'a>,
    dim: usize,
}

impl<'a> FitSensitivity<'a> {
    pub fn from_faer_cholesky(factor: &'a FaerCholeskyFactor, dim: usize) -> Self {
        Self {
            inverse: FittedInverse::FaerCholesky(factor),
            dim,
        }
    }

    pub fn from_lower_triangular(factor: &'a Array2<f64>) -> Self {
        let dim = factor.nrows();
        Self {
            inverse: FittedInverse::LowerTriangular(factor),
            dim,
        }
    }

    pub fn from_projected(basis: &'a Array2<f64>, reduced_inverse: &'a Array2<f64>) -> Self {
        let dim = basis.nrows();
        Self {
            inverse: FittedInverse::Projected {
                basis,
                reduced_inverse,
            },
            dim,
        }
    }

    /// Coefficient dimension `p` the operator acts on.
    pub fn dim(&self) -> usize {
        self.dim
    }

    /// `H⁻¹ · rhs` (pseudo-inverse action for the projected variant).
    pub fn apply(&self, rhs: &Array1<f64>) -> Array1<f64> {
        assert_eq!(rhs.len(), self.dim, "FitSensitivity rhs dimension");
        match &self.inverse {
            FittedInverse::FaerCholesky(factor) => factor.solvevec(rhs),
            FittedInverse::LowerTriangular(factor) => {
                crate::linalg::triangular::cholesky_solve_vector(factor.view(), rhs.view())
            }
            FittedInverse::Projected {
                basis,
                reduced_inverse,
            } => {
                // `U · (M⁻¹ · (Uᵀ · a))` via faer SIMD contractions — the
                // single spelling of the projected (rank-deficient LAML)
                // inverse, shared with `PenaltySubspaceTrace`.
                let proj = crate::linalg::faer_ndarray::fast_atv(basis, rhs);
                let reduced = reduced_inverse.dot(&proj);
                crate::linalg::faer_ndarray::fast_av(basis, &reduced)
            }
        }
    }

    /// `H⁻¹ · RHS` for a (p × m) block of right-hand sides — the batched
    /// form every multi-channel consumer should use (outer ρ-pair solves,
    /// ALO's `H⁻¹Xᵀ` leverage block) so the factor is traversed once per
    /// block instead of once per column.
    pub fn apply_multi(&self, rhs: ArrayView2<'_, f64>) -> Array2<f64> {
        assert_eq!(rhs.nrows(), self.dim, "FitSensitivity RHS dimension");
        match &self.inverse {
            FittedInverse::FaerCholesky(factor) => {
                let mut out = rhs.to_owned();
                factor.solve_mat_in_place(&mut out);
                out
            }
            FittedInverse::LowerTriangular(factor) => {
                crate::linalg::triangular::cholesky_solve_matrix(*factor, rhs)
            }
            FittedInverse::Projected {
                basis,
                reduced_inverse,
            } => {
                let reduced = crate::linalg::faer_ndarray::fast_atb(basis, &rhs.to_owned());
                crate::linalg::faer_ndarray::fast_ab(basis, &reduced_inverse.dot(&reduced))
            }
        }
    }

    /// The IFT mode response `∂β̂/∂t = −H⁻¹ · ∂g/∂t` for a (p × m) block
    /// of score perturbations — THE object of #935.
    ///
    /// Returns `None` if any solved entry is non-finite (the factored
    /// curvature was unusable for this channel); callers must not
    /// substitute an approximation, matching the contract of the deleted
    /// `ift_dbeta_drho_from_solver`.
    pub fn mode_response(&self, dg_dt: ArrayView2<'_, f64>) -> Option<Array2<f64>> {
        if dg_dt.nrows() != self.dim {
            return None;
        }
        let mut out = self.apply_multi(dg_dt);
        if out.iter().any(|value| !value.is_finite()) {
            return None;
        }
        out.mapv_inplace(|value| -value);
        Some(out)
    }

    /// Cone-of-influence mode response (#779), the lazy/local form of
    /// [`Self::mode_response`]. Each perturbation column `∂g/∂t_a` is
    /// structurally supported only within `col_supports[a]`, so its response
    /// `−H⁻¹ ∂g/∂t_a` is exactly zero outside the coupling component of
    /// `hessian` containing that support. Columns whose support is empty (a
    /// structurally inactive channel) are skipped with no solve; the active
    /// columns are solved as ONE batched block through [`Self::apply_multi`]
    /// — strictly better than the per-column BLAS-2 loop this replaces — and
    /// each result confined to its cone. On a fully coupled `hessian` every
    /// cone is the whole space and the result equals [`Self::mode_response`]
    /// bit-for-bit.
    ///
    /// `hessian` must be the same curvature this operator inverts; a
    /// dimension mismatch (or any non-finite solved entry) returns `None`
    /// rather than silently substituting an approximation.
    pub fn mode_response_coned(
        &self,
        hessian: ArrayView2<'_, f64>,
        dg_dt: ArrayView2<'_, f64>,
        col_supports: &[std::ops::Range<usize>],
    ) -> Option<Array2<f64>> {
        let p = self.dim;
        let r = dg_dt.ncols();
        if dg_dt.nrows() != p
            || hessian.nrows() != p
            || hessian.ncols() != p
            || col_supports.len() != r
        {
            return None;
        }
        let labels = crate::solver::evidence::coupling_components(hessian);
        if labels.len() != p {
            return None;
        }

        // Active columns + their cones; structurally inactive columns (empty
        // support → empty cone) contribute an identically-zero sensitivity
        // and are skipped entirely (no solve).
        let mut active: Vec<(usize, Vec<usize>)> = Vec::new();
        for a in 0..r {
            let sr = &col_supports[a];
            let support: Vec<usize> = (sr.start..sr.end)
                .filter(|idx| *idx < p)
                .filter(|idx| dg_dt[[*idx, a]] != 0.0)
                .collect();
            let cone = crate::solver::evidence::cone_of_influence(&labels, &support);
            if !cone.is_empty() {
                active.push((a, cone));
            }
        }

        let mut out = Array2::<f64>::zeros((p, r));
        if active.is_empty() {
            return Some(out);
        }
        // One batched solve over only the active columns.
        let mut rhs = Array2::<f64>::zeros((p, active.len()));
        for (j, (a, _)) in active.iter().enumerate() {
            rhs.column_mut(j).assign(&dg_dt.column(*a));
        }
        let solved = self.apply_multi(rhs.view());
        if solved.iter().any(|value| !value.is_finite()) {
            return None;
        }
        for (j, (a, cone)) in active.iter().enumerate() {
            for &row in cone {
                out[[row, *a]] = -solved[[row, j]];
            }
        }
        Some(out)
    }

    /// `H⁻¹Xᵀ` (p × n) — the shared leverage/case-sensitivity block: its
    /// column i is simultaneously ALO's per-observation solve, the case-
    /// weight channel `∂g/∂w_i ∝ x_i`, and the response channel
    /// `∂g/∂y_i ∝ x_i`. One blocked solve serves all three diagnostics.
    pub fn leverage_block(&self, design: &Array2<f64>) -> Array2<f64> {
        assert_eq!(design.ncols(), self.dim, "FitSensitivity design width");
        self.apply_multi(design.t())
    }

    /// Data attribution `∂β̂/∂y` (p × n) — how each fitted coefficient
    /// responds to each response value, the `t = y` channel of the one
    /// identity `∂β̂/∂t = −H⁻¹ ∂g/∂t`.
    ///
    /// The response enters the penalized score only through the working
    /// residual, so `∂g/∂y_i = −w_i x_i` and therefore
    ///
    /// ```text
    ///   ∂β̂/∂y_i = w_i · H⁻¹ x_i,
    /// ```
    /// i.e. column `i` of [`Self::leverage_block`] scaled by the working
    /// weight `w_i`. Contracting back through the design recovers the
    /// smoother/hat matrix `A = X (∂β̂/∂y) = X H⁻¹ Xᵀ W`, whose diagonal is
    /// the leverage already reported elsewhere. For a Gaussian penalized fit
    /// `β̂ = H⁻¹ Xᵀ y`, so this Jacobian is exact (and weight-free); for a GLM
    /// it is the one-step attribution at the fitted working weights.
    ///
    /// Returns `None` on a shape mismatch.
    pub fn response_jacobian(
        &self,
        design: &Array2<f64>,
        working_weights: ArrayView1<'_, f64>,
    ) -> Option<Array2<f64>> {
        let n = design.nrows();
        if design.ncols() != self.dim || working_weights.len() != n {
            return None;
        }
        // Column i is H⁻¹ x_i; scale it by w_i to get ∂β̂/∂y_i.
        let mut dbeta_dy = self.leverage_block(design);
        for i in 0..n {
            let w_i = working_weights[i];
            dbeta_dy.column_mut(i).mapv_inplace(|v| w_i * v);
        }
        Some(dbeta_dy)
    }

    /// Case-deletion influence (dfbetas + Cook's distance) for every
    /// observation, built from the one sensitivity operator — the
    /// "leave-one-out" channel #935 was designed to unify.
    ///
    /// Deleting observation `i` perturbs the penalized score by exactly its
    /// own contribution, so the IFT mode response gives the coefficient
    /// change in closed form (the penalized Sherman–Morrison identity):
    ///
    /// ```text
    ///   β̂ − β̂₍ᵢ₎ = (w_i r_i / (1 − h_ii)) · H⁻¹ x_i
    /// ```
    ///
    /// where `x_i` is row `i` of `design`, `w_i = working_weights[i]` the
    /// IRLS working weight, `r_i = working_residual[i]` the working residual
    /// `z_i − x_iᵀβ̂`, and `h_ii = w_i x_iᵀ H⁻¹ x_i` the leverage. Column `i`
    /// of [`Self::leverage_block`] **is** `H⁻¹ x_i`, so each dfbeta is one
    /// scaled column — no per-observation refit, no second factorization.
    /// For a Gaussian penalized fit the identity is exact; for a GLM it is
    /// the standard one-step (ALO) approximation, consistent with the
    /// leverage already reported by `AloDiagnostics`.
    ///
    /// Cook's distance uses the metric the fit actually moves in,
    /// `H = XᵀWX + S`:
    ///
    /// ```text
    ///   D_i = (β̂−β̂₍ᵢ₎)ᵀ H (β̂−β̂₍ᵢ₎) / (p · φ)
    ///       = scale_i² · (x_iᵀ H⁻¹ x_i) / (p · φ),   scale_i = w_i r_i / (1 − h_ii),
    /// ```
    ///
    /// the second form following from `(H⁻¹x_i)ᵀ H (H⁻¹x_i) = x_iᵀ H⁻¹ x_i`,
    /// so the single quadratic form `x_iᵀ H⁻¹ x_i` gates the leverage, the
    /// deletion denominator, and Cook's distance alike — no separate `H` apply.
    ///
    /// This is an *opt-in* diagnostic: `dfbeta` is `n × p` and is never
    /// materialized on the default fit path (it would be ruinous at large-scale
    /// scale). Returns `None` on a shape mismatch or if any leverage reaches
    /// `1` (a point the deletion identity cannot resolve).
    pub fn case_deletion(
        &self,
        design: &Array2<f64>,
        working_weights: ArrayView1<'_, f64>,
        working_residual: ArrayView1<'_, f64>,
        phi: f64,
    ) -> Option<CaseDeletionInfluence> {
        let n = design.nrows();
        let p = design.ncols();
        if p != self.dim
            || working_weights.len() != n
            || working_residual.len() != n
            || !(phi.is_finite() && phi > 0.0)
            || p == 0
        {
            return None;
        }
        // Column i of H⁻¹Xᵀ is H⁻¹ x_i — one blocked solve for all n.
        let h_inv_xt = self.leverage_block(design);

        let mut dfbeta = Array2::<f64>::zeros((n, p));
        let mut leverage = Array1::<f64>::zeros(n);
        let mut cooks = Array1::<f64>::zeros(n);
        let p_phi = p as f64 * phi;
        for i in 0..n {
            // hinv_xi = H⁻¹x_i is column i of the leverage block; the single
            // quadratic form x_iᵀ H⁻¹ x_i gates everything below.
            let hinv_xi = h_inv_xt.column(i);
            let xhx = design.row(i).dot(&hinv_xi);
            let h_ii = working_weights[i] * xhx;
            let denom = 1.0 - h_ii;
            // Leverage 1 pins the row to its own fit: the closed-form
            // deletion is singular there, so we refuse rather than emit ∞.
            if !denom.is_finite() || denom.abs() < f64::EPSILON {
                return None;
            }
            // β̂ − β̂₍ᵢ₎ = scale · H⁻¹x_i — one scaled column, no refit.
            let scale = working_weights[i] * working_residual[i] / denom;
            dfbeta.row_mut(i).assign(&(&hinv_xi * scale));
            leverage[i] = h_ii;
            cooks[i] = scale * scale * xhx / p_phi;
        }
        Some(CaseDeletionInfluence {
            dfbeta,
            leverage,
            cooks_distance: cooks,
        })
    }
}

/// Exact (Gaussian) / one-step (GLM) case-deletion influence produced by
/// [`FitSensitivity::case_deletion`]. See that method for the identities.
pub struct CaseDeletionInfluence {
    /// `dfbeta[[i, j]]` = change in coefficient `j` when observation `i` is
    /// left out, `β̂_j − β̂₍ᵢ₎_j`.
    pub dfbeta: Array2<f64>,
    /// Leverage (hat value) `h_ii = w_i x_iᵀ H⁻¹ x_i` per observation.
    pub leverage: Array1<f64>,
    /// Cook's distance per observation.
    pub cooks_distance: Array1<f64>,
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::linalg::faer_ndarray::FaerCholesky;
    use faer::Side;
    use ndarray::array;

    /// Textbook 3×3 lower-Cholesky, written out so the LowerTriangular
    /// variant is tested against an independently constructed factor.
    fn lower_cholesky_3x3(h: &Array2<f64>) -> Array2<f64> {
        let mut l = Array2::<f64>::zeros((3, 3));
        for i in 0..3 {
            for j in 0..=i {
                let mut acc = h[[i, j]];
                for k in 0..j {
                    acc -= l[[i, k]] * l[[j, k]];
                }
                if i == j {
                    l[[i, j]] = acc.sqrt();
                } else {
                    l[[i, j]] = acc / l[[j, j]];
                }
            }
        }
        l
    }

    /// All three factored forms of the SAME matrix must agree on every
    /// entry point — this is the "no site can pick a different inverse"
    /// guarantee, checked directly.
    #[test]
    fn all_variants_agree_on_the_same_curvature() {
        let h = array![[4.0, 1.0, 0.5], [1.0, 3.0, 0.2], [0.5, 0.2, 2.0]];
        let faer = h.cholesky(Side::Lower).expect("SPD factor");
        let lower = lower_cholesky_3x3(&h);
        // Projected variant with full basis = the plain inverse.
        let eye = Array2::eye(3);
        let h_inv = {
            let mut out: Array2<f64> = Array2::eye(3);
            faer.solve_mat_in_place(&mut out);
            out
        };
        let s_faer = FitSensitivity::from_faer_cholesky(&faer, 3);
        let s_tri = FitSensitivity::from_lower_triangular(&lower);
        let s_proj = FitSensitivity::from_projected(&eye, &h_inv);

        let rhs = array![0.7, -1.2, 0.4];
        let block = array![[0.7, 1.0], [-1.2, 0.0], [0.4, -2.0]];
        let a = s_faer.apply(&rhs);
        let b = s_tri.apply(&rhs);
        let c = s_proj.apply(&rhs);
        for i in 0..3 {
            assert!((a[i] - b[i]).abs() <= 1e-12, "faer vs triangular [{i}]");
            assert!((a[i] - c[i]).abs() <= 1e-12, "faer vs projected [{i}]");
        }
        let ma = s_faer.apply_multi(block.view());
        let mb = s_tri.apply_multi(block.view());
        let mc = s_proj.apply_multi(block.view());
        let resp = s_faer.mode_response(block.view()).expect("finite");
        for i in 0..3 {
            for j in 0..2 {
                assert!((ma[[i, j]] - mb[[i, j]]).abs() <= 1e-12);
                assert!((ma[[i, j]] - mc[[i, j]]).abs() <= 1e-12);
                assert!(
                    (resp[[i, j]] + ma[[i, j]]).abs() <= 1e-15,
                    "mode response sign"
                );
            }
        }
        // H · apply(rhs) must reproduce rhs (it is a true inverse here).
        let back = h.dot(&a);
        for i in 0..3 {
            assert!((back[i] - rhs[i]).abs() <= 1e-12, "inverse residual [{i}]");
        }
    }

    /// Case-deletion dfbetas from the operator must equal the EXACT
    /// leave-one-out refit of a ridge-penalized Gaussian fit — brute-force
    /// dropping each row and re-solving — to machine precision. This is the
    /// penalized Sherman–Morrison identity, analytic ground truth, no
    /// external tool. Cook's distance is checked against its own definition
    /// `(β−β₍ᵢ₎)ᵀ H (β−β₍ᵢ₎)/(p·φ)` using the exact refit.
    #[test]
    fn case_deletion_matches_exact_loo_refit() {
        // Small over-determined ridge problem: H = XᵀX + S, β = H⁻¹ Xᵀ y,
        // working weights w_i = 1 and working residual r_i = y_i − x_iᵀβ
        // (the Gaussian identity-link IRLS reduction).
        let x = array![
            [1.0, 0.2, -0.5],
            [1.0, -1.1, 0.3],
            [1.0, 0.7, 1.4],
            [1.0, 2.0, -0.8],
            [1.0, -0.4, 0.9],
            [1.0, 1.3, 0.1],
        ];
        let y = array![0.5, -1.2, 2.1, 0.3, -0.7, 1.0];
        let n = x.nrows();
        let p = x.ncols();
        // Penalty S (a mild ridge on the two slopes, intercept unpenalized).
        let mut s = Array2::<f64>::zeros((p, p));
        s[[1, 1]] = 0.4;
        s[[2, 2]] = 0.4;

        let xtx = x.t().dot(&x);
        let h = &xtx + &s;
        let h_inv = {
            let f = h.cholesky(Side::Lower).expect("SPD");
            let mut out: Array2<f64> = Array2::eye(p);
            f.solve_mat_in_place(&mut out);
            out
        };
        let xty = x.t().dot(&y);
        let beta = h_inv.dot(&xty);

        let w = Array1::<f64>::ones(n);
        let resid = &y - &x.dot(&beta);

        let faer = h.cholesky(Side::Lower).expect("SPD");
        let op = FitSensitivity::from_faer_cholesky(&faer, p);
        let infl = op
            .case_deletion(&x, w.view(), resid.view(), 1.0)
            .expect("case deletion");

        for i in 0..n {
            // Exact refit with row i removed: H₍ᵢ₎ = H − x_i x_iᵀ (penalty S
            // is a prior, unchanged by data deletion), rhs = Xᵀy − x_i y_i.
            let x_i = x.row(i).to_owned();
            let mut h_del = h.clone();
            for a in 0..p {
                for b in 0..p {
                    h_del[[a, b]] -= x_i[a] * x_i[b];
                }
            }
            let rhs_del = &xty - &(&x_i * y[i]);
            let h_del_inv = {
                let f = h_del.cholesky(Side::Lower).expect("SPD deleted");
                let mut out: Array2<f64> = Array2::eye(p);
                f.solve_mat_in_place(&mut out);
                out
            };
            let beta_del = h_del_inv.dot(&rhs_del);
            let exact_dfbeta = &beta - &beta_del;

            for j in 0..p {
                assert!(
                    (infl.dfbeta[[i, j]] - exact_dfbeta[j]).abs() < 1e-9,
                    "dfbeta[{i},{j}]: operator {} vs exact refit {}",
                    infl.dfbeta[[i, j]],
                    exact_dfbeta[j]
                );
            }
            // Cook's distance against its definition with the exact refit.
            let cook_exact = exact_dfbeta.dot(&h.dot(&exact_dfbeta)) / (p as f64 * 1.0);
            assert!(
                (infl.cooks_distance[i] - cook_exact).abs() < 1e-9,
                "cooks[{i}]: {} vs {}",
                infl.cooks_distance[i],
                cook_exact
            );
            // Leverage must match the hat value x_iᵀ H⁻¹ x_i (w_i = 1).
            let h_ii = x_i.dot(&h_inv.dot(&x_i));
            assert!((infl.leverage[i] - h_ii).abs() < 1e-12, "leverage[{i}]");
        }
    }

    /// Data attribution `∂β̂/∂y` must equal the actual change in the fitted
    /// coefficients when a response value is perturbed and the ridge fit is
    /// re-solved — exact for the Gaussian penalized model (`β̂ = H⁻¹Xᵀy`).
    #[test]
    fn response_jacobian_matches_refit_perturbation() {
        let x = array![
            [1.0, 0.2, -0.5],
            [1.0, -1.1, 0.3],
            [1.0, 0.7, 1.4],
            [1.0, 2.0, -0.8],
            [1.0, -0.4, 0.9],
        ];
        let y = array![0.5, -1.2, 2.1, 0.3, -0.7];
        let n = x.nrows();
        let p = x.ncols();
        let mut s = Array2::<f64>::zeros((p, p));
        s[[1, 1]] = 0.4;
        s[[2, 2]] = 0.4;
        let h = &x.t().dot(&x) + &s;
        let faer = h.cholesky(Side::Lower).expect("SPD");

        let solve = |rhs: &Array1<f64>| faer.solvevec(rhs);
        let beta = solve(&x.t().dot(&y));

        let op = FitSensitivity::from_faer_cholesky(&faer, p);
        let w = Array1::<f64>::ones(n);
        let dbeta_dy = op.response_jacobian(&x, w.view()).expect("jacobian");

        // Exact (linear) refit: bump y_j, re-solve, compare (β'−β)/ε to col j.
        let eps = 1e-6;
        for j in 0..n {
            let mut yp = y.clone();
            yp[j] += eps;
            let beta_p = solve(&x.t().dot(&yp));
            for c in 0..p {
                let fd = (beta_p[c] - beta[c]) / eps;
                assert!(
                    (dbeta_dy[[c, j]] - fd).abs() < 1e-7,
                    "∂β̂_{c}/∂y_{j}: analytic {} vs refit {}",
                    dbeta_dy[[c, j]],
                    fd
                );
            }
        }
    }

    #[test]
    fn mode_response_refuses_non_finite_channels() {
        let h = array![[2.0, 0.0], [0.0, 1.0]];
        let faer = h.cholesky(Side::Lower).expect("SPD factor");
        let s = FitSensitivity::from_faer_cholesky(&faer, 2);
        let bad = array![[1.0], [f64::NAN]];
        assert!(s.mode_response(bad.view()).is_none());
        let wrong_dim = array![[1.0], [0.0], [0.0]];
        assert!(s.mode_response(wrong_dim.view()).is_none());
    }
}