gam_terms/inference/
lawley.rs

1//! Lawley (1956) cumulant assembly for Bartlett corrections (#939).
2//!
3//! [`crate::inference::higher_order`] owns the mean-to-factor helper: given the
4//! second-order null mean `E[W] = q + ε_k − ε_{k−q}` of a likelihood-ratio
5//! statistic, it returns the Bartlett factor `E[W] / q`. This module computes
6//! the missing analytic ingredient — the Lawley ε terms — exactly, from per-row
7//! expected log-likelihood derivative cumulants.
8//!
9//! # Lawley's expansion
10//!
11//! For a `k`-parameter model, `ε_k = Σ_θ (λ_rstu − λ_rstuvw)` with
12//!
13//! ```text
14//! λ_rstu   = κ^{rs} κ^{tu} { κ_rstu/4 − κ_rst^(u) + κ_rt^(su) },
15//! λ_rstuvw = κ^{rs} κ^{tu} κ^{vw} { κ_rtv (κ_suw/6 − κ_sw^(u))
16//!            + κ_rtu (κ_svw/4 − κ_sw^(v)) + κ_rt^(v) κ_sw^(u)
17//!            + κ_rt^(u) κ_sw^(v) },
18//! ```
19//!
20//! where `κ_rs = E[∂²ℓ/∂θ_r∂θ_s]`, `κ_rst = E[∂³ℓ]`, `κ_rs^(t) = ∂κ_rs/∂θ_t`,
21//! and `κ^{rs}` is the matrix inverse of `[κ_rs]` (Lawley 1956; the display
22//! follows Cribari-Neto & Queiroz, "Bartlett corrections in beta regression
23//! models", arXiv:1501.07551, eq. 4). `ε_{k−q}` is the same sum restricted to
24//! the nuisance block, i.e. this function evaluated on the null model.
25//!
26//! # Why the "joint cumulants" are computable here
27//!
28//! GLM-type log-likelihoods are *linear in y*, so every η-derivative of `ℓ_i`
29//! is linear in y and all the arrays above need only `E[y] = μ`: no third or
30//! fourth response moments enter. The mixed arrays `κ_rs^(t)` — which are NOT
31//! recoverable from the pointwise expected contractions ν₃/ν₄ — are
32//! η-derivatives of the expected curvature as a *function*, and those are exact
33//! jet compositions of the link and variance jets. With `c(η) = μ′/V(μ)` and
34//! `u₀ = μ′·c` (the Fisher weight),
35//!
36//! ```text
37//! κ₂ = −u₀          κ₂' = −u₀'                κ₂'' = −u₀''
38//! κ₃ = −(u₀' + μ′c′)            κ₃' = −(u₀'' + μ″c′ + μ′c″)
39//! κ₄ = −(u₀'' + μ″c′ + 2μ′c″)
40//! ```
41//!
42//! (primes are η-derivatives; all divided by the dispersion φ). Canonical
43//! links have `c′ = c″ = 0`, hence `κ₃ = κ₂'` and `κ₄ = κ₃'` — pinned in tests.
44//!
45//! # Row-pair (hat) reduction
46//!
47//! For `θ`-derivatives chained through `η_i = x_iᵀβ` every array is an
48//! `X`-contraction of per-row scalars, and the six-index sum collapses to
49//! pairwise contractions of `E = X K⁻¹ Xᵀ` (`h_i = E_ii`):
50//!
51//! ```text
52//! λ₄ = Σ_i a_i h_i²,                       a_i = κ₄/4 − κ₃' + κ₂''
53//! λ₆ = −Σ_ij { E_ij³ [κ₃ᵢκ₃ⱼ/6 − κ₃ᵢκ₂'ⱼ + κ₂'ᵢκ₂'ⱼ]
54//!            + h_i h_j E_ij [κ₃ᵢκ₃ⱼ/4 − κ₃ᵢκ₂'ⱼ + κ₂'ᵢκ₂'ⱼ] }
55//! ε  = λ₄ − λ₆.
56//! ```
57//!
58//! The reduction is verified against the raw six-index Lawley sum in tests, and
59//! against two *exact* finite-sample distributions: the exponential/log-link
60//! intercept model, where exact `E[W] = 2n(log n − ψ(n))` with expansion
61//! `1 + 1/(6n) + O(n⁻³)`, and the Poisson/log intercept model by exact pmf
62//! summation, where the classical factor is `1/(6nλ)`.
63//!
64//! # Penalized models
65//!
66//! A quadratic penalty `−½βᵀS_λβ` is deterministic *at fixed λ*: it shifts
67//! `κ_rs` by `−S_λ` and leaves every third/fourth-order and derivative array
68//! unchanged. When the null value annihilates the penalty (`S_λ β₀ = 0` — the
69//! usual smooth-term null "this smooth is zero"), the penalized score has mean
70//! zero at the null and Lawley's expansion applies verbatim with the penalized
71//! information: pass `penalty` to fold `S_λ` into `K`. That gives the
72//! *conditional* mean shift `Δε(ρ)` = `E[W | λ]`.
73//!
74//! When λ is **estimated**, ρ̂ = log λ̂ has its own sampling variation and
75//! `E[W]` picks up the extra second-order delta-method term
76//! `½ Σ (∂²Δε/∂ρ_b∂ρ_{b'}) Cov(ρ̂_b, ρ̂_{b'})` —
77//! [`lawley_lr_mean_shift_with_rho_variation`] assembles it exactly from the
78//! curvature of the deterministic `Δε(ρ)` and the inverse REML outer Hessian
79//! `Cov(ρ̂)` (the #740 quantity). This is the genuinely-new penalized-Bartlett
80//! contribution #939 deliverable (2) asks for. All of this remains LR-only
81//! machinery; it is not a calibration for Wood's rank-truncated Wald statistic.
82//!
83//! Cost: `O(n²k)` time and `O(n²)` memory for the pair matrix `E` — fine for
84//! the small-`n` regimes where Bartlett corrections matter (the correction is
85//! `O(n⁻¹)`; at large `n` the first-order test is already calibrated).
86
87use ndarray::{Array1, Array2, ArrayView2};
88
89use gam_linalg::faer_ndarray::{FaerArrayView, factorize_symmetricwith_fallback};
90use gam_linalg::matrix::FactorizedSystem;
91use faer::Side;
92
93/// Order-2 jet (value, first, second η-derivative) product.
94#[inline]
95fn jet_mul(a: [f64; 3], b: [f64; 3]) -> [f64; 3] {
96    [
97        a[0] * b[0],
98        a[0] * b[1] + a[1] * b[0],
99        a[0] * b[2] + 2.0 * a[1] * b[1] + a[2] * b[0],
100    ]
101}
102
103/// Order-2 jet quotient `a / b` (requires `b[0] != 0`).
104#[inline]
105fn jet_div(a: [f64; 3], b: [f64; 3]) -> [f64; 3] {
106    let q0 = a[0] / b[0];
107    let q1 = (a[1] - q0 * b[1]) / b[0];
108    let q2 = (a[2] - q0 * b[2] - 2.0 * q1 * b[1]) / b[0];
109    [q0, q1, q2]
110}
111
112/// Per-row expected jets of a GLM family/link pair at a linear predictor value:
113/// the η-derivatives of the inverse link and the μ-derivatives of the variance
114/// function. Everything Lawley needs is assembled from these by exact jet
115/// composition — no finite differences anywhere.
116#[derive(Debug, Clone, Copy)]
117pub struct RowExpectedJets {
118    /// dμ/dη.
119    pub mu1: f64,
120    /// d²μ/dη².
121    pub mu2: f64,
122    /// d³μ/dη³.
123    pub mu3: f64,
124    /// V(μ).
125    pub var: f64,
126    /// dV/dμ.
127    pub dvar_dmu: f64,
128    /// d²V/dμ².
129    pub d2var_dmu2: f64,
130    /// Dispersion φ (log-likelihood is `[yθ − b(θ)]/φ`): scales every cumulant
131    /// by `1/φ`.
132    pub dispersion: f64,
133}
134
135/// The per-row expected cumulant scalars entering Lawley's arrays.
136#[derive(Debug, Clone, Copy)]
137pub struct RowKappas {
138    /// κ₂ = E[ℓ″] (= −Fisher weight).
139    pub k2: f64,
140    /// κ₃ = E[ℓ‴].
141    pub k3: f64,
142    /// κ₄ = E[ℓ⁗].
143    pub k4: f64,
144    /// κ₂' = dκ₂/dη.
145    pub k2_1: f64,
146    /// κ₂'' = d²κ₂/dη².
147    pub k2_11: f64,
148    /// κ₃' = dκ₃/dη.
149    pub k3_1: f64,
150}
151
152impl RowKappas {
153    /// Scale every cumulant by a prior weight `w`: a row observed with weight
154    /// `w` contributes `w·ℓᵢ` to the log-likelihood (for binomial data `w`
155    /// trials at the same η are exactly the sum of `w` independent Bernoulli
156    /// rows), so all six expected derivative cumulants scale linearly in `w`.
157    pub fn weighted(self, w: f64) -> Self {
158        Self {
159            k2: self.k2 * w,
160            k3: self.k3 * w,
161            k4: self.k4 * w,
162            k2_1: self.k2_1 * w,
163            k2_11: self.k2_11 * w,
164            k3_1: self.k3_1 * w,
165        }
166    }
167}
168
169impl RowExpectedJets {
170    /// Assemble the per-row cumulant scalars by jet composition of
171    /// `c = μ′/V(μ)` and `u₀ = μ′·c`.
172    pub fn kappas(&self) -> Result<RowKappas, String> {
173        let phi = self.dispersion;
174        if !(phi.is_finite() && phi > 0.0) {
175            return Err(format!(
176                "RowExpectedJets::kappas: dispersion must be finite and positive; got {phi}"
177            ));
178        }
179        if !(self.var.is_finite() && self.var > 0.0) {
180            return Err(format!(
181                "RowExpectedJets::kappas: variance function must be finite and positive; got {}",
182                self.var
183            ));
184        }
185        // η-jets of μ′ and of V(μ(η)) (chain rule for the composition).
186        let mu1_jet = [self.mu1, self.mu2, self.mu3];
187        let v_jet = [
188            self.var,
189            self.dvar_dmu * self.mu1,
190            self.d2var_dmu2 * self.mu1 * self.mu1 + self.dvar_dmu * self.mu2,
191        ];
192        let c = jet_div(mu1_jet, v_jet);
193        let u0 = jet_mul(mu1_jet, c);
194        let inv_phi = 1.0 / phi;
195        Ok(RowKappas {
196            k2: -u0[0] * inv_phi,
197            k2_1: -u0[1] * inv_phi,
198            k2_11: -u0[2] * inv_phi,
199            k3: -(u0[1] + self.mu1 * c[1]) * inv_phi,
200            k3_1: -(u0[2] + self.mu2 * c[1] + self.mu1 * c[2]) * inv_phi,
201            k4: -(u0[2] + self.mu2 * c[1] + 2.0 * self.mu1 * c[2]) * inv_phi,
202        })
203    }
204
205    /// Gaussian family, identity link, variance φ = σ².
206    pub fn gaussian_identity(dispersion: f64) -> Self {
207        Self {
208            mu1: 1.0,
209            mu2: 0.0,
210            mu3: 0.0,
211            var: 1.0,
212            dvar_dmu: 0.0,
213            d2var_dmu2: 0.0,
214            dispersion,
215        }
216    }
217
218    /// Poisson family, log link (canonical), at linear predictor `eta`.
219    pub fn poisson_log(eta: f64) -> Self {
220        let mu = eta.exp();
221        Self {
222            mu1: mu,
223            mu2: mu,
224            mu3: mu,
225            var: mu,
226            dvar_dmu: 1.0,
227            d2var_dmu2: 0.0,
228            dispersion: 1.0,
229        }
230    }
231
232    /// Bernoulli family, logit link (canonical), at linear predictor `eta`.
233    pub fn binomial_logit(eta: f64) -> Self {
234        let mu = 1.0 / (1.0 + (-eta).exp());
235        let mu1 = mu * (1.0 - mu);
236        let mu2 = mu1 * (1.0 - 2.0 * mu);
237        let mu3 = mu2 * (1.0 - 2.0 * mu) - 2.0 * mu1 * mu1;
238        Self {
239            mu1,
240            mu2,
241            mu3,
242            var: mu1,
243            dvar_dmu: 1.0 - 2.0 * mu,
244            d2var_dmu2: -2.0,
245            dispersion: 1.0,
246        }
247    }
248
249    /// Gamma family, log link (non-canonical), at linear predictor `eta` with
250    /// dispersion φ (shape 1/φ; φ = 1 is the exponential distribution).
251    pub fn gamma_log(eta: f64, dispersion: f64) -> Self {
252        let mu = eta.exp();
253        Self {
254            mu1: mu,
255            mu2: mu,
256            mu3: mu,
257            var: mu * mu,
258            dvar_dmu: 2.0 * mu,
259            d2var_dmu2: 2.0,
260            dispersion,
261        }
262    }
263}
264
265/// Lawley's `ε` for a GLM block: design `x` (n × k), per-row cumulants, and an
266/// optional quadratic penalty `S_λ` folded into the information (valid for
267/// nulls with `S_λ β₀ = 0`; see the module docs). Evaluate at the null fit.
268///
269/// `ε_k − ε_{k−q}` (full minus nuisance-restricted, the latter being this
270/// function on the null model) is the second-order mean shift of the LR
271/// statistic; feed `q + ε_k − ε_{k−q}` to
272/// [`crate::inference::higher_order::bartlett_factor_from_mean`].
273pub fn lawley_epsilon(
274    x: ArrayView2<'_, f64>,
275    kappas: &[RowKappas],
276    penalty: Option<ArrayView2<'_, f64>>,
277) -> Result<f64, String> {
278    let n = x.nrows();
279    let k = x.ncols();
280    if n == 0 || k == 0 {
281        return Err(format!(
282            "lawley_epsilon: empty design ({n} rows, {k} columns)"
283        ));
284    }
285    if kappas.len() != n {
286        return Err(format!(
287            "lawley_epsilon: {} cumulant rows for {n} design rows",
288            kappas.len()
289        ));
290    }
291    // J = −[κ_rs] (+ S_λ): the (penalized) expected information, SPD.
292    let mut j_mat = Array2::<f64>::zeros((k, k));
293    for (i, row_kappas) in kappas.iter().enumerate() {
294        let weight = -row_kappas.k2;
295        if !weight.is_finite() {
296            return Err(format!(
297                "lawley_epsilon: non-finite Fisher weight at row {i}"
298            ));
299        }
300        for r in 0..k {
301            let xr = x[[i, r]] * weight;
302            for s in 0..k {
303                j_mat[[r, s]] += xr * x[[i, s]];
304            }
305        }
306    }
307    if let Some(s_pen) = penalty {
308        if s_pen.nrows() != k || s_pen.ncols() != k {
309            return Err(format!(
310                "lawley_epsilon: penalty is {}×{}, expected {k}×{k}",
311                s_pen.nrows(),
312                s_pen.ncols()
313            ));
314        }
315        j_mat += &s_pen;
316    }
317    let j_view = FaerArrayView::new(&j_mat);
318    let factor = factorize_symmetricwith_fallback(j_view.as_ref(), Side::Lower)
319        .map_err(|e| format!("lawley_epsilon: information factorization failed: {e:?}"))?;
320    let j_inv = FactorizedSystem::solvemulti(&factor, &Array2::<f64>::eye(k))?;
321
322    // Pair matrix E = X J⁻¹ Xᵀ and its diagonal h.
323    let e_pairs = x.dot(&j_inv).dot(&x.t());
324    let h = e_pairs.diag().to_owned();
325
326    // λ₄ = Σ_i a_i h_i².
327    let mut lambda4 = 0.0;
328    for (i, row_kappas) in kappas.iter().enumerate() {
329        let a_i = row_kappas.k4 / 4.0 - row_kappas.k3_1 + row_kappas.k2_11;
330        lambda4 += a_i * h[i] * h[i];
331    }
332
333    // λ₆ = −Σ_ij { E³·b6_ij + h h E·b4_ij } with
334    // b6_ij = κ₃ᵢκ₃ⱼ/6 − κ₃ᵢκ₂'ⱼ + κ₂'ᵢκ₂'ⱼ and b4 the same with 1/4.
335    let k3: Array1<f64> = kappas.iter().map(|r| r.k3).collect();
336    let k21: Array1<f64> = kappas.iter().map(|r| r.k2_1).collect();
337    let mut lambda6 = 0.0;
338    for i in 0..n {
339        for j in 0..n {
340            let e_ij = e_pairs[[i, j]];
341            let cross = k3[i] * k3[j];
342            let mixed = -k3[i] * k21[j] + k21[i] * k21[j];
343            lambda6 -= e_ij * e_ij * e_ij * (cross / 6.0 + mixed)
344                + h[i] * h[j] * e_ij * (cross / 4.0 + mixed);
345        }
346    }
347
348    let epsilon = lambda4 - lambda6;
349    if !epsilon.is_finite() {
350        return Err(format!(
351            "lawley_epsilon: non-finite ε (λ₄={lambda4}, λ₆={lambda6})"
352        ));
353    }
354    Ok(epsilon)
355}
356
357/// Row cap for LR consumers that build the `O(n²)`-memory pair matrix `E` per
358/// tested block: at `n = 2048` the matrix is 32 MB and the `O(n⁻¹)` correction
359/// is still resolvable; beyond that the first-order LR reference is already
360/// calibrated and the quadratic cost buys nothing.
361pub const LAWLEY_PAIR_MATRIX_MAX_ROWS: usize = 2048;
362
363/// Lawley's second-order mean shift `Δε = ε_k − ε_{k−q}` of the LR statistic
364/// for the null "the coefficients in `tested` are zero" inside the `k`-column
365/// model `x`: `E[W] = q + ε_k − ε_{k−q} + O(n⁻²)` (Lawley 1956; module docs).
366///
367/// * `ε_k` is [`lawley_epsilon`] on the full design (with `penalty` folded in).
368/// * `ε_{k−q}` is the same on the nuisance design — the tested columns removed
369///   and the matching rows/columns of `penalty` dropped. When the tested block
370///   is the whole design the null model is fully specified and `ε_0 = 0`.
371///
372/// The per-row cumulants are expectations, so both ε terms use the same
373/// `kappas`. Lawley evaluates them at the null fit; supplying cumulants at the
374/// full fit instead perturbs ε — itself `O(n⁻¹)` — by the `O(n⁻¹ᐟ²)` null-true
375/// parameter drift, an `O(n⁻³ᐟ²)` error inside the `O(n⁻²)` Bartlett target.
376///
377/// The Bartlett factor against a `d`-df reference is `c = E[W]/d = 1 + Δε/d`
378/// ([`lawley_lr_bartlett_factor`]).
379pub fn lawley_lr_mean_shift(
380    x: ArrayView2<'_, f64>,
381    kappas: &[RowKappas],
382    penalty: Option<ArrayView2<'_, f64>>,
383    tested: std::ops::Range<usize>,
384) -> Result<f64, String> {
385    let n = x.nrows();
386    let k = x.ncols();
387    if tested.start >= tested.end || tested.end > k {
388        return Err(format!(
389            "lawley_lr_mean_shift: tested block {}..{} out of range for {k} columns",
390            tested.start, tested.end
391        ));
392    }
393    // Full-model ε (validates x/kappas/penalty shapes).
394    let eps_full = lawley_epsilon(x, kappas, penalty)?;
395    let nuisance: Vec<usize> = (0..k).filter(|c| !tested.contains(c)).collect();
396    if nuisance.is_empty() {
397        // Fully specified null: the nuisance model has no parameters, ε_0 = 0.
398        return Ok(eps_full);
399    }
400    let m = nuisance.len();
401    let mut x_null = Array2::<f64>::zeros((n, m));
402    for (col_null, &col_full) in nuisance.iter().enumerate() {
403        for i in 0..n {
404            x_null[[i, col_null]] = x[[i, col_full]];
405        }
406    }
407    let penalty_null = penalty.map(|s_pen| {
408        let mut out = Array2::<f64>::zeros((m, m));
409        for (r_null, &r_full) in nuisance.iter().enumerate() {
410            for (c_null, &c_full) in nuisance.iter().enumerate() {
411                out[[r_null, c_null]] = s_pen[[r_full, c_full]];
412            }
413        }
414        out
415    });
416    let eps_null = lawley_epsilon(
417        x_null.view(),
418        kappas,
419        penalty_null.as_ref().map(|s_pen| s_pen.view()),
420    )?;
421    Ok(eps_full - eps_null)
422}
423
424/// The Lawley LR Bartlett factor `c = E[W]/d = 1 + (ε_k − ε_{k−q})/d` for the
425/// null "`tested` is zero", referenced against `χ²_d` with `d = ref_df` (the
426/// consumer's LR reference degrees of freedom; `Δε` already carries any penalty
427/// through the information, as scoped in the module docs).
428pub fn lawley_lr_bartlett_factor(
429    x: ArrayView2<'_, f64>,
430    kappas: &[RowKappas],
431    penalty: Option<ArrayView2<'_, f64>>,
432    tested: std::ops::Range<usize>,
433    ref_df: f64,
434) -> Result<f64, String> {
435    if !(ref_df.is_finite() && ref_df > 0.0) {
436        return Err(format!(
437            "lawley_lr_bartlett_factor: reference df must be finite and positive; got {ref_df}"
438        ));
439    }
440    let shift = lawley_lr_mean_shift(x, kappas, penalty, tested)?;
441    let mean_w = ref_df + shift;
442    let factor = crate::inference::higher_order::bartlett_factor_from_mean(mean_w, ref_df)
443        .ok_or_else(|| {
444            format!(
445                "lawley_lr_bartlett_factor: degenerate mean {mean_w} (Δε = {shift}, d = {ref_df})"
446            )
447        })?;
448    if !(factor.is_finite() && factor > 0.0) {
449        return Err(format!(
450            "lawley_lr_bartlett_factor: degenerate factor {factor} (Δε = {shift}, d = {ref_df})"
451        ));
452    }
453    Ok(factor)
454}
455
456/// A single smoothing-parameter direction for the ρ̂-variation correction: the
457/// component penalty matrix `S_b` (already at its fitted scale `e^{ρ_b} S_b^unit`)
458/// that the smoothing parameter `λ_b = e^{ρ_b}` multiplies inside the total
459/// penalty `S_λ = Σ_b S_b`. Differentiating the conditional mean shift in
460/// `ρ_b = log λ_b` scales *this* block (`S_b → e^{t} S_b`) while holding the
461/// others fixed; that is exactly `∂S_λ/∂ρ_b = S_b` at the fitted point.
462#[derive(Debug, Clone)]
463pub struct RhoPenaltyComponent {
464    /// The `k × k` component penalty `S_b` at its fitted scale (the term's
465    /// contribution to `S_λ`, i.e. `λ_b` times its unit penalty).
466    pub s_component: Array2<f64>,
467}
468
469/// Step in `ρ = log λ` for the deterministic central-difference curvature of the
470/// conditional Lawley mean shift `Δε(ρ)`. `Δε` is a smooth *deterministic*
471/// function of ρ (ρ enters only through `S_λ = Σ_b e^{ρ_b} S_b` inside the SPD
472/// information `J`), so a central difference of the known scalar is exact to its
473/// truncation order — no statistical approximation is introduced (only numerical
474/// differentiation of a closed form, verified against the analytic Gaussian-zero
475/// anchor and a Richardson refinement in tests). `0.05` resolves the `O(n⁻¹)`
476/// shift's curvature comfortably while keeping the `O(h²)` truncation far below
477/// the `O(n⁻²)` Bartlett target.
478const RHO_VARIATION_STEP: f64 = 0.05;
479
480/// The ρ̂-sampling-variation contribution to the penalized-null Bartlett mean
481/// shift (#939 deliverable 2, the genuinely-new penalized theory piece).
482///
483/// The conditional (fixed-λ) Lawley shift `Δε(ρ)` from [`lawley_lr_mean_shift`]
484/// is `E[W | λ]` — the LR mean with the smoothing parameter *held at its fitted
485/// value*. When λ is **estimated**, ρ̂ = log λ̂ carries its own sampling
486/// variation, and `E[W]` picks up the extra second-order term of a delta-method
487/// expansion of `W(ρ̂)` about the population ρ₀:
488///
489/// ```text
490/// E[W(ρ̂)] = Δε(ρ₀) + ½ Σ_{b,b'} (∂²Δε/∂ρ_b ∂ρ_{b'}) · Cov(ρ̂_b, ρ̂_{b'}) + O(·).
491/// ```
492///
493/// Both ingredients are exact and already in the engine:
494///
495/// * `∂²Δε/∂ρ_b ∂ρ_{b'}` — the curvature of the *deterministic* conditional
496///   shift in the log-smoothing parameters. ρ enters `Δε` only through
497///   `S_λ = Σ_b e^{ρ_b} S_b` folded into the SPD information `J`, so scaling each
498///   `components[b].s_component` by `e^{±h}` and central-differencing the known
499///   scalar `Δε` recovers the Hessian exactly (to `O(h²)`, well inside the
500///   `O(n⁻²)` Bartlett target). Cross terms use the symmetric 4-point stencil.
501/// * `Cov(ρ̂)` — the inverse REML/LAML **outer Hessian** (the #740 quantity the
502///   solver already maintains), passed as `rho_cov` (`m × m` for `m`
503///   smoothing parameters). This is the sampling covariance of ρ̂.
504///
505/// Returns the **total** mean shift `Δε(ρ̂) = Δε_conditional + ½·tr(HᵨᵨCov)`. The
506/// caller forms the Bartlett factor `c = 1 + Δε(ρ̂)/d` exactly as for the
507/// conditional shift; the difference from the conditional factor is the size
508/// correction attributable specifically to ρ̂-variation.
509///
510/// `components` must have one entry per row/column of `rho_cov`; `penalty` is the
511/// total fitted `S_λ` (the conditional anchor). Errors on shape mismatch or a
512/// non-finite curvature.
513pub fn lawley_lr_mean_shift_with_rho_variation(
514    x: ArrayView2<'_, f64>,
515    kappas: &[RowKappas],
516    penalty: ArrayView2<'_, f64>,
517    tested: std::ops::Range<usize>,
518    components: &[RhoPenaltyComponent],
519    rho_cov: ArrayView2<'_, f64>,
520) -> Result<f64, String> {
521    let k = x.ncols();
522    let m = components.len();
523    if m == 0 {
524        return Err(
525            "lawley_lr_mean_shift_with_rho_variation: no smoothing-parameter components"
526                .to_string(),
527        );
528    }
529    if rho_cov.nrows() != m || rho_cov.ncols() != m {
530        return Err(format!(
531            "lawley_lr_mean_shift_with_rho_variation: rho_cov is {}×{}, expected {m}×{m}",
532            rho_cov.nrows(),
533            rho_cov.ncols()
534        ));
535    }
536    for b in 0..m {
537        for c in 0..m {
538            let v_bc = rho_cov[[b, c]];
539            if !v_bc.is_finite() {
540                return Err(format!(
541                    "lawley_lr_mean_shift_with_rho_variation: rho_cov[{b},{c}] is not finite"
542                ));
543            }
544            let v_cb = rho_cov[[c, b]];
545            let tol = 1e-10 * (1.0 + v_bc.abs().max(v_cb.abs()));
546            if (v_bc - v_cb).abs() > tol {
547                return Err(format!(
548                    "lawley_lr_mean_shift_with_rho_variation: rho_cov must be symmetric; \
549                     entries [{b},{c}]={v_bc} and [{c},{b}]={v_cb} differ"
550                ));
551            }
552        }
553    }
554    if penalty.nrows() != k || penalty.ncols() != k {
555        return Err(format!(
556            "lawley_lr_mean_shift_with_rho_variation: penalty is {}×{}, expected {k}×{k}",
557            penalty.nrows(),
558            penalty.ncols()
559        ));
560    }
561    for (b, comp) in components.iter().enumerate() {
562        if comp.s_component.nrows() != k || comp.s_component.ncols() != k {
563            return Err(format!(
564                "lawley_lr_mean_shift_with_rho_variation: component {b} is {}×{}, expected {k}×{k}",
565                comp.s_component.nrows(),
566                comp.s_component.ncols()
567            ));
568        }
569    }
570
571    // Conditional shift Δε(ρ̂): the existing fixed-λ Lawley mean shift at the
572    // fitted total penalty.
573    let conditional = lawley_lr_mean_shift(x, kappas, Some(penalty), tested.clone())?;
574
575    // Δε at a perturbed log-smoothing vector: total penalty
576    // S_λ(t) = Σ_b e^{t_b} S_b. With S_λ = Σ_b S_b at the fitted point (t = 0),
577    // a shift t_b replaces the b-th block S_b by (e^{t_b} − 1)·S_b added on top.
578    let shift_at = |steps: &[(usize, f64)]| -> Result<f64, String> {
579        let mut s = penalty.to_owned();
580        for &(b, t) in steps {
581            // ∂S_λ/∂ρ_b = S_b, so e^{t}·S_b replaces S_b: add (e^{t} − 1)·S_b.
582            let scale = t.exp() - 1.0;
583            s.scaled_add(scale, &components[b].s_component);
584        }
585        lawley_lr_mean_shift(x, kappas, Some(s.view()), tested.clone())
586    };
587
588    let h = RHO_VARIATION_STEP;
589    // Hessian of Δε in ρ by symmetric finite differences of the deterministic
590    // scalar. Diagonal: standard 3-point second derivative; off-diagonal: the
591    // 4-point mixed-partial stencil.
592    let mut quad = 0.0; // ½ Σ_{b,b'} H_{bb'} Cov_{bb'}
593    let base = conditional;
594    for b in 0..m {
595        let fp = shift_at(&[(b, h)])?;
596        let fm = shift_at(&[(b, -h)])?;
597        let hbb = (fp - 2.0 * base + fm) / (h * h);
598        if !hbb.is_finite() {
599            return Err(format!(
600                "lawley_lr_mean_shift_with_rho_variation: non-finite curvature H[{b},{b}]"
601            ));
602        }
603        quad += 0.5 * hbb * rho_cov[[b, b]];
604        for c in (b + 1)..m {
605            let fpp = shift_at(&[(b, h), (c, h)])?;
606            let fpm = shift_at(&[(b, h), (c, -h)])?;
607            let fmp = shift_at(&[(b, -h), (c, h)])?;
608            let fmm = shift_at(&[(b, -h), (c, -h)])?;
609            let hbc = (fpp - fpm - fmp + fmm) / (4.0 * h * h);
610            if !hbc.is_finite() {
611                return Err(format!(
612                    "lawley_lr_mean_shift_with_rho_variation: non-finite curvature H[{b},{c}]"
613                ));
614            }
615            // Symmetric covariance: H_{bc}=H_{cb}, Cov_{bc}=Cov_{cb} ⇒ two equal
616            // off-diagonal contributions, i.e. ½·2·H_{bc}Cov_{bc} = H_{bc}Cov_{bc}.
617            quad += hbc * rho_cov[[b, c]];
618        }
619    }
620
621    let total = conditional + quad;
622    if !total.is_finite() {
623        return Err(format!(
624            "lawley_lr_mean_shift_with_rho_variation: non-finite total shift \
625             (conditional={conditional}, rho-variation={quad})"
626        ));
627    }
628    Ok(total)
629}
630
631/// The Lawley LR Bartlett factor `c = E[W]/d = 1 + Δε(ρ̂)/d` for an **estimated**
632/// smoothing parameter — the ρ̂-variation analogue of [`lawley_lr_bartlett_factor`].
633///
634/// [`lawley_lr_bartlett_factor`] folds the penalty in at a *fixed* `λ`, giving the
635/// conditional factor `1 + Δε(ρ₀)/d`. When `λ` is estimated, ρ̂ = log λ̂ carries
636/// its own sampling variation and `E[W]` picks up the extra delta-method term
637/// (#939 deliverable 2, the genuinely-new penalized piece);
638/// [`lawley_lr_mean_shift_with_rho_variation`] assembles the resulting **total**
639/// mean shift `Δε(ρ̂)` from the conditional shift's ρ-curvature and the inverse
640/// REML outer Hessian `Cov(ρ̂)` (the #740 quantity). This function is the
641/// single-call factor entry point a live consumer wires symmetrically with the
642/// fixed-λ factor: it performs the same `c = E[W]/d` reduction and the same
643/// degeneracy guards, so the call site never re-derives `1 + Δε/d` (and never
644/// re-implements the positivity / finiteness checks) by hand.
645///
646/// Returns the factor against a `d = ref_df` reference; `penalty` is the total
647/// fitted `S_λ`, `components`/`rho_cov` are the per-smoothing-parameter penalty
648/// blocks and their sampling covariance, exactly as
649/// [`lawley_lr_mean_shift_with_rho_variation`] takes them. Errors on a degenerate
650/// reference df, a non-finite/degenerate mean, or any error from the underlying
651/// shift assembly.
652pub fn lawley_lr_bartlett_factor_with_rho_variation(
653    x: ArrayView2<'_, f64>,
654    kappas: &[RowKappas],
655    penalty: ArrayView2<'_, f64>,
656    tested: std::ops::Range<usize>,
657    components: &[RhoPenaltyComponent],
658    rho_cov: ArrayView2<'_, f64>,
659    ref_df: f64,
660) -> Result<f64, String> {
661    if !(ref_df.is_finite() && ref_df > 0.0) {
662        return Err(format!(
663            "lawley_lr_bartlett_factor_with_rho_variation: reference df must be finite and positive; got {ref_df}"
664        ));
665    }
666    let shift =
667        lawley_lr_mean_shift_with_rho_variation(x, kappas, penalty, tested, components, rho_cov)?;
668    let mean_w = ref_df + shift;
669    let factor = crate::inference::higher_order::bartlett_factor_from_mean(mean_w, ref_df)
670        .ok_or_else(|| {
671            format!(
672                "lawley_lr_bartlett_factor_with_rho_variation: degenerate mean {mean_w} \
673                 (Δε(ρ̂) = {shift}, d = {ref_df})"
674            )
675        })?;
676    if !(factor.is_finite() && factor > 0.0) {
677        return Err(format!(
678            "lawley_lr_bartlett_factor_with_rho_variation: degenerate factor {factor} \
679             (Δε(ρ̂) = {shift}, d = {ref_df})"
680        ));
681    }
682    Ok(factor)
683}
684
685/// Expected jets for a known-scale GLM family/link pair at linear predictor
686/// `eta`, when the pair has an exact closed-form jet constructor. Returns
687/// `None` for pairs whose cumulant jets are not derived yet — the consumer
688/// then reports first-order inference only (#939).
689pub fn known_scale_expected_jets(
690    family: &gam_spec::LikelihoodSpec,
691    eta: f64,
692) -> Option<RowExpectedJets> {
693    known_scale_expected_jets_with_dispersion(family, eta, 1.0)
694}
695
696/// Expected jets for a GLM family/link pair at linear predictor `eta` with an
697/// explicit dispersion `φ` (Gaussian σ², Gamma φ = 1/shape; 1 for the
698/// scale-free Poisson/Binomial). Returns `None` for pairs whose cumulant jets
699/// are not derived yet — the consumer then reports first-order inference only
700/// (#939). This is the dispersion-carrying sibling of
701/// [`known_scale_expected_jets`]; the per-term LR Bartlett path (#1063) needs it
702/// for the estimated-scale Gaussian/Gamma families whose ε depends on φ.
703pub fn known_scale_expected_jets_with_dispersion(
704    family: &gam_spec::LikelihoodSpec,
705    eta: f64,
706    dispersion: f64,
707) -> Option<RowExpectedJets> {
708    use gam_spec::{InverseLink, ResponseFamily, StandardLink};
709    match (&family.response, &family.link) {
710        (ResponseFamily::Poisson, InverseLink::Standard(StandardLink::Log)) => {
711            Some(RowExpectedJets::poisson_log(eta))
712        }
713        (ResponseFamily::Binomial, InverseLink::Standard(StandardLink::Logit)) => {
714            Some(RowExpectedJets::binomial_logit(eta))
715        }
716        (ResponseFamily::Gaussian, InverseLink::Standard(StandardLink::Identity)) => {
717            (dispersion.is_finite() && dispersion > 0.0)
718                .then(|| RowExpectedJets::gaussian_identity(dispersion))
719        }
720        (ResponseFamily::Gamma, InverseLink::Standard(StandardLink::Log)) => {
721            (dispersion.is_finite() && dispersion > 0.0)
722                .then(|| RowExpectedJets::gamma_log(eta, dispersion))
723        }
724        _ => None,
725    }
726}
727
728#[cfg(test)]
729mod tests {
730    use super::*;
731    use ndarray::Array2;
732
733    /// Raw six-index Lawley sum (eq. 4 of the module docs) on dense arrays:
734    /// the independent oracle for the row-pair reduction. `κ^{rs}` is the
735    /// inverse of `[κ_rs]`, i.e. MINUS the inverse information.
736    fn lawley_epsilon_index_oracle(
737        x: &Array2<f64>,
738        kappas: &[RowKappas],
739        penalty: Option<&Array2<f64>>,
740    ) -> f64 {
741        let n = x.nrows();
742        let k = x.ncols();
743        // κ_rs (with penalty subtracted: penalty adds −S to κ_rs).
744        let mut kappa2 = Array2::<f64>::zeros((k, k));
745        for i in 0..n {
746            for r in 0..k {
747                for s in 0..k {
748                    kappa2[[r, s]] += kappas[i].k2 * x[[i, r]] * x[[i, s]];
749                }
750            }
751        }
752        if let Some(s_pen) = penalty {
753            kappa2 -= s_pen;
754        }
755        // κ^{rs}: dense inverse via faer.
756        let j_view = FaerArrayView::new(&kappa2);
757        let factor = factorize_symmetricwith_fallback(j_view.as_ref(), faer::Side::Lower)
758            .expect("oracle κ_rs factorization");
759        let kappa_up = FactorizedSystem::solvemulti(&factor, &Array2::<f64>::eye(k))
760            .expect("oracle κ_rs inverse");
761
762        // Dense symmetric 3- and 4-index arrays as closures over row sums.
763        let arr3 = |weights: &dyn Fn(usize) -> f64, r: usize, s: usize, t: usize| -> f64 {
764            (0..n)
765                .map(|i| weights(i) * x[[i, r]] * x[[i, s]] * x[[i, t]])
766                .sum()
767        };
768        let arr4 =
769            |weights: &dyn Fn(usize) -> f64, r: usize, s: usize, t: usize, u: usize| -> f64 {
770                (0..n)
771                    .map(|i| weights(i) * x[[i, r]] * x[[i, s]] * x[[i, t]] * x[[i, u]])
772                    .sum()
773            };
774        let w_k3 = |i: usize| kappas[i].k3;
775        let w_k21 = |i: usize| kappas[i].k2_1;
776        let w_k4 = |i: usize| kappas[i].k4;
777        let w_k31 = |i: usize| kappas[i].k3_1;
778        let w_k211 = |i: usize| kappas[i].k2_11;
779
780        let mut lambda4 = 0.0;
781        for r in 0..k {
782            for s in 0..k {
783                for t in 0..k {
784                    for u in 0..k {
785                        let braces = arr4(&w_k4, r, s, t, u) / 4.0 - arr4(&w_k31, r, s, t, u)
786                            + arr4(&w_k211, r, t, s, u);
787                        lambda4 += kappa_up[[r, s]] * kappa_up[[t, u]] * braces;
788                    }
789                }
790            }
791        }
792        let mut lambda6 = 0.0;
793        for r in 0..k {
794            for s in 0..k {
795                for t in 0..k {
796                    for u in 0..k {
797                        for v in 0..k {
798                            for w in 0..k {
799                                let braces = arr3(&w_k3, r, t, v)
800                                    * (arr3(&w_k3, s, u, w) / 6.0 - arr3(&w_k21, s, w, u))
801                                    + arr3(&w_k3, r, t, u)
802                                        * (arr3(&w_k3, s, v, w) / 4.0 - arr3(&w_k21, s, w, v))
803                                    + arr3(&w_k21, r, t, v) * arr3(&w_k21, s, w, u)
804                                    + arr3(&w_k21, r, t, u) * arr3(&w_k21, s, w, v);
805                                lambda6 +=
806                                    kappa_up[[r, s]] * kappa_up[[t, u]] * kappa_up[[v, w]] * braces;
807                            }
808                        }
809                    }
810                }
811            }
812        }
813        lambda4 - lambda6
814    }
815
816    fn intercept_design(n: usize) -> Array2<f64> {
817        Array2::<f64>::ones((n, 1))
818    }
819
820    /// ψ(n) at integer n, exactly: ψ(n) = −γ + Σ_{j=1}^{n−1} 1/j.
821    fn digamma_integer(n: usize) -> f64 {
822        const EULER_GAMMA: f64 = 0.577_215_664_901_532_9;
823        -EULER_GAMMA + (1..n).map(|j| 1.0 / j as f64).sum::<f64>()
824    }
825
826    #[test]
827    fn exponential_intercept_matches_exact_digamma_expansion() {
828        // y_i ~ Exponential(mean μ), log link, intercept-only. Exact null mean:
829        // E[W] = 2n(log n − ψ(n)) = 1 + 1/(6n) + O(n⁻³) (ȳ ~ Gamma(n, μ/n)).
830        let eta = 0.4;
831        let mut residual_prev = f64::INFINITY;
832        for &n in &[8usize, 16, 32] {
833            let jets = RowExpectedJets::gamma_log(eta, 1.0);
834            let kappas = vec![jets.kappas().expect("exponential kappas"); n];
835            let x = intercept_design(n);
836            let eps = lawley_epsilon(x.view(), &kappas, None).expect("ε");
837            let analytic = 1.0 / (6.0 * n as f64);
838            assert!(
839                (eps - analytic).abs() < 1e-12,
840                "n={n}: ε={eps} vs analytic 1/(6n)={analytic}"
841            );
842            let exact_mean = 2.0 * n as f64 * ((n as f64).ln() - digamma_integer(n));
843            let residual = (exact_mean - 1.0 - eps).abs();
844            assert!(
845                residual < 0.6 / (n * n) as f64,
846                "n={n}: |E[W] − 1 − ε| = {residual} is not O(n⁻²)"
847            );
848            assert!(
849                residual < residual_prev,
850                "n={n}: residual {residual} did not shrink from {residual_prev}"
851            );
852            residual_prev = residual;
853        }
854    }
855
856    /// DELIVERABLE (2) — penalty deterministic-shift term is consumed. The
857    /// Lawley ε folds `S_λ` into the information `J = X'WX + S_λ`
858    /// ([`lawley_epsilon`]); adding the penalty therefore moves ε on any family
859    /// whose `ε ≠ 0`. This regression proves the penalty arm is live (not
860    /// dropped) and that a larger λ shrinks |ε| monotonically — the penalty
861    /// stiffens the information `J = X'WX + S_λ`, which moves the finite-sample
862    /// LR mean shift — exactly the penalized-null behavior #939 deliverable (2)
863    /// requires.
864    ///
865    /// (The ρ̂-variation term — the extra deterministic shift from λ̂ being
866    /// *estimated* rather than fixed — is NOT in this conditional-on-λ̂ factor;
867    /// it is the genuinely-new theory piece the issue flags, validated via the
868    /// null-bootstrap arm. This test pins the penalty deterministic-shift only.)
869    #[test]
870    fn penalty_shift_term_is_consumed() {
871        // Poisson/log, a 2-column design (intercept + a centered covariate) so
872        // the penalty on the second column actually couples into ε.
873        let n = 40usize;
874        let eta = 0.2_f64;
875        let jets = RowExpectedJets::poisson_log(eta);
876        let kappas = vec![jets.kappas().expect("poisson kappas"); n];
877        let mut x = Array2::<f64>::ones((n, 2));
878        for i in 0..n {
879            // Centered covariate in the second column.
880            x[[i, 1]] = (i as f64) / (n as f64) - 0.5;
881        }
882        let eps_unpen = lawley_epsilon(x.view(), &kappas, None).expect("ε unpenalized");
883
884        // A ridge penalty on the second (smooth-like) column only. ε must depend
885        // on λ — if S were dropped, every λ would give the unpenalized value.
886        let mut distinct = std::collections::BTreeSet::new();
887        for &lambda in &[0.5_f64, 2.0, 8.0, 32.0] {
888            let mut s = Array2::<f64>::zeros((2, 2));
889            s[[1, 1]] = lambda;
890            let eps_pen = lawley_epsilon(x.view(), &kappas, Some(s.view())).expect("ε penalized");
891            // The penalty MUST change ε (proves S is consumed, deliverable 2).
892            assert!(
893                (eps_pen - eps_unpen).abs() > 1e-9,
894                "λ={lambda}: penalty did not move ε ({eps_pen} vs {eps_unpen}) — S is being dropped"
895            );
896            // As λ → ∞ the penalized column is frozen out; ε must stay finite.
897            assert!(
898                eps_pen.is_finite(),
899                "λ={lambda}: ε must be finite, got {eps_pen}"
900            );
901            distinct.insert((eps_pen * 1e9) as i64);
902        }
903        // Different λ give genuinely different ε (S enters the information, not a
904        // no-op): at least three of the four ridge strengths are distinct.
905        assert!(
906            distinct.len() >= 3,
907            "ε must vary with λ; got {} distinct values",
908            distinct.len()
909        );
910    }
911
912    #[test]
913    fn poisson_intercept_matches_exact_pmf_mean() {
914        // y_i ~ Poisson(λ), log link, intercept-only: ε = 1/(6nλ) classically;
915        // E[W] computed exactly by pmf summation over S = Σy ~ Poisson(nλ).
916        let lambda: f64 = 1.7;
917        for &n in &[20usize, 40] {
918            let jets = RowExpectedJets::poisson_log(lambda.ln());
919            let kappas = vec![jets.kappas().expect("poisson kappas"); n];
920            let x = intercept_design(n);
921            let eps = lawley_epsilon(x.view(), &kappas, None).expect("ε");
922            let analytic = 1.0 / (6.0 * n as f64 * lambda);
923            assert!(
924                (eps - analytic).abs() < 1e-12,
925                "n={n}: ε={eps} vs analytic 1/(6nλ)={analytic}"
926            );
927            let total_rate = n as f64 * lambda;
928            let mut pmf = (-total_rate).exp();
929            let mut exact_mean = 0.0;
930            let s_max = (total_rate + 60.0 * total_rate.sqrt()).ceil() as usize;
931            for s in 0..=s_max {
932                if s > 0 {
933                    pmf *= total_rate / s as f64;
934                }
935                let s_f = s as f64;
936                let w = if s == 0 {
937                    2.0 * total_rate
938                } else {
939                    2.0 * (total_rate - s_f + s_f * (s_f / total_rate).ln())
940                };
941                exact_mean += pmf * w;
942            }
943            let residual = (exact_mean - 1.0 - eps).abs();
944            assert!(
945                residual < 0.7 / (n * n) as f64,
946                "n={n}: |E[W] − 1 − ε| = {residual} is not O(n⁻²)"
947            );
948        }
949    }
950
951    /// CERTIFICATION FIXTURE 1 (#939, Gaussian known variance): every third and
952    /// fourth expected cumulant and every η-derivative of the curvature vanish
953    /// (ℓᵢ = −½(yᵢ−ηᵢ)²/φ is exactly quadratic), so ε_k = ε_{k−q} = 0 and the
954    /// Lawley LR Bartlett factor is exactly 1 at every n. The χ²_q reference is
955    /// exactly calibrated only in the unpenalized quadratic model; with a penalty
956    /// the statistic is a weighted χ² form, e.g. one-parameter ridge gives
957    /// `(n / (n + λ))χ²₁`.
958    #[test]
959    fn gaussian_known_variance_lr_factor_is_exactly_one() {
960        let n = 20;
961        let k = 3;
962        let mut x = Array2::<f64>::zeros((n, k));
963        for i in 0..n {
964            let z = i as f64 / n as f64;
965            x[[i, 0]] = 1.0;
966            x[[i, 1]] = (5.0 * z).sin();
967            x[[i, 2]] = z - 0.5;
968        }
969        let kappas = vec![
970            RowExpectedJets::gaussian_identity(1.7)
971                .kappas()
972                .expect("gaussian kappas");
973            n
974        ];
975        let s_pen = Array2::<f64>::eye(k) * 0.4;
976        for q in [1usize, 2] {
977            let shift = lawley_lr_mean_shift(x.view(), &kappas, Some(s_pen.view()), k - q..k)
978                .expect("shift");
979            assert!(
980                shift.abs() < 1e-13,
981                "Gaussian known-variance Δε must be 0; got {shift}"
982            );
983            let c = lawley_lr_bartlett_factor(
984                x.view(),
985                &kappas,
986                Some(s_pen.view()),
987                k - q..k,
988                q as f64,
989            )
990            .expect("factor");
991            assert!(
992                (c - 1.0).abs() < 1e-13,
993                "Gaussian known-variance Bartlett factor must be exactly 1; got {c}"
994            );
995        }
996    }
997
998    /// CERTIFICATION FIXTURE 2 (#939, Exponential rate, scalar MLE): Lawley's
999    /// analytic second-order Bartlett factor is `c = 1 + 1/(6n)`.
1000    ///
1001    /// Derivation. yᵢ ~ Exp(rate θ), ℓ(θ) = n·log θ − θ·Σyᵢ, θ̂ = 1/ȳ. The LR
1002    /// statistic for H₀: θ = θ₀ at the truth is
1003    ///   W = 2[ℓ(θ̂) − ℓ(θ₀)] = 2n(θ₀ȳ − 1 − log(θ₀ȳ)).
1004    /// With T = θ₀·Σyᵢ ~ Gamma(n, 1): E[T] = n, E[log T] = ψ(n), so
1005    ///   E[W] = 2(E[T] − n) + 2n(log n − E[log T]) = 2n(log n − ψ(n))   (exact).
1006    /// The digamma expansion ψ(n) = log n − 1/(2n) − 1/(12n²) + O(n⁻⁴) gives
1007    ///   E[W] = 1 + 1/(6n) + O(n⁻³).
1008    /// The tested block is the whole (intercept-only) design — a fully
1009    /// specified null, ε₀ = 0 — and the factor must match both the analytic
1010    /// 1 + 1/(6n) (to machine precision) and the exact digamma mean through the
1011    /// expected higher-order remainder.
1012    #[test]
1013    fn exponential_rate_lr_factor_is_one_plus_one_sixth_n() {
1014        let eta = -0.7; // ε is reparametrization-invariant; any η works.
1015        for &n in &[8usize, 16, 32] {
1016            let jets = RowExpectedJets::gamma_log(eta, 1.0);
1017            let kappas = vec![jets.kappas().expect("exponential kappas"); n];
1018            let x = intercept_design(n);
1019            let c = lawley_lr_bartlett_factor(x.view(), &kappas, None, 0..1, 1.0).expect("factor");
1020            let analytic = 1.0 + 1.0 / (6.0 * n as f64);
1021            assert!(
1022                (c - analytic).abs() < 1e-12,
1023                "n={n}: factor {c} vs analytic 1 + 1/(6n) = {analytic}"
1024            );
1025            let exact_mean = 2.0 * n as f64 * ((n as f64).ln() - digamma_integer(n));
1026            assert!(
1027                (exact_mean - c).abs() < 0.6 / (n * n) as f64,
1028                "n={n}: |E[W] − c| = {} is not O(n⁻²)",
1029                (exact_mean - c).abs()
1030            );
1031        }
1032    }
1033
1034    /// CERTIFICATION FIXTURE 3 (#939, Bernoulli/logit, scalar MLE): analytic
1035    /// Lawley shift `ε = (1 − u)/(6nu)`, `u = μ(1−μ)`, certified against the
1036    /// exact binomial pmf mean of W.
1037    ///
1038    /// Hand derivation of ε from the row-pair form (module docs), intercept-only
1039    /// design (h_i = E_ij = 1/(nu)), canonical link (κ₃ = κ₂′, κ₄ = κ₃′), with
1040    /// u = μ(1−μ), u′ = u(1−2μ), u″ = u(1−2μ)² − 2u² = u(1−6u) (since
1041    /// (1−2μ)² = 1−4u):
1042    ///   κ₂ = −u, κ₂′ = κ₃ = −u′, κ₂″ = κ₃′ = κ₄ = −u″
1043    ///   a_i  = κ₄/4 − κ₃′ + κ₂″ = −u″/4
1044    ///   λ₄   = n·(−u″/4)·(1/(nu))² = −u″/(4nu²)
1045    ///   b6   = κ₃²/6 − κ₃κ₂′ + κ₂′² = u′²/6,   b4 = u′²/4
1046    ///   λ₆   = −n²·(1/(nu))³·(u′²/6 + u′²/4) = −5u′²/(12nu³)
1047    ///   ε    = λ₄ − λ₆ = [−3(1−6u) + 5(1−4u)]/(12nu) = (1 − u)/(6nu).
1048    /// Cross-check: Poisson has u = u′ = u″ = μ ⇒ ε = (5/12 − 1/4)/(nμ) =
1049    /// 1/(6nμ), the classical value asserted in the Poisson fixture above.
1050    /// Exact certification: S = Σyᵢ ~ Binomial(n, μ₀), W(S) = 2[S·log(S/(nμ₀))
1051    /// + (n−S)·log((n−S)/(n(1−μ₀)))] (0·log 0 = 0); |E[W] − 1 − ε| must be
1052    /// O(n⁻²) (numerically ≈ 1.5/n² at μ₀ = 0.3) and shrink with n.
1053    #[test]
1054    fn bernoulli_logit_intercept_factor_matches_exact_pmf_mean() {
1055        let mu: f64 = 0.3;
1056        let u = mu * (1.0 - mu);
1057        let eta = (mu / (1.0 - mu)).ln();
1058        let mut residual_prev = f64::INFINITY;
1059        for &n in &[24usize, 48, 96] {
1060            let jets = RowExpectedJets::binomial_logit(eta);
1061            let kappas = vec![jets.kappas().expect("bernoulli kappas"); n];
1062            let x = intercept_design(n);
1063            let shift = lawley_lr_mean_shift(x.view(), &kappas, None, 0..1).expect("Δε");
1064            let analytic = (1.0 - u) / (6.0 * n as f64 * u);
1065            assert!(
1066                (shift - analytic).abs() < 1e-12,
1067                "n={n}: Δε = {shift} vs analytic (1−u)/(6nu) = {analytic}"
1068            );
1069            let c = lawley_lr_bartlett_factor(x.view(), &kappas, None, 0..1, 1.0).expect("factor");
1070            assert!(
1071                (c - (1.0 + analytic)).abs() < 1e-12,
1072                "n={n}: factor {c} vs 1 + ε = {}",
1073                1.0 + analytic
1074            );
1075            // Exact E[W] by binomial pmf summation.
1076            let nf = n as f64;
1077            let mut pmf = (1.0 - mu).powi(n as i32); // P(S = 0)
1078            let mut exact_mean = 0.0;
1079            for s in 0..=n {
1080                if s > 0 {
1081                    pmf *= mu / (1.0 - mu) * (n - s + 1) as f64 / s as f64;
1082                }
1083                let s_f = s as f64;
1084                let t1 = if s == 0 {
1085                    0.0
1086                } else {
1087                    s_f * (s_f / (nf * mu)).ln()
1088                };
1089                let t2 = if s == n {
1090                    0.0
1091                } else {
1092                    (nf - s_f) * ((nf - s_f) / (nf * (1.0 - mu))).ln()
1093                };
1094                exact_mean += pmf * 2.0 * (t1 + t2);
1095            }
1096            let residual = (exact_mean - 1.0 - shift).abs();
1097            assert!(
1098                residual < 2.5 / (n * n) as f64,
1099                "n={n}: |E[W] − 1 − ε| = {residual} is not O(n⁻²)"
1100            );
1101            assert!(
1102                residual < residual_prev,
1103                "n={n}: residual {residual} did not shrink from {residual_prev}"
1104            );
1105            residual_prev = residual;
1106        }
1107    }
1108
1109    /// The mean shift is exactly `ε_full − ε_nuisance` with the tested columns
1110    /// (and the matching penalty rows/columns) dropped from the nuisance model.
1111    #[test]
1112    fn mean_shift_is_full_minus_nuisance_epsilon() {
1113        let n = 19;
1114        let mut x = Array2::<f64>::zeros((n, 2));
1115        let mut kappas = Vec::with_capacity(n);
1116        for i in 0..n {
1117            let z = i as f64 / n as f64;
1118            x[[i, 0]] = 1.0;
1119            x[[i, 1]] = z - 0.5;
1120            let eta = 0.3 - 0.8 * (z - 0.5);
1121            kappas.push(
1122                RowExpectedJets::binomial_logit(eta)
1123                    .kappas()
1124                    .expect("binomial kappas"),
1125            );
1126        }
1127        let mut s_pen = Array2::<f64>::zeros((2, 2));
1128        s_pen[[1, 1]] = 0.6;
1129        let shift =
1130            lawley_lr_mean_shift(x.view(), &kappas, Some(s_pen.view()), 1..2).expect("shift");
1131        let eps_full = lawley_epsilon(x.view(), &kappas, Some(s_pen.view())).expect("ε_full");
1132        let x_null = x.slice(ndarray::s![.., 0..1]).to_owned();
1133        let s_null = s_pen.slice(ndarray::s![0..1, 0..1]).to_owned();
1134        let eps_null = lawley_epsilon(x_null.view(), &kappas, Some(s_null.view())).expect("ε_null");
1135        assert!(
1136            (shift - (eps_full - eps_null)).abs() < 1e-14,
1137            "Δε = {shift} must equal ε_full − ε_null = {}",
1138            eps_full - eps_null
1139        );
1140        // Weighted rows: doubling every weight must equal doubling n by row
1141        // duplication — certified through the linear scaling of the cumulants.
1142        let kappas_w: Vec<RowKappas> = kappas.iter().map(|r| r.weighted(2.0)).collect();
1143        let mut x2 = Array2::<f64>::zeros((2 * n, 2));
1144        let mut kappas2 = Vec::with_capacity(2 * n);
1145        for i in 0..n {
1146            for rep in 0..2 {
1147                let row = 2 * i + rep;
1148                x2[[row, 0]] = x[[i, 0]];
1149                x2[[row, 1]] = x[[i, 1]];
1150                kappas2.push(kappas[i]);
1151            }
1152        }
1153        let shift_w = lawley_lr_mean_shift(x.view(), &kappas_w, Some(s_pen.view()), 1..2)
1154            .expect("weighted shift");
1155        let shift_dup = lawley_lr_mean_shift(x2.view(), &kappas2, Some(s_pen.view()), 1..2)
1156            .expect("duplicated shift");
1157        assert!(
1158            (shift_w - shift_dup).abs() < 1e-12 * (1.0 + shift_dup.abs()),
1159            "weight-2 rows ({shift_w}) must equal duplicated rows ({shift_dup})"
1160        );
1161    }
1162
1163    #[test]
1164    fn row_pair_reduction_matches_index_oracle() {
1165        // Non-canonical rows (gamma/log at varying η) on a k=3 design: the
1166        // production row-pair form must equal the raw six-index Lawley sum.
1167        let n = 17;
1168        let k = 3;
1169        let mut x = Array2::<f64>::zeros((n, k));
1170        let mut kappas = Vec::with_capacity(n);
1171        for i in 0..n {
1172            let z = i as f64 / n as f64;
1173            x[[i, 0]] = 1.0;
1174            x[[i, 1]] = (7.3 * z).sin();
1175            x[[i, 2]] = z * z - 0.4;
1176            let eta = 0.2 + 0.5 * x[[i, 1]] - 0.3 * x[[i, 2]];
1177            kappas.push(
1178                RowExpectedJets::gamma_log(eta, 1.3)
1179                    .kappas()
1180                    .expect("gamma kappas"),
1181            );
1182        }
1183        let fast = lawley_epsilon(x.view(), &kappas, None).expect("hat form");
1184        let oracle = lawley_epsilon_index_oracle(&x, &kappas, None);
1185        assert!(
1186            (fast - oracle).abs() < 1e-10 * (1.0 + oracle.abs()),
1187            "row-pair ε={fast} vs index-form ε={oracle}"
1188        );
1189
1190        // And with a penalty folded into the information (penalized Lawley).
1191        let mut s_pen = Array2::<f64>::eye(k);
1192        s_pen[[0, 0]] = 0.0; // unpenalized intercept
1193        s_pen *= 0.8;
1194        let fast_pen = lawley_epsilon(x.view(), &kappas, Some(s_pen.view())).expect("hat form");
1195        let oracle_pen = lawley_epsilon_index_oracle(&x, &kappas, Some(&s_pen));
1196        assert!(
1197            (fast_pen - oracle_pen).abs() < 1e-10 * (1.0 + oracle_pen.abs()),
1198            "penalized row-pair ε={fast_pen} vs index-form ε={oracle_pen}"
1199        );
1200        assert!(
1201            (fast_pen - fast).abs() > 1e-6,
1202            "penalty must move ε (got {fast} → {fast_pen})"
1203        );
1204    }
1205
1206    #[test]
1207    fn canonical_links_collapse_the_mixed_arrays() {
1208        // Canonical links have c′ = c″ = 0, so κ₃ = κ₂' and κ₄ = κ₃' — the
1209        // derivative ("joint") arrays coincide with the pure ones and the
1210        // Bartlett ingredients reduce to classical canonical-GLM form.
1211        for eta in [-1.3, 0.0, 0.7] {
1212            for jets in [
1213                RowExpectedJets::poisson_log(eta),
1214                RowExpectedJets::binomial_logit(eta),
1215            ] {
1216                let kappas = jets.kappas().expect("canonical kappas");
1217                assert!(
1218                    (kappas.k3 - kappas.k2_1).abs() < 1e-13 * (1.0 + kappas.k3.abs()),
1219                    "canonical link must satisfy κ₃ = κ₂' (η={eta}): {kappas:?}"
1220                );
1221                assert!(
1222                    (kappas.k4 - kappas.k3_1).abs() < 1e-13 * (1.0 + kappas.k4.abs()),
1223                    "canonical link must satisfy κ₄ = κ₃' (η={eta}): {kappas:?}"
1224                );
1225            }
1226        }
1227    }
1228
1229    #[test]
1230    fn gaussian_identity_needs_no_correction_even_penalized() {
1231        // Gaussian/identity: all third/fourth-order and derivative arrays
1232        // vanish, so ε = 0 exactly — including with a penalty (the LR of a
1233        // penalized-quadratic model is exactly pivotal at fixed λ).
1234        let n = 12;
1235        let jets = RowExpectedJets::gaussian_identity(2.3);
1236        let kappas = vec![jets.kappas().expect("gaussian kappas"); n];
1237        let mut x = Array2::<f64>::ones((n, 2));
1238        for i in 0..n {
1239            x[[i, 1]] = i as f64 - 5.0;
1240        }
1241        let s_pen = Array2::<f64>::eye(2) * 0.5;
1242        let eps = lawley_epsilon(x.view(), &kappas, Some(s_pen.view())).expect("ε");
1243        assert!(
1244            eps.abs() < 1e-14,
1245            "Gaussian-identity ε must be 0; got {eps}"
1246        );
1247    }
1248
1249    /// DELIVERABLE (2), ρ̂-variation anchor — Gaussian known variance: `Δε ≡ 0`
1250    /// at every λ, so its ρ-curvature is identically zero and the ρ̂-variation
1251    /// correction is exactly 0 regardless of `Cov(ρ̂)`. The total shift must
1252    /// equal the conditional shift (both zero).
1253    #[test]
1254    fn rho_variation_correction_is_zero_for_gaussian() {
1255        let n = 16usize;
1256        let jets = RowExpectedJets::gaussian_identity(1.3);
1257        let kappas = vec![jets.kappas().expect("gaussian kappas"); n];
1258        let mut x = Array2::<f64>::ones((n, 2));
1259        for i in 0..n {
1260            x[[i, 1]] = i as f64 / n as f64 - 0.5;
1261        }
1262        // One smoothing parameter on the second column (a ridge of strength λ = 2).
1263        let mut s_comp = Array2::<f64>::zeros((2, 2));
1264        s_comp[[1, 1]] = 2.0;
1265        let penalty = s_comp.clone();
1266        let components = vec![RhoPenaltyComponent {
1267            s_component: s_comp,
1268        }];
1269        // A large ρ-variance: with zero curvature it still contributes nothing.
1270        let rho_cov = Array2::from_shape_vec((1, 1), vec![5.0]).unwrap();
1271        let total = lawley_lr_mean_shift_with_rho_variation(
1272            x.view(),
1273            &kappas,
1274            penalty.view(),
1275            1..2,
1276            &components,
1277            rho_cov.view(),
1278        )
1279        .expect("rho-variation shift");
1280        assert!(
1281            total.abs() < 1e-12,
1282            "Gaussian ρ̂-variation total shift must be 0; got {total}"
1283        );
1284    }
1285
1286    /// DELIVERABLE (2), ρ̂-variation core — the correction is exactly
1287    /// `½·Hᵨᵨ·Var(ρ̂)` for a single smoothing parameter, where `Hᵨᵨ = Δε''(ρ)`
1288    /// is the second ρ-derivative of the deterministic conditional shift. This
1289    /// test pins three things on a Poisson/log smooth (non-zero ε):
1290    ///   (i)   the total = conditional + ½ H Var, with H matching an INDEPENDENT
1291    ///         high-accuracy (Richardson-extrapolated) finite difference of
1292    ///         `lawley_lr_mean_shift` in ρ — proving the internal stencil is the
1293    ///         curvature it claims to be;
1294    ///   (ii)  zero ρ-variance recovers the conditional shift exactly (the
1295    ///         correction switches off as λ → fixed);
1296    ///   (iii) the correction is genuinely non-zero (the curvature and variance
1297    ///         are both non-zero), so the ρ̂-variation term is load-bearing.
1298    #[test]
1299    fn rho_variation_correction_matches_curvature_times_variance() {
1300        let n = 50usize;
1301        let mut x = Array2::<f64>::ones((n, 2));
1302        let mut kappas = Vec::with_capacity(n);
1303        for i in 0..n {
1304            let z = i as f64 / n as f64 - 0.5;
1305            x[[i, 1]] = z;
1306            let eta = 0.3 + 0.6 * z;
1307            kappas.push(
1308                RowExpectedJets::poisson_log(eta)
1309                    .kappas()
1310                    .expect("poisson kappas"),
1311            );
1312        }
1313        // One smoothing parameter (λ = 3) penalizing the second column.
1314        let lambda = 3.0_f64;
1315        let mut s_comp = Array2::<f64>::zeros((2, 2));
1316        s_comp[[1, 1]] = lambda;
1317        let penalty = s_comp.clone();
1318        let components = vec![RhoPenaltyComponent {
1319            s_component: s_comp.clone(),
1320        }];
1321        let tested = 1..2;
1322
1323        // Conditional (fixed-λ) shift.
1324        let conditional =
1325            lawley_lr_mean_shift(x.view(), &kappas, Some(penalty.view()), tested.clone())
1326                .expect("conditional shift");
1327
1328        // INDEPENDENT curvature Δε''(ρ): scale the WHOLE block by e^{t} (ρ = log λ
1329        // ⇒ S_λ(t) = e^{t}·λ·S_unit) and Richardson-combine two central
1330        // differences for an O(h⁴)-accurate second derivative.
1331        let de_at = |t: f64| {
1332            let mut s = Array2::<f64>::zeros((2, 2));
1333            s[[1, 1]] = lambda * t.exp();
1334            lawley_lr_mean_shift(x.view(), &kappas, Some(s.view()), tested.clone())
1335                .expect("perturbed shift")
1336        };
1337        let h = 0.05_f64;
1338        let d2_h = (de_at(h) - 2.0 * conditional + de_at(-h)) / (h * h);
1339        let d2_2h = (de_at(2.0 * h) - 2.0 * conditional + de_at(-2.0 * h)) / (4.0 * h * h);
1340        // Richardson: (4·D(h) − D(2h))/3 cancels the O(h²) term.
1341        let curvature = (4.0 * d2_h - d2_2h) / 3.0;
1342        assert!(
1343            curvature.abs() > 1e-9,
1344            "fixture must have non-zero ρ-curvature; got {curvature}"
1345        );
1346
1347        let var_rho = 0.8_f64; // Var(ρ̂) (an inverse-outer-Hessian magnitude)
1348        let rho_cov = Array2::from_shape_vec((1, 1), vec![var_rho]).unwrap();
1349        let total = lawley_lr_mean_shift_with_rho_variation(
1350            x.view(),
1351            &kappas,
1352            penalty.view(),
1353            tested.clone(),
1354            &components,
1355            rho_cov.view(),
1356        )
1357        .expect("rho-variation shift");
1358
1359        // (i) total = conditional + ½ H Var, H from the independent Richardson FD.
1360        let expected = conditional + 0.5 * curvature * var_rho;
1361        assert!(
1362            (total - expected).abs() < 1e-6 * (1.0 + expected.abs()),
1363            "ρ̂-variation total {total} must equal conditional + ½ H Var = {expected} \
1364             (conditional={conditional}, H={curvature}, Var={var_rho})"
1365        );
1366        // (iii) the ρ̂-variation correction is genuinely non-zero.
1367        assert!(
1368            (total - conditional).abs() > 1e-9,
1369            "ρ̂-variation correction must be non-zero (H={curvature}, Var={var_rho}); \
1370             total={total} conditional={conditional}"
1371        );
1372
1373        // (ii) zero variance recovers the conditional shift exactly.
1374        let zero_cov = Array2::from_shape_vec((1, 1), vec![0.0]).unwrap();
1375        let total_zero = lawley_lr_mean_shift_with_rho_variation(
1376            x.view(),
1377            &kappas,
1378            penalty.view(),
1379            tested.clone(),
1380            &components,
1381            zero_cov.view(),
1382        )
1383        .expect("zero-variance shift");
1384        assert!(
1385            (total_zero - conditional).abs() < 1e-12,
1386            "zero ρ-variance must recover the conditional shift: {total_zero} vs {conditional}"
1387        );
1388    }
1389
1390    /// DELIVERABLE (2) — the ρ̂-variation **factor** entry point
1391    /// [`lawley_lr_bartlett_factor_with_rho_variation`] is the single-call
1392    /// `c = 1 + Δε(ρ̂)/d` reduction a live consumer wires symmetrically with the
1393    /// fixed-λ [`lawley_lr_bartlett_factor`]. This pins:
1394    ///   (i)   it equals `1 + (total ρ̂-variation shift)/d`, i.e. it folds the
1395    ///         estimated-λ correction into the factor (not just the conditional
1396    ///         fixed-λ shift);
1397    ///   (ii)  on a Poisson/log smooth with a non-zero curvature and a positive
1398    ///         `Var(ρ̂)`, the estimated-λ factor differs from the fixed-λ factor —
1399    ///         the ρ̂-variation contribution is load-bearing in the factor;
1400    ///   (iii) Gaussian known-variance (Δε ≡ 0, zero ρ-curvature) gives factor 1
1401    ///         at any `Cov(ρ̂)` — the closed-form anchor;
1402    ///   (iv)  a degenerate reference df is rejected.
1403    #[test]
1404    fn rho_variation_factor_folds_estimated_lambda_into_c() {
1405        // Poisson/log smooth substrate with a non-zero ε and a single smoothing
1406        // parameter on the second column.
1407        let n = 50usize;
1408        let mut x = Array2::<f64>::ones((n, 2));
1409        let mut kappas = Vec::with_capacity(n);
1410        for i in 0..n {
1411            let z = i as f64 / n as f64 - 0.5;
1412            x[[i, 1]] = z;
1413            let eta = 0.3 + 0.6 * z;
1414            kappas.push(
1415                RowExpectedJets::poisson_log(eta)
1416                    .kappas()
1417                    .expect("poisson kappas"),
1418            );
1419        }
1420        let lambda = 3.0_f64;
1421        let mut s_comp = Array2::<f64>::zeros((2, 2));
1422        s_comp[[1, 1]] = lambda;
1423        let penalty = s_comp.clone();
1424        let components = vec![RhoPenaltyComponent {
1425            s_component: s_comp,
1426        }];
1427        let tested = 1..2;
1428        let ref_df = 1.0_f64;
1429        let var_rho = 0.8_f64;
1430        let rho_cov = Array2::from_shape_vec((1, 1), vec![var_rho]).unwrap();
1431
1432        // (i) factor = 1 + (total estimated-λ shift)/d.
1433        let total = lawley_lr_mean_shift_with_rho_variation(
1434            x.view(),
1435            &kappas,
1436            penalty.view(),
1437            tested.clone(),
1438            &components,
1439            rho_cov.view(),
1440        )
1441        .expect("total shift");
1442        let factor = lawley_lr_bartlett_factor_with_rho_variation(
1443            x.view(),
1444            &kappas,
1445            penalty.view(),
1446            tested.clone(),
1447            &components,
1448            rho_cov.view(),
1449            ref_df,
1450        )
1451        .expect("estimated-λ factor");
1452        assert!(
1453            (factor - (1.0 + total / ref_df)).abs() < 1e-12,
1454            "estimated-λ factor {factor} must equal 1 + Δε(ρ̂)/d = {}",
1455            1.0 + total / ref_df
1456        );
1457
1458        // (ii) it differs from the fixed-λ (conditional) factor — the
1459        // ρ̂-variation term is load-bearing in c.
1460        let conditional_factor = lawley_lr_bartlett_factor(
1461            x.view(),
1462            &kappas,
1463            Some(penalty.view()),
1464            tested.clone(),
1465            ref_df,
1466        )
1467        .expect("conditional factor");
1468        assert!(
1469            (factor - conditional_factor).abs() > 1e-9,
1470            "estimated-λ factor {factor} must differ from the fixed-λ factor \
1471             {conditional_factor} (ρ̂-variation is load-bearing)"
1472        );
1473
1474        // (iii) Gaussian anchor: Δε ≡ 0 ⇒ zero curvature ⇒ factor exactly 1 at any
1475        // Cov(ρ̂).
1476        let g_kappas = vec![
1477            RowExpectedJets::gaussian_identity(1.3)
1478                .kappas()
1479                .expect("gaussian kappas");
1480            n
1481        ];
1482        let big_cov = Array2::from_shape_vec((1, 1), vec![5.0]).unwrap();
1483        let g_factor = lawley_lr_bartlett_factor_with_rho_variation(
1484            x.view(),
1485            &g_kappas,
1486            penalty.view(),
1487            tested.clone(),
1488            &components,
1489            big_cov.view(),
1490            ref_df,
1491        )
1492        .expect("gaussian factor");
1493        assert!(
1494            (g_factor - 1.0).abs() < 1e-12,
1495            "Gaussian known-variance estimated-λ factor must be exactly 1; got {g_factor}"
1496        );
1497
1498        // (iv) degenerate reference df is rejected.
1499        assert!(
1500            lawley_lr_bartlett_factor_with_rho_variation(
1501                x.view(),
1502                &kappas,
1503                penalty.view(),
1504                tested.clone(),
1505                &components,
1506                rho_cov.view(),
1507                0.0,
1508            )
1509            .is_err()
1510        );
1511    }
1512
1513    /// DELIVERABLE (2), ρ̂-variation cross terms — for two smoothing parameters
1514    /// the correction is `½ Σ_{b,b'} H_{bb'} Cov_{bb'}`, and the off-diagonal
1515    /// (mixed-partial) stencil must match an independent product-of-steps FD.
1516    /// Pins that the symmetric cross contribution `H_{01}Cov_{01}` is included
1517    /// (a diagonal-only assembly would miss it).
1518    #[test]
1519    fn rho_variation_includes_symmetric_cross_terms() {
1520        let n = 40usize;
1521        let mut x = Array2::<f64>::ones((n, 3));
1522        let mut kappas = Vec::with_capacity(n);
1523        for i in 0..n {
1524            let z = i as f64 / n as f64 - 0.5;
1525            x[[i, 1]] = z;
1526            x[[i, 2]] = z * z - 0.1;
1527            let eta = 0.2 + 0.5 * z - 0.3 * x[[i, 2]];
1528            kappas.push(
1529                RowExpectedJets::binomial_logit(eta)
1530                    .kappas()
1531                    .expect("binomial kappas"),
1532            );
1533        }
1534        // Two smoothing parameters, one on column 1, one on column 2.
1535        let (l1, l2) = (2.0_f64, 4.0_f64);
1536        let mut s1 = Array2::<f64>::zeros((3, 3));
1537        s1[[1, 1]] = l1;
1538        let mut s2 = Array2::<f64>::zeros((3, 3));
1539        s2[[2, 2]] = l2;
1540        let penalty = &s1 + &s2;
1541        let components = vec![
1542            RhoPenaltyComponent {
1543                s_component: s1.clone(),
1544            },
1545            RhoPenaltyComponent {
1546                s_component: s2.clone(),
1547            },
1548        ];
1549        // Test the joint nuisance/smooth block {1,2} against the intercept.
1550        let tested = 1..3;
1551        let conditional =
1552            lawley_lr_mean_shift(x.view(), &kappas, Some(penalty.view()), tested.clone())
1553                .expect("conditional");
1554
1555        // Independent Hessian by FD on the deterministic shift as a function of
1556        // (t0, t1): block b scaled by e^{t_b}.
1557        let de = |t0: f64, t1: f64| {
1558            let mut s = Array2::<f64>::zeros((3, 3));
1559            s[[1, 1]] = l1 * t0.exp();
1560            s[[2, 2]] = l2 * t1.exp();
1561            lawley_lr_mean_shift(x.view(), &kappas, Some(s.view()), tested.clone())
1562                .expect("perturbed")
1563        };
1564        let h = 0.05_f64;
1565        let h00 = (de(h, 0.0) - 2.0 * conditional + de(-h, 0.0)) / (h * h);
1566        let h11 = (de(0.0, h) - 2.0 * conditional + de(0.0, -h)) / (h * h);
1567        let h01 = (de(h, h) - de(h, -h) - de(-h, h) + de(-h, -h)) / (4.0 * h * h);
1568
1569        // A full (non-diagonal) ρ-covariance.
1570        let rho_cov = Array2::from_shape_vec((2, 2), vec![0.7, 0.2, 0.2, 0.5]).unwrap();
1571        let total = lawley_lr_mean_shift_with_rho_variation(
1572            x.view(),
1573            &kappas,
1574            penalty.view(),
1575            tested.clone(),
1576            &components,
1577            rho_cov.view(),
1578        )
1579        .expect("rho-variation shift");
1580
1581        // Expected = conditional + ½(H00 V00 + H11 V11) + H01 V01 (symmetric cross).
1582        let expected = conditional
1583            + 0.5 * (h00 * rho_cov[[0, 0]] + h11 * rho_cov[[1, 1]])
1584            + h01 * rho_cov[[0, 1]];
1585        assert!(
1586            (total - expected).abs() < 1e-6 * (1.0 + expected.abs()),
1587            "two-parameter ρ̂-variation {total} must equal {expected} \
1588             (H00={h00}, H11={h11}, H01={h01})"
1589        );
1590        // The cross term is non-trivial — a diagonal-only assembly would differ.
1591        let diag_only = conditional + 0.5 * (h00 * rho_cov[[0, 0]] + h11 * rho_cov[[1, 1]]);
1592        assert!(
1593            (total - diag_only).abs() > 1e-9,
1594            "cross term H01·Cov01 must be included (off-diagonal non-zero): \
1595             total={total} diag_only={diag_only}"
1596        );
1597    }
1598
1599    /// Shape guards: component/cov dimension mismatches are rejected.
1600    #[test]
1601    fn rho_variation_rejects_shape_mismatch() {
1602        let n = 8usize;
1603        let jets = RowExpectedJets::poisson_log(0.1);
1604        let kappas = vec![jets.kappas().expect("kappas"); n];
1605        let mut x = Array2::<f64>::ones((n, 2));
1606        for i in 0..n {
1607            x[[i, 1]] = i as f64 - 4.0;
1608        }
1609        let mut s = Array2::<f64>::zeros((2, 2));
1610        s[[1, 1]] = 1.0;
1611        let components = vec![RhoPenaltyComponent {
1612            s_component: s.clone(),
1613        }];
1614        // rho_cov is 2×2 but there is only 1 component.
1615        let bad_cov = Array2::<f64>::eye(2);
1616        assert!(
1617            lawley_lr_mean_shift_with_rho_variation(
1618                x.view(),
1619                &kappas,
1620                s.view(),
1621                1..2,
1622                &components,
1623                bad_cov.view(),
1624            )
1625            .is_err()
1626        );
1627        // No components at all.
1628        let cov1 = Array2::from_shape_vec((1, 1), vec![1.0]).unwrap();
1629        assert!(
1630            lawley_lr_mean_shift_with_rho_variation(
1631                x.view(),
1632                &kappas,
1633                s.view(),
1634                1..2,
1635                &[],
1636                cov1.view(),
1637            )
1638            .is_err()
1639        );
1640        // Component with the wrong dimension.
1641        let wrong = vec![RhoPenaltyComponent {
1642            s_component: Array2::<f64>::eye(3),
1643        }];
1644        assert!(
1645            lawley_lr_mean_shift_with_rho_variation(
1646                x.view(),
1647                &kappas,
1648                s.view(),
1649                1..2,
1650                &wrong,
1651                cov1.view(),
1652            )
1653            .is_err()
1654        );
1655        // The #740 handoff is a covariance matrix; accepting a non-symmetric
1656        // matrix would silently use only the upper triangle and misstate the
1657        // ρ̂-variation contribution.
1658        let nonsymmetric_cov = Array2::from_shape_vec((1, 1), vec![1.0]).unwrap();
1659        assert!(
1660            lawley_lr_mean_shift_with_rho_variation(
1661                x.view(),
1662                &kappas,
1663                s.view(),
1664                1..2,
1665                &components,
1666                nonsymmetric_cov.view(),
1667            )
1668            .is_ok()
1669        );
1670        let components2 = vec![
1671            RhoPenaltyComponent {
1672                s_component: s.clone(),
1673            },
1674            RhoPenaltyComponent {
1675                s_component: s.clone(),
1676            },
1677        ];
1678        let bad_sym = Array2::from_shape_vec((2, 2), vec![1.0, 0.25, 0.20, 1.0]).unwrap();
1679        assert!(
1680            lawley_lr_mean_shift_with_rho_variation(
1681                x.view(),
1682                &kappas,
1683                s.view(),
1684                1..2,
1685                &components2,
1686                bad_sym.view(),
1687            )
1688            .is_err()
1689        );
1690    }
1691
1692    #[test]
1693    fn epsilon_is_invariant_under_linear_reparametrization() {
1694        // ε is a property of the model, not the basis: X → X·T for invertible
1695        // T must leave it unchanged (the penalty transforms congruently).
1696        let n = 15;
1697        let k = 3;
1698        let mut x = Array2::<f64>::zeros((n, k));
1699        let mut kappas = Vec::with_capacity(n);
1700        for i in 0..n {
1701            let z = i as f64 / n as f64;
1702            x[[i, 0]] = 1.0;
1703            x[[i, 1]] = (3.1 * z).cos();
1704            x[[i, 2]] = z - 0.5;
1705            let eta = -0.1 + 0.6 * x[[i, 1]] + 0.4 * x[[i, 2]];
1706            kappas.push(
1707                RowExpectedJets::binomial_logit(eta)
1708                    .kappas()
1709                    .expect("binomial kappas"),
1710            );
1711        }
1712        let t_mat = ndarray::arr2(&[[1.0, 0.3, -0.2], [0.0, 1.4, 0.5], [0.0, 0.0, 0.8]]);
1713        let xt = x.dot(&t_mat);
1714        let eps = lawley_epsilon(x.view(), &kappas, None).expect("ε");
1715        let eps_t = lawley_epsilon(xt.view(), &kappas, None).expect("ε reparam");
1716        assert!(
1717            (eps - eps_t).abs() < 1e-9 * (1.0 + eps.abs()),
1718            "ε not reparametrization-invariant: {eps} vs {eps_t}"
1719        );
1720    }
1721}
gam_terms/inference/lawley.rs

gam_terms/inference/
lawley.rs