Skip to main content

gam_inference/
model_comparison.rs

1//! Honest, calibrated model comparison computed from machinery already present
2//! at the fit optimum — exact smoothing-corrected conditional AIC and zero-refit
3//! ALO elpd with an influence diagnostic (issue #946).
4//!
5//! Every consumer (the topology race, the SAE fit payload, the `compare`
6//! entry point) reads the same two channels:
7//!
8//! * **Corrected conditional AIC.** The conditional AIC `−2·ℓ + 2·edf` treats
9//!   the smoothing parameters as known and is biased toward complexity exactly
10//!   where users rely on it (random-effect-vs-null, is-a-wiggle-real). The
11//!   Wood–Pya–Säfken (2016, JASA) correction replaces `edf = tr(F)` by
12//!   `τ = tr(F) + tr(X'WX · Σ_ρ)`, where `Σ_ρ` is the smoothing-parameter
13//!   uncertainty covariance in coefficient space. gam carries `Σ_ρ` *exactly*
14//!   (assembled from the IFT `dβ̂/dρ` and the exact outer Hessian at the fit
15//!   optimum, retained on the fit as [`UnifiedFitResult::smoothing_correction`]),
16//!   so the correction is the first exact instance of this estimator — not the
17//!   approximation mgcv must use, and not the omission most software ships.
18//!
19//! * **ALO elpd.** Pointwise log predictive densities evaluated at the
20//!   ALO-corrected leave-one-out predictions (no refits — the ALO solves reuse
21//!   the fit's factored Hessian). The summed elpd is exactly
22//!   `Σᵢ ℓ(yᵢ|η̃₋ᵢ)`. A Pareto tail fit of the cross-observation fitted-vs-ALO
23//!   ratio distribution is reported only as an influence diagnostic; it is not
24//!   draw-wise PSIS-LOO and does not alter the pointwise contributions.
25//!
26//! Both channels are *corroboration*: they ride alongside the evidence headline
27//! a race already produces, never replacing it.
28
29use gam_solve::estimate::UnifiedFitResult;
30use crate::alo::AloDiagnostics;
31use gam_solve::psis::pareto_smooth_weights;
32use gam_problem::types::{GlmLikelihoodSpec, LikelihoodSpec};
33use ndarray::{Array1, ArrayView1, ArrayView2};
34
35/// ALO predictive-accuracy summary at zero refit cost.
36#[derive(Debug, Clone)]
37pub struct AloElpd {
38    /// Expected log pointwise predictive density, `Σᵢ ℓ(yᵢ|η̃₋ᵢ)`.
39    pub elpd: f64,
40    /// Standard error of `elpd`, `√(n · Var(pointwise))`.
41    pub se: f64,
42    /// Per-observation ALO elpd contributions (length `n`).
43    pub pointwise: Array1<f64>,
44    /// GPD tail-shape `k̂` of the cross-observation fitted-vs-ALO ratio
45    /// distribution. This is an influence diagnostic, not a PSIS-LOO reliability
46    /// diagnostic.
47    pub k_hat_max: f64,
48    /// Number of tail observations flagged when the influence diagnostic exceeds
49    /// the `0.7` heavy-tail cutoff.
50    pub n_k_bad: usize,
51}
52
53/// Effective-degrees-of-freedom pair: the conditional `tr(F)` and the
54/// Wood–Pya–Säfken correction that accounts for smoothing-parameter
55/// uncertainty.
56#[derive(Debug, Clone, Copy)]
57pub struct CorrectedEdf {
58    /// `tr(F)` with `F = H⁻¹X'WX`, conditional on `λ̂`.
59    pub conditional: f64,
60    /// `τ = tr(F) + tr(X'WX · Σ_ρ)`, the exact WPS corrected EDF. Equals
61    /// [`Self::conditional`] when no smoothing correction is available (e.g.
62    /// `K = 0`, or the post-fit IFT solve was skipped).
63    pub corrected: f64,
64}
65
66impl CorrectedEdf {
67    /// The per-fit measurement the issue calls out: how much λ-uncertainty is
68    /// inflating the user's model-choice complexity penalty, `τ − tr(F)`.
69    pub fn rho_uncertainty_df(&self) -> f64 {
70        self.corrected - self.conditional
71    }
72}
73
74/// The full comparison payload reported alongside a fit's evidence headline.
75#[derive(Debug, Clone)]
76pub struct ModelComparison {
77    /// Log-likelihood at the converged mode (the engine's
78    /// constants-omitted value — see note on cross-fit comparability below).
79    pub log_lik: f64,
80    /// Conditional and WPS-corrected effective degrees of freedom.
81    pub edf: CorrectedEdf,
82    /// `−2·ℓ + 2·edf_conditional` (treats `λ̂` as known).
83    pub aic_conditional: f64,
84    /// `−2·ℓ + 2·edf_corrected` (Wood–Pya–Säfken).
85    pub aic_corrected: f64,
86    /// Zero-refit ALO predictive comparison, when ALO diagnostics and the per-row
87    /// family kernel are available.
88    pub loo: Option<AloElpd>,
89}
90
91/// Exact Wood–Pya–Säfken corrected effective degrees of freedom.
92///
93/// `edf_conditional = tr(F)` with `F = H⁻¹X'WX` (the engine's `edf_total`).
94/// The correction term is `tr(X'WX · Σ_ρ)` where `Σ_ρ` is the H⁻¹-scale
95/// smoothing-parameter uncertainty covariance. The engine stores the genuine
96/// symmetric-PSD weighted Gram `X'WX = H − S(λ)` directly on the fit
97/// ([`UnifiedFitResult::weighted_gram`], issue #1027) — pairing it with
98/// `Σ_ρ = smoothing_correction / φ` makes the correction the nonnegative
99/// `tr(A½ B A½)` it is defined to be, instead of the indefinite `H·F`
100/// reconstruction (where the stored `H` need not satisfy `H·F = X'WX`) that
101/// drove the corrected EDF below the conditional EDF.
102///
103/// Returns `edf_conditional` unchanged when any exact input is absent —
104/// the conditional value is the honest fallback, never an approximation of
105/// the correction.
106pub fn corrected_edf(
107    edf_conditional: f64,
108    weighted_gram: Option<ArrayView2<'_, f64>>,
109    smoothing_correction: Option<ArrayView2<'_, f64>>,
110    phi: f64,
111) -> CorrectedEdf {
112    let correction = wps_correction_term(weighted_gram, smoothing_correction, phi);
113    CorrectedEdf {
114        conditional: edf_conditional,
115        corrected: edf_conditional + correction,
116    }
117}
118
119/// `tr(X'WX · Σ_ρ)` with `Σ_ρ = smoothing_correction / φ` and `X'WX` the
120/// stored PSD weighted Gram. Returns `0.0` when any input is missing,
121/// non-square, dimension-mismatched, or non-finite. Nonnegative by
122/// construction (both factors are symmetric PSD).
123fn wps_correction_term(
124    weighted_gram: Option<ArrayView2<'_, f64>>,
125    smoothing_correction: Option<ArrayView2<'_, f64>>,
126    phi: f64,
127) -> f64 {
128    let (Some(xwx), Some(corr)) = (weighted_gram, smoothing_correction) else {
129        return 0.0;
130    };
131    let k = xwx.nrows();
132    if k == 0
133        || xwx.ncols() != k
134        || corr.nrows() != k
135        || corr.ncols() != k
136        || !(phi.is_finite() && phi > 0.0)
137    {
138        return 0.0;
139    }
140    // tr(X'WX · corr/φ) = (1/φ) Σ_{ij} X'WX_{ij} corr_{ji}; both symmetric, so
141    // this is the nonnegative tr(A^½ B A^½).
142    let mut trace = 0.0;
143    for i in 0..k {
144        for j in 0..k {
145            trace += xwx[[i, j]] * corr[[j, i]];
146        }
147    }
148    trace /= phi;
149    if trace.is_finite() { trace } else { 0.0 }
150}
151
152/// ALO elpd from ALO-corrected leave-one-out predictions.
153///
154/// `loglik_fitted` and `loglik_loo` are the per-observation log predictive
155/// densities at the *fitted* (`η̂`) and *ALO leave-one-out* (`η̃₋ᵢ`) linear
156/// predictors respectively. The returned elpd is the honest ALO estimand
157/// `Σᵢ loglik_loo[i]`; each pointwise contribution is exactly `loglik_loo[i]`.
158///
159/// The raw fitted-vs-ALO ratio for observation `i` is
160/// `r_i = exp(ℓ(yᵢ|η̂ᵢ) − ℓ(yᵢ|η̃₋ᵢ))` — large where dropping `i` would have
161/// moved the fit a lot. We fit a GPD tail to this cross-observation ratio vector
162/// only to report an influence diagnostic: `k_hat_max` is the fitted tail shape
163/// and `n_k_bad` is the tail count when `k̂ > 0.7`. This is not draw-wise
164/// PSIS-LOO: there is no posterior-draw dimension, the Pareto fit is across
165/// observations, and the diagnostic never changes elpd.
166///
167/// Returns `None` when the inputs are degenerate (non-finite, mismatched
168/// lengths, or empty). If the influence tail fit is unavailable, `k_hat_max` is
169/// `NaN` and `n_k_bad` is zero.
170pub fn alo_elpd(
171    loglik_fitted: ArrayView1<'_, f64>,
172    loglik_loo: ArrayView1<'_, f64>,
173) -> Option<AloElpd> {
174    let n = loglik_loo.len();
175    if n == 0 || loglik_fitted.len() != n {
176        return None;
177    }
178    if loglik_fitted
179        .iter()
180        .chain(loglik_loo.iter())
181        .any(|v| !v.is_finite())
182    {
183        return None;
184    }
185    // Cross-observation influence ratios r_i = p(y_i|η̂_i) / p(y_i|η̃₋ᵢ).
186    // Stabilize by subtracting the max log-ratio before exponentiating; the
187    // multiplicative constant does not change the fitted GPD shape.
188    let log_ratio: Array1<f64> = &loglik_fitted.to_owned() - &loglik_loo.to_owned();
189    let max_lr = log_ratio.iter().copied().fold(f64::NEG_INFINITY, f64::max);
190    if !max_lr.is_finite() {
191        return None;
192    }
193    let raw: Vec<f64> = log_ratio.iter().map(|&lr| (lr - max_lr).exp()).collect();
194
195    let (k_hat_max, n_k_bad);
196    match pareto_smooth_weights(&raw) {
197        Some(psis) => {
198            k_hat_max = psis.k_hat;
199            n_k_bad = if psis.k_hat > 0.7 { psis.tail_count } else { 0 };
200        }
201        None => {
202            k_hat_max = f64::NAN;
203            n_k_bad = 0;
204        }
205    }
206
207    let pointwise = loglik_loo.to_owned();
208    let elpd: f64 = pointwise.iter().sum();
209    let mean = elpd / n as f64;
210    let var = pointwise
211        .iter()
212        .map(|&p| (p - mean) * (p - mean))
213        .sum::<f64>()
214        / n as f64;
215    let se = (n as f64 * var).sqrt();
216    Some(AloElpd {
217        elpd,
218        se,
219        pointwise,
220        k_hat_max,
221        n_k_bad,
222    })
223}
224
225/// Result of comparing two fits on the same response: the paired predictive
226/// difference with its standard error plus the
227/// corrected-AIC gap. Both differences are oriented `a − b`: positive `delta_elpd`
228/// favours `a`, negative `delta_aic_corrected` favours `a`.
229#[derive(Debug, Clone)]
230pub struct ComparisonReport {
231    /// `Σᵢ (elpd_aᵢ − elpd_bᵢ)`; positive favours `a`.
232    pub delta_elpd: f64,
233    /// SE of `delta_elpd` from the pointwise paired differences,
234    /// `√(n · Var(elpd_aᵢ − elpd_bᵢ))`.
235    pub delta_elpd_se: f64,
236    /// `AIC_corrected(a) − AIC_corrected(b)`; negative favours `a`.
237    pub delta_aic_corrected: f64,
238    /// `false` when the two fits have a different number of observations and the
239    /// paired predictive difference could not be formed; `delta_elpd` is then
240    /// `NaN` and only the AIC gap is meaningful.
241    pub rows_aligned: bool,
242}
243
244/// Paired comparison of two fits. The predictive difference is paired
245/// row-by-row, so the two fits must have been computed on
246/// the same response in the same order; we refuse the paired difference when the
247/// observation counts disagree and surface only the AIC gap.
248pub fn compare(a: &ModelComparison, b: &ModelComparison) -> ComparisonReport {
249    let delta_aic_corrected = a.aic_corrected - b.aic_corrected;
250    match (&a.loo, &b.loo) {
251        (Some(la), Some(lb))
252            if la.pointwise.len() == lb.pointwise.len() && !la.pointwise.is_empty() =>
253        {
254            let n = la.pointwise.len();
255            let diff: Array1<f64> = &la.pointwise - &lb.pointwise;
256            let delta_elpd: f64 = diff.iter().sum();
257            let mean = delta_elpd / n as f64;
258            let var = diff.iter().map(|&d| (d - mean) * (d - mean)).sum::<f64>() / n as f64;
259            ComparisonReport {
260                delta_elpd,
261                delta_elpd_se: (n as f64 * var).sqrt(),
262                delta_aic_corrected,
263                rows_aligned: true,
264            }
265        }
266        _ => ComparisonReport {
267            delta_elpd: f64::NAN,
268            delta_elpd_se: f64::NAN,
269            delta_aic_corrected,
270            rows_aligned: false,
271        },
272    }
273}
274
275/// Assemble the comparison payload for a fitted GLM/GAM from the fit result plus
276/// optional ALO diagnostics.
277///
278/// The corrected-AIC channel is always populated (it needs only fit-retained
279/// fields). The ALO elpd channel is populated when `alo` is supplied and the
280/// fit carries an engine-level family: the leave-one-out linear predictors are
281/// the ALO `eta_tilde`, mapped through the family inverse link to means and
282/// scored by the per-row family log-likelihood kernel.
283///
284/// `eta_hat` is the *fitted* linear predictor (including offset) and `y` the
285/// response, both length `n`.
286pub fn model_comparison_from_unified(
287    fit: &UnifiedFitResult,
288    y: ArrayView1<'_, f64>,
289    eta_hat: ArrayView1<'_, f64>,
290    prior_weights: ArrayView1<'_, f64>,
291    alo: Option<&AloDiagnostics>,
292) -> ModelComparison {
293    let phi = fit.dispersion_phi();
294    let edf_conditional = fit.edf_total().unwrap_or(f64::NAN);
295    let edf = corrected_edf(
296        edf_conditional,
297        fit.weighted_gram().map(|g| g.view()),
298        fit.smoothing_correction().map(|c| c.view()),
299        phi,
300    );
301
302    // The user-facing `log_likelihood` (and the AIC / elpd derived from it) must
303    // be the *fully normalized, scale-aware* absolute log-likelihood — not the
304    // REML building block stored on the fit, which deliberately drops every
305    // family- and saturated-likelihood normalizing constant and the Gaussian
306    // scale (#1581/#1582/#1583). Recompute it here at the fitted means with the
307    // profiled Gaussian scale concretized into σ̂². For custom / GAMLSS fits with
308    // no engine-level family there is no per-row kernel to call, so we fall back
309    // to the stored value (those paths supply their own normalized log-lik).
310    let log_lik = fit
311        .likelihood_family
312        .as_ref()
313        .and_then(|spec| {
314            let scale = reporting_scale(spec, &fit.likelihood_scale, phi);
315            full_loglikelihood_at_eta(y, eta_hat, prior_weights, spec, scale)
316        })
317        .unwrap_or(fit.log_likelihood);
318
319    // An estimated / profiled dispersion is a fitted parameter and adds one
320    // degree of freedom to the conditional AIC — mgcv's `2·(edf + 1)` for a
321    // scale-estimated family (#1583). Fixed-scale families (Poisson, Binomial,
322    // user-fixed φ/θ) add none.
323    let scale_dof = fit
324        .likelihood_family
325        .as_ref()
326        .map(|spec| scale_parameter_count(spec, &fit.likelihood_scale))
327        .unwrap_or(0.0);
328
329    let aic_conditional = -2.0 * log_lik + 2.0 * (edf.conditional + scale_dof);
330    let aic_corrected = -2.0 * log_lik + 2.0 * (edf.corrected + scale_dof);
331
332    let loo = alo.and_then(|alo| {
333        let spec = fit.likelihood_family.clone()?;
334        let scale = reporting_scale(&spec, &fit.likelihood_scale, phi);
335        alo_elpd_from_family(
336            y,
337            eta_hat,
338            alo.eta_tilde.view(),
339            prior_weights,
340            &spec,
341            scale,
342        )
343    });
344
345    ModelComparison {
346        log_lik,
347        edf,
348        aic_conditional,
349        aic_corrected,
350        loo,
351    }
352}
353
354/// ALO elpd for an engine-level family: map the fitted and ALO leave-one-out
355/// linear predictors through the family inverse link, score both with the
356/// per-row log-likelihood kernel, and compute the ALO elpd plus influence
357/// diagnostic.
358pub fn alo_elpd_from_family(
359    y: ArrayView1<'_, f64>,
360    eta_hat: ArrayView1<'_, f64>,
361    eta_loo: ArrayView1<'_, f64>,
362    prior_weights: ArrayView1<'_, f64>,
363    spec: &LikelihoodSpec,
364    scale: gam_problem::types::LikelihoodScaleMetadata,
365) -> Option<AloElpd> {
366    use gam_models::family_runtime::{FamilyStrategy, strategy_for_spec};
367    use gam_solve::pirls::pointwise_loglikelihood;
368
369    let n = y.len();
370    if eta_hat.len() != n || eta_loo.len() != n || prior_weights.len() != n || n == 0 {
371        return None;
372    }
373    let strategy = strategy_for_spec(spec);
374    let mu_hat = strategy.inverse_link_array(eta_hat).ok()?;
375    let mu_loo = strategy.inverse_link_array(eta_loo).ok()?;
376    let glm = GlmLikelihoodSpec {
377        spec: spec.clone(),
378        scale,
379    };
380    // The PSIS-LOO `elpd` reported to the user is an *absolute* log predictive
381    // density, so it must use the fully normalized, scale-aware kernel (the
382    // profiled Gaussian scale is concretized by the caller). The dropped
383    // constants are identical for the fitted and LOO evaluations of a row (they
384    // depend only on yᵢ and the scale, not on μ), so the PSIS importance ratios
385    // r_i = exp(ℓ̂_i − ℓ_loo,i) — and hence k̂ — are unchanged; only the absolute
386    // elpd is corrected (#1581/#1582/#1583).
387    let ll_hat = pointwise_loglikelihood(y, &mu_hat, &glm, prior_weights);
388    let ll_loo = pointwise_loglikelihood(y, &mu_loo, &glm, prior_weights);
389    alo_elpd(ll_hat.view(), ll_loo.view())
390}
391
392/// Total fully-normalized log-likelihood at the fitted linear predictor
393/// `eta_hat`: map through the family inverse link and sum the per-row reporting
394/// kernel. `None` when the inverse link fails, the lengths disagree, or the
395/// result is non-finite (so callers fall back to the stored value).
396fn full_loglikelihood_at_eta(
397    y: ArrayView1<'_, f64>,
398    eta_hat: ArrayView1<'_, f64>,
399    prior_weights: ArrayView1<'_, f64>,
400    spec: &LikelihoodSpec,
401    scale: gam_problem::types::LikelihoodScaleMetadata,
402) -> Option<f64> {
403    use gam_models::family_runtime::{FamilyStrategy, strategy_for_spec};
404    use gam_solve::pirls::calculate_loglikelihood;
405
406    let n = y.len();
407    if eta_hat.len() != n || prior_weights.len() != n || n == 0 {
408        return None;
409    }
410    let mu_hat = strategy_for_spec(spec).inverse_link_array(eta_hat).ok()?;
411    let glm = GlmLikelihoodSpec {
412        spec: spec.clone(),
413        scale,
414    };
415    let ll = calculate_loglikelihood(y, &mu_hat, &glm, prior_weights);
416    ll.is_finite().then_some(ll)
417}
418
419/// Concretize the response-scale metadata for the *reporting* log-likelihood.
420///
421/// The profiled Gaussian carries no fixed scale (`ProfiledGaussian`), so its
422/// predictive density would silently collapse to the unit-variance form. Here we
423/// resolve the estimated residual variance `σ̂² = phi` into a concrete
424/// `FixedDispersion`, so the reporting kernel scores the density on the right
425/// measure and obeys the change-of-variables law (#1583). An explicitly fixed φ
426/// is honored as-is; every other family already carries the parameters its
427/// density needs (Beta φ, NB θ, Gamma shape, Tweedie φ), so its scale is
428/// returned unchanged.
429fn reporting_scale(
430    spec: &LikelihoodSpec,
431    scale: &gam_problem::types::LikelihoodScaleMetadata,
432    phi: f64,
433) -> gam_problem::types::LikelihoodScaleMetadata {
434    use gam_problem::types::{LikelihoodScaleMetadata, ResponseFamily};
435    match spec.response {
436        ResponseFamily::Gaussian => match scale.fixed_phi() {
437            Some(p) if p.is_finite() && p > 0.0 => LikelihoodScaleMetadata::FixedDispersion { phi: p },
438            _ if phi.is_finite() && phi > 0.0 => LikelihoodScaleMetadata::FixedDispersion { phi },
439            _ => scale.clone(),
440        },
441        _ => scale.clone(),
442    }
443}
444
445/// Number of estimated dispersion / scale parameters a family contributes to the
446/// conditional-AIC degrees of freedom (`2·(edf + scale_dof)`, #1583).
447///
448/// Gaussian profiles σ̂² (one extra dof) unless φ was user-fixed; Gamma / Beta /
449/// Tweedie / Negative-Binomial add one only when their dispersion is *estimated*
450/// from data; Poisson and Binomial carry φ ≡ 1 and add none.
451fn scale_parameter_count(
452    spec: &LikelihoodSpec,
453    scale: &gam_problem::types::LikelihoodScaleMetadata,
454) -> f64 {
455    use gam_problem::types::{LikelihoodScaleMetadata, ResponseFamily};
456    let estimated = match spec.response {
457        ResponseFamily::Gaussian => {
458            !matches!(scale, LikelihoodScaleMetadata::FixedDispersion { .. })
459        }
460        ResponseFamily::Gamma => {
461            matches!(scale, LikelihoodScaleMetadata::EstimatedGammaShape { .. })
462        }
463        ResponseFamily::Beta { .. } => {
464            matches!(scale, LikelihoodScaleMetadata::EstimatedBetaPhi { .. })
465        }
466        ResponseFamily::Tweedie { .. } => {
467            matches!(scale, LikelihoodScaleMetadata::EstimatedTweediePhi { .. })
468        }
469        ResponseFamily::NegativeBinomial { .. } => {
470            matches!(scale, LikelihoodScaleMetadata::EstimatedNegBinTheta { .. })
471        }
472        ResponseFamily::Poisson
473        | ResponseFamily::Binomial
474        | ResponseFamily::RoystonParmar => false,
475    };
476    if estimated { 1.0 } else { 0.0 }
477}
478
479#[cfg(test)]
480mod tests {
481    use super::*;
482    use ndarray::{Array2, array};
483
484    #[test]
485    fn wps_correction_is_trace_of_h_f_sigma_over_phi() {
486        // X'WX = I, φ = 2 → correction is tr(X'WX·corr)/φ = tr(corr)/φ.
487        let xwx = Array2::<f64>::eye(3);
488        let corr = array![[2.0, 0.0, 0.0], [0.0, 4.0, 0.0], [0.0, 0.0, 6.0]];
489        let edf = corrected_edf(3.0, Some(xwx.view()), Some(corr.view()), 2.0);
490        // tr(corr)/φ = (2+4+6)/2 = 6, so corrected = 3 + 6 = 9, ρ-df = 6.
491        assert!((edf.corrected - 9.0).abs() < 1e-12);
492        assert!((edf.rho_uncertainty_df() - 6.0).abs() < 1e-12);
493        assert!((edf.conditional - 3.0).abs() < 1e-12);
494    }
495
496    #[test]
497    fn corrected_edf_falls_back_to_conditional_without_inputs() {
498        let edf = corrected_edf(5.5, None, None, 1.0);
499        assert_eq!(edf.conditional, 5.5);
500        assert_eq!(edf.corrected, 5.5);
501        assert_eq!(edf.rho_uncertainty_df(), 0.0);
502    }
503
504    #[test]
505    fn alo_elpd_sums_pointwise_and_flags_no_tail() {
506        // Identical fitted and LOO log-densities → all importance ratios 1,
507        // elpd = Σ ℓ₋ᵢ.
508        let ll: Array1<f64> = array![-1.0, -2.0, -0.5, -1.5, -0.8, -1.2, -0.9, -1.1, -0.7, -1.3];
509        let loo = alo_elpd(ll.view(), ll.view()).expect("alo elpd");
510        let expected: f64 = ll.iter().sum();
511        assert!((loo.elpd - expected).abs() < 1e-9);
512        assert_eq!(loo.pointwise.len(), ll.len());
513        // No spread in importance ratios → k̂ finite, no bad points expected.
514        assert_eq!(loo.n_k_bad, 0);
515    }
516
517    #[test]
518    fn alo_elpd_pointwise_is_local_to_alo_loglikelihoods() {
519        let ll_loo: Array1<f64> = array![
520            -1.0, -1.1, -1.2, -1.3, -1.4, -1.5, -1.6, -1.7, -1.8, -1.9, -2.0, -2.1
521        ];
522        let ll_hat = ll_loo.clone();
523        let mut ll_hat_perturbed = ll_loo.clone();
524        ll_hat_perturbed[7] += 10.0;
525
526        let base = alo_elpd(ll_hat.view(), ll_loo.view()).expect("alo elpd");
527        let perturbed = alo_elpd(ll_hat_perturbed.view(), ll_loo.view()).expect("alo elpd");
528
529        for i in 0..ll_loo.len() {
530            assert_eq!(base.pointwise[i], ll_loo[i]);
531            assert_eq!(perturbed.pointwise[i], ll_loo[i]);
532            if i != 7 {
533                assert_eq!(base.pointwise[i], perturbed.pointwise[i]);
534            }
535        }
536        assert_eq!(perturbed.elpd, base.elpd);
537    }
538
539    fn gpd_sample(u: f64, k: f64, sigma: f64) -> f64 {
540        sigma * ((1.0 - u).powf(-k) - 1.0) / k
541    }
542
543    #[test]
544    fn alo_elpd_influence_diagnostic_fires_on_heavy_tailed_ratios() {
545        let mut ratios = vec![1.0; 200];
546        for i in 1..=120 {
547            let u = (i as f64 - 0.5) / 120.0;
548            ratios.push(1.0 + gpd_sample(u, 1.2, 0.5));
549        }
550        let ll_loo: Array1<f64> = Array1::from_elem(ratios.len(), -1.0);
551        let ll_hat: Array1<f64> = Array1::from_iter(
552            ll_loo
553                .iter()
554                .zip(ratios.iter())
555                .map(|(&ll, &ratio)| ll + ratio.ln()),
556        );
557
558        let loo = alo_elpd(ll_hat.view(), ll_loo.view()).expect("alo elpd");
559
560        assert_eq!(loo.pointwise, ll_loo);
561        assert!((loo.elpd - -(ratios.len() as f64)).abs() < 1e-12);
562        assert!(
563            loo.k_hat_max > 0.7,
564            "heavy fitted-vs-ALO ratio tail should fire influence diagnostic; got k_hat={}",
565            loo.k_hat_max
566        );
567        assert!(
568            loo.n_k_bad > 0,
569            "heavy fitted-vs-ALO ratio tail should count influential tail observations"
570        );
571    }
572
573    #[test]
574    fn compare_pairs_pointwise_and_orients_a_minus_b() {
575        let mk = |pw: Array1<f64>, aic: f64| ModelComparison {
576            log_lik: 0.0,
577            edf: CorrectedEdf {
578                conditional: 0.0,
579                corrected: 0.0,
580            },
581            aic_conditional: aic,
582            aic_corrected: aic,
583            loo: Some(AloElpd {
584                elpd: pw.iter().sum(),
585                se: 0.0,
586                pointwise: pw,
587                k_hat_max: 0.1,
588                n_k_bad: 0,
589            }),
590        };
591        let a = mk(array![-1.0, -1.0, -1.0, -1.0], 10.0);
592        let b = mk(array![-2.0, -2.0, -2.0, -2.0], 14.0);
593        let rep = compare(&a, &b);
594        assert!(rep.rows_aligned);
595        // a − b: elpd diff = (-4) - (-8) = +4 favours a; aic diff = 10 - 14 = -4 favours a.
596        assert!((rep.delta_elpd - 4.0).abs() < 1e-12);
597        assert!((rep.delta_aic_corrected + 4.0).abs() < 1e-12);
598        assert!(rep.delta_elpd_se.abs() < 1e-12);
599    }
600
601    #[test]
602    fn compare_refuses_unpaired_rows() {
603        let mk = |pw: Array1<f64>| ModelComparison {
604            log_lik: 0.0,
605            edf: CorrectedEdf {
606                conditional: 0.0,
607                corrected: 0.0,
608            },
609            aic_conditional: 0.0,
610            aic_corrected: 5.0,
611            loo: Some(AloElpd {
612                elpd: pw.iter().sum(),
613                se: 0.0,
614                pointwise: pw,
615                k_hat_max: 0.1,
616                n_k_bad: 0,
617            }),
618        };
619        let a = mk(array![-1.0, -1.0, -1.0]);
620        let b = mk(array![-1.0, -1.0]);
621        let rep = compare(&a, &b);
622        assert!(!rep.rows_aligned);
623        assert!(rep.delta_elpd.is_nan());
624        // AIC gap still reported.
625        assert!((rep.delta_aic_corrected - 0.0).abs() < 1e-12);
626    }
627}