gam_inference/model_comparison.rs
1//! Honest, calibrated model comparison computed from machinery already present
2//! at the fit optimum — exact smoothing-corrected conditional AIC and zero-refit
3//! ALO elpd with an influence diagnostic (issue #946).
4//!
5//! Every consumer (the topology race, the SAE fit payload, the `compare`
6//! entry point) reads the same two channels:
7//!
8//! * **Corrected conditional AIC.** The conditional AIC `−2·ℓ + 2·edf` treats
9//! the smoothing parameters as known and is biased toward complexity exactly
10//! where users rely on it (random-effect-vs-null, is-a-wiggle-real). The
11//! Wood–Pya–Säfken (2016, JASA) correction replaces `edf = tr(F)` by
12//! `τ = tr(F) + tr(X'WX · Σ_ρ)`, where `Σ_ρ` is the smoothing-parameter
13//! uncertainty covariance in coefficient space. gam carries `Σ_ρ` *exactly*
14//! (assembled from the IFT `dβ̂/dρ` and the exact outer Hessian at the fit
15//! optimum, retained on the fit as [`UnifiedFitResult::smoothing_correction`]),
16//! so the correction is the first exact instance of this estimator — not the
17//! approximation mgcv must use, and not the omission most software ships.
18//!
19//! * **ALO elpd.** Pointwise log predictive densities evaluated at the
20//! ALO-corrected leave-one-out predictions (no refits — the ALO solves reuse
21//! the fit's factored Hessian). The summed elpd is exactly
22//! `Σᵢ ℓ(yᵢ|η̃₋ᵢ)`. A Pareto tail fit of the cross-observation fitted-vs-ALO
23//! ratio distribution is reported only as an influence diagnostic; it is not
24//! draw-wise PSIS-LOO and does not alter the pointwise contributions.
25//!
26//! Both channels are *corroboration*: they ride alongside the evidence headline
27//! a race already produces, never replacing it.
28
29use gam_solve::estimate::UnifiedFitResult;
30use crate::alo::AloDiagnostics;
31use gam_solve::psis::pareto_smooth_weights;
32use gam_problem::types::{GlmLikelihoodSpec, LikelihoodSpec};
33use ndarray::{Array1, ArrayView1, ArrayView2};
34
35/// ALO predictive-accuracy summary at zero refit cost.
36#[derive(Debug, Clone)]
37pub struct AloElpd {
38 /// Expected log pointwise predictive density, `Σᵢ ℓ(yᵢ|η̃₋ᵢ)`.
39 pub elpd: f64,
40 /// Standard error of `elpd`, `√(n · Var(pointwise))`.
41 pub se: f64,
42 /// Per-observation ALO elpd contributions (length `n`).
43 pub pointwise: Array1<f64>,
44 /// GPD tail-shape `k̂` of the cross-observation fitted-vs-ALO ratio
45 /// distribution. This is an influence diagnostic, not a PSIS-LOO reliability
46 /// diagnostic.
47 pub k_hat_max: f64,
48 /// Number of tail observations flagged when the influence diagnostic exceeds
49 /// the `0.7` heavy-tail cutoff.
50 pub n_k_bad: usize,
51}
52
53/// Effective-degrees-of-freedom pair: the conditional `tr(F)` and the
54/// Wood–Pya–Säfken correction that accounts for smoothing-parameter
55/// uncertainty.
56#[derive(Debug, Clone, Copy)]
57pub struct CorrectedEdf {
58 /// `tr(F)` with `F = H⁻¹X'WX`, conditional on `λ̂`.
59 pub conditional: f64,
60 /// `τ = tr(F) + tr(X'WX · Σ_ρ)`, the exact WPS corrected EDF. Equals
61 /// [`Self::conditional`] when no smoothing correction is available (e.g.
62 /// `K = 0`, or the post-fit IFT solve was skipped).
63 pub corrected: f64,
64}
65
66impl CorrectedEdf {
67 /// The per-fit measurement the issue calls out: how much λ-uncertainty is
68 /// inflating the user's model-choice complexity penalty, `τ − tr(F)`.
69 pub fn rho_uncertainty_df(&self) -> f64 {
70 self.corrected - self.conditional
71 }
72}
73
74/// The full comparison payload reported alongside a fit's evidence headline.
75#[derive(Debug, Clone)]
76pub struct ModelComparison {
77 /// Log-likelihood at the converged mode (the engine's
78 /// constants-omitted value — see note on cross-fit comparability below).
79 pub log_lik: f64,
80 /// Conditional and WPS-corrected effective degrees of freedom.
81 pub edf: CorrectedEdf,
82 /// `−2·ℓ + 2·edf_conditional` (treats `λ̂` as known).
83 pub aic_conditional: f64,
84 /// `−2·ℓ + 2·edf_corrected` (Wood–Pya–Säfken).
85 pub aic_corrected: f64,
86 /// Zero-refit ALO predictive comparison, when ALO diagnostics and the per-row
87 /// family kernel are available.
88 pub loo: Option<AloElpd>,
89}
90
91/// Exact Wood–Pya–Säfken corrected effective degrees of freedom.
92///
93/// `edf_conditional = tr(F)` with `F = H⁻¹X'WX` (the engine's `edf_total`).
94/// The correction term is `tr(X'WX · Σ_ρ)` where `Σ_ρ` is the H⁻¹-scale
95/// smoothing-parameter uncertainty covariance. The engine stores the genuine
96/// symmetric-PSD weighted Gram `X'WX = H − S(λ)` directly on the fit
97/// ([`UnifiedFitResult::weighted_gram`], issue #1027) — pairing it with
98/// `Σ_ρ = smoothing_correction / φ` makes the correction the nonnegative
99/// `tr(A½ B A½)` it is defined to be, instead of the indefinite `H·F`
100/// reconstruction (where the stored `H` need not satisfy `H·F = X'WX`) that
101/// drove the corrected EDF below the conditional EDF.
102///
103/// Returns `edf_conditional` unchanged when any exact input is absent —
104/// the conditional value is the honest fallback, never an approximation of
105/// the correction.
106pub fn corrected_edf(
107 edf_conditional: f64,
108 weighted_gram: Option<ArrayView2<'_, f64>>,
109 smoothing_correction: Option<ArrayView2<'_, f64>>,
110 phi: f64,
111) -> CorrectedEdf {
112 let correction = wps_correction_term(weighted_gram, smoothing_correction, phi);
113 CorrectedEdf {
114 conditional: edf_conditional,
115 corrected: edf_conditional + correction,
116 }
117}
118
119/// `tr(X'WX · Σ_ρ)` with `Σ_ρ = smoothing_correction / φ` and `X'WX` the
120/// stored PSD weighted Gram. Returns `0.0` when any input is missing,
121/// non-square, dimension-mismatched, or non-finite. Nonnegative by
122/// construction (both factors are symmetric PSD).
123fn wps_correction_term(
124 weighted_gram: Option<ArrayView2<'_, f64>>,
125 smoothing_correction: Option<ArrayView2<'_, f64>>,
126 phi: f64,
127) -> f64 {
128 let (Some(xwx), Some(corr)) = (weighted_gram, smoothing_correction) else {
129 return 0.0;
130 };
131 let k = xwx.nrows();
132 if k == 0
133 || xwx.ncols() != k
134 || corr.nrows() != k
135 || corr.ncols() != k
136 || !(phi.is_finite() && phi > 0.0)
137 {
138 return 0.0;
139 }
140 // tr(X'WX · corr/φ) = (1/φ) Σ_{ij} X'WX_{ij} corr_{ji}; both symmetric, so
141 // this is the nonnegative tr(A^½ B A^½).
142 let mut trace = 0.0;
143 for i in 0..k {
144 for j in 0..k {
145 trace += xwx[[i, j]] * corr[[j, i]];
146 }
147 }
148 trace /= phi;
149 if trace.is_finite() { trace } else { 0.0 }
150}
151
152/// ALO elpd from ALO-corrected leave-one-out predictions.
153///
154/// `loglik_fitted` and `loglik_loo` are the per-observation log predictive
155/// densities at the *fitted* (`η̂`) and *ALO leave-one-out* (`η̃₋ᵢ`) linear
156/// predictors respectively. The returned elpd is the honest ALO estimand
157/// `Σᵢ loglik_loo[i]`; each pointwise contribution is exactly `loglik_loo[i]`.
158///
159/// The raw fitted-vs-ALO ratio for observation `i` is
160/// `r_i = exp(ℓ(yᵢ|η̂ᵢ) − ℓ(yᵢ|η̃₋ᵢ))` — large where dropping `i` would have
161/// moved the fit a lot. We fit a GPD tail to this cross-observation ratio vector
162/// only to report an influence diagnostic: `k_hat_max` is the fitted tail shape
163/// and `n_k_bad` is the tail count when `k̂ > 0.7`. This is not draw-wise
164/// PSIS-LOO: there is no posterior-draw dimension, the Pareto fit is across
165/// observations, and the diagnostic never changes elpd.
166///
167/// Returns `None` when the inputs are degenerate (non-finite, mismatched
168/// lengths, or empty). If the influence tail fit is unavailable, `k_hat_max` is
169/// `NaN` and `n_k_bad` is zero.
170pub fn alo_elpd(
171 loglik_fitted: ArrayView1<'_, f64>,
172 loglik_loo: ArrayView1<'_, f64>,
173) -> Option<AloElpd> {
174 let n = loglik_loo.len();
175 if n == 0 || loglik_fitted.len() != n {
176 return None;
177 }
178 if loglik_fitted
179 .iter()
180 .chain(loglik_loo.iter())
181 .any(|v| !v.is_finite())
182 {
183 return None;
184 }
185 // Cross-observation influence ratios r_i = p(y_i|η̂_i) / p(y_i|η̃₋ᵢ).
186 // Stabilize by subtracting the max log-ratio before exponentiating; the
187 // multiplicative constant does not change the fitted GPD shape.
188 let log_ratio: Array1<f64> = &loglik_fitted.to_owned() - &loglik_loo.to_owned();
189 let max_lr = log_ratio.iter().copied().fold(f64::NEG_INFINITY, f64::max);
190 if !max_lr.is_finite() {
191 return None;
192 }
193 let raw: Vec<f64> = log_ratio.iter().map(|&lr| (lr - max_lr).exp()).collect();
194
195 let (k_hat_max, n_k_bad);
196 match pareto_smooth_weights(&raw) {
197 Some(psis) => {
198 k_hat_max = psis.k_hat;
199 n_k_bad = if psis.k_hat > 0.7 { psis.tail_count } else { 0 };
200 }
201 None => {
202 k_hat_max = f64::NAN;
203 n_k_bad = 0;
204 }
205 }
206
207 let pointwise = loglik_loo.to_owned();
208 let elpd: f64 = pointwise.iter().sum();
209 let mean = elpd / n as f64;
210 let var = pointwise
211 .iter()
212 .map(|&p| (p - mean) * (p - mean))
213 .sum::<f64>()
214 / n as f64;
215 let se = (n as f64 * var).sqrt();
216 Some(AloElpd {
217 elpd,
218 se,
219 pointwise,
220 k_hat_max,
221 n_k_bad,
222 })
223}
224
225/// Result of comparing two fits on the same response: the paired predictive
226/// difference with its standard error plus the
227/// corrected-AIC gap. Both differences are oriented `a − b`: positive `delta_elpd`
228/// favours `a`, negative `delta_aic_corrected` favours `a`.
229#[derive(Debug, Clone)]
230pub struct ComparisonReport {
231 /// `Σᵢ (elpd_aᵢ − elpd_bᵢ)`; positive favours `a`.
232 pub delta_elpd: f64,
233 /// SE of `delta_elpd` from the pointwise paired differences,
234 /// `√(n · Var(elpd_aᵢ − elpd_bᵢ))`.
235 pub delta_elpd_se: f64,
236 /// `AIC_corrected(a) − AIC_corrected(b)`; negative favours `a`.
237 pub delta_aic_corrected: f64,
238 /// `false` when the two fits have a different number of observations and the
239 /// paired predictive difference could not be formed; `delta_elpd` is then
240 /// `NaN` and only the AIC gap is meaningful.
241 pub rows_aligned: bool,
242}
243
244/// Paired comparison of two fits. The predictive difference is paired
245/// row-by-row, so the two fits must have been computed on
246/// the same response in the same order; we refuse the paired difference when the
247/// observation counts disagree and surface only the AIC gap.
248pub fn compare(a: &ModelComparison, b: &ModelComparison) -> ComparisonReport {
249 let delta_aic_corrected = a.aic_corrected - b.aic_corrected;
250 match (&a.loo, &b.loo) {
251 (Some(la), Some(lb))
252 if la.pointwise.len() == lb.pointwise.len() && !la.pointwise.is_empty() =>
253 {
254 let n = la.pointwise.len();
255 let diff: Array1<f64> = &la.pointwise - &lb.pointwise;
256 let delta_elpd: f64 = diff.iter().sum();
257 let mean = delta_elpd / n as f64;
258 let var = diff.iter().map(|&d| (d - mean) * (d - mean)).sum::<f64>() / n as f64;
259 ComparisonReport {
260 delta_elpd,
261 delta_elpd_se: (n as f64 * var).sqrt(),
262 delta_aic_corrected,
263 rows_aligned: true,
264 }
265 }
266 _ => ComparisonReport {
267 delta_elpd: f64::NAN,
268 delta_elpd_se: f64::NAN,
269 delta_aic_corrected,
270 rows_aligned: false,
271 },
272 }
273}
274
275/// Assemble the comparison payload for a fitted GLM/GAM from the fit result plus
276/// optional ALO diagnostics.
277///
278/// The corrected-AIC channel is always populated (it needs only fit-retained
279/// fields). The ALO elpd channel is populated when `alo` is supplied and the
280/// fit carries an engine-level family: the leave-one-out linear predictors are
281/// the ALO `eta_tilde`, mapped through the family inverse link to means and
282/// scored by the per-row family log-likelihood kernel.
283///
284/// `eta_hat` is the *fitted* linear predictor (including offset) and `y` the
285/// response, both length `n`.
286pub fn model_comparison_from_unified(
287 fit: &UnifiedFitResult,
288 y: ArrayView1<'_, f64>,
289 eta_hat: ArrayView1<'_, f64>,
290 prior_weights: ArrayView1<'_, f64>,
291 alo: Option<&AloDiagnostics>,
292) -> ModelComparison {
293 let phi = fit.dispersion_phi();
294 let edf_conditional = fit.edf_total().unwrap_or(f64::NAN);
295 let edf = corrected_edf(
296 edf_conditional,
297 fit.weighted_gram().map(|g| g.view()),
298 fit.smoothing_correction().map(|c| c.view()),
299 phi,
300 );
301
302 // The user-facing `log_likelihood` (and the AIC / elpd derived from it) must
303 // be the *fully normalized, scale-aware* absolute log-likelihood — not the
304 // REML building block stored on the fit, which deliberately drops every
305 // family- and saturated-likelihood normalizing constant and the Gaussian
306 // scale (#1581/#1582/#1583). Recompute it here at the fitted means with the
307 // profiled Gaussian scale concretized into σ̂². For custom / GAMLSS fits with
308 // no engine-level family there is no per-row kernel to call, so we fall back
309 // to the stored value (those paths supply their own normalized log-lik).
310 let log_lik = fit
311 .likelihood_family
312 .as_ref()
313 .and_then(|spec| {
314 let scale = reporting_scale(spec, &fit.likelihood_scale, phi);
315 full_loglikelihood_at_eta(y, eta_hat, prior_weights, spec, scale)
316 })
317 .unwrap_or(fit.log_likelihood);
318
319 // An estimated / profiled dispersion is a fitted parameter and adds one
320 // degree of freedom to the conditional AIC — mgcv's `2·(edf + 1)` for a
321 // scale-estimated family (#1583). Fixed-scale families (Poisson, Binomial,
322 // user-fixed φ/θ) add none.
323 let scale_dof = fit
324 .likelihood_family
325 .as_ref()
326 .map(|spec| scale_parameter_count(spec, &fit.likelihood_scale))
327 .unwrap_or(0.0);
328
329 let aic_conditional = -2.0 * log_lik + 2.0 * (edf.conditional + scale_dof);
330 let aic_corrected = -2.0 * log_lik + 2.0 * (edf.corrected + scale_dof);
331
332 let loo = alo.and_then(|alo| {
333 let spec = fit.likelihood_family.clone()?;
334 let scale = reporting_scale(&spec, &fit.likelihood_scale, phi);
335 alo_elpd_from_family(
336 y,
337 eta_hat,
338 alo.eta_tilde.view(),
339 prior_weights,
340 &spec,
341 scale,
342 )
343 });
344
345 ModelComparison {
346 log_lik,
347 edf,
348 aic_conditional,
349 aic_corrected,
350 loo,
351 }
352}
353
354/// ALO elpd for an engine-level family: map the fitted and ALO leave-one-out
355/// linear predictors through the family inverse link, score both with the
356/// per-row log-likelihood kernel, and compute the ALO elpd plus influence
357/// diagnostic.
358pub fn alo_elpd_from_family(
359 y: ArrayView1<'_, f64>,
360 eta_hat: ArrayView1<'_, f64>,
361 eta_loo: ArrayView1<'_, f64>,
362 prior_weights: ArrayView1<'_, f64>,
363 spec: &LikelihoodSpec,
364 scale: gam_problem::types::LikelihoodScaleMetadata,
365) -> Option<AloElpd> {
366 use gam_models::family_runtime::{FamilyStrategy, strategy_for_spec};
367 use gam_solve::pirls::pointwise_loglikelihood;
368
369 let n = y.len();
370 if eta_hat.len() != n || eta_loo.len() != n || prior_weights.len() != n || n == 0 {
371 return None;
372 }
373 let strategy = strategy_for_spec(spec);
374 let mu_hat = strategy.inverse_link_array(eta_hat).ok()?;
375 let mu_loo = strategy.inverse_link_array(eta_loo).ok()?;
376 let glm = GlmLikelihoodSpec {
377 spec: spec.clone(),
378 scale,
379 };
380 // The PSIS-LOO `elpd` reported to the user is an *absolute* log predictive
381 // density, so it must use the fully normalized, scale-aware kernel (the
382 // profiled Gaussian scale is concretized by the caller). The dropped
383 // constants are identical for the fitted and LOO evaluations of a row (they
384 // depend only on yᵢ and the scale, not on μ), so the PSIS importance ratios
385 // r_i = exp(ℓ̂_i − ℓ_loo,i) — and hence k̂ — are unchanged; only the absolute
386 // elpd is corrected (#1581/#1582/#1583).
387 let ll_hat = pointwise_loglikelihood(y, &mu_hat, &glm, prior_weights);
388 let ll_loo = pointwise_loglikelihood(y, &mu_loo, &glm, prior_weights);
389 alo_elpd(ll_hat.view(), ll_loo.view())
390}
391
392/// Total fully-normalized log-likelihood at the fitted linear predictor
393/// `eta_hat`: map through the family inverse link and sum the per-row reporting
394/// kernel. `None` when the inverse link fails, the lengths disagree, or the
395/// result is non-finite (so callers fall back to the stored value).
396fn full_loglikelihood_at_eta(
397 y: ArrayView1<'_, f64>,
398 eta_hat: ArrayView1<'_, f64>,
399 prior_weights: ArrayView1<'_, f64>,
400 spec: &LikelihoodSpec,
401 scale: gam_problem::types::LikelihoodScaleMetadata,
402) -> Option<f64> {
403 use gam_models::family_runtime::{FamilyStrategy, strategy_for_spec};
404 use gam_solve::pirls::calculate_loglikelihood;
405
406 let n = y.len();
407 if eta_hat.len() != n || prior_weights.len() != n || n == 0 {
408 return None;
409 }
410 let mu_hat = strategy_for_spec(spec).inverse_link_array(eta_hat).ok()?;
411 let glm = GlmLikelihoodSpec {
412 spec: spec.clone(),
413 scale,
414 };
415 let ll = calculate_loglikelihood(y, &mu_hat, &glm, prior_weights);
416 ll.is_finite().then_some(ll)
417}
418
419/// Concretize the response-scale metadata for the *reporting* log-likelihood.
420///
421/// The profiled Gaussian carries no fixed scale (`ProfiledGaussian`), so its
422/// predictive density would silently collapse to the unit-variance form. Here we
423/// resolve the estimated residual variance `σ̂² = phi` into a concrete
424/// `FixedDispersion`, so the reporting kernel scores the density on the right
425/// measure and obeys the change-of-variables law (#1583). An explicitly fixed φ
426/// is honored as-is; every other family already carries the parameters its
427/// density needs (Beta φ, NB θ, Gamma shape, Tweedie φ), so its scale is
428/// returned unchanged.
429fn reporting_scale(
430 spec: &LikelihoodSpec,
431 scale: &gam_problem::types::LikelihoodScaleMetadata,
432 phi: f64,
433) -> gam_problem::types::LikelihoodScaleMetadata {
434 use gam_problem::types::{LikelihoodScaleMetadata, ResponseFamily};
435 match spec.response {
436 ResponseFamily::Gaussian => match scale.fixed_phi() {
437 Some(p) if p.is_finite() && p > 0.0 => LikelihoodScaleMetadata::FixedDispersion { phi: p },
438 _ if phi.is_finite() && phi > 0.0 => LikelihoodScaleMetadata::FixedDispersion { phi },
439 _ => scale.clone(),
440 },
441 _ => scale.clone(),
442 }
443}
444
445/// Number of estimated dispersion / scale parameters a family contributes to the
446/// conditional-AIC degrees of freedom (`2·(edf + scale_dof)`, #1583).
447///
448/// Gaussian profiles σ̂² (one extra dof) unless φ was user-fixed; Gamma / Beta /
449/// Tweedie / Negative-Binomial add one only when their dispersion is *estimated*
450/// from data; Poisson and Binomial carry φ ≡ 1 and add none.
451fn scale_parameter_count(
452 spec: &LikelihoodSpec,
453 scale: &gam_problem::types::LikelihoodScaleMetadata,
454) -> f64 {
455 use gam_problem::types::{LikelihoodScaleMetadata, ResponseFamily};
456 let estimated = match spec.response {
457 ResponseFamily::Gaussian => {
458 !matches!(scale, LikelihoodScaleMetadata::FixedDispersion { .. })
459 }
460 ResponseFamily::Gamma => {
461 matches!(scale, LikelihoodScaleMetadata::EstimatedGammaShape { .. })
462 }
463 ResponseFamily::Beta { .. } => {
464 matches!(scale, LikelihoodScaleMetadata::EstimatedBetaPhi { .. })
465 }
466 ResponseFamily::Tweedie { .. } => {
467 matches!(scale, LikelihoodScaleMetadata::EstimatedTweediePhi { .. })
468 }
469 ResponseFamily::NegativeBinomial { .. } => {
470 matches!(scale, LikelihoodScaleMetadata::EstimatedNegBinTheta { .. })
471 }
472 ResponseFamily::Poisson
473 | ResponseFamily::Binomial
474 | ResponseFamily::RoystonParmar => false,
475 };
476 if estimated { 1.0 } else { 0.0 }
477}
478
479#[cfg(test)]
480mod tests {
481 use super::*;
482 use ndarray::{Array2, array};
483
484 #[test]
485 fn wps_correction_is_trace_of_h_f_sigma_over_phi() {
486 // X'WX = I, φ = 2 → correction is tr(X'WX·corr)/φ = tr(corr)/φ.
487 let xwx = Array2::<f64>::eye(3);
488 let corr = array![[2.0, 0.0, 0.0], [0.0, 4.0, 0.0], [0.0, 0.0, 6.0]];
489 let edf = corrected_edf(3.0, Some(xwx.view()), Some(corr.view()), 2.0);
490 // tr(corr)/φ = (2+4+6)/2 = 6, so corrected = 3 + 6 = 9, ρ-df = 6.
491 assert!((edf.corrected - 9.0).abs() < 1e-12);
492 assert!((edf.rho_uncertainty_df() - 6.0).abs() < 1e-12);
493 assert!((edf.conditional - 3.0).abs() < 1e-12);
494 }
495
496 #[test]
497 fn corrected_edf_falls_back_to_conditional_without_inputs() {
498 let edf = corrected_edf(5.5, None, None, 1.0);
499 assert_eq!(edf.conditional, 5.5);
500 assert_eq!(edf.corrected, 5.5);
501 assert_eq!(edf.rho_uncertainty_df(), 0.0);
502 }
503
504 #[test]
505 fn alo_elpd_sums_pointwise_and_flags_no_tail() {
506 // Identical fitted and LOO log-densities → all importance ratios 1,
507 // elpd = Σ ℓ₋ᵢ.
508 let ll: Array1<f64> = array![-1.0, -2.0, -0.5, -1.5, -0.8, -1.2, -0.9, -1.1, -0.7, -1.3];
509 let loo = alo_elpd(ll.view(), ll.view()).expect("alo elpd");
510 let expected: f64 = ll.iter().sum();
511 assert!((loo.elpd - expected).abs() < 1e-9);
512 assert_eq!(loo.pointwise.len(), ll.len());
513 // No spread in importance ratios → k̂ finite, no bad points expected.
514 assert_eq!(loo.n_k_bad, 0);
515 }
516
517 #[test]
518 fn alo_elpd_pointwise_is_local_to_alo_loglikelihoods() {
519 let ll_loo: Array1<f64> = array![
520 -1.0, -1.1, -1.2, -1.3, -1.4, -1.5, -1.6, -1.7, -1.8, -1.9, -2.0, -2.1
521 ];
522 let ll_hat = ll_loo.clone();
523 let mut ll_hat_perturbed = ll_loo.clone();
524 ll_hat_perturbed[7] += 10.0;
525
526 let base = alo_elpd(ll_hat.view(), ll_loo.view()).expect("alo elpd");
527 let perturbed = alo_elpd(ll_hat_perturbed.view(), ll_loo.view()).expect("alo elpd");
528
529 for i in 0..ll_loo.len() {
530 assert_eq!(base.pointwise[i], ll_loo[i]);
531 assert_eq!(perturbed.pointwise[i], ll_loo[i]);
532 if i != 7 {
533 assert_eq!(base.pointwise[i], perturbed.pointwise[i]);
534 }
535 }
536 assert_eq!(perturbed.elpd, base.elpd);
537 }
538
539 fn gpd_sample(u: f64, k: f64, sigma: f64) -> f64 {
540 sigma * ((1.0 - u).powf(-k) - 1.0) / k
541 }
542
543 #[test]
544 fn alo_elpd_influence_diagnostic_fires_on_heavy_tailed_ratios() {
545 let mut ratios = vec![1.0; 200];
546 for i in 1..=120 {
547 let u = (i as f64 - 0.5) / 120.0;
548 ratios.push(1.0 + gpd_sample(u, 1.2, 0.5));
549 }
550 let ll_loo: Array1<f64> = Array1::from_elem(ratios.len(), -1.0);
551 let ll_hat: Array1<f64> = Array1::from_iter(
552 ll_loo
553 .iter()
554 .zip(ratios.iter())
555 .map(|(&ll, &ratio)| ll + ratio.ln()),
556 );
557
558 let loo = alo_elpd(ll_hat.view(), ll_loo.view()).expect("alo elpd");
559
560 assert_eq!(loo.pointwise, ll_loo);
561 assert!((loo.elpd - -(ratios.len() as f64)).abs() < 1e-12);
562 assert!(
563 loo.k_hat_max > 0.7,
564 "heavy fitted-vs-ALO ratio tail should fire influence diagnostic; got k_hat={}",
565 loo.k_hat_max
566 );
567 assert!(
568 loo.n_k_bad > 0,
569 "heavy fitted-vs-ALO ratio tail should count influential tail observations"
570 );
571 }
572
573 #[test]
574 fn compare_pairs_pointwise_and_orients_a_minus_b() {
575 let mk = |pw: Array1<f64>, aic: f64| ModelComparison {
576 log_lik: 0.0,
577 edf: CorrectedEdf {
578 conditional: 0.0,
579 corrected: 0.0,
580 },
581 aic_conditional: aic,
582 aic_corrected: aic,
583 loo: Some(AloElpd {
584 elpd: pw.iter().sum(),
585 se: 0.0,
586 pointwise: pw,
587 k_hat_max: 0.1,
588 n_k_bad: 0,
589 }),
590 };
591 let a = mk(array![-1.0, -1.0, -1.0, -1.0], 10.0);
592 let b = mk(array![-2.0, -2.0, -2.0, -2.0], 14.0);
593 let rep = compare(&a, &b);
594 assert!(rep.rows_aligned);
595 // a − b: elpd diff = (-4) - (-8) = +4 favours a; aic diff = 10 - 14 = -4 favours a.
596 assert!((rep.delta_elpd - 4.0).abs() < 1e-12);
597 assert!((rep.delta_aic_corrected + 4.0).abs() < 1e-12);
598 assert!(rep.delta_elpd_se.abs() < 1e-12);
599 }
600
601 #[test]
602 fn compare_refuses_unpaired_rows() {
603 let mk = |pw: Array1<f64>| ModelComparison {
604 log_lik: 0.0,
605 edf: CorrectedEdf {
606 conditional: 0.0,
607 corrected: 0.0,
608 },
609 aic_conditional: 0.0,
610 aic_corrected: 5.0,
611 loo: Some(AloElpd {
612 elpd: pw.iter().sum(),
613 se: 0.0,
614 pointwise: pw,
615 k_hat_max: 0.1,
616 n_k_bad: 0,
617 }),
618 };
619 let a = mk(array![-1.0, -1.0, -1.0]);
620 let b = mk(array![-1.0, -1.0]);
621 let rep = compare(&a, &b);
622 assert!(!rep.rows_aligned);
623 assert!(rep.delta_elpd.is_nan());
624 // AIC gap still reported.
625 assert!((rep.delta_aic_corrected - 0.0).abs() < 1e-12);
626 }
627}