gam 0.3.84

Generalized penalized likelihood engine
Documentation
//! Wood-style smooth-component Wald tests.
//!
//! The test follows the rank-truncated covariance inverse used by Wood (2013):
//! the penalized part of the coefficient block is tested with a pseudo-inverse
//! of rank approximately equal to the term EDF, while unpenalized null-space
//! coefficients are kept full-rank. The reference degrees of freedom use the
//! coefficient-space influence block `F_jj = (H⁻¹ X'WX)_jj`.

use crate::faer_ndarray::FaerEigh;
use ndarray::{Array1, Array2, ArrayView1, s};
use statrs::distribution::{ChiSquared, ContinuousCDF, FisherSnedecor};
use std::ops::Range;

/// Whether the residual dispersion `φ` is known or estimated from the
/// fit.  Selects the reference distribution for the Wald p-value: `Known`
/// → `χ²_{ref_df}` (e.g. binomial/Poisson), `Estimated` → `F_{ref_df,
/// residual_df}` (e.g. Gaussian where `φ̂` carries its own sampling
/// variability).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SmoothTestScale {
    Known,
    Estimated,
}

/// Inputs to `wood_smooth_test`. `beta` is the full coefficient vector;
/// the term block being tested is `beta[coeff_range]`. `covariance` is the
/// matching posterior covariance Σ̂ (full p×p; the diagonal block is sliced
/// out). **`covariance` must be the scale-included posterior covariance**
/// (mgcv `Vb`/`Vp`, i.e. `H⁻¹` already multiplied by the dispersion `φ̂`),
/// so the Wald statistic `T = β̂'·Σ̂⁻·β̂` is dimensionless — the residual
/// dispersion has already been divided out and the F-statistic is `T/ref_df`
/// with *no* further `φ̂` factor. `influence_matrix` is the optional
/// coefficient-space influence `F = H⁻¹ X'WX`; when present
/// `tr(F_jj)² / tr(F_jj²)` is used as the Wood-corrected reference d.f.
/// `edf` is the smooth's effective d.f. (rank truncation for the penalized
/// subblock); `nullspace_dim` is the fixed-effect (unpenalized) leading
/// dimension within the block. `residual_df` is the denominator d.f. for the
/// `Estimated`-scale F branch.
#[derive(Debug, Clone)]
pub struct SmoothTestInput<'a> {
    pub beta: ArrayView1<'a, f64>,
    pub covariance: &'a Array2<f64>,
    pub influence_matrix: Option<&'a Array2<f64>>,
    pub coeff_range: Range<usize>,
    pub edf: f64,
    pub nullspace_dim: usize,
    pub residual_df: f64,
    pub scale: SmoothTestScale,
}

/// Output of `wood_smooth_test`: the Wald statistic
/// `T = β̂'·Σ̂⁻ᵣ·β̂` (rank-`r` pseudo-inverse on the penalized subblock,
/// full-rank on the nullspace subblock), the reference d.f. used to compute
/// the tail probability, and the resulting `p_value` (clamped to `[0,1]`).
#[derive(Debug, Clone)]
pub struct SmoothTestResult {
    pub statistic: f64,
    pub ref_df: f64,
    pub p_value: f64,
}

/// Wood (2013) rank-truncated Wald smooth-component test.
///
/// Splits `beta[coeff_range]` into a leading `nullspace_dim` unpenalized
/// block (tested at full rank) and a trailing penalized block (tested at
/// rank `round(edf − nullspace_dim)` via the spectral truncated
/// pseudo-inverse of the matching Σ̂ subblock). The combined statistic
/// `T` is compared against `χ²_{ref_df}` when the scale is `Known`, or
/// `F = T/ref_df` against `F_{ref_df, residual_df}` when `Estimated`.
///
/// Because `covariance` is the scale-included posterior covariance, `T`
/// already has the dispersion `φ̂` divided out (it is a proper Wald χ²);
/// the estimated-scale F-statistic is therefore `T/ref_df` with no extra
/// `φ̂` factor. Dividing by `φ̂` a second time — the historical defect
/// fixed in issue #675 — makes the p-value scale as `1/φ̂` and so depend on
/// the units of the response. Returns `None` on degenerate inputs (empty
/// block, non-finite EDF, non-finite stat, or non-positive residual d.f.
/// in the F branch).
pub fn wood_smooth_test(input: SmoothTestInput<'_>) -> Option<SmoothTestResult> {
    let start = input.coeff_range.start;
    let end = input.coeff_range.end;
    if start >= end
        || end > input.beta.len()
        || end > input.covariance.nrows()
        || end > input.covariance.ncols()
        || !input.edf.is_finite()
        || input.edf <= 0.0
    {
        return None;
    }
    let k = end - start;
    let beta = input.beta.slice(s![start..end]).to_owned();
    let cov = block(input.covariance, start, end)?;
    let null_dim = input.nullspace_dim.min(k);
    let pen_dim = k.saturating_sub(null_dim);

    let mut statistic = 0.0;
    if null_dim > 0 {
        let beta_null = beta.slice(s![0..null_dim]).to_owned();
        let cov_null = cov.slice(s![0..null_dim, 0..null_dim]).to_owned();
        statistic += full_rank_quadratic(&beta_null, &cov_null)?;
    }
    if pen_dim > 0 {
        let beta_pen = beta.slice(s![null_dim..k]).to_owned();
        let cov_pen = cov.slice(s![null_dim..k, null_dim..k]).to_owned();
        let rank = truncated_rank(input.edf - null_dim as f64, pen_dim);
        if rank > 0 {
            statistic += truncated_quadratic(&beta_pen, &cov_pen, rank)?;
        }
    }

    let ref_df = reference_df(input.influence_matrix, start, end).unwrap_or(input.edf.max(1e-12));
    if !statistic.is_finite() || statistic < 0.0 || !ref_df.is_finite() || ref_df <= 0.0 {
        return None;
    }
    let p_value = match input.scale {
        SmoothTestScale::Known => {
            let dist = ChiSquared::new(ref_df).ok()?;
            1.0 - dist.cdf(statistic)
        }
        SmoothTestScale::Estimated => {
            if !input.residual_df.is_finite() || input.residual_df <= 0.0 {
                return None;
            }
            // `statistic` is already a dispersion-free Wald χ² (the covariance
            // is scale-included), so the estimated-scale F-statistic is the
            // χ² divided by its reference d.f. only — mgcv's `Tr/rank`. Dividing
            // by `φ̂` again would re-introduce a response-unit dependence (#675).
            let f_stat = statistic / ref_df;
            let dist = FisherSnedecor::new(ref_df, input.residual_df).ok()?;
            1.0 - dist.cdf(f_stat)
        }
    };
    if !p_value.is_finite() {
        return None;
    }
    Some(SmoothTestResult {
        statistic,
        ref_df,
        p_value: p_value.clamp(0.0, 1.0),
    })
}

fn truncated_rank(edf_pen: f64, pen_dim: usize) -> usize {
    if pen_dim == 0 || !edf_pen.is_finite() || edf_pen <= 0.0 {
        return 0;
    }
    (edf_pen.round() as usize).clamp(1, pen_dim)
}

fn block(matrix: &Array2<f64>, start: usize, end: usize) -> Option<Array2<f64>> {
    if start >= end || end > matrix.nrows() || end > matrix.ncols() {
        return None;
    }
    Some(matrix.slice(s![start..end, start..end]).to_owned())
}

fn full_rank_quadratic(beta: &Array1<f64>, cov: &Array2<f64>) -> Option<f64> {
    truncated_quadratic(beta, cov, beta.len())
}

fn truncated_quadratic(beta: &Array1<f64>, cov: &Array2<f64>, rank: usize) -> Option<f64> {
    if beta.is_empty() || cov.nrows() != beta.len() || cov.ncols() != beta.len() || rank == 0 {
        return None;
    }
    let (evals, evecs) = cov.to_owned().eigh(faer::Side::Lower).ok()?;
    let mut order: Vec<usize> = (0..evals.len()).collect();
    order.sort_by(|&a, &b| evals[b].total_cmp(&evals[a]));
    let tol = evals
        .iter()
        .copied()
        .fold(0.0_f64, |acc, v| acc.max(v.abs()))
        * 1e-10;
    let mut q = 0.0;
    let mut used = 0usize;
    for idx in order {
        let lambda = evals[idx];
        if lambda <= tol {
            continue;
        }
        let v = evecs.column(idx);
        let proj = beta.dot(&v);
        q += proj * proj / lambda;
        used += 1;
        if used >= rank {
            break;
        }
    }
    (used > 0 && q.is_finite()).then_some(q.max(0.0))
}

fn reference_df(influence: Option<&Array2<f64>>, start: usize, end: usize) -> Option<f64> {
    let f = influence?;
    let f_block = block(f, start, end)?;
    let tr = (0..f_block.nrows()).map(|i| f_block[[i, i]]).sum::<f64>();
    let tr2 = f_block.dot(&f_block).diag().sum();
    if tr.is_finite() && tr2.is_finite() && tr > 0.0 && tr2 > 0.0 {
        Some((tr * tr / tr2).max(1e-12))
    } else {
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::array;

    #[test]
    fn reference_df_uses_trace_correction() {
        let beta = array![1.0, 2.0];
        let cov = array![[2.0, 0.0], [0.0, 3.0]];
        let f = array![[0.5, 0.0], [0.0, 0.25]];
        let out = wood_smooth_test(SmoothTestInput {
            beta: beta.view(),
            covariance: &cov,
            influence_matrix: Some(&f),
            coeff_range: 0..2,
            edf: 1.0,
            nullspace_dim: 0,
            residual_df: 20.0,
            scale: SmoothTestScale::Known,
        })
        .expect("smooth test");
        assert!((out.ref_df - 1.8).abs() < 1e-12);
        assert!(out.statistic > 0.0);
        assert!((0.0..=1.0).contains(&out.p_value));
    }

    /// Rescaling the response by `c` is `β → c·β`, `Σ → c²·Σ` (the covariance
    /// is scale-included). The Wald statistic `T = β'Σ⁻β` is then invariant,
    /// and — because the estimated-scale F-statistic is `T/ref_df` with no
    /// further `φ̂` factor — so is the p-value. This is the unit-level guard
    /// for issue #675: the historical `T/(ref_df·φ̂)` made the p-value scale
    /// as `1/c²` even though `T` did not move.
    #[test]
    fn estimated_scale_pvalue_is_response_unit_invariant() {
        let beta = array![2.5, -3.5, 1.8];
        let cov = array![[2.0, 0.3, 0.0], [0.3, 1.5, 0.1], [0.0, 0.1, 0.9]];
        let f = array![[0.7, 0.0, 0.0], [0.0, 0.6, 0.0], [0.0, 0.0, 0.4]];

        let run = |c: f64| {
            let beta_c = &beta * c;
            let cov_c = &cov * (c * c);
            wood_smooth_test(SmoothTestInput {
                beta: beta_c.view(),
                covariance: &cov_c,
                influence_matrix: Some(&f),
                coeff_range: 0..3,
                edf: 2.0,
                nullspace_dim: 0,
                residual_df: 50.0,
                scale: SmoothTestScale::Estimated,
            })
            .expect("smooth test")
        };

        let base = run(1.0);
        assert!(base.statistic > 0.0);
        // A non-trivial, clearly-significant p-value so the invariance check is
        // not vacuously comparing two values pinned at a boundary.
        assert!(base.p_value > 0.0 && base.p_value < 0.05);
        for c in [1e-3, 0.1, 10.0, 1e3, 1e6] {
            let scaled = run(c);
            let rel_stat = (scaled.statistic - base.statistic).abs() / base.statistic;
            assert!(
                rel_stat < 1e-9,
                "Wald statistic not scale-invariant at c={c}: {} vs {}",
                scaled.statistic,
                base.statistic
            );
            let rel_p = (scaled.p_value - base.p_value).abs() / base.p_value;
            assert!(
                rel_p < 1e-9,
                "estimated-scale p-value not scale-invariant at c={c}: {} vs {}",
                scaled.p_value,
                base.p_value
            );
        }
    }
}