//! ndarray-glm 0.1.0
//!
//! Performs regression for generalized linear models using IRLS on data stored in arrays.
//! (See the crate-level documentation for details.)
//! Tests for p-value methods behind the `stats` feature gate.
//!
//! This test was generated by claude code, hence the inelegant hard-coded values. Still, it
//! shouldn't hurt to run.
#![cfg(feature = "stats")]

use anyhow::Result;
use approx::assert_abs_diff_eq;
use ndarray::Axis;
use ndarray::array;
use ndarray_glm::{Linear, Logistic, ModelBuilder, Poisson};

/// Linear model: check pvalue_lr_test, pvalue_wald, and pvalue_exact against R's glm output.
///
/// R reference:
/// ```r
/// y <- c(0.3, 1.5, 0.8, 2.1, 1.7, 3.2, 2.5, 0.9)
/// x1 <- c(0.1, 0.5, 0.2, 0.8, 0.6, 1.1, 0.9, 0.3)
/// x2 <- c(0.4, 0.1, 0.3, 0.7, 0.2, 0.5, 0.8, 0.6)
/// m <- glm(y ~ x1 + x2, family=gaussian())
/// ```
#[test]
fn linear_pvalues() -> Result<()> {
    let response = array![0.3_f64, 1.5, 0.8, 2.1, 1.7, 3.2, 2.5, 0.9];
    let covariates = array![
        [0.1_f64, 0.4],
        [0.5, 0.1],
        [0.2, 0.3],
        [0.8, 0.7],
        [0.6, 0.2],
        [1.1, 0.5],
        [0.9, 0.8],
        [0.3, 0.6]
    ];

    // Fit once with the default (standardized) data and once without
    // standardization; every p-value method must agree across the two fits.
    let model_std = ModelBuilder::<Linear>::data(&response, &covariates).build()?;
    let fit_std = model_std.fit()?;

    let model_raw = ModelBuilder::<Linear>::data(&response, &covariates)
        .no_standardize()
        .build()?;
    let fit_raw = model_raw.fit()?;

    // R: pchisq(6.395584, df=2, lower.tail=FALSE) = 0.04085231
    assert_abs_diff_eq!(fit_std.pvalue_lr_test(), 0.04085231, epsilon = 1e-5);
    assert_abs_diff_eq!(fit_raw.pvalue_lr_test(), 0.04085231, epsilon = 1e-5);

    // Reference p-values from R. For the linear model, the Wald and exact
    // approaches produce identical values.
    // R: summary(m)$coefficients[,"Pr(>|t|)"]
    // Intercept: 0.1203156, x1: 4.007292e-06, x2: 0.2835236
    let reference = array![0.1203156, 4.007292e-06, 0.2835236];

    assert_abs_diff_eq!(fit_std.pvalue_wald()?, reference, epsilon = 1e-6);
    assert_abs_diff_eq!(fit_raw.pvalue_wald()?, reference, epsilon = 1e-6);

    // R: anova(glm(y~x1+x2-1), m, test="F") and drop1(m, test="F") Pr(>F)
    // intercept: 0.1203156, x1: 4.007e-06, x2: 0.2835
    assert_abs_diff_eq!(fit_std.pvalue_exact()?, reference, epsilon = 1e-6);
    assert_abs_diff_eq!(fit_raw.pvalue_exact()?, reference, epsilon = 1e-6);

    Ok(())
}

/// Logistic model: check p-values against R (uses normal distribution, not t).
///
/// R reference:
/// ```r
/// y <- c(1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1)
/// x <- c(0.5, -0.3, 0.2, 0.8, 0.1, 0.6, -0.1, -0.4, 0.3, 0.4, 0.7, -0.2)
/// m <- glm(y ~ x, family=binomial())
/// ```
#[test]
fn logistic_pvalues() -> Result<()> {
    let outcomes = array![
        true, false, true, true, false, true, false, false, true, false, true, true
    ];
    // Single covariate, reshaped into an n x 1 design matrix.
    let predictor = array![
        0.5_f64, -0.3, 0.2, 0.8, 0.1, 0.6, -0.1, -0.4, 0.3, 0.4, 0.7, -0.2
    ]
    .insert_axis(Axis(1));
    let model = ModelBuilder::<Logistic>::data(&outcomes, &predictor).build()?;
    let fitted = model.fit()?;

    // R: pchisq(5.026923, df=1, lower.tail=FALSE) = 0.0249562
    assert_abs_diff_eq!(fitted.pvalue_lr_test(), 0.0249562, epsilon = 1e-4);

    // R: Pr(>|z|) for intercept: 0.6150234, x: 0.07092299
    let wald = fitted.pvalue_wald()?;
    assert_abs_diff_eq!(wald[0], 0.6150234, epsilon = 1e-3);
    assert_abs_diff_eq!(wald[1], 0.07092299, epsilon = 1e-3);

    // R: anova(glm(y~x-1,family=binomial()), m, test="Chisq") and drop1 Pr(>Chi)
    // intercept: 0.6042003, x: 0.02496
    let exact = fitted.pvalue_exact()?;
    assert_abs_diff_eq!(exact[0], 0.6042003, epsilon = 1e-4);
    assert_abs_diff_eq!(exact[1], 0.02496, epsilon = 1e-3);

    Ok(())
}

/// Poisson model: check p-values against R (uses normal distribution like logistic).
///
/// R reference:
/// ```r
/// y <- c(1, 3, 2, 5, 4, 7, 6, 2)
/// x <- c(0.1, 0.4, 0.3, 0.8, 0.6, 1.0, 0.9, 0.2)
/// m <- glm(y ~ x, family=poisson())
/// ```
#[test]
fn poisson_pvalues() -> Result<()> {
    let counts = array![1_usize, 3, 2, 5, 4, 7, 6, 2];
    // Single covariate, reshaped into an n x 1 design matrix.
    let predictor = array![0.1_f64, 0.4, 0.3, 0.8, 0.6, 1.0, 0.9, 0.2].insert_axis(Axis(1));
    let model = ModelBuilder::<Poisson>::data(&counts, &predictor).build()?;
    let fitted = model.fit()?;

    // R: pchisq(8.403798, df=1, lower.tail=FALSE) = 0.003744379
    assert_abs_diff_eq!(fitted.pvalue_lr_test(), 0.003744379, epsilon = 1e-5);

    // R: Pr(>|z|) for intercept: 0.6136655, x: 0.00560404
    let wald = fitted.pvalue_wald()?;
    assert_abs_diff_eq!(wald[0], 0.6136655, epsilon = 1e-4);
    assert_abs_diff_eq!(wald[1], 0.00560404, epsilon = 1e-4);

    // R: anova(glm(y~x-1,family=poisson()), m, test="Chisq") and drop1 Pr(>Chi)
    // intercept: 0.6222284, x: 0.003744
    let exact = fitted.pvalue_exact()?;
    assert_abs_diff_eq!(exact[0], 0.6222284, epsilon = 1e-4);
    assert_abs_diff_eq!(exact[1], 0.003744, epsilon = 1e-3);

    Ok(())
}

/// Verify that pvalue_exact and pvalue_wald agree roughly for well-behaved linear models.
#[test]
fn wald_exact_agreement() -> Result<()> {
    // Same well-conditioned linear data set used by `linear_pvalues`.
    let response = array![0.3_f64, 1.5, 0.8, 2.1, 1.7, 3.2, 2.5, 0.9];
    let covariates = array![
        [0.1_f64, 0.4],
        [0.5, 0.1],
        [0.2, 0.3],
        [0.8, 0.7],
        [0.6, 0.2],
        [1.1, 0.5],
        [0.9, 0.8],
        [0.3, 0.6]
    ];
    let model = ModelBuilder::<Linear>::data(&response, &covariates).build()?;
    let fitted = model.fit()?;

    // For non-intercept parameters, Wald and exact should be in reasonable agreement
    // (exact match for linear models with sufficient data)
    assert_abs_diff_eq!(fitted.pvalue_wald()?, fitted.pvalue_exact()?, epsilon = 1e-6);

    Ok(())
}