limma-rust 0.1.0

//! Genuine association of gene expression profiles. Port of limma's `genas`
//! (`genas.R`).
//!
//! Given a linear-model fit with at least two coefficients, `genas` separates
//! the *technical* correlation between two coefficient estimates (induced by a
//! shared design, read straight off `cov.coefficients`) from their *biological*
//! correlation across genes. The biological covariance `V0` is estimated by
//! maximizing a bivariate moderated-t likelihood: a null fit with `V0` diagonal
//! and a full fit with an off-diagonal term, both optimized by Nelder–Mead
//! ([`crate::optim::nelder_mead`], a faithful port of R's `optim`). A
//! likelihood-ratio statistic and its `chi^2_1` p-value test for non-zero
//! biological correlation.
//!
//! All of limma's gene-subset selectors are supported ([`GenasSubset`]): the
//! default `all`, the differential-expression filters `Fpval`/`p.union`/`p.int`,
//! and the fold-change filters `logFC`/`predFC`. (R's fold-change branches read
//! as if they soft-threshold the coefficients, but that write is inert — see
//! [`which_genes`] — so all selectors refit on the raw coefficients.) As in R,
//! the fit must come from an
//! unweighted, no-missing least-squares fit so that a single common
//! `cov.coefficients` applies to all genes, and it must already have been run
//! through `eBayes`.

use anyhow::{anyhow, bail, Result};
use ndarray::{Array1, Array2};
use statrs::distribution::{ContinuousCDF, Normal};

use crate::classifytestsf::classify_tests_fstat;
use crate::ebayes::ebayes;
use crate::fit::MArrayLM;
use crate::fitgamma::fit_gamma_intercept;
use crate::linalg::cov2cor;
use crate::optim::nelder_mead;
use crate::predfcm::pred_fcm;
use crate::proptruenull::{prop_true_null, PropTrueNullMethod};
use crate::special::{chi2_sf, f_sf};

/// Gene subset used by [`genas`] (`genas`'s `subset` argument).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GenasSubset {
    /// `"all"`: every gene contributes (the default).
    All,
    /// `"Fpval"`: genes ranked into the estimated DE proportion by `F.p.value`.
    Fpval,
    /// `"p.union"`: union of the per-coefficient DE-ranked genes.
    PUnion,
    /// `"p.int"`: intersection of the per-coefficient DE-ranked genes.
    PInt,
    /// `"logFC"`: genes whose `|coef|` exceeds its 90th percentile (either
    /// coefficient).
    LogFC,
    /// `"predFC"`: as `logFC` but on predictive fold changes ([`pred_fcm`]).
    PredFC,
}

/// Result of [`genas`], mirroring R's `genas` return list.
#[derive(Debug, Clone)]
pub struct Genas {
    /// Correlation of the two coefficient *estimates* from the design alone.
    pub technical_correlation: f64,
    /// Estimated biological covariance matrix `V0` (the `L D Lᵀ` fit).
    pub covariance_matrix: [[f64; 2]; 2],
    /// Biological correlation implied by `covariance_matrix`.
    pub biological_correlation: f64,
    /// Likelihood-ratio deviance `|2(ℓ_null − ℓ_full)|`.
    pub deviance: f64,
    /// `chi^2_1` upper-tail p-value of `deviance`.
    pub p_value: f64,
    /// Number of genes used.
    pub n: usize,
}

/// Compute genuine (biological) association between two coefficients of `fit`.
///
/// `coef` gives the two 0-based coefficient columns to correlate (R's default
/// `coef = c(1, 2)` is `(0, 1)` here). `subset` selects which genes feed the
/// estimate ([`GenasSubset::All`] reproduces the original behaviour); the
/// non-`all` selectors refit `eBayes` on the chosen genes exactly as limma
/// does. The fit must already have been run through `eBayes` (so `s2_post`,
/// `df_total`, `p_value` and `f_p_value` are present).
pub fn genas(fit: &MArrayLM, coef: (usize, usize), subset: GenasSubset) -> Result<Genas> {
    let ngenes = fit.coefficients.nrows();
    if ngenes < 1 {
        return Ok(null_genas());
    }
    let (c1, c2) = coef;
    let ncoef = fit.coefficients.ncols();
    if c1 >= ncoef || c2 >= ncoef {
        bail!("coef out of range for a {ncoef}-coefficient fit");
    }
    let s2post = fit
        .s2_post
        .as_ref()
        .ok_or_else(|| anyhow!("genas requires an eBayes fit (s2_post missing)"))?;
    let dft = fit
        .df_total
        .as_ref()
        .ok_or_else(|| anyhow!("genas requires an eBayes fit (df_total missing)"))?;

    // The Nelder–Mead start is fixed from the full two-coefficient fit (all
    // genes), before any subset refit, exactly as limma computes it once at the
    // top of `genas` and reuses it for the subset `optim` calls.
    let b0_full = fit.coefficients.column(c1).to_vec();
    let b1_full = fit.coefficients.column(c2).to_vec();
    let v = cov_block(fit, c1, c2);
    let s2post_slice = s2post.as_slice().expect("contiguous s2_post");
    let x_start = gamma_start(&b0_full, &b1_full, v, s2post_slice);

    // Default path: correlate the two coefficient columns over all genes.
    if subset == GenasSubset::All {
        return Ok(genas_core(
            &b0_full,
            &b1_full,
            v,
            s2post_slice,
            dft.as_slice().expect("contiguous df_total"),
            x_start,
        ));
    }

    // Subset path: pick differentially expressed genes, refit eBayes on them,
    // then correlate. trend/robust are inherited from the incoming fit's priors.
    let mask = which_genes(fit, subset, c1, c2)?;
    let selected: Vec<usize> = (0..ngenes).filter(|&g| mask[g]).collect();
    if selected.is_empty() {
        return Ok(null_genas());
    }
    let trend = is_varying(fit.s2_prior.as_ref());
    let robust = is_varying(fit.df_prior.as_ref());
    let mut sub = build_subfit(fit, &selected, c1, c2);
    ebayes(&mut sub, 0.01, (0.1, 4.0), trend, robust)?;
    let s2post2 = sub.s2_post.as_ref().expect("eBayes fills s2_post");
    let dft2 = sub.df_total.as_ref().expect("eBayes fills df_total");
    let b0 = sub.coefficients.column(0).to_vec();
    let b1 = sub.coefficients.column(1).to_vec();
    Ok(genas_core(
        &b0,
        &b1,
        v,
        s2post2.as_slice().expect("contiguous s2_post"),
        dft2.as_slice().expect("contiguous df_total"),
        x_start,
    ))
}

/// Nelder–Mead starting point: per-coefficient gamma-intercept variances mapped
/// through `log(diag(chol(diag(x1, x2)))) = (½ln x1, ½ln x2)`. limma computes
/// this once from the *full* two-coefficient fit and reuses it even when the
/// subset selectors refit `eBayes` on fewer genes, so the start must be passed
/// into [`genas_core`] rather than recomputed from the (possibly subsetted) data.
fn gamma_start(b0: &[f64], b1: &[f64], v: [[f64; 2]; 2], s2post: &[f64]) -> [f64; 2] {
    let ngenes = b0.len();
    let ya: Vec<f64> = (0..ngenes).map(|g| b0[g] * b0[g] / s2post[g]).collect();
    let yb: Vec<f64> = (0..ngenes).map(|g| b1[g] * b1[g] / s2post[g]).collect();
    let x1 = fit_gamma_intercept(&ya, &[v[0][0]], 1000);
    let x2 = fit_gamma_intercept(&yb, &[v[1][1]], 1000);
    if x1 > 0.0 && x2 > 0.0 {
        [0.5 * x1.ln(), 0.5 * x2.ln()]
    } else {
        [0.0, 0.0]
    }
}

/// The two-coefficient biological-correlation estimate shared by every subset:
/// null then full moderated-t optimisation from the shared `x_start`, deviance.
fn genas_core(
    b0: &[f64],
    b1: &[f64],
    v: [[f64; 2]; 2],
    s2post: &[f64],
    dft: &[f64],
    x_start: [f64; 2],
) -> Genas {
    let ngenes = b0.len();
    let x_start = x_start.to_vec();
    let m = 2.0;

    // Null fit (V0 diagonal) then full fit (with off-diagonal term).
    let q2 = nelder_mead(&x_start, |x| loglik_null(x, v, b0, b1, s2post, dft, m));
    let q1_start = [q2.par[0], q2.par[1], 0.0];
    let q1 = nelder_mead(&q1_start, |x| loglik_full(x, v, b0, b1, s2post, dft, m));

    // Biological covariance V0 = L D Lᵀ at the full optimum.
    let d1 = q1.par[0].exp();
    let d2 = q1.par[1].exp();
    let bb = q1.par[2];
    let v0 = [[d1, bb * d1], [bb * d1, bb * bb * d1 + d2]];
    let rhobiol = v0[1][0] / (v0[0][0] * v0[1][1]).sqrt();
    let rhotech = v[1][0] / (v[0][0] * v[1][1]).sqrt();

    let deviance = (2.0 * (q2.value - q1.value)).abs();
    // pchisq(deviance, df=1, lower.tail=FALSE) = 2*Phi(-sqrt(deviance)).
    let normal = Normal::new(0.0, 1.0).expect("standard normal");
    let p_value = 2.0 * normal.cdf(-deviance.sqrt());

    Genas {
        technical_correlation: rhotech,
        covariance_matrix: v0,
        biological_correlation: rhobiol,
        deviance,
        p_value,
        n: ngenes,
    }
}

/// The 2x2 technical covariance block for coefficient columns `c1`, `c2`.
fn cov_block(fit: &MArrayLM, c1: usize, c2: usize) -> [[f64; 2]; 2] {
    [
        [
            fit.cov_coefficients[[c1, c1]],
            fit.cov_coefficients[[c1, c2]],
        ],
        [
            fit.cov_coefficients[[c2, c1]],
            fit.cov_coefficients[[c2, c2]],
        ],
    ]
}

/// The F-test p-value recomputed over *only* coefficients `c1`, `c2`. limma's
/// `genas` calls `.whichGenes` on `fit[,coef]`, and `[.MArrayLM` regenerates
/// `F`/`F.p.value` after a column subset via `classifyTestsF(fstat.only=TRUE)`.
/// The `"Fpval"` selector therefore ranks on this two-coefficient F-test, not on
/// the original full-model `F.p.value`.
fn fpval_two_coef(fit: &MArrayLM, c1: usize, c2: usize) -> Result<Vec<f64>> {
    let t = fit
        .t
        .as_ref()
        .ok_or_else(|| anyhow!("genas subset=\"Fpval\" needs moderated t (run eBayes)"))?;
    let dfp = fit
        .df_prior
        .as_ref()
        .ok_or_else(|| anyhow!("genas subset=\"Fpval\" needs df.prior (run eBayes)"))?;
    let ng = t.nrows();
    let mut t2 = Array2::<f64>::zeros((ng, 2));
    for g in 0..ng {
        t2[[g, 0]] = t[[g, c1]];
        t2[[g, 1]] = t[[g, c2]];
    }
    // cov2cor of the 2x2 technical block, lifting any exactly-zero variance to 1
    // exactly as `classifyTestsF` does before whitening.
    let blk = cov_block(fit, c1, c2);
    let mut cov = Array2::<f64>::zeros((2, 2));
    for i in 0..2 {
        for j in 0..2 {
            cov[[i, j]] = blk[i][j];
        }
        if cov[[i, i]] == 0.0 {
            cov[[i, i]] = 1.0;
        }
    }
    let cor = cov2cor(&cov);
    let (fstat, r) = classify_tests_fstat(&t2, Some(&cor));
    let df1 = r as f64;
    let mut fp = vec![0.0; ng];
    for g in 0..ng {
        let df2 = dfp[g] + fit.df_residual[g];
        fp[g] = if df2.is_finite() {
            f_sf(fstat[g], df1, df2)
        } else {
            chi2_sf(df1 * fstat[g], df1)
        };
    }
    Ok(fp)
}

/// Port of limma's `.whichGenes`: the boolean gene-inclusion mask for a subset
/// selector.
///
/// limma's `logFC`/`predFC` branches *appear* to soft-threshold the coefficients
/// (`fit$coeff[,j] <- sign(.)*(abs(.)-q)`), but that assignment — via R's partial
/// name matching — writes a throwaway `coeff` element while the downstream
/// likelihood reads the untouched `coefficients`. The thresholding is therefore
/// inert; only the gene *selection* differs between selectors, and the refit
/// always uses the raw coefficients. (This is also why `logFC` and `predFC`
/// yield identical estimates: they differ only in the discarded write.) We
/// replicate that exactly by returning just the mask.
fn which_genes(fit: &MArrayLM, subset: GenasSubset, c1: usize, c2: usize) -> Result<Vec<bool>> {
    let ng = fit.coefficients.nrows();
    let mask = match subset {
        GenasSubset::All => vec![true; ng],
        GenasSubset::Fpval => {
            let fp = fpval_two_coef(fit, c1, c2)?;
            let p = 1.0 - prop_true_null(&fp, PropTrueNullMethod::Lfdr, 20);
            let r = rank_average(&fp);
            let cut = p * ng as f64;
            r.iter().map(|&ri| ri <= cut).collect()
        }
        GenasSubset::PUnion | GenasSubset::PInt => {
            let pv = fit
                .p_value
                .as_ref()
                .ok_or_else(|| anyhow!("genas subset needs p.value (run eBayes)"))?;
            let p1col = pv.column(c1).to_vec();
            let p2col = pv.column(c2).to_vec();
            let p1 = 1.0 - prop_true_null(&p1col, PropTrueNullMethod::Lfdr, 20);
            let p2 = 1.0 - prop_true_null(&p2col, PropTrueNullMethod::Lfdr, 20);
            let cut1 = p1 * ng as f64;
            let cut2 = p2 * ng as f64;
            if subset == GenasSubset::PUnion && p1 == 0.0 && p2 == 0.0 {
                vec![false; ng]
            } else {
                let r1 = rank_average(&p1col);
                let r2 = rank_average(&p2col);
                (0..ng)
                    .map(|g| {
                        let a = r1[g] <= cut1;
                        let b = r2[g] <= cut2;
                        if subset == GenasSubset::PInt {
                            a && b
                        } else {
                            a || b
                        }
                    })
                    .collect()
            }
        }
        GenasSubset::LogFC => {
            let a1: Vec<f64> = fit
                .coefficients
                .column(c1)
                .iter()
                .map(|v| v.abs())
                .collect();
            let a2: Vec<f64> = fit
                .coefficients
                .column(c2)
                .iter()
                .map(|v| v.abs())
                .collect();
            let q1 = quantile_type7(&a1, 0.9);
            let q2 = quantile_type7(&a2, 0.9);
            (0..ng).map(|g| a1[g] > q1 || a2[g] > q2).collect()
        }
        GenasSubset::PredFC => {
            let pfc1 = pred_fcm(fit, c1, true, true, PropTrueNullMethod::Lfdr)?;
            let pfc2 = pred_fcm(fit, c2, true, true, PropTrueNullMethod::Lfdr)?;
            let a1: Vec<f64> = pfc1.iter().map(|v| v.abs()).collect();
            let a2: Vec<f64> = pfc2.iter().map(|v| v.abs()).collect();
            let q1 = quantile_type7(&a1, 0.9);
            let q2 = quantile_type7(&a2, 0.9);
            (0..ng).map(|g| a1[g] > q1 || a2[g] > q2).collect()
        }
    };
    Ok(mask)
}

/// Assemble a two-coefficient fit over the selected genes (`fit[genes, coef]`),
/// reading the raw coefficient columns `c1`/`c2`. eBayes output fields are
/// cleared so the caller can refit.
fn build_subfit(fit: &MArrayLM, sel: &[usize], c1: usize, c2: usize) -> MArrayLM {
    let m = sel.len();
    let mut coefficients = Array2::<f64>::zeros((m, 2));
    let mut stdev_unscaled = Array2::<f64>::zeros((m, 2));
    let mut sigma = Array1::<f64>::zeros(m);
    let mut df_residual = Array1::<f64>::zeros(m);
    let mut amean = Array1::<f64>::zeros(m);
    let mut gene_names = Vec::with_capacity(m);
    for (i, &g) in sel.iter().enumerate() {
        coefficients[[i, 0]] = fit.coefficients[[g, c1]];
        coefficients[[i, 1]] = fit.coefficients[[g, c2]];
        stdev_unscaled[[i, 0]] = fit.stdev_unscaled[[g, c1]];
        stdev_unscaled[[i, 1]] = fit.stdev_unscaled[[g, c2]];
        sigma[i] = fit.sigma[g];
        df_residual[i] = fit.df_residual[g];
        amean[i] = if g < fit.amean.len() {
            fit.amean[g]
        } else {
            0.0
        };
        gene_names.push(fit.gene_names.get(g).cloned().unwrap_or_default());
    }
    let b = cov_block(fit, c1, c2);
    let mut cov_coefficients = Array2::<f64>::zeros((2, 2));
    cov_coefficients[[0, 0]] = b[0][0];
    cov_coefficients[[0, 1]] = b[0][1];
    cov_coefficients[[1, 0]] = b[1][0];
    cov_coefficients[[1, 1]] = b[1][1];
    let coef_names = vec![
        fit.coef_names.get(c1).cloned().unwrap_or_default(),
        fit.coef_names.get(c2).cloned().unwrap_or_default(),
    ];

    MArrayLM {
        coefficients,
        stdev_unscaled,
        sigma,
        df_residual,
        cov_coefficients,
        gene_names,
        coef_names,
        amean,
        design: None,
        contrasts: None,
        df_prior: None,
        s2_prior: None,
        var_prior: None,
        proportion: None,
        s2_post: None,
        t: None,
        df_total: None,
        p_value: None,
        lods: None,
        f_stat: None,
        f_p_value: None,
    }
}

/// The NA-filled result limma returns when no genes survive the subset filter.
fn null_genas() -> Genas {
    Genas {
        technical_correlation: f64::NAN,
        covariance_matrix: [[f64::NAN; 2]; 2],
        biological_correlation: f64::NAN,
        deviance: 0.0,
        p_value: 1.0,
        n: 0,
    }
}

/// `TRUE` when a prior vector actually varies by gene (limma's trend/robust flag
/// `length(prior) > 1`; our `MArrayLM` always stores a full-length vector).
fn is_varying(v: Option<&Array1<f64>>) -> bool {
    match v {
        Some(a) => a.len() > 1 && a.iter().any(|&x| x != a[0]),
        None => false,
    }
}

/// `quantile(x, prob)` with R's default type-7 (linear interpolation).
fn quantile_type7(x: &[f64], prob: f64) -> f64 {
    let n = x.len();
    if n == 0 {
        return f64::NAN;
    }
    let mut s = x.to_vec();
    s.sort_by(|a, b| a.partial_cmp(b).unwrap());
    if n == 1 {
        return s[0];
    }
    let h = (n as f64 - 1.0) * prob;
    let lo = h.floor() as usize;
    let frac = h - lo as f64;
    if lo + 1 < n {
        s[lo] + frac * (s[lo + 1] - s[lo])
    } else {
        s[lo]
    }
}

/// Average ranks (R's `rank(x, ties.method="average")`), 1-based, ascending.
fn rank_average(x: &[f64]) -> Vec<f64> {
    let n = x.len();
    let mut idx: Vec<usize> = (0..n).collect();
    idx.sort_by(|&a, &b| x[a].partial_cmp(&x[b]).unwrap());
    let mut ranks = vec![0.0; n];
    let mut i = 0;
    while i < n {
        let mut j = i;
        while j + 1 < n && x[idx[j + 1]] == x[idx[i]] {
            j += 1;
        }
        let avg = ((i + 1 + j + 1) as f64) / 2.0;
        for &k in &idx[i..=j] {
            ranks[k] = avg;
        }
        i = j + 1;
    }
    ranks
}

/// Shared bivariate moderated-t negative log-likelihood given a biological
/// covariance `v0` and technical covariance `v`. Returns `Σ_g (Second + Third)`
/// exactly as limma's `.multTLogLik*` helpers.
fn loglik_core(
    v0: [[f64; 2]; 2],
    v: [[f64; 2]; 2],
    b0: &[f64],
    b1: &[f64],
    s2post: &[f64],
    dft: &[f64],
    m: f64,
) -> f64 {
    // Upper Cholesky R of M = V0 + V (a 2x2 SPD matrix).
    let m00 = v0[0][0] + v[0][0];
    let m01 = v0[0][1] + v[0][1];
    let m11 = v0[1][1] + v[1][1];
    let r11 = m00.sqrt();
    let r12 = m01 / r11;
    let r22 = (m11 - r12 * r12).sqrt();
    let second = r11.ln() + r22.ln();

    let mut total = 0.0;
    for g in 0..b0.len() {
        // Solve Rᵀ w = (b0,b1) by forward substitution.
        let w0 = b0[g] / r11;
        let w1 = (b1[g] - r12 * w0) / r22;
        let q = w0 * w0 + w1 * w1;
        let third = 0.5 * (m + dft[g]) * (1.0 + q / (s2post[g] * dft[g])).ln();
        total += second + third;
    }
    total
}

/// Null model: `V0 = diag(exp(2a1), exp(2a2))` (limma's `.multTLogLikNull`).
fn loglik_null(
    x: &[f64],
    v: [[f64; 2]; 2],
    b0: &[f64],
    b1: &[f64],
    s2post: &[f64],
    dft: &[f64],
    m: f64,
) -> f64 {
    let v0 = [[(2.0 * x[0]).exp(), 0.0], [0.0, (2.0 * x[1]).exp()]];
    loglik_core(v0, v, b0, b1, s2post, dft, m)
}

/// Full model: `V0 = L D Lᵀ`, `L = [[1,0],[b,1]]`, `D = diag(exp(a1), exp(a2))`
/// (limma's `.multTLogLik`).
fn loglik_full(
    x: &[f64],
    v: [[f64; 2]; 2],
    b0: &[f64],
    b1: &[f64],
    s2post: &[f64],
    dft: &[f64],
    m: f64,
) -> f64 {
    let d1 = x[0].exp();
    let d2 = x[1].exp();
    let bb = x[2];
    let v0 = [[d1, bb * d1], [bb * d1, bb * bb * d1 + d2]];
    loglik_core(v0, v, b0, b1, s2post, dft, m)
}

#[cfg(test)]
#[allow(clippy::excessive_precision, clippy::approx_constant)]
mod tests {
    use super::*;
    use crate::fit::lmfit;
    use ndarray::Array2;

    fn rclose(a: f64, b: f64) -> bool {
        (a - b).abs() <= 1e-6 * (1.0 + b.abs())
    }

    /// Rebuild the bit-identical fixture from scratch/genas_ref.R: a 60x9
    /// expression matrix from a purely rational, 0-indexed formula, plus a
    /// common-intercept + two-treatment design.
    fn fixture() -> (Array2<f64>, Array2<f64>) {
        let ngenes = 60usize;
        let narrays = 9usize;
        let g_a = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0];
        let g_b = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
        let mut y = Array2::<f64>::zeros((ngenes, narrays));
        for g in 0..ngenes {
            let gi = g as i64;
            let base = ((gi % 7) - 3) as f64;
            let extra = (((gi * 5) % 11) - 5) as f64;
            let own_b = (((gi * 3) % 13) - 6) as f64;
            let eff_a = base * 0.4 + extra * 0.1;
            let eff_b = base * 0.32 + own_b * 0.1;
            let mu = 8.0 + (gi % 5) as f64 * 0.25;
            for k in 0..narrays {
                let ki = k as i64;
                let noise = (((gi * 13 + ki * 17) % 19) - 9) as f64 * 0.05;
                y[[g, k]] = mu + eff_a * g_a[k] + eff_b * g_b[k] + noise;
            }
        }
        let mut design = Array2::<f64>::zeros((narrays, 3));
        for k in 0..narrays {
            design[[k, 0]] = 1.0;
            design[[k, 1]] = g_a[k];
            design[[k, 2]] = g_b[k];
        }
        (y, design)
    }

    fn ebayes_fit() -> MArrayLM {
        let (y, design) = fixture();
        let names: Vec<String> = (1..=60).map(|i| format!("g{i}")).collect();
        let coefs = vec!["Intercept".to_string(), "gA".to_string(), "gB".to_string()];
        let mut fit = lmfit(&y, &design, names, coefs).unwrap();
        // eBayes defaults: proportion=0.01, stdev.coef.lim=c(0.1,4).
        ebayes(&mut fit, 0.01, (0.1, 4.0), false, false).unwrap();
        fit
    }

    #[test]
    fn genas_matches_r() {
        let fit = ebayes_fit();
        // R: genas(fit, coef=c(2,3), subset="all") -> 0-based (1,2).
        let g = genas(&fit, (1, 2), GenasSubset::All).unwrap();

        assert_eq!(g.n, 60);
        assert!(
            rclose(g.technical_correlation, 0.49999999999999994),
            "tech {}",
            g.technical_correlation
        );
        assert!(
            rclose(g.biological_correlation, 0.72110133367492135),
            "biol {}",
            g.biological_correlation
        );
        assert!(
            rclose(g.covariance_matrix[0][0], 20.311293729600376),
            "v00 {}",
            g.covariance_matrix[0][0]
        );
        assert!(
            rclose(g.covariance_matrix[0][1], 12.393027523641022),
            "v01 {}",
            g.covariance_matrix[0][1]
        );
        assert!(
            rclose(g.covariance_matrix[1][0], 12.393027523641022),
            "v10 {}",
            g.covariance_matrix[1][0]
        );
        assert!(
            rclose(g.covariance_matrix[1][1], 14.542016870927945),
            "v11 {}",
            g.covariance_matrix[1][1]
        );
        assert!(rclose(g.deviance, 35.500819277411154), "dev {}", g.deviance);
        assert!(rclose(g.p_value, 2.5494331700086711e-09), "p {}", g.p_value);
    }

    #[test]
    fn genas_subset_matches_r() {
        // Reference values from scratch/genas_subset_ref.R (same fixture).
        let fit = ebayes_fit();
        let check = |subset: GenasSubset,

                     n: usize,
                     biol: f64,
                     cov: [f64; 4],
                     dev: f64,
                     pval: f64,
                     tag: &str| {
            let g = genas(&fit, (1, 2), subset).unwrap();
            assert_eq!(g.n, n, "{tag} n");
            assert!(rclose(g.technical_correlation, 0.5), "{tag} tech");
            assert!(rclose(g.biological_correlation, biol), "{tag} biol");
            assert!(rclose(g.covariance_matrix[0][0], cov[0]), "{tag} v00");
            assert!(rclose(g.covariance_matrix[0][1], cov[1]), "{tag} v01");
            assert!(rclose(g.covariance_matrix[1][0], cov[2]), "{tag} v10");
            assert!(rclose(g.covariance_matrix[1][1], cov[3]), "{tag} v11");
            assert!(rclose(g.deviance, dev), "{tag} dev");
            assert!(rclose(g.p_value, pval), "{tag} pval");
        };

        check(
            GenasSubset::Fpval,
            55,
            0.71848111283183225,
            [
                23.231392431559968,
                14.120763825027172,
                14.120763825027172,
                16.626867102316595,
            ],
            32.698047599684912,
            1.0764529339097856e-08,
            "Fpval",
        );
        check(
            GenasSubset::PUnion,
            55,
            0.72141688090435385,
            [
                23.20355805632785,
                14.181934574327077,
                14.181934574327077,
                16.654966711355684,
            ],
            33.078191943303352,
            8.8526093918565966e-09,
            "p.union",
        );
        check(
            GenasSubset::PInt,
            38,
            0.83237776620740411,
            [
                31.358733693853345,
                22.383537180965163,
                22.383537180965163,
                23.059929516708259,
            ],
            37.494823730220958,
            9.1655906123216993e-10,
            "p.int",
        );
        check(
            GenasSubset::LogFC,
            10,
            0.9036479641020686,
            [
                74.055616589393026,
                62.941872383594607,
                62.941872383594607,
                65.512287646621971,
            ],
            15.829494073613475,
            6.9313604519058013e-05,
            "logFC",
        );
        check(
            GenasSubset::PredFC,
            10,
            0.9036479641020686,
            [
                74.055616589393026,
                62.941872383594607,
                62.941872383594607,
                65.512287646621971,
            ],
            15.829494073613475,
            6.9313604519058013e-05,
            "predFC",
        );
    }
}