limma-rust 0.1.0

//! `gls.series` (lmfit.R): gene-wise generalized least squares allowing for a
//! known correlation between duplicate spots (`ndups`) or between samples that
//! share a `block`. This is the fitting engine `lmFit` dispatches to whenever
//! `ndups > 1` or `block` is supplied together with a `correlation` (typically
//! the `consensus.correlation` from [`crate::duplicate_correlation`]).
//!
//! Two cormatrix constructions are reproduced:
//!   * **duplicates** — `cormatrix = diag(correlation, narrays) ⊗ J(ndups)`
//!     with unit diagonal, after `unwrapdups` folds the `ndups` spots of each
//!     gene into extra columns and the design is replicated row-wise;
//!   * **blocks** — `cormatrix[i,j] = correlation` when `block[i] == block[j]`,
//!     unit diagonal.
//!
//! And both of limma's code paths:
//!   * the fast multi-response `lm.fit` when every value is finite and no probe
//!     weights are supplied (one shared `chol(V)` and design QR);
//!   * the slow per-gene iteration when probe weights or missing values are
//!     present (each gene re-derives `V` over its observed arrays).
//!
//! `cov.coefficients` is always the unweighted `(Xᵀ V⁻¹ X)⁻¹` of the full
//! design (limma computes it from `chol(cormatrix)` even on the weighted path).
//!
//! Not reproduced (rare, documented): the array-weight fast path that scales `V`
//! by `1/sqrt(arrayweights)` — array weights are uncommon with `gls.series`, and
//! a full probe-weight matrix flows correctly through the slow path instead; and
//! per-gene rank deficiency (an observed-array subset smaller than the number of
//! coefficients), which limma resolves by pivoted column dropping.

use anyhow::{bail, Result};
use ndarray::{Array1, Array2};

use crate::dups::{kron_rows, unwrapdups};
use crate::fit::{new_fit, MArrayLM};
use crate::linalg::{cholesky_upper, qr_econ, solve_upper, solve_upper_transpose, xtx_inv_from_r};

/// NaN-skipping mean of an array view; `NaN` when nothing is finite.
fn nanmean<'a, I: IntoIterator<Item = &'a f64>>(it: I) -> f64 {
    let mut sum = 0.0;
    let mut cnt = 0usize;
    for &v in it {
        if v.is_finite() {
            sum += v;
            cnt += 1;
        }
    }
    if cnt > 0 {
        sum / cnt as f64
    } else {
        f64::NAN
    }
}

/// `Amean` exactly as `lmFit` would populate it: row means of the original
/// expression matrix, reduced through `unwrapdups` when `ndups > 1`.
fn gls_amean(exprs: &Array2<f64>, ndups: usize, spacing: usize) -> Array1<f64> {
    let nspots = exprs.nrows();
    let spotmean: Vec<f64> = (0..nspots).map(|i| nanmean(exprs.row(i))).collect();
    if ndups <= 1 {
        return Array1::from(spotmean);
    }
    let col = Array2::from_shape_vec((nspots, 1), spotmean).expect("column shape");
    let uw = unwrapdups(&col, ndups, spacing.max(1));
    (0..uw.nrows()).map(|g| nanmean(uw.row(g))).collect()
}

/// Apply `backsolve(u, b, transpose = TRUE)` column by column: solve `Uᵀ X = B`.
fn backsolve_t_cols(u: &Array2<f64>, b: &Array2<f64>) -> Array2<f64> {
    let (n, k) = b.dim();
    let mut out = Array2::<f64>::zeros((n, k));
    for j in 0..k {
        let sol = solve_upper_transpose(u, &b.column(j).to_owned());
        out.column_mut(j).assign(&sol);
    }
    out
}

/// Fit gene-wise linear models by generalized least squares. Port of limma's
/// `gls.series`. `correlation` must be supplied (limma would otherwise call
/// `duplicateCorrelation`); pass [`crate::duplicate_correlation`]'s consensus.
///
/// Exactly one of `ndups > 1` (with `block == None`) or `block == Some(..)` is
/// the intended use; when `block` is given, `ndups`/`spacing` are forced to 1.
/// `weights`, when supplied, is a full probe-weight matrix matching `exprs`.
#[allow(clippy::too_many_arguments)]
pub fn gls_series(
    exprs: &Array2<f64>,
    design: &Array2<f64>,
    ndups: usize,
    spacing: usize,
    block: Option<&[i64]>,
    correlation: f64,
    weights: Option<&Array2<f64>>,
    gene_names: Vec<String>,
    coef_names: Vec<String>,
) -> Result<MArrayLM> {
    let narrays0 = exprs.ncols();
    if design.nrows() != narrays0 {
        bail!(
            "design rows ({}) must match number of arrays ({})",
            design.nrows(),
            narrays0
        );
    }
    if correlation.abs() >= 1.0 {
        bail!("correlation is 1 or -1, so the model is degenerate");
    }
    let nbeta = design.ncols();

    // Amean reflects the original data, before weight-driven NA masking below.
    let amean = gls_amean(exprs, ndups, spacing);

    // Clean weights: NA -> 0, then mask M and weights where weight < 1e-15.
    let mut m = exprs.clone();
    let mut wmat: Option<Array2<f64>> = None;
    if let Some(w) = weights {
        if w.dim() != exprs.dim() {
            bail!("weights dimensions must match exprs");
        }
        let mut wc = w.clone();
        for i in 0..wc.nrows() {
            for j in 0..wc.ncols() {
                if wc[[i, j]].is_nan() {
                    wc[[i, j]] = 0.0;
                }
                if wc[[i, j]] < 1e-15 {
                    m[[i, j]] = f64::NAN;
                    wc[[i, j]] = f64::NAN;
                }
            }
        }
        wmat = Some(wc);
    }

    // Build cormatrix and, for the duplicates path, unwrap M/weights and expand
    // the design so rows align with the Kronecker correlation blocks.
    let (cormatrix, mwork, wwork, dwork) = match block {
        None => {
            let nd = if ndups < 2 { 1 } else { ndups };
            let corr = if ndups < 2 { 0.0 } else { correlation };
            let sp = spacing.max(1);
            let mu = unwrapdups(&m, nd, sp);
            let wu = wmat.as_ref().map(|w| unwrapdups(w, nd, sp));
            let du = kron_rows(design, nd);
            let n = du.nrows();
            let mut cor = Array2::<f64>::zeros((n, n));
            for i in 0..n {
                for j in 0..n {
                    if i / nd == j / nd {
                        cor[[i, j]] = corr;
                    }
                }
                cor[[i, i]] = 1.0;
            }
            (cor, mu, wu, du)
        }
        Some(b) => {
            if b.len() != narrays0 {
                bail!("length of block does not match number of arrays");
            }
            let n = narrays0;
            let mut cor = Array2::<f64>::zeros((n, n));
            for i in 0..n {
                for j in 0..n {
                    if b[i] == b[j] {
                        cor[[i, j]] = correlation;
                    }
                }
                cor[[i, i]] = 1.0;
            }
            (cor, m, wmat, design.clone())
        }
    };

    let ngenes = mwork.nrows();
    let n = mwork.ncols();

    let all_finite = mwork.iter().all(|v| v.is_finite());
    let no_probe_wts = all_finite && wwork.is_none();

    let mut coefficients = Array2::<f64>::from_elem((ngenes, nbeta), f64::NAN);
    let mut stdev_unscaled = Array2::<f64>::from_elem((ngenes, nbeta), f64::NAN);
    let mut sigma = Array1::<f64>::from_elem(ngenes, f64::NAN);
    let mut df_residual = Array1::<f64>::zeros(ngenes);

    // cov.coefficients = (Xstar' Xstar)^-1 with Xstar = chol(cormatrix)^-T design
    // (limma's unweighted full-design covariance, shared by both paths).
    let cholv_full = cholesky_upper(&cormatrix);
    let xstar_full = backsolve_t_cols(&cholv_full, &dwork);
    let (qf, rf) = qr_econ(&xstar_full);
    let cov_coefficients = xtx_inv_from_r(&rf);

    if no_probe_wts {
        // Fast path: one shared whitening transform, multi-response lm.fit.
        let df = (n - nbeta) as f64;
        let stdev_row: Vec<f64> = (0..nbeta)
            .map(|j| cov_coefficients[[j, j]].sqrt())
            .collect();
        for g in 0..ngenes {
            let zstar = solve_upper_transpose(&cholv_full, &mwork.row(g).to_owned());
            let qty = qf.t().dot(&zstar);
            let beta = solve_upper(&rf, &qty);
            for j in 0..nbeta {
                coefficients[[g, j]] = beta[j];
                stdev_unscaled[[g, j]] = stdev_row[j];
            }
            df_residual[g] = df;
            if df > 0.0 {
                let ssy: f64 = zstar.iter().map(|&v| v * v).sum();
                let ssfit: f64 = qty.iter().map(|&v| v * v).sum();
                sigma[g] = ((ssy - ssfit).max(0.0) / df).sqrt();
            }
        }
    } else {
        // Slow path: re-derive V over each gene's observed arrays.
        for g in 0..ngenes {
            let yfull = mwork.row(g);
            let obs: Vec<usize> = (0..n).filter(|&k| yfull[k].is_finite()).collect();
            let nobs = obs.len();
            if nobs == 0 {
                continue;
            }
            let mut xsub = Array2::<f64>::zeros((nobs, nbeta));
            let mut vsub = Array2::<f64>::zeros((nobs, nobs));
            for (ri, &ki) in obs.iter().enumerate() {
                for j in 0..nbeta {
                    xsub[[ri, j]] = dwork[[ki, j]];
                }
                for (cj, &kj) in obs.iter().enumerate() {
                    vsub[[ri, cj]] = cormatrix[[ki, kj]];
                }
            }
            if let Some(w) = &wwork {
                let wrs: Vec<f64> = obs.iter().map(|&k| 1.0 / w[[g, k]].sqrt()).collect();
                for ri in 0..nobs {
                    for cj in 0..nobs {
                        vsub[[ri, cj]] *= wrs[ri] * wrs[cj];
                    }
                }
            }
            let cholv = cholesky_upper(&vsub);
            let yobs: Array1<f64> = obs.iter().map(|&k| yfull[k]).collect();
            let ystar = solve_upper_transpose(&cholv, &yobs);

            if xsub.iter().all(|&v| v == 0.0) {
                df_residual[g] = nobs as f64;
                let ss: f64 = ystar.iter().map(|&v| v * v).sum();
                sigma[g] = (ss / nobs as f64).sqrt();
                continue;
            }
            if nobs < nbeta {
                // Rank-deficient gene (fewer observed arrays than coefficients);
                // limma drops columns by pivoting — left as non-estimable here.
                continue;
            }
            let xstar = backsolve_t_cols(&cholv, &xsub);
            let (q, r) = qr_econ(&xstar);
            let qty = q.t().dot(&ystar);
            let beta = solve_upper(&r, &qty);
            let xtx_inv = xtx_inv_from_r(&r);
            for j in 0..nbeta {
                coefficients[[g, j]] = beta[j];
                stdev_unscaled[[g, j]] = xtx_inv[[j, j]].sqrt();
            }
            let df = (nobs - nbeta) as f64;
            df_residual[g] = df;
            if df > 0.0 {
                let ssy: f64 = ystar.iter().map(|&v| v * v).sum();
                let ssfit: f64 = qty.iter().map(|&v| v * v).sum();
                sigma[g] = ((ssy - ssfit).max(0.0) / df).sqrt();
            }
        }
    }

    let gnames: Vec<String> = if gene_names.len() == ngenes {
        gene_names
    } else {
        (0..ngenes).map(|i| i.to_string()).collect()
    };
    let cnames: Vec<String> = if coef_names.len() == nbeta {
        coef_names
    } else {
        (0..nbeta).map(|j| format!("coef{j}")).collect()
    };

    let mut fit = new_fit(
        coefficients,
        stdev_unscaled,
        sigma,
        df_residual,
        cov_coefficients,
        &gnames,
        &cnames,
    );
    fit.amean = amean;
    fit.design = Some(dwork);
    Ok(fit)
}

#[cfg(test)]
#[allow(clippy::excessive_precision)]
mod tests {
    use super::*;

    fn rclose(a: f64, b: f64) -> bool {
        (a - b).abs() <= 1e-7 * (1.0 + b.abs())
    }

    fn names(n: usize, p: usize) -> (Vec<String>, Vec<String>) {
        (
            (0..n).map(|i| format!("g{i}")).collect(),
            (0..p).map(|j| format!("c{j}")).collect(),
        )
    }

    /// Scenario 1 fixture: 8 spots x 6 arrays, ndups=2 (-> 4 genes).
    fn dup_fixture() -> Array2<f64> {
        let group = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
        let mut m = Array2::<f64>::zeros((8, 6));
        for g0 in 0..8 {
            let base = ((g0 as i64) % 7) - 3;
            let extra = ((g0 as i64 * 5) % 11) - 5;
            let lvl = 5.0 + (g0 % 10) as f64 * 0.2;
            let eff = base as f64 * 0.25;
            for k0 in 0..6 {
                let noise = (((g0 as i64 * 13 + k0 as i64 * 17) % 19) - 9) as f64 * 0.04;
                m[[g0, k0]] = lvl + eff * group[k0] + extra as f64 * 0.05 + noise;
            }
        }
        m
    }

    fn dup_design() -> Array2<f64> {
        let group = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
        let mut d = Array2::<f64>::zeros((6, 2));
        for k0 in 0..6 {
            d[[k0, 0]] = 1.0;
            d[[k0, 1]] = group[k0];
        }
        d
    }

    /// Scenario 2/3 fixture: 6 genes x 8 arrays, block path.
    fn block_fixture() -> Array2<f64> {
        let mut m = Array2::<f64>::zeros((6, 8));
        for g0 in 0..6 {
            let lvl2 = 4.0 + (g0 % 5) as f64 * 0.3;
            let slope = (((g0 as i64 * 2) % 7) - 3) as f64 * 0.15;
            for k0 in 0..8 {
                let x = (k0 % 3) as f64;
                let noise2 = (((g0 as i64 * 11 + k0 as i64 * 5) % 17) - 8) as f64 * 0.05;
                m[[g0, k0]] = lvl2 + slope * x + noise2;
            }
        }
        m
    }

    fn block_design() -> Array2<f64> {
        let mut d = Array2::<f64>::zeros((8, 2));
        for k0 in 0..8 {
            d[[k0, 0]] = 1.0;
            d[[k0, 1]] = (k0 % 3) as f64;
        }
        d
    }

    #[test]
    fn gls_series_ndups_fast_matches_r() {
        let m = dup_fixture();
        let (gn, cn) = names(4, 2);
        let fit = gls_series(&m, &dup_design(), 2, 1, None, 0.4, None, gn, cn).unwrap();

        let coef = [
            [5.048333333333332, -0.73833333333333273],
            [5.5733333333333341, 0.015000000000000385],
            [5.9500000000000011, 0.2616666666666671],
            [6.3266666666666627, 0.013333333333335673],
        ];
        let sig = [
            0.38390381980014643,
            0.30262541674013937,
            0.27637319489621803,
            0.94559882765216252,
        ];
        let amean = [
            4.6791666666666671,
            5.5808333333333335,
            6.0808333333333335,
            6.3333333333333339,
        ];
        for g in 0..4 {
            assert!(rclose(fit.coefficients[[g, 0]], coef[g][0]), "coef0 g{g}");
            assert!(rclose(fit.coefficients[[g, 1]], coef[g][1]), "coef1 g{g}");
            assert!(rclose(fit.sigma[g], sig[g]), "sigma g{g}");
            assert!(rclose(fit.amean[g], amean[g]), "amean g{g}");
            assert!(rclose(fit.stdev_unscaled[[g, 0]], 0.48304589153964794));
            assert!(rclose(fit.stdev_unscaled[[g, 1]], 0.68313005106397318));
            assert_eq!(fit.df_residual[g], 10.0);
        }
        assert!(rclose(fit.cov_coefficients[[0, 0]], 0.23333333333333331));
        assert!(rclose(fit.cov_coefficients[[0, 1]], -0.23333333333333331));
        assert!(rclose(fit.cov_coefficients[[1, 1]], 0.46666666666666662));
    }

    #[test]
    fn gls_series_block_fast_matches_r() {
        let m = block_fixture();
        let block = [0i64, 0, 1, 1, 2, 2, 3, 3];
        let (gn, cn) = names(6, 2);
        let fit = gls_series(&m, &block_design(), 1, 1, Some(&block), 0.3, None, gn, cn).unwrap();

        let coef = [
            [3.9896825396825411, -0.5024943310657598],
            [4.461904761904762, -0.25646258503401381],
            [4.340476190476191, 0.42517006802721086],
            [5.0690476190476206, 0.25680272108843544],
            [5.3119047619047643, -0.40646258503401367],
            [3.6904761904761902, 0.27517006802721089],
        ];
        let sig = [
            0.31011362178297863,
            0.26676429774418003,
            0.078484562609406242,
            0.28938639706404889,
            0.26676429774418015,
            0.078484562609406339,
        ];
        let amean = [
            3.5499999999999998,
            4.2374999999999998,
            4.7124999999999995,
            5.2937500000000002,
            4.9562500000000007,
            3.9312499999999999,
        ];
        for g in 0..6 {
            assert!(rclose(fit.coefficients[[g, 0]], coef[g][0]), "coef0 g{g}");
            assert!(rclose(fit.coefficients[[g, 1]], coef[g][1]), "coef1 g{g}");
            assert!(rclose(fit.sigma[g], sig[g]), "sigma g{g}");
            assert!(rclose(fit.amean[g], amean[g]), "amean g{g}");
            assert!(rclose(fit.stdev_unscaled[[g, 0]], 0.53748384988657005));
            assert!(rclose(fit.stdev_unscaled[[g, 1]], 0.40629960014669614));
            assert_eq!(fit.df_residual[g], 6.0);
        }
        assert!(rclose(fit.cov_coefficients[[0, 0]], 0.28888888888888903));
        assert!(rclose(fit.cov_coefficients[[0, 1]], -0.14444444444444454));
        assert!(rclose(fit.cov_coefficients[[1, 1]], 0.16507936507936516));
    }

    #[test]
    fn gls_series_block_slow_one_na_matches_r() {
        let mut m = block_fixture();
        m[[2, 5]] = f64::NAN; // 0-indexed gene 2, array 5 -> triggers slow path
        let block = [0i64, 0, 1, 1, 2, 2, 3, 3];
        let (gn, cn) = names(6, 2);
        let fit = gls_series(&m, &block_design(), 1, 1, Some(&block), 0.3, None, gn, cn).unwrap();

        // Affected gene 2: one observation dropped (df 6 -> 5).
        assert_eq!(fit.df_residual[2], 5.0);
        assert!(rclose(fit.coefficients[[2, 0]], 4.3380116959064319));
        assert!(rclose(fit.coefficients[[2, 1]], 0.43538011695906442));
        assert!(rclose(fit.sigma[2], 0.083551863073296567));
        assert!(rclose(fit.stdev_unscaled[[2, 0]], 0.54022713197881478));
        assert!(rclose(fit.stdev_unscaled[[2, 1]], 0.46456642400542719));
        assert!(rclose(fit.amean[2], 4.6499999999999995));

        // Untouched genes match the fast-path result exactly.
        assert_eq!(fit.df_residual[0], 6.0);
        assert!(rclose(fit.coefficients[[0, 0]], 3.9896825396825411));
        assert!(rclose(fit.sigma[0], 0.31011362178297869));
        assert!(rclose(fit.coefficients[[5, 1]], 0.27517006802721089));
        assert!(rclose(fit.sigma[5], 0.078484562609406339));

        // cov.coefficients still the unweighted full-design covariance.
        assert!(rclose(fit.cov_coefficients[[0, 0]], 0.28888888888888903));
        assert!(rclose(fit.cov_coefficients[[1, 1]], 0.16507936507936516));
    }
}