gsva-rust 0.1.0

Pure-Rust port of the GSVA family of gene-set enrichment methods (GSVA, ssGSEA, z-score, PLAGE), validated for numeric parity against the Bioconductor GSVA package.
Documentation
//! Shared helpers for the parity integration tests.
// Each integration-test binary compiles this module independently and may use
// only a subset of the helpers, so unused-helper warnings here are expected.
#![allow(dead_code)]

use gsva::{io::read_tsv_matrix, ExprMatrix};

/// Worst absolute and relative error between a computed `sets x samples` score
/// block and an R reference matrix (TSV text), aligned by gene-set and sample
/// name.
pub fn worst_errors(
    sets: &[String],
    samples: &[String],
    scores: &[f64],
    reference_tsv: &str,
) -> (f64, f64) {
    let reference: ExprMatrix = read_tsv_matrix(reference_tsv);
    let ref_col: Vec<usize> = samples
        .iter()
        .map(|s| {
            reference
                .col_names()
                .iter()
                .position(|c| c == s)
                .unwrap_or_else(|| panic!("sample '{s}' missing from reference"))
        })
        .collect();

    let nsamp = samples.len();
    let mut worst_abs = 0.0f64;
    let mut worst_rel = 0.0f64;
    for (si, set) in sets.iter().enumerate() {
        let rrow = reference
            .row_of(set)
            .unwrap_or_else(|| panic!("gene set '{set}' missing from reference"));
        for j in 0..nsamp {
            let a = scores[si * nsamp + j];
            let b = reference.get(rrow, ref_col[j]);
            let abs = (a - b).abs();
            let denom = a.abs().max(b.abs());
            let rel = if denom > 0.0 { abs / denom } else { 0.0 };
            worst_abs = worst_abs.max(abs);
            worst_rel = worst_rel.max(rel);
        }
    }
    (worst_abs, worst_rel)
}

/// Like [`worst_errors`] but aligns each gene set's sign to the reference before
/// scoring (the metagene of a singular-vector method is only defined up to sign).
/// Returns `(worst_abs, worst_rel, n_flips)` where `n_flips` counts rows whose
/// sign had to be negated to match the reference.
pub fn worst_errors_sign_aligned(
    sets: &[String],
    samples: &[String],
    scores: &[f64],
    reference_tsv: &str,
) -> (f64, f64, usize) {
    let reference: ExprMatrix = read_tsv_matrix(reference_tsv);
    let ref_col: Vec<usize> = samples
        .iter()
        .map(|s| {
            reference
                .col_names()
                .iter()
                .position(|c| c == s)
                .unwrap_or_else(|| panic!("sample '{s}' missing from reference"))
        })
        .collect();

    let nsamp = samples.len();
    let mut worst_abs = 0.0f64;
    let mut worst_rel = 0.0f64;
    let mut n_flips = 0usize;
    for (si, set) in sets.iter().enumerate() {
        let rrow = reference
            .row_of(set)
            .unwrap_or_else(|| panic!("gene set '{set}' missing from reference"));
        // Choose the sign (+1 or -1) minimizing this row's worst absolute error.
        let mut best = (f64::INFINITY, 0.0f64, false);
        for &flip in &[false, true] {
            let mul = if flip { -1.0 } else { 1.0 };
            let mut wabs = 0.0f64;
            let mut wrel = 0.0f64;
            for j in 0..nsamp {
                let a = mul * scores[si * nsamp + j];
                let b = reference.get(rrow, ref_col[j]);
                let abs = (a - b).abs();
                let denom = a.abs().max(b.abs());
                let rel = if denom > 0.0 { abs / denom } else { 0.0 };
                wabs = wabs.max(abs);
                wrel = wrel.max(rel);
            }
            if wabs < best.0 {
                best = (wabs, wrel, flip);
            }
        }
        worst_abs = worst_abs.max(best.0);
        worst_rel = worst_rel.max(best.1);
        if best.2 {
            n_flips += 1;
        }
    }
    (worst_abs, worst_rel, n_flips)
}