gsva-rust 0.1.0

Pure-Rust port of the GSVA family of gene-set enrichment methods (GSVA, ssGSEA, z-score, PLAGE), validated for numeric parity against the Bioconductor GSVA package.
Documentation
//! Minimal, dependency-free TSV reader for expression matrices.

use crate::matrix::ExprMatrix;

/// Build an [`ExprMatrix`] from tab-separated text.
///
/// # Input layout
///
/// * Lines that are empty or whitespace-only are dropped before anything else
///   is looked at, so they may appear anywhere, including ahead of the header.
/// * The first remaining line is the header. Its first tab-separated cell (the
///   corner label) is discarded; every following cell is a sample name.
/// * Each later line is one feature: the first cell is the feature name, the
///   rest are that feature's values, one per sample.
///
/// Value cells are trimmed of surrounding whitespace before being interpreted.
/// The literal `NA` becomes [`f64::NAN`]; anything else goes through
/// `str::parse::<f64>`. Sample and feature names are stored exactly as they
/// appear between the tabs (they are not trimmed).
///
/// Values are collected into a single flat vector in the order read, one row
/// after another, and handed to `ExprMatrix::new` together with the names.
///
/// # Panics
///
/// * if `text` contains no non-blank line (there is no header),
/// * if a value cell is neither `NA` nor parseable as `f64`,
/// * if a row's number of value cells differs from the number of sample names
///   in the header (ragged matrix).
pub fn read_tsv_matrix(text: &str) -> ExprMatrix {
    let mut rows = text.lines().filter(|line| !line.trim().is_empty());

    let header = rows.next().expect("matrix has no header line");
    let col_names: Vec<String> = header.split('\t').skip(1).map(String::from).collect();
    let ncol = col_names.len();

    let mut row_names: Vec<String> = Vec::new();
    let mut data: Vec<f64> = Vec::new();

    for row in rows {
        let mut fields = row.split('\t');
        let name = fields.next().expect("row missing name");
        row_names.push(name.to_owned());

        // Remember where this row starts so its width can be checked once
        // every cell has been converted.
        let start = data.len();
        data.extend(fields.map(|raw| {
            let cell = raw.trim();
            match cell {
                "NA" => f64::NAN,
                _ => cell
                    .parse::<f64>()
                    .unwrap_or_else(|_| panic!("could not parse '{cell}' as f64 in row '{name}'")),
            }
        }));

        let got = data.len() - start;
        assert_eq!(got, ncol, "row '{name}' has {got} cells, expected {ncol}");
    }

    ExprMatrix::new(row_names, col_names, data)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A 2x2 matrix containing one `NA` cell is read back with the expected
    /// names and values.
    #[test]
    fn reads_small_matrix() {
        let parsed = read_tsv_matrix("gene\tA\tB\nG1\t1.5\t2.5\nG2\t3\tNA\n");

        assert_eq!(parsed.row_names(), ["G1", "G2"]);
        assert_eq!(parsed.col_names(), ["A", "B"]);

        // Finite cells, addressed as (row, column, expected value).
        for (row, col, want) in [(0, 0, 1.5), (0, 1, 2.5), (1, 0, 3.0)] {
            assert_eq!(parsed.get(row, col), want, "cell ({row}, {col})");
        }

        // NaN never compares equal to itself, so the `NA` cell is checked
        // with `is_nan` rather than `assert_eq!`.
        assert!(parsed.get(1, 1).is_nan());
    }
}