gsva-rust 0.1.0

Pure-Rust port of the GSVA family of gene-set enrichment methods (GSVA, ssGSEA, z-score, PLAGE), validated for numeric parity against the Bioconductor GSVA package.
Documentation
//! Minimal expression-matrix container: features (genes) in rows, samples in
//! columns. Data is stored row-major, so a whole gene (row) is contiguous.

use std::collections::HashMap;

/// An expression matrix with named rows (features/genes) and columns (samples).
///
/// Element `(row i, col j)` is stored at `data[i * ncol + j]`. Missing values
/// are represented as [`f64::NAN`] (the analog of R's `NA`).
#[derive(Clone, Debug)]
pub struct ExprMatrix {
    /// Row (feature) names, one per row, in row order.
    row_names: Vec<String>,
    /// Column (sample) names, one per column, in column order.
    col_names: Vec<String>,
    /// Row-major values; `new` enforces `len == row_names.len() * col_names.len()`.
    data: Vec<f64>,
    /// First-occurrence index for each row name, mirroring R's by-name matrix
    /// indexing (`m[name, ]` selects the first row with that name).
    row_index: HashMap<String, usize>,
}

impl ExprMatrix {
    /// Build a matrix from row names, column names, and row-major data.
    ///
    /// Duplicate row names are accepted; by-name lookup resolves to the first
    /// row carrying that name.
    ///
    /// # Panics
    ///
    /// Panics if `data.len() != row_names.len() * col_names.len()`.
    pub fn new(row_names: Vec<String>, col_names: Vec<String>, data: Vec<f64>) -> Self {
        assert_eq!(
            data.len(),
            row_names.len() * col_names.len(),
            "data length must equal nrow * ncol"
        );
        let mut row_index = HashMap::with_capacity(row_names.len());
        for (i, name) in row_names.iter().enumerate() {
            // First occurrence wins, matching R's `m[name, ]` indexing.
            row_index.entry(name.clone()).or_insert(i);
        }
        ExprMatrix {
            row_names,
            col_names,
            data,
            row_index,
        }
    }

    /// Number of rows (features).
    pub fn nrow(&self) -> usize {
        self.row_names.len()
    }

    /// Number of columns (samples).
    pub fn ncol(&self) -> usize {
        self.col_names.len()
    }

    /// Row (feature) names.
    pub fn row_names(&self) -> &[String] {
        &self.row_names
    }

    /// Column (sample) names.
    pub fn col_names(&self) -> &[String] {
        &self.col_names
    }

    /// First row index for a feature name, if present.
    pub fn row_of(&self, name: &str) -> Option<usize> {
        self.row_index.get(name).copied()
    }

    /// Value at `(row, col)`.
    ///
    /// # Panics
    ///
    /// Panics if `col >= self.ncol()` or if `row` addresses a position past the
    /// end of the stored data.
    #[inline]
    pub fn get(&self, row: usize, col: usize) -> f64 {
        let ncol = self.col_names.len();
        // The flat index `row * ncol + col` can still land inside `data` when
        // `col` is too large, which would silently return a value belonging
        // to a later row. Reject that explicitly.
        assert!(
            col < ncol,
            "column index {} out of range for matrix with {} columns",
            col,
            ncol
        );
        self.data[row * ncol + col]
    }

    /// A whole row (feature) as a contiguous slice across all samples.
    ///
    /// # Panics
    ///
    /// Panics if the row's range `row * ncol..(row + 1) * ncol` extends past
    /// the end of the stored data.
    #[inline]
    pub fn row(&self, row: usize) -> &[f64] {
        let ncol = self.col_names.len();
        &self.data[row * ncol..(row + 1) * ncol]
    }

    /// Collect a whole column (sample) as a freshly allocated vector.
    ///
    /// Values are gathered row by row through [`ExprMatrix::get`], so the
    /// result has `nrow()` elements. A matrix with zero rows yields an empty
    /// vector.
    ///
    /// # Panics
    ///
    /// Panics if the matrix has at least one row and `col >= self.ncol()`.
    pub fn column(&self, col: usize) -> Vec<f64> {
        (0..self.nrow()).map(|r| self.get(r, col)).collect()
    }
}