gsva/matrix.rs
1//! Minimal expression-matrix container: features (genes) in rows, samples in
2//! columns. Data is stored row-major, so a whole gene (row) is contiguous.
3
4use std::collections::HashMap;
5
/// An expression matrix with named rows (features/genes) and columns (samples).
///
/// Element `(row i, col j)` is stored at `data[i * ncol + j]`. Missing values
/// are represented as [`f64::NAN`] (the analog of R's `NA`).
#[derive(Clone, Debug)]
pub struct ExprMatrix {
    /// Row (feature) names, one per row; duplicates are allowed.
    row_names: Vec<String>,
    /// Column (sample) names, one per column.
    col_names: Vec<String>,
    /// Row-major values; length is always `nrow * ncol` (checked in `new`).
    data: Vec<f64>,
    /// First-occurrence index for each row name, mirroring R's by-name matrix
    /// indexing (`m[name, ]` selects the first row with that name).
    row_index: HashMap<String, usize>,
}

impl ExprMatrix {
    /// Build a matrix from row names, column names, and row-major data.
    ///
    /// When a row name occurs more than once, by-name lookup via
    /// [`ExprMatrix::row_of`] resolves to the first occurrence.
    ///
    /// # Panics
    ///
    /// Panics if `data.len() != row_names.len() * col_names.len()`.
    pub fn new(row_names: Vec<String>, col_names: Vec<String>, data: Vec<f64>) -> Self {
        assert_eq!(
            data.len(),
            row_names.len() * col_names.len(),
            "data length must equal nrow * ncol"
        );
        let mut row_index = HashMap::with_capacity(row_names.len());
        for (i, name) in row_names.iter().enumerate() {
            // First occurrence wins, matching R's `m[name, ]` indexing.
            row_index.entry(name.clone()).or_insert(i);
        }
        ExprMatrix {
            row_names,
            col_names,
            data,
            row_index,
        }
    }

    /// Number of rows (features).
    pub fn nrow(&self) -> usize {
        self.row_names.len()
    }

    /// Number of columns (samples).
    pub fn ncol(&self) -> usize {
        self.col_names.len()
    }

    /// Row (feature) names.
    pub fn row_names(&self) -> &[String] {
        &self.row_names
    }

    /// Column (sample) names.
    pub fn col_names(&self) -> &[String] {
        &self.col_names
    }

    /// First row index for a feature name, if present.
    pub fn row_of(&self, name: &str) -> Option<usize> {
        self.row_index.get(name).copied()
    }

    /// Value at `(row, col)`.
    ///
    /// # Panics
    ///
    /// Panics if `col >= self.ncol()` or if `row >= self.nrow()`.
    #[inline]
    pub fn get(&self, row: usize, col: usize) -> f64 {
        let ncol = self.col_names.len();
        // Without this check an out-of-range `col` would still map to a valid
        // flat index and silently return a value from a following row.
        assert!(
            col < ncol,
            "column index {} out of range for matrix with {} columns",
            col,
            ncol
        );
        // An out-of-range `row` is caught by the slice bounds check below.
        self.data[row * ncol + col]
    }

    /// A whole row (feature) as a contiguous slice across all samples.
    ///
    /// # Panics
    ///
    /// Panics if the requested range lies outside the stored data, i.e. when
    /// `row >= self.nrow()` for a matrix that has at least one column.
    #[inline]
    pub fn row(&self, row: usize) -> &[f64] {
        let ncol = self.col_names.len();
        &self.data[row * ncol..(row + 1) * ncol]
    }

    /// Collect a whole column (sample) as a freshly allocated vector.
    ///
    /// The column is not contiguous in row-major storage, so the values are
    /// gathered one row at a time through [`ExprMatrix::get`].
    ///
    /// # Panics
    ///
    /// Panics if `col >= self.ncol()` and the matrix has at least one row.
    pub fn column(&self, col: usize) -> Vec<f64> {
        (0..self.nrow()).map(|r| self.get(r, col)).collect()
    }
}