1use gsva::io::read_tsv_matrix;
8use gsva::{ExprMatrix, GeneSets};
9
// Model data embedded into the binary at compile time with `include_str!`.
// All paths are relative to this source file.

/// Text of `data/signatures.gmt`; parsed with `GeneSets::from_gmt` in `XCellModel::load`.
const SIGNATURES_GMT: &str = include_str!("data/signatures.gmt");
/// Text of `data/genes.txt`; read one entry per non-blank line (the "universe genes").
const GENES_TXT: &str = include_str!("data/genes.txt");
/// Text of `data/celltypes.txt`; read one cell type per non-blank line.
const CELLTYPES_TXT: &str = include_str!("data/celltypes.txt");
/// K table backing `XCellModel::spill` (the model selected when `rnaseq` is true).
const SPILL_K_TSV: &str = include_str!("data/spill_K.tsv");
/// fv table backing `XCellModel::spill`.
const SPILL_FV_TSV: &str = include_str!("data/spill_fv.tsv");
/// K table backing `XCellModel::spill_array` (the model selected when `rnaseq` is false).
const ARRAY_K_TSV: &str = include_str!("data/array_K.tsv");
/// fv table backing `XCellModel::spill_array`.
const ARRAY_FV_TSV: &str = include_str!("data/array_fv.tsv");
17
/// A pair of matrices read from a K table and an fv table.
///
/// `SpillModel::parse` enforces the shape invariants: `k` is square and `fv`
/// has exactly three columns.
pub struct SpillModel {
    /// Square matrix read from the K table.
    // NOTE(review): presumably indexed by cell type on both axes — confirm against the data files.
    pub k: ExprMatrix,
    /// Three-column matrix (V1, V2, V3) read from the fv table; rows are looked
    /// up by cell-type name in `v2` / `v3`.
    pub fv: ExprMatrix,
}
29
30impl SpillModel {
31 fn parse(k_tsv: &str, fv_tsv: &str) -> Self {
32 let k = read_tsv_matrix(k_tsv);
33 let fv = read_tsv_matrix(fv_tsv);
34 assert_eq!(k.nrow(), k.ncol(), "spill K must be square");
35 assert_eq!(fv.ncol(), 3, "spill fv must have columns V1, V2, V3");
36 SpillModel { k, fv }
37 }
38
39 pub fn v2(&self, cell_type: &str) -> Option<f64> {
41 self.fv.row_of(cell_type).map(|r| self.fv.get(r, 1))
42 }
43
44 pub fn v3(&self, cell_type: &str) -> Option<f64> {
46 self.fv.row_of(cell_type).map(|r| self.fv.get(r, 2))
47 }
48}
49
/// The complete embedded model, assembled by `XCellModel::load`.
pub struct XCellModel {
    /// Gene sets parsed from the embedded GMT text (`load` expects 489 of them).
    pub signatures: GeneSets,
    /// Universe genes, one per non-blank line of the embedded gene list
    /// (`load` expects 10808).
    pub genes: Vec<String>,
    /// Cell-type names, one per non-blank line of the embedded list
    /// (`load` expects 64).
    pub cell_types: Vec<String>,
    /// Spill model returned by `spill_for(true)`.
    pub spill: SpillModel,
    /// Spill model returned by `spill_for(false)`.
    pub spill_array: SpillModel,
}
65
66impl XCellModel {
67 pub fn load() -> Self {
69 let signatures = GeneSets::from_gmt(SIGNATURES_GMT);
70 let genes = lines(GENES_TXT);
71 let cell_types = lines(CELLTYPES_TXT);
72 assert_eq!(signatures.len(), 489, "expected 489 xCell signatures");
73 assert_eq!(genes.len(), 10808, "expected 10808 universe genes");
74 assert_eq!(cell_types.len(), 64, "expected 64 cell types");
75 XCellModel {
76 signatures,
77 genes,
78 cell_types,
79 spill: SpillModel::parse(SPILL_K_TSV, SPILL_FV_TSV),
80 spill_array: SpillModel::parse(ARRAY_K_TSV, ARRAY_FV_TSV),
81 }
82 }
83
84 pub fn spill_for(&self, rnaseq: bool) -> &SpillModel {
86 if rnaseq {
87 &self.spill
88 } else {
89 &self.spill_array
90 }
91 }
92}
93
/// Splits `text` into lines, trims surrounding whitespace from each one, and
/// returns the non-empty results as owned strings in their original order.
fn lines(text: &str) -> Vec<String> {
    text.lines()
        .filter_map(|raw| {
            let entry = raw.trim();
            // Blank and whitespace-only lines contribute nothing.
            (!entry.is_empty()).then(|| entry.to_string())
        })
        .collect()
}
101
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads the embedded model and checks its counts, matrix shapes, and that
    /// signature names and spill lookups line up with the cell-type list.
    #[test]
    fn model_loads_with_expected_shapes() {
        let model = XCellModel::load();

        // Top-level collection sizes.
        assert_eq!(model.signatures.len(), 489);
        assert_eq!(model.genes.len(), 10808);
        assert_eq!(model.cell_types.len(), 64);

        // Spill matrices: the RNA-seq K is 64 x 64, the array K has 64 rows.
        let rnaseq_k = &model.spill.k;
        assert_eq!(rnaseq_k.nrow(), 64);
        assert_eq!(rnaseq_k.ncol(), 64);
        assert_eq!(model.spill_array.k.nrow(), 64);

        assert_eq!(model.cell_types[0], "aDC");

        // The text before the first '%' in each signature name must be a
        // listed cell type.
        for sig in model.signatures.iter() {
            let ct = sig.name.split('%').next().unwrap();
            let known = model.cell_types.iter().any(|name| name == ct);
            assert!(known, "unknown cell type {ct}");
        }

        // Both fv lookups resolve for a cell type expected to be present.
        assert!(model.spill.v2("B-cells").is_some());
        assert!(model.spill.v3("B-cells").is_some());
    }
}