use gsva::io::read_tsv_matrix;
use gsva::{ExprMatrix, GeneSets};
// Model data files, embedded into the binary at compile time via `include_str!`
// (paths are resolved relative to this source file).

// Signature gene sets; parsed with `GeneSets::from_gmt` in `XCellModel::load`.
const SIGNATURES_GMT: &str = include_str!("data/signatures.gmt");
// Gene list; read one entry per non-empty line by `lines`.
const GENES_TXT: &str = include_str!("data/genes.txt");
// Cell-type list; read one entry per non-empty line by `lines`.
const CELLTYPES_TXT: &str = include_str!("data/celltypes.txt");
// K / fv tables backing `XCellModel::spill` (the model `spill_for(true)` returns).
const SPILL_K_TSV: &str = include_str!("data/spill_K.tsv");
const SPILL_FV_TSV: &str = include_str!("data/spill_fv.tsv");
// K / fv tables backing `XCellModel::spill_array` (the model `spill_for(false)` returns).
const ARRAY_K_TSV: &str = include_str!("data/array_K.tsv");
const ARRAY_FV_TSV: &str = include_str!("data/array_fv.tsv");
/// A pair of matrices parsed from TSV text by `SpillModel::parse`.
///
/// NOTE(review): presumably the xCell spillover-compensation parameters for one
/// platform — confirm against the data files.
pub struct SpillModel {
    /// The "K" matrix; `parse` asserts that it is square.
    pub k: ExprMatrix,
    /// The "fv" table; `parse` asserts it has exactly three columns, which its
    /// assertion message names V1, V2, V3. `v2`/`v3` look rows up by cell-type name.
    pub fv: ExprMatrix,
}
impl SpillModel {
    /// Builds a model from the text of two TSV tables.
    ///
    /// Both inputs are parsed with `read_tsv_matrix`, `k_tsv` first and
    /// `fv_tsv` second; the shape checks run only after both are parsed.
    ///
    /// # Panics
    ///
    /// Panics if the parsed K matrix is not square, or if the parsed fv table
    /// does not have exactly three columns.
    fn parse(k_tsv: &str, fv_tsv: &str) -> Self {
        let model = Self {
            k: read_tsv_matrix(k_tsv),
            fv: read_tsv_matrix(fv_tsv),
        };
        assert_eq!(model.k.nrow(), model.k.ncol(), "spill K must be square");
        assert_eq!(model.fv.ncol(), 3, "spill fv must have columns V1, V2, V3");
        model
    }

    /// Returns the value in fv column index 1 (the second column) for the row
    /// named `cell_type`, or `None` when `row_of` finds no such row.
    pub fn v2(&self, cell_type: &str) -> Option<f64> {
        let row = self.fv.row_of(cell_type)?;
        Some(self.fv.get(row, 1))
    }

    /// Returns the value in fv column index 2 (the third column) for the row
    /// named `cell_type`, or `None` when `row_of` finds no such row.
    pub fn v3(&self, cell_type: &str) -> Option<f64> {
        let row = self.fv.row_of(cell_type)?;
        Some(self.fv.get(row, 2))
    }
}
/// The full set of embedded model data, as assembled by `XCellModel::load`.
pub struct XCellModel {
    /// Gene sets parsed from the embedded GMT text; `load` asserts there are 489.
    pub signatures: GeneSets,
    /// Entries of the embedded gene list; `load` asserts there are 10808.
    pub genes: Vec<String>,
    /// Entries of the embedded cell-type list; `load` asserts there are 64.
    pub cell_types: Vec<String>,
    /// Model parsed from the `spill_*` tables; returned by `spill_for(true)`.
    pub spill: SpillModel,
    /// Model parsed from the `array_*` tables; returned by `spill_for(false)`.
    pub spill_array: SpillModel,
}
impl XCellModel {
    /// Parses every embedded data file and assembles the model.
    ///
    /// The signature, gene and cell-type counts are validated before the two
    /// spill models are parsed.
    ///
    /// # Panics
    ///
    /// Panics if the embedded data does not contain exactly 489 signatures,
    /// 10808 genes and 64 cell types, or if either spill model fails the shape
    /// checks in `SpillModel::parse`.
    pub fn load() -> Self {
        let gene_sets = GeneSets::from_gmt(SIGNATURES_GMT);
        let universe = lines(GENES_TXT);
        let labels = lines(CELLTYPES_TXT);

        assert_eq!(gene_sets.len(), 489, "expected 489 xCell signatures");
        assert_eq!(universe.len(), 10808, "expected 10808 universe genes");
        assert_eq!(labels.len(), 64, "expected 64 cell types");

        // Parsed only after the count checks above have passed.
        let spill = SpillModel::parse(SPILL_K_TSV, SPILL_FV_TSV);
        let spill_array = SpillModel::parse(ARRAY_K_TSV, ARRAY_FV_TSV);

        Self {
            signatures: gene_sets,
            genes: universe,
            cell_types: labels,
            spill,
            spill_array,
        }
    }

    /// Selects a spill model: `spill` when `rnaseq` is true, `spill_array`
    /// otherwise.
    pub fn spill_for(&self, rnaseq: bool) -> &SpillModel {
        match rnaseq {
            true => &self.spill,
            false => &self.spill_array,
        }
    }
}
/// Splits `text` into lines, trims surrounding whitespace from each one, and
/// returns the non-empty results as owned strings in their original order.
fn lines(text: &str) -> Vec<String> {
    let mut entries = Vec::new();
    for raw in text.lines() {
        let entry = raw.trim();
        // Lines that are blank or whitespace-only are dropped.
        if !entry.is_empty() {
            entries.push(entry.to_string());
        }
    }
    entries
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads the embedded model and checks its counts, the spill matrix
    /// dimensions, the first cell type, that every signature name's prefix
    /// before the first `%` is a known cell type, and that "B-cells" has
    /// V2/V3 entries.
    #[test]
    fn model_loads_with_expected_shapes() {
        let XCellModel {
            signatures,
            genes,
            cell_types,
            spill,
            spill_array,
        } = XCellModel::load();

        assert_eq!(signatures.len(), 489);
        assert_eq!(genes.len(), 10808);
        assert_eq!(cell_types.len(), 64);

        assert_eq!(spill.k.nrow(), 64);
        assert_eq!(spill.k.ncol(), 64);
        assert_eq!(spill_array.k.nrow(), 64);

        assert_eq!(cell_types[0], "aDC");

        for s in signatures.iter() {
            // The cell type is the part of the signature name before the first '%'.
            let ct = s.name.split('%').next().unwrap();
            let known = cell_types.iter().any(|c| c == ct);
            assert!(known, "unknown cell type {ct}");
        }

        assert!(spill.v2("B-cells").is_some());
        assert!(spill.v3("B-cells").is_some());
    }
}