mcpcounter_rust/
mcpcounter.rs1use std::collections::{HashMap, HashSet};
13
14use crate::matrix::ExprMatrix;
15use crate::result::DeconvResult;
16use crate::stats::r_mean_narm;
17
18const GENES_TSV: &str = include_str!("signatures/mcpcounter_genes.txt");
19const PROBESETS_TSV: &str = include_str!("signatures/mcpcounter_probesets.txt");
20
21#[derive(Clone, Copy, Debug, PartialEq, Eq)]
24pub enum FeaturesType {
25 Affy133P2Probesets,
27 HugoSymbols,
29 EntrezId,
31 EnsemblId,
33}
34
35fn unquote(s: &str) -> &str {
37 let s = s.trim();
38 s.strip_prefix('"')
39 .and_then(|x| x.strip_suffix('"'))
40 .unwrap_or(s)
41}
42
43fn signature_pairs(ft: FeaturesType) -> Vec<(String, String)> {
46 if ft == FeaturesType::Affy133P2Probesets {
47 return PROBESETS_TSV
49 .lines()
50 .filter(|l| !l.trim().is_empty())
51 .map(|l| {
52 let mut it = l.split('\t');
53 let id = unquote(it.next().unwrap_or(""));
54 let pop = unquote(it.next().unwrap_or(""));
55 (id.to_string(), pop.to_string())
56 })
57 .collect();
58 }
59
60 let col = match ft {
62 FeaturesType::HugoSymbols => 0,
63 FeaturesType::EntrezId => 2,
64 FeaturesType::EnsemblId => 3,
65 FeaturesType::Affy133P2Probesets => unreachable!(),
66 };
67 GENES_TSV
68 .lines()
69 .skip(1) .filter(|l| !l.trim().is_empty())
71 .filter_map(|l| {
72 let fields: Vec<&str> = l.split('\t').collect();
73 let id = unquote(fields.get(col).copied().unwrap_or(""));
74 let pop = unquote(fields.get(1).copied().unwrap_or(""));
75 if id.is_empty() || id == "NA" {
77 None
78 } else {
79 Some((id.to_string(), pop.to_string()))
80 }
81 })
82 .collect()
83}
84
85pub fn estimate(expr: &ExprMatrix, features_type: FeaturesType) -> DeconvResult {
91 let pairs = signature_pairs(features_type);
92
93 let mut pop_order: Vec<&str> = Vec::new();
95 let mut seen: HashSet<&str> = HashSet::new();
96 for (_, pop) in &pairs {
97 if seen.insert(pop.as_str()) {
98 pop_order.push(pop.as_str());
99 }
100 }
101
102 let mut pop_rows: HashMap<&str, Vec<usize>> = HashMap::new();
107 for (id, pop) in &pairs {
108 if let Some(row) = expr.row_of(id) {
109 pop_rows.entry(pop.as_str()).or_default().push(row);
110 }
111 }
112 for rows in pop_rows.values_mut() {
118 rows.sort_unstable();
119 rows.dedup();
120 }
121
122 let ncol = expr.ncol();
123 let mut populations: Vec<String> = Vec::new();
124 let mut scores: Vec<f64> = Vec::new();
125 let mut col_vals: Vec<f64> = Vec::new();
126
127 for pop in &pop_order {
128 let Some(rows) = pop_rows.get(pop) else {
129 continue;
130 };
131 if rows.is_empty() {
132 continue;
133 }
134
135 populations.push((*pop).to_string());
136 for j in 0..ncol {
137 col_vals.clear();
138 for &i in rows {
139 col_vals.push(expr.get(i, j));
140 }
141 scores.push(r_mean_narm(&col_vals));
142 }
143 }
144
145 DeconvResult {
146 populations,
147 samples: expr.col_names().to_vec(),
148 scores,
149 }
150}
151
152#[cfg(test)]
153mod tests {
154 use super::*;
155
156 fn approx(a: f64, b: f64) {
157 assert!((a - b).abs() < 1e-12, "{a} vs {b}");
158 }
159
160 #[test]
161 fn hugo_symbols_marker_means() {
162 let rows = vec![
165 "CD3D".to_string(),
166 "CD3G".to_string(),
167 "CD28".to_string(),
168 "CD8B".to_string(),
169 "GAPDH".to_string(),
170 ];
171 let cols = vec!["s1".to_string(), "s2".to_string()];
172 #[rustfmt::skip]
173 let data = vec![
174 10.0, 20.0, 12.0, 18.0, 14.0, 22.0, 5.0, 7.0, 100.0, 200.0, ];
180 let m = ExprMatrix::new(rows, cols, data);
181 let res = estimate(&m, FeaturesType::HugoSymbols);
182
183 assert_eq!(res.populations, vec!["T cells", "CD8 T cells"]);
185 assert_eq!(res.samples, vec!["s1", "s2"]);
186 approx(res.score(0, 0), 12.0);
188 approx(res.score(0, 1), 20.0);
189 approx(res.score(1, 0), 5.0);
191 approx(res.score(1, 1), 7.0);
192 }
193
194 #[test]
195 fn probesets_default_smoke() {
196 let rows = vec!["204777_s_at".to_string(), "206485_at".to_string()];
198 let cols = vec!["a".to_string()];
199 let data = vec![10.0, 30.0];
200 let m = ExprMatrix::new(rows, cols, data);
201 let res = estimate(&m, FeaturesType::Affy133P2Probesets);
202 assert_eq!(res.populations, vec!["T cells"]);
203 approx(res.score(0, 0), 20.0);
204 }
205
206 #[test]
207 fn absent_markers_drop_population() {
208 let rows = vec!["FOO".to_string(), "BAR".to_string()];
210 let cols = vec!["s1".to_string()];
211 let data = vec![1.0, 2.0];
212 let m = ExprMatrix::new(rows, cols, data);
213 let res = estimate(&m, FeaturesType::HugoSymbols);
214 assert!(res.populations.is_empty());
215 assert!(res.scores.is_empty());
216 }
217}