/// A named list of genes.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct GeneSet {
    /// Name of the set.
    pub name: String,
    /// Gene entries that make up the set, stored in the order they were given.
    pub genes: Vec<String>,
}

impl GeneSet {
    /// Creates a gene set.
    ///
    /// `name` may be anything convertible into a `String` (for example a
    /// `&str` or an owned `String`); `genes` is taken by value and stored
    /// as-is.
    pub fn new(name: impl Into<String>, genes: Vec<String>) -> Self {
        let name = name.into();
        Self { name, genes }
    }
}
21
22#[derive(Clone, Debug, Default, PartialEq, Eq)]
24pub struct GeneSets {
25 pub sets: Vec<GeneSet>,
27}
28
29impl GeneSets {
30 pub fn new(sets: Vec<GeneSet>) -> Self {
32 GeneSets { sets }
33 }
34
35 pub fn from_gmt(text: &str) -> Self {
42 let mut sets = Vec::new();
43 for line in text.lines() {
44 if line.trim().is_empty() {
45 continue;
46 }
47 let mut fields = line.split('\t');
48 let name = match fields.next() {
49 Some(n) if !n.is_empty() => n.to_string(),
50 _ => continue,
51 };
52 let _description = fields.next();
54 let genes: Vec<String> = fields
55 .filter(|g| !g.trim().is_empty())
56 .map(|g| g.trim().to_string())
57 .collect();
58 sets.push(GeneSet::new(name, genes));
59 }
60 GeneSets { sets }
61 }
62
63 pub fn len(&self) -> usize {
65 self.sets.len()
66 }
67
68 pub fn is_empty(&self) -> bool {
70 self.sets.is_empty()
71 }
72
73 pub fn iter(&self) -> std::slice::Iter<'_, GeneSet> {
75 self.sets.iter()
76 }
77}
78
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected `GeneSet` from string slices to keep assertions short.
    fn expected(name: &str, genes: &[&str]) -> GeneSet {
        GeneSet::new(name, genes.iter().map(|&g| String::from(g)).collect())
    }

    /// Two well-formed lines produce two sets; the trailing tab on the second
    /// line must not produce an empty gene.
    #[test]
    fn parses_gmt() {
        let parsed = GeneSets::from_gmt("SET_A\tdesc A\tG1\tG2\tG3\nSET_B\tdesc B\tG2\tG4\t\n");
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed.sets[0], expected("SET_A", &["G1", "G2", "G3"]));
        assert_eq!(parsed.sets[1], expected("SET_B", &["G2", "G4"]));
    }

    /// Leading and trailing blank lines are ignored.
    #[test]
    fn skips_blank_lines() {
        let parsed = GeneSets::from_gmt("\nSET_A\tna\tG1\n\n");
        assert_eq!(parsed.len(), 1);
    }
}