/// A named list of gene identifiers.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct GeneSet {
    /// Name of the set.
    pub name: String,
    /// Gene identifiers in the set, kept in the order they were supplied.
    pub genes: Vec<String>,
}

impl GeneSet {
    /// Creates a gene set from a name and an owned list of genes.
    ///
    /// `name` accepts anything convertible into a `String` (for example
    /// `&str` or `String`); `genes` is stored as given, without
    /// deduplication or reordering.
    pub fn new(name: impl Into<String>, genes: Vec<String>) -> Self {
        let name = name.into();
        Self { name, genes }
    }
}
/// An ordered collection of [`GeneSet`]s.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct GeneSets {
    /// The contained sets, in insertion (or file) order.
    pub sets: Vec<GeneSet>,
}

impl GeneSets {
    /// Wraps an existing list of gene sets.
    pub fn new(sets: Vec<GeneSet>) -> Self {
        GeneSets { sets }
    }

    /// Parses tab-separated GMT-style text into a collection of gene sets.
    ///
    /// Each line is split on tab characters: the first field is the set
    /// name, the second field is ignored (it is treated as a description),
    /// and every remaining field is a gene identifier.
    ///
    /// Normalisation applied while parsing:
    /// * the set name and every gene are trimmed of surrounding whitespace;
    /// * lines whose name is empty or whitespace-only (including blank
    ///   lines) are skipped;
    /// * empty or whitespace-only gene fields are dropped.
    ///
    /// A line that has a name but no genes still yields a set with an empty
    /// gene list. Parsing never fails; lines that cannot provide a name are
    /// silently skipped.
    pub fn from_gmt(text: &str) -> Self {
        let sets = text
            .lines()
            .filter_map(|line| {
                let mut fields = line.split('\t');
                // Trim the name the same way genes are trimmed, so that a
                // padded name does not produce a distinct (or blank) set.
                // A blank line has a whitespace-only first field and is
                // therefore rejected here as well.
                let name = fields.next().map(str::trim).filter(|n| !n.is_empty())?;
                // The second column is a free-text description; discard it.
                let _description = fields.next();
                let genes: Vec<String> = fields
                    .map(str::trim)
                    .filter(|g| !g.is_empty())
                    .map(str::to_string)
                    .collect();
                Some(GeneSet::new(name, genes))
            })
            .collect();
        GeneSets { sets }
    }

    /// Returns the number of gene sets in the collection.
    pub fn len(&self) -> usize {
        self.sets.len()
    }

    /// Returns `true` when the collection contains no gene sets.
    pub fn is_empty(&self) -> bool {
        self.sets.is_empty()
    }

    /// Returns an iterator over the gene sets, in stored order.
    pub fn iter(&self) -> std::slice::Iter<'_, GeneSet> {
        self.sets.iter()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned gene list `GeneSet::new` takes.
    fn owned(ids: &[&str]) -> Vec<String> {
        ids.iter().map(|&id| id.to_owned()).collect()
    }

    /// Two records parse in file order; the trailing empty field on the
    /// second record does not become a gene.
    #[test]
    fn parses_gmt() {
        let parsed = GeneSets::from_gmt("SET_A\tdesc A\tG1\tG2\tG3\nSET_B\tdesc B\tG2\tG4\t\n");
        let first = GeneSet::new("SET_A", owned(&["G1", "G2", "G3"]));
        let second = GeneSet::new("SET_B", owned(&["G2", "G4"]));

        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed.sets[0], first);
        assert_eq!(parsed.sets[1], second);
    }

    /// Blank lines before and after a record contribute no sets.
    #[test]
    fn skips_blank_lines() {
        let parsed = GeneSets::from_gmt("\nSET_A\tna\tG1\n\n");
        assert_eq!(parsed.len(), 1);
    }
}