census_proteomics/
dataset.rs

1//! Collection of `Protein` objects representing a single dataset
2use super::*;
3#[cfg(feature = "serialization")]
4use serde::{Deserialize, Serialize};
5use std::collections::{HashMap, HashSet};
6
7#[cfg_attr(feature = "serde", derive(Serialize))]
8/// Container for proteomics data read from a Census version file
9pub struct Dataset {
10    /// TMT data for each protein in the dataset
11    pub proteins: Vec<Protein>,
12    /// Number of TMT channels in the dataset
13    pub channels: u8,
14}
15
16impl Dataset {
17    /// Return a set of all UniProt KB accession ID's present in the
18    /// `Dataset`
19    pub fn accessions(&self) -> HashSet<&'_ str> {
20        self.proteins
21            .iter()
22            .map(|pr| pr.accession.as_ref())
23            .collect()
24    }
25
26    /// Create a `HashMap` correlating a UniProt KB accession to Protein-level
27    /// quant data
28    pub fn map(&self) -> HashMap<&'_ str, &Protein> {
29        self.proteins
30            .iter()
31            .map(|pr| (pr.accession.as_ref(), pr))
32            .collect()
33    }
34
35    pub fn filter(self, filter: &Filter) -> Self {
36        filter.filter_dataset(self)
37    }
38}