census_proteomics/
protein.rs

1#[cfg(feature = "serialization")]
2use serde::Serialize;
3
4#[cfg_attr(feature = "serde", derive(Serialize))]
5#[derive(PartialEq, PartialOrd, Clone, Default)]
6/// Protein-level TMT quantification data, as well as additional
7/// metadata about the protein that is output in the Census file
8pub struct Protein {
9    /// Uniprot accession identifier
10    pub accession: String,
11    /// Long-form description
12    pub description: String,
13    /// Number of spectral counts
14    pub spectral_count: u16,
15    /// Number of unique sequence counts
16    pub sequence_count: u16,
17    /// Sequence coverage
18    pub sequence_coverage: f32,
19    /// Molecular weight
20    pub molecular_weight: u32,
21    /// Raw signal intensity channels
22    pub peptides: Vec<Peptide>,
23
24    pub channels: u8,
25}
26
27impl Protein {
28    /// Return the summed intensities for all peptides
29    pub fn total(&self) -> Vec<u32> {
30        let mut v = Vec::with_capacity(self.channels as usize);
31        for c in 0..self.channels {
32            let sum = self.peptides.iter().map(|pep| pep.values[c as usize]).sum();
33            v.push(sum);
34        }
35        v
36    }
37
38    /// Return a vector of normalized ratios, where the signal intensity
39    /// for each channel is divided by the sum of all channels
40    pub fn ratios(&self) -> Vec<f64> {
41        let values = self.total();
42        let total = values.iter().sum::<u32>() as f64;
43        values.iter().map(|v| *v as f64 / total).collect()
44    }
45}
46
/// Peptide-level TMT quantification data
// NOTE(review): this derive is gated on feature "serde", while the
// `use serde::Serialize;` at the top of the file is gated on feature
// "serialization" — confirm both features are always enabled together.
#[cfg_attr(feature = "serde", derive(Serialize))]
#[derive(Debug, Clone, Default, PartialEq, PartialOrd)]
pub struct Peptide {
    /// Peptide sequence
    pub sequence: String,
    /// Raw isobaric ion intensity values, one entry per channel
    pub values: Vec<u32>,
    /// Is this a unique peptide?
    pub unique: bool,
    /// Purity value recorded for this peptide
    /// (NOTE(review): exact definition not visible here — confirm against the parser)
    pub purity: f32,
    /// Scan associated with this peptide
    /// (NOTE(review): presumably the scan number from the Census file — confirm)
    pub scan: usize,
}
62
63impl Peptide {
64    /// Return a boolean indicating whether the peptide has 2 tryptic sites
65    pub fn tryptic(&self) -> bool {
66        let cterm = self.sequence.ends_with('-');
67        let front = self.sequence.starts_with(|c| match c {
68            'K' | 'R' | '-' => true,
69            _ => false,
70        });
71        let end = self
72            .sequence
73            .split('.')
74            .skip(1)
75            .next()
76            .map(|s| {
77                s.ends_with(|c| match c {
78                    'K' | 'R' => true,
79                    _ => cterm,
80                })
81            })
82            .unwrap_or(false);
83        front && end
84        // let kdot = self.sequence.matches("K.").count();
85        // let rdot = self.sequence.matches("R.").count();
86        // kdot + rdot == 2
87    }
88
89    /// Return a vector of normalized ratios, where the signal intensity
90    /// for each channel is divided by the sum of all channels
91    pub fn ratios(&self) -> Vec<f64> {
92        let total: f64 = self.values.iter().sum::<u32>() as f64;
93        self.values.iter().map(|v| *v as f64 / total).collect()
94    }
95
96    /// Swap channels A and B, which are 0 indexed into the peptide values
97    /// vector.
98    ///
99    /// # May panic
100    ///
101    /// May panic if A or B exceed the length of the vector
102    pub fn swap_channels(&mut self, a: usize, b: usize) {
103        self.values.swap(a, b)
104    }
105}
106
#[cfg(test)]
mod test {
    use super::*;

    /// Build an otherwise-default peptide carrying only the given sequence.
    fn gen_peptide(sequence: &str) -> Peptide {
        Peptide {
            sequence: sequence.into(),
            ..Peptide::default()
        }
    }

    #[test]
    fn test_tryptic() {
        assert!(gen_peptide("-.KMDKDK.-").tryptic());
        assert!(!gen_peptide("S.KMDKDK.-").tryptic());
        assert!(gen_peptide("R.TLDGFK*K.F").tryptic());
        assert!(!gen_peptide("K.KMDKDT.A").tryptic());
    }

    #[test]
    fn test_tryptic_malformed_sequences() {
        // Empty string: nothing to match at either end.
        assert!(!gen_peptide("").tryptic());
        // No '.' separators: there is no peptide segment to inspect.
        assert!(!gen_peptide("KMDKDK").tryptic());
        // Empty peptide segment between the separators.
        assert!(!gen_peptide("K..A").tryptic());
    }
}