omics_molecule/polymer/
dna.rs

1//! Deoxyribonucleic Acid.
2
3mod nucleotide;
4
5pub use nucleotide::Nucleotide;
6
7/// An error related to a [`Molecule`].
8#[derive(Debug)]
9pub enum Error {
10    /// An error when processing a [`Nucleotide`].
11    NucleotideError(nucleotide::Error),
12}
13
14impl std::fmt::Display for Error {
15    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
16        match self {
17            Error::NucleotideError(err) => write!(f, "nucleotide error: {err}"),
18        }
19    }
20}
21
22impl std::error::Error for Error {}
23
24/// A molecule representing Deoxyribonucleic Acid, otherwise known as DNA.
25#[derive(Debug)]
26pub struct Molecule(Vec<Nucleotide>);
27
28impl Molecule {
29    /// Gets the inner [`Vec<Nucleotide>`] by reference.
30    ///
31    /// # Examples
32    ///
33    /// ```
34    /// use omics_molecule::polymer::dna::Molecule;
35    ///
36    /// let m = "ACGT".parse::<Molecule>()?;
37    /// assert_eq!(m.inner().len(), 4);
38    ///
39    /// # Ok::<(), Box<dyn std::error::Error>>(())
40    /// ```
41    pub fn inner(&self) -> &Vec<Nucleotide> {
42        self.0.as_ref()
43    }
44
45    /// Consumes the [`Molecule`] and returns the inner [`Vec<Nucleotide>`].
46    ///
47    /// # Examples
48    ///
49    /// ```
50    /// use omics_molecule::polymer::dna::Molecule;
51    /// use omics_molecule::polymer::dna::Nucleotide;
52    ///
53    /// let m = "ACGT".parse::<Molecule>()?;
54    /// let nucleotides = m.into_inner();
55    ///
56    /// assert_eq!(nucleotides, vec![
57    ///     Nucleotide::A,
58    ///     Nucleotide::C,
59    ///     Nucleotide::G,
60    ///     Nucleotide::T,
61    /// ]);
62    ///
63    /// # Ok::<(), Box<dyn std::error::Error>>(())
64    /// ```
65    pub fn into_inner(self) -> Vec<Nucleotide> {
66        self.0
67    }
68
69    /// Gets the GC content of this [`Molecule`].
70    ///
71    /// # Examples
72    ///
73    /// ```
74    /// use omics_molecule::polymer::dna::Molecule;
75    ///
76    /// let m = "ACGT".parse::<Molecule>()?;
77    /// assert_eq!(m.gc_content(), 0.5);
78    ///
79    /// # Ok::<(), Box<dyn std::error::Error>>(())
80    /// ```
81    pub fn gc_content(&self) -> f32 {
82        let numerator = self
83            .0
84            .iter()
85            .filter(|n| *n == &Nucleotide::C || *n == &Nucleotide::G)
86            .count();
87
88        numerator as f32 / self.0.len() as f32
89    }
90}
91
92impl From<Vec<Nucleotide>> for Molecule {
93    fn from(v: Vec<Nucleotide>) -> Self {
94        Self(v)
95    }
96}
97
98impl std::str::FromStr for Molecule {
99    type Err = Error;
100
101    fn from_str(s: &str) -> Result<Self, Self::Err> {
102        s.chars()
103            .map(|c| Nucleotide::try_from(c).map_err(Error::NucleotideError))
104            .collect::<Result<Vec<_>, Error>>()
105            .map(Self::from)
106    }
107}
108
#[cfg(test)]
mod tests {
    use super::*;

    /// Converting a vector of nucleotides keeps every element.
    #[test]
    fn it_creates_a_molecule_from_a_vec_of_nucleotides() {
        let dna: Molecule =
            vec![Nucleotide::A, Nucleotide::C, Nucleotide::G, Nucleotide::T].into();

        assert_eq!(dna.inner().len(), 4);
    }

    /// A string made only of valid nucleotide characters parses successfully.
    #[test]
    fn it_parses_a_molecule_from_a_valid_string() -> Result<(), Box<dyn std::error::Error>> {
        let _molecule: Molecule = "ACGT".parse()?;
        Ok(())
    }

    /// An invalid character is rejected and named in the error message.
    #[test]
    fn it_fails_to_parse_a_molecule_from_an_invalid_string() {
        let message = match "QQQQ".parse::<Molecule>() {
            Ok(molecule) => panic!("expected a parse failure, got {molecule:?}"),
            Err(err) => err.to_string(),
        };

        assert_eq!(message, "nucleotide error: invalid nucleotide: Q");
    }
}