Skip to main content

omics_variation/
snv.rs

1//! Single nucleotide variations.
2
3use std::str::FromStr;
4
5use omics_coordinate::Coordinate;
6use omics_coordinate::Strand;
7use omics_coordinate::coordinate;
8use omics_coordinate::system::Base;
9use omics_core::VARIANT_SEPARATOR;
10use omics_molecule::compound::Nucleotide;
11use omics_molecule::compound::nucleotide::relation;
12use omics_molecule::compound::nucleotide::relation::Relation;
13use thiserror::Error;
14
/// A parse error related to a [`Variant`].
///
/// Each variant identifies which component of the textual form was rejected
/// while parsing. The type is generic over the nucleotide `N` because the
/// nucleotide-related variants carry `N`'s own [`FromStr`] error.
#[derive(Error, Debug)]
pub enum ParseError<N: Nucleotide>
where
    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
{
    /// An invalid format was encountered when parsing a [`Variant`].
    ///
    /// The parser in this module stores the complete, unmodified input string
    /// as the payload when the input does not split into an accepted number
    /// of fields.
    #[error("invalid format: {0}")]
    InvalidFormat(String),

    /// An issue occurred when parsing the coordinate of the [`Variant`].
    ///
    /// The message of the wrapped [`coordinate::Error`] is shown as-is
    /// (`transparent`), and `#[from]` provides the `From` conversion from
    /// that error type.
    #[error(transparent)]
    CoordinateError(#[from] coordinate::Error),

    /// An issue occurred when parsing the reference nucleotide of the
    /// [`Variant`].
    ///
    /// The payload is the error returned by `N`'s [`FromStr`] implementation.
    #[error("reference nucleotide error: {0}")]
    ReferenceNucleotide(<N as FromStr>::Err),

    /// An issue occurred when parsing the alternate nucleotide of the
    /// [`Variant`].
    ///
    /// The payload is the error returned by `N`'s [`FromStr`] implementation.
    #[error("alternate nucleotide error: {0}")]
    AlternateNucleotide(<N as FromStr>::Err),
}
39
/// An error related to a [`Variant`].
///
/// This is the error type returned both when constructing a [`Variant`]
/// directly and when parsing one from a string.
#[derive(Error, Debug)]
pub enum Error<N: Nucleotide>
where
    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
{
    /// Attempted to create a [`Variant`] with identical reference and
    /// alternate nucleotides.
    ///
    /// The payload is the nucleotide that was supplied for both sides.
    #[error("identical nucleotides for snv: {0}")]
    Identical(N),

    /// Unsuccessfully attempted to parse a [`Variant`] from a string.
    ///
    /// The message of the wrapped [`ParseError`] is shown as-is
    /// (`transparent`), and `#[from]` provides the `From` conversion from
    /// that error type.
    #[error(transparent)]
    Parse(#[from] ParseError<N>),

    /// An error constructing a relation.
    ///
    /// Wraps the [`relation::Error`] reported when the underlying
    /// [`Relation`] could not be built from the two nucleotides; its message
    /// is shown as-is (`transparent`).
    #[error(transparent)]
    Relation(#[from] relation::Error<N>),
}
59
/// A single nucleotide variant.
///
/// Pairs a base coordinate with the relation between a reference nucleotide
/// and an alternate nucleotide. Both fields are private, so within this
/// module values are only produced through the fallible constructor, which
/// rejects identical reference and alternate nucleotides.
#[derive(Debug)]
pub struct Variant<N: Nucleotide> {
    /// The coordinate.
    ///
    /// Where the variant is located.
    coordinate: Coordinate<Base>,

    /// The relation.
    ///
    /// Built from the reference and alternate nucleotides; both are read
    /// back out of this value by the accessors.
    relation: Relation<N>,
}
69
70impl<N: Nucleotide> Variant<N>
71where
72    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
73{
74    /// Attempts to create a new [`Variant`].
75    ///
76    /// # Examples
77    ///
78    /// ```
79    /// use omics_coordinate::base::Coordinate;
80    /// use omics_coordinate::system::Base;
81    /// use omics_molecule::polymer::dna;
82    /// use omics_variation::snv::Variant;
83    ///
84    /// let variant = Variant::<dna::Nucleotide>::try_new(
85    ///     "seq0:+:1".parse::<Coordinate>()?,
86    ///     dna::Nucleotide::A,
87    ///     dna::Nucleotide::T,
88    /// )?;
89    ///
90    /// # Ok::<(), Box<dyn std::error::Error>>(())
91    /// ```
92    pub fn try_new(
93        coordinate: impl Into<Coordinate<Base>>,
94        reference_nucleotide: impl Into<N>,
95        alternate_nucleotide: impl Into<N>,
96    ) -> Result<Self, Error<N>> {
97        let coordinate = coordinate.into();
98        let reference_nucleotide = reference_nucleotide.into();
99        let alternate_nucleotide = alternate_nucleotide.into();
100
101        let relation = Relation::try_new(Some(reference_nucleotide), Some(alternate_nucleotide))
102            .map_err(Error::Relation)?;
103
104        if let Relation::Identical(nucleotide) = relation {
105            return Err(Error::Identical(nucleotide));
106        }
107
108        Ok(Self {
109            coordinate,
110            relation,
111        })
112    }
113
114    /// Gets the [`Coordinate`] for this [`Variant`].
115    ///
116    /// # Examples
117    ///
118    /// ```
119    /// use omics_coordinate::Strand;
120    /// use omics_coordinate::base::Coordinate;
121    /// use omics_coordinate::system::Base;
122    /// use omics_molecule::polymer::dna;
123    /// use omics_variation::snv::Variant;
124    ///
125    /// let variant = Variant::<dna::Nucleotide>::try_new(
126    ///     "seq0:+:1".parse::<Coordinate>()?,
127    ///     dna::Nucleotide::A,
128    ///     dna::Nucleotide::T,
129    /// )?;
130    ///
131    /// assert_eq!(variant.coordinate().contig().as_str(), "seq0");
132    /// assert_eq!(variant.coordinate().strand(), Strand::Positive);
133    /// assert_eq!(variant.coordinate().position().get(), 1);
134    ///
135    /// # Ok::<(), Box<dyn std::error::Error>>(())
136    /// ```
137    pub fn coordinate(&self) -> &Coordinate<Base> {
138        &self.coordinate
139    }
140
141    /// Gets the reference nucleotide as a [`Nucleotide`] from the [`Variant`].
142    ///
143    /// # Examples
144    ///
145    /// ```
146    /// use omics_coordinate::base::Coordinate;
147    /// use omics_coordinate::system::Base;
148    /// use omics_molecule::polymer::dna;
149    /// use omics_variation::snv::Variant;
150    ///
151    /// let variant = "seq0:+:1:A:T".parse::<Variant<dna::Nucleotide>>()?;
152    /// assert_eq!(variant.reference(), dna::Nucleotide::A);
153    ///
154    /// # Ok::<(), Box<dyn std::error::Error>>(())
155    /// ```
156    pub fn reference(&self) -> N {
157        // SAFETY: because a single nucleotide variant is guaranteed to have a
158        // reference nucleotide within the inner [`Relation`], this will
159        // always unwrap successfully.
160        self.relation.reference().unwrap()
161    }
162
163    /// Gets the alternate nucleotide as a [`Nucleotide`] from the [`Variant`].
164    ///
165    /// # Examples
166    ///
167    /// ```
168    /// use omics_coordinate::base::Coordinate;
169    /// use omics_coordinate::system::Base;
170    /// use omics_molecule::polymer::dna;
171    /// use omics_variation::snv::Variant;
172    ///
173    /// let variant = "seq0:+:1:A:T".parse::<Variant<dna::Nucleotide>>()?;
174    /// assert_eq!(variant.alternate(), dna::Nucleotide::T);
175    ///
176    /// # Ok::<(), Box<dyn std::error::Error>>(())
177    /// ```
178    pub fn alternate(&self) -> N {
179        // SAFETY: because a single nucleotide variant is guaranteed to have a
180        // alternate nucleotide within the inner [`Relation`], this will
181        // always unwrap successfully.
182        self.relation.alternate().unwrap()
183    }
184}
185
186impl<N: Nucleotide> std::str::FromStr for Variant<N>
187where
188    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
189{
190    type Err = Error<N>;
191
192    fn from_str(s: &str) -> Result<Self, Self::Err> {
193        let parts = s.split(VARIANT_SEPARATOR).collect::<Vec<_>>();
194        let num_parts = parts.len();
195
196        if num_parts != 4 && num_parts != 5 {
197            return Err(Error::Parse(ParseError::InvalidFormat(s.to_owned())));
198        }
199
200        let mut parts = parts.into_iter();
201
202        let coordinate = match num_parts {
203            4 => {
204                let positive = Strand::Positive.to_string();
205
206                // SAFETY: we just ensured that the number of parts is four.
207                // Since we have not taken any items from the iterator, these
208                // two items will always unwrap.
209                [
210                    parts.next().unwrap(),
211                    positive.as_str(),
212                    parts.next().unwrap(),
213                ]
214                .join(VARIANT_SEPARATOR)
215            }
216            5 => {
217                // SAFETY: we just ensured that the number of parts is five.
218                // Since we have not taken any items from the iterator, these
219                // three items will always unwrap.
220                [
221                    parts.next().unwrap(),
222                    parts.next().unwrap(),
223                    parts.next().unwrap(),
224                ]
225                .join(VARIANT_SEPARATOR)
226            }
227            // SAFETY: we ensured above that the number of parts must be either four or five.
228            _ => unreachable!(),
229        };
230
231        let coordinate = match coordinate.parse::<Coordinate<Base>>() {
232            Ok(coordinate) => coordinate,
233            Err(err) => return Err(Error::Parse(ParseError::CoordinateError(err))),
234        };
235
236        // SAFETY: in all cases above, we leave two items in the iterator. Since we have
237        // not taken any items yet, this will always unwrap.
238        let reference_nucleotide = parts
239            .next()
240            .unwrap()
241            .parse::<N>()
242            .map_err(|err| Error::Parse(ParseError::ReferenceNucleotide(err)))?;
243
244        // SAFETY: in all cases above, we leave two items in the iterator. Since we have
245        // only taken one item so far, this will always unwrap.
246        let alternate_nucleotide = parts
247            .next()
248            .unwrap()
249            .parse::<N>()
250            .map_err(|err| Error::Parse(ParseError::AlternateNucleotide(err)))?;
251
252        Self::try_new(coordinate, reference_nucleotide, alternate_nucleotide)
253    }
254}
255
256impl<N: Nucleotide> std::fmt::Display for Variant<N>
257where
258    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
259{
260    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
261        let coordinate = self.coordinate().to_string();
262
263        let parts = [
264            coordinate.as_str(),
265            &self.reference().to_string(),
266            &self.alternate().to_string(),
267        ];
268
269        write!(f, "{}", parts.join(self::VARIANT_SEPARATOR))
270    }
271}
272
#[cfg(test)]
mod tests {
    use omics_molecule::polymer::dna;
    use omics_molecule::polymer::rna;

    use super::*;

    /// The result type shared by every fallible test in this module.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Parses `$input` as a `Variant<$nucleotide>` and checks every accessor.
    ///
    /// All fixtures in this module live on contig `seq0` at position `1`, so
    /// only the strand and the two nucleotides vary between invocations.
    /// Parse failures are propagated with `?` to the enclosing test.
    macro_rules! assert_parses {
        ($nucleotide:ty, $input:expr, $strand:expr, $reference:expr, $alternate:expr) => {{
            let parsed = $input.parse::<Variant<$nucleotide>>()?;

            assert_eq!(parsed.coordinate().contig().as_str(), "seq0");
            assert_eq!(parsed.coordinate().strand(), $strand);
            assert_eq!(parsed.coordinate().position().get(), 1);
            assert_eq!(parsed.reference(), $reference);
            assert_eq!(parsed.alternate(), $alternate);
        }};
    }

    /// Parses `input` as a DNA variant, expecting failure, and returns the
    /// rendered error message.
    fn dna_failure_message(input: &str) -> String {
        input
            .parse::<Variant<dna::Nucleotide>>()
            .unwrap_err()
            .to_string()
    }

    #[test]
    fn it_creates_a_variant_in_a_dna_context() -> TestResult {
        assert_parses!(
            dna::Nucleotide,
            "seq0:+:1:A:C",
            Strand::Positive,
            dna::Nucleotide::A,
            dna::Nucleotide::C
        );

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_in_a_rna_context() -> TestResult {
        assert_parses!(
            rna::Nucleotide,
            "seq0:+:1:U:C",
            Strand::Positive,
            rna::Nucleotide::U,
            rna::Nucleotide::C
        );

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_on_the_negative_strand_in_a_dna_context() -> TestResult {
        assert_parses!(
            dna::Nucleotide,
            "seq0:-:1:A:C",
            Strand::Negative,
            dna::Nucleotide::A,
            dna::Nucleotide::C
        );

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_on_the_negative_strand_in_a_rna_context() -> TestResult {
        assert_parses!(
            rna::Nucleotide,
            "seq0:-:1:U:C",
            Strand::Negative,
            rna::Nucleotide::U,
            rna::Nucleotide::C
        );

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_with_no_specified_strand_in_a_dna_context() -> TestResult {
        // With the strand omitted, the positive strand is assumed.
        assert_parses!(
            dna::Nucleotide,
            "seq0:1:A:C",
            Strand::Positive,
            dna::Nucleotide::A,
            dna::Nucleotide::C
        );

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_with_no_specified_strand_in_a_rna_context() -> TestResult {
        // With the strand omitted, the positive strand is assumed.
        assert_parses!(
            rna::Nucleotide,
            "seq0:1:U:C",
            Strand::Positive,
            rna::Nucleotide::U,
            rna::Nucleotide::C
        );

        Ok(())
    }

    #[test]
    fn it_fails_when_creating_a_variant_with_identical_nucleotides() {
        let message = dna_failure_message("seq0:+:1:A:A");

        assert_eq!(message, "identical nucleotides for snv: A");
    }

    #[test]
    fn it_fails_when_attempting_to_represent_an_insertion() {
        let message = dna_failure_message("seq0:+:1:.:A");

        assert_eq!(
            message,
            "reference nucleotide error: invalid nucleotide `.`"
        );
    }

    #[test]
    fn it_fails_when_attempting_to_represent_a_deletion() {
        let message = dna_failure_message("seq0:+:1:A:.");

        assert_eq!(
            message,
            "alternate nucleotide error: invalid nucleotide `.`"
        );
    }

    #[test]
    fn it_fails_when_attempting_to_represent_an_empty_pair() {
        // The reference field is parsed first, so it is the one reported.
        let message = dna_failure_message("seq0:+:1:.:.");

        assert_eq!(
            message,
            "reference nucleotide error: invalid nucleotide `.`"
        );
    }
}