omics_variation/
snv.rs

1//! Single nucleotide variations.
2
3use std::str::FromStr;
4
5use omics_coordinate::Coordinate;
6use omics_coordinate::Strand;
7use omics_coordinate::coordinate;
8use omics_coordinate::system::Base;
9use omics_core::VARIANT_SEPARATOR;
10use omics_molecule::compound::Nucleotide;
11use omics_molecule::compound::nucleotide::relation;
12use omics_molecule::compound::nucleotide::relation::Relation;
13
14/// A parse error related to a [`Variant`].
15#[derive(Debug)]
16pub enum ParseError<N: Nucleotide>
17where
18    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
19{
20    /// An invalid format was encountered when parsing a [`Variant`].
21    InvalidFormat(String),
22
23    /// An issue occurred when parsing the coordinate of the [`Variant`].
24    CoordinateError(coordinate::Error),
25
26    /// An issue occurred when parsing the reference nucleotide of the
27    /// [`Variant`].
28    ReferenceNucleotide(<N as FromStr>::Err),
29
30    /// An issue occurred when parsing the alternate nucleotide of the
31    /// [`Variant`].
32    AlternateNucleotide(<N as FromStr>::Err),
33}
34
35impl<N: Nucleotide> std::fmt::Display for ParseError<N>
36where
37    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
38{
39    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40        match self {
41            ParseError::InvalidFormat(value) => write!(f, "invalid format: {value}"),
42            ParseError::CoordinateError(err) => write!(f, "coordinate error: {err}"),
43            ParseError::ReferenceNucleotide(err) => write!(f, "reference nucleotide error: {err}"),
44            ParseError::AlternateNucleotide(err) => write!(f, "alternate nucleotide error: {err}"),
45        }
46    }
47}
48
49impl<N: Nucleotide> std::error::Error for ParseError<N> where
50    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display
51{
52}
53
54/// An error related to a [`Variant`].
55#[derive(Debug)]
56pub enum Error<N: Nucleotide>
57where
58    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
59{
60    /// Attempted to create a [`Variant`] with identical reference and
61    /// alternate nucleotides.
62    Identical(N),
63
64    /// Unsuccessfully attempted to parse a [`Variant`] from a string.
65    Parse(ParseError<N>),
66
67    /// An error constructing a relation.
68    Relation(relation::Error<N>),
69}
70
71impl<N: Nucleotide> std::fmt::Display for Error<N>
72where
73    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
74{
75    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
76        match self {
77            Error::Identical(nucleotide) => {
78                write!(f, "identical nucleotides for snv: {nucleotide}")
79            }
80            Error::Parse(err) => write!(f, "parse error: {err}"),
81            Error::Relation(err) => write!(f, "relation error: {err}"),
82        }
83    }
84}
85
86impl<N: Nucleotide> std::error::Error for Error<N> where
87    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display
88{
89}
90
91/// A single nucleotide variant.
92#[derive(Debug)]
93pub struct Variant<N: Nucleotide> {
94    /// The coordinate.
95    coordinate: Coordinate<Base>,
96
97    /// The relation.
98    relation: Relation<N>,
99}
100
101impl<N: Nucleotide> Variant<N>
102where
103    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
104{
105    /// Attempts to create a new [`Variant`].
106    ///
107    /// # Examples
108    ///
109    /// ```
110    /// use omics_coordinate::base::Coordinate;
111    /// use omics_coordinate::system::Base;
112    /// use omics_molecule::polymer::dna;
113    /// use omics_variation::snv::Variant;
114    ///
115    /// let variant = Variant::<dna::Nucleotide>::try_new(
116    ///     "seq0:+:1".parse::<Coordinate>()?,
117    ///     dna::Nucleotide::A,
118    ///     dna::Nucleotide::T,
119    /// )?;
120    ///
121    /// # Ok::<(), Box<dyn std::error::Error>>(())
122    /// ```
123    pub fn try_new(
124        coordinate: impl Into<Coordinate<Base>>,
125        reference_nucleotide: impl Into<N>,
126        alternate_nucleotide: impl Into<N>,
127    ) -> Result<Self, Error<N>> {
128        let coordinate = coordinate.into();
129        let reference_nucleotide = reference_nucleotide.into();
130        let alternate_nucleotide = alternate_nucleotide.into();
131
132        let relation = Relation::try_new(Some(reference_nucleotide), Some(alternate_nucleotide))
133            .map_err(Error::Relation)?;
134
135        if let Relation::Identical(nucleotide) = relation {
136            return Err(Error::Identical(nucleotide));
137        }
138
139        Ok(Self {
140            coordinate,
141            relation,
142        })
143    }
144
145    /// Gets the [`Coordinate`] for this [`Variant`].
146    ///
147    /// # Examples
148    ///
149    /// ```
150    /// use omics_coordinate::Strand;
151    /// use omics_coordinate::base::Coordinate;
152    /// use omics_coordinate::system::Base;
153    /// use omics_molecule::polymer::dna;
154    /// use omics_variation::snv::Variant;
155    ///
156    /// let variant = Variant::<dna::Nucleotide>::try_new(
157    ///     "seq0:+:1".parse::<Coordinate>()?,
158    ///     dna::Nucleotide::A,
159    ///     dna::Nucleotide::T,
160    /// )?;
161    ///
162    /// assert_eq!(variant.coordinate().contig().as_str(), "seq0");
163    /// assert_eq!(variant.coordinate().strand(), Strand::Positive);
164    /// assert_eq!(variant.coordinate().position().get(), 1);
165    ///
166    /// # Ok::<(), Box<dyn std::error::Error>>(())
167    /// ```
168    pub fn coordinate(&self) -> &Coordinate<Base> {
169        &self.coordinate
170    }
171
172    /// Gets the reference nucleotide as a [`Nucleotide`] from the [`Variant`].
173    ///
174    /// # Examples
175    ///
176    /// ```
177    /// use omics_coordinate::base::Coordinate;
178    /// use omics_coordinate::system::Base;
179    /// use omics_molecule::polymer::dna;
180    /// use omics_variation::snv::Variant;
181    ///
182    /// let variant = "seq0:+:1:A:T".parse::<Variant<dna::Nucleotide>>()?;
183    /// assert_eq!(variant.reference(), &dna::Nucleotide::A);
184    ///
185    /// # Ok::<(), Box<dyn std::error::Error>>(())
186    /// ```
187    pub fn reference(&self) -> &N {
188        // SAFETY: because a single nucleotide variant is guaranteed to have a
189        // reference nucleotide within the inner [`Relation`], this will
190        // always unwrap successfully.
191        self.relation.reference().unwrap()
192    }
193
194    /// Gets the alternate nucleotide as a [`Nucleotide`] from the [`Variant`].
195    ///
196    /// # Examples
197    ///
198    /// ```
199    /// use omics_coordinate::base::Coordinate;
200    /// use omics_coordinate::system::Base;
201    /// use omics_molecule::polymer::dna;
202    /// use omics_variation::snv::Variant;
203    ///
204    /// let variant = "seq0:+:1:A:T".parse::<Variant<dna::Nucleotide>>()?;
205    /// assert_eq!(variant.alternate(), &dna::Nucleotide::T);
206    ///
207    /// # Ok::<(), Box<dyn std::error::Error>>(())
208    /// ```
209    pub fn alternate(&self) -> &N {
210        // SAFETY: because a single nucleotide variant is guaranteed to have a
211        // alternate nucleotide within the inner [`Relation`], this will
212        // always unwrap successfully.
213        self.relation.alternate().unwrap()
214    }
215}
216
217impl<N: Nucleotide> std::str::FromStr for Variant<N>
218where
219    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
220{
221    type Err = Error<N>;
222
223    fn from_str(s: &str) -> Result<Self, Self::Err> {
224        let parts = s.split(VARIANT_SEPARATOR).collect::<Vec<_>>();
225        let num_parts = parts.len();
226
227        if num_parts != 4 && num_parts != 5 {
228            return Err(Error::Parse(ParseError::InvalidFormat(s.to_owned())));
229        }
230
231        let mut parts = parts.into_iter();
232
233        let coordinate = match num_parts {
234            4 => {
235                let positive = Strand::Positive.to_string();
236
237                // SAFETY: we just ensured that the number of parts is four.
238                // Since we have not taken any items from the iterator, these
239                // two items will always unwrap.
240                [
241                    parts.next().unwrap(),
242                    positive.as_str(),
243                    parts.next().unwrap(),
244                ]
245                .join(VARIANT_SEPARATOR)
246            }
247            5 => {
248                // SAFETY: we just ensured that the number of parts is five.
249                // Since we have not taken any items from the iterator, these
250                // three items will always unwrap.
251                [
252                    parts.next().unwrap(),
253                    parts.next().unwrap(),
254                    parts.next().unwrap(),
255                ]
256                .join(VARIANT_SEPARATOR)
257            }
258            // SAFETY: we ensured above that the number of parts must be either four or five.
259            _ => unreachable!(),
260        };
261
262        let coordinate = match coordinate.parse::<Coordinate<Base>>() {
263            Ok(coordinate) => coordinate,
264            Err(err) => return Err(Error::Parse(ParseError::CoordinateError(err))),
265        };
266
267        // SAFETY: in all cases above, we leave two items in the iterator. Since we have
268        // not taken any items yet, this will always unwrap.
269        let reference_nucleotide = parts
270            .next()
271            .unwrap()
272            .parse::<N>()
273            .map_err(|err| Error::Parse(ParseError::ReferenceNucleotide(err)))?;
274
275        // SAFETY: in all cases above, we leave two items in the iterator. Since we have
276        // only taken one item so far, this will always unwrap.
277        let alternate_nucleotide = parts
278            .next()
279            .unwrap()
280            .parse::<N>()
281            .map_err(|err| Error::Parse(ParseError::AlternateNucleotide(err)))?;
282
283        Self::try_new(coordinate, reference_nucleotide, alternate_nucleotide)
284    }
285}
286
287impl<N: Nucleotide> std::fmt::Display for Variant<N>
288where
289    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
290{
291    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
292        let coordinate = self.coordinate().to_string();
293
294        let parts = [
295            coordinate.as_str(),
296            &self.reference().to_string(),
297            &self.alternate().to_string(),
298        ];
299
300        write!(f, "{}", parts.join(self::VARIANT_SEPARATOR))
301    }
302}
303
#[cfg(test)]
mod tests {
    use omics_molecule::polymer::dna;
    use omics_molecule::polymer::rna;

    use super::*;

    /// The result type shared by the fallible tests below.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Parses `input` as a [`Variant`] and checks that it sits on contig
    /// `seq0` at position `1` with the given strand and nucleotides.
    fn assert_parses<N>(input: &str, strand: Strand, reference: N, alternate: N) -> TestResult
    where
        N: Nucleotide + PartialEq + std::fmt::Debug + 'static,
        <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
    {
        let variant = input.parse::<Variant<N>>()?;
        let coordinate = variant.coordinate();

        assert_eq!(coordinate.contig().as_str(), "seq0");
        assert_eq!(coordinate.strand(), strand);
        assert_eq!(coordinate.position().get(), 1);
        assert_eq!(variant.reference(), &reference);
        assert_eq!(variant.alternate(), &alternate);

        Ok(())
    }

    /// Parses `input` as a DNA [`Variant`], expecting failure, and returns
    /// the rendered error message.
    fn dna_parse_failure(input: &str) -> String {
        input
            .parse::<Variant<dna::Nucleotide>>()
            .unwrap_err()
            .to_string()
    }

    #[test]
    fn it_creates_a_variant_in_a_dna_context() -> TestResult {
        assert_parses(
            "seq0:+:1:A:C",
            Strand::Positive,
            dna::Nucleotide::A,
            dna::Nucleotide::C,
        )
    }

    #[test]
    fn it_creates_a_variant_in_a_rna_context() -> TestResult {
        assert_parses(
            "seq0:+:1:U:C",
            Strand::Positive,
            rna::Nucleotide::U,
            rna::Nucleotide::C,
        )
    }

    #[test]
    fn it_creates_a_variant_on_the_negative_strand_in_a_dna_context() -> TestResult {
        assert_parses(
            "seq0:-:1:A:C",
            Strand::Negative,
            dna::Nucleotide::A,
            dna::Nucleotide::C,
        )
    }

    #[test]
    fn it_creates_a_variant_on_the_negative_strand_in_a_rna_context() -> TestResult {
        assert_parses(
            "seq0:-:1:U:C",
            Strand::Negative,
            rna::Nucleotide::U,
            rna::Nucleotide::C,
        )
    }

    #[test]
    fn it_creates_a_variant_with_no_specified_strand_in_a_dna_context() -> TestResult {
        assert_parses(
            "seq0:1:A:C",
            Strand::Positive,
            dna::Nucleotide::A,
            dna::Nucleotide::C,
        )
    }

    #[test]
    fn it_creates_a_variant_with_no_specified_strand_in_a_rna_context() -> TestResult {
        assert_parses(
            "seq0:1:U:C",
            Strand::Positive,
            rna::Nucleotide::U,
            rna::Nucleotide::C,
        )
    }

    #[test]
    fn it_fails_when_creating_a_variant_with_identical_nucleotides() {
        assert_eq!(
            dna_parse_failure("seq0:+:1:A:A"),
            "identical nucleotides for snv: A"
        );
    }

    #[test]
    fn it_fails_when_attempting_to_represent_an_insertion() {
        assert_eq!(
            dna_parse_failure("seq0:+:1:.:A"),
            "parse error: reference nucleotide error: parse error: invalid nucleotide: ."
        );
    }

    #[test]
    fn it_fails_when_attempting_to_represent_a_deletion() {
        assert_eq!(
            dna_parse_failure("seq0:+:1:A:."),
            "parse error: alternate nucleotide error: parse error: invalid nucleotide: ."
        );
    }

    #[test]
    fn it_fails_when_attempting_to_represent_an_empty_pair() {
        // The reference nucleotide is parsed first, so it is the one reported.
        assert_eq!(
            dna_parse_failure("seq0:+:1:.:."),
            "parse error: reference nucleotide error: parse error: invalid nucleotide: ."
        );
    }
}