1use std::str::FromStr;
4
5use omics_coordinate::Coordinate;
6use omics_coordinate::Strand;
7use omics_coordinate::coordinate;
8use omics_coordinate::system::Base;
9use omics_core::VARIANT_SEPARATOR;
10use omics_molecule::compound::Nucleotide;
11use omics_molecule::compound::nucleotide::relation;
12use omics_molecule::compound::nucleotide::relation::Relation;
13use thiserror::Error;
14
/// An error raised while parsing a [`Variant`] from its string form.
#[derive(Error, Debug)]
pub enum ParseError<N: Nucleotide>
where
    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
{
    /// The input did not split into a supported number of parts.
    ///
    /// The payload is the offending input, echoed back in the message.
    #[error("invalid format: {0}")]
    InvalidFormat(String),

    /// The coordinate portion of the input could not be parsed.
    ///
    /// The message of the wrapped coordinate error is shown unchanged.
    #[error(transparent)]
    CoordinateError(#[from] coordinate::Error),

    /// The reference nucleotide could not be parsed.
    ///
    /// The payload is the error produced by the nucleotide's [`FromStr`]
    /// implementation.
    #[error("reference nucleotide error: {0}")]
    ReferenceNucleotide(<N as FromStr>::Err),

    /// The alternate nucleotide could not be parsed.
    ///
    /// The payload is the error produced by the nucleotide's [`FromStr`]
    /// implementation.
    #[error("alternate nucleotide error: {0}")]
    AlternateNucleotide(<N as FromStr>::Err),
}
39
/// An error related to a [`Variant`].
#[derive(Error, Debug)]
pub enum Error<N: Nucleotide>
where
    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
{
    /// The reference and alternate nucleotides were the same, so no change
    /// is described.
    ///
    /// The payload is the shared nucleotide.
    #[error("identical nucleotides for snv: {0}")]
    Identical(N),

    /// The variant could not be parsed from a string.
    ///
    /// The message of the wrapped [`ParseError`] is shown unchanged.
    #[error(transparent)]
    Parse(#[from] ParseError<N>),

    /// The relation between the two nucleotides could not be built.
    ///
    /// The message of the wrapped relation error is shown unchanged.
    #[error(transparent)]
    Relation(#[from] relation::Error<N>),
}
59
/// A single nucleotide variant: one reference nucleotide replaced by a
/// different alternate nucleotide at a base coordinate.
#[derive(Debug)]
pub struct Variant<N: Nucleotide> {
    /// The coordinate at which the variant occurs.
    coordinate: Coordinate<Base>,

    /// The relation between the reference and alternate nucleotides.
    ///
    /// `Variant::try_new` rejects `Relation::Identical`, so a constructed
    /// variant never stores that case.
    relation: Relation<N>,
}
69
70impl<N: Nucleotide> Variant<N>
71where
72 <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
73{
74 pub fn try_new(
93 coordinate: impl Into<Coordinate<Base>>,
94 reference_nucleotide: impl Into<N>,
95 alternate_nucleotide: impl Into<N>,
96 ) -> Result<Self, Error<N>> {
97 let coordinate = coordinate.into();
98 let reference_nucleotide = reference_nucleotide.into();
99 let alternate_nucleotide = alternate_nucleotide.into();
100
101 let relation = Relation::try_new(Some(reference_nucleotide), Some(alternate_nucleotide))
102 .map_err(Error::Relation)?;
103
104 if let Relation::Identical(nucleotide) = relation {
105 return Err(Error::Identical(nucleotide));
106 }
107
108 Ok(Self {
109 coordinate,
110 relation,
111 })
112 }
113
114 pub fn coordinate(&self) -> &Coordinate<Base> {
138 &self.coordinate
139 }
140
141 pub fn reference(&self) -> N {
157 self.relation.reference().unwrap()
161 }
162
163 pub fn alternate(&self) -> N {
179 self.relation.alternate().unwrap()
183 }
184}
185
186impl<N: Nucleotide> std::str::FromStr for Variant<N>
187where
188 <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
189{
190 type Err = Error<N>;
191
192 fn from_str(s: &str) -> Result<Self, Self::Err> {
193 let parts = s.split(VARIANT_SEPARATOR).collect::<Vec<_>>();
194 let num_parts = parts.len();
195
196 if num_parts != 4 && num_parts != 5 {
197 return Err(Error::Parse(ParseError::InvalidFormat(s.to_owned())));
198 }
199
200 let mut parts = parts.into_iter();
201
202 let coordinate = match num_parts {
203 4 => {
204 let positive = Strand::Positive.to_string();
205
206 [
210 parts.next().unwrap(),
211 positive.as_str(),
212 parts.next().unwrap(),
213 ]
214 .join(VARIANT_SEPARATOR)
215 }
216 5 => {
217 [
221 parts.next().unwrap(),
222 parts.next().unwrap(),
223 parts.next().unwrap(),
224 ]
225 .join(VARIANT_SEPARATOR)
226 }
227 _ => unreachable!(),
229 };
230
231 let coordinate = match coordinate.parse::<Coordinate<Base>>() {
232 Ok(coordinate) => coordinate,
233 Err(err) => return Err(Error::Parse(ParseError::CoordinateError(err))),
234 };
235
236 let reference_nucleotide = parts
239 .next()
240 .unwrap()
241 .parse::<N>()
242 .map_err(|err| Error::Parse(ParseError::ReferenceNucleotide(err)))?;
243
244 let alternate_nucleotide = parts
247 .next()
248 .unwrap()
249 .parse::<N>()
250 .map_err(|err| Error::Parse(ParseError::AlternateNucleotide(err)))?;
251
252 Self::try_new(coordinate, reference_nucleotide, alternate_nucleotide)
253 }
254}
255
256impl<N: Nucleotide> std::fmt::Display for Variant<N>
257where
258 <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
259{
260 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
261 let coordinate = self.coordinate().to_string();
262
263 let parts = [
264 coordinate.as_str(),
265 &self.reference().to_string(),
266 &self.alternate().to_string(),
267 ];
268
269 write!(f, "{}", parts.join(self::VARIANT_SEPARATOR))
270 }
271}
272
#[cfg(test)]
mod tests {
    use omics_molecule::polymer::dna;
    use omics_molecule::polymer::rna;

    use super::*;

    /// A variant over DNA nucleotides.
    type DnaVariant = Variant<dna::Nucleotide>;

    /// A variant over RNA nucleotides.
    type RnaVariant = Variant<rna::Nucleotide>;

    /// Return type of the tests that propagate parse failures with `?`.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    #[test]
    fn it_creates_a_variant_in_a_dna_context() -> TestResult {
        let variant: DnaVariant = "seq0:+:1:A:C".parse()?;
        let coordinate = variant.coordinate();

        assert_eq!(coordinate.contig().as_str(), "seq0");
        assert_eq!(coordinate.strand(), Strand::Positive);
        assert_eq!(coordinate.position().get(), 1);
        assert_eq!(variant.reference(), dna::Nucleotide::A);
        assert_eq!(variant.alternate(), dna::Nucleotide::C);

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_in_a_rna_context() -> TestResult {
        let variant: RnaVariant = "seq0:+:1:U:C".parse()?;
        let coordinate = variant.coordinate();

        assert_eq!(coordinate.contig().as_str(), "seq0");
        assert_eq!(coordinate.strand(), Strand::Positive);
        assert_eq!(coordinate.position().get(), 1);
        assert_eq!(variant.reference(), rna::Nucleotide::U);
        assert_eq!(variant.alternate(), rna::Nucleotide::C);

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_on_the_negative_strand_in_a_dna_context() -> TestResult {
        let variant: DnaVariant = "seq0:-:1:A:C".parse()?;
        let coordinate = variant.coordinate();

        assert_eq!(coordinate.contig().as_str(), "seq0");
        assert_eq!(coordinate.strand(), Strand::Negative);
        assert_eq!(coordinate.position().get(), 1);
        assert_eq!(variant.reference(), dna::Nucleotide::A);
        assert_eq!(variant.alternate(), dna::Nucleotide::C);

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_on_the_negative_strand_in_a_rna_context() -> TestResult {
        let variant: RnaVariant = "seq0:-:1:U:C".parse()?;
        let coordinate = variant.coordinate();

        assert_eq!(coordinate.contig().as_str(), "seq0");
        assert_eq!(coordinate.strand(), Strand::Negative);
        assert_eq!(coordinate.position().get(), 1);
        assert_eq!(variant.reference(), rna::Nucleotide::U);
        assert_eq!(variant.alternate(), rna::Nucleotide::C);

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_with_no_specified_strand_in_a_dna_context() -> TestResult {
        // Four fields only: the strand is expected to default to positive.
        let variant: DnaVariant = "seq0:1:A:C".parse()?;
        let coordinate = variant.coordinate();

        assert_eq!(coordinate.contig().as_str(), "seq0");
        assert_eq!(coordinate.strand(), Strand::Positive);
        assert_eq!(coordinate.position().get(), 1);
        assert_eq!(variant.reference(), dna::Nucleotide::A);
        assert_eq!(variant.alternate(), dna::Nucleotide::C);

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_with_no_specified_strand_in_a_rna_context() -> TestResult {
        // Four fields only: the strand is expected to default to positive.
        let variant: RnaVariant = "seq0:1:U:C".parse()?;
        let coordinate = variant.coordinate();

        assert_eq!(coordinate.contig().as_str(), "seq0");
        assert_eq!(coordinate.strand(), Strand::Positive);
        assert_eq!(coordinate.position().get(), 1);
        assert_eq!(variant.reference(), rna::Nucleotide::U);
        assert_eq!(variant.alternate(), rna::Nucleotide::C);

        Ok(())
    }

    #[test]
    fn it_fails_when_creating_a_variant_with_identical_nucleotides() {
        let err = DnaVariant::from_str("seq0:+:1:A:A").unwrap_err();

        assert_eq!(err.to_string(), "identical nucleotides for snv: A");
    }

    #[test]
    fn it_fails_when_attempting_to_represent_an_insertion() {
        let err = DnaVariant::from_str("seq0:+:1:.:A").unwrap_err();

        assert_eq!(
            err.to_string(),
            "reference nucleotide error: invalid nucleotide `.`"
        );
    }

    #[test]
    fn it_fails_when_attempting_to_represent_a_deletion() {
        let err = DnaVariant::from_str("seq0:+:1:A:.").unwrap_err();

        assert_eq!(
            err.to_string(),
            "alternate nucleotide error: invalid nucleotide `.`"
        );
    }

    #[test]
    fn it_fails_when_attempting_to_represent_an_empty_pair() {
        // The reference field is parsed first, so its error is the one reported.
        let err = DnaVariant::from_str("seq0:+:1:.:.").unwrap_err();

        assert_eq!(
            err.to_string(),
            "reference nucleotide error: invalid nucleotide `.`"
        );
    }
}