1use std::str::FromStr;
4
5use omics_coordinate::Coordinate;
6use omics_coordinate::Strand;
7use omics_coordinate::coordinate;
8use omics_coordinate::system::Base;
9use omics_core::VARIANT_SEPARATOR;
10use omics_molecule::compound::Nucleotide;
11use omics_molecule::compound::nucleotide::relation;
12use omics_molecule::compound::nucleotide::relation::Relation;
13
/// An error raised while parsing a single nucleotide variant from a string.
///
/// `N` is the nucleotide type being parsed. The error produced by its
/// [`FromStr`] implementation is carried by the nucleotide-related variants.
#[derive(Debug)]
pub enum ParseError<N: Nucleotide>
where
    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
{
    /// The input did not split into four or five separator-delimited fields.
    ///
    /// Carries an owned copy of the offending input.
    InvalidFormat(String),

    /// The coordinate portion of the input could not be parsed.
    CoordinateError(coordinate::Error),

    /// The reference nucleotide field could not be parsed as an `N`.
    ReferenceNucleotide(<N as FromStr>::Err),

    /// The alternate nucleotide field could not be parsed as an `N`.
    AlternateNucleotide(<N as FromStr>::Err),
}
34
35impl<N: Nucleotide> std::fmt::Display for ParseError<N>
36where
37 <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
38{
39 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40 match self {
41 ParseError::InvalidFormat(value) => write!(f, "invalid format: {value}"),
42 ParseError::CoordinateError(err) => write!(f, "coordinate error: {err}"),
43 ParseError::ReferenceNucleotide(err) => write!(f, "reference nucleotide error: {err}"),
44 ParseError::AlternateNucleotide(err) => write!(f, "alternate nucleotide error: {err}"),
45 }
46 }
47}
48
// Marker implementation: the body is empty, so `ParseError` uses the default
// methods of `std::error::Error`; its message comes from the `Display` impl.
impl<N: Nucleotide> std::error::Error for ParseError<N> where
    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display
{
}
53
/// An error related to a single nucleotide [`Variant`].
///
/// `N` is the nucleotide type of the variant.
#[derive(Debug)]
pub enum Error<N: Nucleotide>
where
    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
{
    /// The reference and alternate nucleotides resolved to an identical
    /// relation, which a variant cannot represent.
    ///
    /// Carries the nucleotide held by that relation.
    Identical(N),

    /// The variant could not be parsed from a string.
    Parse(ParseError<N>),

    /// Building the relation between the reference and alternate nucleotides
    /// failed.
    Relation(relation::Error<N>),
}
70
71impl<N: Nucleotide> std::fmt::Display for Error<N>
72where
73 <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
74{
75 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
76 match self {
77 Error::Identical(nucleotide) => {
78 write!(f, "identical nucleotides for snv: {nucleotide}")
79 }
80 Error::Parse(err) => write!(f, "parse error: {err}"),
81 Error::Relation(err) => write!(f, "relation error: {err}"),
82 }
83 }
84}
85
// Marker implementation: the body is empty, so `Error` uses the default
// methods of `std::error::Error`; its message comes from the `Display` impl.
impl<N: Nucleotide> std::error::Error for Error<N> where
    <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display
{
}
90
/// A single nucleotide variant: a coordinate paired with the relation between
/// a reference nucleotide and an alternate nucleotide.
#[derive(Debug)]
pub struct Variant<N: Nucleotide> {
    /// The coordinate at which the variant is located.
    coordinate: Coordinate<Base>,

    /// The relation between the reference and alternate nucleotides.
    relation: Relation<N>,
}
100
101impl<N: Nucleotide> Variant<N>
102where
103 <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
104{
105 pub fn try_new(
124 coordinate: impl Into<Coordinate<Base>>,
125 reference_nucleotide: impl Into<N>,
126 alternate_nucleotide: impl Into<N>,
127 ) -> Result<Self, Error<N>> {
128 let coordinate = coordinate.into();
129 let reference_nucleotide = reference_nucleotide.into();
130 let alternate_nucleotide = alternate_nucleotide.into();
131
132 let relation = Relation::try_new(Some(reference_nucleotide), Some(alternate_nucleotide))
133 .map_err(Error::Relation)?;
134
135 if let Relation::Identical(nucleotide) = relation {
136 return Err(Error::Identical(nucleotide));
137 }
138
139 Ok(Self {
140 coordinate,
141 relation,
142 })
143 }
144
145 pub fn coordinate(&self) -> &Coordinate<Base> {
169 &self.coordinate
170 }
171
172 pub fn reference(&self) -> &N {
188 self.relation.reference().unwrap()
192 }
193
194 pub fn alternate(&self) -> &N {
210 self.relation.alternate().unwrap()
214 }
215}
216
217impl<N: Nucleotide> std::str::FromStr for Variant<N>
218where
219 <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
220{
221 type Err = Error<N>;
222
223 fn from_str(s: &str) -> Result<Self, Self::Err> {
224 let parts = s.split(VARIANT_SEPARATOR).collect::<Vec<_>>();
225 let num_parts = parts.len();
226
227 if num_parts != 4 && num_parts != 5 {
228 return Err(Error::Parse(ParseError::InvalidFormat(s.to_owned())));
229 }
230
231 let mut parts = parts.into_iter();
232
233 let coordinate = match num_parts {
234 4 => {
235 let positive = Strand::Positive.to_string();
236
237 [
241 parts.next().unwrap(),
242 positive.as_str(),
243 parts.next().unwrap(),
244 ]
245 .join(VARIANT_SEPARATOR)
246 }
247 5 => {
248 [
252 parts.next().unwrap(),
253 parts.next().unwrap(),
254 parts.next().unwrap(),
255 ]
256 .join(VARIANT_SEPARATOR)
257 }
258 _ => unreachable!(),
260 };
261
262 let coordinate = match coordinate.parse::<Coordinate<Base>>() {
263 Ok(coordinate) => coordinate,
264 Err(err) => return Err(Error::Parse(ParseError::CoordinateError(err))),
265 };
266
267 let reference_nucleotide = parts
270 .next()
271 .unwrap()
272 .parse::<N>()
273 .map_err(|err| Error::Parse(ParseError::ReferenceNucleotide(err)))?;
274
275 let alternate_nucleotide = parts
278 .next()
279 .unwrap()
280 .parse::<N>()
281 .map_err(|err| Error::Parse(ParseError::AlternateNucleotide(err)))?;
282
283 Self::try_new(coordinate, reference_nucleotide, alternate_nucleotide)
284 }
285}
286
287impl<N: Nucleotide> std::fmt::Display for Variant<N>
288where
289 <N as FromStr>::Err: std::fmt::Debug + std::fmt::Display,
290{
291 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
292 let coordinate = self.coordinate().to_string();
293
294 let parts = [
295 coordinate.as_str(),
296 &self.reference().to_string(),
297 &self.alternate().to_string(),
298 ];
299
300 write!(f, "{}", parts.join(self::VARIANT_SEPARATOR))
301 }
302}
303
#[cfg(test)]
mod tests {
    use omics_molecule::polymer::dna;
    use omics_molecule::polymer::rna;

    use super::*;

    /// Result type shared by the tests that propagate parse failures with `?`.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    // Fully specified inputs (contig, strand, position, reference, alternate).

    #[test]
    fn it_creates_a_variant_in_a_dna_context() -> TestResult {
        let snv: Variant<dna::Nucleotide> = "seq0:+:1:A:C".parse()?;

        assert_eq!(snv.coordinate().contig().as_str(), "seq0");
        assert_eq!(snv.coordinate().strand(), Strand::Positive);
        assert_eq!(snv.coordinate().position().get(), 1);
        assert_eq!(snv.reference(), &dna::Nucleotide::A);
        assert_eq!(snv.alternate(), &dna::Nucleotide::C);

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_in_a_rna_context() -> TestResult {
        let snv: Variant<rna::Nucleotide> = "seq0:+:1:U:C".parse()?;

        assert_eq!(snv.coordinate().contig().as_str(), "seq0");
        assert_eq!(snv.coordinate().strand(), Strand::Positive);
        assert_eq!(snv.coordinate().position().get(), 1);
        assert_eq!(snv.reference(), &rna::Nucleotide::U);
        assert_eq!(snv.alternate(), &rna::Nucleotide::C);

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_on_the_negative_strand_in_a_dna_context() -> TestResult {
        let snv: Variant<dna::Nucleotide> = "seq0:-:1:A:C".parse()?;

        assert_eq!(snv.coordinate().contig().as_str(), "seq0");
        assert_eq!(snv.coordinate().strand(), Strand::Negative);
        assert_eq!(snv.coordinate().position().get(), 1);
        assert_eq!(snv.reference(), &dna::Nucleotide::A);
        assert_eq!(snv.alternate(), &dna::Nucleotide::C);

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_on_the_negative_strand_in_a_rna_context() -> TestResult {
        let snv: Variant<rna::Nucleotide> = "seq0:-:1:U:C".parse()?;

        assert_eq!(snv.coordinate().contig().as_str(), "seq0");
        assert_eq!(snv.coordinate().strand(), Strand::Negative);
        assert_eq!(snv.coordinate().position().get(), 1);
        assert_eq!(snv.reference(), &rna::Nucleotide::U);
        assert_eq!(snv.alternate(), &rna::Nucleotide::C);

        Ok(())
    }

    // Inputs that omit the strand are expected to land on the positive strand.

    #[test]
    fn it_creates_a_variant_with_no_specified_strand_in_a_dna_context() -> TestResult {
        let snv: Variant<dna::Nucleotide> = "seq0:1:A:C".parse()?;

        assert_eq!(snv.coordinate().contig().as_str(), "seq0");
        assert_eq!(snv.coordinate().strand(), Strand::Positive);
        assert_eq!(snv.coordinate().position().get(), 1);
        assert_eq!(snv.reference(), &dna::Nucleotide::A);
        assert_eq!(snv.alternate(), &dna::Nucleotide::C);

        Ok(())
    }

    #[test]
    fn it_creates_a_variant_with_no_specified_strand_in_a_rna_context() -> TestResult {
        let snv: Variant<rna::Nucleotide> = "seq0:1:U:C".parse()?;

        assert_eq!(snv.coordinate().contig().as_str(), "seq0");
        assert_eq!(snv.coordinate().strand(), Strand::Positive);
        assert_eq!(snv.coordinate().position().get(), 1);
        assert_eq!(snv.reference(), &rna::Nucleotide::U);
        assert_eq!(snv.alternate(), &rna::Nucleotide::C);

        Ok(())
    }

    // Inputs that must be rejected, checked through their rendered messages.

    #[test]
    fn it_fails_when_creating_a_variant_with_identical_nucleotides() {
        let outcome: Result<Variant<dna::Nucleotide>, _> = "seq0:+:1:A:A".parse();
        let message = outcome.unwrap_err().to_string();

        assert_eq!(message, "identical nucleotides for snv: A");
    }

    #[test]
    fn it_fails_when_attempting_to_represent_an_insertion() {
        let outcome: Result<Variant<dna::Nucleotide>, _> = "seq0:+:1:.:A".parse();
        let message = outcome.unwrap_err().to_string();

        assert_eq!(
            message,
            "parse error: reference nucleotide error: parse error: invalid nucleotide: ."
        );
    }

    #[test]
    fn it_fails_when_attempting_to_represent_a_deletion() {
        let outcome: Result<Variant<dna::Nucleotide>, _> = "seq0:+:1:A:.".parse();
        let message = outcome.unwrap_err().to_string();

        assert_eq!(
            message,
            "parse error: alternate nucleotide error: parse error: invalid nucleotide: ."
        );
    }

    #[test]
    fn it_fails_when_attempting_to_represent_an_empty_pair() {
        // The reference field is parsed first, so its error is the one reported.
        let outcome: Result<Variant<dna::Nucleotide>, _> = "seq0:+:1:.:.".parse();
        let message = outcome.unwrap_err().to_string();

        assert_eq!(
            message,
            "parse error: reference nucleotide error: parse error: invalid nucleotide: ."
        );
    }
}