1use super::SpdiVariant;
31use crate::coords::{OneBasedPos, ZeroBasedPos};
32use crate::error::FerroError;
33use crate::hgvs::edit::{InsertedSequence, NaEdit, Sequence};
34use crate::hgvs::interval::Interval;
35use crate::hgvs::location::GenomePos;
36use crate::hgvs::parser::accession::parse_accession;
37use crate::hgvs::variant::{GenomeVariant, HgvsVariant, LocEdit};
38
39#[derive(Debug, Clone, PartialEq, Eq)]
41pub enum ConversionError {
42 UnsupportedVariantType {
44 description: String,
46 },
47 MissingReferenceData {
49 description: String,
51 },
52 UnsupportedEditType {
54 description: String,
56 },
57 InvalidPosition {
59 description: String,
61 },
62 InvalidAccession {
64 description: String,
66 },
67}
68
69impl std::fmt::Display for ConversionError {
70 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71 match self {
72 ConversionError::UnsupportedVariantType { description } => {
73 write!(
74 f,
75 "unsupported variant type for conversion: {}",
76 description
77 )
78 }
79 ConversionError::MissingReferenceData { description } => {
80 write!(f, "missing reference data: {}", description)
81 }
82 ConversionError::UnsupportedEditType { description } => {
83 write!(f, "unsupported edit type for conversion: {}", description)
84 }
85 ConversionError::InvalidPosition { description } => {
86 write!(f, "invalid position: {}", description)
87 }
88 ConversionError::InvalidAccession { description } => {
89 write!(f, "invalid accession: {}", description)
90 }
91 }
92 }
93}
94
95impl std::error::Error for ConversionError {}
96
97impl From<ConversionError> for FerroError {
98 fn from(err: ConversionError) -> Self {
99 FerroError::ConversionError {
100 msg: err.to_string(),
101 }
102 }
103}
104
105fn sequence_to_string(seq: &Sequence) -> String {
107 seq.to_string()
108}
109
110fn inserted_sequence_to_string(seq: &InsertedSequence) -> Option<String> {
112 match seq {
113 InsertedSequence::Literal(s) => Some(s.to_string()),
114 _ => None,
115 }
116}
117
118fn get_start_pos(interval: &Interval<GenomePos>) -> Option<u64> {
120 interval.start.inner().map(|p| p.base)
121}
122
123fn get_end_pos(interval: &Interval<GenomePos>) -> Option<u64> {
125 interval.end.inner().map(|p| p.base)
126}
127
128pub fn hgvs_to_spdi_simple(variant: &HgvsVariant) -> Result<SpdiVariant, ConversionError> {
155 match variant {
156 HgvsVariant::Genome(g) => genome_to_spdi_simple(g),
157 _ => Err(ConversionError::UnsupportedVariantType {
158 description: format!(
159 "only genomic (g.) variants can be directly converted to SPDI, got {}",
160 variant.variant_type()
161 ),
162 }),
163 }
164}
165
166fn genome_to_spdi_simple(variant: &GenomeVariant) -> Result<SpdiVariant, ConversionError> {
168 let sequence = variant.accession.to_string();
169 let interval = &variant.loc_edit.location;
170
171 let edit = variant
173 .loc_edit
174 .edit
175 .inner()
176 .ok_or_else(|| ConversionError::InvalidPosition {
177 description: "cannot convert variant with unknown edit".to_string(),
178 })?;
179
180 let start_pos = get_start_pos(interval).ok_or_else(|| ConversionError::InvalidPosition {
182 description: "cannot convert variant with unknown start position".to_string(),
183 })?;
184
185 let hgvs_pos_ob =
187 OneBasedPos::try_new(start_pos).ok_or_else(|| ConversionError::InvalidPosition {
188 description: "position 0 is not valid in HGVS".to_string(),
189 })?;
190
191 let spdi_pos_zb: ZeroBasedPos = hgvs_pos_ob.to_zero_based();
193 let spdi_pos = spdi_pos_zb.value();
194
195 match edit {
196 NaEdit::Substitution {
197 reference,
198 alternative,
199 } => {
200 Ok(SpdiVariant::new(
202 sequence,
203 spdi_pos,
204 reference.to_string(),
205 alternative.to_string(),
206 ))
207 }
208 NaEdit::Insertion { sequence: inserted } => {
209 let ins_str = inserted_sequence_to_string(inserted).ok_or_else(|| {
213 ConversionError::MissingReferenceData {
214 description: "insertion sequence is not a literal sequence".to_string(),
215 }
216 })?;
217 Ok(SpdiVariant::new(sequence, spdi_pos, "", ins_str))
218 }
219 NaEdit::Duplication {
220 sequence: dup_seq, ..
221 } => {
222 if let Some(seq) = dup_seq {
225 let end_pos = get_end_pos(interval).unwrap_or(start_pos);
227 let end_pos_ob = OneBasedPos::new(end_pos);
229 let spdi_end_zb = end_pos_ob.to_zero_based();
230 Ok(SpdiVariant::new(
231 sequence,
232 spdi_end_zb.value(),
233 "",
234 sequence_to_string(seq),
235 ))
236 } else {
237 Err(ConversionError::MissingReferenceData {
238 description:
239 "duplication sequence not provided; reference data needed to determine duplicated bases"
240 .to_string(),
241 })
242 }
243 }
244 NaEdit::Deletion {
245 sequence: del_seq, ..
246 } => {
247 if let Some(seq) = del_seq {
250 Ok(SpdiVariant::new(
251 sequence,
252 spdi_pos,
253 sequence_to_string(seq),
254 "",
255 ))
256 } else {
257 Err(ConversionError::MissingReferenceData {
258 description:
259 "deleted sequence not provided; reference data needed to determine deleted bases"
260 .to_string(),
261 })
262 }
263 }
264 NaEdit::Delins { sequence: _ins_seq } => {
265 let end_pos = get_end_pos(interval).unwrap_or(start_pos);
270 let del_len = (end_pos - start_pos).saturating_add(1) as usize;
271
272 Err(ConversionError::MissingReferenceData {
274 description: format!(
275 "Cannot convert delins to SPDI: deleted sequence of length {} is unknown (no reference data)",
276 del_len
277 ),
278 })
279 }
280 NaEdit::Identity {
281 sequence: id_seq, ..
282 } => {
283 let ref_base = id_seq.as_ref().map(sequence_to_string).unwrap_or_default();
285 Ok(SpdiVariant::new(
286 sequence,
287 spdi_pos,
288 ref_base.clone(),
289 ref_base,
290 ))
291 }
292 NaEdit::Inversion { .. } => Err(ConversionError::UnsupportedEditType {
293 description: "inversions cannot be represented in SPDI format".to_string(),
294 }),
295 NaEdit::Repeat { .. } => Err(ConversionError::UnsupportedEditType {
296 description: "repeat variants cannot be directly converted to SPDI".to_string(),
297 }),
298 NaEdit::CopyNumber { .. } => Err(ConversionError::UnsupportedEditType {
299 description: "copy number variants cannot be represented in SPDI format".to_string(),
300 }),
301 NaEdit::Conversion { .. } => Err(ConversionError::UnsupportedEditType {
302 description: "conversion variants cannot be represented in SPDI format".to_string(),
303 }),
304 _ => Err(ConversionError::UnsupportedEditType {
305 description: format!("unsupported edit type: {:?}", edit),
306 }),
307 }
308}
309
310pub fn spdi_to_hgvs(spdi: &SpdiVariant) -> Result<HgvsVariant, ConversionError> {
331 let accession = parse_accession(&spdi.sequence)
333 .map(|(_, acc)| acc)
334 .map_err(|_| ConversionError::InvalidAccession {
335 description: format!("could not parse accession: {}", spdi.sequence),
336 })?;
337
338 let spdi_pos_zb = ZeroBasedPos::new(spdi.position);
340 let hgvs_pos_ob = spdi_pos_zb.to_one_based();
341 let hgvs_pos = hgvs_pos_ob.value();
342
343 let (interval, edit) = if spdi.is_identity() {
345 let seq = if spdi.deletion.is_empty() {
347 None
348 } else {
349 Some(string_to_sequence(&spdi.deletion)?)
350 };
351 (
352 Interval::point(GenomePos::new(hgvs_pos)),
353 NaEdit::Identity {
354 sequence: seq,
355 whole_entity: false,
356 },
357 )
358 } else if spdi.deletion.len() == 1 && spdi.insertion.len() == 1 {
359 let ref_base = char_to_base(spdi.deletion.chars().next().unwrap())?;
361 let alt_base = char_to_base(spdi.insertion.chars().next().unwrap())?;
362 (
363 Interval::point(GenomePos::new(hgvs_pos)),
364 NaEdit::Substitution {
365 reference: ref_base,
366 alternative: alt_base,
367 },
368 )
369 } else if spdi.is_deletion() {
370 let del_len = spdi.deletion.len();
372 let del_seq = string_to_sequence(&spdi.deletion)?;
373 let interval = if del_len > 1 {
374 Interval::new(
375 GenomePos::new(hgvs_pos),
376 GenomePos::new(hgvs_pos + del_len as u64 - 1),
377 )
378 } else {
379 Interval::point(GenomePos::new(hgvs_pos))
380 };
381 (
382 interval,
383 NaEdit::Deletion {
384 sequence: Some(del_seq),
385 length: None,
386 },
387 )
388 } else if spdi.is_insertion() {
389 let ins_seq = string_to_sequence(&spdi.insertion)?;
393 (
394 Interval::new(GenomePos::new(hgvs_pos), GenomePos::new(hgvs_pos + 1)),
395 NaEdit::Insertion {
396 sequence: InsertedSequence::Literal(ins_seq),
397 },
398 )
399 } else {
400 let del_len = spdi.deletion.len();
402 let ins_seq = string_to_sequence(&spdi.insertion)?;
403 let interval = if del_len > 1 {
404 Interval::new(
405 GenomePos::new(hgvs_pos),
406 GenomePos::new(hgvs_pos + del_len as u64 - 1),
407 )
408 } else {
409 Interval::point(GenomePos::new(hgvs_pos))
410 };
411 (
412 interval,
413 NaEdit::Delins {
414 sequence: InsertedSequence::Literal(ins_seq),
415 },
416 )
417 };
418
419 Ok(HgvsVariant::Genome(GenomeVariant {
420 accession,
421 gene_symbol: None,
422 loc_edit: LocEdit::new(interval, edit),
423 }))
424}
425
426fn string_to_sequence(s: &str) -> Result<Sequence, ConversionError> {
428 s.parse().map_err(|_| ConversionError::InvalidPosition {
429 description: format!("invalid sequence: {}", s),
430 })
431}
432
433fn char_to_base(c: char) -> Result<crate::hgvs::edit::Base, ConversionError> {
435 crate::hgvs::edit::Base::from_char(c).ok_or_else(|| ConversionError::InvalidPosition {
436 description: format!("invalid base character: {}", c),
437 })
438}
439
440#[cfg(test)]
441mod tests {
442 use super::*;
443 use crate::hgvs::parser::parse_hgvs;
444
445 #[test]
448 fn test_hgvs_to_spdi_substitution() {
449 let hgvs = parse_hgvs("NC_000001.11:g.12345A>G").unwrap();
450 let spdi = hgvs_to_spdi_simple(&hgvs).unwrap();
451 assert_eq!(spdi.sequence, "NC_000001.11");
452 assert_eq!(spdi.position, 12344);
453 assert_eq!(spdi.deletion, "A");
454 assert_eq!(spdi.insertion, "G");
455 }
456
457 #[test]
458 fn test_hgvs_to_spdi_insertion() {
459 let hgvs = parse_hgvs("NC_000001.11:g.100_101insATG").unwrap();
460 let spdi = hgvs_to_spdi_simple(&hgvs).unwrap();
461 assert_eq!(spdi.position, 99); assert_eq!(spdi.deletion, "");
463 assert_eq!(spdi.insertion, "ATG");
464 }
465
466 #[test]
467 fn test_hgvs_to_spdi_deletion_with_seq() {
468 let hgvs = parse_hgvs("NC_000001.11:g.100_102delATG").unwrap();
469 let spdi = hgvs_to_spdi_simple(&hgvs).unwrap();
470 assert_eq!(spdi.position, 99);
471 assert_eq!(spdi.deletion, "ATG");
472 assert_eq!(spdi.insertion, "");
473 }
474
475 #[test]
476 fn test_hgvs_to_spdi_deletion_without_seq() {
477 let hgvs = parse_hgvs("NC_000001.11:g.100_102del").unwrap();
478 let result = hgvs_to_spdi_simple(&hgvs);
479 assert!(matches!(
480 result,
481 Err(ConversionError::MissingReferenceData { .. })
482 ));
483 }
484
485 #[test]
486 fn test_hgvs_to_spdi_delins_without_ref() {
487 let hgvs = parse_hgvs("NC_000001.11:g.100_102delinsTTCC").unwrap();
489 let result = hgvs_to_spdi_simple(&hgvs);
490 assert!(result.is_err());
491 assert!(result.unwrap_err().to_string().contains("unknown"));
492 }
493
494 #[test]
495 fn test_hgvs_to_spdi_duplication_with_seq() {
496 let hgvs = parse_hgvs("NC_000001.11:g.100_102dupATG").unwrap();
497 let spdi = hgvs_to_spdi_simple(&hgvs).unwrap();
498 assert_eq!(spdi.position, 101); assert_eq!(spdi.deletion, "");
501 assert_eq!(spdi.insertion, "ATG");
502 }
503
504 #[test]
505 fn test_hgvs_to_spdi_identity() {
506 let hgvs = parse_hgvs("NC_000001.11:g.100A=").unwrap();
507 let spdi = hgvs_to_spdi_simple(&hgvs).unwrap();
508 assert_eq!(spdi.position, 99);
509 assert_eq!(spdi.deletion, "A");
510 assert_eq!(spdi.insertion, "A");
511 }
512
513 #[test]
514 fn test_hgvs_to_spdi_unsupported_coding() {
515 let hgvs = parse_hgvs("NM_000088.3:c.100A>G").unwrap();
516 let result = hgvs_to_spdi_simple(&hgvs);
517 assert!(matches!(
518 result,
519 Err(ConversionError::UnsupportedVariantType { .. })
520 ));
521 }
522
523 #[test]
524 fn test_hgvs_to_spdi_unsupported_inversion() {
525 let hgvs = parse_hgvs("NC_000001.11:g.100_200inv").unwrap();
526 let result = hgvs_to_spdi_simple(&hgvs);
527 assert!(matches!(
528 result,
529 Err(ConversionError::UnsupportedEditType { .. })
530 ));
531 }
532
533 #[test]
536 fn test_spdi_to_hgvs_substitution() {
537 let spdi = SpdiVariant::new("NC_000001.11", 12344, "A", "G");
538 let hgvs = spdi_to_hgvs(&spdi).unwrap();
539 assert_eq!(hgvs.to_string(), "NC_000001.11:g.12345A>G");
540 }
541
542 #[test]
543 fn test_spdi_to_hgvs_deletion() {
544 let spdi = SpdiVariant::deletion("NC_000001.11", 99, "ATG");
545 let hgvs = spdi_to_hgvs(&spdi).unwrap();
546 assert_eq!(hgvs.to_string(), "NC_000001.11:g.100_102delATG");
547 }
548
549 #[test]
550 fn test_spdi_to_hgvs_insertion() {
551 let spdi = SpdiVariant::insertion("NC_000001.11", 100, "ATG");
552 let hgvs = spdi_to_hgvs(&spdi).unwrap();
553 assert_eq!(hgvs.to_string(), "NC_000001.11:g.101_102insATG");
554 }
555
556 #[test]
557 fn test_spdi_to_hgvs_delins() {
558 let spdi = SpdiVariant::delins("NC_000001.11", 99, "ATG", "TTCC");
559 let hgvs = spdi_to_hgvs(&spdi).unwrap();
560 assert_eq!(hgvs.to_string(), "NC_000001.11:g.100_102delinsTTCC");
561 }
562
563 #[test]
564 fn test_spdi_to_hgvs_identity() {
565 let spdi = SpdiVariant::new("NC_000001.11", 99, "A", "A");
566 let hgvs = spdi_to_hgvs(&spdi).unwrap();
567 assert_eq!(hgvs.to_string(), "NC_000001.11:g.100A=");
568 }
569
570 #[test]
571 fn test_spdi_to_hgvs_single_del() {
572 let spdi = SpdiVariant::deletion("NC_000001.11", 99, "A");
573 let hgvs = spdi_to_hgvs(&spdi).unwrap();
574 assert_eq!(hgvs.to_string(), "NC_000001.11:g.100delA");
575 }
576
577 #[test]
580 fn test_roundtrip_substitution() {
581 let original = "NC_000001.11:g.12345A>G";
582 let hgvs = parse_hgvs(original).unwrap();
583 let spdi = hgvs_to_spdi_simple(&hgvs).unwrap();
584 let back = spdi_to_hgvs(&spdi).unwrap();
585 assert_eq!(back.to_string(), original);
586 }
587
588 #[test]
589 fn test_roundtrip_insertion() {
590 let original = "NC_000001.11:g.100_101insATG";
591 let hgvs = parse_hgvs(original).unwrap();
592 let spdi = hgvs_to_spdi_simple(&hgvs).unwrap();
593 let back = spdi_to_hgvs(&spdi).unwrap();
594 assert_eq!(back.to_string(), original);
595 }
596
597 #[test]
598 fn test_roundtrip_deletion_with_seq() {
599 let original = "NC_000001.11:g.100_102delATG";
600 let hgvs = parse_hgvs(original).unwrap();
601 let spdi = hgvs_to_spdi_simple(&hgvs).unwrap();
602 let back = spdi_to_hgvs(&spdi).unwrap();
603 assert_eq!(back.to_string(), original);
604 }
605
606 #[test]
607 fn test_error_display() {
608 let err = ConversionError::UnsupportedVariantType {
609 description: "test".to_string(),
610 };
611 assert!(err.to_string().contains("unsupported variant type"));
612
613 let err = ConversionError::MissingReferenceData {
614 description: "test".to_string(),
615 };
616 assert!(err.to_string().contains("missing reference data"));
617 }
618
619 struct MockGenomicRef {
625 sequences: std::collections::HashMap<(String, u64, u64), String>,
627 }
628
629 impl MockGenomicRef {
630 fn new() -> Self {
631 let mut sequences = std::collections::HashMap::new();
632
633 sequences.insert(("NC_000001.11".to_string(), 99, 102), "ATG".to_string());
637 sequences.insert(
639 ("NC_000001.11".to_string(), 199, 206),
640 "GATTACA".to_string(),
641 );
642 sequences.insert(
644 ("NC_000001.11".to_string(), 999, 1009),
645 "AAACCCGGGT".to_string(),
646 );
647
648 Self { sequences }
649 }
650
651 fn get_sequence(&self, accession: &str, start: u64, end: u64) -> Option<String> {
652 self.sequences
653 .get(&(accession.to_string(), start, end))
654 .cloned()
655 }
656 }
657
658 fn hgvs_to_spdi_with_ref(
660 variant: &HgvsVariant,
661 reference: &MockGenomicRef,
662 ) -> Result<SpdiVariant, ConversionError> {
663 match variant {
664 HgvsVariant::Genome(g) => {
665 let sequence = g.accession.to_string();
666 let interval = &g.loc_edit.location;
667
668 let edit =
669 g.loc_edit
670 .edit
671 .inner()
672 .ok_or_else(|| ConversionError::InvalidPosition {
673 description: "cannot convert variant with unknown edit".to_string(),
674 })?;
675
676 let start_pos =
677 get_start_pos(interval).ok_or_else(|| ConversionError::InvalidPosition {
678 description: "cannot convert variant with unknown start position"
679 .to_string(),
680 })?;
681 let end_pos = get_end_pos(interval).unwrap_or(start_pos);
682
683 let start_pos_ob = OneBasedPos::new(start_pos);
685 let spdi_pos = start_pos_ob.to_zero_based().value();
686
687 match edit {
688 NaEdit::Deletion { sequence: None, .. } => {
689 let del_seq = reference
691 .get_sequence(&sequence, spdi_pos, end_pos)
692 .ok_or_else(|| ConversionError::MissingReferenceData {
693 description: format!(
694 "could not fetch sequence for {}:{}-{}",
695 sequence, start_pos, end_pos
696 ),
697 })?;
698 Ok(SpdiVariant::new(sequence, spdi_pos, del_seq, ""))
699 }
700 NaEdit::Delins { sequence: ins_seq } => {
701 let del_seq = reference
703 .get_sequence(&sequence, spdi_pos, end_pos)
704 .ok_or_else(|| ConversionError::MissingReferenceData {
705 description: format!(
706 "could not fetch sequence for {}:{}-{}",
707 sequence, start_pos, end_pos
708 ),
709 })?;
710 let ins_str = inserted_sequence_to_string(ins_seq).ok_or_else(|| {
711 ConversionError::MissingReferenceData {
712 description: "delins inserted sequence is not a literal"
713 .to_string(),
714 }
715 })?;
716 Ok(SpdiVariant::new(sequence, spdi_pos, del_seq, ins_str))
717 }
718 NaEdit::Duplication { sequence: None, .. } => {
719 let dup_seq = reference
721 .get_sequence(&sequence, spdi_pos, end_pos)
722 .ok_or_else(|| ConversionError::MissingReferenceData {
723 description: format!(
724 "could not fetch sequence for {}:{}-{}",
725 sequence, start_pos, end_pos
726 ),
727 })?;
728 let end_pos_ob = OneBasedPos::new(end_pos);
731 let spdi_end = end_pos_ob.to_zero_based().value();
732 Ok(SpdiVariant::new(sequence, spdi_end, "", dup_seq))
733 }
734 _ => hgvs_to_spdi_simple(variant),
736 }
737 }
738 _ => hgvs_to_spdi_simple(variant),
739 }
740 }
741
742 #[test]
743 fn test_deletion_without_seq_with_reference() {
744 let reference = MockGenomicRef::new();
745 let hgvs = parse_hgvs("NC_000001.11:g.100_102del").unwrap();
746
747 assert!(hgvs_to_spdi_simple(&hgvs).is_err());
749
750 let spdi = hgvs_to_spdi_with_ref(&hgvs, &reference).unwrap();
752 assert_eq!(spdi.sequence, "NC_000001.11");
753 assert_eq!(spdi.position, 99);
754 assert_eq!(spdi.deletion, "ATG");
755 assert_eq!(spdi.insertion, "");
756 }
757
758 #[test]
759 fn test_delins_with_reference_provides_actual_deletion() {
760 let reference = MockGenomicRef::new();
761 let hgvs = parse_hgvs("NC_000001.11:g.100_102delinsTTCC").unwrap();
762
763 let simple = hgvs_to_spdi_simple(&hgvs);
765 assert!(simple.is_err());
766 assert!(simple.unwrap_err().to_string().contains("unknown"));
767
768 let spdi = hgvs_to_spdi_with_ref(&hgvs, &reference).unwrap();
770 assert_eq!(spdi.deletion, "ATG");
771 assert_eq!(spdi.insertion, "TTCC");
772 }
773
774 #[test]
775 fn test_delins_roundtrip_with_reference() {
776 let reference = MockGenomicRef::new();
777 let original_hgvs = parse_hgvs("NC_000001.11:g.100_102delinsTTCC").unwrap();
778
779 let spdi = hgvs_to_spdi_with_ref(&original_hgvs, &reference).unwrap();
781 assert_eq!(spdi.deletion, "ATG");
782 assert_eq!(spdi.insertion, "TTCC");
783
784 let back = spdi_to_hgvs(&spdi).unwrap();
786 assert_eq!(back.to_string(), "NC_000001.11:g.100_102delinsTTCC");
787 }
788
789 #[test]
790 fn test_deletion_roundtrip_with_reference() {
791 let reference = MockGenomicRef::new();
792 let original_hgvs = parse_hgvs("NC_000001.11:g.100_102del").unwrap();
793
794 let spdi = hgvs_to_spdi_with_ref(&original_hgvs, &reference).unwrap();
796 assert_eq!(spdi.deletion, "ATG");
797 assert_eq!(spdi.insertion, "");
798
799 let back = spdi_to_hgvs(&spdi).unwrap();
801 assert_eq!(back.to_string(), "NC_000001.11:g.100_102delATG");
803 }
804
805 #[test]
806 fn test_duplication_without_seq_with_reference() {
807 let reference = MockGenomicRef::new();
808 let hgvs = parse_hgvs("NC_000001.11:g.100_102dup").unwrap();
809
810 assert!(hgvs_to_spdi_simple(&hgvs).is_err());
812
813 let spdi = hgvs_to_spdi_with_ref(&hgvs, &reference).unwrap();
815 assert_eq!(spdi.sequence, "NC_000001.11");
816 assert_eq!(spdi.position, 101); assert_eq!(spdi.deletion, "");
819 assert_eq!(spdi.insertion, "ATG");
820 }
821
822 #[test]
823 fn test_long_deletion_with_reference() {
824 let reference = MockGenomicRef::new();
825 let hgvs = parse_hgvs("NC_000001.11:g.1000_1009del").unwrap();
826
827 let spdi = hgvs_to_spdi_with_ref(&hgvs, &reference).unwrap();
828 assert_eq!(spdi.deletion, "AAACCCGGGT");
829 assert_eq!(spdi.deletion.len(), 10);
830 }
831
832 #[test]
833 fn test_reference_missing_region() {
834 let reference = MockGenomicRef::new();
835 let hgvs = parse_hgvs("NC_000001.11:g.50000_50002del").unwrap();
837
838 let result = hgvs_to_spdi_with_ref(&hgvs, &reference);
839 assert!(matches!(
840 result,
841 Err(ConversionError::MissingReferenceData { .. })
842 ));
843 }
844
845 #[test]
846 fn test_substitution_falls_through_to_simple() {
847 let reference = MockGenomicRef::new();
848 let hgvs = parse_hgvs("NC_000001.11:g.12345A>G").unwrap();
849
850 let spdi = hgvs_to_spdi_with_ref(&hgvs, &reference).unwrap();
852 assert_eq!(spdi.to_string(), "NC_000001.11:12344:A:G");
853 }
854
855 #[test]
856 fn test_mnv_delins_with_reference() {
857 let reference = MockGenomicRef::new();
859 let hgvs = parse_hgvs("NC_000001.11:g.100_102delinsGGG").unwrap();
860
861 let spdi = hgvs_to_spdi_with_ref(&hgvs, &reference).unwrap();
862 assert_eq!(spdi.deletion, "ATG");
863 assert_eq!(spdi.insertion, "GGG");
864
865 let back = spdi_to_hgvs(&spdi).unwrap();
867 assert_eq!(back.to_string(), "NC_000001.11:g.100_102delinsGGG");
868 }
869
870 #[test]
875 fn test_spdi_empty_deletion_insertion() {
876 let spdi = SpdiVariant::insertion("NC_000001.11", 100, "ATG");
878 assert!(spdi.is_insertion());
879 assert!(!spdi.is_deletion());
880 assert!(!spdi.is_identity());
881 assert_eq!(spdi.deletion, "");
882 assert_eq!(spdi.insertion, "ATG");
883 }
884
885 #[test]
886 fn test_spdi_empty_insertion_deletion() {
887 let spdi = SpdiVariant::deletion("NC_000001.11", 100, "ATG");
889 assert!(spdi.is_deletion());
890 assert!(!spdi.is_insertion());
891 assert!(!spdi.is_identity());
892 assert_eq!(spdi.deletion, "ATG");
893 assert_eq!(spdi.insertion, "");
894 }
895
896 #[test]
897 fn test_spdi_both_empty_is_identity() {
898 let spdi = SpdiVariant::new("NC_000001.11", 100, "", "");
900 assert!(spdi.is_identity());
901 assert!(!spdi.is_insertion());
902 assert!(!spdi.is_deletion());
903 }
904
905 #[test]
906 fn test_spdi_single_base_insertion() {
907 let spdi = SpdiVariant::insertion("NC_000001.11", 100, "A");
908 assert!(spdi.is_insertion());
909 assert_eq!(spdi.insertion.len(), 1);
910
911 let hgvs = spdi_to_hgvs(&spdi).unwrap();
912 assert!(hgvs.to_string().contains("ins"));
913 }
914
915 #[test]
916 fn test_spdi_single_base_deletion() {
917 let spdi = SpdiVariant::deletion("NC_000001.11", 100, "A");
918 assert!(spdi.is_deletion());
919 assert_eq!(spdi.deletion.len(), 1);
920
921 let hgvs = spdi_to_hgvs(&spdi).unwrap();
922 assert!(hgvs.to_string().contains("del"));
923 }
924
925 #[test]
926 fn test_spdi_long_insertion_100bp() {
927 let long_seq = "A".repeat(100);
929 let spdi = SpdiVariant::insertion("NC_000001.11", 12345, &long_seq);
930
931 assert!(spdi.is_insertion());
932 assert_eq!(spdi.insertion.len(), 100);
933
934 let hgvs = spdi_to_hgvs(&spdi).unwrap();
935 assert!(hgvs.to_string().contains("ins"));
936 assert!(hgvs.to_string().ends_with(&format!("ins{}", long_seq)));
938 }
939
940 #[test]
941 fn test_spdi_long_deletion_100bp() {
942 let long_seq = "ACGT".repeat(25); let spdi = SpdiVariant::deletion("NC_000001.11", 12345, &long_seq);
945
946 assert!(spdi.is_deletion());
947 assert_eq!(spdi.deletion.len(), 100);
948
949 let hgvs = spdi_to_hgvs(&spdi).unwrap();
950 assert!(hgvs.to_string().contains("del"));
951 }
952
953 #[test]
954 fn test_spdi_long_indel_asymmetric() {
955 let del_seq = "A".repeat(50);
957 let ins_seq = "G".repeat(150);
958 let spdi = SpdiVariant::delins("NC_000001.11", 12345, &del_seq, &ins_seq);
959
960 assert!(!spdi.is_insertion());
961 assert!(!spdi.is_deletion());
962 assert_eq!(spdi.deletion.len(), 50);
963 assert_eq!(spdi.insertion.len(), 150);
964
965 let hgvs = spdi_to_hgvs(&spdi).unwrap();
966 assert!(hgvs.to_string().contains("delins"));
967 }
968
969 #[test]
970 fn test_spdi_very_long_insertion_1000bp() {
971 let long_seq = "ACGT".repeat(250); let spdi = SpdiVariant::insertion("NC_000001.11", 50000, &long_seq);
974
975 assert!(spdi.is_insertion());
976 assert_eq!(spdi.insertion.len(), 1000);
977
978 let hgvs = spdi_to_hgvs(&spdi).unwrap();
980 assert!(hgvs.to_string().contains("ins"));
981 }
982
983 #[test]
984 fn test_spdi_position_zero() {
985 let spdi = SpdiVariant::new("NC_000001.11", 0, "A", "G");
987 assert_eq!(spdi.position, 0);
988
989 let hgvs = spdi_to_hgvs(&spdi).unwrap();
990 assert!(hgvs.to_string().contains("g.1A>G"));
992 }
993
994 #[test]
995 fn test_spdi_position_max() {
996 let spdi = SpdiVariant::new("NC_000001.11", 248956421, "A", "G");
998
999 let hgvs = spdi_to_hgvs(&spdi).unwrap();
1000 assert!(hgvs.to_string().contains("248956422")); }
1002
1003 #[test]
1004 fn test_spdi_lowercase_sequence() {
1005 let spdi = SpdiVariant::new("NC_000001.11", 100, "a", "g");
1007
1008 assert_eq!(spdi.deletion, "a");
1010 assert_eq!(spdi.insertion, "g");
1011 }
1012
1013 #[test]
1014 fn test_spdi_mixed_case_sequence() {
1015 let spdi = SpdiVariant::new("NC_000001.11", 100, "AtGc", "GcTa");
1017
1018 assert_eq!(spdi.deletion, "AtGc");
1019 assert_eq!(spdi.insertion, "GcTa");
1020 }
1021
1022 #[test]
1023 fn test_spdi_n_bases_in_sequence() {
1024 let spdi = SpdiVariant::new("NC_000001.11", 100, "ANG", "TNC");
1026
1027 assert_eq!(spdi.deletion, "ANG");
1028 assert_eq!(spdi.insertion, "TNC");
1029 }
1030
1031 #[test]
1032 fn test_spdi_complex_repeat_sequence() {
1033 let repeat = "CAG".repeat(30); let spdi = SpdiVariant::insertion("NC_000004.12", 3074876, &repeat);
1036
1037 assert!(spdi.is_insertion());
1038 assert_eq!(spdi.insertion.len(), 90);
1039 }
1040
1041 #[test]
1042 fn test_spdi_to_hgvs_delins_single_base_del() {
1043 let spdi = SpdiVariant::delins("NC_000001.11", 100, "A", "TTTT");
1045
1046 let hgvs = spdi_to_hgvs(&spdi).unwrap();
1047 assert!(hgvs.to_string().contains("delinsTTTT"));
1049 }
1050
1051 #[test]
1052 fn test_spdi_to_hgvs_delins_single_base_ins() {
1053 let spdi = SpdiVariant::delins("NC_000001.11", 100, "AAAA", "T");
1055
1056 let hgvs = spdi_to_hgvs(&spdi).unwrap();
1057 assert!(hgvs.to_string().contains("delinsT"));
1058 }
1059
1060 #[test]
1061 fn test_spdi_different_chromosome_formats() {
1062 let test_cases = vec![
1064 ("NC_000001.11", "NC_000001.11"), ("NC_000023.11", "NC_000023.11"), ("NC_000024.10", "NC_000024.10"), ("NC_012920.1", "NC_012920.1"), ];
1069
1070 for (input_acc, expected_acc) in test_cases {
1071 let spdi = SpdiVariant::new(input_acc, 100, "A", "G");
1072 assert_eq!(spdi.sequence, expected_acc);
1073
1074 let hgvs = spdi_to_hgvs(&spdi).unwrap();
1075 assert!(hgvs.to_string().starts_with(expected_acc));
1076 }
1077 }
1078
1079 #[test]
1080 fn test_spdi_roundtrip_preserves_case_normalized() {
1081 let spdi = SpdiVariant::new("NC_000001.11", 100, "A", "G");
1083 let hgvs = spdi_to_hgvs(&spdi).unwrap();
1084 let back = hgvs_to_spdi_simple(&hgvs).unwrap();
1085
1086 assert_eq!(back.deletion, "A");
1088 assert_eq!(back.insertion, "G");
1089
1090 let spdi_delins = SpdiVariant::new("NC_000001.11", 100, "ACGT", "TGCA");
1093 let hgvs_delins = spdi_to_hgvs(&spdi_delins).unwrap();
1094 let back_delins = hgvs_to_spdi_simple(&hgvs_delins);
1095
1096 assert!(back_delins.is_err());
1098 }
1099
1100 #[test]
1101 fn test_spdi_empty_seq_insertion_roundtrip() {
1102 let spdi = SpdiVariant::insertion("NC_000001.11", 100, "ATG");
1104 let hgvs = spdi_to_hgvs(&spdi).unwrap();
1105
1106 assert!(hgvs.to_string().contains("ins"));
1108
1109 let back = hgvs_to_spdi_simple(&hgvs).unwrap();
1111 assert_eq!(back.deletion, "");
1112 assert_eq!(back.insertion, "ATG");
1113 }
1114
1115 #[test]
1116 fn test_hgvs_to_spdi_various_accession_types() {
1117 let test_variants = vec![
1119 "NC_000001.11:g.12345A>G", "NC_000023.11:g.12345A>G", "NC_012920.1:g.12345A>G", ];
1123
1124 for variant_str in test_variants {
1125 let hgvs = parse_hgvs(variant_str).unwrap();
1126 let spdi = hgvs_to_spdi_simple(&hgvs);
1127 assert!(spdi.is_ok(), "Failed for: {}", variant_str);
1128 }
1129 }
1130
1131 #[test]
1132 fn test_spdi_display_format() {
1133 let spdi = SpdiVariant::new("NC_000001.11", 12344, "A", "G");
1134 assert_eq!(spdi.to_string(), "NC_000001.11:12344:A:G");
1135
1136 let spdi_del = SpdiVariant::deletion("NC_000001.11", 100, "ATG");
1137 assert_eq!(spdi_del.to_string(), "NC_000001.11:100:ATG:");
1138
1139 let spdi_ins = SpdiVariant::insertion("NC_000001.11", 100, "ATG");
1140 assert_eq!(spdi_ins.to_string(), "NC_000001.11:100::ATG");
1141 }
1142
1143 #[test]
1144 fn test_spdi_identity_various_lengths() {
1145 let spdi1 = SpdiVariant::new("NC_000001.11", 100, "A", "A");
1147 assert!(spdi1.is_identity());
1148
1149 let spdi2 = SpdiVariant::new("NC_000001.11", 100, "ATG", "ATG");
1151 assert!(spdi2.is_identity());
1152
1153 let spdi3 = SpdiVariant::new("NC_000001.11", 100, "", "");
1155 assert!(spdi3.is_identity());
1156 }
1157}