1use crate::data::mapping::MappingInfo;
4use crate::data::projection::Projector;
5use crate::error::FerroError;
6use crate::hgvs::edit::NaEdit;
7use crate::hgvs::interval::{CdsInterval, TxInterval};
8use crate::hgvs::location::{CdsPos, GenomePos, TxPos};
9use crate::hgvs::variant::{
10 is_frameshift, AlleleVariant, CdsVariant, HgvsVariant, LocEdit, TxVariant,
11};
12use crate::normalize::{NormalizeConfig, Normalizer};
13use crate::project::accession::parse_accession;
14use crate::project::edit::transform_edit_for_strand;
15use crate::project::protein::{predict_indel_protein, predict_substitution_protein};
16use crate::project::result::VariantProjection;
17use crate::reference::ReferenceProvider;
18
19pub struct VariantProjector<P: ReferenceProvider + Clone> {
20 projector: Projector,
21 provider: P,
22 normalizer: Normalizer<P>,
23}
24
25impl<P: ReferenceProvider + Clone> VariantProjector<P> {
26 pub fn new(projector: Projector, provider: P) -> Self {
27 let normalizer = Normalizer::with_config(provider.clone(), NormalizeConfig::default());
28 Self {
29 projector,
30 provider,
31 normalizer,
32 }
33 }
34
35 pub fn with_normalize_config(mut self, config: NormalizeConfig) -> Self {
36 self.normalizer = Normalizer::with_config(self.provider.clone(), config);
37 self
38 }
39
40 pub fn project(
42 &self,
43 hgvs_string: &str,
44 transcript_id: &str,
45 ) -> Result<VariantProjection, FerroError> {
46 let variant = crate::parse_hgvs(hgvs_string)?;
47 self.project_variant(&variant, transcript_id)
48 }
49
50 pub fn project_variant(
55 &self,
56 variant: &HgvsVariant,
57 transcript_id: &str,
58 ) -> Result<VariantProjection, FerroError> {
59 let normalized = self.normalizer.normalize(variant)?;
63 self.project_variant_inner(&normalized, transcript_id)
64 }
65
66 pub fn project_normalized(
75 &self,
76 variant: &HgvsVariant,
77 transcript_id: &str,
78 ) -> Result<VariantProjection, FerroError> {
79 self.project_variant_inner(variant, transcript_id)
80 }
81
82 pub fn project_all(&self, hgvs_string: &str) -> Result<Vec<VariantProjection>, FerroError> {
90 let variant = crate::parse_hgvs(hgvs_string)?;
91 self.project_variant_all(&variant)
92 }
93
94 pub fn project_variant_all(
99 &self,
100 variant: &HgvsVariant,
101 ) -> Result<Vec<VariantProjection>, FerroError> {
102 let normalized = self.normalizer.normalize(variant)?;
104 self.project_normalized_all(&normalized)
105 }
106
107 pub fn project_normalized_all(
113 &self,
114 variant: &HgvsVariant,
115 ) -> Result<Vec<VariantProjection>, FerroError> {
116 let (contig, pos) = extract_contig_and_pos(variant)?;
119
120 let projection_result = self.projector.project(&contig, pos)?;
122
123 let mut results = Vec::with_capacity(projection_result.projections.len());
125 for tx_proj in &projection_result.projections {
126 match self.project_variant_inner(variant, &tx_proj.transcript_id) {
127 Ok(vp) => results.push(vp),
128 Err(e) => {
129 log::trace!(
131 "project_normalized_all: skipping {} for {}: {}",
132 tx_proj.transcript_id,
133 variant,
134 e
135 );
136 }
137 }
138 }
139
140 Ok(results)
141 }
142
143 fn project_variant_inner(
150 &self,
151 variant: &HgvsVariant,
152 transcript_id: &str,
153 ) -> Result<VariantProjection, FerroError> {
154 match variant {
156 HgvsVariant::Allele(allele) => {
157 self.project_allele_inner(allele, variant, transcript_id)
158 }
159 _ => self.project_single_inner(variant, transcript_id),
160 }
161 }
162
163 fn project_allele_inner(
166 &self,
167 allele: &AlleleVariant,
168 original: &HgvsVariant,
169 transcript_id: &str,
170 ) -> Result<VariantProjection, FerroError> {
171 if allele.variants.is_empty() {
177 let gene_symbol = self
178 .projector
179 .mapper()
180 .cdot()
181 .get_transcript(transcript_id)
182 .ok_or_else(|| FerroError::ReferenceNotFound {
183 id: transcript_id.to_string(),
184 })?
185 .gene_name
186 .clone();
187 return Ok(VariantProjection {
188 genomic: original.clone(),
189 coding: None,
190 protein: None,
191 transcript_id: transcript_id.to_string(),
192 gene_symbol,
193 is_frameshift: false,
194 is_intronic: false,
195 is_utr: false,
196 });
197 }
198
199 let mut inner_projections = Vec::with_capacity(allele.variants.len());
201 for inner in &allele.variants {
202 let proj = self.project_variant_inner(inner, transcript_id)?;
203 inner_projections.push(proj);
204 }
205
206 let is_frameshift = inner_projections.iter().any(|p| p.is_frameshift);
208 let is_intronic = inner_projections.iter().any(|p| p.is_intronic);
209 let is_utr = inner_projections.iter().any(|p| p.is_utr);
210
211 let gene_symbol = inner_projections.iter().find_map(|p| p.gene_symbol.clone());
213
214 let coding_variants: Option<Vec<HgvsVariant>> =
216 inner_projections.iter().map(|p| p.coding.clone()).collect();
217 let coding = coding_variants
218 .map(|variants| HgvsVariant::Allele(AlleleVariant::new(variants, allele.phase)));
219
220 let all_have_protein = inner_projections.iter().all(|p| p.protein.is_some());
222 let protein = if all_have_protein {
223 let protein_variants: Vec<HgvsVariant> = inner_projections
224 .iter()
225 .filter_map(|p| p.protein.clone())
226 .collect();
227 Some(HgvsVariant::Allele(AlleleVariant::new(
228 protein_variants,
229 allele.phase,
230 )))
231 } else {
232 None
233 };
234
235 Ok(VariantProjection {
236 genomic: original.clone(),
237 coding,
238 protein,
239 transcript_id: transcript_id.to_string(),
240 gene_symbol,
241 is_frameshift,
242 is_intronic,
243 is_utr,
244 })
245 }
246
247 fn project_single_inner(
250 &self,
251 normalized: &HgvsVariant,
252 transcript_id: &str,
253 ) -> Result<VariantProjection, FerroError> {
254 let genome_variant = match normalized {
256 HgvsVariant::Genome(g) => g.clone(),
257 _ => {
258 return Err(FerroError::UnsupportedProjection {
259 reason: "VariantProjector currently only accepts g. variants".to_string(),
260 });
261 }
262 };
263
264 let edit = genome_variant
265 .loc_edit
266 .edit
267 .inner()
268 .cloned()
269 .ok_or_else(|| FerroError::UnsupportedProjection {
270 reason: "g. variant has no concrete edit".to_string(),
271 })?;
272
273 let cdot_tx = self
275 .projector
276 .mapper()
277 .cdot()
278 .get_transcript(transcript_id)
279 .ok_or_else(|| FerroError::ReferenceNotFound {
280 id: transcript_id.to_string(),
281 })?;
282 let strand = cdot_tx.strand;
283 let gene_symbol = cdot_tx.gene_name.clone();
284 let cdot_protein = cdot_tx.protein.clone();
285 let is_coding = cdot_tx.cds_start.is_some();
286
287 let g_start = genome_variant
289 .loc_edit
290 .location
291 .start
292 .inner()
293 .cloned()
294 .ok_or_else(|| FerroError::InvalidCoordinates {
295 msg: "genomic interval start is unknown".to_string(),
296 })?;
297 let g_end = genome_variant
298 .loc_edit
299 .location
300 .end
301 .inner()
302 .cloned()
303 .ok_or_else(|| FerroError::InvalidCoordinates {
304 msg: "genomic interval end is unknown".to_string(),
305 })?;
306
307 let mapper = self.projector.mapper();
308 let normalized_str = normalized.to_string();
309
310 let (tx_genome_start, tx_genome_end) = {
313 let exons = &cdot_tx.exons;
314 if exons.is_empty() {
315 return Err(FerroError::ReferenceNotFound {
316 id: transcript_id.to_string(),
317 });
318 }
319 let starts = exons.iter().map(|e| e[0]).min().unwrap();
320 let ends = exons.iter().map(|e| e[1]).max().unwrap();
321 (starts, ends)
322 };
323
324 let map_position = |gp: &GenomePos| -> Result<(CdsPos, MappingInfo), FerroError> {
326 if gp.base < tx_genome_start || gp.base >= tx_genome_end {
327 return Err(FerroError::TranscriptNotOverlapping {
328 variant: normalized_str.clone(),
329 transcript_id: transcript_id.to_string(),
330 });
331 }
332 match mapper.genome_to_cds(transcript_id, gp) {
333 Ok(res) => Ok((res.variant, res.info)),
334 Err(FerroError::InvalidCoordinates { .. })
335 | Err(FerroError::ConversionError { .. }) => {
336 Err(FerroError::TranscriptNotOverlapping {
337 variant: normalized_str.clone(),
338 transcript_id: transcript_id.to_string(),
339 })
340 }
341 Err(other) => Err(other),
342 }
343 };
344
345 let (cds_start_raw, info_start) = map_position(&g_start)?;
346 let (cds_end_raw, info_end) = map_position(&g_end)?;
347
348 let (cds_start, cds_end) = match strand {
350 crate::reference::Strand::Plus => (cds_start_raw, cds_end_raw),
351 crate::reference::Strand::Minus => (cds_end_raw, cds_start_raw),
352 crate::reference::Strand::Unknown => {
353 return Err(FerroError::UnsupportedProjection {
354 reason: format!(
355 "transcript {} has unknown strand; cannot project g. → c./n.",
356 transcript_id
357 ),
358 });
359 }
360 };
361
362 let c_edit = transform_edit_for_strand(&edit, strand);
364
365 let coding = if is_coding {
367 let interval = CdsInterval::new(cds_start, cds_end);
368 HgvsVariant::Cds(CdsVariant {
369 accession: parse_accession(transcript_id),
370 gene_symbol: gene_symbol.clone(),
371 loc_edit: LocEdit::new(interval, c_edit.clone()),
372 })
373 } else {
374 let tx_start = TxPos::new(cds_start.base);
375 let tx_end = TxPos::new(cds_end.base);
376 HgvsVariant::Tx(TxVariant {
377 accession: parse_accession(transcript_id),
378 gene_symbol: gene_symbol.clone(),
379 loc_edit: LocEdit::new(TxInterval::new(tx_start, tx_end), c_edit.clone()),
380 })
381 };
382
383 let is_intronic = info_start.is_intronic || info_end.is_intronic;
385 let is_utr = !is_intronic
386 && (info_start.in_5utr || info_start.in_3utr || info_end.in_5utr || info_end.in_3utr);
387
388 let mut protein = None;
390 if !is_intronic && !is_utr && is_coding {
391 let prot_acc = match cdot_protein {
397 Some(p) => Some(p),
398 None => transcript_id
399 .strip_prefix("NM_")
400 .map(|rest| format!("NP_{rest}"))
401 .or_else(|| {
402 transcript_id
403 .strip_prefix("XM_")
404 .map(|rest| format!("XP_{rest}"))
405 }),
406 };
407 if let Some(prot_acc) = prot_acc {
408 match &c_edit {
409 NaEdit::Substitution { .. } => {
410 let tx_for_codon = self.provider.get_transcript(transcript_id)?;
411 protein = Some(predict_substitution_protein(
412 &tx_for_codon,
413 cds_start.base,
414 &c_edit,
415 &prot_acc,
416 )?);
417 }
418 NaEdit::Deletion { .. }
419 | NaEdit::Insertion { .. }
420 | NaEdit::Duplication { .. }
421 | NaEdit::Delins { .. }
422 | NaEdit::Inversion { .. }
423 if cds_start.offset.is_none()
426 && cds_end.offset.is_none()
427 && cds_start.base > 0
428 && cds_end.base > 0 =>
429 {
430 let tx_for_codon = self.provider.get_transcript(transcript_id)?;
431 match predict_indel_protein(
432 &tx_for_codon,
433 cds_start.base,
434 cds_end.base,
435 &c_edit,
436 &prot_acc,
437 ) {
438 Ok(pv) => protein = Some(pv),
439 Err(FerroError::UnsupportedProjection { .. })
441 | Err(FerroError::ProteinSequenceUnavailable { .. }) => {}
442 Err(other) => return Err(other),
443 }
444 }
445 _ => {}
446 }
447 }
448 }
449
450 let frameshift = is_frameshift(&coding);
451 Ok(VariantProjection {
452 genomic: normalized.clone(),
453 coding: Some(coding),
454 protein,
455 transcript_id: transcript_id.to_string(),
456 gene_symbol,
457 is_frameshift: frameshift,
458 is_intronic,
459 is_utr,
460 })
461 }
462}
463
464fn extract_contig_and_pos(variant: &HgvsVariant) -> Result<(String, u64), FerroError> {
480 let effective = match variant {
481 HgvsVariant::Allele(allele) => {
482 allele
483 .variants
484 .first()
485 .ok_or_else(|| FerroError::UnsupportedProjection {
486 reason: "cannot project an empty allele to all transcripts".to_string(),
487 })?
488 }
489 other => other,
490 };
491
492 match effective {
493 HgvsVariant::Genome(gv) => {
494 let contig = if let Some(chr) = &gv.accession.chromosome {
496 chr.to_string()
497 } else {
498 gv.accession.full()
499 };
500
501 let pos = gv
502 .loc_edit
503 .location
504 .start
505 .inner()
506 .cloned()
507 .ok_or_else(|| FerroError::InvalidCoordinates {
508 msg: "genomic interval start is unknown".to_string(),
509 })?
510 .base;
511
512 Ok((contig, pos))
513 }
514 _ => Err(FerroError::UnsupportedProjection {
515 reason: "project_all currently only accepts g. variants".to_string(),
516 }),
517 }
518}
519
520#[cfg(test)]
521mod tests {
522 use super::*;
523 use crate::data::cdot::{CdotMapper, CdotTranscript};
524 use crate::data::projection::Projector;
525 use crate::hgvs::variant::{AllelePhase, AlleleVariant};
526 use crate::reference::mock::MockProvider;
527 use crate::reference::transcript::{Exon, ManeStatus, Strand as TxStrand, Transcript};
528 use crate::reference::Strand as ProvStrand;
529 use std::sync::OnceLock;
530
531 fn make_test_provider_and_projector() -> (Projector, MockProvider) {
532 let mut cdot = CdotMapper::new();
550 cdot.add_transcript(
551 "NM_TEST.1".to_string(),
552 CdotTranscript {
553 gene_name: Some("TESTGENE".to_string()),
554 contig: "chr1".to_string(),
555 strand: ProvStrand::Plus,
556 exons: vec![[1000, 1009, 0, 9]],
558 cds_start: Some(0), cds_end: Some(9), gene_id: None,
561 protein: Some("NP_TEST.1".to_string()),
562 exon_cigars: Vec::new(),
563 },
564 );
565 let projector = Projector::new(cdot);
566
567 let mut provider = MockProvider::new();
568 provider.add_transcript(Transcript {
570 id: "NM_TEST.1".to_string(),
571 gene_symbol: Some("TESTGENE".to_string()),
572 strand: TxStrand::Plus,
573 sequence: Some("ATGCGCTAA".to_string()),
574 cds_start: Some(1), cds_end: Some(9),
576 exons: vec![Exon::new(1, 1, 9)],
577 chromosome: Some("chr1".to_string()),
578 genomic_start: Some(1000),
579 genomic_end: Some(1008),
580 genome_build: Default::default(),
581 mane_status: ManeStatus::default(),
582 refseq_match: None,
583 ensembl_match: None,
584 exon_cigars: Vec::new(),
585 cached_introns: OnceLock::new(),
586 });
587 let prefix = "N".repeat(999);
589 let suffix = "N".repeat(100);
590 provider.add_genomic_sequence("chr1", format!("{}{}{}", prefix, "ATGCGCTAA", suffix));
591 (projector, provider)
592 }
593
594 fn make_minus_strand_provider_and_projector() -> (Projector, MockProvider) {
595 let mut cdot = CdotMapper::new();
597 cdot.add_transcript(
598 "NM_TEST_MINUS.1".to_string(),
599 CdotTranscript {
600 gene_name: Some("TESTGENE".to_string()),
601 contig: "chr1".to_string(),
602 strand: ProvStrand::Minus,
603 exons: vec![[1000, 1009, 0, 9]],
604 cds_start: Some(0),
605 cds_end: Some(9),
606 gene_id: None,
607 protein: Some("NP_TEST_MINUS.1".to_string()),
608 exon_cigars: Vec::new(),
609 },
610 );
611 let projector = Projector::new(cdot);
612
613 let mut provider = MockProvider::new();
614 provider.add_transcript(Transcript {
615 id: "NM_TEST_MINUS.1".to_string(),
616 gene_symbol: Some("TESTGENE".to_string()),
617 strand: TxStrand::Minus,
618 sequence: Some("ATGCGCTAA".to_string()), cds_start: Some(1), cds_end: Some(9),
621 exons: vec![Exon::new(1, 1, 9)],
622 chromosome: Some("chr1".to_string()),
623 genomic_start: Some(1000),
624 genomic_end: Some(1008),
625 genome_build: Default::default(),
626 mane_status: ManeStatus::default(),
627 refseq_match: None,
628 ensembl_match: None,
629 exon_cigars: Vec::new(),
630 cached_introns: OnceLock::new(),
631 });
632 let prefix = "N".repeat(999);
633 let suffix = "N".repeat(100);
634 provider.add_genomic_sequence("chr1", format!("{}TTAGCGCAT{}", prefix, suffix));
635 (projector, provider)
636 }
637
638 fn make_two_transcript_setup() -> (Projector, MockProvider) {
644 let mut cdot = CdotMapper::new();
645 cdot.add_transcript(
646 "NM_TX1.1".to_string(),
647 CdotTranscript {
648 gene_name: Some("GENE1".to_string()),
649 contig: "chr1".to_string(),
650 strand: ProvStrand::Plus,
651 exons: vec![[1000, 1009, 0, 9]],
652 cds_start: Some(0),
653 cds_end: Some(9),
654 gene_id: None,
655 protein: Some("NP_TX1.1".to_string()),
656 exon_cigars: Vec::new(),
657 },
658 );
659 cdot.add_transcript(
660 "NM_TX2.1".to_string(),
661 CdotTranscript {
662 gene_name: Some("GENE1".to_string()),
663 contig: "chr1".to_string(),
664 strand: ProvStrand::Plus,
665 exons: vec![[1000, 1009, 0, 9]],
666 cds_start: Some(0),
667 cds_end: Some(9),
668 gene_id: None,
669 protein: Some("NP_TX2.1".to_string()),
670 exon_cigars: Vec::new(),
671 },
672 );
673 let projector = Projector::new(cdot)
674 .with_mane(vec!["NM_TX2.1".to_string()], vec![]);
676
677 let mut provider = MockProvider::new();
678 for id in ["NM_TX1.1", "NM_TX2.1"] {
679 provider.add_transcript(Transcript {
680 id: id.to_string(),
681 gene_symbol: Some("GENE1".to_string()),
682 strand: TxStrand::Plus,
683 sequence: Some("ATGCGCTAA".to_string()),
684 cds_start: Some(1),
685 cds_end: Some(9),
686 exons: vec![Exon::new(1, 1, 9)],
687 chromosome: Some("chr1".to_string()),
688 genomic_start: Some(1000),
689 genomic_end: Some(1008),
690 genome_build: Default::default(),
691 mane_status: ManeStatus::default(),
692 refseq_match: None,
693 ensembl_match: None,
694 exon_cigars: Vec::new(),
695 cached_introns: OnceLock::new(),
696 });
697 }
698 let prefix = "N".repeat(999);
699 let suffix = "N".repeat(100);
700 provider.add_genomic_sequence("chr1", format!("{}{}{}", prefix, "ATGCGCTAA", suffix));
701 (projector, provider)
702 }
703
704 #[test]
709 fn project_substitution_minus_strand_revcomps_ref_alt() {
710 let (projector, provider) = make_minus_strand_provider_and_projector();
711 let vp = VariantProjector::new(projector, provider);
712 let result = vp
713 .project("chr1:g.1005G>A", "NM_TEST_MINUS.1")
714 .expect("minus-strand projection should succeed");
715 let c = result
716 .coding
717 .as_ref()
718 .expect("c. should be present")
719 .to_string();
720 assert!(
721 c.contains(":c.4C>T"),
722 "expected revcomp c.4C>T for minus strand, got: {}",
723 c
724 );
725 let p = result
726 .protein
727 .as_ref()
728 .expect("p. should be present")
729 .to_string();
730 assert_eq!(p, "NP_TEST_MINUS.1(TESTGENE):p.(Arg2Cys)");
731 assert!(!result.is_frameshift);
732 assert!(!result.is_intronic);
733 assert!(!result.is_utr);
734 }
735
736 fn make_intronic_test_data() -> (Projector, MockProvider) {
737 let mut cdot = CdotMapper::new();
751 cdot.add_transcript(
752 "NM_INTR.1".to_string(),
753 CdotTranscript {
754 gene_name: Some("INTRGENE".to_string()),
755 contig: "chr1".to_string(),
756 strand: ProvStrand::Plus,
757 exons: vec![[1000, 1010, 0, 10], [2000, 2010, 10, 20]],
758 cds_start: Some(0),
759 cds_end: Some(18),
760 gene_id: None,
761 protein: Some("NP_INTR.1".to_string()),
762 exon_cigars: Vec::new(),
763 },
764 );
765 let projector = Projector::new(cdot);
766
767 let mut provider = MockProvider::new();
768 provider.add_transcript(Transcript {
769 id: "NM_INTR.1".to_string(),
770 gene_symbol: Some("INTRGENE".to_string()),
771 strand: TxStrand::Plus,
772 sequence: Some("ATGCGCAAAGGGTAACCC".to_string()), cds_start: Some(1),
774 cds_end: Some(18),
775 exons: vec![Exon::new(1, 1, 10), Exon::new(2, 11, 20)],
776 chromosome: Some("chr1".to_string()),
777 genomic_start: Some(1000),
778 genomic_end: Some(2009),
779 genome_build: Default::default(),
780 mane_status: ManeStatus::default(),
781 refseq_match: None,
782 ensembl_match: None,
783 exon_cigars: Vec::new(),
784 cached_introns: OnceLock::new(),
785 });
786 let exon1 = "ATGCGCAAAG"; let intron = "N".repeat(990); let exon2 = "GGTAACCCNN"; let prefix = "N".repeat(999);
790 let suffix = "N".repeat(100);
791 provider.add_genomic_sequence("chr1", format!("{prefix}{exon1}{intron}{exon2}{suffix}"));
792 (projector, provider)
793 }
794
795 #[test]
796 fn project_intronic_substitution_no_protein() {
797 let (projector, provider) = make_intronic_test_data();
798 let vp = VariantProjector::new(projector, provider);
799 let result = vp
800 .project("NC_000001.11:g.1015A>G", "NM_INTR.1")
801 .expect("intronic substitution should project to c. with offset");
802 assert!(result.is_intronic, "expected is_intronic=true");
803 assert!(result.protein.is_none(), "no p. for intronic substitutions");
804 let c = result.coding.as_ref().expect("c. expected").to_string();
805 assert!(
806 c.contains('+'),
807 "expected intronic offset notation (e.g. c.9+6 or c.10+6), got: {}",
808 c
809 );
810 }
811
812 #[test]
813 fn project_no_overlap_returns_transcript_not_overlapping() {
814 let (projector, provider) = make_test_provider_and_projector();
815 let vp = VariantProjector::new(projector, provider);
816 let err = vp
817 .project("NC_000001.11:g.5000A>G", "NM_TEST.1")
818 .expect_err("should fail to project outside the transcript");
819 assert!(
820 matches!(err, FerroError::TranscriptNotOverlapping { .. }),
821 "expected TranscriptNotOverlapping, got: {:?}",
822 err
823 );
824 }
825
826 #[test]
827 fn project_substitution_plus_strand_missense() {
828 let (projector, provider) = make_test_provider_and_projector();
829 let vp = VariantProjector::new(projector, provider);
830
831 let result = vp
832 .project("chr1:g.1003C>A", "NM_TEST.1")
833 .expect("projection should succeed");
834
835 assert_eq!(result.transcript_id, "NM_TEST.1");
836 assert_eq!(result.gene_symbol.as_deref(), Some("TESTGENE"));
837
838 let c = result
839 .coding
840 .as_ref()
841 .expect("c. should be present")
842 .to_string();
843 assert!(c.contains(":c.4C>A"), "expected ':c.4C>A' in '{}' ", c);
844
845 let p = result
846 .protein
847 .as_ref()
848 .expect("p. should be present")
849 .to_string();
850 assert_eq!(p, "NP_TEST.1(TESTGENE):p.(Arg2Ser)");
851
852 assert!(!result.is_frameshift);
853 assert!(!result.is_intronic);
854 assert!(!result.is_utr);
855 }
856
857 #[test]
858 fn project_single_base_deletion_is_frameshift() {
859 let (projector, provider) = make_test_provider_and_projector();
860 let vp = VariantProjector::new(projector, provider);
861 let result = vp
862 .project("NC_000001.11:g.1004del", "NM_TEST.1")
863 .expect("deletion should project");
864 let c = result.coding.as_ref().expect("c. expected").to_string();
865 assert!(c.contains(":c."), "expected c. variant, got: {}", c);
866 assert!(c.contains("del"), "expected del notation in c., got: {}", c);
867 let p = result
868 .protein
869 .as_ref()
870 .expect("p. expected for CDS del")
871 .to_string();
872 assert!(p.contains("Arg2"), "expected Arg2 in p.: {}", p);
873 assert!(p.contains("fs"), "expected fs in p.: {}", p);
874 assert!(result.is_frameshift, "1-base del should be frameshift");
875 assert!(!result.is_intronic);
876 }
877
878 #[test]
879 fn project_three_base_deletion_in_frame() {
880 let (projector, provider) = make_test_provider_and_projector();
881 let vp = VariantProjector::new(projector, provider);
882 let result = vp
883 .project("NC_000001.11:g.1003_1005del", "NM_TEST.1")
884 .expect("3-base del should project");
885 let c = result.coding.as_ref().expect("c. expected").to_string();
886 assert!(c.contains("del"), "expected del notation, got: {}", c);
887 let p = result
888 .protein
889 .as_ref()
890 .expect("p. expected for in-frame del")
891 .to_string();
892 assert!(p.contains("Arg2del"), "expected Arg2del in p.: {}", p);
893 assert!(!result.is_frameshift, "3-base deletion is in-frame");
894 }
895
896 #[test]
897 fn project_single_base_insertion_is_frameshift() {
898 let (projector, provider) = make_test_provider_and_projector();
899 let vp = VariantProjector::new(projector, provider);
900 let result = vp
901 .project("NC_000001.11:g.1003_1004insA", "NM_TEST.1")
902 .expect("insertion should project");
903 let c = result.coding.as_ref().expect("c. expected").to_string();
904 assert!(c.contains("ins"), "expected ins notation, got: {}", c);
905 assert!(result.protein.is_some(), "p. expected for CDS insertion");
906 assert!(result.is_frameshift, "1-base insertion is frameshift");
907 }
908
909 #[test]
910 fn project_duplication() {
911 let (projector, provider) = make_test_provider_and_projector();
912 let vp = VariantProjector::new(projector, provider);
913 let result = vp
914 .project("NC_000001.11:g.1004dup", "NM_TEST.1")
915 .expect("dup should project");
916 let c = result.coding.as_ref().expect("c. expected").to_string();
917 assert!(c.contains("dup"), "expected dup notation, got: {}", c);
918 assert!(result.protein.is_some(), "p. expected for CDS dup");
919 assert!(result.is_frameshift, "1-base dup is frameshift");
920 }
921
922 #[test]
923 fn project_delins() {
924 let (projector, provider) = make_test_provider_and_projector();
925 let vp = VariantProjector::new(projector, provider);
926 let result = vp
927 .project("NC_000001.11:g.1003_1004delinsAT", "NM_TEST.1")
928 .expect("delins should project");
929 let c = result.coding.as_ref().expect("c. expected").to_string();
930 assert!(c.contains("delins"), "expected delins notation, got: {}", c);
931 let p = result
932 .protein
933 .as_ref()
934 .expect("p. expected for delins")
935 .to_string();
936 assert!(p.contains("delins"), "expected delins in p.: {}", p);
937 assert!(
938 !result.is_frameshift,
939 "delins of equal length is not a frameshift"
940 );
941 }
942
943 #[test]
944 fn project_inversion() {
945 let (projector, provider) = make_test_provider_and_projector();
946 let vp = VariantProjector::new(projector, provider);
947 let result = vp
948 .project("NC_000001.11:g.1003_1005inv", "NM_TEST.1")
949 .expect("inv should project");
950 let c = result.coding.as_ref().expect("c. expected").to_string();
951 assert!(c.contains("inv"), "expected inv notation, got: {}", c);
952 let p = result
953 .protein
954 .as_ref()
955 .expect("p. expected for inv")
956 .to_string();
957 assert!(p.contains("Arg2"), "expected Arg2 in p.: {}", p);
958 assert!(p.contains("delins"), "expected delins in p.: {}", p);
959 assert!(p.contains("Ala"), "expected Ala in p.: {}", p);
960 assert!(!result.is_frameshift, "inversion is not a frameshift");
961 }
962
963 fn make_ensembl_provider_and_projector() -> (Projector, MockProvider) {
964 let mut cdot = CdotMapper::new();
969 cdot.add_transcript(
970 "ENST00000000001.1".to_string(),
971 CdotTranscript {
972 gene_name: Some("TESTGENE".to_string()),
973 contig: "chr1".to_string(),
974 strand: ProvStrand::Plus,
975 exons: vec![[1000, 1009, 0, 9]],
976 cds_start: Some(0),
977 cds_end: Some(9),
978 gene_id: None,
979 protein: None,
980 exon_cigars: Vec::new(),
981 },
982 );
983 let projector = Projector::new(cdot);
984
985 let mut provider = MockProvider::new();
986 provider.add_transcript(Transcript {
987 id: "ENST00000000001.1".to_string(),
988 gene_symbol: Some("TESTGENE".to_string()),
989 strand: TxStrand::Plus,
990 sequence: Some("ATGCGCTAA".to_string()),
991 cds_start: Some(1),
992 cds_end: Some(9),
993 exons: vec![Exon::new(1, 1, 9)],
994 chromosome: Some("chr1".to_string()),
995 genomic_start: Some(1000),
996 genomic_end: Some(1008),
997 genome_build: Default::default(),
998 mane_status: ManeStatus::default(),
999 refseq_match: None,
1000 ensembl_match: None,
1001 exon_cigars: Vec::new(),
1002 cached_introns: OnceLock::new(),
1003 });
1004 let prefix = "N".repeat(999);
1005 let suffix = "N".repeat(100);
1006 provider.add_genomic_sequence("chr1", format!("{}{}{}", prefix, "ATGCGCTAA", suffix));
1007 (projector, provider)
1008 }
1009
1010 #[test]
1011 fn project_substitution_no_protein_accession_skips_protein() {
1012 let (projector, provider) = make_ensembl_provider_and_projector();
1013 let vp = VariantProjector::new(projector, provider);
1014 let result = vp
1015 .project("chr1:g.1003C>A", "ENST00000000001.1")
1016 .expect("projection should succeed");
1017 let c = result.coding.as_ref().expect("c. expected").to_string();
1018 assert!(c.contains(":c.4C>A"), "expected c.4C>A, got: {}", c);
1019 assert!(
1021 result.protein.is_none(),
1022 "expected no protein for accession with no NM_/XM_ prefix and no cdot.protein, got: {:?}",
1023 result.protein
1024 );
1025 }
1026
1027 #[test]
1032 fn project_normalized_same_result_as_project_variant() {
1033 let (projector, provider) = make_test_provider_and_projector();
1034 let vp = VariantProjector::new(projector, provider);
1035
1036 let variant = crate::parse_hgvs("chr1:g.1003C>A").expect("parse should succeed");
1037 let via_project = vp
1038 .project_variant(&variant, "NM_TEST.1")
1039 .expect("project_variant should succeed");
1040 let normalized = vp.normalizer.normalize(&variant).expect("normalize failed");
1042 let via_normalized = vp
1043 .project_normalized(&normalized, "NM_TEST.1")
1044 .expect("project_normalized should succeed");
1045
1046 assert_eq!(
1047 via_project.coding.as_ref().map(|v| v.to_string()),
1048 via_normalized.coding.as_ref().map(|v| v.to_string()),
1049 "project_normalized should produce same c. as project_variant"
1050 );
1051 assert_eq!(
1052 via_project.protein.as_ref().map(|v| v.to_string()),
1053 via_normalized.protein.as_ref().map(|v| v.to_string()),
1054 "project_normalized should produce same p. as project_variant"
1055 );
1056 }
1057
1058 #[test]
1063 fn project_variant_all_returns_both_transcripts() {
1064 let (projector, provider) = make_two_transcript_setup();
1065 let vp = VariantProjector::new(projector, provider);
1066
1067 let results = vp
1068 .project_all("chr1:g.1003C>A")
1069 .expect("project_all should succeed");
1070
1071 assert_eq!(
1072 results.len(),
1073 2,
1074 "expected projections onto both transcripts, got: {}",
1075 results.len()
1076 );
1077 }
1078
1079 #[test]
1080 fn project_all_mane_select_sorts_first() {
1081 let (projector, provider) = make_two_transcript_setup();
1082 let vp = VariantProjector::new(projector, provider);
1083
1084 let results = vp
1085 .project_all("chr1:g.1003C>A")
1086 .expect("project_all should succeed");
1087
1088 assert_eq!(
1090 results[0].transcript_id, "NM_TX2.1",
1091 "MANE Select should be first"
1092 );
1093 }
1094
1095 #[test]
1096 fn project_all_no_overlap_returns_empty() {
1097 let (projector, provider) = make_two_transcript_setup();
1098 let vp = VariantProjector::new(projector, provider);
1099
1100 let results = vp
1102 .project_all("NC_000001.11:g.5000A>G")
1103 .expect("project_all should return Ok for no overlaps");
1104
1105 assert!(
1106 results.is_empty(),
1107 "expected empty result for non-overlapping variant"
1108 );
1109 }
1110
1111 fn project_cis_allele() -> VariantProjection {
1118 let (projector, provider) = make_test_provider_and_projector();
1119 let vp = VariantProjector::new(projector, provider);
1120
1121 let v1 = crate::parse_hgvs("chr1:g.1003C>A").expect("v1 parse");
1126 let v2 = crate::parse_hgvs("chr1:g.1006T>A").expect("v2 parse");
1127 let allele = HgvsVariant::Allele(AlleleVariant::cis(vec![v1, v2]));
1128 vp.project_variant(&allele, "NM_TEST.1")
1129 .expect("cis allele projection should succeed")
1130 }
1131
1132 #[test]
1133 fn project_cis_allele_produces_cis_coding_allele() {
1134 let result = project_cis_allele();
1135 let coding = result.coding.as_ref().expect("c. allele should be present");
1136 assert!(
1138 matches!(coding, HgvsVariant::Allele(av) if av.phase == AllelePhase::Cis),
1139 "expected Cis coding allele, got: {}",
1140 coding
1141 );
1142 if let HgvsVariant::Allele(av) = coding {
1144 assert_eq!(
1145 av.variants.len(),
1146 2,
1147 "expected 2 inner c. variants in cis allele"
1148 );
1149 }
1150 }
1151
1152 #[test]
1153 fn project_trans_allele_preserves_trans_phase() {
1154 let (projector, provider) = make_test_provider_and_projector();
1155 let vp = VariantProjector::new(projector, provider);
1156
1157 let v1 = crate::parse_hgvs("chr1:g.1003C>A").expect("v1 parse");
1159 let v2 = crate::parse_hgvs("chr1:g.1006T>A").expect("v2 parse");
1160 let allele = HgvsVariant::Allele(AlleleVariant::trans(vec![v1, v2]));
1161 let result = vp
1162 .project_variant(&allele, "NM_TEST.1")
1163 .expect("trans allele projection should succeed");
1164
1165 let coding = result.coding.as_ref().expect("c. expected");
1166 assert!(
1167 matches!(coding, HgvsVariant::Allele(av) if av.phase == AllelePhase::Trans),
1168 "expected Trans coding allele, got: {}",
1169 coding
1170 );
1171 }
1172
1173 #[test]
1174 fn project_allele_with_frameshift_inner_sets_is_frameshift() {
1175 let (projector, provider) = make_test_provider_and_projector();
1176 let vp = VariantProjector::new(projector, provider);
1177
1178 let v1 = crate::parse_hgvs("chr1:g.1003C>A").expect("v1 parse");
1180 let v_fs = crate::parse_hgvs("NC_000001.11:g.1004del").expect("fs parse");
1181 let allele = HgvsVariant::Allele(AlleleVariant::cis(vec![v1, v_fs]));
1182 let result = vp
1183 .project_variant(&allele, "NM_TEST.1")
1184 .expect("allele with frameshift should project");
1185
1186 assert!(
1187 result.is_frameshift,
1188 "allele containing a frameshift inner variant should set is_frameshift=true"
1189 );
1190 }
1191
1192 #[test]
1193 fn project_allele_with_non_protein_inner_has_no_protein() {
1194 let (projector, provider) = make_intronic_test_data();
1197 let vp = VariantProjector::new(projector, provider);
1198
1199 let v_exon = crate::parse_hgvs("NC_000001.11:g.1003C>G").expect("exon parse");
1203 let v_intron = crate::parse_hgvs("NC_000001.11:g.1015N>G").expect("intron parse");
1204 let allele = HgvsVariant::Allele(AlleleVariant::cis(vec![v_exon, v_intron]));
1205 let result = vp
1206 .project_variant(&allele, "NM_INTR.1")
1207 .expect("allele with intronic variant should project");
1208
1209 assert!(
1210 result.protein.is_none(),
1211 "allele with intronic inner variant should have no protein"
1212 );
1213 assert!(result.is_intronic, "should be marked intronic");
1214 }
1215
1216 #[test]
1217 fn project_empty_allele_unknown_transcript_returns_reference_not_found() {
1218 let (projector, provider) = make_test_provider_and_projector();
1223 let vp = VariantProjector::new(projector, provider);
1224 let empty_allele = HgvsVariant::Allele(AlleleVariant::cis(vec![]));
1225 let err = vp
1226 .project_variant(&empty_allele, "NM_DOES_NOT_EXIST.1")
1227 .expect_err("empty allele on unknown transcript should error");
1228 assert!(
1229 matches!(err, FerroError::ReferenceNotFound { ref id } if id == "NM_DOES_NOT_EXIST.1"),
1230 "expected ReferenceNotFound for unknown transcript, got: {:?}",
1231 err
1232 );
1233 }
1234}