use crate::data::mapping::MappingInfo;
use crate::data::projection::Projector;
use crate::error::FerroError;
use crate::hgvs::edit::NaEdit;
use crate::hgvs::interval::{CdsInterval, TxInterval};
use crate::hgvs::location::{CdsPos, GenomePos, TxPos};
use crate::hgvs::variant::{
is_frameshift, AlleleVariant, CdsVariant, HgvsVariant, LocEdit, TxVariant,
};
use crate::normalize::{NormalizeConfig, Normalizer};
use crate::project::accession::parse_accession;
use crate::project::edit::transform_edit_for_strand;
use crate::project::protein::{predict_indel_protein, predict_substitution_protein};
use crate::project::result::VariantProjection;
use crate::reference::ReferenceProvider;
pub struct VariantProjector<P: ReferenceProvider + Clone> {
projector: Projector,
provider: P,
normalizer: Normalizer<P>,
}
impl<P: ReferenceProvider + Clone> VariantProjector<P> {
pub fn new(projector: Projector, provider: P) -> Self {
let normalizer = Normalizer::with_config(provider.clone(), NormalizeConfig::default());
Self {
projector,
provider,
normalizer,
}
}
pub fn with_normalize_config(mut self, config: NormalizeConfig) -> Self {
self.normalizer = Normalizer::with_config(self.provider.clone(), config);
self
}
pub fn project(
&self,
hgvs_string: &str,
transcript_id: &str,
) -> Result<VariantProjection, FerroError> {
let variant = crate::parse_hgvs(hgvs_string)?;
self.project_variant(&variant, transcript_id)
}
pub fn project_variant(
&self,
variant: &HgvsVariant,
transcript_id: &str,
) -> Result<VariantProjection, FerroError> {
let normalized = self.normalizer.normalize(variant)?;
self.project_variant_inner(&normalized, transcript_id)
}
pub fn project_normalized(
&self,
variant: &HgvsVariant,
transcript_id: &str,
) -> Result<VariantProjection, FerroError> {
self.project_variant_inner(variant, transcript_id)
}
pub fn project_all(&self, hgvs_string: &str) -> Result<Vec<VariantProjection>, FerroError> {
let variant = crate::parse_hgvs(hgvs_string)?;
self.project_variant_all(&variant)
}
pub fn project_variant_all(
&self,
variant: &HgvsVariant,
) -> Result<Vec<VariantProjection>, FerroError> {
let normalized = self.normalizer.normalize(variant)?;
self.project_normalized_all(&normalized)
}
pub fn project_normalized_all(
&self,
variant: &HgvsVariant,
) -> Result<Vec<VariantProjection>, FerroError> {
let (contig, pos) = extract_contig_and_pos(variant)?;
let projection_result = self.projector.project(&contig, pos)?;
let mut results = Vec::with_capacity(projection_result.projections.len());
for tx_proj in &projection_result.projections {
match self.project_variant_inner(variant, &tx_proj.transcript_id) {
Ok(vp) => results.push(vp),
Err(e) => {
log::trace!(
"project_normalized_all: skipping {} for {}: {}",
tx_proj.transcript_id,
variant,
e
);
}
}
}
Ok(results)
}
fn project_variant_inner(
&self,
variant: &HgvsVariant,
transcript_id: &str,
) -> Result<VariantProjection, FerroError> {
match variant {
HgvsVariant::Allele(allele) => {
self.project_allele_inner(allele, variant, transcript_id)
}
_ => self.project_single_inner(variant, transcript_id),
}
}
fn project_allele_inner(
&self,
allele: &AlleleVariant,
original: &HgvsVariant,
transcript_id: &str,
) -> Result<VariantProjection, FerroError> {
if allele.variants.is_empty() {
let gene_symbol = self
.projector
.mapper()
.cdot()
.get_transcript(transcript_id)
.ok_or_else(|| FerroError::ReferenceNotFound {
id: transcript_id.to_string(),
})?
.gene_name
.clone();
return Ok(VariantProjection {
genomic: original.clone(),
coding: None,
protein: None,
transcript_id: transcript_id.to_string(),
gene_symbol,
is_frameshift: false,
is_intronic: false,
is_utr: false,
});
}
let mut inner_projections = Vec::with_capacity(allele.variants.len());
for inner in &allele.variants {
let proj = self.project_variant_inner(inner, transcript_id)?;
inner_projections.push(proj);
}
let is_frameshift = inner_projections.iter().any(|p| p.is_frameshift);
let is_intronic = inner_projections.iter().any(|p| p.is_intronic);
let is_utr = inner_projections.iter().any(|p| p.is_utr);
let gene_symbol = inner_projections.iter().find_map(|p| p.gene_symbol.clone());
let coding_variants: Option<Vec<HgvsVariant>> =
inner_projections.iter().map(|p| p.coding.clone()).collect();
let coding = coding_variants
.map(|variants| HgvsVariant::Allele(AlleleVariant::new(variants, allele.phase)));
let all_have_protein = inner_projections.iter().all(|p| p.protein.is_some());
let protein = if all_have_protein {
let protein_variants: Vec<HgvsVariant> = inner_projections
.iter()
.filter_map(|p| p.protein.clone())
.collect();
Some(HgvsVariant::Allele(AlleleVariant::new(
protein_variants,
allele.phase,
)))
} else {
None
};
Ok(VariantProjection {
genomic: original.clone(),
coding,
protein,
transcript_id: transcript_id.to_string(),
gene_symbol,
is_frameshift,
is_intronic,
is_utr,
})
}
fn project_single_inner(
&self,
normalized: &HgvsVariant,
transcript_id: &str,
) -> Result<VariantProjection, FerroError> {
let genome_variant = match normalized {
HgvsVariant::Genome(g) => g.clone(),
_ => {
return Err(FerroError::UnsupportedProjection {
reason: "VariantProjector currently only accepts g. variants".to_string(),
});
}
};
let edit = genome_variant
.loc_edit
.edit
.inner()
.cloned()
.ok_or_else(|| FerroError::UnsupportedProjection {
reason: "g. variant has no concrete edit".to_string(),
})?;
let cdot_tx = self
.projector
.mapper()
.cdot()
.get_transcript(transcript_id)
.ok_or_else(|| FerroError::ReferenceNotFound {
id: transcript_id.to_string(),
})?;
let strand = cdot_tx.strand;
let gene_symbol = cdot_tx.gene_name.clone();
let cdot_protein = cdot_tx.protein.clone();
let is_coding = cdot_tx.cds_start.is_some();
let g_start = genome_variant
.loc_edit
.location
.start
.inner()
.cloned()
.ok_or_else(|| FerroError::InvalidCoordinates {
msg: "genomic interval start is unknown".to_string(),
})?;
let g_end = genome_variant
.loc_edit
.location
.end
.inner()
.cloned()
.ok_or_else(|| FerroError::InvalidCoordinates {
msg: "genomic interval end is unknown".to_string(),
})?;
let mapper = self.projector.mapper();
let normalized_str = normalized.to_string();
let (tx_genome_start, tx_genome_end) = {
let exons = &cdot_tx.exons;
if exons.is_empty() {
return Err(FerroError::ReferenceNotFound {
id: transcript_id.to_string(),
});
}
let starts = exons.iter().map(|e| e[0]).min().unwrap();
let ends = exons.iter().map(|e| e[1]).max().unwrap();
(starts, ends)
};
let map_position = |gp: &GenomePos| -> Result<(CdsPos, MappingInfo), FerroError> {
if gp.base < tx_genome_start || gp.base >= tx_genome_end {
return Err(FerroError::TranscriptNotOverlapping {
variant: normalized_str.clone(),
transcript_id: transcript_id.to_string(),
});
}
match mapper.genome_to_cds(transcript_id, gp) {
Ok(res) => Ok((res.variant, res.info)),
Err(FerroError::InvalidCoordinates { .. })
| Err(FerroError::ConversionError { .. }) => {
Err(FerroError::TranscriptNotOverlapping {
variant: normalized_str.clone(),
transcript_id: transcript_id.to_string(),
})
}
Err(other) => Err(other),
}
};
let (cds_start_raw, info_start) = map_position(&g_start)?;
let (cds_end_raw, info_end) = map_position(&g_end)?;
let (cds_start, cds_end) = match strand {
crate::reference::Strand::Plus => (cds_start_raw, cds_end_raw),
crate::reference::Strand::Minus => (cds_end_raw, cds_start_raw),
crate::reference::Strand::Unknown => {
return Err(FerroError::UnsupportedProjection {
reason: format!(
"transcript {} has unknown strand; cannot project g. → c./n.",
transcript_id
),
});
}
};
let c_edit = transform_edit_for_strand(&edit, strand);
let coding = if is_coding {
let interval = CdsInterval::new(cds_start, cds_end);
HgvsVariant::Cds(CdsVariant {
accession: parse_accession(transcript_id),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(interval, c_edit.clone()),
})
} else {
let tx_start = TxPos::new(cds_start.base);
let tx_end = TxPos::new(cds_end.base);
HgvsVariant::Tx(TxVariant {
accession: parse_accession(transcript_id),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(TxInterval::new(tx_start, tx_end), c_edit.clone()),
})
};
let is_intronic = info_start.is_intronic || info_end.is_intronic;
let is_utr = !is_intronic
&& (info_start.in_5utr || info_start.in_3utr || info_end.in_5utr || info_end.in_3utr);
let mut protein = None;
if !is_intronic && !is_utr && is_coding {
let prot_acc = match cdot_protein {
Some(p) => Some(p),
None => transcript_id
.strip_prefix("NM_")
.map(|rest| format!("NP_{rest}"))
.or_else(|| {
transcript_id
.strip_prefix("XM_")
.map(|rest| format!("XP_{rest}"))
}),
};
if let Some(prot_acc) = prot_acc {
match &c_edit {
NaEdit::Substitution { .. } => {
let tx_for_codon = self.provider.get_transcript(transcript_id)?;
protein = Some(predict_substitution_protein(
&tx_for_codon,
cds_start.base,
&c_edit,
&prot_acc,
)?);
}
NaEdit::Deletion { .. }
| NaEdit::Insertion { .. }
| NaEdit::Duplication { .. }
| NaEdit::Delins { .. }
| NaEdit::Inversion { .. }
if cds_start.offset.is_none()
&& cds_end.offset.is_none()
&& cds_start.base > 0
&& cds_end.base > 0 =>
{
let tx_for_codon = self.provider.get_transcript(transcript_id)?;
match predict_indel_protein(
&tx_for_codon,
cds_start.base,
cds_end.base,
&c_edit,
&prot_acc,
) {
Ok(pv) => protein = Some(pv),
Err(FerroError::UnsupportedProjection { .. })
| Err(FerroError::ProteinSequenceUnavailable { .. }) => {}
Err(other) => return Err(other),
}
}
_ => {}
}
}
}
let frameshift = is_frameshift(&coding);
Ok(VariantProjection {
genomic: normalized.clone(),
coding: Some(coding),
protein,
transcript_id: transcript_id.to_string(),
gene_symbol,
is_frameshift: frameshift,
is_intronic,
is_utr,
})
}
}
fn extract_contig_and_pos(variant: &HgvsVariant) -> Result<(String, u64), FerroError> {
let effective = match variant {
HgvsVariant::Allele(allele) => {
allele
.variants
.first()
.ok_or_else(|| FerroError::UnsupportedProjection {
reason: "cannot project an empty allele to all transcripts".to_string(),
})?
}
other => other,
};
match effective {
HgvsVariant::Genome(gv) => {
let contig = if let Some(chr) = &gv.accession.chromosome {
chr.to_string()
} else {
gv.accession.full()
};
let pos = gv
.loc_edit
.location
.start
.inner()
.cloned()
.ok_or_else(|| FerroError::InvalidCoordinates {
msg: "genomic interval start is unknown".to_string(),
})?
.base;
Ok((contig, pos))
}
_ => Err(FerroError::UnsupportedProjection {
reason: "project_all currently only accepts g. variants".to_string(),
}),
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::data::cdot::{CdotMapper, CdotTranscript};
use crate::data::projection::Projector;
use crate::hgvs::variant::{AllelePhase, AlleleVariant};
use crate::reference::mock::MockProvider;
use crate::reference::transcript::{Exon, ManeStatus, Strand as TxStrand, Transcript};
use crate::reference::Strand as ProvStrand;
use std::sync::OnceLock;
fn make_test_provider_and_projector() -> (Projector, MockProvider) {
let mut cdot = CdotMapper::new();
cdot.add_transcript(
"NM_TEST.1".to_string(),
CdotTranscript {
gene_name: Some("TESTGENE".to_string()),
contig: "chr1".to_string(),
strand: ProvStrand::Plus,
exons: vec![[1000, 1009, 0, 9]],
cds_start: Some(0), cds_end: Some(9), gene_id: None,
protein: Some("NP_TEST.1".to_string()),
exon_cigars: Vec::new(),
},
);
let projector = Projector::new(cdot);
let mut provider = MockProvider::new();
provider.add_transcript(Transcript {
id: "NM_TEST.1".to_string(),
gene_symbol: Some("TESTGENE".to_string()),
strand: TxStrand::Plus,
sequence: Some("ATGCGCTAA".to_string()),
cds_start: Some(1), cds_end: Some(9),
exons: vec![Exon::new(1, 1, 9)],
chromosome: Some("chr1".to_string()),
genomic_start: Some(1000),
genomic_end: Some(1008),
genome_build: Default::default(),
mane_status: ManeStatus::default(),
refseq_match: None,
ensembl_match: None,
exon_cigars: Vec::new(),
cached_introns: OnceLock::new(),
});
let prefix = "N".repeat(999);
let suffix = "N".repeat(100);
provider.add_genomic_sequence("chr1", format!("{}{}{}", prefix, "ATGCGCTAA", suffix));
(projector, provider)
}
fn make_minus_strand_provider_and_projector() -> (Projector, MockProvider) {
let mut cdot = CdotMapper::new();
cdot.add_transcript(
"NM_TEST_MINUS.1".to_string(),
CdotTranscript {
gene_name: Some("TESTGENE".to_string()),
contig: "chr1".to_string(),
strand: ProvStrand::Minus,
exons: vec![[1000, 1009, 0, 9]],
cds_start: Some(0),
cds_end: Some(9),
gene_id: None,
protein: Some("NP_TEST_MINUS.1".to_string()),
exon_cigars: Vec::new(),
},
);
let projector = Projector::new(cdot);
let mut provider = MockProvider::new();
provider.add_transcript(Transcript {
id: "NM_TEST_MINUS.1".to_string(),
gene_symbol: Some("TESTGENE".to_string()),
strand: TxStrand::Minus,
sequence: Some("ATGCGCTAA".to_string()), cds_start: Some(1), cds_end: Some(9),
exons: vec![Exon::new(1, 1, 9)],
chromosome: Some("chr1".to_string()),
genomic_start: Some(1000),
genomic_end: Some(1008),
genome_build: Default::default(),
mane_status: ManeStatus::default(),
refseq_match: None,
ensembl_match: None,
exon_cigars: Vec::new(),
cached_introns: OnceLock::new(),
});
let prefix = "N".repeat(999);
let suffix = "N".repeat(100);
provider.add_genomic_sequence("chr1", format!("{}TTAGCGCAT{}", prefix, suffix));
(projector, provider)
}
fn make_two_transcript_setup() -> (Projector, MockProvider) {
let mut cdot = CdotMapper::new();
cdot.add_transcript(
"NM_TX1.1".to_string(),
CdotTranscript {
gene_name: Some("GENE1".to_string()),
contig: "chr1".to_string(),
strand: ProvStrand::Plus,
exons: vec![[1000, 1009, 0, 9]],
cds_start: Some(0),
cds_end: Some(9),
gene_id: None,
protein: Some("NP_TX1.1".to_string()),
exon_cigars: Vec::new(),
},
);
cdot.add_transcript(
"NM_TX2.1".to_string(),
CdotTranscript {
gene_name: Some("GENE1".to_string()),
contig: "chr1".to_string(),
strand: ProvStrand::Plus,
exons: vec![[1000, 1009, 0, 9]],
cds_start: Some(0),
cds_end: Some(9),
gene_id: None,
protein: Some("NP_TX2.1".to_string()),
exon_cigars: Vec::new(),
},
);
let projector = Projector::new(cdot)
.with_mane(vec!["NM_TX2.1".to_string()], vec![]);
let mut provider = MockProvider::new();
for id in ["NM_TX1.1", "NM_TX2.1"] {
provider.add_transcript(Transcript {
id: id.to_string(),
gene_symbol: Some("GENE1".to_string()),
strand: TxStrand::Plus,
sequence: Some("ATGCGCTAA".to_string()),
cds_start: Some(1),
cds_end: Some(9),
exons: vec![Exon::new(1, 1, 9)],
chromosome: Some("chr1".to_string()),
genomic_start: Some(1000),
genomic_end: Some(1008),
genome_build: Default::default(),
mane_status: ManeStatus::default(),
refseq_match: None,
ensembl_match: None,
exon_cigars: Vec::new(),
cached_introns: OnceLock::new(),
});
}
let prefix = "N".repeat(999);
let suffix = "N".repeat(100);
provider.add_genomic_sequence("chr1", format!("{}{}{}", prefix, "ATGCGCTAA", suffix));
(projector, provider)
}
#[test]
fn project_substitution_minus_strand_revcomps_ref_alt() {
let (projector, provider) = make_minus_strand_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let result = vp
.project("chr1:g.1005G>A", "NM_TEST_MINUS.1")
.expect("minus-strand projection should succeed");
let c = result
.coding
.as_ref()
.expect("c. should be present")
.to_string();
assert!(
c.contains(":c.4C>T"),
"expected revcomp c.4C>T for minus strand, got: {}",
c
);
let p = result
.protein
.as_ref()
.expect("p. should be present")
.to_string();
assert_eq!(p, "NP_TEST_MINUS.1(TESTGENE):p.(Arg2Cys)");
assert!(!result.is_frameshift);
assert!(!result.is_intronic);
assert!(!result.is_utr);
}
fn make_intronic_test_data() -> (Projector, MockProvider) {
let mut cdot = CdotMapper::new();
cdot.add_transcript(
"NM_INTR.1".to_string(),
CdotTranscript {
gene_name: Some("INTRGENE".to_string()),
contig: "chr1".to_string(),
strand: ProvStrand::Plus,
exons: vec![[1000, 1010, 0, 10], [2000, 2010, 10, 20]],
cds_start: Some(0),
cds_end: Some(18),
gene_id: None,
protein: Some("NP_INTR.1".to_string()),
exon_cigars: Vec::new(),
},
);
let projector = Projector::new(cdot);
let mut provider = MockProvider::new();
provider.add_transcript(Transcript {
id: "NM_INTR.1".to_string(),
gene_symbol: Some("INTRGENE".to_string()),
strand: TxStrand::Plus,
sequence: Some("ATGCGCAAAGGGTAACCC".to_string()), cds_start: Some(1),
cds_end: Some(18),
exons: vec![Exon::new(1, 1, 10), Exon::new(2, 11, 20)],
chromosome: Some("chr1".to_string()),
genomic_start: Some(1000),
genomic_end: Some(2009),
genome_build: Default::default(),
mane_status: ManeStatus::default(),
refseq_match: None,
ensembl_match: None,
exon_cigars: Vec::new(),
cached_introns: OnceLock::new(),
});
let exon1 = "ATGCGCAAAG"; let intron = "N".repeat(990); let exon2 = "GGTAACCCNN"; let prefix = "N".repeat(999);
let suffix = "N".repeat(100);
provider.add_genomic_sequence("chr1", format!("{prefix}{exon1}{intron}{exon2}{suffix}"));
(projector, provider)
}
#[test]
fn project_intronic_substitution_no_protein() {
let (projector, provider) = make_intronic_test_data();
let vp = VariantProjector::new(projector, provider);
let result = vp
.project("NC_000001.11:g.1015A>G", "NM_INTR.1")
.expect("intronic substitution should project to c. with offset");
assert!(result.is_intronic, "expected is_intronic=true");
assert!(result.protein.is_none(), "no p. for intronic substitutions");
let c = result.coding.as_ref().expect("c. expected").to_string();
assert!(
c.contains('+'),
"expected intronic offset notation (e.g. c.9+6 or c.10+6), got: {}",
c
);
}
#[test]
fn project_no_overlap_returns_transcript_not_overlapping() {
let (projector, provider) = make_test_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let err = vp
.project("NC_000001.11:g.5000A>G", "NM_TEST.1")
.expect_err("should fail to project outside the transcript");
assert!(
matches!(err, FerroError::TranscriptNotOverlapping { .. }),
"expected TranscriptNotOverlapping, got: {:?}",
err
);
}
#[test]
fn project_substitution_plus_strand_missense() {
let (projector, provider) = make_test_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let result = vp
.project("chr1:g.1003C>A", "NM_TEST.1")
.expect("projection should succeed");
assert_eq!(result.transcript_id, "NM_TEST.1");
assert_eq!(result.gene_symbol.as_deref(), Some("TESTGENE"));
let c = result
.coding
.as_ref()
.expect("c. should be present")
.to_string();
assert!(c.contains(":c.4C>A"), "expected ':c.4C>A' in '{}' ", c);
let p = result
.protein
.as_ref()
.expect("p. should be present")
.to_string();
assert_eq!(p, "NP_TEST.1(TESTGENE):p.(Arg2Ser)");
assert!(!result.is_frameshift);
assert!(!result.is_intronic);
assert!(!result.is_utr);
}
#[test]
fn project_single_base_deletion_is_frameshift() {
let (projector, provider) = make_test_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let result = vp
.project("NC_000001.11:g.1004del", "NM_TEST.1")
.expect("deletion should project");
let c = result.coding.as_ref().expect("c. expected").to_string();
assert!(c.contains(":c."), "expected c. variant, got: {}", c);
assert!(c.contains("del"), "expected del notation in c., got: {}", c);
let p = result
.protein
.as_ref()
.expect("p. expected for CDS del")
.to_string();
assert!(p.contains("Arg2"), "expected Arg2 in p.: {}", p);
assert!(p.contains("fs"), "expected fs in p.: {}", p);
assert!(result.is_frameshift, "1-base del should be frameshift");
assert!(!result.is_intronic);
}
#[test]
fn project_three_base_deletion_in_frame() {
let (projector, provider) = make_test_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let result = vp
.project("NC_000001.11:g.1003_1005del", "NM_TEST.1")
.expect("3-base del should project");
let c = result.coding.as_ref().expect("c. expected").to_string();
assert!(c.contains("del"), "expected del notation, got: {}", c);
let p = result
.protein
.as_ref()
.expect("p. expected for in-frame del")
.to_string();
assert!(p.contains("Arg2del"), "expected Arg2del in p.: {}", p);
assert!(!result.is_frameshift, "3-base deletion is in-frame");
}
#[test]
fn project_single_base_insertion_is_frameshift() {
let (projector, provider) = make_test_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let result = vp
.project("NC_000001.11:g.1003_1004insA", "NM_TEST.1")
.expect("insertion should project");
let c = result.coding.as_ref().expect("c. expected").to_string();
assert!(c.contains("ins"), "expected ins notation, got: {}", c);
assert!(result.protein.is_some(), "p. expected for CDS insertion");
assert!(result.is_frameshift, "1-base insertion is frameshift");
}
#[test]
fn project_duplication() {
let (projector, provider) = make_test_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let result = vp
.project("NC_000001.11:g.1004dup", "NM_TEST.1")
.expect("dup should project");
let c = result.coding.as_ref().expect("c. expected").to_string();
assert!(c.contains("dup"), "expected dup notation, got: {}", c);
assert!(result.protein.is_some(), "p. expected for CDS dup");
assert!(result.is_frameshift, "1-base dup is frameshift");
}
#[test]
fn project_delins() {
let (projector, provider) = make_test_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let result = vp
.project("NC_000001.11:g.1003_1004delinsAT", "NM_TEST.1")
.expect("delins should project");
let c = result.coding.as_ref().expect("c. expected").to_string();
assert!(c.contains("delins"), "expected delins notation, got: {}", c);
let p = result
.protein
.as_ref()
.expect("p. expected for delins")
.to_string();
assert!(p.contains("delins"), "expected delins in p.: {}", p);
assert!(
!result.is_frameshift,
"delins of equal length is not a frameshift"
);
}
#[test]
fn project_inversion() {
let (projector, provider) = make_test_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let result = vp
.project("NC_000001.11:g.1003_1005inv", "NM_TEST.1")
.expect("inv should project");
let c = result.coding.as_ref().expect("c. expected").to_string();
assert!(c.contains("inv"), "expected inv notation, got: {}", c);
let p = result
.protein
.as_ref()
.expect("p. expected for inv")
.to_string();
assert!(p.contains("Arg2"), "expected Arg2 in p.: {}", p);
assert!(p.contains("delins"), "expected delins in p.: {}", p);
assert!(p.contains("Ala"), "expected Ala in p.: {}", p);
assert!(!result.is_frameshift, "inversion is not a frameshift");
}
fn make_ensembl_provider_and_projector() -> (Projector, MockProvider) {
let mut cdot = CdotMapper::new();
cdot.add_transcript(
"ENST00000000001.1".to_string(),
CdotTranscript {
gene_name: Some("TESTGENE".to_string()),
contig: "chr1".to_string(),
strand: ProvStrand::Plus,
exons: vec![[1000, 1009, 0, 9]],
cds_start: Some(0),
cds_end: Some(9),
gene_id: None,
protein: None,
exon_cigars: Vec::new(),
},
);
let projector = Projector::new(cdot);
let mut provider = MockProvider::new();
provider.add_transcript(Transcript {
id: "ENST00000000001.1".to_string(),
gene_symbol: Some("TESTGENE".to_string()),
strand: TxStrand::Plus,
sequence: Some("ATGCGCTAA".to_string()),
cds_start: Some(1),
cds_end: Some(9),
exons: vec![Exon::new(1, 1, 9)],
chromosome: Some("chr1".to_string()),
genomic_start: Some(1000),
genomic_end: Some(1008),
genome_build: Default::default(),
mane_status: ManeStatus::default(),
refseq_match: None,
ensembl_match: None,
exon_cigars: Vec::new(),
cached_introns: OnceLock::new(),
});
let prefix = "N".repeat(999);
let suffix = "N".repeat(100);
provider.add_genomic_sequence("chr1", format!("{}{}{}", prefix, "ATGCGCTAA", suffix));
(projector, provider)
}
#[test]
fn project_substitution_no_protein_accession_skips_protein() {
let (projector, provider) = make_ensembl_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let result = vp
.project("chr1:g.1003C>A", "ENST00000000001.1")
.expect("projection should succeed");
let c = result.coding.as_ref().expect("c. expected").to_string();
assert!(c.contains(":c.4C>A"), "expected c.4C>A, got: {}", c);
assert!(
result.protein.is_none(),
"expected no protein for accession with no NM_/XM_ prefix and no cdot.protein, got: {:?}",
result.protein
);
}
#[test]
fn project_normalized_same_result_as_project_variant() {
let (projector, provider) = make_test_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let variant = crate::parse_hgvs("chr1:g.1003C>A").expect("parse should succeed");
let via_project = vp
.project_variant(&variant, "NM_TEST.1")
.expect("project_variant should succeed");
let normalized = vp.normalizer.normalize(&variant).expect("normalize failed");
let via_normalized = vp
.project_normalized(&normalized, "NM_TEST.1")
.expect("project_normalized should succeed");
assert_eq!(
via_project.coding.as_ref().map(|v| v.to_string()),
via_normalized.coding.as_ref().map(|v| v.to_string()),
"project_normalized should produce same c. as project_variant"
);
assert_eq!(
via_project.protein.as_ref().map(|v| v.to_string()),
via_normalized.protein.as_ref().map(|v| v.to_string()),
"project_normalized should produce same p. as project_variant"
);
}
#[test]
fn project_variant_all_returns_both_transcripts() {
let (projector, provider) = make_two_transcript_setup();
let vp = VariantProjector::new(projector, provider);
let results = vp
.project_all("chr1:g.1003C>A")
.expect("project_all should succeed");
assert_eq!(
results.len(),
2,
"expected projections onto both transcripts, got: {}",
results.len()
);
}
#[test]
fn project_all_mane_select_sorts_first() {
let (projector, provider) = make_two_transcript_setup();
let vp = VariantProjector::new(projector, provider);
let results = vp
.project_all("chr1:g.1003C>A")
.expect("project_all should succeed");
assert_eq!(
results[0].transcript_id, "NM_TX2.1",
"MANE Select should be first"
);
}
#[test]
fn project_all_no_overlap_returns_empty() {
let (projector, provider) = make_two_transcript_setup();
let vp = VariantProjector::new(projector, provider);
let results = vp
.project_all("NC_000001.11:g.5000A>G")
.expect("project_all should return Ok for no overlaps");
assert!(
results.is_empty(),
"expected empty result for non-overlapping variant"
);
}
fn project_cis_allele() -> VariantProjection {
let (projector, provider) = make_test_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let v1 = crate::parse_hgvs("chr1:g.1003C>A").expect("v1 parse");
let v2 = crate::parse_hgvs("chr1:g.1006T>A").expect("v2 parse");
let allele = HgvsVariant::Allele(AlleleVariant::cis(vec![v1, v2]));
vp.project_variant(&allele, "NM_TEST.1")
.expect("cis allele projection should succeed")
}
#[test]
fn project_cis_allele_produces_cis_coding_allele() {
let result = project_cis_allele();
let coding = result.coding.as_ref().expect("c. allele should be present");
assert!(
matches!(coding, HgvsVariant::Allele(av) if av.phase == AllelePhase::Cis),
"expected Cis coding allele, got: {}",
coding
);
if let HgvsVariant::Allele(av) = coding {
assert_eq!(
av.variants.len(),
2,
"expected 2 inner c. variants in cis allele"
);
}
}
#[test]
fn project_trans_allele_preserves_trans_phase() {
let (projector, provider) = make_test_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let v1 = crate::parse_hgvs("chr1:g.1003C>A").expect("v1 parse");
let v2 = crate::parse_hgvs("chr1:g.1006T>A").expect("v2 parse");
let allele = HgvsVariant::Allele(AlleleVariant::trans(vec![v1, v2]));
let result = vp
.project_variant(&allele, "NM_TEST.1")
.expect("trans allele projection should succeed");
let coding = result.coding.as_ref().expect("c. expected");
assert!(
matches!(coding, HgvsVariant::Allele(av) if av.phase == AllelePhase::Trans),
"expected Trans coding allele, got: {}",
coding
);
}
#[test]
fn project_allele_with_frameshift_inner_sets_is_frameshift() {
let (projector, provider) = make_test_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let v1 = crate::parse_hgvs("chr1:g.1003C>A").expect("v1 parse");
let v_fs = crate::parse_hgvs("NC_000001.11:g.1004del").expect("fs parse");
let allele = HgvsVariant::Allele(AlleleVariant::cis(vec![v1, v_fs]));
let result = vp
.project_variant(&allele, "NM_TEST.1")
.expect("allele with frameshift should project");
assert!(
result.is_frameshift,
"allele containing a frameshift inner variant should set is_frameshift=true"
);
}
#[test]
fn project_allele_with_non_protein_inner_has_no_protein() {
let (projector, provider) = make_intronic_test_data();
let vp = VariantProjector::new(projector, provider);
let v_exon = crate::parse_hgvs("NC_000001.11:g.1003C>G").expect("exon parse");
let v_intron = crate::parse_hgvs("NC_000001.11:g.1015N>G").expect("intron parse");
let allele = HgvsVariant::Allele(AlleleVariant::cis(vec![v_exon, v_intron]));
let result = vp
.project_variant(&allele, "NM_INTR.1")
.expect("allele with intronic variant should project");
assert!(
result.protein.is_none(),
"allele with intronic inner variant should have no protein"
);
assert!(result.is_intronic, "should be marked intronic");
}
#[test]
fn project_empty_allele_unknown_transcript_returns_reference_not_found() {
let (projector, provider) = make_test_provider_and_projector();
let vp = VariantProjector::new(projector, provider);
let empty_allele = HgvsVariant::Allele(AlleleVariant::cis(vec![]));
let err = vp
.project_variant(&empty_allele, "NM_DOES_NOT_EXIST.1")
.expect_err("empty allele on unknown transcript should error");
assert!(
matches!(err, FerroError::ReferenceNotFound { ref id } if id == "NM_DOES_NOT_EXIST.1"),
"expected ReferenceNotFound for unknown transcript, got: {:?}",
err
);
}
}