use super::complex::{annotate_complex_delins, annotate_mnv};
use super::helpers::trim_alleles;
use super::*;
use crate::fasta::{FastaReader, write_genome_binary};
use crate::locate::{LocateIndex, VariantLocation, locate_variant};
use crate::test_fixtures::{minus_strand_coding, noncoding_2_exon, plus_strand_coding};
use crate::types::{Biotype, CdsSegment, Exon, Strand, TranscriptModel, TranscriptTier};
use tempfile::TempDir;
/// Builds the shared synthetic genome used by most tests here and opens it for reading.
///
/// Five contigs are written into a temporary binary genome (`test.bin` / `test.bin.idx`),
/// every one on an all-`A` background:
/// - `chr1` (6000 bp): `ATG` at 1500, `CGT` repeats up to 1998 followed by `CG`; a lone `T`
///   at 3000 followed by `CGT` repeats (padded with `C`) up to 3500; `CGT` repeats from 4000
///   (padded with `C`) up to 4497, then `TAA`.
/// - `chr17` (21000 bp): a 4500-base `ATG` + `CGT`-repeat + `TAA` sequence, complemented and
///   laid out right-to-left in three pieces ending at 19499, 15999 and 11999.
/// - `chr2` (1000 bp): background only.
/// - `chr3` (2000 bp): `ATG` at 600, `CGT` repeats (padded with `C`) up to 797, then `TAA`.
/// - `chrM` (1000 bp): `ATG` at 100, `TGA` repeats (padded with `T`) up to 297, then `TAA`.
///
/// Returns the temp directory together with the reader; callers keep the directory bound
/// (presumably so the backing files outlive the reader).
fn write_test_fasta() -> (TempDir, FastaReader) {
    // Lays `codon` end-to-end over `seq[start..end]` for as many whole triplets as fit,
    // then overwrites whatever is left before `end` with `pad`.
    fn tile_codons(seq: &mut [u8], start: usize, end: usize, codon: &[u8; 3], pad: u8) {
        let whole = (end - start) / 3 * 3;
        for triplet in seq[start..start + whole].chunks_exact_mut(3) {
            triplet.copy_from_slice(codon);
        }
        seq[start + whole..end].fill(pad);
    }

    let scratch = TempDir::new().unwrap();

    // chr1: three coding stretches separated by untouched background.
    let mut chr1 = vec![b'A'; 6000];
    chr1[1500..1503].copy_from_slice(b"ATG");
    tile_codons(&mut chr1, 1503, 1998, b"CGT", b'C');
    chr1[1998..2000].copy_from_slice(b"CG");
    chr1[3000] = b'T';
    tile_codons(&mut chr1, 3001, 3500, b"CGT", b'C');
    tile_codons(&mut chr1, 4000, 4497, b"CGT", b'C');
    chr1[4497..4500].copy_from_slice(b"TAA");

    // chr17: one transcript-orientation sequence, written complemented and reversed.
    let mut chr17 = vec![b'A'; 21000];
    let mut coding = Vec::with_capacity(4500);
    coding.extend_from_slice(b"ATG");
    coding.extend(b"CGT".iter().copied().cycle().take(4494));
    coding.extend_from_slice(b"TAA");
    assert_eq!(coding.len(), 4500);
    // (highest genomic index of the piece, first coding offset, piece length)
    let pieces: [(usize, usize, usize); 3] =
        [(19499, 0, 1500), (15999, 1500, 2000), (11999, 3500, 1000)];
    for &(top, first, len) in pieces.iter() {
        for (offset, &base) in coding[first..first + len].iter().enumerate() {
            chr17[top - offset] = crate::codon::complement(base);
        }
    }

    // chr2: background only.
    let chr2 = vec![b'A'; 1000];

    // chr3: a single coding stretch.
    let mut chr3 = vec![b'A'; 2000];
    chr3[600..603].copy_from_slice(b"ATG");
    tile_codons(&mut chr3, 603, 797, b"CGT", b'C');
    chr3[797..800].copy_from_slice(b"TAA");

    // chrM: a single stretch of TGA triplets.
    let mut chrm = vec![b'A'; 1000];
    chrm[100..103].copy_from_slice(b"ATG");
    tile_codons(&mut chrm, 103, 297, b"TGA", b'T');
    chrm[297..300].copy_from_slice(b"TAA");

    let contigs: Vec<(&str, &[u8])> = vec![
        ("chr1", &chr1),
        ("chr17", &chr17),
        ("chr2", &chr2),
        ("chr3", &chr3),
        ("chrM", &chrm),
    ];
    let bin_path = scratch.path().join("test.bin");
    let idx_path = scratch.path().join("test.bin.idx");
    write_genome_binary(&contigs, "test", &bin_path, &idx_path).unwrap();
    let reader = FastaReader::open(&bin_path).unwrap();
    (scratch, reader)
}
/// Single-exon, plus-strand protein-coding transcript on `chrM`:
/// one exon covering 0..1000 with a CDS at 100..400.
fn mito_coding() -> TranscriptModel {
    let only_exon = Exon {
        exon_number: 1,
        genomic_start: 0,
        genomic_end: 1000,
    };
    let only_cds = CdsSegment {
        exon_index: 0,
        genomic_start: 100,
        genomic_end: 400,
        phase: 0,
    };
    TranscriptModel {
        accession: "NM_TEST_MITO.1".into(),
        protein_accession: Some("NP_TEST_MITO.1".into()),
        gene_symbol: "TESTMITO".into(),
        hgnc_id: None,
        ensembl_accession: None,
        chrom: "chrM".into(),
        strand: Strand::Plus,
        tx_start: 0,
        tx_end: 1000,
        cds_genomic_start: Some(100),
        cds_genomic_end: Some(400),
        exons: vec![only_exon],
        cds_segments: vec![only_cds],
        tier: TranscriptTier::ManeSelect,
        biotype: Biotype::ProteinCoding,
        exon_count: 1,
    }
}
/// Single-exon, plus-strand protein-coding transcript on `chr3`:
/// one exon covering 0..2000 with a CDS at 600..1400.
fn single_exon_coding() -> TranscriptModel {
    let only_exon = Exon {
        exon_number: 1,
        genomic_start: 0,
        genomic_end: 2000,
    };
    let only_cds = CdsSegment {
        exon_index: 0,
        genomic_start: 600,
        genomic_end: 1400,
        phase: 0,
    };
    TranscriptModel {
        accession: "NM_TEST_SE.1".into(),
        protein_accession: Some("NP_TEST_SE.1".into()),
        gene_symbol: "TESTSE".into(),
        hgnc_id: None,
        ensembl_accession: None,
        chrom: "chr3".into(),
        strand: Strand::Plus,
        tx_start: 0,
        tx_end: 2000,
        cds_genomic_start: Some(600),
        cds_genomic_end: Some(1400),
        exons: vec![only_exon],
        cds_segments: vec![only_cds],
        tier: TranscriptTier::ManeSelect,
        biotype: Biotype::ProteinCoding,
        exon_count: 1,
    }
}
/// Test convenience: builds the `LocateIndex` for `tx` and panics if construction fails.
fn build_index(tx: &TranscriptModel) -> LocateIndex {
    let built = LocateIndex::build(tx);
    built.unwrap()
}
/// C>T at chr1:1503 (first base after the fixture's `ATG`) must be an R/C missense
/// at protein position 2, CDS position 4.
#[test]
fn missense_variant() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 1503, b'C', b'T', &model, &locator, &genome).unwrap();

    let is_missense = ann.consequences.contains(&Consequence::MissenseVariant);
    assert!(is_missense);
    assert_eq!(ann.impact, Impact::Moderate);
    assert_eq!(ann.protein_start, Some(2));
    assert_eq!(ann.amino_acids.as_deref(), Some("R/C"));
    assert_eq!(ann.codons.as_deref(), Some("Cgt/Tgt"));
    assert_eq!(ann.cds_position, Some(4));
}
/// T>A at chr1:1505 changes `cgT` to `cgA`; both encode R, so the call must be synonymous.
#[test]
fn synonymous_variant() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 1505, b'T', b'A', &model, &locator, &genome).unwrap();

    let is_synonymous = ann.consequences.contains(&Consequence::SynonymousVariant);
    assert!(is_synonymous);
    assert_eq!(ann.impact, Impact::Low);
    assert_eq!(ann.amino_acids.as_deref(), Some("R"));
    assert_eq!(ann.codons.as_deref(), Some("cgT/cgA"));
}
/// On the dedicated stop-gained fixture, C>T at chr1:103 turns `CGA` into `TGA`
/// and must be reported as a high-impact R/* stop gain.
#[test]
fn stop_gained() {
    let (_keep, genome) = write_stop_gained_fasta();
    let model = stop_gained_transcript();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 103, b'C', b'T', &model, &locator, &genome).unwrap();

    let gains_stop = ann.consequences.contains(&Consequence::StopGained);
    assert!(gains_stop);
    assert_eq!(ann.impact, Impact::High);
    assert_eq!(ann.amino_acids.as_deref(), Some("R/*"));
}
/// Tiny single-exon, plus-strand coding transcript on `chr1` for the stop-gained tests:
/// exon 50..200 with a 12-base CDS at 100..112.
fn stop_gained_transcript() -> TranscriptModel {
    let only_exon = Exon {
        exon_number: 1,
        genomic_start: 50,
        genomic_end: 200,
    };
    let only_cds = CdsSegment {
        exon_index: 0,
        genomic_start: 100,
        genomic_end: 112,
        phase: 0,
    };
    TranscriptModel {
        accession: "NM_STOP_GAINED.1".into(),
        protein_accession: Some("NP_STOP_GAINED.1".into()),
        gene_symbol: "TESTSTOP".into(),
        hgnc_id: None,
        ensembl_accession: None,
        chrom: "chr1".into(),
        strand: Strand::Plus,
        tx_start: 50,
        tx_end: 200,
        cds_genomic_start: Some(100),
        cds_genomic_end: Some(112),
        exons: vec![only_exon],
        cds_segments: vec![only_cds],
        tier: TranscriptTier::ManeSelect,
        biotype: Biotype::ProteinCoding,
        exon_count: 1,
    }
}
/// Writes a 300-base `chr1` of `A`s carrying `ATG CGA CGT TAA` at 100..112 into a temporary
/// binary genome (`stop.bin` / `stop.bin.idx`) and opens it.
///
/// Returns the temp directory alongside the reader; callers keep the directory bound.
fn write_stop_gained_fasta() -> (TempDir, FastaReader) {
    let scratch = TempDir::new().unwrap();

    let mut seq = vec![b'A'; 300];
    // Start codon, two arginine codons, stop codon.
    seq[100..112].copy_from_slice(b"ATGCGACGTTAA");

    let bin_path = scratch.path().join("stop.bin");
    let idx_path = scratch.path().join("stop.bin.idx");
    write_genome_binary(&[("chr1", seq.as_slice())], "test", &bin_path, &idx_path).unwrap();

    let reader = FastaReader::open(&bin_path).unwrap();
    (scratch, reader)
}
/// T>C at chr1:4497 rewrites the fixture's `TAA` to `CAA`; expects a high-impact
/// stop loss (`*/Q`) at protein position 500.
#[test]
fn stop_lost() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 4497, b'T', b'C', &model, &locator, &genome).unwrap();

    let loses_stop = ann.consequences.contains(&Consequence::StopLost);
    assert!(loses_stop);
    assert_eq!(ann.impact, Impact::High);
    assert_eq!(ann.amino_acids.as_deref(), Some("*/Q"));
    assert_eq!(ann.protein_start, Some(500));
}
/// A>C at chr1:1500 hits the first base of the fixture's `ATG`; expects a high-impact
/// start loss at protein position 1.
#[test]
fn start_lost() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 1500, b'A', b'C', &model, &locator, &genome).unwrap();

    let loses_start = ann.consequences.contains(&Consequence::StartLost);
    assert!(loses_start);
    assert_eq!(ann.impact, Impact::High);
    assert_eq!(ann.protein_start, Some(1));
}
/// A>G at chr1:4499 turns `TAA` into `TAG`; the stop is kept, so the call must be
/// a low-impact stop-retained variant.
#[test]
fn stop_retained_variant() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 4499, b'A', b'G', &model, &locator, &genome).unwrap();

    let keeps_stop = ann.consequences.contains(&Consequence::StopRetainedVariant);
    assert!(keeps_stop);
    assert_eq!(ann.impact, Impact::Low);
}
/// An SNV at chr1:2000 must be called a high-impact splice-donor variant and carry
/// intron information.
#[test]
fn splice_donor_maps_correctly() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 2000, b'A', b'T', &model, &locator, &genome).unwrap();

    let hits_donor = ann.consequences.contains(&Consequence::SpliceDonorVariant);
    assert!(hits_donor);
    assert_eq!(ann.impact, Impact::High);
    assert!(ann.intron.is_some());
}
/// An SNV at chr1:2999 must be called a high-impact splice-acceptor variant.
#[test]
fn splice_acceptor_maps_correctly() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 2999, b'A', b'T', &model, &locator, &genome).unwrap();

    let hits_acceptor = ann.consequences.contains(&Consequence::SpliceAcceptorVariant);
    assert!(hits_acceptor);
    assert_eq!(ann.impact, Impact::High);
}
/// An SNV at chr1:2500 must be a modifier-impact intron variant with intron information
/// and no CDS or cDNA position.
#[test]
fn intron_variant_maps_correctly() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 2500, b'A', b'T', &model, &locator, &genome).unwrap();

    let is_intronic = ann.consequences.contains(&Consequence::IntronVariant);
    assert!(is_intronic);
    assert_eq!(ann.impact, Impact::Modifier);
    assert!(ann.intron.is_some());
    assert!(ann.cds_position.is_none());
    assert!(ann.cdna_position.is_none());
}
/// An SNV at chr1:1200 must be a modifier-impact 5' UTR variant with exon information.
#[test]
fn five_prime_utr_maps_correctly() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 1200, b'A', b'T', &model, &locator, &genome).unwrap();

    let in_utr5 = ann.consequences.contains(&Consequence::FivePrimeUtrVariant);
    assert!(in_utr5);
    assert_eq!(ann.impact, Impact::Modifier);
    assert!(ann.exon.is_some());
}
/// An SNV at chr1:4600 must be a modifier-impact 3' UTR variant.
#[test]
fn three_prime_utr_maps_correctly() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 4600, b'A', b'T', &model, &locator, &genome).unwrap();

    let in_utr3 = ann.consequences.contains(&Consequence::ThreePrimeUtrVariant);
    assert!(in_utr3);
    assert_eq!(ann.impact, Impact::Modifier);
}
/// T>A at chr1:3000 must yield a compound call: a coding consequence plus
/// `SpliceRegionVariant`, with low overall impact.
///
/// Despite the test's name, the coding consequence asserted here is
/// `SynonymousVariant`, not missense.
#[test]
fn compound_missense_splice_region() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 3000, b'T', b'A', &model, &locator, &genome).unwrap();

    let has = |wanted: &Consequence| ann.consequences.contains(wanted);
    assert!(has(&Consequence::SynonymousVariant));
    assert!(has(&Consequence::SpliceRegionVariant));
    assert_eq!(ann.impact, Impact::Low);
}
/// An SNV at chr1:500 must be a modifier-impact upstream-gene variant.
#[test]
fn upstream_gene_variant() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 500, b'A', b'T', &model, &locator, &genome).unwrap();

    let is_upstream = ann.consequences.contains(&Consequence::UpstreamGeneVariant);
    assert!(is_upstream);
    assert_eq!(ann.impact, Impact::Modifier);
}
/// An SNV at chr1:5500 must be a modifier-impact downstream-gene variant.
#[test]
fn downstream_gene_variant() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 5500, b'A', b'T', &model, &locator, &genome).unwrap();

    let is_downstream = ann.consequences.contains(&Consequence::DownstreamGeneVariant);
    assert!(is_downstream);
    assert_eq!(ann.impact, Impact::Modifier);
}
/// An SNV at chr2:200 against the two-exon non-coding fixture must be a modifier-impact
/// non-coding-transcript-exon variant.
#[test]
fn non_coding_exon_variant() {
    let (_keep, genome) = write_test_fasta();
    let model = noncoding_2_exon();
    let locator = build_index(&model);
    let ann = annotate_snv("chr2", 200, b'A', b'T', &model, &locator, &genome).unwrap();

    let in_nc_exon = ann
        .consequences
        .contains(&Consequence::NonCodingTranscriptExonVariant);
    assert!(in_nc_exon);
    assert_eq!(ann.impact, Impact::Modifier);
}
/// G>A at chr17:19496 on the minus-strand fixture must be reported in transcript
/// orientation: an R/C missense at protein position 2.
#[test]
fn minus_strand_complement() {
    let (_keep, genome) = write_test_fasta();
    let model = minus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr17", 19496, b'G', b'A', &model, &locator, &genome).unwrap();

    let is_missense = ann.consequences.contains(&Consequence::MissenseVariant);
    assert!(is_missense);
    assert_eq!(ann.amino_acids.as_deref(), Some("R/C"));
    assert_eq!(ann.protein_start, Some(2));
}
/// G>A at chr1:1999 (CDS position 500) must be an R/H missense with codons `cGt/cAt`.
/// The third codon base is the `T` the fixture places at 3000, so the codon is
/// presumably split across an exon boundary, as the test name says.
#[test]
fn codon_spanning_exon_boundary() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 1999, b'G', b'A', &model, &locator, &genome).unwrap();

    let is_missense = ann.consequences.contains(&Consequence::MissenseVariant);
    assert!(is_missense);
    assert_eq!(ann.amino_acids.as_deref(), Some("R/H"));
    assert_eq!(ann.codons.as_deref(), Some("cGt/cAt"));
    assert_eq!(ann.cds_position, Some(500));
}
/// On `chrM` the fixture's `TGA` codon must be read as W rather than stop:
/// chrM:103 locates to CDS codon 2, T>A there (`AGA`) is a stop gain, and
/// G>C at 104 (`TCA`) is a W/S missense.
#[test]
fn mitochondrial_tga_is_trp() {
    let (_keep, genome) = write_test_fasta();
    let model = mito_coding();
    let locator = build_index(&model);

    let loc = locate_variant("chrM", 103, 104, &model, &locator).unwrap();
    if let VariantLocation::CdsExon { codon_number, .. } = &loc {
        assert_eq!(*codon_number, 2);
    } else {
        panic!("expected CdsExon, got {:?}", loc);
    }

    let first_base = annotate_snv("chrM", 103, b'T', b'A', &model, &locator, &genome).unwrap();
    assert!(first_base.consequences.contains(&Consequence::StopGained));

    let second_base = annotate_snv("chrM", 104, b'G', b'C', &model, &locator, &genome).unwrap();
    assert!(second_base.consequences.contains(&Consequence::MissenseVariant));
    assert_eq!(second_base.amino_acids.as_deref(), Some("W/S"));
}
/// Supplying `G` as the reference at chr1:1500, where the fixture holds `A`, must fail
/// with `VarEffectError::RefMismatch`.
#[test]
fn ref_mismatch_returns_error() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);

    let err = annotate_snv("chr1", 1500, b'G', b'T', &model, &locator, &genome).unwrap_err();
    let is_ref_mismatch = matches!(err, crate::VarEffectError::RefMismatch { .. });
    assert!(is_ref_mismatch, "expected RefMismatch, got: {err:?}");
}
/// Real-data check (ignored by default): annotates G>A at chr17:7_674_220 against
/// `NM_000546.6` and expects a missense or stop-gained call at protein position 248.
///
/// Needs the transcript store at `data/vareffect/transcript_models.bin`, two directories
/// above the crate manifest, and a genome path in the `FASTA_PATH` environment variable.
#[test]
#[ignore]
fn tp53_r248w() {
    let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
    // ancestors(): 0 = the manifest dir itself, 1 = its parent, 2 = its grandparent.
    let repo_root = manifest_dir.ancestors().nth(2).unwrap();
    let store_path = repo_root.join("data/vareffect/transcript_models.bin");
    let store = crate::TranscriptStore::load_from_path(&store_path).unwrap();

    let fasta_path = std::env::var("FASTA_PATH").expect("FASTA_PATH env var");
    let genome = FastaReader::open(std::path::Path::new(&fasta_path)).unwrap();

    let (model, locator) = store
        .get_by_accession("NM_000546.6")
        .expect("NM_000546.6 not found");
    let ann = annotate_snv("chr17", 7_674_220, b'G', b'A', model, locator, &genome).unwrap();

    let is_missense = ann.consequences.contains(&Consequence::MissenseVariant);
    let plausible = is_missense || ann.consequences.contains(&Consequence::StopGained);
    assert!(
        plausible,
        "unexpected consequences: {:?}",
        ann.consequences,
    );
    assert_eq!(ann.protein_start, Some(248));
}
/// `CA` -> `C`: the shared leading `C` is trimmed, leaving ref `A`, an empty alt,
/// and a third element of 1 (presumably the start-position shift).
#[test]
fn trim_deletion() {
    let (ref_rest, alt_rest, shift) = trim_alleles(b"CA", b"C");
    assert_eq!(ref_rest, b"A");
    assert!(alt_rest.is_empty());
    assert_eq!(shift, 1);
}
/// `C` -> `CA`: the shared leading `C` is trimmed, leaving an empty ref, alt `A`,
/// and a third element of 1 (presumably the start-position shift).
#[test]
fn trim_insertion() {
    let (ref_rest, alt_rest, shift) = trim_alleles(b"C", b"CA");
    assert!(ref_rest.is_empty());
    assert_eq!(alt_rest, b"A");
    assert_eq!(shift, 1);
}
/// `CATG` -> `CG`: both the shared leading `C` and trailing `G` are removed,
/// leaving ref `AT`, an empty alt, and a third element of 1.
#[test]
fn trim_complex() {
    let (ref_rest, alt_rest, shift) = trim_alleles(b"CATG", b"CG");
    assert_eq!(ref_rest, b"AT");
    assert!(alt_rest.is_empty());
    assert_eq!(shift, 1);
}
/// `ACGT` -> `AT`: the shared leading `A` and trailing `T` are removed,
/// leaving ref `CG`, an empty alt, and a third element of 1.
#[test]
fn trim_with_suffix() {
    let (ref_rest, alt_rest, shift) = trim_alleles(b"ACGT", b"AT");
    assert_eq!(ref_rest, b"CG");
    assert!(alt_rest.is_empty());
    assert_eq!(shift, 1);
}
/// A plain SNV (`A` -> `T`) has nothing to trim: both alleles come back unchanged
/// with a third element of 0.
#[test]
fn trim_snv() {
    let (ref_rest, alt_rest, shift) = trim_alleles(b"A", b"T");
    assert_eq!(ref_rest, b"A");
    assert_eq!(alt_rest, b"T");
    assert_eq!(shift, 0);
}
/// `AC` -> `GT` share no prefix or suffix: both alleles come back unchanged
/// with a third element of 0.
#[test]
fn trim_no_shared() {
    let (ref_rest, alt_rest, shift) = trim_alleles(b"AC", b"GT");
    assert_eq!(ref_rest, b"AC");
    assert_eq!(alt_rest, b"GT");
    assert_eq!(shift, 0);
}
/// Deleting the single `C` at chr1:1503 must be a high-impact frameshift
/// starting at protein position 2.
#[test]
fn frameshift_1bp_deletion() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 1503, 1504, b"C", &model, &locator, &genome).unwrap();

    let shifts_frame = ann.consequences.contains(&Consequence::FrameshiftVariant);
    assert!(shifts_frame);
    assert_eq!(ann.impact, Impact::High);
    assert_eq!(ann.protein_start, Some(2));
}
/// Deleting `CG` at chr1:1503..1505 must be a high-impact frameshift.
#[test]
fn frameshift_2bp_deletion() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 1503, 1505, b"CG", &model, &locator, &genome).unwrap();

    let shifts_frame = ann.consequences.contains(&Consequence::FrameshiftVariant);
    assert!(shifts_frame);
    assert_eq!(ann.impact, Impact::High);
}
/// Deleting the whole `CGT` codon at chr1:1503..1506 must be a moderate-impact
/// in-frame deletion with amino-acid information populated.
#[test]
fn inframe_3bp_deletion() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 1503, 1506, b"CGT", &model, &locator, &genome).unwrap();

    let stays_in_frame = ann.consequences.contains(&Consequence::InframeDeletion);
    assert!(stays_in_frame);
    assert_eq!(ann.impact, Impact::Moderate);
    assert!(ann.amino_acids.is_some());
}
/// Deleting two codons (`CGTCGT`) at chr1:1503..1509 must be a moderate-impact
/// in-frame deletion.
#[test]
fn inframe_6bp_deletion() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann =
        annotate_deletion("chr1", 1503, 1509, b"CGTCGT", &model, &locator, &genome).unwrap();

    let stays_in_frame = ann.consequences.contains(&Consequence::InframeDeletion);
    assert!(stays_in_frame);
    assert_eq!(ann.impact, Impact::Moderate);
}
/// Inserting a single `A` at chr1:1503 must be a high-impact frameshift.
#[test]
fn frameshift_1bp_insertion() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_insertion("chr1", 1503, b"A", &model, &locator, &genome).unwrap();

    let shifts_frame = ann.consequences.contains(&Consequence::FrameshiftVariant);
    assert!(shifts_frame);
    assert_eq!(ann.impact, Impact::High);
}
/// Inserting `AT` at chr1:1503 must be a high-impact frameshift.
#[test]
fn frameshift_2bp_insertion() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_insertion("chr1", 1503, b"AT", &model, &locator, &genome).unwrap();

    let shifts_frame = ann.consequences.contains(&Consequence::FrameshiftVariant);
    assert!(shifts_frame);
    assert_eq!(ann.impact, Impact::High);
}
/// Inserting `GGG` at chr1:1503 must be a moderate-impact in-frame insertion with
/// amino-acid information populated.
#[test]
fn inframe_3bp_insertion() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_insertion("chr1", 1503, b"GGG", &model, &locator, &genome).unwrap();

    let stays_in_frame = ann.consequences.contains(&Consequence::InframeInsertion);
    assert!(stays_in_frame);
    assert_eq!(ann.impact, Impact::Moderate);
    assert!(ann.amino_acids.is_some());
}
/// A 3-base deletion at chr1:2500..2503 must be a modifier-impact intron variant.
#[test]
fn intronic_deletion() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 2500, 2503, b"AAA", &model, &locator, &genome).unwrap();

    let is_intronic = ann.consequences.contains(&Consequence::IntronVariant);
    assert!(is_intronic);
    assert_eq!(ann.impact, Impact::Modifier);
}
/// A 3-base deletion at chr1:1200..1203 must be reported as a 5' UTR variant.
#[test]
fn utr5_deletion() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 1200, 1203, b"AAA", &model, &locator, &genome).unwrap();

    let in_utr5 = ann.consequences.contains(&Consequence::FivePrimeUtrVariant);
    assert!(in_utr5);
}
/// Inserting `GGG` at chr1:4600 must be reported as a 3' UTR variant.
#[test]
fn utr3_insertion() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_insertion("chr1", 4600, b"GGG", &model, &locator, &genome).unwrap();

    let in_utr3 = ann.consequences.contains(&Consequence::ThreePrimeUtrVariant);
    assert!(in_utr3);
}
/// A 4-base deletion at chr1:1998..2002 must produce a high-impact essential splice call
/// (donor or acceptor).
#[test]
fn deletion_overlaps_splice_donor() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 1998, 2002, b"AAAA", &model, &locator, &genome).unwrap();

    let hits_donor = ann.consequences.contains(&Consequence::SpliceDonorVariant);
    let hits_splice_site =
        hits_donor || ann.consequences.contains(&Consequence::SpliceAcceptorVariant);
    assert!(
        hits_splice_site,
        "expected splice consequence, got {:?}",
        ann.consequences,
    );
    assert_eq!(ann.impact, Impact::High);
}
/// A 4-base deletion at chr1:2998..3002 must produce a high-impact essential splice call
/// (donor or acceptor).
#[test]
fn deletion_overlaps_splice_acceptor() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 2998, 3002, b"AAAA", &model, &locator, &genome).unwrap();

    let hits_donor = ann.consequences.contains(&Consequence::SpliceDonorVariant);
    let hits_splice_site =
        hits_donor || ann.consequences.contains(&Consequence::SpliceAcceptorVariant);
    assert!(
        hits_splice_site,
        "expected splice consequence, got {:?}",
        ann.consequences,
    );
    assert_eq!(ann.impact, Impact::High);
}
/// Inserting `GGG` at chr1:2001 must yield some splice consequence: donor, acceptor,
/// or splice-region.
#[test]
fn insertion_at_splice_donor() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_insertion("chr1", 2001, b"GGG", &model, &locator, &genome).unwrap();

    let splice_calls = [
        Consequence::SpliceDonorVariant,
        Consequence::SpliceAcceptorVariant,
        Consequence::SpliceRegionVariant,
    ];
    let any_splice = splice_calls
        .iter()
        .any(|wanted| ann.consequences.contains(wanted));
    assert!(
        any_splice,
        "expected splice consequence, got {:?}",
        ann.consequences,
    );
}
/// Minus-strand 1-base deletions: removing chr17:19498 must report both start loss and
/// frameshift, while removing chr17:19490 must report a frameshift. Both are high impact.
#[test]
fn minus_strand_frameshift() {
    let (_keep, genome) = write_test_fasta();
    let model = minus_strand_coding();
    let locator = build_index(&model);

    // Deletion inside the start codon.
    let at_start =
        annotate_deletion("chr17", 19498, 19499, b"A", &model, &locator, &genome).unwrap();
    let loses_start = at_start.consequences.contains(&Consequence::StartLost);
    assert!(
        loses_start,
        "expected start_lost (1bp del at codon 1 destroys Met), got {:?}",
        at_start.consequences,
    );
    let shifts_frame = at_start
        .consequences
        .contains(&Consequence::FrameshiftVariant);
    assert!(
        shifts_frame,
        "expected frameshift_variant alongside start_lost (1bp del is frameshifting), got {:?}",
        at_start.consequences,
    );
    assert_eq!(at_start.impact, Impact::High);

    // Deletion further into the CDS.
    let downstream =
        annotate_deletion("chr17", 19490, 19491, b"A", &model, &locator, &genome).unwrap();
    let downstream_shifts = downstream
        .consequences
        .contains(&Consequence::FrameshiftVariant);
    assert!(
        downstream_shifts,
        "expected frameshift_variant for deletion at codon 4, got {:?}",
        downstream.consequences,
    );
    assert_eq!(downstream.impact, Impact::High);
}
/// A 3-base deletion at chr17:19490..19493 on the minus-strand fixture must be a
/// moderate-impact in-frame deletion.
#[test]
fn minus_strand_inframe_deletion() {
    let (_keep, genome) = write_test_fasta();
    let model = minus_strand_coding();
    let locator = build_index(&model);
    let ann =
        annotate_deletion("chr17", 19490, 19493, b"AAA", &model, &locator, &genome).unwrap();

    let stays_in_frame = ann.consequences.contains(&Consequence::InframeDeletion);
    assert!(
        stays_in_frame,
        "expected inframe_deletion, got {:?}",
        ann.consequences,
    );
    assert_eq!(ann.impact, Impact::Moderate);
}
/// Deleting the entire `ATG` at chr1:1500..1503 must be a high-impact start loss.
#[test]
fn deletion_of_start_codon() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 1500, 1503, b"ATG", &model, &locator, &genome).unwrap();

    let loses_start = ann.consequences.contains(&Consequence::StartLost);
    assert!(
        loses_start,
        "expected start_lost, got {:?}",
        ann.consequences,
    );
    assert_eq!(ann.impact, Impact::High);
}
/// Deleting only the `A` of the start codon at chr1:1500 must report both start loss
/// and frameshift, with high impact.
#[test]
fn plus_strand_frameshift_start_lost() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 1500, 1501, b"A", &model, &locator, &genome).unwrap();

    let loses_start = ann.consequences.contains(&Consequence::StartLost);
    assert!(
        loses_start,
        "expected start_lost, got {:?}",
        ann.consequences,
    );
    let shifts_frame = ann.consequences.contains(&Consequence::FrameshiftVariant);
    assert!(
        shifts_frame,
        "expected frameshift_variant alongside start_lost, got {:?}",
        ann.consequences,
    );
    assert_eq!(ann.impact, Impact::High);
}
/// Deleting the entire `TAA` at chr1:4497..4500 must be reported as a stop loss.
#[test]
fn deletion_of_stop_codon() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 4497, 4500, b"TAA", &model, &locator, &genome).unwrap();

    let loses_stop = ann.consequences.contains(&Consequence::StopLost);
    assert!(
        loses_stop,
        "expected stop_lost, got {:?}",
        ann.consequences,
    );
}
/// A 10-base deletion at chr1:4498..4508, starting inside the stop codon and running past
/// it, must report stop loss plus 3' UTR, must NOT report frameshift, and is high impact.
#[test]
fn plus_strand_deletion_spans_stop_into_3utr_frameshift_length() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann =
        annotate_deletion("chr1", 4498, 4508, b"AAAAAAAAAA", &model, &locator, &genome).unwrap();

    let has = |wanted: &Consequence| ann.consequences.contains(wanted);
    assert!(
        has(&Consequence::StopLost),
        "expected stop_lost, got {:?}",
        ann.consequences,
    );
    assert!(
        has(&Consequence::ThreePrimeUtrVariant),
        "expected 3_prime_UTR_variant, got {:?}",
        ann.consequences,
    );
    assert!(
        !has(&Consequence::FrameshiftVariant),
        "must NOT emit frameshift_variant (VEP suppresses it via undefined cds_end), got {:?}",
        ann.consequences,
    );
    assert_eq!(ann.impact, Impact::High);
}
/// A 9-base deletion at chr1:4497..4506, covering the stop codon and the bases after it,
/// must report both stop loss and 3' UTR.
#[test]
fn plus_strand_deletion_spans_stop_into_3utr_inframe_length() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann =
        annotate_deletion("chr1", 4497, 4506, b"AAAAAAAAA", &model, &locator, &genome).unwrap();

    let has = |wanted: &Consequence| ann.consequences.contains(wanted);
    assert!(
        has(&Consequence::StopLost),
        "expected stop_lost, got {:?}",
        ann.consequences,
    );
    assert!(
        has(&Consequence::ThreePrimeUtrVariant),
        "expected 3_prime_UTR_variant, got {:?}",
        ann.consequences,
    );
}
/// Minus-strand counterpart: a 7-base deletion at chr17:10_995..11_002 must report stop
/// loss plus 3' UTR, must NOT report frameshift, and is high impact.
#[test]
fn minus_strand_deletion_spans_stop_into_3utr_frameshift_length() {
    let (_keep, genome) = write_test_fasta();
    let model = minus_strand_coding();
    let locator = build_index(&model);
    let ann =
        annotate_deletion("chr17", 10_995, 11_002, b"AAAAAAA", &model, &locator, &genome).unwrap();

    let has = |wanted: &Consequence| ann.consequences.contains(wanted);
    assert!(
        has(&Consequence::StopLost),
        "expected stop_lost, got {:?}",
        ann.consequences,
    );
    assert!(
        has(&Consequence::ThreePrimeUtrVariant),
        "expected 3_prime_UTR_variant, got {:?}",
        ann.consequences,
    );
    assert!(
        !has(&Consequence::FrameshiftVariant),
        "must NOT emit frameshift_variant, got {:?}",
        ann.consequences,
    );
    assert_eq!(ann.impact, Impact::High);
}
/// A 3-base deletion at chr2:150..153 against the non-coding fixture must be reported as
/// a non-coding-transcript-exon variant.
#[test]
fn non_coding_transcript_deletion() {
    let (_keep, genome) = write_test_fasta();
    let model = noncoding_2_exon();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr2", 150, 153, b"AAA", &model, &locator, &genome).unwrap();

    let in_nc_exon = ann
        .consequences
        .contains(&Consequence::NonCodingTranscriptExonVariant);
    assert!(in_nc_exon);
}
/// A 3-base deletion at chr1:1999..2002 must be called a splice-donor variant.
#[test]
fn exon_intron_boundary_deferred() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 1999, 2002, b"AAA", &model, &locator, &genome).unwrap();

    let hits_donor = ann.consequences.contains(&Consequence::SpliceDonorVariant);
    assert!(
        hits_donor,
        "expected splice_donor_variant, got {:?}",
        ann.consequences,
    );
}
/// A 5-base deletion at chr1:1998..2003 must include the splice-donor call and be
/// high impact.
#[test]
fn boundary_del_splice_donor_priority() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann =
        annotate_deletion("chr1", 1998, 2003, b"AAAAA", &model, &locator, &genome).unwrap();

    let hits_donor = ann.consequences.contains(&Consequence::SpliceDonorVariant);
    assert!(
        hits_donor,
        "splice donor should win over frame: {:?}",
        ann.consequences,
    );
    assert_eq!(ann.impact, Impact::High);
}
/// A 4-base deletion at chr1:4498..4502, starting inside the stop codon and running past
/// it, must report either a frameshift or a stop loss.
#[test]
fn boundary_del_cds_utr() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 4498, 4502, b"AAAA", &model, &locator, &genome).unwrap();

    let acceptable = [Consequence::FrameshiftVariant, Consequence::StopLost];
    let has_frame_or_stop = acceptable
        .iter()
        .any(|wanted| ann.consequences.contains(wanted));
    assert!(
        has_frame_or_stop,
        "expected frameshift or stop_lost for CDS/UTR boundary del: {:?}",
        ann.consequences,
    );
}
/// Inserting `AAA` at chr1:2000 must be called a splice-donor variant.
#[test]
fn insertion_at_exon_intron_junction() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_insertion("chr1", 2000, b"AAA", &model, &locator, &genome).unwrap();

    let hits_donor = ann.consequences.contains(&Consequence::SpliceDonorVariant);
    assert!(
        hits_donor,
        "insertion at donor junction: {:?}",
        ann.consequences,
    );
}
/// A large deletion spanning chr1:2500..3600 must report an essential splice call
/// (donor or acceptor). The ref allele passed is just `A`, not the full deleted sequence.
#[test]
fn multi_exon_del_full_exon() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 2500, 3600, b"A", &model, &locator, &genome).unwrap();

    let hits_donor = ann.consequences.contains(&Consequence::SpliceDonorVariant);
    let hits_splice_site =
        hits_donor || ann.consequences.contains(&Consequence::SpliceAcceptorVariant);
    assert!(
        hits_splice_site,
        "multi-exon del should have splice: {:?}",
        ann.consequences,
    );
}
/// Replacing `CGTC` with `AT` at chr1:1503 (net -2 bases) must be called a frameshift.
#[test]
fn delins_cds_frameshift() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann =
        annotate_complex_delins("chr1", 1503, b"CGTC", b"AT", &model, &locator, &genome).unwrap();

    let shifts_frame = ann.consequences.contains(&Consequence::FrameshiftVariant);
    assert!(
        shifts_frame,
        "delins -2 should be frameshift: {:?}",
        ann.consequences,
    );
}
/// Replacing `CGT` with `AAAAAA` at chr1:1503 (net +3 bases) must be called a
/// protein-altering variant.
#[test]
fn delins_cds_inframe() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_complex_delins("chr1", 1503, b"CGT", b"AAAAAA", &model, &locator, &genome)
        .unwrap();

    let alters_protein = ann
        .consequences
        .contains(&Consequence::ProteinAlteringVariant);
    assert!(
        alters_protein,
        "inframe delins should be protein_altering: {:?}",
        ann.consequences,
    );
}
/// A delins replacing three bases with `T` at chr1:1999 must be called a
/// splice-donor variant.
#[test]
fn delins_splice_overlap() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann =
        annotate_complex_delins("chr1", 1999, b"AAA", b"T", &model, &locator, &genome).unwrap();

    let hits_donor = ann.consequences.contains(&Consequence::SpliceDonorVariant);
    assert!(
        hits_donor,
        "delins at donor: {:?}",
        ann.consequences,
    );
}
/// A two-base MNV (`CG` -> `TA`) at chr1:1503, inside one codon, must be called
/// missense or stop-gained.
#[test]
fn mnv_single_codon_missense() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_mnv("chr1", 1503, b"CG", b"TA", &model, &locator, &genome).unwrap();

    let is_missense = ann.consequences.contains(&Consequence::MissenseVariant);
    let changes_protein = is_missense || ann.consequences.contains(&Consequence::StopGained);
    assert!(
        changes_protein,
        "MNV in single codon: {:?}",
        ann.consequences,
    );
}
/// A two-base MNV (`TC` -> `AA`) at chr1:1505, straddling two codons, must produce some
/// coding consequence: missense, stop-gained, or synonymous.
#[test]
fn mnv_two_codons() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_mnv("chr1", 1505, b"TC", b"AA", &model, &locator, &genome).unwrap();

    let coding_calls = [
        Consequence::MissenseVariant,
        Consequence::StopGained,
        Consequence::SynonymousVariant,
    ];
    let has_coding = coding_calls
        .iter()
        .any(|wanted| ann.consequences.contains(wanted));
    assert!(
        has_coding,
        "MNV spanning codons should have coding consequence: {:?}",
        ann.consequences,
    );
}
/// Rewriting the codon at chr1:1503 from `CGT` to `TAA` must be called stop-gained.
#[test]
fn mnv_creates_stop() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_mnv("chr1", 1503, b"CGT", b"TAA", &model, &locator, &genome).unwrap();

    let gains_stop = ann.consequences.contains(&Consequence::StopGained);
    assert!(
        gains_stop,
        "MNV creating stop: {:?}",
        ann.consequences,
    );
}
/// Rewriting the codon at chr1:1503 from `CGT` to `AGA` (both Arg) must be called
/// synonymous.
#[test]
fn mnv_synonymous() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_mnv("chr1", 1503, b"CGT", b"AGA", &model, &locator, &genome).unwrap();

    let is_synonymous = ann.consequences.contains(&Consequence::SynonymousVariant);
    assert!(
        is_synonymous,
        "CGT->AGA (Arg->Arg) should be synonymous: {:?}",
        ann.consequences,
    );
}
/// A 4-base deletion at chr17:17998..18002 on the minus-strand fixture must be called a
/// splice-donor variant.
#[test]
fn minus_strand_boundary_del() {
    let (_keep, genome) = write_test_fasta();
    let model = minus_strand_coding();
    let locator = build_index(&model);
    let ann =
        annotate_deletion("chr17", 17998, 18002, b"AAAA", &model, &locator, &genome).unwrap();

    let hits_donor = ann.consequences.contains(&Consequence::SpliceDonorVariant);
    assert!(
        hits_donor,
        "minus strand boundary del at donor: {:?}",
        ann.consequences,
    );
}
/// A two-base MNV (`AT` -> `CC`) at chr17:19498 on the minus-strand fixture must produce
/// a coding consequence: missense, start-lost, or stop-gained.
#[test]
fn minus_strand_mnv() {
    let (_keep, genome) = write_test_fasta();
    let model = minus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_mnv("chr17", 19498, b"AT", b"CC", &model, &locator, &genome).unwrap();

    let coding_calls = [
        Consequence::MissenseVariant,
        Consequence::StartLost,
        Consequence::StopGained,
    ];
    let has_coding = coding_calls
        .iter()
        .any(|wanted| ann.consequences.contains(wanted));
    assert!(
        has_coding,
        "minus strand MNV should have coding consequence: {:?}",
        ann.consequences,
    );
}
/// With the single-exon CDS stretched to end at 1402 (total length leaves remainder 1
/// modulo 3), an SNV at chr3:1401 must be called an incomplete-terminal-codon variant.
#[test]
fn incomplete_terminal_codon() {
    let (_keep, genome) = write_test_fasta();

    let mut model = single_exon_coding();
    model.cds_genomic_end = Some(1402);
    model.cds_segments[0].genomic_end = 1402;
    let locator = build_index(&model);
    // Sanity-check the premise: one dangling base after the last whole codon.
    assert_eq!(locator.total_cds_length() % 3, 1);

    let ann = annotate_snv("chr3", 1401, b'A', b'T', &model, &locator, &genome).unwrap();
    let in_partial_codon = ann
        .consequences
        .contains(&Consequence::IncompleteTerminalCodonVariant);
    assert!(
        in_partial_codon,
        "variant in incomplete terminal codon: {:?}",
        ann.consequences,
    );
}
/// Regression guard: the 3-base deletion at chr1:1999..2002 must be called splice-donor
/// and must not fall back to `ProteinAlteringVariant`.
#[test]
fn splice_donor_boundary_regression() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 1999, 2002, b"AAA", &model, &locator, &genome).unwrap();

    let has = |wanted: &Consequence| ann.consequences.contains(wanted);
    assert!(
        !has(&Consequence::ProteinAlteringVariant),
        "should not return old placeholder: {:?}",
        ann.consequences,
    );
    assert!(
        has(&Consequence::SpliceDonorVariant),
        "should be splice_donor_variant: {:?}",
        ann.consequences,
    );
}
/// Through the top-level `annotate` entry point, `CGTC` -> `TG` at chr1:1503 must yield
/// at least one annotation.
#[test]
fn trim_complex_dispatches_delins() {
    let (_keep, genome) = write_test_fasta();
    let store = crate::TranscriptStore::from_transcripts(vec![plus_strand_coding()]);

    let annotations = annotate("chr1", 1503, b"CGTC", b"TG", &store, &genome).unwrap();
    assert!(
        !annotations.is_empty(),
        "should annotate against the transcript"
    );
}
/// Through the top-level `annotate` entry point, `CG` -> `TA` at chr1:1503 must yield
/// at least one annotation.
#[test]
fn trim_mnv_dispatches() {
    let (_keep, genome) = write_test_fasta();
    let store = crate::TranscriptStore::from_transcripts(vec![plus_strand_coding()]);

    let annotations = annotate("chr1", 1503, b"CG", b"TA", &store, &genome).unwrap();
    assert!(!annotations.is_empty(), "should annotate MNV");
}
/// An SNV at chr1:50 must yield exactly one annotation: a modifier-impact intergenic
/// record with empty transcript and gene symbol, no protein or CDS position, and no
/// NMD prediction.
#[test]
fn intergenic_no_overlap() {
    let (_keep, genome) = write_test_fasta();
    let store = crate::TranscriptStore::from_transcripts(vec![plus_strand_coding()]);

    let annotations = annotate("chr1", 50, b"A", b"T", &store, &genome).unwrap();
    assert_eq!(annotations.len(), 1);

    let only = &annotations[0];
    assert_eq!(only.consequences, vec![Consequence::IntergenicVariant]);
    assert_eq!(only.impact, Impact::Modifier);
    assert!(only.transcript.is_empty());
    assert!(only.gene_symbol.is_empty());
    assert!(only.protein_start.is_none());
    assert!(only.cds_position.is_none());
    assert!(!only.predicts_nmd);
}
/// With only a chr1 transcript in the store, an SNV on chr2 must yield exactly one
/// intergenic annotation.
#[test]
fn intergenic_different_chrom() {
    let (_keep, genome) = write_test_fasta();
    let store = crate::TranscriptStore::from_transcripts(vec![plus_strand_coding()]);

    let annotations = annotate("chr2", 150, b"A", b"T", &store, &genome).unwrap();
    assert_eq!(annotations.len(), 1);
    assert_eq!(
        annotations[0].consequences,
        vec![Consequence::IntergenicVariant]
    );
}
/// An SNV at chr1:1505 must yield at least one annotation, and the first one must not be
/// flagged intergenic.
#[test]
fn non_intergenic_has_transcript() {
    let (_keep, genome) = write_test_fasta();
    let store = crate::TranscriptStore::from_transcripts(vec![plus_strand_coding()]);

    let annotations = annotate("chr1", 1505, b"T", b"A", &store, &genome).unwrap();
    assert!(!annotations.is_empty());

    let first_is_intergenic = annotations[0]
        .consequences
        .contains(&Consequence::IntergenicVariant);
    assert!(!first_is_intergenic);
}
/// A stop gain in the single-exon stop-gained fixture must not be predicted to
/// trigger NMD.
#[test]
fn stop_gained_single_exon_no_nmd() {
    let (_keep, genome) = write_stop_gained_fasta();
    let model = stop_gained_transcript();
    let locator = build_index(&model);
    let ann = annotate_snv("chr1", 103, b'C', b'T', &model, &locator, &genome).unwrap();

    let gains_stop = ann.consequences.contains(&Consequence::StopGained);
    assert!(gains_stop);
    assert!(!ann.predicts_nmd);
}
/// A 1-base frameshift deletion at chr1:1503 in the plus-strand coding fixture must be
/// predicted to trigger NMD.
#[test]
fn frameshift_multi_exon_nmd() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 1503, 1504, b"C", &model, &locator, &genome).unwrap();

    let shifts_frame = ann.consequences.contains(&Consequence::FrameshiftVariant);
    assert!(shifts_frame);
    assert!(ann.predicts_nmd);
}
/// A 2-base frameshift deletion at chr1:1503..1505 must be reported as a frameshift only
/// (never stop-gained) and must still be predicted to trigger NMD.
#[test]
fn frameshift_immediate_stop_still_gets_nmd() {
    let (_keep, genome) = write_test_fasta();
    let model = plus_strand_coding();
    let locator = build_index(&model);
    let ann = annotate_deletion("chr1", 1503, 1505, b"CG", &model, &locator, &genome).unwrap();

    let has = |wanted: &Consequence| ann.consequences.contains(wanted);
    assert!(
        has(&Consequence::FrameshiftVariant),
        "expected FrameshiftVariant, got: {:?}",
        ann.consequences,
    );
    assert!(
        !has(&Consequence::StopGained),
        "should not contain StopGained for a frameshift: {:?}",
        ann.consequences,
    );
    assert!(ann.predicts_nmd);
}