mod complex;
pub(crate) mod helpers;
mod indel;
mod nmd;
mod snv;
#[cfg(test)]
mod tests;
pub use indel::{annotate_deletion, annotate_insertion};
pub use snv::annotate_snv;
use crate::error::VarEffectError;
use crate::fasta::FastaReader;
use crate::transcript::TranscriptStore;
use crate::types::Biotype;
/// Coarse impact tier reported for a [`Consequence`].
///
/// The variants are declared lowest tier first, so the derived `Ord` yields
/// `Modifier < Low < Moderate < High`. Reordering the variants would change
/// every comparison made on this type.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Impact {
    /// Lowest tier; displayed as `MODIFIER`.
    Modifier,
    /// Displayed as `LOW`.
    Low,
    /// Displayed as `MODERATE`.
    Moderate,
    /// Highest tier; displayed as `HIGH`.
    High,
}
impl std::fmt::Display for Impact {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(match self {
Self::Modifier => "MODIFIER",
Self::Low => "LOW",
Self::Moderate => "MODERATE",
Self::High => "HIGH",
})
}
}
/// Consequence categories that can be assigned to a variant.
///
/// Each variant is documented with the term string returned by
/// [`Consequence::as_str`]. The names look like Sequence Ontology terms as
/// used by Ensembl VEP, but that is not confirmed from this file.
///
/// Declaration order matches [`Consequence::severity_rank`] (the first
/// variant has rank 1), so the derived `Ord` sorts the lowest rank first.
/// Do not reorder variants without updating both.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Consequence {
    /// `transcript_ablation`
    TranscriptAblation,
    /// `splice_acceptor_variant`
    SpliceAcceptorVariant,
    /// `splice_donor_variant`
    SpliceDonorVariant,
    /// `stop_gained`
    StopGained,
    /// `frameshift_variant`
    FrameshiftVariant,
    /// `stop_lost`
    StopLost,
    /// `start_lost`
    StartLost,
    /// `inframe_insertion`
    InframeInsertion,
    /// `inframe_deletion`
    InframeDeletion,
    /// `missense_variant`
    MissenseVariant,
    /// `protein_altering_variant`
    ProteinAlteringVariant,
    /// `splice_region_variant`
    SpliceRegionVariant,
    /// `start_retained_variant`
    StartRetainedVariant,
    /// `stop_retained_variant`
    StopRetainedVariant,
    /// `incomplete_terminal_codon_variant`
    IncompleteTerminalCodonVariant,
    /// `synonymous_variant`
    SynonymousVariant,
    /// `coding_sequence_variant`
    CodingSequenceVariant,
    /// `5_prime_UTR_variant`
    FivePrimeUtrVariant,
    /// `3_prime_UTR_variant`
    ThreePrimeUtrVariant,
    /// `non_coding_transcript_exon_variant`
    NonCodingTranscriptExonVariant,
    /// `intron_variant`
    IntronVariant,
    /// `upstream_gene_variant`
    UpstreamGeneVariant,
    /// `downstream_gene_variant`
    DownstreamGeneVariant,
    /// `intergenic_variant`
    IntergenicVariant,
}
impl Consequence {
pub fn impact(&self) -> Impact {
match self {
Self::TranscriptAblation
| Self::SpliceAcceptorVariant
| Self::SpliceDonorVariant
| Self::StopGained
| Self::FrameshiftVariant
| Self::StopLost
| Self::StartLost => Impact::High,
Self::InframeInsertion
| Self::InframeDeletion
| Self::MissenseVariant
| Self::ProteinAlteringVariant => Impact::Moderate,
Self::SpliceRegionVariant
| Self::StartRetainedVariant
| Self::StopRetainedVariant
| Self::IncompleteTerminalCodonVariant
| Self::SynonymousVariant => Impact::Low,
Self::CodingSequenceVariant
| Self::FivePrimeUtrVariant
| Self::ThreePrimeUtrVariant
| Self::NonCodingTranscriptExonVariant
| Self::IntronVariant
| Self::UpstreamGeneVariant
| Self::DownstreamGeneVariant
| Self::IntergenicVariant => Impact::Modifier,
}
}
pub fn severity_rank(&self) -> u8 {
match self {
Self::TranscriptAblation => 1,
Self::SpliceAcceptorVariant => 2,
Self::SpliceDonorVariant => 3,
Self::StopGained => 4,
Self::FrameshiftVariant => 5,
Self::StopLost => 6,
Self::StartLost => 7,
Self::InframeInsertion => 8,
Self::InframeDeletion => 9,
Self::MissenseVariant => 10,
Self::ProteinAlteringVariant => 11,
Self::SpliceRegionVariant => 12,
Self::StartRetainedVariant => 13,
Self::StopRetainedVariant => 14,
Self::IncompleteTerminalCodonVariant => 15,
Self::SynonymousVariant => 16,
Self::CodingSequenceVariant => 17,
Self::FivePrimeUtrVariant => 18,
Self::ThreePrimeUtrVariant => 19,
Self::NonCodingTranscriptExonVariant => 20,
Self::IntronVariant => 21,
Self::UpstreamGeneVariant => 22,
Self::DownstreamGeneVariant => 23,
Self::IntergenicVariant => 24,
}
}
pub fn as_str(&self) -> &'static str {
match self {
Self::TranscriptAblation => "transcript_ablation",
Self::SpliceAcceptorVariant => "splice_acceptor_variant",
Self::SpliceDonorVariant => "splice_donor_variant",
Self::StopGained => "stop_gained",
Self::FrameshiftVariant => "frameshift_variant",
Self::StopLost => "stop_lost",
Self::StartLost => "start_lost",
Self::InframeInsertion => "inframe_insertion",
Self::InframeDeletion => "inframe_deletion",
Self::MissenseVariant => "missense_variant",
Self::ProteinAlteringVariant => "protein_altering_variant",
Self::SpliceRegionVariant => "splice_region_variant",
Self::StartRetainedVariant => "start_retained_variant",
Self::StopRetainedVariant => "stop_retained_variant",
Self::IncompleteTerminalCodonVariant => "incomplete_terminal_codon_variant",
Self::SynonymousVariant => "synonymous_variant",
Self::CodingSequenceVariant => "coding_sequence_variant",
Self::FivePrimeUtrVariant => "5_prime_UTR_variant",
Self::ThreePrimeUtrVariant => "3_prime_UTR_variant",
Self::NonCodingTranscriptExonVariant => "non_coding_transcript_exon_variant",
Self::IntronVariant => "intron_variant",
Self::UpstreamGeneVariant => "upstream_gene_variant",
Self::DownstreamGeneVariant => "downstream_gene_variant",
Self::IntergenicVariant => "intergenic_variant",
}
}
}
/// One annotation record, as returned (in a `Vec`) by `annotate`.
///
/// `annotate` pushes one record per overlapping transcript. When nothing
/// overlaps it returns a single placeholder with empty `transcript` /
/// `gene_symbol`, every `Option` set to `None`, all flags `false`, and
/// `consequences == [IntergenicVariant]`.
///
/// Field order is left as-is because it is visible through the derived
/// `Debug` output.
#[derive(Clone, Debug, PartialEq)]
pub struct ConsequenceResult {
    /// Transcript this record refers to (presumably an accession; confirm
    /// against the per-type annotators). Empty for the intergenic placeholder.
    pub transcript: String,
    /// Gene symbol; empty for the intergenic placeholder.
    pub gene_symbol: String,
    /// Protein accession, when one is available.
    pub protein_accession: Option<String>,
    /// Consequence terms assigned to the variant for this transcript.
    pub consequences: Vec<Consequence>,
    /// Impact tier for this record. NOTE(review): presumably the highest
    /// impact among `consequences`; this file only shows it set to
    /// `Modifier` for the placeholder.
    pub impact: Impact,
    /// Start of the affected protein range, if any (numbering convention not
    /// shown in this file).
    pub protein_start: Option<u32>,
    /// End of the affected protein range, if any.
    pub protein_end: Option<u32>,
    /// Codon change description, if any (format defined by the annotators).
    pub codons: Option<String>,
    /// Amino-acid change description, if any (format defined by the annotators).
    pub amino_acids: Option<String>,
    /// Exon label, if the variant was placed in an exon.
    pub exon: Option<String>,
    /// Intron label, if the variant was placed in an intron.
    pub intron: Option<String>,
    /// CDS start position, if any.
    pub cds_position: Option<u32>,
    /// CDS end position, if any.
    pub cds_position_end: Option<u32>,
    /// cDNA start position, if any.
    pub cdna_position: Option<u32>,
    /// cDNA end position, if any.
    pub cdna_position_end: Option<u32>,
    /// Transcript strand; `Strand::Plus` in the intergenic placeholder.
    pub strand: crate::types::Strand,
    /// Transcript biotype; `Biotype::Unknown` in the intergenic placeholder.
    pub biotype: Biotype,
    /// Transcript flag (presumably MANE Select membership).
    pub is_mane_select: bool,
    /// Transcript flag (presumably MANE Plus Clinical membership).
    pub is_mane_plus_clinical: bool,
    /// Transcript flag (presumably RefSeq Select membership).
    pub is_refseq_select: bool,
    /// Coding-level HGVS string, if one was produced.
    pub hgvs_c: Option<String>,
    /// Protein-level HGVS string, if one was produced.
    pub hgvs_p: Option<String>,
    /// Whether the record is predicted to trigger NMD (presumably computed by
    /// the `nmd` module; confirm).
    pub predicts_nmd: bool,
}
/// Annotates a single variant against every transcript it overlaps.
///
/// Processing order:
///
/// 1. Unless `ref_allele` is empty, it is checked with `fasta.verify_ref`.
///    On a mismatch the reference bases for
///    `pos .. pos + ref_allele.len()` are fetched and reported in
///    `VarEffectError::RefMismatch`.
/// 2. `helpers::trim_alleles` trims both alleles; `pos` is advanced by the
///    offset it returns.
/// 3. The trimmed lengths `(ref, alt)` select the annotator:
///    * `(1, 1)` — SNV,
///    * `(0, a)` with `a > 0` — insertion (the transcript query is widened by
///      one position on each side of `trimmed_pos`),
///    * `(n, 0)` with `n > 0` — deletion,
///    * equal lengths greater than 1 — MNV,
///    * any other pair with both lengths non-zero — complex delins,
///    * `(0, 0)` — rejected as `VarEffectError::Malformed`.
/// 4. If no transcript overlapped, a single placeholder record with
///    `Consequence::IntergenicVariant` is returned.
///
/// # Errors
///
/// * `VarEffectError::RefMismatch` when `ref_allele` disagrees with the FASTA.
/// * `VarEffectError::Malformed` when both alleles are empty after trimming.
/// * Any error propagated from the FASTA reader or the per-type annotators.
pub(crate) fn annotate(
    chrom: &str,
    pos: u64,
    ref_allele: &[u8],
    alt_allele: &[u8],
    store: &TranscriptStore,
    fasta: &FastaReader,
) -> Result<Vec<ConsequenceResult>, VarEffectError> {
    // An empty REF cannot be verified, so the check is skipped for it.
    if !ref_allele.is_empty() && !fasta.verify_ref(chrom, pos, ref_allele)? {
        // Fetch what the reference actually contains so the error is actionable.
        let expected = fasta.fetch_sequence(chrom, pos, pos + ref_allele.len() as u64)?;
        return Err(VarEffectError::RefMismatch {
            chrom: chrom.to_string(),
            pos,
            expected: String::from_utf8_lossy(&expected).into_owned(),
            got: String::from_utf8_lossy(ref_allele).into_owned(),
        });
    }

    let (trimmed_ref, trimmed_alt, pos_adj) = helpers::trim_alleles(ref_allele, alt_allele);
    let trimmed_pos = pos + pos_adj;

    let results = match (trimmed_ref.len(), trimmed_alt.len()) {
        // Single-base substitution.
        (1, 1) => {
            let overlaps = store.query_overlap(chrom, trimmed_pos, trimmed_pos + 1);
            let mut results = Vec::with_capacity(overlaps.len());
            for (tx, idx) in overlaps {
                results.push(snv::annotate_snv_verified(
                    chrom,
                    trimmed_pos,
                    trimmed_ref[0],
                    trimmed_alt[0],
                    tx,
                    idx,
                    fasta,
                )?);
            }
            results
        }
        // Pure insertion. The `a > 0` guard keeps the (0, 0) case — alleles
        // identical after trimming — out of this arm so that it reaches the
        // `Malformed` error at the bottom instead of being annotated as an
        // insertion of nothing.
        (0, a) if a > 0 => {
            // Widen the query by one position on each side of the insertion
            // point; `saturating_sub` guards position 0.
            let query_start = trimmed_pos.saturating_sub(1);
            let query_end = trimmed_pos + 1;
            let overlaps = store.query_overlap(chrom, query_start, query_end);
            let mut results = Vec::with_capacity(overlaps.len());
            for (tx, idx) in overlaps {
                results.push(annotate_insertion(
                    chrom,
                    trimmed_pos,
                    trimmed_alt,
                    tx,
                    idx,
                    fasta,
                )?);
            }
            results
        }
        // Pure deletion of `n` bases.
        (n, 0) if n > 0 => {
            let del_end = trimmed_pos + n as u64;
            let overlaps = store.query_overlap(chrom, trimmed_pos, del_end);
            let mut results = Vec::with_capacity(overlaps.len());
            for (tx, idx) in overlaps {
                results.push(annotate_deletion(
                    chrom,
                    trimmed_pos,
                    del_end,
                    trimmed_ref,
                    tx,
                    idx,
                    fasta,
                )?);
            }
            results
        }
        // Equal-length substitution of more than one base (MNV).
        (r, a) if r == a && r > 1 => {
            let query_end = trimmed_pos + r as u64;
            let overlaps = store.query_overlap(chrom, trimmed_pos, query_end);
            let mut results = Vec::with_capacity(overlaps.len());
            for (tx, idx) in overlaps {
                results.push(complex::annotate_mnv(
                    chrom,
                    trimmed_pos,
                    trimmed_ref,
                    trimmed_alt,
                    tx,
                    idx,
                    fasta,
                )?);
            }
            results
        }
        // Both alleles non-empty with different lengths: complex delins.
        (r, a) if r > 0 && a > 0 => {
            let query_end = trimmed_pos + r as u64;
            let overlaps = store.query_overlap(chrom, trimmed_pos, query_end);
            let mut results = Vec::with_capacity(overlaps.len());
            for (tx, idx) in overlaps {
                results.push(complex::annotate_complex_delins(
                    chrom,
                    trimmed_pos,
                    trimmed_ref,
                    trimmed_alt,
                    tx,
                    idx,
                    fasta,
                )?);
            }
            results
        }
        // Only (0, 0) is left: nothing remains of either allele.
        _ => {
            return Err(VarEffectError::Malformed(
                "REF and ALT alleles are identical after trimming".to_string(),
            ));
        }
    };

    // No overlapping transcript: report a single intergenic placeholder.
    if results.is_empty() {
        return Ok(vec![ConsequenceResult {
            transcript: String::new(),
            gene_symbol: String::new(),
            protein_accession: None,
            consequences: vec![Consequence::IntergenicVariant],
            impact: Impact::Modifier,
            protein_start: None,
            protein_end: None,
            codons: None,
            amino_acids: None,
            exon: None,
            intron: None,
            cds_position: None,
            cds_position_end: None,
            cdna_position: None,
            cdna_position_end: None,
            strand: crate::types::Strand::Plus,
            biotype: Biotype::Unknown,
            is_mane_select: false,
            is_mane_plus_clinical: false,
            is_refseq_select: false,
            hgvs_c: None,
            hgvs_p: None,
            predicts_nmd: false,
        }]);
    }

    Ok(results)
}