use std::{collections::HashMap, sync::Arc};
use crate::{
annotate::seqvars::provider::Provider as MehariProvider,
pbs::txs::{Strand, Transcript},
};
#[derive(
serde::Serialize, serde::Deserialize, PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy,
)]
#[cfg_attr(feature = "server", derive(utoipa::ToSchema))]
#[serde(rename_all = "snake_case")]
pub enum StrucvarsTranscriptEffect {
TranscriptVariant,
ExonVariant,
SpliceRegionVariant,
IntronVariant,
UpstreamVariant,
DownstreamVariant,
IntergenicVariant,
}
impl StrucvarsTranscriptEffect {
pub fn vec_all() -> Vec<StrucvarsTranscriptEffect> {
vec![
StrucvarsTranscriptEffect::TranscriptVariant,
StrucvarsTranscriptEffect::ExonVariant,
StrucvarsTranscriptEffect::SpliceRegionVariant,
StrucvarsTranscriptEffect::IntronVariant,
StrucvarsTranscriptEffect::UpstreamVariant,
StrucvarsTranscriptEffect::DownstreamVariant,
StrucvarsTranscriptEffect::IntergenicVariant,
]
}
}
pub mod interface {
#[derive(
serde::Serialize,
serde::Deserialize,
Debug,
Clone,
Copy,
PartialEq,
Eq,
PartialOrd,
Ord,
Hash,
)]
#[cfg_attr(feature = "server", derive(utoipa::ToSchema))]
pub enum StrucvarsSvType {
#[serde(rename = "DEL")]
Del,
#[serde(rename = "DUP")]
Dup,
#[serde(rename = "INS")]
Ins,
#[serde(rename = "INV")]
Inv,
#[serde(rename = "BND")]
Bnd,
#[serde(rename = "CNV")]
Cnv,
}
#[derive(
serde::Serialize,
serde::Deserialize,
Debug,
Clone,
Default,
PartialEq,
Eq,
PartialOrd,
Ord,
Hash,
)]
pub enum StrandOrientation {
#[serde(rename = "3to3")]
ThreeToThree,
#[serde(rename = "5to5")]
FiveToFive,
#[serde(rename = "3to5")]
ThreeToFive,
#[serde(rename = "5to3")]
FiveToThree,
#[serde(rename = "NtoN")]
#[default]
NotApplicable,
}
pub trait StrucVar {
fn chrom(&self) -> String;
fn chrom2(&self) -> String;
fn start(&self) -> i32;
fn stop(&self) -> i32;
fn sv_type(&self) -> StrucvarsSvType;
fn strand_orientation(&self) -> StrandOrientation;
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct TxRegion {
begin: i32,
end: i32,
no: usize,
effect: StrucvarsTranscriptEffect,
}
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
#[cfg_attr(feature = "server", derive(utoipa::ToSchema))]
pub struct StrucvarsGeneTranscriptEffects {
hgnc_id: String,
transcript_effects: Vec<StrucvarsTranscriptEffect>,
}
static X_STREAM: i32 = 5000;
fn tx_regions(tx: &Transcript) -> Vec<TxRegion> {
assert_eq!(
tx.genome_alignments.len(),
1,
"only one alignment supported"
);
if tx.genome_alignments[0].exons.is_empty() {
return Vec::new();
}
let mut result = Vec::new();
let mut tx_start = None;
let mut tx_end = None;
let genome_alignment = &tx.genome_alignments[0];
for exon_alignment in &genome_alignment.exons {
if let Some(value) = tx_start {
if exon_alignment.alt_start_i < value {
tx_start = Some(exon_alignment.alt_start_i);
}
} else {
tx_start = Some(exon_alignment.alt_start_i);
}
if let Some(value) = tx_end {
if exon_alignment.alt_end_i > value {
tx_end = Some(exon_alignment.alt_end_i);
}
} else {
tx_end = Some(exon_alignment.alt_end_i);
}
}
let tx_start = tx_start.expect("must have been set");
let tx_end = tx_end.expect("must have been set");
let mut prev_alt_end_i = 0;
for (no, exon_alignment) in genome_alignment.exons.iter().enumerate() {
if exon_alignment.alt_start_i == tx_start {
result.push(TxRegion {
begin: exon_alignment.alt_start_i - X_STREAM,
end: exon_alignment.alt_start_i - 1,
no,
effect: if genome_alignment.strand == Strand::Plus as i32 {
StrucvarsTranscriptEffect::UpstreamVariant
} else {
StrucvarsTranscriptEffect::DownstreamVariant
},
});
} else {
result.push(TxRegion {
begin: (exon_alignment.alt_start_i - 1) - 8,
end: (exon_alignment.alt_start_i - 1) + 3,
no,
effect: StrucvarsTranscriptEffect::SpliceRegionVariant,
})
}
if exon_alignment.alt_end_i == tx_end {
result.push(TxRegion {
begin: exon_alignment.alt_end_i,
end: exon_alignment.alt_end_i + X_STREAM,
no,
effect: if genome_alignment.strand == Strand::Plus as i32 {
StrucvarsTranscriptEffect::DownstreamVariant
} else {
StrucvarsTranscriptEffect::UpstreamVariant
},
});
} else {
result.push(TxRegion {
begin: exon_alignment.alt_end_i - 3,
end: exon_alignment.alt_end_i + 8,
no,
effect: StrucvarsTranscriptEffect::SpliceRegionVariant,
})
}
result.push(TxRegion {
begin: exon_alignment.alt_start_i - 1,
end: exon_alignment.alt_end_i,
no,
effect: StrucvarsTranscriptEffect::ExonVariant,
});
if exon_alignment.alt_start_i != tx_start {
result.push(TxRegion {
begin: prev_alt_end_i,
end: exon_alignment.alt_start_i - 1,
no,
effect: StrucvarsTranscriptEffect::IntronVariant,
});
}
prev_alt_end_i = exon_alignment.alt_end_i;
}
result
}
fn gene_tx_effects_for_bp(tx: &Transcript, pos: i32) -> Vec<StrucvarsTranscriptEffect> {
let regions = tx_regions(tx);
let pos = pos - 1; let mut result = regions
.iter()
.filter(|r| r.begin <= pos && pos < r.end)
.map(|r| r.effect)
.collect::<Vec<_>>();
if result.is_empty() {
result.push(StrucvarsTranscriptEffect::IntergenicVariant);
} else {
result.sort();
result.dedup();
}
result
}
fn gene_tx_effect_for_range(
tx: &Transcript,
start: i32,
stop: i32,
) -> Vec<StrucvarsTranscriptEffect> {
let regions = tx_regions(tx);
let pos = start - 1; let mut result = regions
.iter()
.filter(|region| pos <= region.end && region.begin <= stop)
.map(|region| region.effect)
.collect::<Vec<_>>();
result.sort();
result.dedup();
if result.contains(&StrucvarsTranscriptEffect::UpstreamVariant)
&& result.contains(&StrucvarsTranscriptEffect::DownstreamVariant)
{
result.push(StrucvarsTranscriptEffect::TranscriptVariant);
}
result
}
fn compute_tx_effects_for_breakpoint(
sv: &impl interface::StrucVar,
mehari_provider: &MehariProvider,
) -> Vec<StrucvarsGeneTranscriptEffects> {
let tx_db = mehari_provider
.tx_seq_db
.tx_db
.as_ref()
.expect("transcripts must be present");
let chrom_acc = mehari_provider.contig_manager.get_accession(&sv.chrom());
if chrom_acc.is_none() {
return Default::default();
}
let chrom_acc = chrom_acc.expect("chromosome must be known at this point");
let query = (sv.start() - X_STREAM)..(sv.start() + X_STREAM);
if let Some(idx) = mehari_provider.tx_trees.contig_to_idx.get(chrom_acc) {
let mut effects_by_gene: HashMap<_, Vec<_>> = HashMap::new();
let tree = &mehari_provider.tx_trees.trees[*idx];
for it in tree.find(query) {
let tx = &tx_db.transcripts[*it.data() as usize];
effects_by_gene
.entry(tx.gene_id.clone())
.or_default()
.extend(gene_tx_effects_for_bp(tx, sv.start()));
}
effects_by_gene.iter_mut().for_each(|(_, v)| {
v.sort();
v.dedup()
});
effects_by_gene
.into_iter()
.map(
|(hgnc_id, transcript_effects)| StrucvarsGeneTranscriptEffects {
hgnc_id,
transcript_effects,
},
)
.collect()
} else {
Default::default()
}
}
fn compute_tx_effects_for_linear(
sv: &impl interface::StrucVar,
mehari_provider: &MehariProvider,
) -> Vec<StrucvarsGeneTranscriptEffects> {
let tx_db = mehari_provider
.tx_seq_db
.tx_db
.as_ref()
.expect("transcripts must be present");
let chrom_acc = mehari_provider.contig_manager.get_accession(&sv.chrom());
if chrom_acc.is_none() {
return Default::default();
}
let chrom_acc = chrom_acc.expect("chromosome must be known at this point");
let query = (sv.start() - X_STREAM)..(sv.stop() + X_STREAM);
if let Some(idx) = mehari_provider.tx_trees.contig_to_idx.get(chrom_acc) {
let mut effects_by_gene: HashMap<_, Vec<_>> = HashMap::new();
let tree = &mehari_provider.tx_trees.trees[*idx];
for it in tree.find(query) {
let tx = &tx_db.transcripts[*it.data() as usize];
effects_by_gene
.entry(tx.gene_id.clone())
.or_default()
.extend(gene_tx_effect_for_range(tx, sv.start(), sv.stop()));
}
effects_by_gene.iter_mut().for_each(|(_, v)| {
v.sort();
v.dedup()
});
effects_by_gene
.into_iter()
.map(
|(hgnc_id, transcript_effects)| StrucvarsGeneTranscriptEffects {
hgnc_id,
transcript_effects,
},
)
.collect()
} else {
Default::default()
}
}
#[derive(derivative::Derivative)]
#[derivative(Debug)]
pub struct ConsequencePredictor {
#[derivative(Debug = "ignore")]
provider: Arc<MehariProvider>,
}
impl ConsequencePredictor {
pub fn new(provider: Arc<MehariProvider>) -> Self {
ConsequencePredictor { provider }
}
pub fn compute_tx_effects(
&self,
sv: &impl interface::StrucVar,
) -> Vec<StrucvarsGeneTranscriptEffects> {
match sv.sv_type() {
interface::StrucvarsSvType::Ins | interface::StrucvarsSvType::Bnd => {
compute_tx_effects_for_breakpoint(sv, &self.provider)
}
interface::StrucvarsSvType::Del
| interface::StrucvarsSvType::Dup
| interface::StrucvarsSvType::Inv
| interface::StrucvarsSvType::Cnv => compute_tx_effects_for_linear(sv, &self.provider),
}
}
pub fn data_version(&self) -> Option<String> {
self.provider.as_ref().tx_seq_db.version.clone()
}
}