use axum::{extract::State, http::StatusCode, response::Json};
use std::time::Instant;
use crate::hgvs::location::CdsPos;
use crate::service::{
server::AppState,
types::{
EffectRequest, EffectResponse, ErrorResponse, NmdPrediction, ProteinConsequence,
SequenceEffect, ServiceError,
},
validation::validate_hgvs,
};
pub async fn predict_effect(
State(state): State<AppState>,
Json(request): Json<EffectRequest>,
) -> Result<Json<EffectResponse>, (StatusCode, Json<ErrorResponse>)> {
let start = Instant::now();
if let Err(validation_error) = validate_hgvs(&request.hgvs) {
let error = ServiceError::InvalidHgvs(validation_error.to_string());
return Err((
StatusCode::from_u16(error.status_code()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR),
Json(error.to_response()),
));
}
let hgvs_str = request.hgvs.clone();
let parse_result =
tokio::task::spawn_blocking(move || crate::hgvs::parser::parse_hgvs_lenient(&hgvs_str))
.await
.map_err(|e| {
let error = ServiceError::InternalError(format!("Task error: {}", e));
(StatusCode::INTERNAL_SERVER_ERROR, Json(error.to_response()))
})?;
let elapsed_ms = start.elapsed().as_millis() as u64;
match parse_result {
Ok(result) => {
let (effect, protein_consequence, nmd_prediction) =
predict_from_variant(&state, &result.result, request.include_nmd);
Ok(Json(EffectResponse {
input: request.hgvs,
effect,
protein_consequence,
nmd_prediction,
error: None,
processing_time_ms: elapsed_ms,
}))
}
Err(e) => Ok(Json(EffectResponse {
input: request.hgvs,
effect: None,
protein_consequence: None,
nmd_prediction: None,
error: Some(format!("Failed to parse input: {}", e)),
processing_time_ms: elapsed_ms,
})),
}
}
fn predict_from_variant(
state: &AppState,
variant: &crate::hgvs::variant::HgvsVariant,
include_nmd: bool,
) -> (
Option<SequenceEffect>,
Option<ProteinConsequence>,
Option<NmdPrediction>,
) {
use crate::hgvs::variant::HgvsVariant;
match variant {
HgvsVariant::Cds(v) => {
let accession = v.accession.to_string();
let cds_pos = extract_cds_position(&v.loc_edit);
let (edit_type, is_frameshift, ref_len, alt_len) =
if let Some(edit) = v.loc_edit.edit.inner() {
analyze_na_edit(edit)
} else {
("unknown", false, 0, 0)
};
let is_intronic = cds_pos.is_intronic();
let effect = predict_cds_effect(edit_type, is_intronic, is_frameshift, &cds_pos);
let protein_consequence = if !is_intronic && !cds_pos.utr3 && cds_pos.base > 0 {
predict_protein_consequence(
state, &accession, &cds_pos, edit_type, ref_len, alt_len,
)
} else {
None
};
let nmd_prediction = if include_nmd {
predict_nmd_for_cds(state, &accession, &cds_pos, is_frameshift, edit_type)
} else {
None
};
(Some(effect), protein_consequence, nmd_prediction)
}
HgvsVariant::Genome(v) => {
if let Some(edit) = v.loc_edit.edit.inner() {
let (edit_type, _, _, _) = analyze_na_edit(edit);
let effect = predict_genomic_effect(edit_type);
(Some(effect), None, None)
} else {
(None, None, None)
}
}
HgvsVariant::Protein(v) => {
let effect = SequenceEffect {
so_term: "SO:0001583".to_string(),
name: "missense_variant".to_string(),
description: "A sequence variant that changes one or more bases".to_string(),
impact: "MODERATE".to_string(),
};
let protein = ProteinConsequence {
hgvs_p: format!("{}", v),
ref_aa: "".to_string(),
alt_aa: "".to_string(),
position: 0,
is_frameshift: false,
};
(Some(effect), Some(protein), None)
}
HgvsVariant::Tx(v) => {
if let Some(edit) = v.loc_edit.edit.inner() {
let (edit_type, _, _, _) = analyze_na_edit(edit);
let effect = SequenceEffect {
so_term: "SO:0001619".to_string(),
name: "non_coding_transcript_variant".to_string(),
description: format!("A {} in a non-coding transcript", edit_type),
impact: "MODIFIER".to_string(),
};
(Some(effect), None, None)
} else {
(None, None, None)
}
}
_ => (None, None, None),
}
}
fn extract_cds_position(
loc_edit: &crate::hgvs::variant::LocEdit<
crate::hgvs::interval::CdsInterval,
crate::hgvs::edit::NaEdit,
>,
) -> CdsPos {
let interval = &loc_edit.location;
if let Some(start) = interval.start.inner() {
CdsPos {
base: start.base,
offset: start.offset,
utr3: start.utr3,
}
} else {
CdsPos::new(1)
}
}
fn analyze_na_edit(edit: &crate::hgvs::edit::NaEdit) -> (&'static str, bool, usize, usize) {
use crate::hgvs::edit::NaEdit;
match edit {
NaEdit::Substitution {
reference,
alternative,
} => (
"substitution",
false,
reference.to_string().len(),
alternative.to_string().len(),
),
NaEdit::SubstitutionNoRef { alternative } => {
("substitution", false, 1, alternative.to_string().len())
}
NaEdit::Deletion { sequence, length } => {
let len = sequence
.as_ref()
.map(|s| s.to_string().len())
.or(length.map(|l| l as usize))
.unwrap_or(1);
("deletion", len % 3 != 0, len, 0)
}
NaEdit::Insertion { sequence } => {
let len = sequence.to_string().len();
("insertion", len % 3 != 0, 0, len)
}
NaEdit::Delins { sequence } => {
let alt_len = sequence.to_string().len();
("delins", true, 0, alt_len) }
NaEdit::Duplication {
sequence, length, ..
} => {
let len = sequence
.as_ref()
.map(|s| s.to_string().len())
.or(length.map(|l| l as usize))
.unwrap_or(1);
("duplication", len % 3 != 0, len, len * 2)
}
NaEdit::Inversion { .. } => ("inversion", true, 0, 0),
NaEdit::Repeat { .. } => ("repeat", false, 0, 0),
NaEdit::Identity { .. } => ("identity", false, 0, 0),
NaEdit::Unknown { .. } => ("unknown", false, 0, 0),
_ => ("other", false, 0, 0),
}
}
fn predict_cds_effect(
edit_type: &str,
is_intronic: bool,
is_frameshift: bool,
cds_pos: &CdsPos,
) -> SequenceEffect {
if is_intronic {
if let Some(offset) = cds_pos.offset {
if offset.abs() <= 2 {
return SequenceEffect {
so_term: "SO:0001629".to_string(),
name: "splice_site_variant".to_string(),
description: "A sequence variant that changes the first two or last two bases of an intron".to_string(),
impact: "HIGH".to_string(),
};
} else if offset.abs() <= 8 {
return SequenceEffect {
so_term: "SO:0001630".to_string(),
name: "splice_region_variant".to_string(),
description: "A sequence variant in which a change has occurred within the region of the splice site".to_string(),
impact: "LOW".to_string(),
};
}
}
return SequenceEffect {
so_term: "SO:0001627".to_string(),
name: "intron_variant".to_string(),
description: "A transcript variant occurring within an intron".to_string(),
impact: "MODIFIER".to_string(),
};
}
if cds_pos.base < 0 {
return SequenceEffect {
so_term: "SO:0001623".to_string(),
name: "5_prime_UTR_variant".to_string(),
description: "A UTR variant of the 5' UTR".to_string(),
impact: "MODIFIER".to_string(),
};
}
if cds_pos.utr3 {
return SequenceEffect {
so_term: "SO:0001624".to_string(),
name: "3_prime_UTR_variant".to_string(),
description: "A UTR variant of the 3' UTR".to_string(),
impact: "MODIFIER".to_string(),
};
}
match edit_type {
"substitution" => SequenceEffect {
so_term: "SO:0001583".to_string(),
name: "missense_variant".to_string(),
description: "A codon change resulting in a different amino acid (predicted)"
.to_string(),
impact: "MODERATE".to_string(),
},
"deletion" => {
if is_frameshift {
SequenceEffect {
so_term: "SO:0001589".to_string(),
name: "frameshift_variant".to_string(),
description:
"A sequence variant which causes a disruption of the translational reading frame"
.to_string(),
impact: "HIGH".to_string(),
}
} else {
SequenceEffect {
so_term: "SO:0001822".to_string(),
name: "inframe_deletion".to_string(),
description:
"An inframe non-synonymous variant that deletes bases from the coding sequence"
.to_string(),
impact: "MODERATE".to_string(),
}
}
}
"insertion" => {
if is_frameshift {
SequenceEffect {
so_term: "SO:0001589".to_string(),
name: "frameshift_variant".to_string(),
description:
"A sequence variant which causes a disruption of the translational reading frame"
.to_string(),
impact: "HIGH".to_string(),
}
} else {
SequenceEffect {
so_term: "SO:0001821".to_string(),
name: "inframe_insertion".to_string(),
description:
"An inframe non-synonymous variant that inserts bases into the coding sequence"
.to_string(),
impact: "MODERATE".to_string(),
}
}
}
"delins" => SequenceEffect {
so_term: "SO:1000032".to_string(),
name: "indel".to_string(),
description: "A sequence alteration which includes both deletion and insertion"
.to_string(),
impact: "MODERATE".to_string(),
},
"duplication" => {
if is_frameshift {
SequenceEffect {
so_term: "SO:0001589".to_string(),
name: "frameshift_variant".to_string(),
description:
"A sequence variant which causes a disruption of the translational reading frame"
.to_string(),
impact: "HIGH".to_string(),
}
} else {
SequenceEffect {
so_term: "SO:1000035".to_string(),
name: "duplication".to_string(),
description:
"An insertion which derives from a copy of a sequence immediately adjacent"
.to_string(),
impact: "MODERATE".to_string(),
}
}
}
"inversion" => SequenceEffect {
so_term: "SO:1000036".to_string(),
name: "inversion".to_string(),
description: "A continuous nucleotide sequence is inverted in the same position"
.to_string(),
impact: "HIGH".to_string(),
},
_ => SequenceEffect {
so_term: "SO:0001580".to_string(),
name: "coding_sequence_variant".to_string(),
description: "A sequence variant that changes the coding sequence".to_string(),
impact: "MODERATE".to_string(),
},
}
}
fn predict_genomic_effect(edit_type: &str) -> SequenceEffect {
match edit_type {
"substitution" => SequenceEffect {
so_term: "SO:0001483".to_string(),
name: "SNV".to_string(),
description: "A single nucleotide variant".to_string(),
impact: "MODIFIER".to_string(),
},
"deletion" => SequenceEffect {
so_term: "SO:0000159".to_string(),
name: "deletion".to_string(),
description: "A sequence alteration where nucleotides are removed".to_string(),
impact: "MODIFIER".to_string(),
},
"insertion" => SequenceEffect {
so_term: "SO:0000667".to_string(),
name: "insertion".to_string(),
description: "The sequence of one or more nucleotides added".to_string(),
impact: "MODIFIER".to_string(),
},
_ => SequenceEffect {
so_term: "SO:0001060".to_string(),
name: "sequence_variant".to_string(),
description: "A sequence variant".to_string(),
impact: "MODIFIER".to_string(),
},
}
}
fn predict_protein_consequence(
state: &AppState,
accession: &str,
cds_pos: &CdsPos,
edit_type: &str,
ref_len: usize,
alt_len: usize,
) -> Option<ProteinConsequence> {
let cdot = state.cdot.as_ref()?;
let cdot_tx = cdot.get_transcript(accession)?;
let prot_position = if cds_pos.base > 0 {
((cds_pos.base - 1) / 3 + 1) as u64
} else {
return None; };
let codon_phase = ((cds_pos.base - 1) % 3) as u8;
let net_change = alt_len as i64 - ref_len as i64;
let is_frameshift = net_change % 3 != 0 && edit_type != "substitution";
let prot_acc = cdot_tx
.protein
.clone()
.unwrap_or_else(|| accession.replace("NM_", "NP_").replace("XM_", "XP_"));
let hgvs_p = if is_frameshift {
format!("{}:p.(?{}fs)", prot_acc, prot_position)
} else if edit_type == "substitution" {
format!("{}:p.(?{}?)", prot_acc, prot_position)
} else if edit_type == "deletion" {
let end_pos = prot_position + (ref_len / 3).max(1) as u64 - 1;
if prot_position == end_pos {
format!("{}:p.(?{}del)", prot_acc, prot_position)
} else {
format!("{}:p.(?{}_?{}del)", prot_acc, prot_position, end_pos)
}
} else if edit_type == "insertion" {
format!(
"{}:p.(?{}_?{}ins?)",
prot_acc,
prot_position,
prot_position + 1
)
} else {
format!("{}:p.(?{}?)", prot_acc, prot_position)
};
Some(ProteinConsequence {
hgvs_p,
ref_aa: format!("?(pos{})", codon_phase + 1), alt_aa: "?".to_string(),
position: prot_position,
is_frameshift,
})
}
fn predict_nmd_for_cds(
state: &AppState,
accession: &str,
cds_pos: &CdsPos,
is_frameshift: bool,
edit_type: &str,
) -> Option<NmdPrediction> {
let cdot = state.cdot.as_ref()?;
let cdot_tx = cdot.get_transcript(accession)?;
let cds_start = cdot_tx.cds_start?;
let cds_end = cdot_tx.cds_end?;
let cds_length = cds_end.saturating_sub(cds_start);
let relative_pos = if cds_pos.base > 0 && cds_length > 0 {
cds_pos.base as f64 / cds_length as f64
} else {
0.0
};
let near_3_end = relative_pos > 0.9;
let (predicted, confidence, reason) = if !is_frameshift && edit_type != "deletion" {
(
false,
0.9,
"Non-frameshift variant unlikely to trigger NMD".to_string(),
)
} else if near_3_end {
(
false,
0.8,
"Variant in last exon region - likely escapes NMD".to_string(),
)
} else if relative_pos < 0.5 && is_frameshift {
(
true,
0.7,
"Early frameshift likely to introduce PTC and trigger NMD".to_string(),
)
} else if is_frameshift {
(
true,
0.5,
"Frameshift may introduce PTC, NMD possible".to_string(),
)
} else {
(
false,
0.3,
"Insufficient information for NMD prediction".to_string(),
)
};
Some(NmdPrediction {
predicted,
confidence,
reason,
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_analyze_substitution() {
let result = crate::hgvs::parser::parse_hgvs_lenient("NM_000249.4:c.350C>T").unwrap();
if let crate::hgvs::variant::HgvsVariant::Cds(v) = &result.result {
if let Some(edit) = v.loc_edit.edit.inner() {
let (edit_type, is_frameshift, _, _) = analyze_na_edit(edit);
assert_eq!(edit_type, "substitution");
assert!(!is_frameshift);
}
}
}
#[test]
fn test_analyze_frameshift_deletion() {
let result = crate::hgvs::parser::parse_hgvs_lenient("NM_000249.4:c.350del").unwrap();
if let crate::hgvs::variant::HgvsVariant::Cds(v) = &result.result {
if let Some(edit) = v.loc_edit.edit.inner() {
let (edit_type, is_frameshift, _, _) = analyze_na_edit(edit);
assert_eq!(edit_type, "deletion");
assert!(is_frameshift); }
}
}
#[test]
fn test_analyze_inframe_deletion() {
let result = crate::hgvs::parser::parse_hgvs_lenient("NM_000249.4:c.350_352del").unwrap();
if let crate::hgvs::variant::HgvsVariant::Cds(v) = &result.result {
if let Some(edit) = v.loc_edit.edit.inner() {
let (edit_type, _is_frameshift, _, _) = analyze_na_edit(edit);
assert_eq!(edit_type, "deletion");
}
}
}
#[test]
fn test_predict_splice_site_effect() {
let cds_pos = CdsPos {
base: 117,
offset: Some(-2),
utr3: false,
};
let effect = predict_cds_effect("deletion", true, false, &cds_pos);
assert_eq!(effect.name, "splice_site_variant");
assert_eq!(effect.impact, "HIGH");
}
#[test]
fn test_predict_splice_region_effect() {
let cds_pos = CdsPos {
base: 117,
offset: Some(-5),
utr3: false,
};
let effect = predict_cds_effect("substitution", true, false, &cds_pos);
assert_eq!(effect.name, "splice_region_variant");
assert_eq!(effect.impact, "LOW");
}
#[test]
fn test_predict_intron_effect() {
let cds_pos = CdsPos {
base: 117,
offset: Some(-50),
utr3: false,
};
let effect = predict_cds_effect("substitution", true, false, &cds_pos);
assert_eq!(effect.name, "intron_variant");
assert_eq!(effect.impact, "MODIFIER");
}
#[test]
fn test_predict_5_utr_effect() {
let cds_pos = CdsPos {
base: -10,
offset: None,
utr3: false,
};
let effect = predict_cds_effect("substitution", false, false, &cds_pos);
assert_eq!(effect.name, "5_prime_UTR_variant");
}
#[test]
fn test_predict_3_utr_effect() {
let cds_pos = CdsPos {
base: 10,
offset: None,
utr3: true,
};
let effect = predict_cds_effect("substitution", false, false, &cds_pos);
assert_eq!(effect.name, "3_prime_UTR_variant");
}
#[test]
fn test_predict_frameshift_effect() {
let cds_pos = CdsPos::new(350);
let effect = predict_cds_effect("deletion", false, true, &cds_pos);
assert_eq!(effect.name, "frameshift_variant");
assert_eq!(effect.impact, "HIGH");
}
}