use crate::error::ParseHgvsError;
/// One row of the unsupported-syntax lookup table: pairs a detector function with the
/// diagnostic code and message reported when that detector recognises the input.
struct DiagnosticMatcher {
/// Diagnostic code handed to `ParseHgvsError::unsupported` when `detect` matches.
code: &'static str,
/// Explanatory text handed to `ParseHgvsError::unsupported` alongside `code`.
message: &'static str,
/// Detector run against the raw input string. `Some(fragment)` means the construct was
/// recognised; the fragment is forwarded into the resulting error. `None` means no match.
detect: fn(&str) -> Option<String>,
}
/// Detectors for syntax that is recognised but not supported.
///
/// `classify_parse_failure` walks this table from top to bottom and returns on the first
/// detector that yields a fragment, so the ORDER OF ENTRIES IS THE PRECEDENCE: when several
/// detectors could match the same input, the earliest one decides the reported code.
/// Keep more specific detectors above more general ones when adding entries.
const UNSUPPORTED_MATCHERS: &[DiagnosticMatcher] = &[
DiagnosticMatcher {
code: "unsupported.rna_adjoined_transcript",
message: "RNA adjoined transcript syntax is not supported yet",
detect: rna_adjoined_transcript_fragment,
},
// Must precede `rna_special_state`: both detectors can report `r.spl`, `r.?` and `r.(...)`,
// but this one additionally requires the literal `):r.` in the input.
DiagnosticMatcher {
code: "unsupported.rna_splicing_outcome",
message: "RNA splicing outcome containers are not supported yet",
detect: rna_splicing_outcome_fragment,
},
DiagnosticMatcher {
code: "unsupported.telomeric_position",
message: "telomeric positions such as pter and qter are not supported yet",
detect: telomeric_position_fragment,
},
DiagnosticMatcher {
code: "unsupported.epigenetic_edit",
message: "epigenetic edit syntax is not supported yet",
detect: epigenetic_edit_fragment,
},
DiagnosticMatcher {
code: "unsupported.protein_insertion_payload",
message: "quantified or terminal protein insertion payloads are not supported yet",
detect: protein_insertion_payload_fragment,
},
// Claims `^` in any `p.` description that does NOT start with `[`; the later
// `alternate_allele_state` entry therefore only fires for bracketed descriptions.
DiagnosticMatcher {
code: "unsupported.protein_uncertain_consequence",
message: "uncertain protein consequence syntax is not supported yet",
detect: protein_uncertain_consequence_fragment,
},
DiagnosticMatcher {
code: "unsupported.rna_special_state",
message: "RNA consequence states such as r.spl, r.?, and r.0 are not supported yet",
detect: rna_special_state_fragment,
},
DiagnosticMatcher {
code: "unsupported.uncertain_size",
message: "uncertain HGVS size syntax is not supported yet",
detect: uncertain_size_fragment,
},
DiagnosticMatcher {
code: "unsupported.allele_unknown_variant",
message: "allele variants written as [?] are not supported yet",
detect: allele_unknown_variant_fragment,
},
DiagnosticMatcher {
code: "unsupported.alternate_allele_state",
message: "alternate allele states are not supported yet",
detect: alternate_allele_state_fragment,
},
DiagnosticMatcher {
code: "unsupported.one_allele_multi_protein",
message: "one protein allele encoding more than one protein is not supported yet",
detect: one_allele_multi_protein_fragment,
},
DiagnosticMatcher {
code: "unsupported.allele_uncertain_variant_state",
message: "uncertain allele variant states are not supported yet",
detect: allele_uncertain_variant_state_fragment,
},
// Generic allele fallback, deliberately last: it only looks at non-protein inputs and is
// reached only when none of the more specific detectors above matched.
DiagnosticMatcher {
code: "unsupported.allele",
message: "allele syntax is not supported yet",
detect: allele_fragment,
},
];
/// Turns an input string into the most specific `ParseHgvsError` available.
///
/// Every detector in `UNSUPPORTED_MATCHERS` is tried in table order. The first one that
/// returns a fragment produces `ParseHgvsError::unsupported` carrying that matcher's code
/// and message, the full `input`, and the fragment. When no detector recognises the input,
/// the result is `ParseHgvsError::invalid(input)`.
///
/// NOTE(review): presumably called only after the main parser rejected `input`; the caller
/// is not visible from this file.
pub(crate) fn classify_parse_failure(input: &str) -> ParseHgvsError {
    UNSUPPORTED_MATCHERS
        .iter()
        .find_map(|matcher| {
            // `?` skips to the next matcher when this detector does not recognise the input.
            let fragment = (matcher.detect)(input)?;
            Some(ParseHgvsError::unsupported(
                matcher.code,
                matcher.message,
                input,
                Some(fragment),
            ))
        })
        .unwrap_or_else(|| ParseHgvsError::invalid(input))
}
/// Detects the `::` token in an input that carries an `r.` description.
///
/// Returns `Some("::")` when the input has an `r.` description and `::` appears anywhere in
/// the input; otherwise `None`.
fn rna_adjoined_transcript_fragment(input: &str) -> Option<String> {
    // Only the presence of an `r.` description matters here. The description is a subslice
    // of `input`, so searching the whole input also covers any `::` inside the description.
    coordinate_description_fragment(input, "r.")?;
    if input.contains("::") {
        Some("::".to_string())
    } else {
        None
    }
}
/// Detects an `r.` description that opens with an outcome-style prefix, but only when the
/// input also contains the literal `):r.`.
///
/// Returns a label for the first matching prefix (`r.[...]`, `r.(...)`, `r.?`, `r.spl`),
/// or `None` when there is no `r.` description, no `):r.` in the input, or no prefix match.
///
/// NOTE(review): `):r.` presumably indicates a parenthesised reference context in front of
/// the RNA description — confirm against the grammar.
fn rna_splicing_outcome_fragment(input: &str) -> Option<String> {
    // (prefix of the description, label reported); checked in this order.
    const OUTCOME_PREFIXES: [(&str, &str); 4] = [
        ("[", "r.[...]"),
        ("(", "r.(...)"),
        ("?", "r.?"),
        ("spl", "r.spl"),
    ];

    let description = coordinate_description_fragment(input, "r.")?;
    if !input.contains("):r.") {
        return None;
    }

    OUTCOME_PREFIXES
        .iter()
        .find(|&&(prefix, _)| description.starts_with(prefix))
        .map(|&(_, label)| label.to_string())
}
/// Detects the telomeric position tokens `pter` and `qter` anywhere in the raw input.
///
/// `pter` takes priority: if both tokens occur, `Some("pter")` is returned regardless of
/// which one appears first in the string. Returns `None` when neither occurs.
fn telomeric_position_fragment(input: &str) -> Option<String> {
    // Checked in array order, which gives `pter` its priority.
    const TELOMERE_TOKENS: [&str; 2] = ["pter", "qter"];

    TELOMERE_TOKENS
        .iter()
        .find(|token| input.contains(**token))
        .map(|token| (*token).to_string())
}
/// Detects a `|` modifier inside the variant description.
///
/// Returns the tail of the description starting at its first `|` (the bar included), or
/// `None` when the input has no recognised description or the description has no `|`.
fn epigenetic_edit_fragment(input: &str) -> Option<String> {
    let description = variant_description_fragment(input)?;
    let bar = description.find('|')?;
    // Slicing from the bar keeps the leading `|` in the reported fragment.
    Some(description[bar..].to_string())
}
/// Detects protein insertion payloads written as `Xaa[...]` or as a terminal `ins*`.
///
/// Only `p.` descriptions containing `ins` are considered. `Xaa[` is checked before `ins*`,
/// so a description containing both is reported as `Xaa[...]`. Returns `None` otherwise.
fn protein_insertion_payload_fragment(input: &str) -> Option<String> {
    match protein_description_fragment(input)? {
        // Not an insertion at all: nothing to report.
        text if !text.contains("ins") => None,
        text if text.contains("Xaa[") => Some("Xaa[...]".to_string()),
        text if text.contains("ins*") => Some("*".to_string()),
        _ => None,
    }
}
/// Detects uncertain-consequence notation (`^` or `[(`) in a `p.` description.
///
/// Descriptions that start with `[` are skipped entirely and yield `None`. For the rest,
/// `^` is checked before `[(`; the reported labels are `^` and `[(...)]` respectively.
fn protein_uncertain_consequence_fragment(input: &str) -> Option<String> {
    // (substring searched for, label reported); checked in this order.
    const UNCERTAIN_TOKENS: [(&str, &str); 2] = [("^", "^"), ("[(", "[(...)]")];

    // Bracket-led descriptions are left for other detectors.
    let description =
        protein_description_fragment(input).filter(|text| !text.starts_with('['))?;

    UNCERTAIN_TOKENS
        .iter()
        .find(|&&(needle, _)| description.contains(needle))
        .map(|&(_, label)| label.to_string())
}
/// Detects special RNA consequence states in an `r.` description.
///
/// Checks, in order: the description being exactly `?` (`r.?`), starting with `spl`
/// (`r.spl`), starting with `0` (`r.0`), being wrapped in `(`...`)` (`r.(...)`), or
/// containing `=/` or `//` (reported as `=/`). Returns `None` if nothing applies or the
/// input has no `r.` description.
fn rna_special_state_fragment(input: &str) -> Option<String> {
    let description = coordinate_description_fragment(input, "r.")?;

    let label = if description == "?" {
        "r.?"
    } else if description.starts_with("spl") {
        "r.spl"
    } else if description.starts_with('0') {
        "r.0"
    } else if description.starts_with('(') && description.ends_with(')') {
        "r.(...)"
    } else if description.contains("=/") || description.contains("//") {
        // NOTE(review): a description containing only `//` is still labelled `=/`.
        "=/"
    } else {
        return None;
    };

    Some(label.to_string())
}
/// Detects the `[(` uncertain-size notation in a non-protein variant description.
///
/// Returns `Some("[(...)]")` when the description contains `[(` and the input has no `p.`
/// description; returns `None` in every other case.
fn uncertain_size_fragment(input: &str) -> Option<String> {
    let description = variant_description_fragment(input)?;
    if !description.contains("[(") {
        return None;
    }
    // A description containing `[(` necessarily contains `[`, so at this point the only
    // remaining exclusion is "the input carries a protein description".
    if protein_description_fragment(input).is_some() {
        return None;
    }
    Some("[(...)]".to_string())
}
/// Detects the `[?]` token in the variant description.
///
/// Returns `Some("[?]")` when the description contains it, `None` otherwise (including when
/// the input has no recognised description).
fn allele_unknown_variant_fragment(input: &str) -> Option<String> {
    match variant_description_fragment(input) {
        Some(description) if description.contains("[?]") => Some("[?]".to_string()),
        _ => None,
    }
}
/// Detects the `(;)(` sequence in the variant description.
///
/// Returns `Some("(;)(...)")` when the description contains it, `None` otherwise (including
/// when the input has no recognised description).
fn allele_uncertain_variant_state_fragment(input: &str) -> Option<String> {
    variant_description_fragment(input)
        .filter(|description| description.contains("(;)("))
        .map(|_| "(;)(...)".to_string())
}
/// Detects a `^` anywhere in a `p.` description.
///
/// Returns `Some("^")` when present; `None` when absent or when the input has no `p.`
/// description.
fn alternate_allele_state_fragment(input: &str) -> Option<String> {
    let description = protein_description_fragment(input)?;
    if description.contains('^') {
        Some("^".to_string())
    } else {
        None
    }
}
/// Detects a `,` anywhere in a `p.` description.
///
/// Returns `Some(",")` when present; `None` when absent or when the input has no `p.`
/// description.
fn one_allele_multi_protein_fragment(input: &str) -> Option<String> {
    protein_description_fragment(input)?
        .find(',')
        .map(|_| ",".to_string())
}
fn allele_fragment(input: &str) -> Option<String> {
if protein_description_fragment(input).is_some() {
return None;
}
let description = variant_description_fragment(input)?;
allele_like_fragment(description)
}
/// Looks for allele-style tokens in an already-extracted description.
///
/// Returns `Some("=/")` if the description contains `=/`, else `Some("];[")` if it contains
/// `];[`, else `None`. `=/` takes priority when both are present.
fn allele_like_fragment(description: &str) -> Option<String> {
    let token = if description.contains("=/") {
        "=/"
    } else if description.contains("];[") {
        "];["
    } else {
        return None;
    };
    Some(token.to_owned())
}
/// Extracts the text following the `p.` marker, if the input has one.
///
/// Thin wrapper over `coordinate_description_fragment` with the marker fixed to `p.`; the
/// returned slice borrows from `input`.
fn protein_description_fragment(input: &str) -> Option<&str> {
    const PROTEIN_MARKER: &str = "p.";
    coordinate_description_fragment(input, PROTEIN_MARKER)
}
/// Extracts the description that follows the first recognised coordinate-type marker.
///
/// Markers are tried in the fixed order `g.`, `o.`, `m.`, `c.`, `n.`, `r.`, `p.`. The first
/// marker for which `coordinate_description_fragment` succeeds wins — this is priority by
/// marker order, not by position in the string. Returns `None` when no marker matches.
fn variant_description_fragment(input: &str) -> Option<&str> {
    const MARKERS: [&str; 7] = ["g.", "o.", "m.", "c.", "n.", "r.", "p."];

    for &marker in MARKERS.iter() {
        if let Some(description) = coordinate_description_fragment(input, marker) {
            return Some(description);
        }
    }
    None
}
/// Returns the part of `input` that follows `marker`.
///
/// Two placements of the marker are recognised:
/// 1. `input` starts with `marker` — everything after that prefix is returned;
/// 2. otherwise, the first occurrence of `:` immediately followed by `marker` — everything
///    after that occurrence is returned.
///
/// Returns `None` when neither form is present. The result borrows from `input`.
fn coordinate_description_fragment<'a>(input: &'a str, marker: &str) -> Option<&'a str> {
    input.strip_prefix(marker).or_else(|| {
        // Visit colons left to right; the first one directly followed by `marker` wins.
        input
            .match_indices(':')
            .find_map(|(colon, _)| input[colon + 1..].strip_prefix(marker))
    })
}