use crate::error::ParseHgvsError;
/// One entry in the table of recognised-but-unsupported constructs: a
/// diagnostic code, a human-readable message, and the detector that decides
/// whether the entry applies to a given input.
struct DiagnosticMatcher {
/// Diagnostic identifier passed to `ParseHgvsError::unsupported`.
code: &'static str,
/// Explanation passed to `ParseHgvsError::unsupported` alongside `code`.
message: &'static str,
/// Detector run against the raw input; `Some(fragment)` means the entry
/// applies and `fragment` is the piece reported with the error.
detect: fn(&str) -> Option<String>,
}
/// Ordered table of constructs that are recognised but not supported.
///
/// `classify_parse_failure` walks this slice front to back and reports the
/// first entry whose detector returns a fragment, so the order of entries is
/// the precedence order. Several detectors overlap; keep the more specific
/// entry ahead of the more general one.
const UNSUPPORTED_MATCHERS: &[DiagnosticMatcher] = &[
DiagnosticMatcher {
code: "unsupported.rna_adjoined_transcript",
message: "RNA adjoined transcript syntax is not supported yet",
detect: rna_adjoined_transcript_fragment,
},
// Listed ahead of `unsupported.rna_special_state`, whose detector also
// reacts to descriptions starting with `?`, `spl`, or `(`.
DiagnosticMatcher {
code: "unsupported.rna_splicing_outcome",
message: "RNA splicing outcome containers are not supported yet",
detect: rna_splicing_outcome_fragment,
},
DiagnosticMatcher {
code: "unsupported.telomeric_position",
message: "telomeric positions such as pter and qter are not supported yet",
detect: telomeric_position_fragment,
},
DiagnosticMatcher {
code: "unsupported.epigenetic_edit",
message: "epigenetic edit syntax is not supported yet",
detect: epigenetic_edit_fragment,
},
DiagnosticMatcher {
code: "unsupported.protein_extension",
message: "protein extension syntax is not supported yet",
detect: protein_extension_fragment,
},
DiagnosticMatcher {
code: "unsupported.protein_insertion_payload",
message: "quantified or terminal protein insertion payloads are not supported yet",
detect: protein_insertion_payload_fragment,
},
// Listed ahead of `unsupported.uncertain_range`, whose detector also
// reacts to `^` and `[(` in any description.
DiagnosticMatcher {
code: "unsupported.protein_uncertain_consequence",
message: "uncertain protein consequence syntax is not supported yet",
detect: protein_uncertain_consequence_fragment,
},
// Listed ahead of `unsupported.rna_special_state`, whose detector also
// reacts to a description starting with `(`.
DiagnosticMatcher {
code: "unsupported.rna_uncertain_position",
message: "RNA variants with uncertain positions are not supported yet",
detect: rna_uncertain_position_fragment,
},
DiagnosticMatcher {
code: "unsupported.rna_special_state",
message: "RNA consequence states such as r.spl, r.?, and r.0 are not supported yet",
detect: rna_special_state_fragment,
},
DiagnosticMatcher {
code: "unsupported.cdna_offset_anchor",
message: "coding-DNA positions anchored to CDS start/end with additional offsets are not supported yet",
detect: cdna_offset_anchor_fragment,
},
DiagnosticMatcher {
code: "unsupported.uncertain_range",
message: "uncertain HGVS ranges are not supported yet",
detect: uncertain_range_fragment,
},
DiagnosticMatcher {
code: "unsupported.allele",
message: "allele syntax is not supported yet",
detect: allele_fragment,
},
];
/// Builds the error to report for an `input` that failed to parse.
///
/// Runs the detectors in `UNSUPPORTED_MATCHERS` in table order. The first
/// detector that returns a fragment produces `ParseHgvsError::unsupported`
/// carrying that entry's code and message, the original input, and the
/// fragment. When no detector fires the result is
/// `ParseHgvsError::invalid(input)`.
pub(crate) fn classify_parse_failure(input: &str) -> ParseHgvsError {
    // Pair the winning entry with its fragment so both are available below.
    let first_hit = UNSUPPORTED_MATCHERS.iter().find_map(|matcher| {
        let fragment = (matcher.detect)(input)?;
        Some((matcher, fragment))
    });

    match first_hit {
        Some((matcher, fragment)) => {
            ParseHgvsError::unsupported(matcher.code, matcher.message, input, Some(fragment))
        }
        None => ParseHgvsError::invalid(input),
    }
}
/// Detects RNA adjoined-transcript syntax.
///
/// Returns `Some("::")` when `input` has an `r.` description and contains
/// `::` anywhere; otherwise `None`.
fn rna_adjoined_transcript_fragment(input: &str) -> Option<String> {
    // Only the presence of an `r.` description matters here. The description
    // is a suffix of `input`, so searching `input` covers it as well.
    coordinate_description_fragment(input, "r.")?;
    if input.contains("::") {
        Some(String::from("::"))
    } else {
        None
    }
}
/// Detects an RNA splicing-outcome container.
///
/// Fires only when `input` has an `r.` description and also contains `):r.`.
/// The returned label is chosen by how the description begins:
/// `[` gives `r.[...]`, `(` gives `r.(...)`, `?` gives `r.?`, and `spl`
/// gives `r.spl`. Any other beginning yields `None`.
fn rna_splicing_outcome_fragment(input: &str) -> Option<String> {
    const LABEL_BY_PREFIX: [(&str, &str); 4] = [
        ("[", "r.[...]"),
        ("(", "r.(...)"),
        ("?", "r.?"),
        ("spl", "r.spl"),
    ];

    let description = coordinate_description_fragment(input, "r.")?;
    if !input.contains("):r.") {
        return None;
    }

    LABEL_BY_PREFIX
        .iter()
        .find(|(prefix, _)| description.starts_with(*prefix))
        .map(|(_, label)| (*label).to_string())
}
/// Detects telomeric position keywords.
///
/// Returns `Some("pter")` if `input` contains `pter`, otherwise
/// `Some("qter")` if it contains `qter`, otherwise `None`.
fn telomeric_position_fragment(input: &str) -> Option<String> {
    // `pter` is listed first so it wins when both keywords are present.
    ["pter", "qter"]
        .iter()
        .copied()
        .find(|term| input.contains(*term))
        .map(str::to_string)
}
/// Detects a `|` modifier in a variant description.
///
/// Returns the description text from its first `|` to the end (the bar
/// included), or `None` when there is no recognised coordinate description
/// or no `|` in it.
fn epigenetic_edit_fragment(input: &str) -> Option<String> {
    let description = variant_description_fragment(input)?;
    let bar = description.find('|')?;
    // Slicing from the bar keeps the `|` and everything that follows it.
    Some(description[bar..].to_owned())
}
/// Detects protein extension syntax.
///
/// Returns `Some("ext")` when the `p.` description of `input` contains
/// `ext`; `None` when there is no `p.` description or no `ext` in it.
fn protein_extension_fragment(input: &str) -> Option<String> {
    match protein_description_fragment(input) {
        Some(description) if description.contains("ext") => Some(String::from("ext")),
        _ => None,
    }
}
/// Detects protein insertion payloads that are not supported.
///
/// Applies only to `p.` descriptions containing `ins`. Returns
/// `Some("Xaa[...]")` when the description contains `Xaa[`, otherwise
/// `Some("*")` when it contains `ins*`, otherwise `None`.
fn protein_insertion_payload_fragment(input: &str) -> Option<String> {
    let description = protein_description_fragment(input).filter(|text| text.contains("ins"))?;

    // The `Xaa[` payload takes priority over the terminal `ins*` payload.
    let label = match (description.contains("Xaa["), description.contains("ins*")) {
        (true, _) => "Xaa[...]",
        (false, true) => "*",
        (false, false) => return None,
    };
    Some(label.to_owned())
}
/// Detects uncertain protein consequence syntax.
///
/// For a `p.` description, returns `Some("^")` when it contains `^`,
/// otherwise `Some("[(...)]")` when it contains `[(`, otherwise `None`.
fn protein_uncertain_consequence_fragment(input: &str) -> Option<String> {
    // (needle searched for, label reported), in priority order.
    const LABEL_BY_NEEDLE: [(&str, &str); 2] = [("^", "^"), ("[(", "[(...)]")];

    let description = protein_description_fragment(input)?;
    LABEL_BY_NEEDLE
        .iter()
        .find(|(needle, _)| description.contains(*needle))
        .map(|(_, label)| (*label).to_string())
}
/// Detects RNA variants written with an uncertain (parenthesised) position.
///
/// Returns `Some("r.(...)")` when the `r.` description starts with `(` and
/// contains any of `ins`, `del`, `dup`, or `inv`; otherwise `None`.
fn rna_uncertain_position_fragment(input: &str) -> Option<String> {
    const EDIT_KEYWORDS: [&str; 4] = ["ins", "del", "dup", "inv"];

    let description = coordinate_description_fragment(input, "r.")?;
    if !description.starts_with('(') {
        return None;
    }

    if EDIT_KEYWORDS
        .iter()
        .any(|keyword| description.contains(*keyword))
    {
        Some("r.(...)".to_owned())
    } else {
        None
    }
}
/// Detects special RNA consequence states.
///
/// For an `r.` description, the first matching rule decides the label:
/// a leading `?` gives `r.?`, a leading `spl` gives `r.spl`, a leading `0`
/// gives `r.0`, a leading `(` gives `r.(...)`, and a description containing
/// `=/` or `//` gives `=/`. Otherwise the result is `None`.
fn rna_special_state_fragment(input: &str) -> Option<String> {
    let description = coordinate_description_fragment(input, "r.")?;

    let label = match description {
        d if d.starts_with('?') => "r.?",
        d if d.starts_with("spl") => "r.spl",
        d if d.starts_with('0') => "r.0",
        d if d.starts_with('(') => "r.(...)",
        // Both separators are reported under the same `=/` label.
        d if d.contains("=/") || d.contains("//") => "=/",
        _ => return None,
    };
    Some(label.to_owned())
}
fn cdna_offset_anchor_fragment(input: &str) -> Option<String> {
let description = coordinate_description_fragment(input, "c.")?;
let first = description
.split_once('_')
.map_or(description, |(start, _)| start);
if let Some(fragment) = anchored_offset_position_fragment(first) {
return Some(fragment);
}
description
.split_once('_')
.and_then(|(_, end)| anchored_offset_position_fragment(end))
}
/// Detects uncertain-range syntax in any recognised coordinate description.
///
/// The first matching rule decides the label: `^` anywhere gives `^`;
/// `[(` anywhere gives `[(...)]`; `(?` or `?)` anywhere gives `(?)`; a
/// leading `(` or an `_(` anywhere gives `(`. Otherwise the result is `None`.
fn uncertain_range_fragment(input: &str) -> Option<String> {
    let description = variant_description_fragment(input)?;
    let has = |needle: &str| description.contains(needle);

    let label = if has("^") {
        "^"
    } else if has("[(") {
        "[(...)]"
    } else if has("(?") || has("?)") {
        "(?)"
    } else if description.starts_with('(') || has("_(") {
        "("
    } else {
        return None;
    };
    Some(String::from(label))
}
/// Detects allele syntax in any recognised coordinate description.
///
/// Returns the first of `=/`, `(;)`, or `];[` found in the description, in
/// that priority order. Failing those, returns `Some("[")` when the
/// description starts with `[` and contains `;` or `,`. Otherwise `None`.
fn allele_fragment(input: &str) -> Option<String> {
    let description = variant_description_fragment(input)?;

    // These separators are reported verbatim, checked in priority order.
    for separator in ["=/", "(;)", "];["] {
        if description.contains(separator) {
            return Some(separator.to_owned());
        }
    }

    let bracketed_list = description.starts_with('[')
        && description.contains(|c: char| matches!(c, ';' | ','));
    if bracketed_list {
        Some("[".to_owned())
    } else {
        None
    }
}
/// Extracts an anchored position with an additional offset from the start of
/// `position`.
///
/// A match has the shape `<anchor><digits><sign><digits>`, where the anchor is
/// `-` or `*`, the sign is `+` or `-`, and both digit runs contain at least
/// one ASCII digit (for example `-10+5` or `*5-3`). Anything after the second
/// digit run is ignored.
///
/// Returns the matched prefix of `position`, or `None` when `position` does
/// not begin with that shape. An anchor followed directly by a sign, such as
/// `-+5`, has no base position and is rejected.
fn anchored_offset_position_fragment(position: &str) -> Option<String> {
    // Length in bytes of the leading run of ASCII digits.
    let digit_run = |text: &str| text.bytes().take_while(u8::is_ascii_digit).count();

    let after_anchor = position.strip_prefix(|c: char| matches!(c, '-' | '*'))?;
    let base_len = digit_run(after_anchor);
    if base_len == 0 {
        // No base position between the anchor and the offset sign.
        return None;
    }

    let after_sign =
        after_anchor[base_len..].strip_prefix(|c: char| matches!(c, '+' | '-'))?;
    let offset_len = digit_run(after_sign);
    if offset_len == 0 {
        return None;
    }

    // Everything consumed so far is ASCII, so `end` is a valid char boundary.
    let end = position.len() - after_sign.len() + offset_len;
    Some(position[..end].to_owned())
}
/// Returns the text following the `p.` marker in `input`, if there is one.
///
/// Delegates to `coordinate_description_fragment` with the `p.` marker.
fn protein_description_fragment(input: &str) -> Option<&str> {
    const PROTEIN_MARKER: &str = "p.";
    coordinate_description_fragment(input, PROTEIN_MARKER)
}
/// Returns the description text following the first recognised coordinate
/// marker in `input`.
///
/// Markers are tried in the fixed order `g.`, `o.`, `m.`, `c.`, `n.`, `r.`,
/// `p.`; the first marker that `coordinate_description_fragment` finds wins.
/// Returns `None` when none of them is found.
fn variant_description_fragment(input: &str) -> Option<&str> {
    const COORDINATE_MARKERS: [&str; 7] = ["g.", "o.", "m.", "c.", "n.", "r.", "p."];

    for marker in COORDINATE_MARKERS {
        if let Some(description) = coordinate_description_fragment(input, marker) {
            return Some(description);
        }
    }
    None
}
/// Returns the part of `input` that follows `marker`.
///
/// If `input` starts with `marker`, the remainder after it is returned.
/// Otherwise the first occurrence of `:` immediately followed by `marker` is
/// located and the text after that marker is returned. Returns `None` when
/// neither form is present.
fn coordinate_description_fragment<'a>(input: &'a str, marker: &str) -> Option<&'a str> {
    input.strip_prefix(marker).or_else(|| {
        // Visit each `:` left to right; the first one followed by `marker` wins.
        input
            .match_indices(':')
            .find_map(|(colon, _)| input[colon + 1..].strip_prefix(marker))
    })
}