/// Identifies which legacy notation was recognised in an input string.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OldSubstitutionFormat {
    /// An interval followed by `>` and a replacement sequence,
    /// e.g. `c.100_102>ATG`.
    ArrowSubstitution,
    /// An insertion written with a single position, e.g. `c.100insA`.
    SinglePositionInsertion,
}
/// Tries to recognise `input` as a legacy arrow substitution and rewrite it.
///
/// Returns the detected format together with the rewritten string, or `None`
/// when `convert_old_substitution` does not accept the input. Only the arrow
/// form is attempted here, so the format in a successful result is always
/// [`OldSubstitutionFormat::ArrowSubstitution`].
pub fn parse_old_substitution(input: &str) -> Option<(OldSubstitutionFormat, String)> {
    let rewritten = convert_old_substitution(input)?;
    Some((OldSubstitutionFormat::ArrowSubstitution, rewritten))
}
/// Rewrites a legacy interval substitution such as `c.100_102>ATG` into the
/// `delins` form `c.100_102delinsATG`.
///
/// The coordinate section is located by searching for the first coordinate
/// type letter (`c`, `g`, `n`, `m` or `o`) that is immediately followed by
/// `.`. Letters of that set that are *not* followed by `.` (for example the
/// `o` in `C9orf72` or the `c` in `chr1`) are skipped rather than causing the
/// whole input to be rejected.
///
/// Everything before the coordinate marker is copied through unchanged.
///
/// Returns `None` when:
/// - no `<type>.` marker is present,
/// - the coordinate section has no `_`, or no `>` after the first `_`,
/// - the text between the first `_` and the first `>` does not start with an
///   ASCII digit,
/// - the text before `>` already contains `del`,
/// - the replacement sequence is empty or contains a character outside
///   `ACGTacgtNn`.
pub fn convert_old_substitution(input: &str) -> Option<String> {
    // Byte-wise scan is safe: both bytes we match are ASCII, and ASCII bytes
    // never occur inside a multi-byte UTF-8 sequence, so the resulting index
    // is always a char boundary.
    let coord_start = input.as_bytes().windows(2).position(|pair| {
        matches!(pair[0], b'c' | b'g' | b'n' | b'm' | b'o') && pair[1] == b'.'
    })?;
    let (prefix, after_type) = input.split_at(coord_start);

    let underscore_pos = after_type.find('_')?;
    let arrow_pos = after_type.find('>')?;
    if arrow_pos <= underscore_pos {
        return None;
    }

    // The end of the interval must begin with a digit; this also rejects an
    // empty interval end such as `c.100_>A`.
    let interval_end = &after_type[underscore_pos + 1..arrow_pos];
    if !interval_end.starts_with(|c: char| c.is_ascii_digit()) {
        return None;
    }

    // Anything that already mentions `del` before the arrow is left alone.
    let before_arrow = &after_type[..arrow_pos];
    if before_arrow.contains("del") {
        return None;
    }

    let sequence = &after_type[arrow_pos + 1..];
    if sequence.is_empty() || !sequence.chars().all(|c| "ACGTacgtNn".contains(c)) {
        return None;
    }

    Some(format!("{}{}delins{}", prefix, before_arrow, sequence))
}
/// Expands a single-position insertion such as `c.100insA` into the
/// two-position form `c.100_101insA`.
///
/// The text before the first `ins` must contain a coordinate type letter
/// (`c`, `g`, `n`, `m` or `o`; the last occurrence is used) immediately
/// followed by `.`. The run of ASCII digits directly before `ins` is parsed
/// as the position; the output is everything before those digits, then
/// `<position>_<position + 1>`, then `ins` and whatever follows it.
///
/// Returns `None` when:
/// - the input contains no `ins`,
/// - no `<type>.` marker precedes `ins`,
/// - the coordinate part already contains `_` (already an interval),
/// - there are no digits directly before `ins`, or they do not fit in `u64`,
/// - `position + 1` would overflow `u64`.
///
/// This function never panics, including on non-ASCII input.
// NOTE(review): the reason for `allow(dead_code)` is not visible here; the
// attribute is kept as found — confirm whether it is still needed.
#[allow(dead_code)]
pub fn convert_single_position_insertion(input: &str) -> Option<String> {
    let ins_pos = input.find("ins")?;
    let (before_ins, after_ins) = input.split_at(ins_pos);

    // The matched marker is ASCII, so `coord_marker + 1` is a char boundary.
    let coord_marker = before_ins.rfind(['c', 'g', 'n', 'm', 'o'])?;
    let pos_part = before_ins[coord_marker + 1..].strip_prefix('.')?;
    if pos_part.contains('_') {
        return None;
    }

    // Split off the trailing ASCII digits by byte count. Digits are single
    // bytes, so the split point is a char boundary even when the preceding
    // character is multi-byte.
    let digit_count = before_ins
        .bytes()
        .rev()
        .take_while(u8::is_ascii_digit)
        .count();
    let (prefix, digits) = before_ins.split_at(before_ins.len() - digit_count);

    let position: u64 = digits.parse().ok()?;
    // Reject rather than overflow when the position is `u64::MAX`.
    let next_position = position.checked_add(1)?;

    Some(format!(
        "{}{}_{}{}",
        prefix, position, next_position, after_ins
    ))
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- convert_old_substitution -------------------------------------

    /// A transcript-level interval with `>` becomes a `delins`.
    #[test]
    fn test_old_substitution_simple() {
        assert_eq!(
            convert_old_substitution("NM_000088.3:c.100_102>ATG").as_deref(),
            Some("NM_000088.3:c.100_102delinsATG")
        );
    }

    /// Genomic (`g.`) coordinates are handled the same way.
    #[test]
    fn test_old_substitution_genomic() {
        assert_eq!(
            convert_old_substitution("NC_000001.11:g.12345_12350>ACGTAC").as_deref(),
            Some("NC_000001.11:g.12345_12350delinsACGTAC")
        );
    }

    /// Lowercase replacement bases are accepted and passed through as-is.
    #[test]
    fn test_old_substitution_lowercase() {
        assert_eq!(
            convert_old_substitution("NM_000088.3:c.100_102>atg").as_deref(),
            Some("NM_000088.3:c.100_102delinsatg")
        );
    }

    /// An ordinary single-base substitution has no interval and is rejected.
    #[test]
    fn test_old_substitution_not_interval() {
        assert!(convert_old_substitution("NM_000088.3:c.100A>G").is_none());
    }

    /// Input that is already in `delins` form is rejected.
    #[test]
    fn test_old_substitution_already_delins() {
        assert!(convert_old_substitution("NM_000088.3:c.100_102delinsATG").is_none());
    }

    // --- convert_single_position_insertion ----------------------------

    /// A single position is expanded to `pos_pos+1`.
    #[test]
    fn test_single_position_insertion() {
        assert_eq!(
            convert_single_position_insertion("NM_000088.3:c.100insA").as_deref(),
            Some("NM_000088.3:c.100_101insA")
        );
    }

    /// Works without an accession prefix and with a multi-base insertion.
    #[test]
    fn test_single_position_insertion_multi() {
        assert_eq!(
            convert_single_position_insertion("c.500insACGT").as_deref(),
            Some("c.500_501insACGT")
        );
    }

    /// An insertion that already has two positions is rejected.
    #[test]
    fn test_already_two_position_insertion() {
        assert!(convert_single_position_insertion("c.100_101insA").is_none());
    }
}