use nom::branch::alt;
use nom::bytes::complete::{tag, take_while1};
use nom::character::complete::{char, digit1};
use nom::combinator::{all_consuming, map, map_res, opt, value};
use nom::multi::{many0, many1, separated_list1};
use nom::sequence::{delimited, pair, preceded};
use nom::{IResult, Parser};
use crate::diagnostics::classify_parse_failure;
use crate::error::ParseHgvsError;
use crate::model::{
Accession, CoordinateSystem, CopiedSequenceItem, HgvsVariant, Interval, LiteralSequenceItem,
NucleotideAnchor, NucleotideCoordinate, NucleotideEdit, NucleotideRepeatBlock,
NucleotideSequenceItem, NucleotideVariant, ProteinCoordinate, ProteinEdit, ProteinEffect,
ProteinExtensionEdit, ProteinExtensionTerminal, ProteinFrameshiftStop,
ProteinFrameshiftStopKind, ProteinSequence, ProteinVariant, ReferenceSpec, RepeatSequenceItem,
VariantDescription,
};
/// Result type returned by every parser in this file: a nom `IResult` over `&str` input.
type ParseResult<'a, T> = IResult<&'a str, T>;
/// Amino-acid symbols recognised by `protein_symbol`.
///
/// `protein_symbol` scans this table in order and returns the first entry that is a prefix
/// of the input, so the three-letter codes are listed before `*` and the one-letter codes
/// (otherwise e.g. `T` would match the start of `Ter`/`Thr`/`Trp`/`Tyr`).
const PROTEIN_SYMBOLS: &[&str] = &[
"Ter", "Sec", "Pyl", "Xaa", "Ala", "Arg", "Asn", "Asp", "Cys", "Gln", "Glu", "Gly", "His",
"Ile", "Leu", "Lys", "Met", "Phe", "Pro", "Ser", "Thr", "Trp", "Tyr", "Val", "*", "A", "R",
"N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
];
/// Parses a complete HGVS expression into an [`HgvsVariant`].
///
/// Leading and trailing whitespace is trimmed first; the remaining text must be consumed
/// in full by the grammar.
///
/// # Errors
///
/// On any parse failure the nom error is discarded and the trimmed input is handed to
/// `classify_parse_failure`, whose result is returned.
pub fn parse_hgvs(input: &str) -> Result<HgvsVariant, ParseHgvsError> {
    let trimmed = input.trim();
    match all_consuming(hgvs_variant).parse(trimmed) {
        Ok((_rest, variant)) => Ok(variant),
        Err(_) => Err(classify_parse_failure(trimmed)),
    }
}
/// Parses either a variant with an explicit reference (`REF:x.…`) or, failing that, a bare
/// protein variant (`p.…`).
fn hgvs_variant(input: &str) -> ParseResult<'_, HgvsVariant> {
    let forms = (variant_with_reference, protein_variant_without_reference);
    alt(forms).parse(input)
}
fn variant_with_reference(input: &str) -> ParseResult<'_, HgvsVariant> {
let (input, reference) = reference_spec(input)?;
let (input, _) = char(':')(input)?;
let (input, coordinate_system) = coordinate_system(input)?;
let (input, _) = char('.')(input)?;
let (input, description) = variant_description(coordinate_system, input)?;
Ok((
input,
build_variant(Some(reference), coordinate_system, description),
))
}
fn protein_variant_without_reference(input: &str) -> ParseResult<'_, HgvsVariant> {
let (input, _) = tag("p.")(input)?;
let (input, description) = protein_description(input)?;
Ok((
input,
build_variant(None, CoordinateSystem::Protein, description),
))
}
/// Assembles an [`HgvsVariant`] from its three already-parsed parts.
fn build_variant(
    reference: Option<ReferenceSpec>,
    coordinate_system: CoordinateSystem,
    description: VariantDescription,
) -> HgvsVariant {
    HgvsVariant { reference, coordinate_system, description }
}
/// Dispatches to the protein or nucleotide description grammar based on the coordinate
/// system.
fn variant_description(
    coordinate_system: CoordinateSystem,
    input: &str,
) -> ParseResult<'_, VariantDescription> {
    if !coordinate_system.is_protein() {
        return nucleotide_description(coordinate_system, input);
    }
    protein_description(input)
}
fn reference_spec(input: &str) -> ParseResult<'_, ReferenceSpec> {
map(
pair(accession, opt(delimited(char('('), accession, char(')')))),
|(primary, context)| ReferenceSpec {
primary: Accession::new(primary),
context: context.map(Accession::new),
},
)
.parse(input)
}
/// Parses one or more accession characters: ASCII letters, ASCII digits, `_` or `.`.
fn accession(input: &str) -> ParseResult<'_, String> {
    let accession_chars =
        take_while1(|c: char| matches!(c, '_' | '.') || c.is_ascii_alphanumeric());
    map(accession_chars, String::from).parse(input)
}
fn coordinate_system(input: &str) -> ParseResult<'_, CoordinateSystem> {
alt((
value(CoordinateSystem::Genomic, char('g')),
value(CoordinateSystem::CircularGenomic, char('o')),
value(CoordinateSystem::Mitochondrial, char('m')),
value(CoordinateSystem::CodingDna, char('c')),
value(CoordinateSystem::NonCodingDna, char('n')),
value(CoordinateSystem::Rna, char('r')),
value(CoordinateSystem::Protein, char('p')),
))
.parse(input)
}
fn nucleotide_description(
coordinate_system: CoordinateSystem,
input: &str,
) -> ParseResult<'_, VariantDescription> {
let (input, initial_location) = nucleotide_interval(input)?;
let (input, edit) = nucleotide_edit(input)?;
if !is_valid_nucleotide_repeat(coordinate_system, &initial_location, &edit) {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
let location = resolve_nucleotide_location(&initial_location, &edit);
Ok((
input,
VariantDescription::Nucleotide(NucleotideVariant { location, edit }),
))
}
fn nucleotide_interval(input: &str) -> ParseResult<'_, Interval<NucleotideCoordinate>> {
alt((
map(
pair(
nucleotide_coordinate,
preceded(char('_'), nucleotide_coordinate),
),
|(start, end)| Interval {
start,
end: Some(end),
},
),
map(nucleotide_coordinate, |start| Interval { start, end: None }),
))
.parse(input)
}
fn nucleotide_coordinate(input: &str) -> ParseResult<'_, NucleotideCoordinate> {
alt((
map(preceded(char('-'), parse_i32), |coordinate| {
NucleotideCoordinate {
anchor: NucleotideAnchor::RelativeCdsStart,
coordinate: -coordinate,
offset: 0,
}
}),
map(preceded(char('*'), parse_i32), |coordinate| {
NucleotideCoordinate {
anchor: NucleotideAnchor::RelativeCdsEnd,
coordinate,
offset: 0,
}
}),
map(
pair(parse_i32, opt(pair(alt((char('+'), char('-'))), parse_i32))),
|(coordinate, offset)| {
let offset = offset
.map(|(sign, value)| if sign == '-' { -value } else { value })
.unwrap_or(0);
NucleotideCoordinate {
anchor: NucleotideAnchor::Absolute,
coordinate,
offset,
}
},
),
))
.parse(input)
}
/// Parses a run of ASCII digits as an `i32`; digits that do not fit are a parse error.
fn parse_i32(input: &str) -> ParseResult<'_, i32> {
    map_res(digit1, |digits: &str| digits.parse::<i32>()).parse(input)
}
/// Parses a run of ASCII digits as a `usize`; digits that do not fit are a parse error.
fn parse_usize(input: &str) -> ParseResult<'_, usize> {
    map_res(digit1, |digits: &str| digits.parse::<usize>()).parse(input)
}
/// Parses the edit part of a nucleotide variant (everything after the location).
///
/// Branches are tried in order and the first success wins, so the order is significant:
///
/// - `delins` precedes `del`, because `del` is a prefix of it.
/// - Every keyword edit (`del`, `dup`, `ins`, `inv`) precedes the repeat branches. The
///   repeat-with-sequence branch reads its unit with `nucleotide_literal`, which accepts
///   any run of ASCII letters; if it were tried before `ins`, an insertion such as
///   `insN[12]` would be accepted as a repeat whose unit is `"insN"` rather than as an
///   insertion of the repeated sequence `N[12]`.
/// - Substitution (`REF>ALT`) is last.
///
/// The parser does not require the input to be fully consumed; callers decide that.
fn nucleotide_edit(input: &str) -> ParseResult<'_, NucleotideEdit> {
    alt((
        value(NucleotideEdit::NoChange, char('=')),
        map(
            preceded(tag("delins"), nucleotide_sequence_items),
            |items| NucleotideEdit::DeletionInsertion { items },
        ),
        value(NucleotideEdit::Deletion, tag("del")),
        value(NucleotideEdit::Duplication, tag("dup")),
        // Keyword edits must win over the letter-greedy repeat unit below.
        map(preceded(tag("ins"), nucleotide_sequence_items), |items| {
            NucleotideEdit::Insertion { items }
        }),
        value(NucleotideEdit::Inversion, tag("inv")),
        nucleotide_repeat_without_sequence,
        nucleotide_repeat_with_sequence,
        map(
            pair(nucleotide_literal, preceded(char('>'), nucleotide_literal)),
            |(reference, alternate)| NucleotideEdit::Substitution {
                reference,
                alternate,
            },
        ),
    ))
    .parse(input)
}
fn nucleotide_repeat_without_sequence(input: &str) -> ParseResult<'_, NucleotideEdit> {
map(
pair(repeat_count_only_block, many0(repeat_located_count_block)),
|(first, rest)| {
let mut blocks = Vec::with_capacity(rest.len() + 1);
blocks.push(first);
blocks.extend(rest);
NucleotideEdit::Repeat { blocks }
},
)
.parse(input)
}
/// Parses a repeat written with unit sequences: one or more `<unit>[count]` blocks.
fn nucleotide_repeat_with_sequence(input: &str) -> ParseResult<'_, NucleotideEdit> {
    let (rest, blocks) = many1(repeat_sequence_block).parse(input)?;
    Ok((rest, NucleotideEdit::Repeat { blocks }))
}
/// Parses inserted sequence content: either a bracketed, `;`-separated list of items
/// (`[a;b;…]`) or a single unbracketed item, returned as a one-element vector.
fn nucleotide_sequence_items(input: &str) -> ParseResult<'_, Vec<NucleotideSequenceItem>> {
    let bracketed_list = delimited(
        char('['),
        separated_list1(char(';'), nucleotide_sequence_item),
        char(']'),
    );
    let single_item = map(nucleotide_sequence_item, |item| vec![item]);
    alt((bracketed_list, single_item)).parse(input)
}
/// Parses one sequence item, trying in order: a counted repeat (`N[12]`), a copied segment
/// (a location, optionally from another reference), then a plain literal.
fn nucleotide_sequence_item(input: &str) -> ParseResult<'_, NucleotideSequenceItem> {
    let repeated = map(sequence_repeat, NucleotideSequenceItem::Repeat);
    let copied = map(sequence_segment, NucleotideSequenceItem::Copied);
    let literal = map(nucleotide_literal, |value| {
        NucleotideSequenceItem::Literal(LiteralSequenceItem { value })
    });
    alt((repeated, copied, literal)).parse(input)
}
fn sequence_repeat(input: &str) -> ParseResult<'_, RepeatSequenceItem> {
map(
pair(
nucleotide_literal,
delimited(char('['), parse_usize, char(']')),
),
|(unit, count)| RepeatSequenceItem { unit, count },
)
.parse(input)
}
fn repeat_sequence_block(input: &str) -> ParseResult<'_, NucleotideRepeatBlock> {
map(
pair(
nucleotide_literal,
delimited(char('['), parse_usize, char(']')),
),
|(unit, count)| NucleotideRepeatBlock {
count,
unit: Some(unit),
location: None,
},
)
.parse(input)
}
/// Parses `[count]` as a repeat block with neither unit nor location.
fn repeat_count_only_block(input: &str) -> ParseResult<'_, NucleotideRepeatBlock> {
    let (rest, count) = delimited(char('['), parse_usize, char(']')).parse(input)?;
    let block = NucleotideRepeatBlock {
        count,
        unit: None,
        location: None,
    };
    Ok((rest, block))
}
fn repeat_located_count_block(input: &str) -> ParseResult<'_, NucleotideRepeatBlock> {
map(
pair(
nucleotide_interval,
delimited(char('['), parse_usize, char(']')),
),
|(location, count)| NucleotideRepeatBlock {
count,
unit: None,
location: Some(location),
},
)
.parse(input)
}
/// Parses a copied segment, preferring the form with an explicit source reference and
/// falling back to a location on the current reference.
fn sequence_segment(input: &str) -> ParseResult<'_, CopiedSequenceItem> {
    let sources = (remote_sequence_segment, current_reference_sequence_segment);
    alt(sources).parse(input)
}
/// Parses `<location>` with an optional trailing `inv`, as a segment copied from the
/// current reference (no source reference or coordinate system recorded).
fn current_reference_sequence_segment(input: &str) -> ParseResult<'_, CopiedSequenceItem> {
    let (rest, (source_location, inversion)) =
        pair(nucleotide_interval, opt(tag("inv"))).parse(input)?;
    let segment = CopiedSequenceItem {
        source_reference: None,
        source_coordinate_system: None,
        source_location,
        is_inverted: inversion.is_some(),
    };
    Ok((rest, segment))
}
fn remote_sequence_segment(input: &str) -> ParseResult<'_, CopiedSequenceItem> {
map(
(
reference_spec,
char(':'),
coordinate_system,
char('.'),
nucleotide_interval,
opt(tag("inv")),
),
|(source_reference, _, source_coordinate_system, _, source_location, is_inverted)| {
CopiedSequenceItem {
source_reference: Some(source_reference),
source_coordinate_system: Some(source_coordinate_system),
source_location,
is_inverted: is_inverted.is_some(),
}
},
)
.parse(input)
}
/// Parses one or more ASCII letters (either case) as a literal sequence.
///
/// No check is made that the letters are nucleotide codes.
fn nucleotide_literal(input: &str) -> ParseResult<'_, String> {
    let letters = take_while1(|c: char| matches!(c, 'a'..='z' | 'A'..='Z'));
    map(letters, String::from).parse(input)
}
fn protein_description(input: &str) -> ParseResult<'_, VariantDescription> {
alt((
map(delimited(char('('), protein_effect, char(')')), |effect| {
VariantDescription::Protein(ProteinVariant {
is_predicted: true,
effect,
})
}),
map(protein_effect, |effect| {
VariantDescription::Protein(ProteinVariant {
is_predicted: false,
effect,
})
}),
))
.parse(input)
}
/// Parses a protein effect: `?` (unknown), `0` (no protein produced), or a location
/// followed by an edit. A location/edit pair rejected by `build_protein_edit_effect` is a
/// parse error.
fn protein_effect(input: &str) -> ParseResult<'_, ProteinEffect> {
    let unknown = value(ProteinEffect::Unknown, char('?'));
    let no_protein = value(ProteinEffect::NoProteinProduced, char('0'));
    let located_edit = map_res(
        pair(protein_interval, protein_edit),
        build_protein_edit_effect,
    );
    alt((unknown, no_protein, located_edit)).parse(input)
}
/// Combines a parsed location and edit into `ProteinEffect::Edit`, or returns `Err(())`
/// when `resolve_protein_effect_location` rejects the combination.
fn build_protein_edit_effect(
    (location, edit): (Interval<ProteinCoordinate>, ProteinEdit),
) -> Result<ProteinEffect, ()> {
    match resolve_protein_effect_location(&location, &edit) {
        Some(location) => Ok(ProteinEffect::Edit { location, edit }),
        None => Err(()),
    }
}
fn protein_interval(input: &str) -> ParseResult<'_, Interval<ProteinCoordinate>> {
alt((
map(
pair(protein_coordinate, preceded(char('_'), protein_coordinate)),
|(start, end)| Interval {
start,
end: Some(end),
},
),
map(protein_coordinate, |start| Interval { start, end: None }),
))
.parse(input)
}
fn protein_coordinate(input: &str) -> ParseResult<'_, ProteinCoordinate> {
map(pair(protein_symbol, parse_i32), |(residue, ordinal)| {
ProteinCoordinate { residue, ordinal }
})
.parse(input)
}
/// Parses the edit part of a protein variant.
///
/// Alternatives are tried in order; `delins` is tried before `del`, and a plain
/// substitution (a bare residue symbol) is tried last so that extension and frameshift
/// forms beginning with a residue get the first chance.
fn protein_edit(input: &str) -> ParseResult<'_, ProteinEdit> {
    let deletion_insertion = map(preceded(tag("delins"), protein_sequence), |sequence| {
        ProteinEdit::DeletionInsertion { sequence }
    });
    let repeat = map(delimited(char('['), parse_usize, char(']')), |count| {
        ProteinEdit::Repeat { count }
    });
    let insertion = map(preceded(tag("ins"), protein_sequence), |sequence| {
        ProteinEdit::Insertion { sequence }
    });
    let substitution = map(protein_symbol, |to| ProteinEdit::Substitution { to });
    alt((
        value(ProteinEdit::Unknown, char('?')),
        value(ProteinEdit::NoChange, char('=')),
        deletion_insertion,
        value(ProteinEdit::Deletion, tag("del")),
        value(ProteinEdit::Duplication, tag("dup")),
        repeat,
        protein_extension_edit,
        protein_frameshift_edit,
        insertion,
        substitution,
    ))
    .parse(input)
}
fn protein_extension_edit(input: &str) -> ParseResult<'_, ProteinEdit> {
alt((
map(
preceded(tag("ext"), protein_n_terminal_extension_ordinal),
|terminal_ordinal| {
ProteinEdit::Extension(ProteinExtensionEdit {
to_terminal: ProteinExtensionTerminal::N,
to_residue: None,
terminal_ordinal: Some(terminal_ordinal),
})
},
),
map(
pair(
protein_extension_residue,
protein_c_terminal_extension_state,
),
|(to_residue, terminal_ordinal)| {
ProteinEdit::Extension(ProteinExtensionEdit {
to_terminal: ProteinExtensionTerminal::C,
to_residue: Some(to_residue),
terminal_ordinal,
})
},
),
))
.parse(input)
}
/// Parses `-<n>` and returns the negated value, e.g. `-5` yields `-5`.
fn protein_n_terminal_extension_ordinal(input: &str) -> ParseResult<'_, i32> {
    let (rest, magnitude) = preceded(char('-'), parse_i32).parse(input)?;
    Ok((rest, -magnitude))
}
/// Parses the new residue of a C-terminal extension; any symbol except `Ter` (which also
/// covers `*`, since `protein_symbol` normalises it) is accepted.
fn protein_extension_residue(input: &str) -> ParseResult<'_, String> {
    let (rest, residue) = protein_symbol(input)?;
    if residue != "Ter" {
        return Ok((rest, residue));
    }
    // The error is reported at the position after the rejected symbol.
    Err(nom::Err::Error(nom::error::Error::new(
        rest,
        nom::error::ErrorKind::Verify,
    )))
}
/// Parses the tail of a C-terminal extension: `ext`, a stop marker (`Ter` or `*`), then
/// either `?` (yielding `None`) or a number (yielding `Some(n)`).
fn protein_c_terminal_extension_state(input: &str) -> ParseResult<'_, Option<i32>> {
    let stop_marker = alt((tag("Ter"), tag("*")));
    let ordinal = alt((value(None, char('?')), map(parse_i32, Some)));
    preceded(tag("ext"), preceded(stop_marker, ordinal)).parse(input)
}
fn protein_frameshift_edit(input: &str) -> ParseResult<'_, ProteinEdit> {
alt((
map(
pair(
protein_frameshift_residue,
pair(tag("fs"), protein_frameshift_stop),
),
|(to_residue, (_, stop))| ProteinEdit::Frameshift {
to_residue: Some(to_residue),
stop,
},
),
value(
ProteinEdit::Frameshift {
to_residue: None,
stop: ProteinFrameshiftStop {
ordinal: None,
kind: ProteinFrameshiftStopKind::Omitted,
},
},
tag("fs"),
),
))
.parse(input)
}
/// Parses the stop part of a long-form frameshift: a stop marker (`Ter` or `*`) followed
/// by `?` (kind `Unknown`, no ordinal) or a number (kind `Known`, that ordinal).
fn protein_frameshift_stop(input: &str) -> ParseResult<'_, ProteinFrameshiftStop> {
    let unknown_position = value(
        ProteinFrameshiftStop {
            ordinal: None,
            kind: ProteinFrameshiftStopKind::Unknown,
        },
        char('?'),
    );
    let known_position = map(parse_usize, |ordinal| ProteinFrameshiftStop {
        ordinal: Some(ordinal),
        kind: ProteinFrameshiftStopKind::Known,
    });
    preceded(
        alt((tag("Ter"), tag("*"))),
        alt((unknown_position, known_position)),
    )
    .parse(input)
}
/// Parses the new residue of a long-form frameshift; any symbol except `Ter` (which also
/// covers `*`, since `protein_symbol` normalises it) is accepted.
fn protein_frameshift_residue(input: &str) -> ParseResult<'_, String> {
    let (rest, residue) = protein_symbol(input)?;
    if residue != "Ter" {
        return Ok((rest, residue));
    }
    // The error is reported at the position after the rejected symbol.
    Err(nom::Err::Error(nom::error::Error::new(
        rest,
        nom::error::ErrorKind::Verify,
    )))
}
/// Parses one or more consecutive residue symbols.
fn protein_sequence(input: &str) -> ParseResult<'_, ProteinSequence> {
    let (rest, residues) = many1(protein_symbol).parse(input)?;
    Ok((rest, ProteinSequence { residues }))
}
fn protein_symbol(input: &str) -> ParseResult<'_, String> {
for symbol in PROTEIN_SYMBOLS {
if let Some(rest) = input.strip_prefix(symbol) {
return Ok((rest, normalize_protein_symbol(symbol)));
}
}
Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)))
}
/// Returns the stored form of a residue symbol: `*` becomes `Ter`; everything else is
/// copied unchanged.
fn normalize_protein_symbol(symbol: &str) -> String {
    let canonical = match symbol {
        "*" => "Ter",
        other => other,
    };
    canonical.to_string()
}
/// Validates the location of a protein edit and returns the location to store.
///
/// Non-extension edits are always accepted with the location unchanged. Extension edits
/// are accepted only on a single position (no interval end) that is consistent with the
/// extended terminal:
///
/// - N-terminal: position is `Met1`, no new residue, and a negative terminal ordinal.
/// - C-terminal: position residue is `Ter`, a new residue is present, and the terminal
///   ordinal is either absent or positive.
///
/// Returns `None` when an extension fails these checks.
///
/// NOTE(review): the N-terminal check compares the residue against `"Met"` only, so the
/// one-letter form `M1ext-5` is rejected although `protein_symbol` accepts `M` — confirm
/// whether that is intended.
fn resolve_protein_effect_location(
    location: &Interval<ProteinCoordinate>,
    edit: &ProteinEdit,
) -> Option<Interval<ProteinCoordinate>> {
    let ProteinEdit::Extension(extension) = edit else {
        return Some(location.clone());
    };
    if location.end.is_some() {
        return None;
    }
    let anchor = &location.start;
    let is_consistent = match extension.to_terminal {
        ProteinExtensionTerminal::N => {
            anchor.residue == "Met"
                && anchor.ordinal == 1
                && extension.to_residue.is_none()
                && matches!(extension.terminal_ordinal, Some(ordinal) if ordinal < 0)
        }
        ProteinExtensionTerminal::C => {
            anchor.residue == "Ter"
                && extension.to_residue.is_some()
                && !matches!(extension.terminal_ordinal, Some(ordinal) if ordinal <= 0)
        }
    };
    is_consistent.then(|| Interval {
        start: anchor.clone(),
        end: None,
    })
}
/// Computes the location stored for a nucleotide variant.
///
/// For a repeat edit in which at least one block carries its own location, the result
/// runs from the start of `initial_location` to the end of the last such block's location
/// (or to that location's start when it has no end). In every other case the initial
/// location is returned unchanged.
fn resolve_nucleotide_location(
    initial_location: &Interval<NucleotideCoordinate>,
    edit: &NucleotideEdit,
) -> Interval<NucleotideCoordinate> {
    let last_located = if let NucleotideEdit::Repeat { blocks } = edit {
        blocks
            .iter()
            .rev()
            .find_map(|block| block.location.as_ref())
    } else {
        None
    };
    match last_located {
        Some(last) => {
            let end = last.end.clone().unwrap_or_else(|| last.start.clone());
            Interval {
                start: initial_location.start.clone(),
                end: Some(end),
            }
        }
        None => initial_location.clone(),
    }
}
/// Checks whether a repeat edit has a shape allowed for the given coordinate system.
/// Non-repeat edits are always valid.
///
/// - RNA: valid when no block has a unit; or when every block has a unit, there is exactly
///   one block, no block has a location, and the initial location is a single position.
/// - Genomic, circular genomic, mitochondrial, coding and non-coding DNA: valid when every
///   block has a unit and no block has a location.
/// - Protein: never valid.
fn is_valid_nucleotide_repeat(
    coordinate_system: CoordinateSystem,
    initial_location: &Interval<NucleotideCoordinate>,
    edit: &NucleotideEdit,
) -> bool {
    let NucleotideEdit::Repeat { blocks } = edit else {
        return true;
    };
    let blocks_with_unit = blocks.iter().filter(|block| block.unit.is_some()).count();
    let every_block_has_unit = blocks_with_unit == blocks.len();
    let no_block_has_unit = blocks_with_unit == 0;
    let some_block_is_located = blocks.iter().any(|block| block.location.is_some());
    match coordinate_system {
        CoordinateSystem::Protein => false,
        CoordinateSystem::Rna => {
            no_block_has_unit
                || (every_block_has_unit
                    && blocks.len() == 1
                    && !some_block_is_located
                    && initial_location.end.is_none())
        }
        CoordinateSystem::Genomic
        | CoordinateSystem::CircularGenomic
        | CoordinateSystem::Mitochondrial
        | CoordinateSystem::CodingDna
        | CoordinateSystem::NonCodingDna => every_block_has_unit && !some_block_is_located,
    }
}
// Unit tests for the individual sub-parsers. Each parser is wrapped in `all_consuming` so
// that leftover input counts as a failure.
#[cfg(test)]
mod tests {
use nom::combinator::all_consuming;
use super::*;
// Covers the three coordinate forms: absolute with +/- offset, `-N`, and `*N`.
#[test]
fn parses_nucleotide_position_branches() {
let (_, coding) = all_consuming(nucleotide_coordinate).parse("93+1").unwrap();
assert_eq!(coding.anchor, NucleotideAnchor::Absolute);
assert_eq!(coding.coordinate, 93);
assert_eq!(coding.offset, 1);
let (_, upstream_intronic) = all_consuming(nucleotide_coordinate).parse("93-2").unwrap();
assert_eq!(upstream_intronic.anchor, NucleotideAnchor::Absolute);
assert_eq!(upstream_intronic.coordinate, 93);
assert_eq!(upstream_intronic.offset, -2);
let (_, utr5) = all_consuming(nucleotide_coordinate).parse("-18").unwrap();
assert_eq!(utr5.anchor, NucleotideAnchor::RelativeCdsStart);
assert_eq!(utr5.coordinate, -18);
assert_eq!(utr5.offset, 0);
let (_, utr3) = all_consuming(nucleotide_coordinate).parse("*18").unwrap();
assert_eq!(utr3.anchor, NucleotideAnchor::RelativeCdsEnd);
assert_eq!(utr3.coordinate, 18);
assert_eq!(utr3.offset, 0);
}
// Covers one input per nucleotide edit kind, plus rejection of trailing text after `del`.
#[test]
fn parses_nucleotide_edit_branches() {
assert_eq!(
all_consuming(nucleotide_edit).parse("=").unwrap().1,
NucleotideEdit::NoChange
);
assert_eq!(
all_consuming(nucleotide_edit).parse("del").unwrap().1,
NucleotideEdit::Deletion
);
// `del` matches but leaves `A` unconsumed, which `all_consuming` rejects.
assert!(all_consuming(nucleotide_edit).parse("delA").is_err());
assert_eq!(
all_consuming(nucleotide_edit).parse("dup").unwrap().1,
NucleotideEdit::Duplication
);
assert_eq!(
all_consuming(nucleotide_edit).parse("inv").unwrap().1,
NucleotideEdit::Inversion
);
assert!(matches!(
all_consuming(nucleotide_edit).parse("C>A").unwrap().1,
NucleotideEdit::Substitution { .. }
));
assert!(matches!(
all_consuming(nucleotide_edit).parse("insT").unwrap().1,
NucleotideEdit::Insertion { .. }
));
assert!(matches!(
all_consuming(nucleotide_edit).parse("delinsT").unwrap().1,
NucleotideEdit::DeletionInsertion { .. }
));
assert!(matches!(
all_consuming(nucleotide_edit).parse("[4]").unwrap().1,
NucleotideEdit::Repeat { .. }
));
assert!(matches!(
all_consuming(nucleotide_edit).parse("CAG[23]").unwrap().1,
NucleotideEdit::Repeat { .. }
));
}
// Covers the item kinds accepted inside an insertion: literal, counted repeat, local
// (inverted) segment, and bracketed remote segment.
#[test]
fn parses_nucleotide_sequence_items() {
let (_, literal) = all_consuming(nucleotide_sequence_items).parse("T").unwrap();
assert_eq!(literal.len(), 1);
let (_, repeat) = all_consuming(nucleotide_sequence_items)
.parse("N[12]")
.unwrap();
assert!(matches!(
repeat.first().unwrap(),
NucleotideSequenceItem::Repeat(RepeatSequenceItem { unit, count })
if unit == "N" && *count == 12
));
let (_, local) = all_consuming(nucleotide_sequence_items)
.parse("850_900inv")
.unwrap();
assert!(matches!(
local.first().unwrap(),
NucleotideSequenceItem::Copied(CopiedSequenceItem {
source_reference: None,
source_coordinate_system: None,
is_inverted: true,
..
})
));
let (_, remote) = all_consuming(nucleotide_sequence_items)
.parse("[NC_000022.10:g.35788169_35788352]")
.unwrap();
assert!(matches!(
remote.first().unwrap(),
NucleotideSequenceItem::Copied(CopiedSequenceItem {
source_reference: Some(_),
source_coordinate_system: Some(CoordinateSystem::Genomic),
..
})
));
}
// Covers `?`, `0`, and one located edit per protein edit kind exercised here.
#[test]
fn parses_protein_effect_branches() {
assert_eq!(
all_consuming(protein_effect).parse("?").unwrap().1,
ProteinEffect::Unknown
);
assert_eq!(
all_consuming(protein_effect).parse("0").unwrap().1,
ProteinEffect::NoProteinProduced
);
assert!(matches!(
all_consuming(protein_effect).parse("Met1?").unwrap().1,
ProteinEffect::Edit {
edit: ProteinEdit::Unknown,
..
}
));
assert!(matches!(
all_consuming(protein_effect).parse("Trp24Ter").unwrap().1,
ProteinEffect::Edit {
edit: ProteinEdit::Substitution { .. },
..
}
));
assert!(matches!(
all_consuming(protein_effect).parse("Ala2[10]").unwrap().1,
ProteinEffect::Edit {
edit: ProteinEdit::Repeat { count: 10 },
..
}
));
assert!(matches!(
all_consuming(protein_effect).parse("Arg97fs").unwrap().1,
ProteinEffect::Edit {
edit: ProteinEdit::Frameshift {
to_residue: None,
stop: ProteinFrameshiftStop {
ordinal: None,
kind: ProteinFrameshiftStopKind::Omitted,
},
},
..
}
));
assert!(matches!(
all_consuming(protein_effect).parse("Met1ext-5").unwrap().1,
ProteinEffect::Edit {
edit: ProteinEdit::Extension(ProteinExtensionEdit {
to_terminal: ProteinExtensionTerminal::N,
to_residue: None,
terminal_ordinal: Some(-5),
}),
..
}
));
assert!(matches!(
all_consuming(protein_effect)
.parse("Ter110GlnextTer17")
.unwrap()
.1,
ProteinEffect::Edit {
edit: ProteinEdit::Extension(ProteinExtensionEdit {
to_terminal: ProteinExtensionTerminal::C,
to_residue: Some(_),
terminal_ordinal: Some(17),
}),
..
}
));
assert!(matches!(
all_consuming(protein_effect)
.parse("Arg97ProfsTer23")
.unwrap()
.1,
ProteinEffect::Edit {
edit: ProteinEdit::Frameshift {
to_residue: Some(_),
stop: ProteinFrameshiftStop {
ordinal: Some(23),
kind: ProteinFrameshiftStopKind::Known,
},
},
..
}
));
assert!(matches!(
all_consuming(protein_effect)
.parse("Ile327Argfs*?")
.unwrap()
.1,
ProteinEffect::Edit {
edit: ProteinEdit::Frameshift {
to_residue: Some(_),
stop: ProteinFrameshiftStop {
ordinal: None,
kind: ProteinFrameshiftStopKind::Unknown,
},
},
..
}
));
}
// Covers the short form, the long form with known and unknown stop, and rejection of
// `Ter` as the new residue.
#[test]
fn parses_protein_frameshift_branches() {
assert_eq!(
all_consuming(protein_edit).parse("fs").unwrap().1,
ProteinEdit::Frameshift {
to_residue: None,
stop: ProteinFrameshiftStop {
ordinal: None,
kind: ProteinFrameshiftStopKind::Omitted,
},
}
);
assert_eq!(
all_consuming(protein_edit).parse("ProfsTer23").unwrap().1,
ProteinEdit::Frameshift {
to_residue: Some("Pro".to_string()),
stop: ProteinFrameshiftStop {
ordinal: Some(23),
kind: ProteinFrameshiftStopKind::Known,
},
}
);
assert_eq!(
all_consuming(protein_edit).parse("Argfs*?").unwrap().1,
ProteinEdit::Frameshift {
to_residue: Some("Arg".to_string()),
stop: ProteinFrameshiftStop {
ordinal: None,
kind: ProteinFrameshiftStopKind::Unknown,
},
}
);
assert!(all_consuming(protein_edit).parse("TerfsTer2").is_err());
}
// Covers the N-terminal form, the C-terminal form with known and unknown stop, and
// rejection of `Ter` as the new residue.
#[test]
fn parses_protein_extension_branches() {
assert_eq!(
all_consuming(protein_edit).parse("ext-5").unwrap().1,
ProteinEdit::Extension(ProteinExtensionEdit {
to_terminal: ProteinExtensionTerminal::N,
to_residue: None,
terminal_ordinal: Some(-5),
})
);
assert_eq!(
all_consuming(protein_edit).parse("GlnextTer17").unwrap().1,
ProteinEdit::Extension(ProteinExtensionEdit {
to_terminal: ProteinExtensionTerminal::C,
to_residue: Some("Gln".to_string()),
terminal_ordinal: Some(17),
})
);
assert_eq!(
all_consuming(protein_edit).parse("Argext*?").unwrap().1,
ProteinEdit::Extension(ProteinExtensionEdit {
to_terminal: ProteinExtensionTerminal::C,
to_residue: Some("Arg".to_string()),
terminal_ordinal: None,
})
);
assert!(all_consuming(protein_edit).parse("TerextTer17").is_err());
}
}