use crate::error::FerroError;
use crate::hgvs::edit::ProteinEdit;
use crate::hgvs::interval::{
CdsInterval, GenomeInterval, Interval, ProtInterval, RnaInterval, TxInterval, UncertainBoundary,
};
use crate::hgvs::location::{AminoAcid, CdsPos, ProtPos};
use crate::hgvs::parser::accession::{parse_accession, parse_gene_symbol};
use crate::hgvs::parser::edit::{parse_na_edit, parse_protein_edit};
use crate::hgvs::parser::position::{
parse_amino_acid, parse_cds_pos, parse_genome_pos, parse_prot_pos, parse_rna_pos, parse_tx_pos,
};
use crate::hgvs::uncertainty::Mu;
use crate::hgvs::variant::{
Accession, AllelePhase, AlleleVariant, CdsVariant, CircularVariant, GenomeVariant, HgvsVariant,
LocEdit, MtVariant, ProteinVariant, RnaFusionBreakpoint, RnaFusionVariant, RnaVariant,
TxVariant,
};
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{char, digit1},
combinator::map,
sequence::{delimited, preceded},
IResult, Parser,
};
/// Parses a single transcript position together with its certainty marker.
///
/// Alternatives are tried in this order:
/// 1. a bare `?`, producing `Mu::Unknown`;
/// 2. a position wrapped in parentheses, passed through `Mu::uncertain`;
/// 3. a plain position, passed through `Mu::certain`.
fn parse_uncertain_tx_pos(input: &str) -> IResult<&str, Mu<crate::hgvs::location::TxPos>> {
    let unknown = map(tag("?"), |_| Mu::Unknown);
    let parenthesized = map(delimited(char('('), parse_tx_pos, char(')')), Mu::uncertain);
    let plain = map(parse_tx_pos, Mu::certain);

    alt((unknown, parenthesized, plain)).parse(input)
}
/// Parses a single RNA position together with its certainty marker.
///
/// Alternatives are tried in this order:
/// 1. a bare `?`, producing `Mu::Unknown`;
/// 2. a position wrapped in parentheses, passed through `Mu::uncertain`;
/// 3. a plain position, passed through `Mu::certain`.
fn parse_uncertain_rna_pos(input: &str) -> IResult<&str, Mu<crate::hgvs::location::RnaPos>> {
    let unknown = map(tag("?"), |_| Mu::Unknown);
    let parenthesized = map(delimited(char('('), parse_rna_pos, char(')')), Mu::uncertain);
    let plain = map(parse_rna_pos, Mu::certain);

    alt((unknown, parenthesized, plain)).parse(input)
}
fn parse_cds_intronic_range_inner(
input: &str,
) -> IResult<&str, UncertainBoundary<crate::hgvs::location::CdsPos>> {
let (remaining, (start, _, end)) = (parse_cds_pos, tag("_"), parse_cds_pos).parse(input)?;
if start.offset.is_some() || end.offset.is_some() {
Ok((
remaining,
UncertainBoundary::range(Mu::certain(start), Mu::certain(end)),
))
} else {
Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)))
}
}
fn parse_cds_unparenthesized_range_start(
input: &str,
) -> IResult<&str, UncertainBoundary<crate::hgvs::location::CdsPos>> {
let (after_question, _) = tag::<_, _, nom::error::Error<&str>>("?_").parse(input)?;
let (remaining, pos) = parse_cds_pos(after_question)?;
if !remaining.starts_with('_') {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
Ok((
remaining,
UncertainBoundary::range(Mu::Unknown, Mu::certain(pos)),
))
}
/// Parses a parenthesized CDS range used as one boundary of an interval.
///
/// Accepted inner forms, tried in order:
/// * `?_pos` — unknown start, certain end;
/// * `pos_?` — certain start, unknown end;
/// * `pos_pos` where at least one position has an offset
///   (`parse_cds_intronic_range_inner`);
/// * `pos_pos` — both ends certain.
fn parse_cds_range_boundary(
    input: &str,
) -> IResult<&str, UncertainBoundary<crate::hgvs::location::CdsPos>> {
    let (inner, _) = char('(').parse(input)?;

    let open_start = map((tag("?"), tag("_"), parse_cds_pos), |(_, _, end)| {
        UncertainBoundary::range(Mu::Unknown, Mu::certain(end))
    });
    let open_end = map((parse_cds_pos, tag("_"), tag("?")), |(start, _, _)| {
        UncertainBoundary::range(Mu::certain(start), Mu::Unknown)
    });
    let closed = map(
        (parse_cds_pos, tag("_"), parse_cds_pos),
        |(start, _, end)| UncertainBoundary::range(Mu::certain(start), Mu::certain(end)),
    );

    let (after_range, boundary) =
        alt((open_start, open_end, parse_cds_intronic_range_inner, closed)).parse(inner)?;
    let (rest, _) = char(')').parse(after_range)?;
    Ok((rest, boundary))
}
#[inline]
fn parse_cds_boundary(
input: &str,
) -> IResult<&str, UncertainBoundary<crate::hgvs::location::CdsPos>> {
let bytes = input.as_bytes();
if bytes.is_empty() {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Eof,
)));
}
if bytes[0].is_ascii_digit() || bytes[0] == b'*' || bytes[0] == b'-' {
return map(parse_cds_pos, UncertainBoundary::certain).parse(input);
}
alt((
parse_cds_range_boundary,
map(
delimited(char('('), parse_cds_pos, char(')')),
UncertainBoundary::uncertain,
),
parse_cds_unparenthesized_range_start,
map(tag("?"), |_| UncertainBoundary::unknown()),
))
.parse(input)
}
/// Parses a parenthesized genomic range used as one boundary of an interval.
///
/// Accepted inner forms, tried in order:
/// * `?_pos` — unknown start, certain end;
/// * `pos_?` — certain start, unknown end;
/// * `pos_pos` — both ends certain.
fn parse_genome_range_boundary(
    input: &str,
) -> IResult<&str, UncertainBoundary<crate::hgvs::location::GenomePos>> {
    let (inner, _) = char('(').parse(input)?;

    let open_start = map((tag("?"), tag("_"), parse_genome_pos), |(_, _, end)| {
        UncertainBoundary::range(Mu::Unknown, Mu::certain(end))
    });
    let open_end = map((parse_genome_pos, tag("_"), tag("?")), |(start, _, _)| {
        UncertainBoundary::range(Mu::certain(start), Mu::Unknown)
    });
    let closed = map(
        (parse_genome_pos, tag("_"), parse_genome_pos),
        |(start, _, end)| UncertainBoundary::range(Mu::certain(start), Mu::certain(end)),
    );

    let (after_range, boundary) = alt((open_start, open_end, closed)).parse(inner)?;
    let (rest, _) = char(')').parse(after_range)?;
    Ok((rest, boundary))
}
#[inline]
fn parse_genome_boundary(
input: &str,
) -> IResult<&str, UncertainBoundary<crate::hgvs::location::GenomePos>> {
let bytes = input.as_bytes();
if bytes.is_empty() {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Eof,
)));
}
if bytes[0].is_ascii_digit() {
return map(parse_genome_pos, UncertainBoundary::certain).parse(input);
}
alt((
parse_genome_range_boundary,
map(
delimited(char('('), parse_genome_pos, char(')')),
UncertainBoundary::uncertain,
),
map(tag("?"), |_| UncertainBoundary::unknown()),
))
.parse(input)
}
/// Returns `true` when `bytes` begins with one of the literal markers
/// `pter`, `qter` or `cen`.
#[inline]
fn starts_with_special_pos(bytes: &[u8]) -> bool {
    bytes.starts_with(b"pter") || bytes.starts_with(b"qter") || bytes.starts_with(b"cen")
}
/// Parses a genomic interval: a single position, a `start_end` range, or any
/// combination of uncertain / unknown / parenthesized-range boundaries.
///
/// Strategy:
/// * Empty input fails with `ErrorKind::Eof`.
/// * Fast path: when the input starts with an ASCII digit or with one of the
///   markers recognised by `starts_with_special_pos`, a start position is
///   parsed directly. If no `_` follows, a point interval is returned. If
///   `_` is followed by another digit / special marker, the end is parsed and
///   a certain range is returned. If `_` is followed by anything else, control
///   falls through to the general alternatives below, which re-parse `input`
///   from the beginning.
/// * General path (`alt`, in order): a parenthesized `(start_end)` not followed
///   by `_`; a `boundary_boundary` pair; a single boundary.
///
/// Ranges whose start base is greater than their end base are rejected with
/// `ErrorKind::Verify`, subject to the exceptions noted inline.
#[inline]
fn parse_genome_interval(input: &str) -> IResult<&str, GenomeInterval> {
let bytes = input.as_bytes();
if bytes.is_empty() {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Eof,
)));
}
// Fast path for inputs that begin with a plain or special position.
if bytes[0].is_ascii_digit() || starts_with_special_pos(bytes) {
let (remaining, start) = parse_genome_pos(input)?;
if let Some(after_underscore) = remaining.strip_prefix('_') {
let after_bytes = after_underscore.as_bytes();
if !after_bytes.is_empty()
&& (after_bytes[0].is_ascii_digit() || starts_with_special_pos(after_bytes))
{
let (remaining, end) = parse_genome_pos(after_underscore)?;
// The start > end check is skipped when the text after the range begins
// with one of these edit keywords.
let allows_inverted = remaining.starts_with("dupins")
|| remaining.starts_with("inv")
|| remaining.starts_with("ins")
|| remaining.starts_with("dup");
// The ordering check is also skipped when either end is a special position.
if !start.is_special()
&& !end.is_special()
&& !allows_inverted
&& start.base > end.base
{
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
return Ok((
remaining,
Interval::with_complex_boundaries(
UncertainBoundary::certain(start),
UncertainBoundary::certain(end),
),
));
}
// `_` followed by something that is not a plain position: fall through to
// the general alternatives below.
} else {
return Ok((remaining, GenomeInterval::point(start)));
}
}
alt((
// `(start_end)` as a whole-interval uncertainty marker.
|input| {
let (remaining, (start, _, end)) = delimited(
char('('),
(parse_genome_pos, tag("_"), parse_genome_pos),
char(')'),
)
.parse(input)?;
// A following `_` means the parenthesized part is only the first boundary
// of a larger interval, so this alternative must not claim it.
if remaining.starts_with('_') {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
if start.base > end.base {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
Ok((
remaining,
Interval::with_uncertainty(Mu::uncertain(start), Mu::uncertain(end)),
))
},
// `boundary_boundary`; ordering is only checked when both boundaries are
// single certain positions.
|input| {
let (remaining, (start, _, end)) =
(parse_genome_boundary, tag("_"), parse_genome_boundary).parse(input)?;
if let (
UncertainBoundary::Single(Mu::Certain(s)),
UncertainBoundary::Single(Mu::Certain(e)),
) = (&start, &end)
{
if s.base > e.base {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
}
Ok((remaining, Interval::with_complex_boundaries(start, end)))
},
// A lone boundary is expanded into an interval with identical start and end.
map(parse_genome_boundary, |boundary| match boundary {
UncertainBoundary::Single(Mu::Certain(inner)) => GenomeInterval::point(inner),
UncertainBoundary::Single(Mu::Uncertain(inner)) => {
Interval::with_uncertainty(Mu::uncertain(inner), Mu::uncertain(inner))
}
UncertainBoundary::Single(Mu::Unknown) => {
Interval::with_uncertainty(Mu::Unknown, Mu::Unknown)
}
UncertainBoundary::Range { start, end } => {
Interval::with_complex_boundaries(
UncertainBoundary::Range {
start: start.clone(),
end: end.clone(),
},
UncertainBoundary::Range { start, end },
)
}
}),
))
.parse(input)
}
fn parse_simple_uncertain_cds_interval(input: &str) -> IResult<&str, CdsInterval> {
let (remaining, _) = char('(').parse(input)?;
let (remaining, start) = parse_cds_pos(remaining)?;
let (remaining, _) = tag("_").parse(remaining)?;
let (remaining, end) = parse_cds_pos(remaining)?;
let (remaining, _) = char(')').parse(remaining)?;
if remaining.starts_with('_') {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
if start.offset.is_none() && end.offset.is_none() {
Ok((
remaining,
Interval::with_uncertainty(Mu::uncertain(start), Mu::uncertain(end)),
))
} else {
if remaining.starts_with('(') {
let (remaining2, _) = char('(').parse(remaining)?;
let (remaining2, end_start) = parse_cds_pos(remaining2)?;
let (remaining2, _) = tag("_").parse(remaining2)?;
let (remaining2, end_end) = parse_cds_pos(remaining2)?;
let (remaining2, _) = char(')').parse(remaining2)?;
return Ok((
remaining2,
Interval::with_complex_boundaries(
UncertainBoundary::range(Mu::certain(start), Mu::certain(end)),
UncertainBoundary::range(Mu::certain(end_start), Mu::certain(end_end)),
),
));
}
Ok((
remaining,
Interval::with_complex_boundaries(
UncertainBoundary::range(Mu::certain(start), Mu::certain(end)),
UncertainBoundary::range(Mu::certain(start), Mu::certain(end)),
),
))
}
}
/// Reports whether `start` lies after `end`, i.e. the pair is inverted.
///
/// The check is deliberately conservative and returns `false` when it cannot
/// decide:
/// * either position reports `is_unknown()`;
/// * either position carries an offset.
///
/// When exactly one position has `utr3` set, the pair is inverted only if
/// that position is `start`. When both have the same `utr3` value, the pair
/// is inverted when `start.base > end.base`.
fn cds_pos_is_inverted(start: &CdsPos, end: &CdsPos) -> bool {
    if start.is_unknown() || end.is_unknown() {
        return false;
    }
    if start.offset.is_some() || end.offset.is_some() {
        return false;
    }
    if start.utr3 != end.utr3 {
        return start.utr3;
    }
    // Same `utr3` value on both sides: a plain base comparison decides.
    start.base > end.base
}
/// Parses a CDS interval: a single position, a `start_end` range, or any
/// combination of uncertain / unknown / parenthesized-range boundaries.
///
/// Strategy:
/// * Empty input fails with `ErrorKind::Eof`.
/// * Fast path: taken when the first byte is an ASCII digit, `*`, `-`, or a
///   `?` immediately followed by `+` or `-`. A start position is parsed
///   directly. With no following `_` a point interval is returned. If `_` is
///   followed by a digit, `*` or `-`, the end is parsed and a certain range is
///   returned (rejected with `ErrorKind::Verify` when `cds_pos_is_inverted`
///   says so). If `_` is followed by anything else, control falls through to
///   the general alternatives, which re-parse `input` from the beginning.
/// * General path (`alt`, in order): `parse_simple_uncertain_cds_interval`;
///   a `boundary_boundary` pair; a single boundary.
#[inline]
fn parse_cds_interval(input: &str) -> IResult<&str, CdsInterval> {
let bytes = input.as_bytes();
if bytes.is_empty() {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Eof,
)));
}
// Decide from the first one or two bytes whether the input starts with a
// plain position.
let use_fast_path = bytes[0].is_ascii_digit()
|| bytes[0] == b'*'
|| bytes[0] == b'-'
|| (bytes[0] == b'?' && bytes.len() > 1 && (bytes[1] == b'+' || bytes[1] == b'-'));
if use_fast_path {
let (remaining, start) = parse_cds_pos(input)?;
if let Some(after_underscore) = remaining.strip_prefix('_') {
let after_bytes = after_underscore.as_bytes();
// Only a range whose end also begins like a plain position is handled here.
let is_simple_range = !after_bytes.is_empty()
&& (after_bytes[0].is_ascii_digit()
|| after_bytes[0] == b'*'
|| after_bytes[0] == b'-');
if is_simple_range {
let (remaining, end) = parse_cds_pos(after_underscore)?;
if cds_pos_is_inverted(&start, &end) {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
return Ok((
remaining,
Interval::with_complex_boundaries(
UncertainBoundary::certain(start),
UncertainBoundary::certain(end),
),
));
}
// Otherwise fall through to the general alternatives below.
} else {
return Ok((remaining, CdsInterval::point(start)));
}
}
alt((
parse_simple_uncertain_cds_interval,
// `boundary_boundary`; the inversion check only applies when both
// boundaries are single certain positions.
|input| {
let (remaining, (start, _, end)) =
(parse_cds_boundary, tag("_"), parse_cds_boundary).parse(input)?;
if let (
UncertainBoundary::Single(Mu::Certain(s)),
UncertainBoundary::Single(Mu::Certain(e)),
) = (&start, &end)
{
if cds_pos_is_inverted(s, e) {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
}
Ok((remaining, Interval::with_complex_boundaries(start, end)))
},
// A lone boundary is expanded into an interval with identical start and end.
map(parse_cds_boundary, |boundary| match boundary {
UncertainBoundary::Single(Mu::Certain(inner)) => CdsInterval::point(inner),
UncertainBoundary::Single(Mu::Uncertain(inner)) => {
Interval::with_uncertainty(Mu::uncertain(inner), Mu::uncertain(inner))
}
UncertainBoundary::Single(Mu::Unknown) => {
Interval::with_uncertainty(Mu::Unknown, Mu::Unknown)
}
UncertainBoundary::Range { start, end } => {
Interval::with_complex_boundaries(
UncertainBoundary::Range {
start: start.clone(),
end: end.clone(),
},
UncertainBoundary::Range { start, end },
)
}
}),
))
.parse(input)
}
/// Parses a parenthesized `(start_end)` pair of transcript positions and
/// returns it as a range boundary with both ends certain.
fn parse_tx_range_boundary(
    input: &str,
) -> IResult<&str, UncertainBoundary<crate::hgvs::location::TxPos>> {
    let (inner, _) = char('(').parse(input)?;
    let (after_range, (start, _, end)) = (parse_tx_pos, tag("_"), parse_tx_pos).parse(inner)?;
    let (rest, _) = char(')').parse(after_range)?;

    Ok((
        rest,
        UncertainBoundary::range(Mu::certain(start), Mu::certain(end)),
    ))
}
/// Parses one boundary of a transcript interval.
///
/// Tried in order: a parenthesized range, a parenthesized single position
/// (uncertain), and a plain position (certain).
fn parse_tx_boundary(
    input: &str,
) -> IResult<&str, UncertainBoundary<crate::hgvs::location::TxPos>> {
    let bracketed_single = map(
        delimited(char('('), parse_tx_pos, char(')')),
        UncertainBoundary::uncertain,
    );
    let plain = map(parse_tx_pos, UncertainBoundary::certain);

    alt((parse_tx_range_boundary, bracketed_single, plain)).parse(input)
}
fn parse_tx_interval(input: &str) -> IResult<&str, TxInterval> {
alt((
|input| {
let (remaining, (start, _, end)) =
delimited(char('('), (parse_tx_pos, tag("_"), parse_tx_pos), char(')'))
.parse(input)?;
if remaining.starts_with('_') {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
Ok((
remaining,
Interval::with_uncertainty(Mu::uncertain(start), Mu::uncertain(end)),
))
},
|input| {
let (remaining, (start, _, end)) =
(parse_tx_boundary, tag("_"), parse_tx_boundary).parse(input)?;
Ok((remaining, Interval::with_complex_boundaries(start, end)))
},
map(
(parse_uncertain_tx_pos, tag("_"), parse_uncertain_tx_pos),
|(start, _, end)| Interval::with_uncertainty(start, end),
),
map(parse_uncertain_tx_pos, |pos| match pos {
Mu::Unknown => Interval::with_uncertainty(Mu::Unknown, Mu::Unknown),
Mu::Uncertain(inner) => {
Interval::with_uncertainty(Mu::uncertain(inner), Mu::uncertain(inner))
}
Mu::Certain(inner) => TxInterval::point(inner),
}),
))
.parse(input)
}
/// Parses a parenthesized protein range used as one boundary of an interval.
///
/// Accepted inner forms, tried in order:
/// * `?_pos` — unknown start, certain end;
/// * `pos_?` — certain start, unknown end;
/// * `pos_pos` — both ends certain.
fn parse_prot_range_boundary(
    input: &str,
) -> IResult<&str, UncertainBoundary<crate::hgvs::location::ProtPos>> {
    let (inner, _) = char('(').parse(input)?;

    let open_start = map((tag("?"), tag("_"), parse_prot_pos), |(_, _, end)| {
        UncertainBoundary::range(Mu::Unknown, Mu::certain(end))
    });
    let open_end = map((parse_prot_pos, tag("_"), tag("?")), |(start, _, _)| {
        UncertainBoundary::range(Mu::certain(start), Mu::Unknown)
    });
    let closed = map(
        (parse_prot_pos, tag("_"), parse_prot_pos),
        |(start, _, end)| UncertainBoundary::range(Mu::certain(start), Mu::certain(end)),
    );

    let (after_range, boundary) = alt((open_start, open_end, closed)).parse(inner)?;
    let (rest, _) = char(')').parse(after_range)?;
    Ok((rest, boundary))
}
fn parse_prot_aa_with_range(
input: &str,
) -> IResult<&str, UncertainBoundary<crate::hgvs::location::ProtPos>> {
use crate::hgvs::location::ProtPos;
use nom::character::complete::digit1;
let (remaining, aa) = parse_amino_acid(input)?;
if let Ok((number_remaining, num_str)) =
digit1::<&str, nom::error::Error<&str>>.parse(remaining)
{
if number_remaining.starts_with("(?)") {
let pos = num_str.parse::<u64>().unwrap_or(0);
let (final_remaining, _) =
tag::<&str, &str, nom::error::Error<&str>>("(?)").parse(number_remaining)?;
return Ok((
final_remaining,
UncertainBoundary::uncertain(ProtPos::new(aa, pos)),
));
}
}
if !remaining.starts_with('(') {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
}
let (remaining, _) = char('(').parse(remaining)?;
let (remaining, boundary) = alt((
map(
(tag("?"), tag("_"), digit1),
|(_, _, end): (&str, &str, &str)| {
let pos = end.parse::<u64>().unwrap_or(0);
UncertainBoundary::range(Mu::Unknown, Mu::certain(ProtPos::new(aa, pos)))
},
),
map(
(digit1, tag("_"), tag("?")),
|(start, _, _): (&str, &str, &str)| {
let pos = start.parse::<u64>().unwrap_or(0);
UncertainBoundary::range(Mu::certain(ProtPos::new(aa, pos)), Mu::Unknown)
},
),
map(tag("?"), |_| {
UncertainBoundary::uncertain(ProtPos::new(aa, 0))
}),
))
.parse(remaining)?;
let (remaining, _) = char(')').parse(remaining)?;
Ok((remaining, boundary))
}
fn parse_prot_boundary(
input: &str,
) -> IResult<&str, UncertainBoundary<crate::hgvs::location::ProtPos>> {
alt((
parse_prot_aa_with_range,
parse_prot_range_boundary,
map(
delimited(char('('), parse_prot_pos, char(')')),
UncertainBoundary::uncertain,
),
map(preceded(char('?'), digit1), |n: &str| {
UncertainBoundary::certain(ProtPos::new(AminoAcid::Xaa, n.parse().unwrap_or(0)))
}),
map(tag("?"), |_| UncertainBoundary::unknown()),
map(parse_prot_pos, UncertainBoundary::certain),
))
.parse(input)
}
fn parse_prot_interval(input: &str) -> IResult<&str, ProtInterval> {
alt((
|input| {
let (remaining, (start, _, end)) = delimited(
char('('),
(parse_prot_pos, tag("_"), parse_prot_pos),
char(')'),
)
.parse(input)?;
if remaining.starts_with('_') {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
Ok((
remaining,
Interval::with_uncertainty(Mu::uncertain(start), Mu::uncertain(end)),
))
},
|input| {
let (remaining, (start, _, end)) =
(parse_prot_boundary, tag("_"), parse_prot_boundary).parse(input)?;
Ok((remaining, Interval::with_complex_boundaries(start, end)))
},
map(parse_prot_boundary, |boundary| match boundary {
UncertainBoundary::Single(Mu::Certain(inner)) => ProtInterval::point(inner),
UncertainBoundary::Single(Mu::Uncertain(inner)) => {
Interval::with_uncertainty(Mu::uncertain(inner), Mu::uncertain(inner))
}
UncertainBoundary::Single(Mu::Unknown) => {
Interval::with_uncertainty(Mu::Unknown, Mu::Unknown)
}
UncertainBoundary::Range { start, end } => {
Interval::with_complex_boundaries(
UncertainBoundary::Range {
start: start.clone(),
end: end.clone(),
},
UncertainBoundary::Range { start, end },
)
}
}),
))
.parse(input)
}
fn parse_rna_interval(input: &str) -> IResult<&str, RnaInterval> {
alt((
map(
delimited(
char('('),
(parse_rna_pos, tag("_"), parse_rna_pos),
char(')'),
),
|(start, _, end)| Interval::with_uncertainty(Mu::uncertain(start), Mu::uncertain(end)),
),
map(
(parse_uncertain_rna_pos, tag("_"), parse_uncertain_rna_pos),
|(start, _, end)| Interval::with_uncertainty(start, end),
),
map(parse_uncertain_rna_pos, |pos| match pos {
Mu::Unknown => Interval::with_uncertainty(Mu::Unknown, Mu::Unknown),
Mu::Uncertain(inner) => {
Interval::with_uncertainty(Mu::uncertain(inner), Mu::uncertain(inner))
}
Mu::Certain(inner) => RnaInterval::point(inner),
}),
))
.parse(input)
}
fn parse_genome_variant(
accession: Accession,
gene_symbol: Option<String>,
) -> impl FnMut(&str) -> IResult<&str, HgvsVariant> {
move |input: &str| {
let (input, _) = tag("g.").parse(input)?;
if let Ok((remaining, edit)) = parse_whole_genome_identity(input) {
let dummy_pos = crate::hgvs::location::GenomePos::new(1);
let dummy_interval = GenomeInterval::point(dummy_pos);
return Ok((
remaining,
HgvsVariant::Genome(GenomeVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(dummy_interval, edit),
}),
));
}
if let Ok((remaining, edit)) = parse_whole_genome_unknown(input) {
let dummy_pos = crate::hgvs::location::GenomePos::new(1);
let dummy_interval = GenomeInterval::point(dummy_pos);
return Ok((
remaining,
HgvsVariant::Genome(GenomeVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(dummy_interval, edit),
}),
));
}
if input.starts_with('[') {
if let Ok((remaining, allele)) =
parse_genome_compound_allele(input, accession.clone(), gene_symbol.clone())
{
return Ok((remaining, HgvsVariant::Allele(allele)));
}
}
let (input, interval) = parse_genome_interval(input)?;
let (input, edit) = if let Ok((remaining, edit)) = parse_na_edit(input) {
(remaining, edit)
} else if input.is_empty() || input.starts_with(']') || input.starts_with(';') {
(
input,
crate::hgvs::edit::NaEdit::Identity {
sequence: None,
whole_entity: false,
},
)
} else {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
};
Ok((
input,
HgvsVariant::Genome(GenomeVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(interval, edit),
}),
))
}
}
/// Parses a bracketed list of genomic changes, `[change;change;...]`, into a
/// cis allele.
///
/// Each element is an interval optionally followed by a nucleotide edit; when
/// no edit parses, an identity edit without sequence is recorded. Elements
/// are separated by `;` and the list is closed by `]`. Any other character
/// after an element yields `ErrorKind::Tag` anchored at that position.
///
/// `accession` and `gene_symbol` are cloned into every element.
fn parse_genome_compound_allele(
    input: &str,
    accession: Accession,
    gene_symbol: Option<String>,
) -> IResult<&str, AlleleVariant> {
    let (mut current, _) = char('[').parse(input)?;
    let mut variants = Vec::new();

    // Every iteration pushes one variant before the closing `]` can be seen,
    // so the list is never empty when the function returns successfully.
    loop {
        let (rest, interval) = parse_genome_interval(current)?;
        let (rest, edit) = match parse_na_edit(rest) {
            Ok(parsed) => parsed,
            Err(_) => (
                rest,
                crate::hgvs::edit::NaEdit::Identity {
                    sequence: None,
                    whole_entity: false,
                },
            ),
        };
        variants.push(HgvsVariant::Genome(GenomeVariant {
            accession: accession.clone(),
            gene_symbol: gene_symbol.clone(),
            loc_edit: LocEdit::new(interval, edit),
        }));

        if let Some(after_semi) = rest.strip_prefix(';') {
            current = after_semi;
        } else if let Some(after_bracket) = rest.strip_prefix(']') {
            return Ok((after_bracket, AlleleVariant::cis(variants)));
        } else {
            return Err(nom::Err::Error(nom::error::Error::new(
                rest,
                nom::error::ErrorKind::Tag,
            )));
        }
    }
}
/// Recognises a leading `=` and returns the whole-entity identity edit.
fn parse_whole_genome_identity(input: &str) -> IResult<&str, crate::hgvs::edit::NaEdit> {
    let (remaining, _) = tag("=").parse(input)?;
    Ok((
        remaining,
        crate::hgvs::edit::NaEdit::whole_entity_identity(),
    ))
}
fn parse_whole_genome_unknown(input: &str) -> IResult<&str, crate::hgvs::edit::NaEdit> {
let (remaining, _) = tag("?").parse(input)?;
if remaining.starts_with('_') {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
}
Ok((remaining, crate::hgvs::edit::NaEdit::whole_entity_unknown()))
}
/// Recognises a leading `=` and returns the whole-entity identity edit.
fn parse_whole_cds_identity(input: &str) -> IResult<&str, crate::hgvs::edit::NaEdit> {
    let (remaining, _) = tag("=").parse(input)?;
    Ok((
        remaining,
        crate::hgvs::edit::NaEdit::whole_entity_identity(),
    ))
}
/// Recognises a leading `?` that stands for the whole entity and returns the
/// whole-entity unknown edit.
///
/// The `?` is rejected with `ErrorKind::Tag` (anchored at `input`) when it is
/// directly followed by `_`, `+`, `-`, `dup`, `del`, `ins` or `inv`.
fn parse_whole_cds_unknown(input: &str) -> IResult<&str, crate::hgvs::edit::NaEdit> {
    // Prefixes after `?` that disqualify the whole-entity reading.
    const DISQUALIFYING_PREFIXES: [&str; 7] = ["_", "+", "-", "dup", "del", "ins", "inv"];

    let (remaining, _) = tag("?").parse(input)?;
    let is_disqualified = DISQUALIFYING_PREFIXES
        .iter()
        .any(|prefix| remaining.starts_with(*prefix));
    if is_disqualified {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Tag,
        )));
    }
    Ok((remaining, crate::hgvs::edit::NaEdit::whole_entity_unknown()))
}
/// Builds a parser for the `c.` part of a variant description.
///
/// The returned closure expects the input to start with `c.` and then tries,
/// in order:
/// 1. `[` — delegated to `parse_cds_allele_shorthand`;
/// 2. input containing `(;)` — delegated to `parse_cds_position_unknown_phase`;
/// 3. whole-entity identity (`=`) and whole-entity unknown (`?`), both stored
///    with a placeholder point interval at base 1;
/// 4. a parenthesized substitution `(pos X>Y)` whose inner text contains `>`
///    and no `_`, stored via `LocEdit::new_predicted`;
/// 5. an interval followed by a nucleotide edit. When no edit parses, a
///    position-only result is allowed under the conditions noted inline.
///
/// `accession` and `gene_symbol` are cloned into every produced variant.
fn parse_cds_variant(
accession: Accession,
gene_symbol: Option<String>,
) -> impl FnMut(&str) -> IResult<&str, HgvsVariant> {
move |input: &str| {
let (input, _) = tag("c.").parse(input)?;
if input.starts_with('[') {
return parse_cds_allele_shorthand(input, accession.clone(), gene_symbol.clone());
}
// The `!input.starts_with('[')` part is always true here because the `[`
// case has already returned above.
if input.contains("(;)") && !input.starts_with('[') {
return parse_cds_position_unknown_phase(input, accession.clone(), gene_symbol.clone());
}
if let Ok((remaining, edit)) = parse_whole_cds_identity(input) {
// Whole-entity edits carry no real location; a placeholder is stored.
let dummy_pos = crate::hgvs::location::CdsPos {
base: 1,
offset: None,
utr3: false,
};
let dummy_interval = CdsInterval::point(dummy_pos);
return Ok((
remaining,
HgvsVariant::Cds(CdsVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(dummy_interval, edit),
}),
));
}
if let Ok((remaining, edit)) = parse_whole_cds_unknown(input) {
let dummy_pos = crate::hgvs::location::CdsPos {
base: 1,
offset: None,
utr3: false,
};
let dummy_interval = CdsInterval::point(dummy_pos);
return Ok((
remaining,
HgvsVariant::Cds(CdsVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(dummy_interval, edit),
}),
));
}
// Parenthesized substitution: the text between `(` and the first `)` must
// contain `>` and no `_`, and must be consumed completely by interval + edit.
// If any of these steps fails, parsing continues with the generic path below.
if input.starts_with('(') && !input.starts_with("(?") {
if let Some(close_paren) = input.find(')') {
let inner = &input[1..close_paren];
if inner.contains('>') && !inner.contains('_') {
if let Ok((after_edit, interval)) = parse_cds_interval(inner) {
if let Ok((remaining_inner, edit)) = parse_na_edit(after_edit) {
if remaining_inner.is_empty() {
let remaining = &input[close_paren + 1..];
return Ok((
remaining,
HgvsVariant::Cds(CdsVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new_predicted(interval, edit),
}),
));
}
}
}
}
}
}
let (input, interval) = parse_cds_interval(input)?;
let (input, edit) = if let Ok((remaining, edit)) = parse_na_edit(input) {
(remaining, edit)
// No edit parsed: only acceptable when nothing alphanumeric follows.
} else if input.is_empty() || !input.chars().next().unwrap_or(' ').is_alphanumeric() {
// Treated as a range unless both boundaries are single certain positions
// that compare equal.
let is_range = match (&interval.start, &interval.end) {
(
crate::hgvs::interval::UncertainBoundary::Single(
crate::hgvs::uncertainty::Mu::Certain(start),
),
crate::hgvs::interval::UncertainBoundary::Single(
crate::hgvs::uncertainty::Mu::Certain(end),
),
) => start != end,
_ => true, };
// True when the start is a single certain position with `utr3` set or a
// negative base.
let is_utr = match &interval.start {
crate::hgvs::interval::UncertainBoundary::Single(
crate::hgvs::uncertainty::Mu::Certain(pos),
) => pos.utr3 || pos.base < 0, _ => false,
};
if is_range || is_utr {
(input, crate::hgvs::edit::NaEdit::PositionOnly)
} else {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
}
} else {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
};
Ok((
input,
HgvsVariant::Cds(CdsVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(interval, edit),
}),
))
}
}
/// Parses an unbracketed list of CDS changes separated by `(;)` into an
/// allele with unknown phase.
///
/// The whole of `input` is consumed: it is split on `(;)`, each non-empty
/// trimmed piece must parse completely as interval + edit, and the returned
/// remainder is always the empty string. Fewer than two parsed changes yields
/// `ErrorKind::Tag` anchored at `input`; leftover text inside a piece yields
/// `ErrorKind::Tag` anchored at that leftover.
fn parse_cds_position_unknown_phase(
    input: &str,
    accession: Accession,
    gene_symbol: Option<String>,
) -> IResult<&str, HgvsVariant> {
    let mut variants = Vec::with_capacity(4);

    let pieces = input
        .split("(;)")
        .map(str::trim)
        .filter(|piece| !piece.is_empty());
    for piece in pieces {
        let (after_interval, interval) = parse_cds_interval(piece)?;
        let (leftover, edit) = parse_na_edit(after_interval)?;
        if !leftover.trim().is_empty() {
            return Err(nom::Err::Error(nom::error::Error::new(
                leftover,
                nom::error::ErrorKind::Tag,
            )));
        }
        let loc_edit = LocEdit::new(interval, edit);
        variants.push(HgvsVariant::Cds(CdsVariant {
            accession: accession.clone(),
            gene_symbol: gene_symbol.clone(),
            loc_edit,
        }));
    }

    if variants.len() < 2 {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Tag,
        )));
    }

    let allele = AlleleVariant::new(variants, AllelePhase::Unknown);
    Ok(("", HgvsVariant::Allele(allele)))
}
fn parse_cds_allele_shorthand(
input: &str,
accession: Accession,
gene_symbol: Option<String>,
) -> IResult<&str, HgvsVariant> {
if !input.starts_with('[') {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
}
if input.contains("];[") {
return parse_cds_trans_allele_shorthand(input, accession, gene_symbol);
}
let close_bracket = input.rfind(']').ok_or_else(|| {
nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Tag))
})?;
let content = &input[1..close_bracket];
let remaining = &input[close_bracket + 1..];
let has_unknown_phase = content.contains("(;)");
let has_cis_separator = {
let temp = content.replace("(;)", "\x00");
temp.contains(';')
};
let phase = if has_unknown_phase {
AllelePhase::Unknown
} else {
AllelePhase::Cis
};
let mut variants = Vec::with_capacity(4);
if has_unknown_phase && has_cis_separator {
for unknown_group in content.split("(;)") {
for part in unknown_group.split(';') {
let part = part.trim();
if part.is_empty() {
continue;
}
let (edit_remaining, interval) = parse_cds_interval(part)?;
let (final_remaining, edit) = if let Ok((r, e)) = parse_na_edit(edit_remaining) {
(r, e)
} else {
(
edit_remaining,
crate::hgvs::edit::NaEdit::Identity {
sequence: None,
whole_entity: false,
},
)
};
if !final_remaining.trim().is_empty() {
return Err(nom::Err::Error(nom::error::Error::new(
final_remaining,
nom::error::ErrorKind::Tag,
)));
}
variants.push(HgvsVariant::Cds(CdsVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(interval, edit),
}));
}
}
} else {
let separator = if has_unknown_phase { "(;)" } else { ";" };
for part in content.split(separator) {
let part = part.trim();
if part.is_empty() {
continue;
}
let (edit_remaining, interval) = parse_cds_interval(part)?;
let (final_remaining, edit) = if let Ok((r, e)) = parse_na_edit(edit_remaining) {
(r, e)
} else {
(
edit_remaining,
crate::hgvs::edit::NaEdit::Identity {
sequence: None,
whole_entity: false,
},
)
};
if !final_remaining.trim().is_empty() {
return Err(nom::Err::Error(nom::error::Error::new(
final_remaining,
nom::error::ErrorKind::Tag,
)));
}
variants.push(HgvsVariant::Cds(CdsVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(interval, edit),
}));
}
}
if variants.is_empty() {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
}
Ok((
remaining,
HgvsVariant::Allele(AlleleVariant::new(variants, phase)),
))
}
/// Parses a sequence of bracketed CDS alleles, `[a];[b]...`, into a trans
/// allele.
///
/// Bracket groups are consumed for as long as the remaining input starts with
/// `[`. Inside a group, the trimmed content `0` becomes
/// `HgvsVariant::NullAllele`, `?` becomes `HgvsVariant::UnknownAllele`, and
/// anything else must parse completely as interval + edit. A single `;` after
/// a closing bracket is skipped. A missing `]` or fewer than two groups
/// yields `ErrorKind::Tag`.
fn parse_cds_trans_allele_shorthand(
    input: &str,
    accession: Accession,
    gene_symbol: Option<String>,
) -> IResult<&str, HgvsVariant> {
    let mut variants = Vec::with_capacity(2);
    let mut remaining = input;

    while let Some(after_open) = remaining.strip_prefix('[') {
        let close_at = after_open.find(']').ok_or_else(|| {
            nom::Err::Error(nom::error::Error::new(
                remaining,
                nom::error::ErrorKind::Tag,
            ))
        })?;
        let content = after_open[..close_at].trim();

        let variant = match content {
            "0" => HgvsVariant::NullAllele,
            "?" => HgvsVariant::UnknownAllele,
            _ => {
                let (after_interval, interval) = parse_cds_interval(content)?;
                let (leftover, edit) = parse_na_edit(after_interval)?;
                if !leftover.trim().is_empty() {
                    return Err(nom::Err::Error(nom::error::Error::new(
                        leftover,
                        nom::error::ErrorKind::Tag,
                    )));
                }
                HgvsVariant::Cds(CdsVariant {
                    accession: accession.clone(),
                    gene_symbol: gene_symbol.clone(),
                    loc_edit: LocEdit::new(interval, edit),
                })
            }
        };
        variants.push(variant);

        // Step past the closing bracket and an optional `;` separator.
        let after_close = &after_open[close_at + 1..];
        remaining = after_close.strip_prefix(';').unwrap_or(after_close);
    }

    if variants.len() < 2 {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Tag,
        )));
    }
    Ok((
        remaining,
        HgvsVariant::Allele(AlleleVariant::new(variants, AllelePhase::Trans)),
    ))
}
fn parse_tx_variant(
accession: Accession,
gene_symbol: Option<String>,
) -> impl FnMut(&str) -> IResult<&str, HgvsVariant> {
move |input: &str| {
let (input, _) = tag("n.").parse(input)?;
let (input, interval) = parse_tx_interval(input)?;
let (input, edit) = parse_na_edit(input)?;
Ok((
input,
HgvsVariant::Tx(TxVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(interval, edit),
}),
))
}
}
fn parse_mt_variant(
accession: Accession,
gene_symbol: Option<String>,
) -> impl FnMut(&str) -> IResult<&str, HgvsVariant> {
move |input: &str| {
let (input, _) = tag("m.").parse(input)?;
let (input, interval) = parse_genome_interval(input)?;
let (input, edit) = parse_na_edit(input)?;
Ok((
input,
HgvsVariant::Mt(MtVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(interval, edit),
}),
))
}
}
fn parse_whole_protein_identity(input: &str) -> IResult<&str, ProteinEdit> {
alt((
map(tag("(=)"), |_| {
ProteinEdit::whole_protein_identity_predicted()
}),
map(tag("="), |_| ProteinEdit::whole_protein_identity()),
))
.parse(input)
}
fn parse_whole_protein_no_protein(input: &str) -> IResult<&str, ProteinEdit> {
alt((
map(tag("0?"), |_| ProteinEdit::no_protein_predicted()),
map(tag("0"), |_| ProteinEdit::no_protein()),
))
.parse(input)
}
fn parse_whole_protein_unknown(input: &str) -> IResult<&str, ProteinEdit> {
alt((
map(tag("(?)"), |_| {
ProteinEdit::whole_protein_unknown_predicted()
}),
map(tag("?"), |_| ProteinEdit::whole_protein_unknown()),
))
.parse(input)
}
/// Parses a bracketed list of protein changes, `[change;change;...]`, into a
/// cis allele.
///
/// Each element is a protein interval followed by a protein edit; both are
/// mandatory. Elements are separated by `;` and the list is closed by `]`.
/// Any other character after an element yields `ErrorKind::Tag` anchored at
/// that position.
///
/// `accession` and `gene_symbol` are cloned into every element.
fn parse_protein_allele_shorthand(
    input: &str,
    accession: Accession,
    gene_symbol: Option<String>,
) -> IResult<&str, AlleleVariant> {
    let (mut current, _) = char('[').parse(input)?;
    let mut variants = Vec::new();

    // Every iteration pushes one variant before the closing `]` can be seen,
    // so the list is never empty when the function returns successfully.
    loop {
        let (rest, interval) = parse_prot_interval(current)?;
        let (rest, edit) = parse_protein_edit(rest)?;
        variants.push(HgvsVariant::Protein(ProteinVariant {
            accession: accession.clone(),
            gene_symbol: gene_symbol.clone(),
            loc_edit: LocEdit::new(interval, edit),
        }));

        if let Some(after_semi) = rest.strip_prefix(';') {
            current = after_semi;
        } else if let Some(after_bracket) = rest.strip_prefix(']') {
            return Ok((after_bracket, AlleleVariant::cis(variants)));
        } else {
            return Err(nom::Err::Error(nom::error::Error::new(
                rest,
                nom::error::ErrorKind::Tag,
            )));
        }
    }
}
/// Parses a protein repeat written with numeric positions only, i.e.
/// `N<edit>` or `N_M<edit>` without amino-acid codes.
///
/// The edit must be a `ProteinEdit::Repeat` or `ProteinEdit::MultiRepeat`;
/// any other edit is rejected with `ErrorKind::Verify` anchored at `input`.
/// Numbers that do not fit in `u64` are rejected with `ErrorKind::Digit`.
/// Positions are created with `AminoAcid::Xaa` as the amino acid.
fn parse_protein_numeric_repeat(
    input: &str,
    accession: Accession,
    gene_symbol: Option<String>,
) -> IResult<&str, HgvsVariant> {
    let (after_start, start_digits) = digit1.parse(input)?;
    let start = start_digits.parse::<u64>().map_err(|_| {
        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
    })?;

    // Optional `_M` range end.
    let (after_positions, end) = match after_start.strip_prefix('_') {
        Some(after_separator) => {
            let (rest, end_digits) = digit1.parse(after_separator)?;
            let end = end_digits.parse::<u64>().map_err(|_| {
                nom::Err::Error(nom::error::Error::new(
                    after_start,
                    nom::error::ErrorKind::Digit,
                ))
            })?;
            (rest, Some(end))
        }
        None => (after_start, None),
    };

    let (remaining, edit) = parse_protein_edit(after_positions)?;
    let is_repeat = matches!(
        edit,
        ProteinEdit::Repeat { .. } | ProteinEdit::MultiRepeat { .. }
    );
    if !is_repeat {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Verify,
        )));
    }

    let start_pos = ProtPos::new(AminoAcid::Xaa, start);
    let interval = if let Some(end) = end {
        ProtInterval::new(start_pos, ProtPos::new(AminoAcid::Xaa, end))
    } else {
        ProtInterval::point(start_pos)
    };

    let variant = ProteinVariant {
        accession,
        gene_symbol,
        loc_edit: LocEdit::new(interval, edit),
    };
    Ok((remaining, HgvsVariant::Protein(variant)))
}
fn parse_protein_numeric_position(
input: &str,
accession: Accession,
gene_symbol: Option<String>,
) -> IResult<&str, HgvsVariant> {
let (remaining, start_num) = digit1.parse(input)?;
let start: u64 = start_num.parse().map_err(|_| {
nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
})?;
if start == 0 {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
let (remaining, end_opt) = if remaining.starts_with('_') {
let (rest, _) = tag("_").parse(remaining)?;
let (rest, end_num) = digit1.parse(rest)?;
let end: u64 = end_num.parse().map_err(|_| {
nom::Err::Error(nom::error::Error::new(
remaining,
nom::error::ErrorKind::Digit,
))
})?;
(rest, Some(end))
} else {
(remaining, None)
};
let start_pos = ProtPos::new(AminoAcid::Xaa, start);
let interval = match end_opt {
Some(end) => {
let end_pos = ProtPos::new(AminoAcid::Xaa, end);
ProtInterval::new(start_pos, end_pos)
}
None => ProtInterval::point(start_pos),
};
let (remaining, edit) = if remaining.starts_with("ins")
|| remaining.starts_with("del")
|| remaining.starts_with("dup")
{
parse_protein_edit(remaining)?
} else if remaining.starts_with(|c: char| c.is_ascii_uppercase()) {
let aa_char = remaining.chars().next().unwrap();
if let Some(aa) = AminoAcid::from_one_letter(aa_char) {
(
&remaining[1..],
ProteinEdit::Substitution {
reference: AminoAcid::Xaa,
alternative: aa,
},
)
} else {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
} else if remaining.is_empty() || remaining.starts_with(' ') || remaining.starts_with(',') {
(remaining, ProteinEdit::position_unknown())
} else {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
};
Ok((
remaining,
HgvsVariant::Protein(ProteinVariant {
accession,
gene_symbol,
loc_edit: LocEdit::new(interval, edit),
}),
))
}
fn parse_protein_variant(
accession: Accession,
gene_symbol: Option<String>,
) -> impl FnMut(&str) -> IResult<&str, HgvsVariant> {
move |input: &str| {
let (input, _) = tag("p.").parse(input)?;
if let Ok((remaining, edit)) = parse_whole_protein_identity(input) {
let dummy_pos = ProtPos::new(AminoAcid::Met, 1);
let dummy_interval = ProtInterval::point(dummy_pos);
return Ok((
remaining,
HgvsVariant::Protein(ProteinVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(dummy_interval, edit),
}),
));
}
if let Ok((remaining, edit)) = parse_whole_protein_no_protein(input) {
let dummy_pos = ProtPos::new(AminoAcid::Met, 1);
let dummy_interval = ProtInterval::point(dummy_pos);
return Ok((
remaining,
HgvsVariant::Protein(ProteinVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(dummy_interval, edit),
}),
));
}
if let Ok((remaining, edit)) = parse_whole_protein_unknown(input) {
let dummy_pos = ProtPos::new(AminoAcid::Met, 1);
let dummy_interval = ProtInterval::point(dummy_pos);
return Ok((
remaining,
HgvsVariant::Protein(ProteinVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(dummy_interval, edit),
}),
));
}
if input.starts_with('[') {
if let Ok((remaining, allele)) =
parse_protein_allele_shorthand(input, accession.clone(), gene_symbol.clone())
{
return Ok((remaining, HgvsVariant::Allele(allele)));
}
}
if input.starts_with('(') && !input.starts_with("(=)") {
if let Ok((remaining, (interval, edit))) = delimited(
char('('),
(parse_prot_interval, parse_protein_edit),
char(')'),
)
.parse(input)
{
return Ok((
remaining,
HgvsVariant::Protein(ProteinVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::with_uncertainty(interval, Mu::Uncertain(edit)),
}),
));
}
}
if let Ok((remaining, variant)) =
parse_protein_numeric_repeat(input, accession.clone(), gene_symbol.clone())
{
return Ok((remaining, variant));
}
if let Ok((remaining, variant)) =
parse_protein_numeric_position(input, accession.clone(), gene_symbol.clone())
{
return Ok((remaining, variant));
}
let (input, interval) = parse_prot_interval(input)?;
let (input, edit) = match parse_protein_edit(input) {
Ok((remaining, edit)) => (remaining, edit),
Err(_) if input.is_empty() || input.starts_with(' ') || input.starts_with(',') => {
(input, ProteinEdit::position_unknown())
}
Err(e) => return Err(e),
};
Ok((
input,
HgvsVariant::Protein(ProteinVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(interval, edit),
}),
))
}
}
fn parse_rna_variant(
accession: Accession,
gene_symbol: Option<String>,
) -> impl FnMut(&str) -> IResult<&str, HgvsVariant> {
move |input: &str| {
let (input, _) = tag("r.").parse(input)?;
if input.starts_with('[') {
return parse_rna_allele_shorthand(input, accession.clone(), gene_symbol.clone());
}
if input.contains("(;)") && !input.starts_with('[') {
return parse_rna_position_unknown_phase(input, accession.clone(), gene_symbol.clone());
}
if let Ok((remaining, edit)) = parse_whole_rna_identity(input) {
let dummy_pos = crate::hgvs::location::RnaPos {
base: 1,
offset: None,
utr3: false,
};
let dummy_interval = RnaInterval::point(dummy_pos);
return Ok((
remaining,
HgvsVariant::Rna(RnaVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(dummy_interval, edit),
}),
));
}
if let Ok((remaining, edit)) = parse_whole_rna_unknown(input) {
let dummy_pos = crate::hgvs::location::RnaPos {
base: 1,
offset: None,
utr3: false,
};
let dummy_interval = RnaInterval::point(dummy_pos);
return Ok((
remaining,
HgvsVariant::Rna(RnaVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(dummy_interval, edit),
}),
));
}
if let Ok((remaining, edit)) = parse_rna_splice(input) {
let dummy_pos = crate::hgvs::location::RnaPos {
base: 1,
offset: None,
utr3: false,
};
let dummy_interval = RnaInterval::point(dummy_pos);
return Ok((
remaining,
HgvsVariant::Rna(RnaVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(dummy_interval, edit),
}),
));
}
if let Ok((remaining, edit)) = parse_rna_no_product(input) {
let dummy_pos = crate::hgvs::location::RnaPos {
base: 1,
offset: None,
utr3: false,
};
let dummy_interval = RnaInterval::point(dummy_pos);
return Ok((
remaining,
HgvsVariant::Rna(RnaVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(dummy_interval, edit),
}),
));
}
if let Ok((remaining, variant)) =
parse_predicted_rna_variant(input, accession.clone(), gene_symbol.clone())
{
return Ok((remaining, variant));
}
let (input, interval) = parse_rna_interval(input)?;
let (input, edit) = parse_na_edit(input)?;
Ok((
input,
HgvsVariant::Rna(RnaVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(interval, edit),
}),
))
}
}
fn parse_whole_rna_identity(input: &str) -> IResult<&str, crate::hgvs::edit::NaEdit> {
map(tag("="), |_| {
crate::hgvs::edit::NaEdit::whole_entity_identity()
})
.parse(input)
}
fn parse_whole_rna_unknown(input: &str) -> IResult<&str, crate::hgvs::edit::NaEdit> {
let (remaining, _) = tag("?").parse(input)?;
if remaining.starts_with('_') {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
}
Ok((remaining, crate::hgvs::edit::NaEdit::whole_entity_unknown()))
}
fn parse_rna_splice(input: &str) -> IResult<&str, crate::hgvs::edit::NaEdit> {
map(tag("spl"), |_| crate::hgvs::edit::NaEdit::Splice).parse(input)
}
fn parse_rna_no_product(input: &str) -> IResult<&str, crate::hgvs::edit::NaEdit> {
let (remaining, _) = tag("0").parse(input)?;
if remaining.chars().next().is_some_and(|c| c.is_ascii_digit()) {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
}
Ok((remaining, crate::hgvs::edit::NaEdit::NoProduct))
}
fn parse_predicted_rna_variant(
input: &str,
accession: Accession,
gene_symbol: Option<String>,
) -> IResult<&str, HgvsVariant> {
let (input, _) = tag("(").parse(input)?;
let (input, interval) = parse_rna_interval(input)?;
let (input, edit) = parse_na_edit(input)?;
let (input, _) = tag(")").parse(input)?;
Ok((
input,
HgvsVariant::Rna(RnaVariant {
accession,
gene_symbol,
loc_edit: LocEdit::new(interval, edit),
}),
))
}
fn parse_rna_position_unknown_phase(
input: &str,
accession: Accession,
gene_symbol: Option<String>,
) -> IResult<&str, HgvsVariant> {
let mut variants = Vec::with_capacity(4);
for part in input.split("(;)") {
let part = part.trim();
if part.is_empty() {
continue;
}
let (edit_remaining, interval) = parse_rna_interval(part)?;
let (final_remaining, edit) = parse_na_edit(edit_remaining)?;
if !final_remaining.trim().is_empty() {
return Err(nom::Err::Error(nom::error::Error::new(
final_remaining,
nom::error::ErrorKind::Tag,
)));
}
variants.push(HgvsVariant::Rna(RnaVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(interval, edit),
}));
}
if variants.len() < 2 {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
}
Ok((
"",
HgvsVariant::Allele(AlleleVariant::new(variants, AllelePhase::Unknown)),
))
}
fn parse_rna_allele_shorthand(
input: &str,
accession: Accession,
gene_symbol: Option<String>,
) -> IResult<&str, HgvsVariant> {
if !input.starts_with('[') {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
}
if input.contains("];[") {
return parse_rna_trans_allele_shorthand(input, accession, gene_symbol);
}
let close_bracket = input.rfind(']').ok_or_else(|| {
nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Tag))
})?;
let content = &input[1..close_bracket];
let remaining = &input[close_bracket + 1..];
let has_unknown_phase = content.contains("(;)");
let has_cis_separator = {
let temp = content.replace("(;)", "\x00");
temp.contains(';')
};
let phase = if has_unknown_phase {
AllelePhase::Unknown
} else {
AllelePhase::Cis
};
let mut variants = Vec::with_capacity(4);
if has_unknown_phase && has_cis_separator {
for unknown_group in content.split("(;)") {
for part in unknown_group.split(';') {
let part = part.trim();
if part.is_empty() {
continue;
}
let (edit_remaining, interval) = parse_rna_interval(part)?;
let (final_remaining, edit) = parse_na_edit(edit_remaining)?;
if !final_remaining.trim().is_empty() {
return Err(nom::Err::Error(nom::error::Error::new(
final_remaining,
nom::error::ErrorKind::Tag,
)));
}
variants.push(HgvsVariant::Rna(RnaVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(interval, edit),
}));
}
}
} else {
let separator = if has_unknown_phase { "(;)" } else { ";" };
for part in content.split(separator) {
let part = part.trim();
if part.is_empty() {
continue;
}
let (edit_remaining, interval) = parse_rna_interval(part)?;
let (final_remaining, edit) = parse_na_edit(edit_remaining)?;
if !final_remaining.trim().is_empty() {
return Err(nom::Err::Error(nom::error::Error::new(
final_remaining,
nom::error::ErrorKind::Tag,
)));
}
variants.push(HgvsVariant::Rna(RnaVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(interval, edit),
}));
}
}
if variants.is_empty() {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
}
Ok((
remaining,
HgvsVariant::Allele(AlleleVariant::new(variants, phase)),
))
}
/// Parses a sequence of bracketed RNA alleles such as `[...];[...]`.
///
/// Bracket groups are read for as long as the remaining input starts with
/// `[`. The trimmed content of each group is interpreted as:
///
/// * `0` -> `HgvsVariant::NullAllele`,
/// * `?` -> `HgvsVariant::UnknownAllele`,
/// * anything else -> one RNA interval followed by one nucleotide edit, which
///   must consume the whole group.
///
/// A single `;` directly after a group is skipped. The result is an allele
/// with [`AllelePhase::Trans`]; whatever follows the last group is returned as
/// remaining input.
///
/// # Errors
/// * `Tag` error at the current position when a group has no closing `]`.
/// * `Tag` error at the leftover text when a group is not fully consumed.
/// * `Tag` error at `input` when fewer than two groups were read.
/// * Propagates interval / edit parser failures.
fn parse_rna_trans_allele_shorthand(
    input: &str,
    accession: Accession,
    gene_symbol: Option<String>,
) -> IResult<&str, HgvsVariant> {
    let mut variants = Vec::with_capacity(2);
    let mut remaining = input;

    while remaining.starts_with('[') {
        let Some(close_bracket) = remaining.find(']') else {
            return Err(nom::Err::Error(nom::error::Error::new(
                remaining,
                nom::error::ErrorKind::Tag,
            )));
        };

        let variant = match remaining[1..close_bracket].trim() {
            "0" => HgvsVariant::NullAllele,
            "?" => HgvsVariant::UnknownAllele,
            description => {
                let (after_interval, interval) = parse_rna_interval(description)?;
                let (leftover, edit) = parse_na_edit(after_interval)?;
                if !leftover.trim().is_empty() {
                    return Err(nom::Err::Error(nom::error::Error::new(
                        leftover,
                        nom::error::ErrorKind::Tag,
                    )));
                }
                HgvsVariant::Rna(RnaVariant {
                    accession: accession.clone(),
                    gene_symbol: gene_symbol.clone(),
                    loc_edit: LocEdit::new(interval, edit),
                })
            }
        };
        variants.push(variant);

        let after_group = &remaining[close_bracket + 1..];
        remaining = after_group.strip_prefix(';').unwrap_or(after_group);
    }

    if variants.len() < 2 {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Tag,
        )));
    }
    let allele = AlleleVariant::new(variants, AllelePhase::Trans);
    Ok((remaining, HgvsVariant::Allele(allele)))
}
fn parse_circular_variant(
accession: Accession,
gene_symbol: Option<String>,
) -> impl FnMut(&str) -> IResult<&str, HgvsVariant> {
move |input: &str| {
let (input, _) = tag("o.").parse(input)?;
let (input, interval) = parse_genome_interval(input)?;
let (input, edit) = parse_na_edit(input)?;
Ok((
input,
HgvsVariant::Circular(CircularVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.clone(),
loc_edit: LocEdit::new(interval, edit),
}),
))
}
}
/// Parses one `accession[gene]:<description>` variant and returns it together
/// with the unparsed remainder.
///
/// Steps:
/// 1. leading whitespace is dropped, then the accession, an optional gene
///    symbol and the `:` separator are read;
/// 2. a description starting with `[` is delegated to
///    `parse_clinvar_style_allele`;
/// 3. a description starting with `c.`, `p.`, `g.`, `n.`, `r.`, `m.` or `o.`
///    is delegated to the matching variant parser;
/// 4. without such a prefix, one is implied from the accession prefix: `g.`
///    for `NC`, `NT`, `NW`, `AC`, `NG` and prefixes starting with `AL`, `BX`,
///    `CR`, `CT` or `CU`; `c.` for `NM`, `NR`, `XM`, `XR`.
///
/// # Errors
/// Returns `FerroError::Parse`. Every `pos` is an offset into the input after
/// leading whitespace was removed, not into the original `input`.
fn parse_single_variant(input: &str) -> Result<(&str, HgvsVariant), FerroError> {
    let trimmed = input.trim_start();

    let (after_accession, accession) =
        parse_accession(trimmed).map_err(|e| FerroError::Parse {
            pos: 0,
            msg: format!("Failed to parse accession: {:?}", e),
            diagnostic: None,
        })?;
    let (after_gene, gene_symbol) =
        parse_gene_symbol(after_accession).map_err(|e| FerroError::Parse {
            pos: trimmed.len() - after_accession.len(),
            msg: format!("Failed to parse gene symbol: {:?}", e),
            diagnostic: None,
        })?;
    let Some(body) = after_gene.strip_prefix(':') else {
        return Err(FerroError::Parse {
            pos: trimmed.len() - after_gene.len(),
            msg: "Expected ':' after accession".to_string(),
            diagnostic: None,
        });
    };

    if body.starts_with('[') {
        return parse_clinvar_style_allele(body, &accession);
    }

    // Shared by every path that ends without a usable coordinate-type prefix.
    let unknown_prefix = || FerroError::Parse {
        pos: trimmed.len() - body.len(),
        msg: format!(
            "Unknown variant type prefix: expected one of 'c.', 'g.', 'p.', 'n.', 'r.', 'm.', 'o.' but found '{}'",
            body.chars().take(3).collect::<String>()
        ),
        diagnostic: None,
    };

    let parsed = match body.get(..2) {
        Some("c.") => parse_cds_variant(accession, gene_symbol).parse(body),
        Some("p.") => parse_protein_variant(accession, gene_symbol).parse(body),
        Some("g.") => parse_genome_variant(accession, gene_symbol).parse(body),
        Some("n.") => parse_tx_variant(accession, gene_symbol).parse(body),
        Some("r.") => parse_rna_variant(accession, gene_symbol).parse(body),
        Some("m.") => parse_mt_variant(accession, gene_symbol).parse(body),
        Some("o.") => parse_circular_variant(accession, gene_symbol).parse(body),
        _ => {
            // No explicit coordinate type: try to imply one from the accession.
            let acc_prefix: &str = &accession.prefix;
            let implies_genomic = matches!(acc_prefix, "NC" | "NT" | "NW" | "AC" | "NG")
                || ["AL", "BX", "CR", "CT", "CU"]
                    .iter()
                    .any(|lead| acc_prefix.starts_with(*lead));
            let implies_coding = matches!(acc_prefix, "NM" | "NR" | "XM" | "XR");
            if !implies_genomic && !implies_coding {
                return Err(unknown_prefix());
            }

            let implied = if implies_genomic { "g." } else { "c." };
            let prefixed = format!("{}{}", implied, body);
            let attempt = if implies_genomic {
                parse_genome_variant(accession.clone(), gene_symbol.clone())
                    .parse(prefixed.as_str())
            } else {
                parse_cds_variant(accession.clone(), gene_symbol.clone()).parse(prefixed.as_str())
            };
            match attempt {
                Ok((rest, variant)) => {
                    // Translate the consumed length back onto `body`, which
                    // lacks the synthetic two-character prefix.
                    let consumed = prefixed.len() - rest.len() - implied.len();
                    Ok((&body[consumed..], variant))
                }
                Err(_) => return Err(unknown_prefix()),
            }
        }
    };

    parsed.map_err(|e| FerroError::Parse {
        pos: trimmed.len() - body.len(),
        msg: format!("Failed to parse variant: {:?}", e),
        diagnostic: None,
    })
}
/// Parses an allele whose bracket directly follows the accession, i.e. the
/// `[...]` part of `ACCESSION:[desc1;desc2]`.
///
/// The text between the leading `[` and the *last* `]` is split on both `(;)`
/// and `;`; blank pieces are skipped. Each remaining piece is re-assembled as
/// `"{accession}:{piece}"` and parsed with `parse_single_variant`, and must be
/// consumed completely (trailing whitespace aside). This matches the other
/// allele parsers in this file, which reject unparsed trailing text instead of
/// silently dropping it.
///
/// The phase is [`AllelePhase::Unknown`] when the content contains `(;)` and
/// [`AllelePhase::Cis`] otherwise. The text after the last `]` is returned as
/// remaining input.
///
/// # Errors
/// Returns `FerroError::Parse` when the brackets are missing, the list is
/// empty, a piece fails to parse, or a piece has unparsed trailing content.
fn parse_clinvar_style_allele<'a>(
    input: &'a str,
    accession: &Accession,
) -> Result<(&'a str, HgvsVariant), FerroError> {
    if !input.starts_with('[') {
        return Err(FerroError::Parse {
            pos: 0,
            msg: "ClinVar-style allele must start with '['".to_string(),
            diagnostic: None,
        });
    }
    let close_bracket = input.rfind(']').ok_or_else(|| FerroError::Parse {
        pos: 0,
        msg: "ClinVar-style allele must end with ']'".to_string(),
        diagnostic: None,
    })?;
    let content = &input[1..close_bracket];
    let remaining = &input[close_bracket + 1..];

    let phase = if content.contains("(;)") {
        AllelePhase::Unknown
    } else {
        AllelePhase::Cis
    };

    // Splitting on `(;)` first is a no-op when that separator is absent, so a
    // single pipeline covers both phase notations.
    let parts: Vec<&str> = content
        .split("(;)")
        .flat_map(|group| group.split(';'))
        .map(str::trim)
        .filter(|part| !part.is_empty())
        .collect();
    if parts.is_empty() {
        return Err(FerroError::Parse {
            pos: 1,
            msg: "ClinVar-style allele must contain at least one variant".to_string(),
            diagnostic: None,
        });
    }

    let mut variants = Vec::with_capacity(parts.len());
    for part in parts {
        let full_variant = format!("{}:{}", accession, part);
        let (leftover, variant) = parse_single_variant(&full_variant)?;
        // A piece must be consumed in full; anything left over is a
        // description the parser did not understand.
        if !leftover.trim().is_empty() {
            return Err(FerroError::Parse {
                pos: 0,
                msg: format!("Unexpected content after variant: '{}'", leftover),
                diagnostic: None,
            });
        }
        variants.push(variant);
    }

    Ok((
        remaining,
        HgvsVariant::Allele(AlleleVariant::new(variants, phase)),
    ))
}
/// Parses a bracketed list of complete variants, `[var1;var2;...]`, into an
/// allele with [`AllelePhase::Cis`].
///
/// The (trimmed) input must start with `[`; the *last* `]` closes the list and
/// nothing but whitespace may follow it. The content is split on `;`, blank
/// pieces are skipped, and every other piece must be fully consumed by
/// `parse_single_variant`.
///
/// # Errors
/// Returns `FerroError::Parse` for a missing bracket, trailing characters,
/// a piece with unparsed content, an empty list, or any failure reported by
/// `parse_single_variant`.
fn parse_cis_allele(input: &str) -> Result<HgvsVariant, FerroError> {
    let input = input.trim();
    let fail = |pos: usize, msg: String| FerroError::Parse {
        pos,
        msg,
        diagnostic: None,
    };

    if !input.starts_with('[') {
        return Err(fail(0, "Cis allele must start with '['".to_string()));
    }
    let Some(close_bracket) = input.rfind(']') else {
        return Err(fail(
            input.len(),
            "Missing closing ']' in cis allele".to_string(),
        ));
    };

    let trailing = &input[close_bracket + 1..];
    if !trailing.trim().is_empty() {
        return Err(fail(
            close_bracket + 1,
            format!("Unexpected trailing characters: '{}'", trailing),
        ));
    }

    let pieces = input[1..close_bracket]
        .split(';')
        .map(str::trim)
        .filter(|piece| !piece.is_empty());
    let mut variants = Vec::with_capacity(4);
    for piece in pieces {
        let (leftover, variant) = parse_single_variant(piece)?;
        if !leftover.trim().is_empty() {
            return Err(fail(
                0,
                format!("Unexpected content after variant: '{}'", leftover),
            ));
        }
        variants.push(variant);
    }

    if variants.is_empty() {
        return Err(fail(0, "Empty allele".to_string()));
    }
    Ok(HgvsVariant::Allele(AlleleVariant::new(
        variants,
        AllelePhase::Cis,
    )))
}
/// Parses a sequence of bracketed variants, `[var1];[var2]...`, into an allele
/// with [`AllelePhase::Trans`].
///
/// Each bracket group runs up to the next `]`. Its trimmed content is
/// interpreted as `0` (`HgvsVariant::NullAllele`), `?`
/// (`HgvsVariant::UnknownAllele`) or a complete variant that
/// `parse_single_variant` must consume entirely. Groups are separated by `;`.
///
/// # Errors
/// Returns `FerroError::Parse` when a group does not start with `[`, lacks a
/// closing `]`, has unparsed content, is followed by something other than `;`
/// or the end of input, when fewer than two groups are present, or when
/// `parse_single_variant` fails.
fn parse_trans_allele(input: &str) -> Result<HgvsVariant, FerroError> {
    let input = input.trim();
    let fail = |pos: usize, msg: String| FerroError::Parse {
        pos,
        msg,
        diagnostic: None,
    };

    if !input.starts_with('[') {
        return Err(fail(0, "Trans allele must start with '['".to_string()));
    }

    let mut variants = Vec::with_capacity(2);
    let mut remaining = input;
    while !remaining.is_empty() {
        // `remaining` is always a suffix of `input`.
        let offset = input.len() - remaining.len();
        if !remaining.starts_with('[') {
            return Err(fail(offset, "Expected '[' in trans allele".to_string()));
        }
        let Some(close_bracket) = remaining.find(']') else {
            return Err(fail(
                offset,
                "Missing closing ']' in trans allele".to_string(),
            ));
        };

        let variant = match remaining[1..close_bracket].trim() {
            "0" => HgvsVariant::NullAllele,
            "?" => HgvsVariant::UnknownAllele,
            description => {
                let (leftover, parsed) = parse_single_variant(description)?;
                if !leftover.trim().is_empty() {
                    return Err(fail(
                        0,
                        format!("Unexpected content after variant: '{}'", leftover),
                    ));
                }
                parsed
            }
        };
        variants.push(variant);

        remaining = &remaining[close_bracket + 1..];
        match remaining.strip_prefix(';') {
            Some(next_group) => remaining = next_group,
            None if !remaining.trim().is_empty() => {
                return Err(fail(
                    input.len() - remaining.len(),
                    format!("Expected ';' or end of input, found: '{}'", remaining),
                ));
            }
            None => {}
        }
    }

    if variants.len() < 2 {
        return Err(fail(
            0,
            "Trans allele requires at least two variants".to_string(),
        ));
    }
    Ok(HgvsVariant::Allele(AlleleVariant::new(
        variants,
        AllelePhase::Trans,
    )))
}
/// Builds a whole-entity identity variant of the same kind as `reference`.
///
/// The accession and gene symbol are cloned from `reference`. The location is
/// a placeholder point at position 1 (`Met1` for proteins) and the edit is
/// `NaEdit::whole_entity_identity()`, or
/// `ProteinEdit::whole_protein_identity()` for proteins.
///
/// # Errors
/// Returns `FerroError::Parse` for any variant kind other than CDS, genome,
/// transcript, RNA, mitochondrial, circular or protein.
fn create_identity_variant_from(reference: &HgvsVariant) -> Result<HgvsVariant, FerroError> {
    use crate::hgvs::edit::NaEdit;
    use crate::hgvs::location::{GenomePos, RnaPos, TxPos};

    // Genome, mitochondrial and circular variants share one interval type.
    let genome_placeholder = || GenomeInterval::point(GenomePos::new(1));

    let identity = match reference {
        HgvsVariant::Cds(cds) => {
            let placeholder = CdsInterval::point(CdsPos {
                base: 1,
                offset: None,
                utr3: false,
            });
            HgvsVariant::Cds(CdsVariant {
                accession: cds.accession.clone(),
                gene_symbol: cds.gene_symbol.clone(),
                loc_edit: LocEdit::new(placeholder, NaEdit::whole_entity_identity()),
            })
        }
        HgvsVariant::Genome(genome) => HgvsVariant::Genome(GenomeVariant {
            accession: genome.accession.clone(),
            gene_symbol: genome.gene_symbol.clone(),
            loc_edit: LocEdit::new(genome_placeholder(), NaEdit::whole_entity_identity()),
        }),
        HgvsVariant::Tx(tx) => HgvsVariant::Tx(TxVariant {
            accession: tx.accession.clone(),
            gene_symbol: tx.gene_symbol.clone(),
            loc_edit: LocEdit::new(
                TxInterval::point(TxPos::new(1)),
                NaEdit::whole_entity_identity(),
            ),
        }),
        HgvsVariant::Rna(rna) => HgvsVariant::Rna(RnaVariant {
            accession: rna.accession.clone(),
            gene_symbol: rna.gene_symbol.clone(),
            loc_edit: LocEdit::new(
                RnaInterval::point(RnaPos::new(1)),
                NaEdit::whole_entity_identity(),
            ),
        }),
        HgvsVariant::Mt(mt) => HgvsVariant::Mt(MtVariant {
            accession: mt.accession.clone(),
            gene_symbol: mt.gene_symbol.clone(),
            loc_edit: LocEdit::new(genome_placeholder(), NaEdit::whole_entity_identity()),
        }),
        HgvsVariant::Circular(circular) => HgvsVariant::Circular(CircularVariant {
            accession: circular.accession.clone(),
            gene_symbol: circular.gene_symbol.clone(),
            loc_edit: LocEdit::new(genome_placeholder(), NaEdit::whole_entity_identity()),
        }),
        HgvsVariant::Protein(protein) => HgvsVariant::Protein(ProteinVariant {
            accession: protein.accession.clone(),
            gene_symbol: protein.gene_symbol.clone(),
            loc_edit: LocEdit::new(
                ProtInterval::point(ProtPos::new(AminoAcid::Met, 1)),
                ProteinEdit::whole_protein_identity(),
            ),
        }),
        _ => {
            return Err(FerroError::Parse {
                pos: 0,
                msg: "Cannot create identity variant for this variant type".to_string(),
                diagnostic: None,
            });
        }
    };
    Ok(identity)
}
/// Parses `/`-separated variants into an allele with [`AllelePhase::Mosaic`].
///
/// Each piece between single slashes is trimmed and interpreted as:
///
/// * `=`: an identity variant derived from the first variant via
///   `create_identity_variant_from` (not allowed as the first piece);
/// * a complete variant accepted by `parse_single_variant`, which must be
///   consumed entirely;
/// * if that fails and the first variant supplied an accession, a description
///   without accession, parsed by `parse_variant_with_inherited_accession`
///   using the first variant's accession and gene symbol.
///
/// # Errors
/// Returns `FerroError::Parse` when a `//` is encountered, `=` comes first, a
/// piece has unparsed content, a piece cannot be parsed, or fewer than two
/// variants were found.
fn parse_mosaic_allele(input: &str) -> Result<HgvsVariant, FerroError> {
    let input = input.trim();
    let mut variants = Vec::with_capacity(2);
    let mut first_variant: Option<HgvsVariant> = None;
    let mut first_accession: Option<Accession> = None;
    let mut first_gene_symbol: Option<String> = None;

    let mut start = 0;
    while start < input.len() {
        // Locate the next separator; a doubled slash belongs to the chimeric
        // notation and is rejected here.
        let slash = input[start..].find('/').map(|relative| start + relative);
        if let Some(at) = slash {
            if input.as_bytes().get(at + 1) == Some(&b'/') {
                return Err(FerroError::Parse {
                    pos: at,
                    msg: "Found '//' (chimeric) but expected '/' (mosaic)".to_string(),
                    diagnostic: None,
                });
            }
        }
        let end = slash.unwrap_or(input.len());
        let variant_str = input[start..end].trim();

        let variant = if variant_str == "=" {
            let Some(template) = first_variant.as_ref() else {
                return Err(FerroError::Parse {
                    pos: start,
                    msg: "Cannot use '=' as first variant in mosaic notation".to_string(),
                    diagnostic: None,
                });
            };
            create_identity_variant_from(template)?
        } else {
            match parse_single_variant(variant_str) {
                Ok((leftover, parsed)) => {
                    if !leftover.trim().is_empty() {
                        return Err(FerroError::Parse {
                            pos: 0,
                            msg: format!("Unexpected content after variant: '{}'", leftover),
                            diagnostic: None,
                        });
                    }
                    parsed
                }
                // Later pieces may omit the accession and reuse the first one.
                Err(err) => match first_accession.as_ref() {
                    Some(inherited) => parse_variant_with_inherited_accession(
                        variant_str,
                        inherited,
                        first_gene_symbol.as_ref(),
                    )?,
                    None => return Err(err),
                },
            }
        };

        if first_variant.is_none() {
            // Remember where the first variant came from so later pieces can
            // inherit its accession and gene symbol.
            let origin = match &variant {
                HgvsVariant::Cds(v) => Some((&v.accession, &v.gene_symbol)),
                HgvsVariant::Genome(v) => Some((&v.accession, &v.gene_symbol)),
                HgvsVariant::Tx(v) => Some((&v.accession, &v.gene_symbol)),
                HgvsVariant::Rna(v) => Some((&v.accession, &v.gene_symbol)),
                HgvsVariant::Protein(v) => Some((&v.accession, &v.gene_symbol)),
                HgvsVariant::Mt(v) => Some((&v.accession, &v.gene_symbol)),
                HgvsVariant::Circular(v) => Some((&v.accession, &v.gene_symbol)),
                _ => None,
            };
            if let Some((accession, gene_symbol)) = origin {
                first_accession = Some(accession.clone());
                first_gene_symbol = gene_symbol.clone();
            }
            first_variant = Some(variant.clone());
        }
        variants.push(variant);

        match slash {
            Some(at) => start = at + 1,
            None => break,
        }
    }

    if variants.len() < 2 {
        return Err(FerroError::Parse {
            pos: 0,
            msg: "Mosaic allele requires at least two variants".to_string(),
            diagnostic: None,
        });
    }
    Ok(HgvsVariant::Allele(AlleleVariant::new(
        variants,
        AllelePhase::Mosaic,
    )))
}
fn parse_variant_with_inherited_accession(
input: &str,
accession: &Accession,
gene_symbol: Option<&String>,
) -> Result<HgvsVariant, FerroError> {
let input = input.trim();
if let Some(rest) = input.strip_prefix("c.") {
let (remaining, interval) = parse_cds_interval(rest).map_err(|e| FerroError::Parse {
pos: 2,
msg: format!("Failed to parse CDS interval: {:?}", e),
diagnostic: None,
})?;
let (remaining, edit) = parse_na_edit(remaining).map_err(|e| FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Failed to parse edit: {:?}", e),
diagnostic: None,
})?;
if !remaining.trim().is_empty() {
return Err(FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Unexpected content: '{}'", remaining),
diagnostic: None,
});
}
Ok(HgvsVariant::Cds(CdsVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.cloned(),
loc_edit: LocEdit::new(interval, edit),
}))
} else if let Some(rest) = input.strip_prefix("g.") {
let (remaining, interval) = parse_genome_interval(rest).map_err(|e| FerroError::Parse {
pos: 2,
msg: format!("Failed to parse genome interval: {:?}", e),
diagnostic: None,
})?;
let (remaining, edit) = parse_na_edit(remaining).map_err(|e| FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Failed to parse edit: {:?}", e),
diagnostic: None,
})?;
if !remaining.trim().is_empty() {
return Err(FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Unexpected content: '{}'", remaining),
diagnostic: None,
});
}
Ok(HgvsVariant::Genome(GenomeVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.cloned(),
loc_edit: LocEdit::new(interval, edit),
}))
} else if let Some(rest) = input.strip_prefix("p.") {
if let Ok((remaining, edit)) = parse_whole_protein_identity(rest) {
let dummy_pos = ProtPos::new(AminoAcid::Met, 1);
let dummy_interval = ProtInterval::point(dummy_pos);
if !remaining.trim().is_empty() {
return Err(FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Unexpected content: '{}'", remaining),
diagnostic: None,
});
}
return Ok(HgvsVariant::Protein(ProteinVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.cloned(),
loc_edit: LocEdit::new(dummy_interval, edit),
}));
}
if let Ok((remaining, edit)) = parse_whole_protein_unknown(rest) {
let dummy_pos = ProtPos::new(AminoAcid::Met, 1);
let dummy_interval = ProtInterval::point(dummy_pos);
if !remaining.trim().is_empty() {
return Err(FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Unexpected content: '{}'", remaining),
diagnostic: None,
});
}
return Ok(HgvsVariant::Protein(ProteinVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.cloned(),
loc_edit: LocEdit::new(dummy_interval, edit),
}));
}
if let Ok((remaining, edit)) = parse_whole_protein_no_protein(rest) {
let dummy_pos = ProtPos::new(AminoAcid::Met, 1);
let dummy_interval = ProtInterval::point(dummy_pos);
if !remaining.trim().is_empty() {
return Err(FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Unexpected content: '{}'", remaining),
diagnostic: None,
});
}
return Ok(HgvsVariant::Protein(ProteinVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.cloned(),
loc_edit: LocEdit::new(dummy_interval, edit),
}));
}
let (remaining, interval) = parse_prot_interval(rest).map_err(|e| FerroError::Parse {
pos: 2,
msg: format!("Failed to parse protein interval: {:?}", e),
diagnostic: None,
})?;
let (remaining, edit) = parse_protein_edit(remaining).map_err(|e| FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Failed to parse protein edit: {:?}", e),
diagnostic: None,
})?;
if !remaining.trim().is_empty() {
return Err(FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Unexpected content: '{}'", remaining),
diagnostic: None,
});
}
Ok(HgvsVariant::Protein(ProteinVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.cloned(),
loc_edit: LocEdit::new(interval, edit),
}))
} else if let Some(rest) = input.strip_prefix("n.") {
let (remaining, interval) = parse_tx_interval(rest).map_err(|e| FerroError::Parse {
pos: 2,
msg: format!("Failed to parse transcript interval: {:?}", e),
diagnostic: None,
})?;
let (remaining, edit) = parse_na_edit(remaining).map_err(|e| FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Failed to parse edit: {:?}", e),
diagnostic: None,
})?;
if !remaining.trim().is_empty() {
return Err(FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Unexpected content: '{}'", remaining),
diagnostic: None,
});
}
Ok(HgvsVariant::Tx(TxVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.cloned(),
loc_edit: LocEdit::new(interval, edit),
}))
} else if let Some(rest) = input.strip_prefix("r.") {
let (remaining, interval) = parse_rna_interval(rest).map_err(|e| FerroError::Parse {
pos: 2,
msg: format!("Failed to parse RNA interval: {:?}", e),
diagnostic: None,
})?;
let (remaining, edit) = parse_na_edit(remaining).map_err(|e| FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Failed to parse edit: {:?}", e),
diagnostic: None,
})?;
if !remaining.trim().is_empty() {
return Err(FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Unexpected content: '{}'", remaining),
diagnostic: None,
});
}
Ok(HgvsVariant::Rna(RnaVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.cloned(),
loc_edit: LocEdit::new(interval, edit),
}))
} else if let Some(rest) = input.strip_prefix("m.") {
let (remaining, interval) = parse_genome_interval(rest).map_err(|e| FerroError::Parse {
pos: 2,
msg: format!("Failed to parse mitochondrial interval: {:?}", e),
diagnostic: None,
})?;
let (remaining, edit) = parse_na_edit(remaining).map_err(|e| FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Failed to parse edit: {:?}", e),
diagnostic: None,
})?;
if !remaining.trim().is_empty() {
return Err(FerroError::Parse {
pos: input.len() - remaining.len(),
msg: format!("Unexpected content: '{}'", remaining),
diagnostic: None,
});
}
Ok(HgvsVariant::Mt(MtVariant {
accession: accession.clone(),
gene_symbol: gene_symbol.cloned(),
loc_edit: LocEdit::new(interval, edit),
}))
} else {
Err(FerroError::Parse {
pos: 0,
msg: format!(
"Unknown variant type prefix: expected one of 'c.', 'g.', 'p.', 'n.', 'r.', 'm.' but found '{}'",
input.chars().take(3).collect::<String>()
),
diagnostic: None,
})
}
}
/// Parses a chimeric allele: two or more variant descriptions joined by `//`.
///
/// Each `//`-separated segment is trimmed and parsed with
/// `parse_single_variant`. A segment that is exactly `=` is expanded via
/// `create_identity_variant_from`, using the first variant of the allele as
/// the reference; it is therefore rejected in first position.
///
/// # Errors
///
/// Returns `FerroError::Parse` (always with `pos: 0`) when the separator is
/// missing, a segment is empty, `=` appears first, or a segment leaves
/// non-whitespace text unconsumed. Errors from the helper parsers are
/// propagated unchanged.
fn parse_chimeric_allele(input: &str) -> Result<HgvsVariant, FerroError> {
    // Every error raised directly by this function reports offset 0.
    let fail = |msg: String| FerroError::Parse {
        pos: 0,
        msg,
        diagnostic: None,
    };

    let segments: Vec<&str> = input.trim().split("//").map(str::trim).collect();
    if segments.len() < 2 {
        return Err(fail("Chimeric allele requires '//' separator".to_string()));
    }

    let mut variants: Vec<HgvsVariant> = Vec::with_capacity(segments.len());
    for segment in segments {
        if segment.is_empty() {
            return Err(fail("Empty variant in chimeric allele".to_string()));
        }

        // The first variant pushed so far (if any) is the reference for `=`.
        let parsed = match (segment, variants.first()) {
            ("=", Some(reference)) => create_identity_variant_from(reference)?,
            ("=", None) => {
                return Err(fail(
                    "Cannot use '=' as first variant in chimeric notation".to_string(),
                ));
            }
            _ => {
                let (leftover, parsed) = parse_single_variant(segment)?;
                if !leftover.trim().is_empty() {
                    return Err(fail(format!(
                        "Unexpected content after variant: '{}'",
                        leftover
                    )));
                }
                parsed
            }
        };
        variants.push(parsed);
    }

    Ok(HgvsVariant::Allele(AlleleVariant::new(
        variants,
        AllelePhase::Chimeric,
    )))
}
/// Parses a bracketed allele whose members are separated by `(;)`, i.e. the
/// phase between the variants is unknown.
///
/// The trimmed input must start with `[` and end with `]`. The text between
/// the brackets is split on `(;)`; blank segments are ignored and every other
/// segment is parsed with `parse_single_variant`.
///
/// # Errors
///
/// Returns `FerroError::Parse` (with `pos: 0`) when the brackets are missing,
/// when a segment leaves non-whitespace text unconsumed, or when no variant
/// was found at all. Errors from `parse_single_variant` are propagated.
fn parse_unknown_phase_allele(input: &str) -> Result<HgvsVariant, FerroError> {
    let content = input
        .trim()
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .ok_or_else(|| FerroError::Parse {
            pos: 0,
            msg: "Unknown phase allele must be wrapped in brackets".to_string(),
            diagnostic: None,
        })?;

    let mut variants = Vec::with_capacity(2);
    let segments = content
        .split("(;)")
        .map(str::trim)
        .filter(|segment| !segment.is_empty());
    for segment in segments {
        let (leftover, variant) = parse_single_variant(segment)?;
        if !leftover.trim().is_empty() {
            return Err(FerroError::Parse {
                pos: 0,
                msg: format!("Unexpected content after variant: '{}'", leftover),
                diagnostic: None,
            });
        }
        variants.push(variant);
    }

    if variants.is_empty() {
        return Err(FerroError::Parse {
            pos: 0,
            msg: "Empty allele".to_string(),
            diagnostic: None,
        });
    }

    Ok(HgvsVariant::Allele(AlleleVariant::new(
        variants,
        AllelePhase::Unknown,
    )))
}
/// Classifies `input` as one of the compound notations, purely from its
/// punctuation, without parsing any variant.
///
/// Checks are applied to the trimmed input in this order, first match wins:
///
/// 1. `"trans"`: starts with `[` and contains `];[`.
/// 2. `"unknown_phase"`: wrapped in `[...]` and contains `(;)`.
/// 3. `"cis"`: wrapped in `[...]` with a `;` between the brackets.
/// 4. `"chimeric"`: contains `//`.
/// 5. `"mosaic"`: contains `/` and a `:` occurs before the first `/`.
/// 6. `"rna_fusion"`: contains `::`, with `:r.` before the first `::` and
///    either `:r.` somewhere after it or `r.` immediately after it.
///
/// Returns `None` when none of the patterns apply.
fn detect_allele_type(input: &str) -> Option<&'static str> {
    let text = input.trim();
    let opens_bracket = text.starts_with('[');

    if opens_bracket && text.contains("];[") {
        return Some("trans");
    }

    if opens_bracket && text.ends_with(']') {
        if text.contains("(;)") {
            return Some("unknown_phase");
        }
        // Both brackets are single-byte ASCII, so this slice is in bounds and
        // on character boundaries.
        let between = &text[1..text.len() - 1];
        if between.contains(';') {
            return Some("cis");
        }
    }

    if text.contains("//") {
        return Some("chimeric");
    }

    if let Some((head, _)) = text.split_once('/') {
        if head.contains(':') {
            return Some("mosaic");
        }
    }

    if let Some((left, right)) = text.split_once("::") {
        let right_is_rna = right.contains(":r.") || right.starts_with("r.");
        if left.contains(":r.") && right_is_rna {
            return Some("rna_fusion");
        }
    }

    None
}
/// Parses an RNA fusion description of the form `<breakpoint>::<breakpoint>`.
///
/// The trimmed input is split at the first `::`. The text before it is parsed
/// as the 5' breakpoint and the text after it as the 3' breakpoint, both via
/// `parse_rna_fusion_breakpoint`.
///
/// # Errors
///
/// Returns `FerroError::Parse` when no `::` separator is present, and
/// propagates any error from parsing either breakpoint.
fn parse_rna_fusion(input: &str) -> Result<HgvsVariant, FerroError> {
    let (upstream, downstream) = match input.trim().split_once("::") {
        Some(halves) => halves,
        None => {
            return Err(FerroError::Parse {
                pos: 0,
                msg: "RNA fusion requires '::' separator".to_string(),
                diagnostic: None,
            });
        }
    };

    let five_prime = parse_rna_fusion_breakpoint(upstream)?;
    let three_prime = parse_rna_fusion_breakpoint(downstream)?;

    Ok(HgvsVariant::RnaFusion(RnaFusionVariant::new(
        five_prime,
        three_prime,
    )))
}
/// Parses one side of an RNA fusion: an accession, whatever
/// `parse_gene_symbol` accepts next, the literal `:r.`, and an RNA interval.
///
/// The input is trimmed first. Nothing may follow the interval; even
/// trailing whitespace inside the trimmed text is rejected.
///
/// # Errors
///
/// Returns `FerroError::Parse` when any component fails to parse, when `:r.`
/// is missing, or when text remains after the interval. Except for the
/// accession failure (reported at `pos: 0`), `pos` is the byte offset into
/// the trimmed input at which the failing component was expected to start.
fn parse_rna_fusion_breakpoint(input: &str) -> Result<RnaFusionBreakpoint, FerroError> {
    let text = input.trim();
    // Byte offset into `text` at which the still-unparsed slice `rest` begins.
    let offset_of = |rest: &str| text.len() - rest.len();

    let (after_accession, accession) = match parse_accession(text) {
        Ok(parsed) => parsed,
        Err(e) => {
            return Err(FerroError::Parse {
                pos: 0,
                msg: format!("Failed to parse fusion accession: {:?}", e),
                diagnostic: None,
            });
        }
    };

    let (after_gene, gene_symbol) = match parse_gene_symbol(after_accession) {
        Ok(parsed) => parsed,
        Err(e) => {
            return Err(FerroError::Parse {
                pos: offset_of(after_accession),
                msg: format!("Failed to parse gene symbol: {:?}", e),
                diagnostic: None,
            });
        }
    };

    let after_prefix = match after_gene.strip_prefix(":r.") {
        Some(rest) => rest,
        None => {
            return Err(FerroError::Parse {
                pos: offset_of(after_gene),
                msg: "Expected ':r.' in RNA fusion breakpoint".to_string(),
                diagnostic: None,
            });
        }
    };

    let (leftover, interval) = match parse_rna_interval(after_prefix) {
        Ok(parsed) => parsed,
        Err(e) => {
            return Err(FerroError::Parse {
                pos: offset_of(after_prefix),
                msg: format!("Failed to parse RNA interval: {:?}", e),
                diagnostic: None,
            });
        }
    };

    if !leftover.is_empty() {
        return Err(FerroError::Parse {
            pos: offset_of(leftover),
            msg: format!("Unexpected content after RNA interval: '{}'", leftover),
            diagnostic: None,
        });
    }

    Ok(RnaFusionBreakpoint {
        accession,
        gene_symbol,
        interval,
    })
}
/// Parses an HGVS variant description into an [`HgvsVariant`].
///
/// The input is trimmed and first classified with `detect_allele_type`. If it
/// is recognised as a compound notation (cis, trans, mosaic, chimeric,
/// unknown-phase allele, or RNA fusion) the matching dedicated parser handles
/// it. Otherwise it is parsed as a single variant, and the whole trimmed
/// input must be consumed.
///
/// # Errors
///
/// Propagates any error from the selected parser. For the single-variant
/// path, returns `FerroError::Parse` when characters remain after the
/// variant; `pos` is then the byte offset of the first unconsumed character
/// in the trimmed input.
///
/// # Panics
///
/// Hits `unreachable!()` if `detect_allele_type` returns a label that has no
/// parser listed here.
pub fn parse_variant(input: &str) -> Result<HgvsVariant, FerroError> {
    let text = input.trim();

    match detect_allele_type(text) {
        Some("cis") => return parse_cis_allele(text),
        Some("trans") => return parse_trans_allele(text),
        Some("mosaic") => return parse_mosaic_allele(text),
        Some("chimeric") => return parse_chimeric_allele(text),
        Some("unknown_phase") => return parse_unknown_phase_allele(text),
        Some("rna_fusion") => return parse_rna_fusion(text),
        Some(_) => unreachable!(),
        // Not a compound notation: fall through to the single-variant path.
        None => {}
    }

    let (leftover, variant) = parse_single_variant(text)?;
    if !leftover.is_empty() {
        return Err(FerroError::Parse {
            pos: text.len() - leftover.len(),
            msg: format!("Unexpected trailing characters: '{}'", leftover),
            diagnostic: None,
        });
    }
    Ok(variant)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_genomic_substitution() {
let variant = parse_variant("NC_000001.11:g.12345A>G").unwrap();
assert!(matches!(variant, HgvsVariant::Genome(_)));
assert_eq!(
variant.accession().expect("Expected accession").full(),
"NC_000001.11"
);
assert_eq!(format!("{}", variant), "NC_000001.11:g.12345A>G");
}
#[test]
fn test_parse_genomic_deletion() {
let variant = parse_variant("NC_000001.11:g.12345_12350del").unwrap();
assert!(matches!(variant, HgvsVariant::Genome(_)));
assert_eq!(format!("{}", variant), "NC_000001.11:g.12345_12350del");
}
#[test]
fn test_parse_cds_substitution() {
let variant = parse_variant("NM_000088.3:c.459A>G").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.459A>G");
}
#[test]
fn test_parse_cds_deletion() {
let variant = parse_variant("NM_000088.3:c.459del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
}
#[test]
fn test_parse_cds_intronic() {
let variant = parse_variant("NM_000088.3:c.459+5G>A").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.459+5G>A");
}
#[test]
fn test_parse_cds_intronic_uncertain_offset() {
let variant = parse_variant("NM_000088.3:c.100+?del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.100+?del");
let variant = parse_variant("NM_000088.3:c.100-?del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.100-?del");
let variant = parse_variant("NM_000088.3:c.548-?_5193+?del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.548-?_5193+?del");
let variant = parse_variant("NM_000088.3:c.(?_-1)_328+?del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.(?_-1)_328+?del");
}
#[test]
fn test_parse_cds_inverted_intronic_range() {
let variant = parse_variant("NM_000088.3:c.85-47_84+48del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.85-47_84+48del");
let variant = parse_variant("NM_000088.3:c.86-46_85+47insA").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.86-46_85+47insA");
let variant = parse_variant("NM_000088.3:c.85-47_84+48delGCCAinsG").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.85-47_84+48delinsG");
}
#[test]
fn test_parse_cds_utr() {
let variant = parse_variant("NM_000088.3:c.-20G>A").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.-20G>A");
let variant = parse_variant("NM_000088.3:c.*50G>A").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.*50G>A");
}
#[test]
fn test_parse_whole_cds_unknown() {
let variant = parse_variant("NM_000088.3:c.?").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.?");
}
#[test]
fn test_parse_transcript_variant() {
let variant = parse_variant("NR_000001.1:n.100A>G").unwrap();
assert!(matches!(variant, HgvsVariant::Tx(_)));
}
#[test]
fn test_parse_protein_substitution() {
let variant = parse_variant("NP_000079.2:p.Val600Glu").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
}
#[test]
fn test_parse_protein_frameshift() {
let variant = parse_variant("NP_000079.2:p.Lys23fs").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
let variant = parse_variant("NP_000079.2:p.Arg97ProfsTer23").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.Arg97ProfsTer23");
}
#[test]
fn test_parse_insertion() {
let variant = parse_variant("NM_000088.3:c.459_460insATG").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.459_460insATG");
}
#[test]
fn test_parse_duplication() {
let variant = parse_variant("NM_000088.3:c.459dup").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
}
#[test]
fn test_parse_invalid() {
let result = parse_variant("invalid");
assert!(result.is_err());
}
#[test]
fn test_parse_with_trailing() {
let result = parse_variant("NC_000001.11:g.12345A>G extra");
assert!(result.is_err());
}
#[test]
fn test_parse_uncertain_genomic_deletion() {
let variant = parse_variant("NC_000001.11:g.(12345_12350)del").unwrap();
assert!(matches!(variant, HgvsVariant::Genome(_)));
assert_eq!(format!("{}", variant), "NC_000001.11:g.(12345)_(12350)del");
}
#[test]
fn test_parse_uncertain_cds_deletion() {
let variant = parse_variant("NM_000088.3:c.(100_200)del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.(100)_(200)del");
}
#[test]
fn test_parse_individual_uncertain_positions() {
let variant = parse_variant("NC_000001.11:g.(100)_(200)del").unwrap();
assert!(matches!(variant, HgvsVariant::Genome(_)));
assert_eq!(format!("{}", variant), "NC_000001.11:g.(100)_(200)del");
}
#[test]
fn test_parse_mixed_uncertainty() {
let variant = parse_variant("NC_000001.11:g.100_(200)del").unwrap();
assert!(matches!(variant, HgvsVariant::Genome(_)));
assert_eq!(format!("{}", variant), "NC_000001.11:g.100_(200)del");
}
#[test]
fn test_parse_rna_substitution() {
let variant = parse_variant("NM_000088.3:r.100a>g").unwrap();
assert!(matches!(variant, HgvsVariant::Rna(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:r.100a>g");
}
#[test]
fn test_parse_rna_deletion() {
let variant = parse_variant("NM_000088.3:r.100_200del").unwrap();
assert!(matches!(variant, HgvsVariant::Rna(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:r.100_200del");
}
#[test]
fn test_parse_rna_insertion() {
let variant = parse_variant("NM_000088.3:r.100_101insaug").unwrap();
assert!(matches!(variant, HgvsVariant::Rna(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:r.100_101insaug");
}
#[test]
fn test_parse_rna_with_offset() {
let variant = parse_variant("NM_000088.3:r.100+5a>g").unwrap();
assert!(matches!(variant, HgvsVariant::Rna(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:r.100+5a>g");
}
#[test]
fn test_parse_rna_uncertain() {
let variant = parse_variant("NM_000088.3:r.(100_200)del").unwrap();
assert!(matches!(variant, HgvsVariant::Rna(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:r.(100)_(200)del");
}
#[test]
fn test_parse_rna_utr() {
let variant = parse_variant("NM_000088.3:r.-14a>c").unwrap();
assert!(matches!(variant, HgvsVariant::Rna(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:r.-14a>c");
let variant = parse_variant("NM_000088.3:r.*41u>a").unwrap();
assert!(matches!(variant, HgvsVariant::Rna(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:r.*41u>a");
}
#[test]
fn test_parse_protein_identity() {
let variant = parse_variant("NP_000079.2:p.=").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.=");
}
#[test]
fn test_parse_protein_identity_predicted() {
let variant = parse_variant("NP_000079.2:p.(=)").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.(=)");
}
#[test]
fn test_parse_protein_position_identity() {
let variant = parse_variant("NP_000079.2:p.Val600=").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.Val600=");
}
#[test]
fn test_parse_protein_no_protein() {
let variant = parse_variant("NP_000079.2:p.0").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.0");
}
#[test]
fn test_parse_protein_no_protein_predicted() {
let variant = parse_variant("NP_000079.2:p.0?").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.0?");
}
#[test]
fn test_parse_protein_start_codon_substitution() {
let variant = parse_variant("NP_000079.2:p.Met1Leu").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.Met1Leu");
}
#[test]
fn test_parse_protein_start_codon_deletion() {
let variant = parse_variant("NP_000079.2:p.Met1del").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.Met1del");
}
#[test]
fn test_parse_protein_start_codon_extension() {
let variant = parse_variant("NP_000079.2:p.Met1ext-5").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.Met1ext-5");
}
#[test]
fn test_parse_protein_start_codon_uncertain() {
let variant = parse_variant("NP_000079.2:p.Met1?").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.Met1?");
}
#[test]
fn test_parse_protein_unusual_range() {
let variant = parse_variant("XP_005261260.1:p.Met1_?4").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "XP_005261260.1:p.Met1_Xaa4?");
let variant = parse_variant("NP_000079.2:p.Gly100_?105").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.Gly100_Xaa105?");
}
#[test]
fn test_parse_protein_uncertain_range_boundaries() {
let variant = parse_variant("NP_001005484.1:p.(?_Met1)_Glu2del").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_001005484.1:p.(?_Met1)_Glu2del");
let variant = parse_variant("NP_001005484.1:p.(Met1_?)_Glu2del").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_001005484.1:p.(Met1_?)_Glu2del");
let variant = parse_variant("NP_001005484.1:p.Met1_(Glu2_?)del").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_001005484.1:p.Met1_(Glu2_?)del");
let variant = parse_variant("NP_001005484.1:p.(?_Met1)_(Glu2_?)del").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(
format!("{}", variant),
"NP_001005484.1:p.(?_Met1)_(Glu2_?)del"
);
}
#[test]
fn test_parse_protein_nested_uncertain_positions() {
let variant = parse_variant("NP_000088.1:p.Met(?_1)_Glu2(?)").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
let variant = parse_variant("NP_000088.1:p.Ter(?)").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
let variant = parse_variant("NP_000088.1:p.His1811_Ter(?)").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
let variant = parse_variant("NP_000088.1:p.Met(?_5)del").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
let variant = parse_variant("NP_000088.1:p.Glu(3_?)del").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
}
#[test]
fn test_parse_conversion() {
let variant = parse_variant("NM_000088.3:c.100_200conNM_000089.1:c.50_150").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(
format!("{}", variant),
"NM_000088.3:c.100_200conNM_000089.1:c.50_150"
);
}
#[test]
fn test_parse_genomic_conversion() {
let variant = parse_variant("NC_000001.11:g.12345_12400conNC_000002.12:g.100_155").unwrap();
assert!(matches!(variant, HgvsVariant::Genome(_)));
assert_eq!(
format!("{}", variant),
"NC_000001.11:g.12345_12400conNC_000002.12:g.100_155"
);
}
#[test]
fn test_parse_position_based_conversion() {
let variant =
parse_variant("NC_000017.11:g.42522624_42522669con42536337_42536382").unwrap();
assert!(matches!(variant, HgvsVariant::Genome(_)));
assert_eq!(
format!("{}", variant),
"NC_000017.11:g.42522624_42522669con42536337_42536382"
);
}
#[test]
fn test_parse_repeat_exact() {
let variant = parse_variant("NM_000088.3:c.100CAG[12]").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.100CAG[12]");
}
#[test]
fn test_parse_repeat_range() {
let variant = parse_variant("NM_000088.3:c.100CAG[10_15]").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.100CAG[10_15]");
}
#[test]
fn test_parse_repeat_min_uncertain() {
let variant = parse_variant("NM_000088.3:c.100CAG[10_?]").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.100CAG[10_?]");
}
#[test]
fn test_parse_repeat_max_uncertain() {
let variant = parse_variant("NM_000088.3:c.100CAG[?_20]").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.100CAG[?_20]");
}
#[test]
fn test_parse_repeat_unknown() {
let variant = parse_variant("NM_000088.3:c.100CAG[?]").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.100CAG[?]");
}
#[test]
fn test_parse_unknown_end_position() {
let variant = parse_variant("NM_000088.3:c.1_?del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.1_?del");
}
#[test]
fn test_parse_unknown_start_position() {
let variant = parse_variant("NM_000088.3:c.?_100del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.?_100del");
}
#[test]
fn test_parse_genomic_unknown_position() {
let variant = parse_variant("NC_000001.11:g.100_?del").unwrap();
assert!(matches!(variant, HgvsVariant::Genome(_)));
assert_eq!(format!("{}", variant), "NC_000001.11:g.100_?del");
let variant = parse_variant("NC_000001.11:g.?_200del").unwrap();
assert!(matches!(variant, HgvsVariant::Genome(_)));
assert_eq!(format!("{}", variant), "NC_000001.11:g.?_200del");
}
#[test]
fn test_parse_transcript_unknown_position() {
let variant = parse_variant("NR_000001.1:n.100_?del").unwrap();
assert!(matches!(variant, HgvsVariant::Tx(_)));
assert_eq!(format!("{}", variant), "NR_000001.1:n.100_?del");
}
#[test]
fn test_parse_whole_cds_identity() {
let variant = parse_variant("NM_000088.3:c.=").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.=");
}
#[test]
fn test_parse_position_specific_identity() {
let variant = parse_variant("NM_000088.3:c.100=").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000088.3:c.100=");
}
#[test]
fn test_parse_whole_protein_unknown() {
let variant = parse_variant("NP_000079.2:p.?").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.?");
let variant = parse_variant("NP_000079.2:p.(?)").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.(?)");
}
#[test]
fn test_parse_position_specific_unknown() {
let variant = parse_variant("NP_000079.2:p.Met1?").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.Met1?");
}
#[test]
fn test_parse_predicted_protein_substitution() {
let variant = parse_variant("NP_000079.2:p.(Arg248Gln)").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.(Arg248Gln)");
}
#[test]
fn test_parse_predicted_protein_frameshift() {
let variant = parse_variant("NP_000079.2:p.(Lys23fs)").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.(Lys23fs)");
}
#[test]
fn test_parse_predicted_protein_deletion() {
let variant = parse_variant("NP_000079.2:p.(Val600del)").unwrap();
assert!(matches!(variant, HgvsVariant::Protein(_)));
assert_eq!(format!("{}", variant), "NP_000079.2:p.(Val600del)");
}
#[test]
fn test_parse_complex_cds_interval_exon_deletion() {
let variant = parse_variant("NM_007294.4:c.(4185+1_4186-1)_(4357+1_4358-1)del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(
format!("{}", variant),
"NM_007294.4:c.(4185+1_4186-1)_(4357+1_4358-1)del"
);
}
#[test]
fn test_parse_complex_cds_interval_with_unknown() {
let variant = parse_variant("NM_000546.6:c.(?_-1)_(96+1_97-1)del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(
format!("{}", variant),
"NM_000546.6:c.(?_-1)_(96+1_97-1)del"
);
}
#[test]
fn test_parse_complex_genome_interval_uncertain_boundaries() {
let variant = parse_variant("NC_000017.11:g.(?_43044294)_(43125364_?)del").unwrap();
assert!(matches!(variant, HgvsVariant::Genome(_)));
assert_eq!(
format!("{}", variant),
"NC_000017.11:g.(?_43044294)_(43125364_?)del"
);
}
#[test]
fn test_parse_simple_uncertain_boundary() {
let variant = parse_variant("NM_000001.1:c.(4_6)_246del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000001.1:c.(4_6)_246del");
let variant = parse_variant("NM_000001.1:c.100_(200_250)del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000001.1:c.100_(200_250)del");
let variant = parse_variant("NM_000001.1:c.(4_6)_(200_250)del").unwrap();
assert!(matches!(variant, HgvsVariant::Cds(_)));
assert_eq!(format!("{}", variant), "NM_000001.1:c.(4_6)_(200_250)del");
}
#[test]
fn test_parse_cis_allele() {
let variant = parse_variant("[NM_000088.3:c.100A>G;NM_000088.3:c.200C>T]").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Cis);
assert_eq!(allele.variants.len(), 2);
}
assert_eq!(
format!("{}", variant),
"[NM_000088.3:c.100A>G;NM_000088.3:c.200C>T]"
);
}
#[test]
fn test_parse_trans_allele() {
let variant = parse_variant("[NM_000088.3:c.100A>G];[NM_000088.3:c.200C>T]").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Trans);
assert_eq!(allele.variants.len(), 2);
}
assert_eq!(
format!("{}", variant),
"[NM_000088.3:c.100A>G];[NM_000088.3:c.200C>T]"
);
}
#[test]
fn test_parse_mosaic_allele() {
let variant = parse_variant("NM_000088.3:c.100A>G/NM_000088.3:c.200C>T").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Mosaic);
assert_eq!(allele.variants.len(), 2);
}
assert_eq!(
format!("{}", variant),
"NM_000088.3:c.100A>G/NM_000088.3:c.200C>T"
);
}
#[test]
fn test_parse_mosaic_with_reference() {
let variant = parse_variant("NM_000088.3:c.123A>G/=").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Mosaic);
assert_eq!(allele.variants.len(), 2);
assert!(matches!(&allele.variants[0], HgvsVariant::Cds(_)));
if let HgvsVariant::Cds(cds) = &allele.variants[1] {
assert!(cds
.loc_edit
.edit
.inner()
.map(|e| e.is_whole_entity_identity())
.unwrap_or(false));
} else {
panic!("Expected CDS variant");
}
}
assert_eq!(
format!("{}", variant),
"NM_000088.3:c.123A>G/NM_000088.3:c.="
);
}
#[test]
fn test_parse_chimeric_with_reference() {
let variant = parse_variant("NM_000088.3:c.456C>T//=").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Chimeric);
assert_eq!(allele.variants.len(), 2);
}
assert_eq!(
format!("{}", variant),
"NM_000088.3:c.456C>T//NM_000088.3:c.="
);
}
#[test]
fn test_parse_mosaic_reference_first_fails() {
let result = parse_variant("=/NM_000088.3:c.123A>G");
assert!(result.is_err());
}
#[test]
fn test_parse_chimeric_allele() {
let variant = parse_variant("NM_000088.3:c.100A>G//NM_000088.3:c.200C>T").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Chimeric);
assert_eq!(allele.variants.len(), 2);
}
assert_eq!(
format!("{}", variant),
"NM_000088.3:c.100A>G//NM_000088.3:c.200C>T"
);
}
#[test]
fn test_parse_unknown_phase_allele() {
let variant = parse_variant("[NM_000088.3:c.100A>G(;)NM_000088.3:c.200C>T]").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Unknown);
assert_eq!(allele.variants.len(), 2);
}
assert_eq!(
format!("{}", variant),
"[NM_000088.3:c.100A>G(;)NM_000088.3:c.200C>T]"
);
}
#[test]
fn test_parse_unknown_phase_allele_multiple() {
let variant =
parse_variant("[NM_000088.3:c.100A>G(;)NM_000088.3:c.200C>T(;)NM_000088.3:c.300G>A]")
.unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Unknown);
assert_eq!(allele.variants.len(), 3);
}
}
#[test]
fn test_parse_cis_allele_shorthand() {
let variant = parse_variant("NM_000088.3:c.[145C>T;147C>G]").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Cis);
assert_eq!(allele.variants.len(), 2);
for v in &allele.variants {
assert_eq!(
v.accession().expect("Expected accession").full(),
"NM_000088.3"
);
}
}
assert_eq!(
format!("{}", variant),
"[NM_000088.3:c.145C>T;NM_000088.3:c.147C>G]"
);
}
#[test]
fn test_parse_unknown_phase_allele_shorthand() {
let variant = parse_variant("NM_000088.3:c.[145C>T(;)147C>G]").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Unknown);
assert_eq!(allele.variants.len(), 2);
}
assert_eq!(
format!("{}", variant),
"[NM_000088.3:c.145C>T(;)NM_000088.3:c.147C>G]"
);
}
#[test]
fn test_parse_mixed_phase_allele_shorthand() {
let variant = parse_variant("NM_000088.3:c.[123A>G;456C>T(;)789G>A]").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Unknown);
assert_eq!(allele.variants.len(), 3);
}
}
#[test]
fn test_parse_mixed_phase_allele_shorthand_complex() {
let variant =
parse_variant("NM_000088.3:c.[100A>G;200C>T;300G>A(;)400del;500dup]").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Unknown);
assert_eq!(allele.variants.len(), 5);
}
}
#[test]
fn test_parse_allele_with_interval_only_variant() {
let variant = parse_variant("NC_000023.10:g.[100_200;300_400dup]").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Cis);
assert_eq!(allele.variants.len(), 2);
if let HgvsVariant::Genome(v) = &allele.variants[0] {
assert!(matches!(
v.loc_edit.edit,
crate::hgvs::uncertainty::Mu::Certain(
crate::hgvs::edit::NaEdit::Identity { .. }
)
));
}
}
let variant = parse_variant(
"NC_000023.10:g.[(?_29619835)_(29843303_?);(?_30646799)_(30848980_?)dup]",
)
.unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.variants.len(), 2);
}
}
#[test]
fn test_parse_cis_allele_multiple_variants() {
let variant =
parse_variant("[NM_000088.3:c.100A>G;NM_000088.3:c.200C>T;NM_000088.3:c.300G>A]")
.unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Cis);
assert_eq!(allele.variants.len(), 3);
}
}
#[test]
fn test_parse_cis_allele_mixed_types() {
let variant = parse_variant("[NM_000088.3:c.100A>G;NP_000079.2:p.Val600Glu]").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Cis);
assert_eq!(allele.variants.len(), 2);
assert!(matches!(allele.variants[0], HgvsVariant::Cds(_)));
assert!(matches!(allele.variants[1], HgvsVariant::Protein(_)));
}
}
#[test]
fn test_parse_cis_allele_clinvar_format() {
let variant = parse_variant("NM_006876.2:[c.1168A>G;c.1217C>T]").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Cis);
assert_eq!(allele.variants.len(), 2);
assert!(
matches!(&allele.variants[0], HgvsVariant::Cds(v) if v.accession.to_string() == "NM_006876.2")
);
assert!(
matches!(&allele.variants[1], HgvsVariant::Cds(v) if v.accession.to_string() == "NM_006876.2")
);
}
}
#[test]
fn test_parse_cis_allele_clinvar_format_genomic() {
let variant = parse_variant("NC_000017.11:[g.43094927del;g.43095845dup]").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Cis);
assert_eq!(allele.variants.len(), 2);
assert!(matches!(&allele.variants[0], HgvsVariant::Genome(_)));
assert!(matches!(&allele.variants[1], HgvsVariant::Genome(_)));
}
}
#[test]
fn test_parse_unknown_phase_allele_clinvar_format() {
let variant = parse_variant("NM_000088.3:[c.145C>T(;)c.147C>G]").unwrap();
assert!(matches!(variant, HgvsVariant::Allele(_)));
if let HgvsVariant::Allele(allele) = &variant {
assert_eq!(allele.phase, AllelePhase::Unknown);
assert_eq!(allele.variants.len(), 2);
}
}
#[test]
fn test_parse_circular_substitution() {
let variant = parse_variant("NC_001416.1:o.100A>G").unwrap();
assert!(matches!(variant, HgvsVariant::Circular(_)));
assert_eq!(format!("{}", variant), "NC_001416.1:o.100A>G");
}
/// An `o.` range duplication parses as a circular variant and displays unchanged.
#[test]
fn test_parse_circular_duplication() {
    let input = "NC_001416.1:o.100_200dup";
    let parsed = parse_variant(input).unwrap();
    assert!(matches!(parsed, HgvsVariant::Circular(_)));
    assert_eq!(parsed.to_string(), input);
}
/// An `o.` range deletion parses as a circular variant and displays unchanged.
#[test]
fn test_parse_circular_deletion() {
    let input = "NC_001416.1:o.100_200del";
    let parsed = parse_variant(input).unwrap();
    assert!(matches!(parsed, HgvsVariant::Circular(_)));
    assert_eq!(parsed.to_string(), input);
}
/// An `o.` insertion parses as a circular variant and displays unchanged.
#[test]
fn test_parse_circular_insertion() {
    let input = "NC_001416.1:o.100_101insATG";
    let parsed = parse_variant(input).unwrap();
    assert!(matches!(parsed, HgvsVariant::Circular(_)));
    assert_eq!(parsed.to_string(), input);
}
/// A `::`-joined pair of `r.` ranges (including `-` and `*` positions) parses
/// as an RNA fusion, exposes both accessions, and displays unchanged.
#[test]
fn test_parse_rna_fusion_basic() {
    let input = "NM_152263.2:r.-115_775::NM_002609.3:r.1580_*1924";
    let parsed = parse_variant(input).unwrap();
    let HgvsVariant::RnaFusion(fusion) = &parsed else {
        panic!("expected HgvsVariant::RnaFusion");
    };
    assert_eq!(fusion.five_prime.accession.full(), "NM_152263.2");
    assert_eq!(fusion.three_prime.accession.full(), "NM_002609.3");
    assert_eq!(parsed.to_string(), input);
}
/// An RNA fusion built from plain numeric ranges parses and displays unchanged.
#[test]
fn test_parse_rna_fusion_simple_positions() {
    let input = "NM_000546.5:r.1_200::NM_000245.3:r.100_500";
    let parsed = parse_variant(input).unwrap();
    assert!(matches!(parsed, HgvsVariant::RnaFusion(_)));
    assert_eq!(parsed.to_string(), input);
}
/// An RNA fusion whose two sides are single positions rather than ranges
/// parses and displays unchanged.
#[test]
fn test_parse_rna_fusion_single_positions() {
    let input = "NM_000001.1:r.100::NM_000002.1:r.200";
    let parsed = parse_variant(input).unwrap();
    assert!(matches!(parsed, HgvsVariant::RnaFusion(_)));
    assert_eq!(parsed.to_string(), input);
}
/// Parses an RNA fusion between two `NM` transcripts and checks the accession
/// prefix recorded on each side.
#[test]
fn test_parse_rna_fusion_epcam_msh2() {
    let parsed = parse_variant("NM_002354.2:r.1_3469::NM_000251.2:r.3397_6871").unwrap();
    let HgvsVariant::RnaFusion(fusion) = &parsed else {
        panic!("expected HgvsVariant::RnaFusion");
    };
    assert_eq!(&*fusion.five_prime.accession.prefix, "NM");
    assert_eq!(&*fusion.three_prime.accession.prefix, "NM");
}
/// Consecutive `[n]` repeat counts after a position (with or without a base)
/// are accepted and produce a coding variant.
#[test]
fn test_parse_nested_repeat_notation() {
    let inputs = ["NM_000088.3:c.100A[6][1]", "NM_000088.3:c.100[4][5][6]"];
    for input in inputs {
        let parsed = parse_variant(input).unwrap();
        assert!(matches!(parsed, HgvsVariant::Cds(_)));
    }
}
/// Bracketed multi-part insertions (a repeated base plus a literal, or a
/// literal plus a position range) are accepted and produce a coding variant.
#[test]
fn test_parse_complex_insertion_with_parts() {
    let inputs = [
        "NM_000088.3:c.100_101ins[A[10];T]",
        "NM_000088.3:c.419_420ins[T;401_419]",
    ];
    for input in inputs {
        let parsed = parse_variant(input).unwrap();
        assert!(matches!(parsed, HgvsVariant::Cds(_)));
    }
}
/// Six-character accessions without an underscore or version (`P54802`,
/// `Q8TAM1`) are accepted for protein variants; for the first one the
/// accession is split into the leading letter and the remaining characters.
#[test]
fn test_parse_uniprot_protein_variant() {
    let first = parse_variant("P54802:p.Phe48Leu").unwrap();
    let HgvsVariant::Protein(prot) = &first else {
        panic!("expected HgvsVariant::Protein");
    };
    assert_eq!(&*prot.accession.prefix, "P");
    assert_eq!(&*prot.accession.number, "54802");

    let second = parse_variant("Q8TAM1:p.Arg34Pro").unwrap();
    assert!(matches!(second, HgvsVariant::Protein(_)));
}
/// A ten-character accession (`A0A024R1R8`) is accepted for a protein variant:
/// the first letter becomes the prefix, the rest the number, and `full()`
/// reproduces the original accession.
#[test]
fn test_parse_uniprot_10char_accession() {
    let parsed = parse_variant("A0A024R1R8:p.Val100Glu").unwrap();
    let HgvsVariant::Protein(prot) = &parsed else {
        panic!("expected HgvsVariant::Protein");
    };
    assert_eq!(&*prot.accession.prefix, "A");
    assert_eq!(&*prot.accession.number, "0A024R1R8");
    assert_eq!(prot.accession.full(), "A0A024R1R8");
}
/// An inversion followed by a numeric length (`inv3`) is accepted as a coding variant.
#[test]
fn test_parse_inversion_with_length() {
    let input = "NM_000088.3:c.274_276inv3";
    let parsed = parse_variant(input).unwrap();
    assert!(matches!(parsed, HgvsVariant::Cds(_)));
}
/// An inversion followed by an explicit sequence (`invATG`) is accepted as a coding variant.
#[test]
fn test_parse_inversion_with_sequence() {
    let input = "NM_000088.3:c.274_276invATG";
    let parsed = parse_variant(input).unwrap();
    assert!(matches!(parsed, HgvsVariant::Cds(_)));
}
/// Duplications with a trailing parenthesized range, `?`, or `(?)` are all
/// accepted and produce a coding variant.
#[test]
fn test_parse_uncertain_duplication_range() {
    let inputs = [
        "NM_000088.3:c.722_742dup(731_741)",
        "NM_000088.3:c.100dup?",
        "NM_000088.3:c.100dup(?)",
    ];
    for input in inputs {
        let parsed = parse_variant(input).unwrap();
        assert!(matches!(parsed, HgvsVariant::Cds(_)));
    }
}
/// `Assembly(chromosome):g.…` references parse as genome variants; for the
/// first input the accession reports itself as an assembly reference and
/// carries the assembly and chromosome names.
#[test]
fn test_parse_assembly_chromosome_notation() {
    let grch38 = parse_variant("GRCh38(chr1):g.100A>G").unwrap();
    let HgvsVariant::Genome(genome) = &grch38 else {
        panic!("expected HgvsVariant::Genome");
    };
    assert!(genome.accession.is_assembly_ref());
    assert_eq!(genome.accession.assembly.as_deref(), Some("GRCh38"));
    assert_eq!(genome.accession.chromosome.as_deref(), Some("chr1"));

    let grch37 = parse_variant("GRCh37(chrX):g.15000del").unwrap();
    assert!(matches!(grch37, HgvsVariant::Genome(_)));
}
/// An `o.` substitution on `NC_012920.1` parses as a circular variant.
#[test]
fn test_parse_circular_dna_notation() {
    let input = "NC_012920.1:o.100A>G";
    let parsed = parse_variant(input).unwrap();
    assert!(matches!(parsed, HgvsVariant::Circular(_)));
}
/// Deletions bounded by `(?_N)` on the left and `N+?` on the right: the first
/// input must display unchanged, the second must parse as a coding variant.
#[test]
fn test_parse_complex_uncertain_positions() {
    let round_trip_input = "NM_000088.3:c.(?_-1)_328+?del";
    let round_tripped = parse_variant(round_trip_input).unwrap();
    assert_eq!(round_tripped.to_string(), round_trip_input);

    let cds_only = parse_variant("NM_000088.3:c.(?_1)_5074+?del").unwrap();
    assert!(matches!(cds_only, HgvsVariant::Cds(_)));
}
/// Positions written with `?` (bare, `?-N`, and `N+?`), on both `NM_` and
/// `LRG_…t1` accessions, parse as coding variants and display unchanged.
#[test]
fn test_parse_unknown_position_patterns() {
    let inputs = [
        "NM_001412270.1:c.?dup",
        "NM_007294.3:c.?-232_4484+?del",
        "LRG_292t1:c.?-232_4484+?del",
    ];
    for input in inputs {
        let parsed = parse_variant(input).unwrap();
        assert!(matches!(parsed, HgvsVariant::Cds(_)));
        assert_eq!(parsed.to_string(), input);
    }
}
/// A `delins[...]` whose inserted part names another accession and range
/// parses on both genomic and coding coordinates and displays unchanged.
#[test]
fn test_parse_delins_external_reference() {
    let genomic_input = "NC_000008.11:g.86587460_86650711delins[KY923049.1:g.1_466]";
    let genomic = parse_variant(genomic_input).unwrap();
    assert!(matches!(genomic, HgvsVariant::Genome(_)));
    assert_eq!(genomic.to_string(), genomic_input);

    let coding_input = "NM_019098.4:c.904-2824_1782-8208delins[KY923049.1:g.1_466]";
    let coding = parse_variant(coding_input).unwrap();
    assert!(matches!(coding, HgvsVariant::Cds(_)));
    assert_eq!(coding.to_string(), coding_input);
}
/// Genomic `delins[...]` lists mixing literal sequence, position ranges and
/// inverted ranges parse as genome variants and display unchanged.
#[test]
fn test_parse_complex_delins_array() {
    let inputs = [
        "NC_000007.14:g.45043702_46521017delins[AGAAGGAAATTT;45310743_46521014;45043709_45310738inv]",
        "NC_000016.9:g.78179358_78219143delins[78185355_78199419inv]",
    ];
    for input in inputs {
        let parsed = parse_variant(input).unwrap();
        assert!(matches!(parsed, HgvsVariant::Genome(_)));
        assert_eq!(parsed.to_string(), input);
    }
}
/// A `delins` sequence followed by a bracketed `[min_max]` range parses as a
/// genome variant and displays unchanged.
#[test]
fn test_parse_delins_sequence_repeat_range() {
    let input = "NC_000004.12:g.39348425_39348479delinsAAAGG[400_2000]";
    let parsed = parse_variant(input).unwrap();
    assert!(matches!(parsed, HgvsVariant::Genome(_)));
    assert_eq!(parsed.to_string(), input);
}
/// A bracketed insertion combining a literal sequence with an
/// `accession:range` part parses as a coding variant and displays unchanged.
#[test]
fn test_parse_complex_insertion_with_external() {
    let input = "NM_017635.5:c.438_439ins[TCTT;KT192064.1:1_310]";
    let parsed = parse_variant(input).unwrap();
    assert!(matches!(parsed, HgvsVariant::Cds(_)));
    assert_eq!(parsed.to_string(), input);
}
/// Genomic ranges ending in `qter`, both in the outer interval and inside a
/// `delins[...]` part, parse as genome variants and display unchanged.
#[test]
fn test_parse_telomere_qter_patterns() {
    let inputs = [
        "NC_000009.12:g.12891379_qterdelins[T;NC_000020.11:g.47204889_qterinv]",
        "NC_000013.10:g.114819939_qterdelins[96729864_114814234inv;96735632_104289803]",
    ];
    for input in inputs {
        let parsed = parse_variant(input).unwrap();
        assert!(matches!(parsed, HgvsVariant::Genome(_)));
        assert_eq!(parsed.to_string(), input);
    }
}
/// A substitution wrapped entirely in parentheses (`c.(9740C>A)`) parses as a
/// coding variant and is displayed with the parentheses around the edit only
/// (`c.9740(C>A)`).
#[test]
fn test_parse_predicted_substitution_in_parens() {
    // (input, expected display form)
    let cases = [
        ("NM_002016.2:c.(9740C>A)", "NM_002016.2:c.9740(C>A)"),
        ("NM_006767.4:c.(742G>A)", "NM_006767.4:c.742(G>A)"),
    ];
    for (input, expected) in cases {
        let parsed = parse_variant(input).unwrap();
        assert!(matches!(parsed, HgvsVariant::Cds(_)));
        assert_eq!(parsed.to_string(), expected);
    }
}
#[test]
fn test_parse_uncertain_intron_exon_ranges() {
    // NOTE(review): this body is empty, so the test always passes without
    // calling the parser. TODO: add the intended cases or remove the test.
}
/// Genomic intervals whose start is greater than their end are accepted and
/// displayed in the same order they were written.
#[test]
fn test_parse_reverse_order_intervals() {
    let inputs = [
        "NC_000011.10:g.5238138_5153222insTATTT",
        "NC_000012.11:g.110593351_110576466dup",
        "NC_000016.9:g.23634775_23621090dup",
    ];
    for input in inputs {
        let parsed = parse_variant(input).expect("should parse");
        assert_eq!(parsed.to_string(), input);
    }
}
/// Coding descriptions that consist of a position or range with no edit are
/// accepted and displayed unchanged.
#[test]
fn test_parse_position_only_cds() {
    let inputs = [
        "NM_173651.4:c.5238_5240",
        "NM_134428.2:c.1486_1487",
        "NM_007375.3:c.*697",
    ];
    for input in inputs {
        let parsed = parse_variant(input).expect("should parse");
        assert_eq!(parsed.to_string(), input);
    }
}
/// Smoke test: feeds `g.?_?ins(...)_(...)` inputs to the parser and discards
/// the result, so it only fails if `parse_variant` panics.
#[test]
fn test_parse_unknown_position_insertion() {
    let inputs = [
        "LRG_308:g.?_?ins(23632682_23625413)_(23625324_23619334)",
        "NG_007406.1:g.?_?ins(23632682_23625413)_(23625324_23619334)",
    ];
    for input in inputs {
        // Ok and Err are both acceptable here; the outcome is intentionally ignored.
        let _ = parse_variant(input);
    }
}
/// `n.` positions written with a `*` prefix, with and without a `+offset`,
/// are accepted and displayed unchanged.
#[test]
fn test_parse_tx_downstream_positions() {
    let inputs = ["NR_033294.1:n.*5C>G", "NR_033294.1:n.*5+10C>G"];
    for input in inputs {
        let parsed = parse_variant(input).expect("should parse");
        assert_eq!(parsed.to_string(), input);
    }
}
}