use super::edit::{NaEdit, ProteinEdit};
use super::interval::{CdsInterval, GenomeInterval, ProtInterval, RnaInterval, TxInterval};
use super::uncertainty::Mu;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::sync::Arc;
/// Process-wide shared `Arc<str>` instances for the well-known accession prefixes.
///
/// Each accessor lazily creates its string once and afterwards hands out clones of the same
/// allocation, so repeated accessions with the same prefix share one buffer.
mod interned {
    use std::sync::{Arc, OnceLock};

    /// Generates one accessor per `name => "TEXT"` pair plus the `get_prefix` lookup, so the
    /// accessor list and the lookup table cannot drift apart.
    macro_rules! interned_prefixes {
        ($($name:ident => $value:literal),+ $(,)?) => {
            $(
                /// Returns a clone of the shared `Arc<str>` holding this prefix.
                pub fn $name() -> Arc<str> {
                    static CELL: OnceLock<Arc<str>> = OnceLock::new();
                    Arc::clone(CELL.get_or_init(|| Arc::from($value)))
                }
            )+

            /// Returns the shared instance for `s` when it is exactly one of the known
            /// prefixes (the comparison is case-sensitive), otherwise `None`.
            #[inline]
            pub fn get_prefix(s: &str) -> Option<Arc<str>> {
                match s {
                    $($value => Some($name()),)+
                    _ => None,
                }
            }
        };
    }

    interned_prefixes! {
        nc => "NC",
        ng => "NG",
        nt => "NT",
        nw => "NW",
        nm => "NM",
        nr => "NR",
        np => "NP",
        xm => "XM",
        xr => "XR",
        xp => "XP",
        enst => "ENST",
        ensg => "ENSG",
        ensp => "ENSP",
        ense => "ENSE",
        ensr => "ENSR",
        lrg => "LRG",
        empty => "",
    }
}
/// A reference-sequence identifier stored as separate components.
///
/// `base()` and `full()` join the components back into text such as `NM_000088.3`,
/// `ENST00000012345.1`, or `GRCh37(chr1)`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Accession {
/// Leading identifier such as `NM` or `ENST`; `from_assembly` leaves it empty.
pub prefix: Arc<str>,
/// Text that follows the prefix, e.g. `000088`; `from_assembly` leaves it empty.
pub number: Arc<str>,
/// Optional version, appended by `full()` as `.N`.
pub version: Option<u32>,
/// When true, prefix and number are joined without the `_` separator.
/// Falls back to `false` when absent from serialized input.
#[serde(default)]
pub ensembl_style: bool,
/// Assembly name (e.g. `GRCh37`) for assembly-style references.
/// Falls back to `None` when absent from serialized input.
#[serde(default)]
pub assembly: Option<Arc<str>>,
/// Chromosome label (e.g. `chr1`) that accompanies `assembly`.
/// Falls back to `None` when absent from serialized input.
#[serde(default)]
pub chromosome: Option<Arc<str>>,
}
impl Accession {
    /// Converts `prefix` into an `Arc<str>`, replacing it with the shared interned instance
    /// when its text is one of the well-known prefixes.
    #[inline]
    fn intern_prefix(prefix: impl Into<Arc<str>>) -> Arc<str> {
        let candidate: Arc<str> = prefix.into();
        let shared = interned::get_prefix(&candidate);
        shared.unwrap_or(candidate)
    }

    /// Builds an accession from its parts.
    ///
    /// The underscore-free style is switched on automatically when `prefix` is one of the
    /// Ensembl prefixes recognised by [`Accession::is_ensembl_prefix`].
    pub fn new(
        prefix: impl Into<Arc<str>>,
        number: impl Into<Arc<str>>,
        version: Option<u32>,
    ) -> Self {
        let prefix = Self::intern_prefix(prefix);
        Self {
            // Evaluated before `prefix` is moved into the struct on the next line.
            ensembl_style: Self::is_ensembl_prefix(&prefix),
            prefix,
            number: number.into(),
            version,
            assembly: None,
            chromosome: None,
        }
    }

    /// Builds an accession with an explicitly chosen separator style, regardless of prefix.
    pub fn with_style(
        prefix: impl Into<Arc<str>>,
        number: impl Into<Arc<str>>,
        version: Option<u32>,
        ensembl_style: bool,
    ) -> Self {
        let prefix = Self::intern_prefix(prefix);
        let number = number.into();
        Self {
            prefix,
            number,
            version,
            ensembl_style,
            assembly: None,
            chromosome: None,
        }
    }

    /// Builds an assembly-style reference (rendered as `assembly(chromosome)`).
    ///
    /// Prefix and number are left empty and no version is stored.
    pub fn from_assembly(assembly: impl Into<Arc<str>>, chromosome: impl Into<Arc<str>>) -> Self {
        let assembly = Some(assembly.into());
        let chromosome = Some(chromosome.into());
        Self {
            prefix: interned::empty(),
            number: interned::empty(),
            version: None,
            ensembl_style: false,
            assembly,
            chromosome,
        }
    }

    /// Returns `true` when both the assembly and the chromosome are present.
    pub fn is_assembly_ref(&self) -> bool {
        matches!((&self.assembly, &self.chromosome), (Some(_), Some(_)))
    }

    /// Returns `true` when `prefix` is exactly one of `ENST`, `ENSG`, `ENSP`, `ENSE`, `ENSR`.
    pub fn is_ensembl_prefix(prefix: &str) -> bool {
        ["ENST", "ENSG", "ENSP", "ENSE", "ENSR"].contains(&prefix)
    }

    /// Returns `true` when the accession is flagged as Ensembl style or carries an Ensembl
    /// prefix.
    pub fn is_ensembl(&self) -> bool {
        Self::is_ensembl_prefix(&self.prefix) || self.ensembl_style
    }

    /// Checks the number part of an Ensembl accession: 11 to 15 ASCII digits.
    ///
    /// Accessions that are not Ensembl (per [`Accession::is_ensembl`]) always pass.
    pub fn validate_ensembl(&self) -> bool {
        if self.is_ensembl() {
            let length_ok = (11..=15).contains(&self.number.len());
            // A non-ASCII character never contains an ASCII-digit byte, so checking bytes
            // gives the same answer as checking characters.
            length_ok && self.number.bytes().all(|byte| byte.is_ascii_digit())
        } else {
            true
        }
    }

    /// Guesses the HGVS coordinate-type letter from the prefix alone.
    ///
    /// Returns `None` for prefixes that are not listed here. A prefix made of one uppercase
    /// ASCII letter is treated as a protein reference.
    pub fn inferred_variant_type(&self) -> Option<&'static str> {
        match &*self.prefix {
            "NC" | "NG" | "NT" | "NW" | "ENSG" | "LRG" => Some("g"),
            "NM" | "ENST" => Some("c"),
            "NR" => Some("n"),
            "NP" | "ENSP" => Some("p"),
            other if matches!(other.as_bytes(), [b'A'..=b'Z']) => Some("p"),
            _ => None,
        }
    }

    /// Returns `true` when the prefix is one uppercase ASCII letter and the number is exactly
    /// five ASCII alphanumeric characters.
    pub fn is_uniprot(&self) -> bool {
        let single_upper_prefix = matches!(self.prefix.as_bytes(), [b'A'..=b'Z']);
        single_upper_prefix
            && self.number.len() == 5
            && self.number.bytes().all(|byte| byte.is_ascii_alphanumeric())
    }

    /// Renders the accession without its version.
    ///
    /// Assembly references become `assembly(chromosome)`; everything else is the prefix and
    /// number joined with `_`, or joined directly when `ensembl_style` is set.
    pub fn base(&self) -> String {
        match (&self.assembly, &self.chromosome) {
            (Some(assembly), Some(chromosome)) => format!("{}({})", assembly, chromosome),
            _ if self.ensembl_style => format!("{}{}", self.prefix, self.number),
            _ => format!("{}_{}", self.prefix, self.number),
        }
    }

    /// Renders the accession including `.version` when one is stored.
    ///
    /// Assembly references never show a version.
    pub fn full(&self) -> String {
        match self.version {
            Some(version) if !self.is_assembly_ref() => format!("{}.{}", self.base(), version),
            _ => self.base(),
        }
    }
}
/// Displays the accession exactly as [`Accession::full`] renders it.
impl fmt::Display for Accession {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered = self.full();
        f.write_str(&rendered)
    }
}
/// A location paired with the edit applied there.
///
/// `L` is the location type and `E` the edit type; the edit is wrapped in `Mu`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct LocEdit<L, E> {
/// Where the edit applies.
pub location: L,
/// The edit, wrapped in `Mu` (`LocEdit::new` stores `Mu::Certain`,
/// `LocEdit::new_predicted` stores `Mu::Uncertain`).
pub edit: Mu<E>,
}
impl<L, E> LocEdit<L, E> {
pub fn new(location: L, edit: E) -> Self {
Self {
location,
edit: Mu::Certain(edit),
}
}
pub fn new_predicted(location: L, edit: E) -> Self {
Self {
location,
edit: Mu::Uncertain(edit),
}
}
pub fn with_uncertainty(location: L, edit: Mu<E>) -> Self {
Self { location, edit }
}
}
/// Writes the location immediately followed by the (possibly wrapped) edit.
impl<L: fmt::Display, E: fmt::Display> fmt::Display for LocEdit<L, E> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { location, edit } = self;
        write!(f, "{location}{edit}")
    }
}
/// How the member variants of an `AlleleVariant` relate to each other.
///
/// The phase selects the layout used by `AlleleVariant`'s `Display` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AllelePhase {
/// Displayed as `[a;b]`.
Cis,
/// Displayed as `[a];[b]`.
Trans,
/// Displayed as `[a(;)b]`.
Unknown,
/// Displayed as `a/b`.
Mosaic,
/// Displayed as `a//b`.
Chimeric,
}
impl fmt::Display for AllelePhase {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
AllelePhase::Cis => write!(f, "cis"),
AllelePhase::Trans => write!(f, "trans"),
AllelePhase::Unknown => write!(f, "unknown"),
AllelePhase::Mosaic => write!(f, "mosaic"),
AllelePhase::Chimeric => write!(f, "chimeric"),
}
}
}
impl AllelePhase {
pub fn is_mosaic(&self) -> bool {
matches!(self, AllelePhase::Mosaic)
}
pub fn is_chimeric(&self) -> bool {
matches!(self, AllelePhase::Chimeric)
}
}
/// A group of variants reported together, plus the phase that relates them.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct AlleleVariant {
/// Member variants, displayed in this order.
pub variants: Vec<HgvsVariant>,
/// Relationship between the members; selects the display layout.
pub phase: AllelePhase,
}
impl AlleleVariant {
pub fn new(variants: Vec<HgvsVariant>, phase: AllelePhase) -> Self {
Self { variants, phase }
}
pub fn cis(variants: Vec<HgvsVariant>) -> Self {
Self::new(variants, AllelePhase::Cis)
}
pub fn trans(variants: Vec<HgvsVariant>) -> Self {
Self::new(variants, AllelePhase::Trans)
}
pub fn mosaic(variants: Vec<HgvsVariant>) -> Self {
Self::new(variants, AllelePhase::Mosaic)
}
pub fn chimeric(variants: Vec<HgvsVariant>) -> Self {
Self::new(variants, AllelePhase::Chimeric)
}
pub fn unknown_phase(variants: Vec<HgvsVariant>) -> Self {
Self::new(variants, AllelePhase::Unknown)
}
}
/// Writes the member variants in the layout selected by the phase:
///
/// | phase    | output    |
/// |----------|-----------|
/// | cis      | `[a;b]`   |
/// | trans    | `[a];[b]` |
/// | unknown  | `[a(;)b]` |
/// | mosaic   | `a/b`     |
/// | chimeric | `a//b`    |
impl fmt::Display for AlleleVariant {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // (separator between members, brackets around the whole list, brackets per member)
        let (separator, enclose_all, enclose_each) = match self.phase {
            AllelePhase::Cis => (";", true, false),
            AllelePhase::Trans => (";", false, true),
            AllelePhase::Unknown => ("(;)", true, false),
            AllelePhase::Mosaic => ("/", false, false),
            AllelePhase::Chimeric => ("//", false, false),
        };

        if enclose_all {
            f.write_str("[")?;
        }
        let mut needs_separator = false;
        for member in &self.variants {
            if needs_separator {
                f.write_str(separator)?;
            }
            needs_separator = true;
            if enclose_each {
                write!(f, "[{}]", member)?;
            } else {
                write!(f, "{}", member)?;
            }
        }
        if enclose_all {
            f.write_str("]")?;
        }
        Ok(())
    }
}
/// Any variant description this module can represent.
///
/// The quoted strings below are the values returned by `variant_type()`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum HgvsVariant {
/// Variant type `"g"`.
Genome(GenomeVariant),
/// Variant type `"c"`.
Cds(CdsVariant),
/// Variant type `"n"`.
Tx(TxVariant),
/// Variant type `"r"`.
Rna(RnaVariant),
/// Variant type `"p"`.
Protein(ProteinVariant),
/// Variant type `"m"`.
Mt(MtVariant),
/// Variant type `"o"`.
Circular(CircularVariant),
/// Two RNA breakpoints joined by `::`; variant type `"r::r"`.
RnaFusion(RnaFusionVariant),
/// A group of variants with a phase; variant type `"allele"`.
Allele(AlleleVariant),
/// Displayed as `0`; carries no accession.
NullAllele,
/// Displayed as `?`; carries no accession.
UnknownAllele,
}
impl HgvsVariant {
pub fn accession(&self) -> Option<&Accession> {
match self {
HgvsVariant::Genome(v) => Some(&v.accession),
HgvsVariant::Cds(v) => Some(&v.accession),
HgvsVariant::Tx(v) => Some(&v.accession),
HgvsVariant::Rna(v) => Some(&v.accession),
HgvsVariant::Protein(v) => Some(&v.accession),
HgvsVariant::Mt(v) => Some(&v.accession),
HgvsVariant::Circular(v) => Some(&v.accession),
HgvsVariant::RnaFusion(v) => Some(&v.five_prime.accession),
HgvsVariant::Allele(a) => a.variants.first().and_then(|v| v.accession()),
HgvsVariant::NullAllele | HgvsVariant::UnknownAllele => None,
}
}
pub fn variant_type(&self) -> &'static str {
match self {
HgvsVariant::Genome(_) => "g",
HgvsVariant::Cds(_) => "c",
HgvsVariant::Tx(_) => "n",
HgvsVariant::Rna(_) => "r",
HgvsVariant::Protein(_) => "p",
HgvsVariant::Mt(_) => "m",
HgvsVariant::Circular(_) => "o",
HgvsVariant::RnaFusion(_) => "r::r",
HgvsVariant::Allele(_) => "allele",
HgvsVariant::NullAllele => "null",
HgvsVariant::UnknownAllele => "unknown",
}
}
pub fn is_allele(&self) -> bool {
matches!(self, HgvsVariant::Allele(_))
}
pub fn is_null_allele(&self) -> bool {
matches!(self, HgvsVariant::NullAllele)
}
pub fn is_unknown_allele(&self) -> bool {
matches!(self, HgvsVariant::UnknownAllele)
}
}
impl fmt::Display for HgvsVariant {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
HgvsVariant::Genome(v) => write!(f, "{}", v),
HgvsVariant::Cds(v) => write!(f, "{}", v),
HgvsVariant::Tx(v) => write!(f, "{}", v),
HgvsVariant::Rna(v) => write!(f, "{}", v),
HgvsVariant::Protein(v) => write!(f, "{}", v),
HgvsVariant::Mt(v) => write!(f, "{}", v),
HgvsVariant::Circular(v) => write!(f, "{}", v),
HgvsVariant::RnaFusion(v) => write!(f, "{}", v),
HgvsVariant::Allele(a) => write!(f, "{}", a),
HgvsVariant::NullAllele => write!(f, "0"),
HgvsVariant::UnknownAllele => write!(f, "?"),
}
}
}
/// A variant displayed with the `g.` coordinate prefix.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct GenomeVariant {
/// Reference sequence the variant is described against.
pub accession: Accession,
/// Optional gene symbol; the `Display` implementation does not print it.
pub gene_symbol: Option<String>,
/// Location on the reference together with the edit made there.
pub loc_edit: LocEdit<GenomeInterval, NaEdit>,
}
/// Writes `accession:g.` followed by the location and edit.
///
/// When the edit reports `is_whole_entity()`, the location is omitted and only the edit is
/// written.
impl fmt::Display for GenomeVariant {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let covers_whole_entity = self
            .loc_edit
            .edit
            .inner()
            .is_some_and(|edit| edit.is_whole_entity());

        if covers_whole_entity {
            write!(f, "{}:g.{}", self.accession, self.loc_edit.edit)
        } else {
            write!(f, "{}:g.{}", self.accession, self.loc_edit)
        }
    }
}
/// A variant displayed with the `c.` coordinate prefix.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CdsVariant {
/// Reference sequence the variant is described against.
pub accession: Accession,
/// Optional gene symbol; the `Display` implementation does not print it.
pub gene_symbol: Option<String>,
/// Location on the reference together with the edit made there.
pub loc_edit: LocEdit<CdsInterval, NaEdit>,
}
/// Writes `accession:c.` followed by the location and edit.
///
/// When the edit reports `is_whole_entity_identity()` or `is_whole_entity_unknown()`, the
/// location is omitted and only the edit is written.
impl fmt::Display for CdsVariant {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let omit_location = self
            .loc_edit
            .edit
            .inner()
            .is_some_and(|edit| edit.is_whole_entity_identity() || edit.is_whole_entity_unknown());

        if omit_location {
            write!(f, "{}:c.{}", self.accession, self.loc_edit.edit)
        } else {
            write!(f, "{}:c.{}", self.accession, self.loc_edit)
        }
    }
}
/// A variant displayed with the `n.` coordinate prefix.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct TxVariant {
/// Reference sequence the variant is described against.
pub accession: Accession,
/// Optional gene symbol; the `Display` implementation does not print it.
pub gene_symbol: Option<String>,
/// Location on the reference together with the edit made there.
pub loc_edit: LocEdit<TxInterval, NaEdit>,
}
/// Writes `accession:n.` followed by the location and edit.
impl fmt::Display for TxVariant {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            accession,
            loc_edit,
            ..
        } = self;
        write!(f, "{accession}:n.{loc_edit}")
    }
}
/// A variant displayed with the `r.` coordinate prefix.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct RnaVariant {
/// Reference sequence the variant is described against.
pub accession: Accession,
/// Optional gene symbol; the `Display` implementation does not print it.
pub gene_symbol: Option<String>,
/// Location on the reference together with the edit made there.
pub loc_edit: LocEdit<RnaInterval, NaEdit>,
}
impl fmt::Display for RnaVariant {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.loc_edit.edit {
crate::hgvs::uncertainty::Mu::Certain(edit) => {
if edit.is_whole_entity() {
write!(f, "{}:r.{}", self.accession, edit.to_rna_string())
} else {
write!(
f,
"{}:r.{}{}",
self.accession,
self.loc_edit.location,
edit.to_rna_string()
)
}
}
crate::hgvs::uncertainty::Mu::Uncertain(edit) => {
if edit.is_whole_entity() {
write!(f, "{}:r.({})", self.accession, edit.to_rna_string())
} else {
write!(
f,
"{}:r.{}({})",
self.accession,
self.loc_edit.location,
edit.to_rna_string()
)
}
}
crate::hgvs::uncertainty::Mu::Unknown => {
write!(f, "{}:r.?", self.accession)
}
}
}
}
/// A variant displayed with the `p.` coordinate prefix.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ProteinVariant {
/// Reference sequence the variant is described against.
pub accession: Accession,
/// Optional gene symbol; the `Display` implementation does not print it.
pub gene_symbol: Option<String>,
/// Location on the reference together with the edit made there.
pub loc_edit: LocEdit<ProtInterval, ProteinEdit>,
}
/// Writes `accession:p.` followed by the location and edit.
///
/// * When the edit reports `is_whole_protein_identity()`, `is_no_protein()` or
///   `is_whole_protein_unknown()`, the location is omitted and only the edit is written.
/// * Otherwise an uncertain edit puts the parentheses around location and edit together.
impl fmt::Display for ProteinVariant {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Without an inner edit neither special case applies.
        let Some(edit) = self.loc_edit.edit.inner() else {
            return write!(f, "{}:p.{}", self.accession, self.loc_edit);
        };

        let omit_location = edit.is_whole_protein_identity()
            || edit.is_no_protein()
            || edit.is_whole_protein_unknown();
        if omit_location {
            return write!(f, "{}:p.{}", self.accession, self.loc_edit.edit);
        }

        if self.loc_edit.edit.is_uncertain() {
            return write!(
                f,
                "{}:p.({}{})",
                self.accession, self.loc_edit.location, edit
            );
        }

        write!(f, "{}:p.{}", self.accession, self.loc_edit)
    }
}
/// A variant displayed with the `m.` coordinate prefix.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct MtVariant {
/// Reference sequence the variant is described against.
pub accession: Accession,
/// Optional gene symbol; the `Display` implementation does not print it.
pub gene_symbol: Option<String>,
/// Location on the reference together with the edit made there.
pub loc_edit: LocEdit<GenomeInterval, NaEdit>,
}
/// Writes `accession:m.` followed by the location and edit.
impl fmt::Display for MtVariant {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            accession,
            loc_edit,
            ..
        } = self;
        write!(f, "{accession}:m.{loc_edit}")
    }
}
/// A variant displayed with the `o.` coordinate prefix.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CircularVariant {
/// Reference sequence the variant is described against.
pub accession: Accession,
/// Optional gene symbol; the `Display` implementation does not print it.
pub gene_symbol: Option<String>,
/// Location on the reference together with the edit made there.
pub loc_edit: LocEdit<GenomeInterval, NaEdit>,
}
/// Writes `accession:o.` followed by the location and edit.
impl fmt::Display for CircularVariant {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            accession,
            loc_edit,
            ..
        } = self;
        write!(f, "{accession}:o.{loc_edit}")
    }
}
/// One side of an `RnaFusionVariant`: a reference plus an interval on it.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct RnaFusionBreakpoint {
/// Reference sequence for this side of the fusion.
pub accession: Accession,
/// Optional gene symbol; when present it is displayed in parentheses after the accession.
pub gene_symbol: Option<String>,
/// Interval on the reference, displayed after `r.`.
pub interval: RnaInterval,
}
/// Writes `accession:r.interval`, or `accession(gene):r.interval` when a gene symbol is set.
impl fmt::Display for RnaFusionBreakpoint {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.gene_symbol.as_deref() {
            Some(gene) => write!(f, "{}({}):r.{}", self.accession, gene, self.interval),
            None => write!(f, "{}:r.{}", self.accession, self.interval),
        }
    }
}
/// Two RNA breakpoints, displayed joined by `::`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct RnaFusionVariant {
/// Breakpoint written before the `::`; `HgvsVariant::accession` reports its accession.
pub five_prime: RnaFusionBreakpoint,
/// Breakpoint written after the `::`.
pub three_prime: RnaFusionBreakpoint,
}
impl RnaFusionVariant {
pub fn new(five_prime: RnaFusionBreakpoint, three_prime: RnaFusionBreakpoint) -> Self {
Self {
five_prime,
three_prime,
}
}
}
/// Writes the 5' breakpoint, `::`, then the 3' breakpoint.
impl fmt::Display for RnaFusionVariant {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            five_prime,
            three_prime,
        } = self;
        write!(f, "{five_prime}::{three_prime}")
    }
}
// Unit tests for accession construction/formatting, `LocEdit` constructors, allele phases,
// and the `Display` output of the variant types defined in this file.
#[cfg(test)]
mod tests {
use super::*;
use crate::hgvs::edit::Base;
use crate::hgvs::location::GenomePos;
// An accession displays as `PREFIX_NUMBER.VERSION`, or without `.VERSION` when none is set.
#[test]
fn test_accession_display() {
let acc = Accession::new("NM", "000088", Some(3));
assert_eq!(format!("{}", acc), "NM_000088.3");
let acc_no_version = Accession::new("NM", "000088", None);
assert_eq!(format!("{}", acc_no_version), "NM_000088");
}
// A genomic substitution displays as `accession:g.<pos><ref>><alt>`.
#[test]
fn test_genome_variant_display() {
let variant = GenomeVariant {
accession: Accession::new("NC", "000001", Some(11)),
gene_symbol: None,
loc_edit: LocEdit::new(
GenomeInterval::point(GenomePos::new(12345)),
NaEdit::Substitution {
reference: Base::A,
alternative: Base::G,
},
),
};
assert_eq!(format!("{}", variant), "NC_000001.11:g.12345A>G");
}
// A CDS deletion without sequence or length displays as `accession:c.<pos>del`.
#[test]
fn test_cds_variant_display() {
use crate::hgvs::location::CdsPos;
let variant = CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(459)),
NaEdit::Deletion {
sequence: None,
length: None,
},
),
};
assert_eq!(format!("{}", variant), "NM_000088.3:c.459del");
}
// Cis alleles display as one bracketed list separated by `;`.
#[test]
fn test_allele_cis_display() {
use crate::hgvs::location::CdsPos;
let var1 = HgvsVariant::Cds(CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(100)),
NaEdit::Substitution {
reference: Base::A,
alternative: Base::G,
},
),
});
let var2 = HgvsVariant::Cds(CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(200)),
NaEdit::Substitution {
reference: Base::C,
alternative: Base::T,
},
),
});
let allele = AlleleVariant::cis(vec![var1, var2]);
let allele_variant = HgvsVariant::Allele(allele);
assert_eq!(
format!("{}", allele_variant),
"[NM_000088.3:c.100A>G;NM_000088.3:c.200C>T]"
);
}
// Trans alleles display each member in its own brackets, separated by `;`.
#[test]
fn test_allele_trans_display() {
use crate::hgvs::location::CdsPos;
let var1 = HgvsVariant::Cds(CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(100)),
NaEdit::Substitution {
reference: Base::A,
alternative: Base::G,
},
),
});
let var2 = HgvsVariant::Cds(CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(200)),
NaEdit::Substitution {
reference: Base::C,
alternative: Base::T,
},
),
});
let allele = AlleleVariant::trans(vec![var1, var2]);
let allele_variant = HgvsVariant::Allele(allele);
assert_eq!(
format!("{}", allele_variant),
"[NM_000088.3:c.100A>G];[NM_000088.3:c.200C>T]"
);
}
// An allele reports the accession of its first member.
#[test]
fn test_allele_accession() {
use crate::hgvs::location::CdsPos;
let var1 = HgvsVariant::Cds(CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(100)),
NaEdit::Deletion {
sequence: None,
length: None,
},
),
});
let allele = AlleleVariant::cis(vec![var1]);
let allele_variant = HgvsVariant::Allele(allele);
assert_eq!(
&*allele_variant
.accession()
.expect("Expected accession")
.prefix,
"NM"
);
assert!(allele_variant.is_allele());
}
// Mosaic alleles display members separated by `/` with no brackets.
#[test]
fn test_allele_mosaic_display() {
use crate::hgvs::location::CdsPos;
let var1 = HgvsVariant::Cds(CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(100)),
NaEdit::Substitution {
reference: Base::A,
alternative: Base::G,
},
),
});
let var2 = HgvsVariant::Cds(CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(200)),
NaEdit::Substitution {
reference: Base::C,
alternative: Base::T,
},
),
});
let allele = AlleleVariant::mosaic(vec![var1, var2]);
let allele_variant = HgvsVariant::Allele(allele);
assert_eq!(
format!("{}", allele_variant),
"NM_000088.3:c.100A>G/NM_000088.3:c.200C>T"
);
}
// Chimeric alleles display members separated by `//` with no brackets.
#[test]
fn test_allele_chimeric_display() {
use crate::hgvs::location::CdsPos;
let var1 = HgvsVariant::Cds(CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(100)),
NaEdit::Substitution {
reference: Base::A,
alternative: Base::G,
},
),
});
let var2 = HgvsVariant::Cds(CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(200)),
NaEdit::Substitution {
reference: Base::C,
alternative: Base::T,
},
),
});
let allele = AlleleVariant::chimeric(vec![var1, var2]);
let allele_variant = HgvsVariant::Allele(allele);
assert_eq!(
format!("{}", allele_variant),
"NM_000088.3:c.100A>G//NM_000088.3:c.200C>T"
);
}
// `is_mosaic` / `is_chimeric` are true only for their own phase.
#[test]
fn test_allele_phase_variants() {
assert!(!AllelePhase::Cis.is_mosaic());
assert!(!AllelePhase::Trans.is_mosaic());
assert!(!AllelePhase::Unknown.is_mosaic());
assert!(AllelePhase::Mosaic.is_mosaic());
assert!(!AllelePhase::Chimeric.is_mosaic());
assert!(!AllelePhase::Cis.is_chimeric());
assert!(!AllelePhase::Trans.is_chimeric());
assert!(!AllelePhase::Unknown.is_chimeric());
assert!(!AllelePhase::Mosaic.is_chimeric());
assert!(AllelePhase::Chimeric.is_chimeric());
}
// `Accession::new` stores the parts as given and leaves `ensembl_style` off for `NM`.
#[test]
fn test_accession_new() {
let acc = Accession::new("NM", "000088", Some(3));
assert_eq!(&*acc.prefix, "NM");
assert_eq!(&*acc.number, "000088");
assert_eq!(acc.version, Some(3));
assert!(!acc.ensembl_style);
}
// With `ensembl_style` set, prefix and number are joined without an underscore.
#[test]
fn test_accession_with_style() {
let acc = Accession::with_style("ENST", "00000012345", Some(1), true);
assert!(acc.ensembl_style);
assert_eq!(format!("{}", acc), "ENST00000012345.1");
}
// Assembly references display as `assembly(chromosome)`.
#[test]
fn test_accession_from_assembly() {
let acc = Accession::from_assembly("GRCh37", "chr1");
assert!(acc.is_assembly_ref());
assert_eq!(format!("{}", acc), "GRCh37(chr1)");
}
// Only the five `ENS*` prefixes count as Ensembl prefixes.
#[test]
fn test_accession_is_ensembl_prefix() {
assert!(Accession::is_ensembl_prefix("ENST"));
assert!(Accession::is_ensembl_prefix("ENSG"));
assert!(Accession::is_ensembl_prefix("ENSP"));
assert!(Accession::is_ensembl_prefix("ENSE"));
assert!(Accession::is_ensembl_prefix("ENSR"));
assert!(!Accession::is_ensembl_prefix("NM"));
assert!(!Accession::is_ensembl_prefix("NC"));
}
// `is_ensembl` is true for an `ENST` accession and false for an `NM` accession.
#[test]
fn test_accession_is_ensembl() {
let ensembl = Accession::new("ENST", "00000012345", Some(1));
assert!(ensembl.is_ensembl());
let refseq = Accession::new("NM", "000088", Some(3));
assert!(!refseq.is_ensembl());
}
// An 11-digit Ensembl number validates; non-Ensembl accessions always validate.
#[test]
fn test_accession_validate_ensembl() {
let valid = Accession::new("ENST", "00000012345", Some(1));
assert!(valid.validate_ensembl());
let refseq = Accession::new("NM", "000088", Some(3));
assert!(refseq.validate_ensembl());
}
// Checks the prefix-to-coordinate-type mapping, including the single-letter and unknown cases.
#[test]
fn test_accession_inferred_variant_type() {
assert_eq!(
Accession::new("NC", "000001", Some(11)).inferred_variant_type(),
Some("g")
);
assert_eq!(
Accession::new("NG", "012345", Some(1)).inferred_variant_type(),
Some("g")
);
assert_eq!(
Accession::new("NM", "000088", Some(3)).inferred_variant_type(),
Some("c")
);
assert_eq!(
Accession::new("NR", "123456", Some(1)).inferred_variant_type(),
Some("n")
);
assert_eq!(
Accession::new("NP", "000079", Some(2)).inferred_variant_type(),
Some("p")
);
assert_eq!(
Accession::new("ENST", "00000012345", Some(1)).inferred_variant_type(),
Some("c")
);
assert_eq!(
Accession::new("ENSG", "00000012345", Some(1)).inferred_variant_type(),
Some("g")
);
assert_eq!(
Accession::new("ENSP", "00000012345", Some(1)).inferred_variant_type(),
Some("p")
);
assert_eq!(
Accession::new("LRG", "1", None).inferred_variant_type(),
Some("g")
);
assert_eq!(
Accession::new("P", "12345", None).inferred_variant_type(),
Some("p")
);
assert_eq!(
Accession::new("XX", "12345", None).inferred_variant_type(),
None
);
}
// `is_uniprot` needs a one-letter uppercase prefix and a five-character number.
#[test]
fn test_accession_is_uniprot() {
let uniprot = Accession::new("P", "12345", None);
assert!(uniprot.is_uniprot());
let not_uniprot = Accession::new("NM", "000088", Some(3));
assert!(!not_uniprot.is_uniprot());
let wrong_length = Accession::new("P", "123", None);
assert!(!wrong_length.is_uniprot());
}
// `base()` omits the version for RefSeq, Ensembl-style, and assembly accessions.
#[test]
fn test_accession_base() {
let acc = Accession::new("NM", "000088", Some(3));
assert_eq!(acc.base(), "NM_000088");
let ensembl = Accession::with_style("ENST", "00000012345", Some(1), true);
assert_eq!(ensembl.base(), "ENST00000012345");
let assembly = Accession::from_assembly("GRCh38", "chrX");
assert_eq!(assembly.base(), "GRCh38(chrX)");
}
// `full()` appends `.version` when present, in both separator styles.
#[test]
fn test_accession_full() {
let acc = Accession::new("NM", "000088", Some(3));
assert_eq!(acc.full(), "NM_000088.3");
let no_version = Accession::new("NM", "000088", None);
assert_eq!(no_version.full(), "NM_000088");
let ensembl = Accession::with_style("ENST", "00000012345", Some(1), true);
assert_eq!(ensembl.full(), "ENST00000012345.1");
}
// `LocEdit::new` wraps the edit in `Mu::Certain`.
#[test]
fn test_loc_edit_new() {
use crate::hgvs::location::GenomePos;
let loc_edit: LocEdit<GenomeInterval, NaEdit> = LocEdit::new(
GenomeInterval::point(GenomePos::new(100)),
NaEdit::Deletion {
sequence: None,
length: None,
},
);
assert!(matches!(loc_edit.edit, Mu::Certain(_)));
}
// `LocEdit::with_uncertainty` keeps the `Mu` wrapper supplied by the caller.
#[test]
fn test_loc_edit_with_uncertainty() {
use crate::hgvs::location::GenomePos;
let loc_edit: LocEdit<GenomeInterval, NaEdit> = LocEdit::with_uncertainty(
GenomeInterval::point(GenomePos::new(100)),
Mu::Uncertain(NaEdit::Deletion {
sequence: None,
length: None,
}),
);
assert!(matches!(loc_edit.edit, Mu::Uncertain(_)));
}
// Each phase displays as its lowercase name.
#[test]
fn test_allele_phase_display() {
assert_eq!(format!("{}", AllelePhase::Cis), "cis");
assert_eq!(format!("{}", AllelePhase::Trans), "trans");
assert_eq!(format!("{}", AllelePhase::Unknown), "unknown");
assert_eq!(format!("{}", AllelePhase::Mosaic), "mosaic");
assert_eq!(format!("{}", AllelePhase::Chimeric), "chimeric");
}
// `AlleleVariant::unknown_phase` sets the phase to `Unknown`.
#[test]
fn test_allele_variant_unknown_phase() {
use crate::hgvs::location::CdsPos;
let var1 = HgvsVariant::Cds(CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(100)),
NaEdit::Substitution {
reference: Base::A,
alternative: Base::G,
},
),
});
let var2 = HgvsVariant::Cds(CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(200)),
NaEdit::Substitution {
reference: Base::C,
alternative: Base::T,
},
),
});
let allele = AlleleVariant::unknown_phase(vec![var1, var2]);
assert_eq!(allele.phase, AllelePhase::Unknown);
}
// `HgvsVariant::accession` exposes the wrapped variant's accession.
#[test]
fn test_hgvs_variant_accession() {
let variant = HgvsVariant::Genome(GenomeVariant {
accession: Accession::new("NC", "000001", Some(11)),
gene_symbol: None,
loc_edit: LocEdit::new(
GenomeInterval::point(GenomePos::new(12345)),
NaEdit::Substitution {
reference: Base::A,
alternative: Base::G,
},
),
});
let acc = variant.accession().expect("Expected accession");
assert_eq!(&*acc.prefix, "NC");
assert_eq!(&*acc.number, "000001");
}
// `is_allele` is false for a plain CDS variant and true once wrapped in an allele.
#[test]
fn test_hgvs_variant_is_allele() {
use crate::hgvs::location::CdsPos;
let var1 = HgvsVariant::Cds(CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(100)),
NaEdit::Substitution {
reference: Base::A,
alternative: Base::G,
},
),
});
assert!(!var1.is_allele());
let allele = AlleleVariant::cis(vec![var1.clone()]);
let allele_variant = HgvsVariant::Allele(allele);
assert!(allele_variant.is_allele());
}
// A protein variant reports type `p`; a genomic variant reports `g`.
#[test]
fn test_hgvs_variant_protein_type() {
use crate::hgvs::location::{AminoAcid, ProtPos};
let protein = HgvsVariant::Protein(ProteinVariant {
accession: Accession::new("NP", "000079", Some(2)),
gene_symbol: None,
loc_edit: LocEdit::new(
ProtInterval::point(ProtPos::new(AminoAcid::Val, 600)),
ProteinEdit::Substitution {
reference: AminoAcid::Val,
alternative: AminoAcid::Glu,
},
),
});
assert_eq!(protein.variant_type(), "p");
assert!(matches!(protein, HgvsVariant::Protein(_)));
let genomic = HgvsVariant::Genome(GenomeVariant {
accession: Accession::new("NC", "000001", Some(11)),
gene_symbol: None,
loc_edit: LocEdit::new(
GenomeInterval::point(GenomePos::new(12345)),
NaEdit::Substitution {
reference: Base::A,
alternative: Base::G,
},
),
});
assert_eq!(genomic.variant_type(), "g");
assert!(!matches!(genomic, HgvsVariant::Protein(_)));
}
// `variant_type` returns `g` for genomic and `c` for CDS variants.
#[test]
fn test_hgvs_variant_variant_type() {
let genomic = HgvsVariant::Genome(GenomeVariant {
accession: Accession::new("NC", "000001", Some(11)),
gene_symbol: None,
loc_edit: LocEdit::new(
GenomeInterval::point(GenomePos::new(12345)),
NaEdit::Substitution {
reference: Base::A,
alternative: Base::G,
},
),
});
assert_eq!(genomic.variant_type(), "g");
use crate::hgvs::location::CdsPos;
let cds = HgvsVariant::Cds(CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: None,
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(100)),
NaEdit::Substitution {
reference: Base::A,
alternative: Base::G,
},
),
});
assert_eq!(cds.variant_type(), "c");
}
// The gene symbol is stored on a CDS variant; the display still contains the accession.
#[test]
fn test_cds_variant_with_gene_symbol() {
use crate::hgvs::location::CdsPos;
let variant = CdsVariant {
accession: Accession::new("NM", "000088", Some(3)),
gene_symbol: Some("COL1A1".to_string()),
loc_edit: LocEdit::new(
CdsInterval::point(CdsPos::new(100)),
NaEdit::Substitution {
reference: Base::A,
alternative: Base::G,
},
),
};
assert_eq!(variant.gene_symbol, Some("COL1A1".to_string()));
let display = format!("{}", variant);
assert!(display.contains("NM_000088.3"));
}
// The gene symbol is stored on a genomic variant; the display still contains the accession.
#[test]
fn test_genome_variant_with_gene_symbol() {
let variant = GenomeVariant {
accession: Accession::new("NC", "000001", Some(11)),
gene_symbol: Some("BRCA1".to_string()),
loc_edit: LocEdit::new(
GenomeInterval::point(GenomePos::new(12345)),
NaEdit::Substitution {
reference: Base::A,
alternative: Base::G,
},
),
};
assert_eq!(variant.gene_symbol, Some("BRCA1".to_string()));
let display = format!("{}", variant);
assert!(display.contains("NC_000001.11"));
}
}