use crate::codon::{aa_three_letter, reverse_complement, translate_codon_for_transcript};
use crate::consequence::Consequence;
use crate::consequence::helpers::fetch_cds_sequence;
use crate::error::VarEffectError;
use crate::fasta::FastaReader;
use crate::locate::LocateIndex;
use crate::types::{Strand, TranscriptModel};
/// Builds the HGVS protein (`p.`) notation for a single-nucleotide variant.
///
/// `amino_acids` is either `"R/A"` (reference and alternate one-letter codes
/// separated by a slash) or a lone `"R"` when no alternate is given. Only the
/// first byte of each side is used. `protein_start` is the position printed in
/// the notation.
///
/// Returns `None` when either optional input is missing, when the amino-acid
/// string is empty or has an empty side, when a branch needs an alternate
/// residue that was not supplied, or when none of the handled consequences is
/// present. Consequences are tested in a fixed priority order: start-lost,
/// stop-lost, stop-gained, missense, synonymous / start-retained,
/// stop-retained.
pub(crate) fn format_hgvs_p_snv(
    consequences: &[Consequence],
    amino_acids: Option<&str>,
    protein_start: Option<u32>,
) -> Option<String> {
    let aa_str = amino_acids?;
    let pos = protein_start?;

    // The amino-acid string is validated before any consequence is inspected,
    // so a malformed string yields `None` even for start-lost variants.
    let (ref_aa, alt_aa) = if let Some((ref_part, alt_part)) = aa_str.split_once('/') {
        let ref_byte = *ref_part.as_bytes().first()?;
        let alt_byte = *alt_part.as_bytes().first()?;
        (ref_byte, Some(alt_byte))
    } else {
        (*aa_str.as_bytes().first()?, None)
    };

    let has = |wanted: &Consequence| consequences.contains(wanted);

    let notation = if has(&Consequence::StartLost) {
        "p.Met1?".to_string()
    } else if has(&Consequence::StopLost) {
        let alt = aa_three_letter(alt_aa?);
        format!("p.Ter{pos}{alt}extTer?")
    } else if has(&Consequence::StopGained) {
        let r = aa_three_letter(ref_aa);
        format!("p.{r}{pos}Ter")
    } else if has(&Consequence::MissenseVariant) {
        let r = aa_three_letter(ref_aa);
        let a = aa_three_letter(alt_aa?);
        format!("p.{r}{pos}{a}")
    } else if has(&Consequence::SynonymousVariant) || has(&Consequence::StartRetainedVariant) {
        let r = aa_three_letter(ref_aa);
        format!("p.{r}{pos}=")
    } else if has(&Consequence::StopRetainedVariant) {
        format!("p.Ter{pos}=")
    } else {
        return None;
    };
    Some(notation)
}
/// Concatenates the three-letter code of every residue in `aas`, in order.
///
/// An empty slice produces an empty string.
fn format_aa_sequence(aas: &[u8]) -> String {
    // The capacity is sized for three characters per residue.
    aas.iter().fold(
        String::with_capacity(aas.len() * 3),
        |mut rendered, &residue| {
            rendered.push_str(aa_three_letter(residue));
            rendered
        },
    )
}
/// Returns the index of the first position at which `ref_aas` and `alt_aas`
/// hold different residues.
///
/// Only the overlapping prefix of the two slices is compared, so a pure
/// length difference (one slice being a prefix of the other) yields `None`.
fn find_first_changed_aa(ref_aas: &[u8], alt_aas: &[u8]) -> Option<usize> {
    for (idx, (ref_aa, alt_aa)) in ref_aas.iter().zip(alt_aas).enumerate() {
        if ref_aa != alt_aa {
            return Some(idx);
        }
    }
    None
}
/// Slides a deleted/duplicated window of `del_len` residues as far towards the
/// C-terminal end of `ref_protein` as possible without changing the result.
///
/// Starting at `initial_pos`, the window advances one residue at a time for as
/// long as the residue leaving the window on the left equals the residue
/// entering it on the right. Returns the final (0-based) start index. A
/// `del_len` of zero returns `initial_pos` unchanged.
fn apply_protein_3prime_rule(ref_protein: &[u8], del_len: usize, initial_pos: usize) -> usize {
    if del_len == 0 {
        return initial_pos;
    }
    // Pair each residue from `initial_pos` onwards with the one `del_len`
    // further along; every leading equal pair permits one more step right.
    let steps = ref_protein
        .iter()
        .skip(initial_pos)
        .zip(ref_protein.iter().skip(initial_pos + del_len))
        .take_while(|(leaving, entering)| leaving == entering)
        .count();
    initial_pos + steps
}
/// Finds the first stop codon in `seq`, reading in frame from the first base.
///
/// Returns the 1-based codon number of the first triplet that translates to
/// `*`, or `None` if no complete triplet does. Trailing bases that do not fill
/// a whole codon are ignored. `is_mito` is passed through to the codon
/// translation helper.
fn scan_for_stop_codon(seq: &[u8], is_mito: bool) -> Option<u32> {
    seq.chunks_exact(3)
        .position(|triplet| {
            let codon: [u8; 3] = [triplet[0], triplet[1], triplet[2]];
            translate_codon_for_transcript(&codon, is_mito) == b'*'
        })
        .map(|codon_idx| codon_idx as u32 + 1)
}
/// Collects the exonic sequence that lies beyond the CDS boundary on the
/// transcript's strand, concatenated in the order the exons are stored.
///
/// * Plus strand: the boundary is `cds_genomic_end`; for every exon reaching
///   past it, the part from `max(exon start, boundary)` to the exon end is
///   fetched as-is.
/// * Minus strand: the boundary is `cds_genomic_start`; for every exon that
///   starts before it, the part from the exon start to
///   `min(exon end, boundary)` is fetched and reverse-complemented.
///
/// Returns an empty vector when the relevant CDS boundary is not set.
///
/// # Errors
///
/// Propagates any error returned by `fasta.fetch_sequence`.
// NOTE(review): the pieces are appended in `transcript.exons` order; this
// presumably relies on exons being stored in transcript (5'→3') order —
// confirm against the model loader.
fn fetch_3prime_utr_coding_seq(
    chrom: &str,
    transcript: &TranscriptModel,
    fasta: &FastaReader,
) -> Result<Vec<u8>, VarEffectError> {
    let mut utr_bases = Vec::new();

    match transcript.strand {
        Strand::Plus => {
            let boundary = match transcript.cds_genomic_end {
                Some(end) => end,
                None => return Ok(utr_bases),
            };
            for exon in &transcript.exons {
                // Exons ending at or before the boundary hold no UTR bases.
                if exon.genomic_end > boundary {
                    let from = exon.genomic_start.max(boundary);
                    let to = exon.genomic_end;
                    if from < to {
                        let bases = fasta.fetch_sequence(chrom, from, to)?;
                        utr_bases.extend_from_slice(&bases);
                    }
                }
            }
        }
        Strand::Minus => {
            let boundary = match transcript.cds_genomic_start {
                Some(start) => start,
                None => return Ok(utr_bases),
            };
            for exon in &transcript.exons {
                // Exons starting at or after the boundary hold no UTR bases.
                if exon.genomic_start < boundary {
                    let from = exon.genomic_start;
                    let to = exon.genomic_end.min(boundary);
                    if from < to {
                        let plus_bases = fasta.fetch_sequence(chrom, from, to)?;
                        let coding = reverse_complement(&plus_bases);
                        utr_bases.extend_from_slice(&coding);
                    }
                }
            }
        }
    }

    Ok(utr_bases)
}
/// Builds the HGVS protein notation for a frameshift variant.
///
/// The edit is given in 0-based CDS coordinates as the half-open range
/// `cds_offset_start..cds_offset_end` (equal offsets denote a pure insertion),
/// replaced by `inserted_coding_bases`. `dna_shift` is an additional number of
/// bases by which the edit is moved in the 3' direction; it is applied only
/// when the shifted start still lies inside the CDS.
///
/// The reference CDS is fetched from the start of the affected codon to the
/// end of the CDS, the edit is applied to a copy, and both are translated.
/// When the edit starts inside the last three CDS bases, the 3' UTR sequence
/// is appended to both before translation. The first differing residue
/// determines the reported position.
///
/// Returns:
/// * `p.<Ref><pos>Ter` when the first changed residue is itself a stop;
/// * `p.<Ref><pos><Alt>fsTer<N>` when a stop is found `N` codons from the
///   changed residue (counting the changed residue as 1), searching into the
///   3' UTR if necessary;
/// * `p.<Ref><pos><Alt>fsTer?` when no stop is found;
/// * `p.<Ref><pos>?fsTer?` when the altered protein ends before the first
///   point of difference;
/// * `Ok(None)` when no notation can be derived (no complete altered codon,
///   no detectable protein change, or an edit range that does not fit the
///   fetched sequence).
///
/// # Errors
///
/// Propagates errors from CDS/UTR sequence fetching and from translation.
// The argument list is consumed by sibling call sites, so it is kept as-is
// rather than bundled into a struct.
#[allow(clippy::too_many_arguments)]
pub(crate) fn format_hgvs_p_frameshift(
    cds_offset_start: u32,
    cds_offset_end: u32,
    inserted_coding_bases: &[u8],
    chrom: &str,
    transcript: &TranscriptModel,
    index: &LocateIndex,
    fasta: &FastaReader,
    dna_shift: u32,
) -> Result<Option<String>, VarEffectError> {
    let total_cds = index.total_cds_length();
    let is_mito = transcript.chrom == "chrM";
    let is_insertion = cds_offset_end == cds_offset_start;

    // Apply the 3' shift only if the shifted start is still inside the CDS;
    // otherwise fall back to the unshifted coordinates.
    let (eff_start, eff_end) = if dna_shift > 0 {
        let shifted = cds_offset_start.saturating_add(dna_shift);
        if shifted < total_cds {
            if is_insertion {
                (shifted, shifted)
            } else {
                (shifted, cds_offset_end.saturating_add(dna_shift))
            }
        } else {
            (cds_offset_start, cds_offset_end)
        }
    } else {
        (cds_offset_start, cds_offset_end)
    };

    // Work from the first base of the codon containing the edit start.
    let codon_start = (eff_start / 3) * 3;
    let mut ref_seq = fetch_cds_sequence(codon_start, total_cds, chrom, transcript, index, fasta)?;
    let local_start = (eff_start - codon_start) as usize;

    // Guard the slicing below: an inverted range, or an edit (typically a
    // shifted deletion) that extends past the fetched sequence, previously
    // caused an arithmetic underflow or an out-of-bounds slice panic.
    let local_end = match eff_end.checked_sub(codon_start) {
        Some(offset) => offset as usize,
        None => return Ok(None),
    };
    if local_start > local_end || local_end > ref_seq.len() {
        return Ok(None);
    }

    // Splice the edit into a copy of the reference.
    let mut alt_seq =
        Vec::with_capacity(ref_seq.len() - (local_end - local_start) + inserted_coding_bases.len());
    alt_seq.extend_from_slice(&ref_seq[..local_start]);
    alt_seq.extend_from_slice(inserted_coding_bases);
    alt_seq.extend_from_slice(&ref_seq[local_end..]);

    // An edit starting within the final three CDS bases needs the 3' UTR on
    // both sequences so that translation can continue past the CDS end.
    let stop_codon_start = total_cds.saturating_sub(3);
    let extended_into_utr = if eff_start >= stop_codon_start {
        let utr_seq = fetch_3prime_utr_coding_seq(chrom, transcript, fasta)?;
        if !utr_seq.is_empty() {
            ref_seq.extend_from_slice(&utr_seq);
            alt_seq.extend_from_slice(&utr_seq);
            true
        } else {
            false
        }
    } else {
        false
    };

    // Translate only whole codons.
    let ref_complete_len = ref_seq.len() - (ref_seq.len() % 3);
    let ref_protein = crate::codon::translate_sequence(&ref_seq[..ref_complete_len], is_mito)?;
    let alt_complete_len = alt_seq.len() - (alt_seq.len() % 3);
    // Leftover bases of the altered CDS are kept so that a later UTR scan
    // stays in the shifted reading frame.
    let trailing_bases = if extended_into_utr {
        Vec::new()
    } else {
        alt_seq[alt_complete_len..].to_vec()
    };
    if alt_complete_len == 0 {
        return Ok(None);
    }
    let alt_protein = crate::codon::translate_sequence(&alt_seq[..alt_complete_len], is_mito)?;

    // Locate the first changed residue. If the overlapping parts agree, the
    // change is placed at the end of the shorter protein, provided the
    // reference still has a residue there.
    let change_idx = match find_first_changed_aa(&ref_protein, &alt_protein) {
        Some(idx) => idx,
        None => {
            let idx = ref_protein.len().min(alt_protein.len());
            if idx >= ref_protein.len() {
                return Ok(None);
            }
            idx
        }
    };
    let protein_pos = (codon_start / 3) + 1 + change_idx as u32;
    let ref_aa = ref_protein[change_idx];

    if let Some(&alt_aa) = alt_protein.get(change_idx) {
        // The very first changed residue is a stop.
        if alt_aa == b'*' {
            return Ok(Some(format!(
                "p.{}{}Ter",
                aa_three_letter(ref_aa),
                protein_pos,
            )));
        }
        // A stop within the translated altered sequence; the changed residue
        // counts as position 1.
        if let Some(star_pos) = alt_protein[change_idx..].iter().position(|&a| a == b'*') {
            let stop_n = star_pos as u32 + 1;
            return Ok(Some(format!(
                "p.{}{}{}fsTer{}",
                aa_three_letter(ref_aa),
                protein_pos,
                aa_three_letter(alt_aa),
                stop_n,
            )));
        }
        // No stop inside the CDS: keep reading through the 3' UTR, unless the
        // UTR was already part of the translated sequence.
        if !extended_into_utr {
            let codons_past_change = alt_protein.len() - change_idx;
            let utr_seq = fetch_3prime_utr_coding_seq(chrom, transcript, fasta)?;
            let mut continuation = trailing_bases;
            continuation.extend_from_slice(&utr_seq);
            if let Some(utr_stop) = scan_for_stop_codon(&continuation, is_mito) {
                let stop_n = codons_past_change as u32 + utr_stop;
                return Ok(Some(format!(
                    "p.{}{}{}fsTer{}",
                    aa_three_letter(ref_aa),
                    protein_pos,
                    aa_three_letter(alt_aa),
                    stop_n,
                )));
            }
        }
        // No stop found anywhere.
        return Ok(Some(format!(
            "p.{}{}{}fsTer?",
            aa_three_letter(ref_aa),
            protein_pos,
            aa_three_letter(alt_aa),
        )));
    }

    // The altered protein has no residue at the change position.
    Ok(Some(format!(
        "p.{}{}?fsTer?",
        aa_three_letter(ref_aa),
        protein_pos,
    )))
}
/// Builds the HGVS protein notation for an in-frame deletion.
///
/// `ref_aas` / `alt_aas` are the reference and altered residues of the
/// affected window; `protein_start` is the protein position of `ref_aas[0]`.
/// Residues shared at the start and end of the two windows are trimmed before
/// the change is classified.
///
/// Returns:
/// * `p.Met1?` when the consequences include start-lost;
/// * `None` when they include stop-lost, or when no reference residue remains
///   after trimming;
/// * `p.<Aa><pos>del` / `p.<Aa><start>_<Aa><end>del` for a pure deletion,
///   shifted as far 3' within `ref_aas` as possible;
/// * `p.<Aa><pos>delins<Alt…>` / `p.<Aa><start>_<Aa><end>delins<Alt…>` when
///   altered residues remain, with the inserted residues cut after the first
///   stop.
pub(crate) fn format_hgvs_p_inframe_del(
    ref_aas: &[u8],
    alt_aas: &[u8],
    protein_start: u32,
    consequences: &[Consequence],
) -> Option<String> {
    if consequences.contains(&Consequence::StartLost) {
        return Some("p.Met1?".to_string());
    }
    if consequences.contains(&Consequence::StopLost) {
        return None;
    }

    // Trim the common prefix, then the common suffix of what is left.
    let shared_len = ref_aas.len().min(alt_aas.len());
    let prefix_len = ref_aas
        .iter()
        .zip(alt_aas)
        .position(|(r, a)| r != a)
        .unwrap_or(shared_len);
    let suffix_len = ref_aas[prefix_len..]
        .iter()
        .rev()
        .zip(alt_aas[prefix_len..].iter().rev())
        .position(|(r, a)| r != a)
        .unwrap_or(shared_len - prefix_len);
    let remaining_ref = &ref_aas[prefix_len..ref_aas.len() - suffix_len];
    let remaining_alt = &alt_aas[prefix_len..alt_aas.len() - suffix_len];

    let (first_ref, last_ref) = match (remaining_ref.first(), remaining_ref.last()) {
        (Some(&first), Some(&last)) => (first, last),
        _ => return None,
    };

    if !remaining_alt.is_empty() {
        // Residues were replaced, not just removed: report as delins.
        let start_pos = protein_start + prefix_len as u32;
        let end_pos = start_pos + remaining_ref.len() as u32 - 1;
        let alt_display = truncate_at_stop(remaining_alt);
        let notation = if remaining_ref.len() == 1 {
            format!(
                "p.{}{}delins{}",
                aa_three_letter(first_ref),
                start_pos,
                format_aa_sequence(alt_display),
            )
        } else {
            format!(
                "p.{}{}_{}{}delins{}",
                aa_three_letter(first_ref),
                start_pos,
                aa_three_letter(last_ref),
                end_pos,
                format_aa_sequence(alt_display),
            )
        };
        return Some(notation);
    }

    // Pure deletion: move it to the most 3' equivalent position in the window.
    let del_len = remaining_ref.len();
    let shifted_idx = apply_protein_3prime_rule(ref_aas, del_len, prefix_len);
    let start_pos = protein_start + shifted_idx as u32;
    let first_deleted = ref_aas[shifted_idx];
    if del_len == 1 {
        return Some(format!(
            "p.{}{}del",
            aa_three_letter(first_deleted),
            start_pos
        ));
    }
    let last_deleted = ref_aas[shifted_idx + del_len - 1];
    let end_pos = start_pos + del_len as u32 - 1;
    Some(format!(
        "p.{}{}_{}{}del",
        aa_three_letter(first_deleted),
        start_pos,
        aa_three_letter(last_deleted),
        end_pos,
    ))
}
/// Builds the HGVS protein notation for an in-frame insertion.
///
/// `ref_aas` / `alt_aas` are the reference and altered residues of the
/// affected window; `protein_start` is the protein position of `ref_aas[0]`.
/// `right_flanking_aa` is used as the residue following the insertion point
/// when that residue is not part of `ref_aas`.
///
/// Residues shared at the start and end of the two windows are trimmed first.
/// Returns:
/// * `p.Met1?` when the consequences include start-lost;
/// * `None` when they include stop-lost, when nothing is inserted after
///   trimming, or when `ref_aas` is empty so no flanking residue is known;
/// * a `delins` notation when reference residues were also replaced;
/// * `p.<Aa><pos>Ter` when a lone stop is inserted after at least one shared
///   residue and the consequences include stop-gained;
/// * `p.<Aa><pos>dup` / `p.<Aa><start>_<Aa><end>dup` when the inserted
///   residues repeat the residues immediately before the insertion point
///   (shifted 3' within `ref_aas`);
/// * `p.<Left><pos>_<Right><pos+1>ins<Alt…>` otherwise, with the inserted
///   residues cut after the first stop.
pub(crate) fn format_hgvs_p_inframe_ins(
    ref_aas: &[u8],
    alt_aas: &[u8],
    protein_start: u32,
    consequences: &[Consequence],
    right_flanking_aa: u8,
) -> Option<String> {
    if consequences.contains(&Consequence::StartLost) {
        return Some("p.Met1?".to_string());
    }
    if consequences.contains(&Consequence::StopLost) {
        return None;
    }

    // Trim the common prefix, then the common suffix of what is left.
    let prefix_len = ref_aas
        .iter()
        .zip(alt_aas.iter())
        .take_while(|(r, a)| r == a)
        .count();
    let ref_tail = &ref_aas[prefix_len..];
    let alt_tail = &alt_aas[prefix_len..];
    let suffix_len = ref_tail
        .iter()
        .rev()
        .zip(alt_tail.iter().rev())
        .take_while(|(r, a)| r == a)
        .count();
    let remaining_ref = &ref_aas[prefix_len..ref_aas.len() - suffix_len];
    let inserted_aas = &alt_aas[prefix_len..alt_aas.len() - suffix_len];

    if inserted_aas.is_empty() {
        return None;
    }

    // Reference residues were replaced as well: this is a delins.
    if !remaining_ref.is_empty() {
        let start_pos = protein_start + prefix_len as u32;
        let end_pos = start_pos + remaining_ref.len() as u32 - 1;
        let alt_display = truncate_at_stop(inserted_aas);
        if remaining_ref.len() == 1 {
            return Some(format!(
                "p.{}{}delins{}",
                aa_three_letter(remaining_ref[0]),
                start_pos,
                format_aa_sequence(alt_display),
            ));
        }
        return Some(format!(
            "p.{}{}_{}{}delins{}",
            aa_three_letter(remaining_ref[0]),
            start_pos,
            aa_three_letter(remaining_ref[remaining_ref.len() - 1]),
            end_pos,
            format_aa_sequence(alt_display),
        ));
    }

    // A single inserted stop is reported as a nonsense substitution.
    // NOTE(review): the residue name is taken from the position before the
    // insertion point while the number is the position after it — confirm this
    // pairing is the intended output convention.
    if consequences.contains(&Consequence::StopGained) && inserted_aas == [b'*'] {
        let pos = protein_start + prefix_len as u32;
        if prefix_len > 0 {
            let ref_aa = ref_aas[prefix_len - 1];
            return Some(format!("p.{}{}Ter", aa_three_letter(ref_aa), pos));
        }
    }

    // Duplication: the inserted residues equal those just before the
    // insertion point.
    let ins_len = inserted_aas.len();
    if prefix_len >= ins_len {
        let preceding = &ref_aas[prefix_len - ins_len..prefix_len];
        if preceding == inserted_aas {
            let dup_start_0based = prefix_len - ins_len;
            let adjusted = apply_protein_3prime_rule(ref_aas, ins_len, dup_start_0based);
            let dup_start = protein_start + adjusted as u32;
            if ins_len == 1 {
                let aa = ref_aas[adjusted];
                return Some(format!("p.{}{}dup", aa_three_letter(aa), dup_start));
            }
            let first_aa = ref_aas[adjusted];
            let last_aa = ref_aas[adjusted + ins_len - 1];
            let dup_end = dup_start + ins_len as u32 - 1;
            return Some(format!(
                "p.{}{}_{}{}dup",
                aa_three_letter(first_aa),
                dup_start,
                aa_three_letter(last_aa),
                dup_end,
            ));
        }
    }

    // Plain insertion between two flanking residues.
    let (left_aa, left_pos, right_aa) = if prefix_len > 0 {
        // `prefix_len` never exceeds `ref_aas.len()`, so the left flank is in
        // range; the right flank falls back to the caller-supplied residue
        // when the insertion sits at the end of the window.
        let right_aa = ref_aas
            .get(prefix_len)
            .copied()
            .unwrap_or(right_flanking_aa);
        (
            ref_aas[prefix_len - 1],
            protein_start + prefix_len as u32 - 1,
            right_aa,
        )
    } else {
        // No shared leading residue. With an empty `ref_aas` there is no left
        // flank to name; return `None` instead of panicking on `ref_aas[0]`.
        (*ref_aas.first()?, protein_start, right_flanking_aa)
    };
    let right_pos = left_pos + 1;

    let ins_display = truncate_at_stop(inserted_aas);
    Some(format!(
        "p.{}{}_{}{}ins{}",
        aa_three_letter(left_aa),
        left_pos,
        aa_three_letter(right_aa),
        right_pos,
        format_aa_sequence(ins_display),
    ))
}
/// Builds the HGVS protein notation for a deletion-insertion (or any
/// multi-residue substitution).
///
/// `ref_aas` / `alt_aas` are the reference and altered residues of the
/// affected window; `protein_start` is the protein position of `ref_aas[0]`.
/// `_protein_end` is accepted for signature compatibility and is not used.
///
/// Residues shared at the start and end of the two windows are trimmed first.
/// Returns:
/// * `p.Met1?` when the consequences include start-lost;
/// * `None` when they include stop-lost, when both windows are empty, when a
///   stop is replaced by a single residue, or when only inserted residues
///   remain after trimming;
/// * `p.Met1=`, `p.Ter<pos>=` or `p.<Aa><pos>=` when the windows are
///   identical;
/// * `p.<Ref><pos>Ter` / `p.<Ref><pos><Alt>` for a one-for-one change;
/// * `p.<Aa><pos>delins<Alt…>` / `p.<Aa><start>_<Aa><end>delins<Alt…>`
///   otherwise, with the inserted residues cut after the first stop.
pub(crate) fn format_hgvs_p_delins(
    ref_aas: &[u8],
    alt_aas: &[u8],
    protein_start: u32,
    _protein_end: u32,
    consequences: &[Consequence],
) -> Option<String> {
    if consequences.contains(&Consequence::StartLost) {
        return Some("p.Met1?".to_string());
    }
    if consequences.contains(&Consequence::StopLost) {
        return None;
    }

    // Trim the common prefix, then the common suffix of what is left.
    let prefix_len = ref_aas
        .iter()
        .zip(alt_aas.iter())
        .take_while(|(r, a)| r == a)
        .count();
    let ref_tail = &ref_aas[prefix_len..];
    let alt_tail = &alt_aas[prefix_len..];
    let suffix_len = ref_tail
        .iter()
        .rev()
        .zip(alt_tail.iter().rev())
        .take_while(|(r, a)| r == a)
        .count();
    let remaining_ref = &ref_aas[prefix_len..ref_aas.len() - suffix_len];
    let remaining_alt = &alt_aas[prefix_len..alt_aas.len() - suffix_len];

    // Nothing differs: the protein is unchanged.
    if remaining_ref.is_empty() && remaining_alt.is_empty() {
        if ref_aas.first() == Some(&b'M') && protein_start == 1 {
            return Some("p.Met1=".to_string());
        }
        if ref_aas.last() == Some(&b'*') {
            let pos = protein_start + ref_aas.len() as u32 - 1;
            return Some(format!("p.Ter{}=", pos));
        }
        // Both windows empty: there is no residue to name, so return `None`
        // instead of panicking on `ref_aas[0]`.
        let aa = *ref_aas.first()?;
        return Some(format!("p.{}{}=", aa_three_letter(aa), protein_start));
    }

    // One residue replaced by one residue. After prefix trimming the two
    // residues always differ, so no "unchanged" case is needed here.
    if remaining_ref.len() == 1 && remaining_alt.len() == 1 {
        let r = remaining_ref[0];
        let a = remaining_alt[0];
        let pos = protein_start + prefix_len as u32;
        if a == b'*' {
            return Some(format!("p.{}{}Ter", aa_three_letter(r), pos));
        }
        if r == b'*' {
            return None;
        }
        return Some(format!(
            "p.{}{}{}",
            aa_three_letter(r),
            pos,
            aa_three_letter(a),
        ));
    }

    // Only inserted residues remain; this formatter does not describe that.
    if remaining_ref.is_empty() {
        return None;
    }

    let start_pos = protein_start + prefix_len as u32;
    let end_pos = start_pos + remaining_ref.len() as u32 - 1;
    let alt_display = truncate_at_stop(remaining_alt);
    if remaining_ref.len() == 1 {
        Some(format!(
            "p.{}{}delins{}",
            aa_three_letter(remaining_ref[0]),
            start_pos,
            format_aa_sequence(alt_display),
        ))
    } else {
        Some(format!(
            "p.{}{}_{}{}delins{}",
            aa_three_letter(remaining_ref[0]),
            start_pos,
            aa_three_letter(remaining_ref[remaining_ref.len() - 1]),
            end_pos,
            format_aa_sequence(alt_display),
        ))
    }
}
/// Builds the HGVS protein notation for a stop codon replaced by `alt_aa`,
/// which extends the protein into the 3' UTR.
///
/// The 3' UTR is scanned in frame from its first base. The result is
/// `p.Ter<pos><Alt>extTer<N>`, where `N` is the 1-based codon number of the
/// first stop in the UTR, or `p.Ter<pos><Alt>extTer?` when none is found.
/// `_index` is accepted for signature compatibility and is not used.
///
/// # Errors
///
/// Propagates any error from fetching the 3' UTR sequence.
pub(crate) fn format_hgvs_p_extension(
    alt_aa: u8,
    protein_pos: u32,
    chrom: &str,
    transcript: &TranscriptModel,
    _index: &LocateIndex,
    fasta: &FastaReader,
) -> Result<String, VarEffectError> {
    let is_mito = transcript.chrom == "chrM";
    let alt_name = aa_three_letter(alt_aa);
    let utr_seq = fetch_3prime_utr_coding_seq(chrom, transcript, fasta)?;

    let notation = match scan_for_stop_codon(&utr_seq, is_mito) {
        Some(utr_stop) => format!("p.Ter{protein_pos}{alt_name}extTer{utr_stop}"),
        None => format!("p.Ter{protein_pos}{alt_name}extTer?"),
    };
    Ok(notation)
}
/// Builds the HGVS protein notation for a deleted stop codon, which extends
/// the protein into the 3' UTR.
///
/// The 3' UTR is scanned in frame from its first base. The result is
/// `p.Ter<pos>delextTer<D>`, where `D` is one less than the 1-based codon
/// number of the first stop in the UTR, or `p.Ter<pos>delextTer?` when none is
/// found.
///
/// # Errors
///
/// Propagates any error from fetching the 3' UTR sequence.
pub(crate) fn format_hgvs_p_del_extension(
    protein_pos: u32,
    chrom: &str,
    transcript: &TranscriptModel,
    fasta: &FastaReader,
) -> Result<String, VarEffectError> {
    let is_mito = transcript.chrom == "chrM";
    let utr_seq = fetch_3prime_utr_coding_seq(chrom, transcript, fasta)?;

    let notation = match scan_for_stop_codon(&utr_seq, is_mito) {
        Some(utr_stop) => {
            // The scan result is 1-based, so this subtraction cannot underflow.
            let ext_distance = utr_stop - 1;
            format!("p.Ter{protein_pos}delextTer{ext_distance}")
        }
        None => format!("p.Ter{protein_pos}delextTer?"),
    };
    Ok(notation)
}
/// Returns the leading part of `aas` up to and including the first stop (`*`).
///
/// If there is no stop, the whole slice is returned.
fn truncate_at_stop(aas: &[u8]) -> &[u8] {
    let keep = aas
        .iter()
        .position(|&residue| residue == b'*')
        .map_or(aas.len(), |stop_idx| stop_idx + 1);
    &aas[..keep]
}
// Unit tests for the HGVS protein formatters and their private helpers.
// Only functions that need no transcript model or FASTA reader are covered.
#[cfg(test)]
mod tests {
use super::*;
// Wraps a single consequence in a Vec so it can be passed as a slice.
fn csq(c: Consequence) -> Vec<Consequence> {
vec![c]
}
// --- format_hgvs_p_snv: one test per handled consequence ---
#[test]
fn missense_basic() {
let result = format_hgvs_p_snv(&csq(Consequence::MissenseVariant), Some("R/W"), Some(248));
assert_eq!(result.as_deref(), Some("p.Arg248Trp"));
}
#[test]
fn missense_braf() {
let result = format_hgvs_p_snv(&csq(Consequence::MissenseVariant), Some("V/E"), Some(600));
assert_eq!(result.as_deref(), Some("p.Val600Glu"));
}
// A lone residue (no slash) means reference and alternate are the same.
#[test]
fn synonymous() {
let result = format_hgvs_p_snv(&csq(Consequence::SynonymousVariant), Some("L"), Some(54));
assert_eq!(result.as_deref(), Some("p.Leu54="));
}
#[test]
fn nonsense() {
let result = format_hgvs_p_snv(&csq(Consequence::StopGained), Some("W/*"), Some(26));
assert_eq!(result.as_deref(), Some("p.Trp26Ter"));
}
// The SNV formatter never scans the UTR, so the new stop is always "Ter?".
#[test]
fn stop_lost() {
let result = format_hgvs_p_snv(&csq(Consequence::StopLost), Some("*/R"), Some(328));
assert_eq!(result.as_deref(), Some("p.Ter328ArgextTer?"));
}
// Start-lost is reported as p.Met1? regardless of the alternate residue.
#[test]
fn start_lost() {
let result = format_hgvs_p_snv(&csq(Consequence::StartLost), Some("M/V"), Some(1));
assert_eq!(result.as_deref(), Some("p.Met1?"));
}
#[test]
fn start_retained() {
let result = format_hgvs_p_snv(&csq(Consequence::StartRetainedVariant), Some("M"), Some(1));
assert_eq!(result.as_deref(), Some("p.Met1="));
}
#[test]
fn stop_retained() {
let result =
format_hgvs_p_snv(&csq(Consequence::StopRetainedVariant), Some("*"), Some(394));
assert_eq!(result.as_deref(), Some("p.Ter394="));
}
// Missing optional inputs short-circuit to None.
#[test]
fn no_amino_acids_returns_none() {
let result = format_hgvs_p_snv(&csq(Consequence::MissenseVariant), None, Some(248));
assert_eq!(result, None);
}
#[test]
fn no_protein_start_returns_none() {
let result = format_hgvs_p_snv(&csq(Consequence::MissenseVariant), Some("R/W"), None);
assert_eq!(result, None);
}
// Checks the one-letter to three-letter mapping for each of the 20 residues
// listed below, used as the reference residue of a missense change to Ala.
#[test]
fn all_20_amino_acids() {
let cases: &[(u8, &str)] = &[
(b'A', "Ala"),
(b'C', "Cys"),
(b'D', "Asp"),
(b'E', "Glu"),
(b'F', "Phe"),
(b'G', "Gly"),
(b'H', "His"),
(b'I', "Ile"),
(b'K', "Lys"),
(b'L', "Leu"),
(b'M', "Met"),
(b'N', "Asn"),
(b'P', "Pro"),
(b'Q', "Gln"),
(b'R', "Arg"),
(b'S', "Ser"),
(b'T', "Thr"),
(b'V', "Val"),
(b'W', "Trp"),
(b'Y', "Tyr"),
];
for &(one, three) in cases {
let aa_str = format!("{}/A", one as char);
let result =
format_hgvs_p_snv(&csq(Consequence::MissenseVariant), Some(&aa_str), Some(100));
let expected = format!("p.{three}100Ala");
assert_eq!(
result.as_deref(),
Some(expected.as_str()),
"failed for amino acid '{}'",
one as char,
);
}
}
#[test]
fn non_cds_consequence_returns_none() {
let result = format_hgvs_p_snv(&csq(Consequence::IntronVariant), None, None);
assert_eq!(result, None);
}
// --- private helpers ---
#[test]
fn format_aa_sequence_basic() {
assert_eq!(format_aa_sequence(b"GSK"), "GlySerLys");
assert_eq!(format_aa_sequence(b"*"), "Ter");
assert_eq!(format_aa_sequence(&[]), "");
assert_eq!(format_aa_sequence(b"M"), "Met");
}
// The last case shows that a pure length difference is not reported:
// only the overlapping positions are compared.
#[test]
fn find_first_changed_basic() {
assert_eq!(find_first_changed_aa(b"MRK", b"MWK"), Some(1));
assert_eq!(find_first_changed_aa(b"MRK", b"MRK"), None);
assert_eq!(find_first_changed_aa(b"MRK", b"WRK"), Some(0));
assert_eq!(find_first_changed_aa(b"MR", b"MRK"), None); }
// Results are 1-based codon numbers; incomplete trailing codons are ignored
// and only the first stop is reported.
#[test]
fn scan_for_stop_basic() {
assert_eq!(scan_for_stop_codon(b"ATGTAA", false), Some(2));
assert_eq!(scan_for_stop_codon(b"TAA", false), Some(1));
assert_eq!(scan_for_stop_codon(b"ATGATG", false), None);
assert_eq!(scan_for_stop_codon(b"ATGTAAG", false), Some(2));
assert_eq!(scan_for_stop_codon(b"ATGTAGATG", false), Some(2));
assert_eq!(scan_for_stop_codon(b"", false), None);
assert_eq!(scan_for_stop_codon(b"AT", false), None);
}
// A window inside a run of identical residues slides to the end of the run;
// a window with a different right-hand neighbour stays put.
#[test]
fn protein_3prime_rule_shift() {
let ref_prot = b"MWSSSHD";
assert_eq!(apply_protein_3prime_rule(ref_prot, 1, 2), 4);
let ref_prot2 = b"MWFHD";
assert_eq!(apply_protein_3prime_rule(ref_prot2, 1, 2), 2);
let ref_prot3 = b"MWSSSSHD";
assert_eq!(apply_protein_3prime_rule(ref_prot3, 2, 2), 4);
}
// --- format_hgvs_p_inframe_del ---
// protein_start is the position of ref_aas[0], so the deleted residue after
// a one-residue shared prefix sits at protein_start + 1.
#[test]
fn inframe_del_single() {
let result = format_hgvs_p_inframe_del(b"IF", b"I", 507, &[Consequence::InframeDeletion]);
assert_eq!(result.as_deref(), Some("p.Phe508del"));
}
#[test]
fn inframe_del_range() {
let result =
format_hgvs_p_inframe_del(b"KELREA", b"K", 745, &[Consequence::InframeDeletion]);
assert_eq!(result.as_deref(), Some("p.Glu746_Ala750del"));
}
// Deleting one Ser from a run of three is reported at the last Ser.
#[test]
fn inframe_del_3prime_rule() {
let result =
format_hgvs_p_inframe_del(b"MWSSSHD", b"MWSSHD", 1, &[Consequence::InframeDeletion]);
assert_eq!(result.as_deref(), Some("p.Ser5del"));
}
// When altered residues remain after trimming, the output is a delins.
#[test]
fn inframe_del_becomes_delins() {
let result = format_hgvs_p_inframe_del(b"RK", b"W", 10, &[Consequence::InframeDeletion]);
assert_eq!(result.as_deref(), Some("p.Arg10_Lys11delinsTrp"));
}
// Start-lost takes precedence over the deletion notation.
#[test]
fn start_lost_returns_met1() {
let result = format_hgvs_p_inframe_del(
b"M",
&[],
1,
&[Consequence::StartLost, Consequence::InframeDeletion],
);
assert_eq!(result.as_deref(), Some("p.Met1?"));
}
#[test]
fn start_lost_plus_inframe_del() {
let result = format_hgvs_p_inframe_del(
b"MR",
b"R",
1,
&[Consequence::StartLost, Consequence::InframeDeletion],
);
assert_eq!(result.as_deref(), Some("p.Met1?"));
}
// --- format_hgvs_p_inframe_ins ---
// The last argument is the residue that follows the insertion point when it
// is not contained in ref_aas.
#[test]
fn inframe_ins_simple() {
let result =
format_hgvs_p_inframe_ins(b"K", b"KQSK", 2, &[Consequence::InframeInsertion], b'M');
assert_eq!(result.as_deref(), Some("p.Lys2_Met3insGlnSerLys"));
}
// Inserting a copy of the preceding residue is reported as a duplication.
#[test]
fn inframe_ins_dup_single() {
let result =
format_hgvs_p_inframe_ins(b"K", b"KK", 5, &[Consequence::InframeInsertion], b'M');
assert_eq!(result.as_deref(), Some("p.Lys5dup"));
}
// An inserted sequence that ends in a stop keeps the stop in the output.
#[test]
fn inframe_ins_with_stop() {
let result = format_hgvs_p_inframe_ins(
b"P",
b"PG*",
2,
&[Consequence::InframeInsertion, Consequence::StopGained],
b'I',
);
assert_eq!(result.as_deref(), Some("p.Pro2_Ile3insGlyTer"));
}
// --- format_hgvs_p_delins ---
#[test]
fn delins_shrink() {
let result =
format_hgvs_p_delins(b"CK", b"W", 28, 29, &[Consequence::ProteinAlteringVariant]);
assert_eq!(result.as_deref(), Some("p.Cys28_Lys29delinsTrp"));
}
#[test]
fn delins_grow() {
let result =
format_hgvs_p_delins(b"C", b"WV", 28, 28, &[Consequence::ProteinAlteringVariant]);
assert_eq!(result.as_deref(), Some("p.Cys28delinsTrpVal"));
}
// A one-for-one change collapses to plain substitution notation.
#[test]
fn delins_single_missense() {
let result = format_hgvs_p_delins(b"R", b"W", 248, 248, &[Consequence::MissenseVariant]);
assert_eq!(result.as_deref(), Some("p.Arg248Trp"));
}
// Identical windows are reported with "=".
#[test]
fn delins_synonymous() {
let result = format_hgvs_p_delins(b"R", b"R", 248, 248, &[Consequence::SynonymousVariant]);
assert_eq!(result.as_deref(), Some("p.Arg248="));
}
#[test]
fn delins_stop_gained() {
let result = format_hgvs_p_delins(
b"PK",
b"L*",
578,
579,
&[Consequence::StopGained, Consequence::ProteinAlteringVariant],
);
assert_eq!(result.as_deref(), Some("p.Pro578_Lys579delinsLeuTer"));
}
// Everything after the first stop is dropped; the stop itself is kept.
#[test]
fn truncate_at_stop_helper() {
assert_eq!(truncate_at_stop(b"L*K"), b"L*");
assert_eq!(truncate_at_stop(b"LK"), b"LK");
assert_eq!(truncate_at_stop(b"*"), b"*");
}
}