use thiserror::Error;
use super::VcfVariant;
/// Errors reported while normalizing a variant with [`normalize_variant`].
#[derive(Debug, Error)]
pub enum NormalizeError {
/// The variant does not fall inside the supplied reference sequence.
#[error(
"variant on contig '{chrom}' position {pos0} is out of bounds for that contig's \
reference length {reference_len}"
)]
OutOfBounds {
/// Contig name copied from the offending variant.
chrom: String,
/// Start position of the offending variant; it is used as a 0-based index
/// into the reference sequence.
pos0: u32,
/// Length of the reference sequence the position was checked against.
reference_len: usize,
},
/// The reference or the alternate allele contains no bases.
#[error("empty allele after normalization")]
EmptyAllele,
}
/// A variant in the form produced by [`normalize_variant`].
///
/// The derived ordering compares fields in declaration order: `chrom`, then
/// `pos0`, then `reference`, then `alternate`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct NormalizedVariant {
/// Contig name, copied unchanged from the input variant.
pub chrom: String,
/// Start position after trimming and any left shift; it is used as a
/// 0-based index into the reference sequence.
pub pos0: u32,
/// Reference allele bytes, ASCII-uppercased.
pub reference: Vec<u8>,
/// Alternate allele bytes, ASCII-uppercased.
pub alternate: Vec<u8>,
}
/// Normalizes a variant against the reference sequence of its contig.
///
/// Both alleles are ASCII-uppercased, then the bases they share are trimmed:
/// first the common suffix, then the common prefix (advancing the position by
/// one per removed leading base). Trimming always leaves at least one base in
/// each allele. Variants whose alleles still differ in length afterwards are
/// handed to `left_align` so they can be shifted towards the start of the
/// contig.
///
/// `reference` is the full sequence of the contig named by `v.chrom`, and
/// `v.pos0` is a 0-based index into it.
///
/// # Errors
///
/// * [`NormalizeError::OutOfBounds`] if the variant starts at or beyond the
///   end of `reference`, if its reference allele extends past the end of
///   `reference`, or if the trimmed start position no longer fits in a `u32`.
/// * [`NormalizeError::EmptyAllele`] if either allele has no bases.
pub fn normalize_variant(
    reference: &[u8],
    v: &VcfVariant,
) -> Result<NormalizedVariant, NormalizeError> {
    let out_of_bounds = || NormalizeError::OutOfBounds {
        chrom: v.chrom.clone(),
        pos0: v.pos0,
        reference_len: reference.len(),
    };

    // Reject an out-of-range start before paying for the allele copies.
    let start = v.pos0 as usize;
    if start >= reference.len() {
        return Err(out_of_bounds());
    }

    let mut r = v
        .reference
        .iter()
        .map(|b| b.to_ascii_uppercase())
        .collect::<Vec<u8>>();
    let mut a = v
        .alternate
        .iter()
        .map(|b| b.to_ascii_uppercase())
        .collect::<Vec<u8>>();

    // Trimming never empties an allele, so an empty one can only come from the input.
    if r.is_empty() || a.is_empty() {
        return Err(NormalizeError::EmptyAllele);
    }

    // The reference allele has to lie entirely inside the contig. Without this
    // check, prefix trimming could move the start past the end of `reference`
    // and the left-alignment step would index outside the sequence.
    // `start < reference.len()` holds here, so the subtraction cannot underflow.
    if r.len() > reference.len() - start {
        return Err(out_of_bounds());
    }

    // Drop the shared suffix, always keeping at least one base per allele.
    let keep_one = r.len().min(a.len()) - 1;
    let suffix = r
        .iter()
        .rev()
        .zip(a.iter().rev())
        .take_while(|(x, y)| x == y)
        .count()
        .min(keep_one);
    r.truncate(r.len() - suffix);
    a.truncate(a.len() - suffix);

    // Drop the shared prefix under the same "keep one base" rule.
    let keep_one = r.len().min(a.len()) - 1;
    let prefix = r
        .iter()
        .zip(a.iter())
        .take_while(|(x, y)| x == y)
        .count()
        .min(keep_one);
    r.drain(..prefix);
    a.drain(..prefix);

    // Every removed leading base moves the start one position to the right.
    // Guard the narrowing cast: the new start must still fit in a `u32`.
    let mut pos0 = v.pos0;
    if prefix > (u32::MAX - pos0) as usize {
        return Err(out_of_bounds());
    }
    pos0 += prefix as u32;

    // Only length-changing variants (insertions/deletions) can be shifted.
    if r.len() != a.len() {
        left_align(reference, &mut pos0, &mut r, &mut a)?;
    }

    Ok(NormalizedVariant {
        chrom: v.chrom.clone(),
        pos0,
        reference: r,
        alternate: a,
    })
}
/// Shifts a length-changing variant as far left on the contig as it can go
/// without changing the sequence it describes.
///
/// A variant can move one base to the left exactly when both alleles end in
/// the same base: that base is dropped from the right end of each allele and
/// the reference base immediately left of the variant is prepended to both.
/// This repeats until the alleles end in different bases or the start of the
/// contig is reached. Shared trailing bases are trimmed first and shared
/// leading bases last, never shrinking an allele below one base.
///
/// `pos0`, `r` (reference allele) and `a` (alternate allele) are updated in
/// place. Bases taken from `reference` are ASCII-uppercased. If `pos0` points
/// beyond `reference`, so that the base to its left does not exist, shifting
/// stops instead of indexing out of range.
///
/// # Errors
///
/// Returns [`NormalizeError::EmptyAllele`] if either allele is empty.
fn left_align(
    reference: &[u8],
    pos0: &mut u32,
    r: &mut Vec<u8>,
    a: &mut Vec<u8>,
) -> Result<(), NormalizeError> {
    if r.is_empty() || a.is_empty() {
        return Err(NormalizeError::EmptyAllele);
    }

    loop {
        // Shared trailing bases carry no information; drop them while both
        // alleles can spare a base.
        while r.len() > 1 && a.len() > 1 && r.last() == a.last() {
            r.pop();
            a.pop();
        }

        // The shift is only sequence-preserving when both alleles end in the
        // same base, and there must be a base to the left to pull in.
        if *pos0 == 0 || r.last() != a.last() {
            break;
        }
        let left_base = match reference.get(*pos0 as usize - 1) {
            Some(base) => base.to_ascii_uppercase(),
            None => break,
        };

        // Drop the last base and prepend `left_base`: rotate the last base to
        // the front, then overwrite it.
        *pos0 -= 1;
        r.rotate_right(1);
        r[0] = left_base;
        a.rotate_right(1);
        a[0] = left_base;
    }

    // Remove any shared leading bases that are not needed as an anchor.
    while r.len() > 1 && a.len() > 1 && r.first() == a.first() {
        r.remove(0);
        a.remove(0);
        *pos0 += 1;
    }

    Ok(())
}