use crate::variant_calling::Variant;
pub fn translate_ms_val(
ms_curr: i64,
ms_next: i64,
ms_prev: i64,
threshold: usize,
) -> (char, char) {
assert!(threshold > 1);
let aln_curr: char;
let mut aln_next: char = ' ';
if ms_curr > threshold as i64 && ms_next > 0 && ms_next < threshold as i64 {
aln_curr = 'R';
aln_next = 'R';
} else if ms_curr <= 0 {
if ms_next == 1 && ms_prev > 0 {
aln_curr = 'X';
} else {
aln_curr = '-';
}
} else {
aln_curr = 'M';
}
(aln_curr, aln_next)
}
pub fn translate_ms_vec(
derand_ms: &[i64],
k: usize,
threshold: usize,
) -> Vec<char> {
assert!(k > 0);
assert!(threshold > 1);
assert!(derand_ms.len() > 2);
let len = derand_ms.len();
let mut res = vec![' '; len];
for pos in 0..len {
let prev: i64 = if pos > 1 { derand_ms[pos - 1] } else { k as i64};
let curr: i64 = derand_ms[pos];
let next: i64 = if pos < len - 1 { derand_ms[pos + 1] } else { derand_ms[pos] };
if !(pos > 1 && res[pos - 1] == 'R' && res[pos] == 'R') {
let (aln_curr, aln_next) = translate_ms_val(curr, next, prev, threshold);
res[pos] = aln_curr;
if pos + 1 < len - 1 && aln_next != ' ' {
res[pos + 1] = aln_next;
}
}
}
res
}
pub fn add_variants(
translation: &[char],
variants: &[Variant],
) -> Vec<char> {
let mut refined = translation.to_vec();
variants.iter().for_each(|var| {
let query_len = var.query_chars.len();
let ref_len = var.ref_chars.len();
if query_len == ref_len {
var.ref_chars.iter().enumerate().for_each(|(i, nt)| {
refined[var.query_pos + i] = *nt as char;
})
} else if query_len == 0 {
refined[var.query_pos - 1] = 'I';
refined[var.query_pos] = 'I';
} else if ref_len == 0 {
var.query_chars.iter().enumerate().for_each(|(i, _)| {
refined[var.query_pos + i] = 'D';
});
} else if ref_len != query_len {
let all_equal = var.ref_chars.iter().cloned().collect::<std::collections::HashSet<u8>>().len() == 1;
let fill_char = if all_equal { var.ref_chars[0] as char } else { 'N' };
var.query_chars.iter().enumerate().for_each(|(i, _)| {
refined[var.query_pos + i] = fill_char;
});
}
});
refined
}
#[cfg(test)]
mod tests {
#[test]
fn translate_ms_val_with_deletion() {
let expected = ('R','R');
let got = super::translate_ms_val(3, 1, 2, 2);
assert_eq!(got, expected);
}
#[test]
fn translate_ms_val_with_recombination() {
let expected = ('R','R');
let got = super::translate_ms_val(3, 1, 3, 2);
assert_eq!(got, expected);
}
#[test]
fn translate_ms_val_with_mismatch() {
let expected = ('X',' ');
let got = super::translate_ms_val(0, 1, 3, 2);
assert_eq!(got, expected);
}
#[test]
fn translate_ms_val_with_single_insertion() {
let expected = ('X', ' ');
let got = super::translate_ms_val(0, 1, 3, 2);
assert_eq!(got, expected);
}
#[test]
fn translate_ms_val_with_many_insertions() {
let expected = ('-', ' ');
let got = super::translate_ms_val(-1, 0, 3, 2);
assert_eq!(got, expected);
}
#[test]
fn translate_ms_val_with_only_matches() {
let expected = ('M', ' ');
let got = super::translate_ms_val(1, 2, 3, 2);
assert_eq!(got, expected);
}
#[test]
fn translate_ms_vec() {
let input: Vec<i64> = vec![0,1,2,3,1,2,3,0,1,2,3,-1,0,1,2,3,-1,0];
let expected: Vec<char> = vec!['X','M','M','R','R','M','M','X','M','M','M','-','-','M','M','M','-','-'];
let got = super::translate_ms_vec(&input, 3, 2);
assert_eq!(got, expected);
}
#[test]
fn translate_ms_vec_with_recombination() {
let input: Vec<i64> = vec![1,2,3,1,2,3,3,3,3,1,2,3];
let expected: Vec<char> = vec!['M','M','R','R','M','M','M','M','R','R','M','M'];
let got = super::translate_ms_vec(&input, 3, 2);
assert_eq!(got, expected);
}
#[test]
fn add_variants() {
use crate::build;
use crate::call;
use crate::CallOpts;
use crate::BuildOpts;
use crate::index::query_sbwt;
use crate::derandomize::derandomize_ms_vec;
use super::translate_ms_vec;
use super::add_variants;
let reference = b"TCGTGGATCGATACACGCTAGCAGGCTGACTCGATGGGATACTATGTGTTATAGCAATTCGGATCGATCGA".to_vec();
let query = b"TCGTGGATCGATACACGCTAGCAGTGACTCGATGGGATACCATGTGTTATAGCAATTCCGGATCGATCGA".to_vec();
let expected = b"MMMMMMMMMMMMMMMMMMMMMMMMDDMMMMMMMMMMMMMMMMCMMMMMMMMMMMMMMMMIIMMMMMMMMMM".to_vec();
let k = 20;
let threshold = 10;
let (sbwt_query, lcs_query) = build(&[query], BuildOpts{ k, build_select: true, ..Default::default() });
let mut call_opts = CallOpts::default();
call_opts.sbwt_build_opts.k = k;
call_opts.max_error_prob = 0.001;
let noisy_ms = query_sbwt(&reference, &sbwt_query, &lcs_query);
let derand_ms = derandomize_ms_vec(&noisy_ms.iter().map(|x| x.0).collect::<Vec<usize>>(), k, threshold);
let translated = translate_ms_vec(&derand_ms, k, threshold);
let variants = call(&sbwt_query, &lcs_query, &reference, call_opts);
eprintln!("{:?}", variants);
let with_variants = add_variants(&translated, &variants);
assert_eq!(expected.iter().map(|x| *x as char).collect::<Vec<char>>(), with_variants);
}
#[test]
fn add_variants_multi_base_substitution() {
use crate::build;
use crate::call;
use crate::CallOpts;
use crate::BuildOpts;
use crate::index::query_sbwt;
use crate::derandomize::derandomize_ms_vec;
use super::translate_ms_vec;
use super::add_variants;
let reference = b"TCGTGGATCGATACACGCTAGCAGGCTGACTCGATGGGATACTATGTGTTATAGCAATTCCGGATCGATCGA".to_vec();
let query = b"TCGTGGATCGATACACGCTAGCAGGCTGACTCGATGGGATACCCAATGTGTTATAGCAATTCCGGATCGATCGA".to_vec();
let expected = b"MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNMMMMMMMMMMMMMMMMMMMMMMMMMMMMM".to_vec();
let k = 20;
let threshold = 10;
let (sbwt_query, lcs_query) = build(&[query], BuildOpts{ k, build_select: true, ..Default::default() });
let mut call_opts = CallOpts::default();
call_opts.sbwt_build_opts.k = k;
call_opts.max_error_prob = 0.001;
let noisy_ms = query_sbwt(&reference, &sbwt_query, &lcs_query);
let derand_ms = derandomize_ms_vec(&noisy_ms.iter().map(|x| x.0).collect::<Vec<usize>>(), k, threshold);
let translated = translate_ms_vec(&derand_ms, k, threshold);
let variants = call(&sbwt_query, &lcs_query, &reference, call_opts);
eprintln!("{:?}", variants);
let with_variants = add_variants(&translated, &variants);
assert_eq!(expected.iter().map(|x| *x as char).collect::<Vec<char>>(), with_variants);
}
#[test]
fn add_variants_multi_base_substitution_all_same() {
use crate::build;
use crate::call;
use crate::CallOpts;
use crate::BuildOpts;
use crate::index::query_sbwt;
use crate::derandomize::derandomize_ms_vec;
use super::translate_ms_vec;
use super::add_variants;
let reference = b"TCGTGGATCGATACACGCTAGCAGGCTGACTCGATGGGATACTATGTGTTATAGCAATTCCGGATCGATCGA".to_vec();
let query = b"TCGTGGATCGATACACGCTAGCAGGCTGACTCGATGGGATACGGGATGTGTTATAGCAATTCCGGATCGATCGA".to_vec();
let expected = b"MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMGMMMMMMMMMMMMMMMMMMMMMMMMMMMMM".to_vec();
let k = 20;
let threshold = 10;
let (sbwt_query, lcs_query) = build(&[query], BuildOpts{ k, build_select: true, ..Default::default() });
let mut call_opts = CallOpts::default();
call_opts.sbwt_build_opts.k = k;
call_opts.max_error_prob = 0.001;
let noisy_ms = query_sbwt(&reference, &sbwt_query, &lcs_query);
let derand_ms = derandomize_ms_vec(&noisy_ms.iter().map(|x| x.0).collect::<Vec<usize>>(), k, threshold);
let translated = translate_ms_vec(&derand_ms, k, threshold);
let variants = call(&sbwt_query, &lcs_query, &reference, call_opts);
eprintln!("{:?}", variants);
let with_variants = add_variants(&translated, &variants);
assert_eq!(expected.iter().map(|x| *x as char).collect::<Vec<char>>(), with_variants);
}
#[test]
fn add_variants_clustered_substitutions() {
use crate::build;
use crate::call;
use crate::CallOpts;
use crate::BuildOpts;
use crate::index::query_sbwt;
use crate::derandomize::derandomize_ms_vec;
use super::translate_ms_vec;
use super::add_variants;
let reference = b"TCGTGGATCGATACACGCTAGCAGGCTGACTCGATGGGATACTATGTGTTATAGCAATTCCGGATCGATCGA".to_vec();
let query = b"TCGTGGATCGATACACGCTAGCAGGCTGACTCGATGGGATACCACGTGTTATAGCAATTCCGGATCGATCGA".to_vec();
let expected = b"MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMCACMMMMMMMMMMMMMMMMMMMMMMMMMMM".to_vec();
let k = 20;
let threshold = 10;
let (sbwt_query, lcs_query) = build(&[query], BuildOpts{ k, build_select: true, ..Default::default() });
let mut call_opts = CallOpts::default();
call_opts.sbwt_build_opts.k = k;
call_opts.max_error_prob = 0.001;
let noisy_ms = query_sbwt(&reference, &sbwt_query, &lcs_query);
let derand_ms = derandomize_ms_vec(&noisy_ms.iter().map(|x| x.0).collect::<Vec<usize>>(), k, threshold);
let translated = translate_ms_vec(&derand_ms, k, threshold);
let variants = call(&sbwt_query, &lcs_query, &reference, call_opts);
eprintln!("{:?}", variants);
let with_variants = add_variants(&translated, &variants);
assert_eq!(expected.iter().map(|x| *x as char).collect::<Vec<char>>(), with_variants);
}
}