use super::paf::*;
use log;
use rust_htslib::bam::record::Cigar::*;
use std::cmp::{max, min};
fn compute_overlap_scores(
paf: &PafRecord,
q_start: u64,
q_end: u64,
match_score: i32,
diff_score: i32,
indel_score: i32,
) -> Vec<i32> {
let overlap_len = (q_end - q_start) as usize;
let mut scores = vec![0i32; overlap_len];
let mut q_pos = if paf.strand == '+' { paf.q_st } else { paf.q_en };
for op in paf.cigar.into_iter() {
let op_len = op.len() as u64;
let moves_q = consumes_query(op);
if !moves_q {
continue;
}
let (q_lo, q_hi) = if paf.strand == '+' {
let lo = q_pos;
q_pos += op_len;
(lo, q_pos)
} else {
q_pos -= op_len;
(q_pos, q_pos + op_len)
};
let ovl_st = max(q_lo, q_start);
let ovl_en = min(q_hi, q_end);
if ovl_st >= ovl_en {
continue;
}
let score = match op {
Equal(_) => match_score,
Ins(_) | Del(_) => -indel_score,
_ => -diff_score, };
let idx_start = (ovl_st - q_start) as usize;
let idx_end = (ovl_en - q_start) as usize;
for s in scores[idx_start..idx_end].iter_mut() {
*s = score;
}
}
scores
}
pub fn trim_overlapping_pafs(
left: &mut PafRecord,
right: &mut PafRecord,
match_score: i32,
diff_score: i32,
indel_score: i32,
) {
let st_ovl = max(left.q_st, right.q_st);
let en_ovl = min(left.q_en, right.q_en);
log::info!("Number of overlapping bases {}", en_ovl - st_ovl);
let l_scores = compute_overlap_scores(left, st_ovl, en_ovl, match_score, diff_score, indel_score);
let r_scores = compute_overlap_scores(right, st_ovl, en_ovl, match_score, diff_score, indel_score);
let mut l_score = vec![0i32];
l_score.extend_from_slice(&l_scores);
let mut r_score = r_scores;
r_score.push(0);
l_score.iter_mut().fold(0, |acc, x| {
*x += acc;
*x
});
r_score.iter_mut().rev().fold(0, |acc, x| {
*x += acc;
*x
});
let mut max_idx: u64 = 0;
let mut max = 0;
for (idx, (l, r)) in l_score.iter().zip(r_score.iter()).enumerate() {
if l + r > max {
max = l + r;
max_idx = idx as u64;
}
}
left.truncate_record_by_query(left.q_st, st_ovl + max_idx);
right.truncate_record_by_query(st_ovl + max_idx, right.q_en);
log::info!(
"Split position was found to be {} bases after the overlap start ({}) with a score of {}.",
max_idx,
st_ovl,
max
);
}
#[cfg(test)]
mod tests {
use super::*;
fn pretty_print_paf(paf: &Paf) {
println!("\nPAF RECORDS");
for rec in &paf.records {
println!(
" {}:{}-{} ({}) -> {}:{}-{} cigar={}",
rec.q_name, rec.q_st, rec.q_en, rec.strand,
rec.t_name, rec.t_st, rec.t_en, rec.cigar
);
}
}
#[test]
fn test_inversion_trimming() {
let mut left = PafRecord::new("Q 20 0 10 + T 20 0 10 3 9 60 cg:Z:7=1X2=").unwrap();
left.check_integrity().unwrap();
let mut center = PafRecord::new("Q 20 4 15 - T 20 5 16 3 9 60 cg:Z:3=1X3=1M1X2=").unwrap();
center.check_integrity().unwrap();
let mut right =
PafRecord::new("Q 20 10 20 + T 20 10 20 3 9 60 cz:Z:10= cg:Z:2=2X2=2X2=").unwrap();
right.check_integrity().unwrap();
let mut paf = Paf::new();
paf.records = vec![left, center, right];
pretty_print_paf(&paf);
paf.overlapping_paf_recs(1, 1, 1, false);
pretty_print_paf(&paf);
let expected_cigars = vec!["7=", "2=1X3=1M", "2=2X2="];
for (i, rec) in paf.records.iter().enumerate() {
assert_eq!(rec.cigar.to_string(), expected_cigars[i]);
}
}
}