use crate::genomics::{CigarOp, CigarOpKind};
const NEG_INF: i32 = i32::MIN / 4;
#[derive(Debug, Clone, Copy)]
pub struct AffineScores {
pub match_score: i32,
pub mismatch_score: i32,
pub gap_open: i32,
pub gap_extend: i32,
}
impl Default for AffineScores {
fn default() -> Self {
Self {
match_score: 2,
mismatch_score: -4,
gap_open: -6,
gap_extend: -1,
}
}
}
#[derive(Debug, Clone)]
pub struct RefinedAlignment {
pub ref_start: usize,
pub cigar: Vec<CigarOp>,
pub score: i32,
pub nm: u32,
pub md: String,
}
pub fn banded_affine_align(
reference_window: &[u8],
read: &[u8],
band: usize,
scores: AffineScores,
) -> Option<RefinedAlignment> {
if read.is_empty() || reference_window.is_empty() {
return None;
}
let m = read.len();
let n = reference_window.len();
let mut m_mat = vec![vec![NEG_INF; n + 1]; m + 1];
let mut ix = vec![vec![NEG_INF; n + 1]; m + 1]; let mut iy = vec![vec![NEG_INF; n + 1]; m + 1];
let mut tb_m = vec![vec![0u8; n + 1]; m + 1];
let mut tb_ix = vec![vec![0u8; n + 1]; m + 1];
let mut tb_iy = vec![vec![0u8; n + 1]; m + 1];
for j in 0..=n {
m_mat[0][j] = 0;
ix[0][j] = NEG_INF;
iy[0][j] = NEG_INF;
}
m_mat[0][0] = 0;
for i in 1..=m {
ix[i][0] = scores.gap_open + (i as i32 - 1) * scores.gap_extend;
tb_ix[i][0] = 1;
m_mat[i][0] = NEG_INF;
iy[i][0] = NEG_INF;
}
for i in 1..=m {
let j_min = i.saturating_sub(band).max(1);
let j_max = (i + band).min(n);
for j in j_min..=j_max {
let from_m = m_mat[i - 1][j] + scores.gap_open;
let from_ix = ix[i - 1][j] + scores.gap_extend;
if from_m >= from_ix {
ix[i][j] = from_m;
tb_ix[i][j] = 0;
} else {
ix[i][j] = from_ix;
tb_ix[i][j] = 1;
}
let from_m = m_mat[i][j - 1] + scores.gap_open;
let from_iy = iy[i][j - 1] + scores.gap_extend;
if from_m >= from_iy {
iy[i][j] = from_m;
tb_iy[i][j] = 0;
} else {
iy[i][j] = from_iy;
tb_iy[i][j] = 2;
}
let a = read[i - 1];
let b = reference_window[j - 1];
let s = if a == b {
scores.match_score
} else {
scores.mismatch_score
};
let (best_prev, prev_id) =
best3(m_mat[i - 1][j - 1], ix[i - 1][j - 1], iy[i - 1][j - 1]);
m_mat[i][j] = best_prev + s;
tb_m[i][j] = prev_id;
}
}
let mut best_score = NEG_INF;
let mut best_j = 0usize;
let mut best_matrix = 0u8;
for j in 0..=n {
let (score, which) = best3(m_mat[m][j], ix[m][j], iy[m][j]);
if score > best_score {
best_score = score;
best_j = j;
best_matrix = which;
}
}
if best_score == NEG_INF {
return None;
}
let mut ops_rev: Vec<(CigarOpKind, u32)> = Vec::new();
let mut i = m;
let mut j = best_j;
let mut matrix = best_matrix;
while i > 0 {
match matrix {
0 => {
push_op(&mut ops_rev, CigarOpKind::Match, 1);
let prev = tb_m[i][j];
i -= 1;
j = j.saturating_sub(1);
matrix = prev;
}
1 => {
push_op(&mut ops_rev, CigarOpKind::Insertion, 1);
let prev = tb_ix[i][j];
i -= 1;
matrix = prev;
}
2 => {
push_op(&mut ops_rev, CigarOpKind::Deletion, 1);
let prev = tb_iy[i][j];
j = j.saturating_sub(1);
matrix = prev;
}
_ => break,
}
if j == 0 && i == 0 {
break;
}
}
ops_rev.reverse();
let cigar: Vec<CigarOp> = ops_rev
.into_iter()
.map(|(kind, len)| CigarOp { kind, len })
.collect();
let ref_start = compute_ref_start(reference_window, read, &cigar, best_j)?;
let (nm, md) = compute_nm_md(reference_window, read, ref_start, &cigar);
Some(RefinedAlignment {
ref_start,
cigar,
score: best_score,
nm,
md,
})
}
#[inline]
fn best3(a: i32, b: i32, c: i32) -> (i32, u8) {
if a >= b && a >= c {
(a, 0)
} else if b >= c {
(b, 1)
} else {
(c, 2)
}
}
fn push_op(out: &mut Vec<(CigarOpKind, u32)>, kind: CigarOpKind, len: u32) {
if let Some((last_kind, last_len)) = out.last_mut() {
if *last_kind == kind {
*last_len += len;
return;
}
}
out.push((kind, len));
}
fn compute_ref_start(
reference_window: &[u8],
read: &[u8],
cigar: &[CigarOp],
ref_end_j: usize,
) -> Option<usize> {
let mut ref_consumed = 0usize;
let mut read_consumed = 0usize;
for op in cigar {
match op.kind {
CigarOpKind::Match => {
ref_consumed += op.len as usize;
read_consumed += op.len as usize;
}
CigarOpKind::Insertion => {
read_consumed += op.len as usize;
}
CigarOpKind::Deletion => {
ref_consumed += op.len as usize;
}
_ => {}
}
}
if read_consumed != read.len() {
return None;
}
if ref_consumed > ref_end_j {
return None;
}
let start = ref_end_j - ref_consumed;
if start <= reference_window.len() {
Some(start)
} else {
None
}
}
fn compute_nm_md(
reference_window: &[u8],
read: &[u8],
ref_start: usize,
cigar: &[CigarOp],
) -> (u32, String) {
let mut nm: u32 = 0;
let mut md = String::new();
let mut match_run: u32 = 0;
let mut r_i = 0usize;
let mut ref_i = ref_start;
for op in cigar {
match op.kind {
CigarOpKind::Match => {
for _ in 0..op.len {
let rb = read[r_i];
let gb = reference_window[ref_i];
if rb == gb {
match_run += 1;
} else {
nm += 1;
md.push_str(&match_run.to_string());
match_run = 0;
md.push(gb as char);
}
r_i += 1;
ref_i += 1;
}
}
CigarOpKind::Insertion => {
nm += op.len;
r_i += op.len as usize;
}
CigarOpKind::Deletion => {
nm += op.len;
md.push_str(&match_run.to_string());
match_run = 0;
md.push('^');
for _ in 0..op.len {
md.push(reference_window[ref_i] as char);
ref_i += 1;
}
}
_ => {}
}
}
md.push_str(&match_run.to_string());
(nm, md)
}