use crate::protein::AminoAcid;
use crate::scoring::ScoringMatrix;
use crate::error::Result;
pub fn smith_waterman_banded(
seq1: &[AminoAcid],
seq2: &[AminoAcid],
matrix: &ScoringMatrix,
open_penalty: i32,
extend_penalty: i32,
bandwidth: usize,
) -> Result<(Vec<Vec<i32>>, usize, usize)> {
let m = seq1.len();
let n = seq2.len();
let length_ratio = (m as f64) / (n.max(1) as f64);
if length_ratio < 0.5 || length_ratio > 2.0 {
return smith_waterman_standard(seq1, seq2, matrix, open_penalty, extend_penalty);
}
let mut h = vec![vec![0i32; n + 1]; m + 1];
let mut max_score = 0;
let mut max_i = 0;
let mut max_j = 0;
for i in 1..=m {
let j_min = if i > bandwidth { i - bandwidth } else { 1 };
let j_max = std::cmp::min(n, i + bandwidth);
for j in j_min..=j_max {
let match_score = matrix.score(seq1[i - 1], seq2[j - 1]);
let diagonal = if i > 0 && j > 0 {
h[i - 1][j - 1] + match_score
} else {
match_score
};
let up = if i > 0 {
h[i - 1][j] + extend_penalty
} else {
i32::MIN / 2 };
let left = if j > 0 {
h[i][j - 1] + extend_penalty
} else {
i32::MIN / 2
};
h[i][j] = std::cmp::max(0, std::cmp::max(diagonal, std::cmp::max(up, left)));
if h[i][j] > max_score {
max_score = h[i][j];
max_i = i;
max_j = j;
}
}
}
Ok((h, max_i, max_j))
}
pub fn needleman_wunsch_banded(
seq1: &[AminoAcid],
seq2: &[AminoAcid],
matrix: &ScoringMatrix,
open_penalty: i32,
extend_penalty: i32,
bandwidth: usize,
) -> Result<Vec<Vec<i32>>> {
let m = seq1.len();
let n = seq2.len();
let length_ratio = (m as f64) / (n.max(1) as f64);
if length_ratio < 0.5 || length_ratio > 2.0 {
return needleman_wunsch_standard(seq1, seq2, matrix, open_penalty, extend_penalty);
}
let mut h = vec![vec![i32::MIN / 2; n + 1]; m + 1];
h[0][0] = 0;
for j in 1..=std::cmp::min(n, bandwidth) {
h[0][j] = (j as i32) * open_penalty;
}
for i in 1..=std::cmp::min(m, bandwidth) {
h[i][0] = (i as i32) * open_penalty;
}
for i in 1..=m {
let j_min = if i > bandwidth { i - bandwidth } else { 1 };
let j_max = std::cmp::min(n, i + bandwidth);
for j in j_min..=j_max {
let match_score = matrix.score(seq1[i - 1], seq2[j - 1]);
let diagonal = if h[i - 1][j - 1] != i32::MIN / 2 {
h[i - 1][j - 1] + match_score
} else {
i32::MIN / 2
};
let up = if h[i - 1][j] != i32::MIN / 2 {
h[i - 1][j] + extend_penalty
} else {
i32::MIN / 2
};
let left = if j > 0 && h[i][j - 1] != i32::MIN / 2 {
h[i][j - 1] + extend_penalty
} else {
i32::MIN / 2
};
h[i][j] = std::cmp::max(diagonal, std::cmp::max(up, left));
}
}
Ok(h)
}
fn smith_waterman_standard(
seq1: &[AminoAcid],
seq2: &[AminoAcid],
matrix: &ScoringMatrix,
_open_penalty: i32,
extend_penalty: i32,
) -> Result<(Vec<Vec<i32>>, usize, usize)> {
let m = seq1.len();
let n = seq2.len();
let mut h = vec![vec![0i32; n + 1]; m + 1];
let mut max_score = 0;
let mut max_i = 0;
let mut max_j = 0;
for i in 1..=m {
for j in 1..=n {
let match_score = matrix.score(seq1[i - 1], seq2[j - 1]);
let diagonal = h[i - 1][j - 1] + match_score;
let up = h[i - 1][j] + extend_penalty;
let left = h[i][j - 1] + extend_penalty;
h[i][j] = std::cmp::max(0, std::cmp::max(diagonal, std::cmp::max(up, left)));
if h[i][j] > max_score {
max_score = h[i][j];
max_i = i;
max_j = j;
}
}
}
Ok((h, max_i, max_j))
}
fn needleman_wunsch_standard(
seq1: &[AminoAcid],
seq2: &[AminoAcid],
matrix: &ScoringMatrix,
open_penalty: i32,
extend_penalty: i32,
) -> Result<Vec<Vec<i32>>> {
let m = seq1.len();
let n = seq2.len();
let mut h = vec![vec![0i32; n + 1]; m + 1];
for i in 0..=m {
h[i][0] = (i as i32) * open_penalty;
}
for j in 0..=n {
h[0][j] = (j as i32) * open_penalty;
}
for i in 1..=m {
for j in 1..=n {
let match_score = matrix.score(seq1[i - 1], seq2[j - 1]);
let diagonal = h[i - 1][j - 1] + match_score;
let up = h[i - 1][j] + extend_penalty;
let left = h[i][j - 1] + extend_penalty;
h[i][j] = std::cmp::max(diagonal, std::cmp::max(up, left));
}
}
Ok(h)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::scoring::{ScoringMatrix, MatrixType};
#[test]
fn test_banded_sw_similar_sequences() -> Result<()> {
let matrix = ScoringMatrix::new(MatrixType::Blosum62)?;
let seq1 = vec![
AminoAcid::Alanine,
AminoAcid::Glycine,
AminoAcid::Serine,
AminoAcid::Glycine,
AminoAcid::AsparticAcid,
];
let seq2 = vec![
AminoAcid::Alanine,
AminoAcid::Glycine,
AminoAcid::Serine,
AminoAcid::Glycine,
AminoAcid::AsparticAcid,
];
let (h_banded, i_banded, j_banded) = smith_waterman_banded(&seq1, &seq2, &matrix, -11, -1, 10)?;
let (h_standard, i_standard, j_standard) = smith_waterman_standard(&seq1, &seq2, &matrix, -11, -1)?;
assert_eq!(h_banded[i_banded][j_banded], h_standard[i_standard][j_standard]);
Ok(())
}
#[test]
fn test_banded_nw_similar_sequences() -> Result<()> {
let matrix = ScoringMatrix::new(MatrixType::Blosum62)?;
let seq1 = vec![
AminoAcid::Methionine,
AminoAcid::Glycine,
AminoAcid::Leucine,
];
let seq2 = vec![
AminoAcid::Methionine,
AminoAcid::Glycine,
AminoAcid::Leucine,
];
let h_banded = needleman_wunsch_banded(&seq1, &seq2, &matrix, -11, -1, 10)?;
let h_standard = needleman_wunsch_standard(&seq1, &seq2, &matrix, -11, -1)?;
assert_eq!(h_banded[seq1.len()][seq2.len()], h_standard[seq1.len()][seq2.len()]);
Ok(())
}
#[test]
fn test_banded_fallback_different_lengths() -> Result<()> {
let matrix = ScoringMatrix::new(MatrixType::Blosum62)?;
let seq1 = vec![AminoAcid::Alanine; 100];
let seq2 = vec![AminoAcid::Alanine; 10];
let (h_banded, i_banded, j_banded) = smith_waterman_banded(&seq1, &seq2, &matrix, -11, -1, 5)?;
let (h_standard, i_standard, j_standard) = smith_waterman_standard(&seq1, &seq2, &matrix, -11, -1)?;
assert_eq!(h_banded[i_banded][j_banded], h_standard[i_standard][j_standard]);
Ok(())
}
}