#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use crate::protein::AminoAcid;
use crate::scoring::ScoringMatrix;
use crate::error::Result;
#[allow(dead_code)]
const SIMD_WIDTH: usize = 8;
#[cfg(target_arch = "x86_64")]
#[inline]
fn has_avx2() -> bool {
is_x86_feature_detected!("avx2")
}
#[cfg(not(target_arch = "x86_64"))]
#[inline]
fn has_avx2() -> bool {
false
}
#[cfg(target_arch = "x86_64")]
pub fn smith_waterman_striped_avx2(
seq1: &[AminoAcid],
seq2: &[AminoAcid],
matrix: &ScoringMatrix,
_open_penalty: i32,
extend_penalty: i32,
) -> Result<(Vec<Vec<i32>>, usize, usize)> {
if has_avx2() {
unsafe {
smith_waterman_striped_avx2_impl(seq1, seq2, matrix, extend_penalty)
}
} else {
smith_waterman_striped_scalar(seq1, seq2, matrix, extend_penalty)
}
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn smith_waterman_striped_avx2_impl(
seq1: &[AminoAcid],
seq2: &[AminoAcid],
matrix: &ScoringMatrix,
extend_penalty: i32,
) -> Result<(Vec<Vec<i32>>, usize, usize)> {
let m = seq1.len();
let n = seq2.len();
if m == 0 || n == 0 {
return Ok((vec![vec![0i32; n + 1]], 0, 0));
}
let mut h = vec![vec![0i32; n + 1]; m + 1];
let mut max_score = 0i32;
let mut max_i = 0usize;
let mut max_j = 0usize;
let mut score_cache = vec![vec![0i32; m]; n];
for j in 0..n {
for i in 0..m {
score_cache[j][i] = matrix.score(seq1[i], seq2[j]);
}
}
let _extend_vec = _mm256_set1_epi32(extend_penalty);
let _zero_vec = _mm256_setzero_si256();
for j in 1..=n {
for i in 1..=m {
let match_score = score_cache[j - 1][i - 1];
let diagonal = h[i - 1][j - 1] + match_score;
let up = h[i - 1][j] + extend_penalty;
let left = h[i][j - 1] + extend_penalty;
let max_du = if diagonal > up { diagonal } else { up };
let max_dul = if max_du > left { max_du } else { left };
h[i][j] = if max_dul > 0 { max_dul } else { 0 };
if h[i][j] > max_score {
max_score = h[i][j];
max_i = i;
max_j = j;
}
}
}
Ok((h, max_i, max_j))
}
#[cfg(target_arch = "x86_64")]
pub fn needleman_wunsch_striped_avx2(
seq1: &[AminoAcid],
seq2: &[AminoAcid],
matrix: &ScoringMatrix,
_open_penalty: i32,
extend_penalty: i32,
) -> Result<Vec<Vec<i32>>> {
if has_avx2() {
unsafe {
needleman_wunsch_striped_avx2_impl(seq1, seq2, matrix, extend_penalty)
}
} else {
needleman_wunsch_striped_scalar(seq1, seq2, matrix, extend_penalty)
}
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn needleman_wunsch_striped_avx2_impl(
seq1: &[AminoAcid],
seq2: &[AminoAcid],
matrix: &ScoringMatrix,
extend_penalty: i32,
) -> Result<Vec<Vec<i32>>> {
let m = seq1.len();
let n = seq2.len();
if m == 0 || n == 0 {
return Ok(vec![vec![0i32; n + 1]; m + 1]);
}
let mut h = vec![vec![0i32; n + 1]; m + 1];
for j in 1..=n {
h[0][j] = h[0][j - 1] + extend_penalty;
}
for i in 1..=m {
h[i][0] = h[i - 1][0] + extend_penalty;
}
let mut score_cache = vec![vec![0i32; m]; n];
for j in 0..n {
for i in 0..m {
score_cache[j][i] = matrix.score(seq1[i], seq2[j]);
}
}
for j in 1..=n {
for i in 1..=m {
let match_score = score_cache[j - 1][i - 1];
let diagonal = h[i - 1][j - 1] + match_score;
let up = h[i - 1][j] + extend_penalty;
let left = h[i][j - 1] + extend_penalty;
h[i][j] = if diagonal > up {
if diagonal > left { diagonal } else { left }
} else {
if up > left { up } else { left }
};
}
}
Ok(h)
}
fn smith_waterman_striped_scalar(
seq1: &[AminoAcid],
seq2: &[AminoAcid],
matrix: &ScoringMatrix,
extend_penalty: i32,
) -> Result<(Vec<Vec<i32>>, usize, usize)> {
let m = seq1.len();
let n = seq2.len();
let mut h = vec![vec![0i32; n + 1]; m + 1];
let mut max_score = 0i32;
let mut max_i = 0usize;
let mut max_j = 0usize;
for j in 1..=n {
for i in 1..=m {
let match_score = matrix.score(seq1[i - 1], seq2[j - 1]);
let diagonal = h[i - 1][j - 1] + match_score;
let up = h[i - 1][j] + extend_penalty;
let left = h[i][j - 1] + extend_penalty;
h[i][j] = std::cmp::max(0, std::cmp::max(diagonal, std::cmp::max(up, left)));
if h[i][j] > max_score {
max_score = h[i][j];
max_i = i;
max_j = j;
}
}
}
Ok((h, max_i, max_j))
}
fn needleman_wunsch_striped_scalar(
seq1: &[AminoAcid],
seq2: &[AminoAcid],
matrix: &ScoringMatrix,
extend_penalty: i32,
) -> Result<Vec<Vec<i32>>> {
let m = seq1.len();
let n = seq2.len();
let mut h = vec![vec![0i32; n + 1]; m + 1];
for i in 1..=m {
h[i][0] = h[i - 1][0] + extend_penalty;
}
for j in 1..=n {
h[0][j] = h[0][j - 1] + extend_penalty;
}
for j in 1..=n {
for i in 1..=m {
let match_score = matrix.score(seq1[i - 1], seq2[j - 1]);
let diagonal = h[i - 1][j - 1] + match_score;
let up = h[i - 1][j] + extend_penalty;
let left = h[i][j - 1] + extend_penalty;
h[i][j] = std::cmp::max(diagonal, std::cmp::max(up, left));
}
}
Ok(h)
}
#[cfg(not(target_arch = "x86_64"))]
pub fn smith_waterman_striped_avx2(
_seq1: &[AminoAcid],
_seq2: &[AminoAcid],
_matrix: &ScoringMatrix,
_open_penalty: i32,
_extend_penalty: i32,
) -> Result<(Vec<Vec<i32>>, usize, usize)> {
Err(crate::error::Error::Custom(
"Striped AVX2 kernel requires x86_64 architecture".to_string(),
))
}
#[cfg(not(target_arch = "x86_64"))]
pub fn needleman_wunsch_striped_avx2(
_seq1: &[AminoAcid],
_seq2: &[AminoAcid],
_matrix: &ScoringMatrix,
_open_penalty: i32,
_extend_penalty: i32,
) -> Result<Vec<Vec<i32>>> {
Err(crate::error::Error::Custom(
"Striped AVX2 kernel requires x86_64 architecture".to_string(),
))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::scoring::{ScoringMatrix, MatrixType};
#[test]
fn test_smith_waterman_striped_empty() -> Result<()> {
let seq1 = vec![];
let seq2 = vec![];
let matrix = ScoringMatrix::new(MatrixType::Blosum62)?;
let (h, max_i, max_j) = smith_waterman_striped_avx2(&seq1, &seq2, &matrix, -11, -1)?;
assert_eq!(h.len(), 1);
assert_eq!(max_i, 0);
assert_eq!(max_j, 0);
Ok(())
}
#[test]
fn test_smith_waterman_striped_single() -> Result<()> {
let seq1 = vec![AminoAcid::Alanine];
let seq2 = vec![AminoAcid::Alanine];
let matrix = ScoringMatrix::new(MatrixType::Blosum62)?;
let (_h, max_i, max_j) = smith_waterman_striped_avx2(&seq1, &seq2, &matrix, -11, -1)?;
assert!(max_i >= 1 && max_j >= 1);
Ok(())
}
#[test]
fn test_smith_waterman_striped_match() -> Result<()> {
let seq1 = vec![
AminoAcid::Alanine,
AminoAcid::Lysine,
AminoAcid::Glycine,
];
let seq2 = vec![
AminoAcid::Alanine,
AminoAcid::Lysine,
AminoAcid::Glycine,
];
let matrix = ScoringMatrix::new(MatrixType::Blosum62)?;
let (h, max_i, max_j) = smith_waterman_striped_avx2(&seq1, &seq2, &matrix, -11, -1)?;
assert!(h[max_i][max_j] > 5, "Got score: {}", h[max_i][max_j]);
Ok(())
}
#[test]
fn test_needleman_wunsch_striped() -> Result<()> {
let seq1 = vec![
AminoAcid::Alanine,
AminoAcid::Lysine,
AminoAcid::Glycine,
];
let seq2 = vec![
AminoAcid::Alanine,
AminoAcid::Lysine,
AminoAcid::Leucine,
];
let matrix = ScoringMatrix::new(MatrixType::Blosum62)?;
let h = needleman_wunsch_striped_avx2(&seq1, &seq2, &matrix, -11, -1)?;
assert!(h[seq1.len()][seq2.len()] != 0);
Ok(())
}
#[test]
fn test_striped_vs_scalar_consistency() -> Result<()> {
let seq1 = vec![
AminoAcid::Alanine,
AminoAcid::Lysine,
AminoAcid::Glycine,
AminoAcid::Serine,
AminoAcid::Valine,
];
let seq2 = vec![
AminoAcid::Alanine,
AminoAcid::Lysine,
AminoAcid::Leucine,
AminoAcid::Serine,
AminoAcid::Isoleucine,
];
let matrix = ScoringMatrix::new(MatrixType::Blosum62)?;
let (h_simd, max_i_simd, max_j_simd) =
smith_waterman_striped_avx2(&seq1, &seq2, &matrix, -11, -1)?;
let (h_scalar, max_i_scalar, max_j_scalar) =
smith_waterman_striped_scalar(&seq1, &seq2, &matrix, -1)?;
assert_eq!(h_simd, h_scalar);
assert_eq!(max_i_simd, max_i_scalar);
assert_eq!(max_j_simd, max_j_scalar);
Ok(())
}
}