Skip to main content

oxicuda_seq/alignment/
smith_waterman.rs

1//! Smith-Waterman local alignment.
2
3use super::needleman_wunsch::ScoringMatrix;
4use crate::error::{SeqError, SeqResult};
5
6/// Local alignment result: aligned slices + score + start indices.
7#[derive(Debug, Clone)]
8pub struct LocalAlignment {
9    pub a_aligned: Vec<Option<usize>>,
10    pub b_aligned: Vec<Option<usize>>,
11    pub score: i32,
12    pub start_a: usize,
13    pub start_b: usize,
14    pub end_a: usize,
15    pub end_b: usize,
16}
17
18/// Run Smith-Waterman local alignment.
19pub fn smith_waterman(a: &[u8], b: &[u8], score: &ScoringMatrix) -> SeqResult<LocalAlignment> {
20    let m = a.len();
21    let n = b.len();
22    if m == 0 || n == 0 {
23        return Err(SeqError::EmptyInput);
24    }
25    let cols = n + 1;
26    let mut dp = vec![0i32; (m + 1) * cols];
27    let mut trace = vec![0u8; (m + 1) * cols];
28    let mut best = 0i32;
29    let mut best_i = 0usize;
30    let mut best_j = 0usize;
31
32    for i in 1..=m {
33        for j in 1..=n {
34            let s = if a[i - 1] == b[j - 1] {
35                score.match_score
36            } else {
37                score.mismatch
38            };
39            let diag = dp[(i - 1) * cols + (j - 1)] + s;
40            let up = dp[(i - 1) * cols + j] + score.gap;
41            let left = dp[i * cols + (j - 1)] + score.gap;
42            let mut cand = diag.max(up).max(left).max(0);
43            let dir = if cand == 0 {
44                3u8
45            } else if cand == diag {
46                0u8
47            } else if cand == up {
48                1u8
49            } else {
50                2u8
51            };
52            if cand < 0 {
53                cand = 0;
54            }
55            dp[i * cols + j] = cand;
56            trace[i * cols + j] = dir;
57            if cand > best {
58                best = cand;
59                best_i = i;
60                best_j = j;
61            }
62        }
63    }
64
65    // Trace back from (best_i, best_j) until score 0.
66    let mut a_align = Vec::new();
67    let mut b_align = Vec::new();
68    let mut i = best_i;
69    let mut j = best_j;
70    while i > 0 && j > 0 && dp[i * cols + j] > 0 {
71        match trace[i * cols + j] {
72            0 => {
73                a_align.push(Some(i - 1));
74                b_align.push(Some(j - 1));
75                i -= 1;
76                j -= 1;
77            }
78            1 => {
79                a_align.push(Some(i - 1));
80                b_align.push(None);
81                i -= 1;
82            }
83            2 => {
84                a_align.push(None);
85                b_align.push(Some(j - 1));
86                j -= 1;
87            }
88            _ => break,
89        }
90    }
91    let start_a = i;
92    let start_b = j;
93    a_align.reverse();
94    b_align.reverse();
95    Ok(LocalAlignment {
96        a_aligned: a_align,
97        b_aligned: b_align,
98        score: best,
99        start_a,
100        start_b,
101        end_a: best_i,
102        end_b: best_j,
103    })
104}
105
106#[cfg(test)]
107mod tests {
108    use super::*;
109
110    #[test]
111    fn sw_finds_embedded_substring() {
112        let a = b"XXXACGTYYY";
113        let b = b"AACGTGG";
114        let s = ScoringMatrix::default();
115        let r = smith_waterman(a, b, &s).expect("ok");
116        // The longest common substring "ACGT" has score 4 with match=1.
117        assert!(r.score >= 4, "got score {}", r.score);
118    }
119
120    #[test]
121    fn sw_no_match_zero_score() {
122        let a = b"AAAA";
123        let b = b"CCCC";
124        // Mismatch penalty -1, gap -2; best local score should be 0 (empty alignment).
125        let s = ScoringMatrix::default();
126        let r = smith_waterman(a, b, &s).expect("ok");
127        assert_eq!(r.score, 0);
128    }
129}