textdistance/algorithms/
smith_waterman.rs

1//! Smith-Waterman sequence alignment
2use crate::{Algorithm, Result};
3use alloc::vec;
4use alloc::vec::Vec;
5
6/// [Smith-Waterman similarity] is edit-based and designed for nucleic acid (and protein) sequences.
7///
8/// [Smith-Waterman similarity]: https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
9pub struct SmithWaterman {
10    /// The cost of an alignment gap. Default: 1.
11    pub gap_cost: isize,
12
13    /// The cost of symbols matching. Default: -1.
14    pub match_cost: isize,
15
16    /// The cost of symbols not matching. Default: 0.
17    pub mismatch_cost: isize,
18}
19
20impl Default for SmithWaterman {
21    fn default() -> Self {
22        Self {
23            gap_cost: 1,
24            match_cost: -1,
25            mismatch_cost: 0,
26        }
27    }
28}
29
30impl Algorithm<usize> for SmithWaterman {
31    fn for_vec<E: Eq>(&self, s1: &[E], s2: &[E]) -> Result<usize> {
32        let l1 = s1.len();
33        let l2 = s2.len();
34        let mut dist_mat: Vec<Vec<isize>> = vec![vec![0; l2 + 1]; l1 + 1];
35        for (i, sc1) in s1.iter().enumerate() {
36            for (j, sc2) in s2.iter().enumerate() {
37                let cost = if sc1 == sc2 {
38                    self.match_cost
39                } else {
40                    self.mismatch_cost
41                };
42                let match_ = dist_mat[i][j] - cost;
43                let delete = dist_mat[i][j + 1] - self.gap_cost;
44                let insert = dist_mat[i + 1][j] - self.gap_cost;
45                dist_mat[i + 1][j + 1] = 0.max(match_).max(delete).max(insert);
46            }
47        }
48        let result = dist_mat[l1][l2];
49        Result {
50            #[allow(clippy::cast_sign_loss)]
51            abs: result as usize,
52            is_distance: false,
53            max: l1.max(l2),
54            len1: l1,
55            len2: l2,
56        }
57    }
58}
59
60#[cfg(test)]
61mod tests {
62    use crate::str::smith_waterman;
63    use assert2::assert;
64    use rstest::rstest;
65
66    #[rstest]
67    // parity with textdistance
68    #[case("hello", "world", 1)]
69    #[case("abcd", "abce", 3)]
70    #[case("AGACTAGTTAC", "CGAGACGT", 3)]
71    #[case("qwe", "rty", 0)]
72    #[case("qwe", "rty", 0)]
73    #[case("check", "shrek", 2)]
74    // parity with abydos
75    #[case("cat", "hat", 2)]
76    #[case("Niall", "Neil", 1)]
77    #[case("aluminum", "Catalan", 0)]
78    #[case("ATCG", "TAGC", 1)]
79    fn function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize) {
80        assert!(smith_waterman(s1, s2) == exp);
81    }
82}