Skip to main content

oxicuda_seq/alignment/
gotoh.rs

1//! Gotoh affine-gap global alignment (3 DP matrices: match M, gap-in-x X, gap-in-y Y).
2
3use crate::error::{SeqError, SeqResult};
4
5/// Affine-gap scoring scheme.
6#[derive(Debug, Clone, Copy)]
7pub struct GotohScoring {
8    pub match_score: i32,
9    pub mismatch: i32,
10    pub gap_open: i32,
11    pub gap_extend: i32,
12}
13
14impl Default for GotohScoring {
15    fn default() -> Self {
16        Self {
17            match_score: 2,
18            mismatch: -1,
19            gap_open: -5,
20            gap_extend: -1,
21        }
22    }
23}
24
25/// Result of Gotoh affine-gap global alignment.
26#[derive(Debug, Clone)]
27pub struct GotohAlignment {
28    pub a_aligned: Vec<Option<usize>>,
29    pub b_aligned: Vec<Option<usize>>,
30    pub score: i32,
31}
32
33/// Run Gotoh's affine-gap global alignment.
34pub fn gotoh_align(a: &[u8], b: &[u8], sc: &GotohScoring) -> SeqResult<GotohAlignment> {
35    let m = a.len();
36    let n = b.len();
37    if m == 0 || n == 0 {
38        return Err(SeqError::EmptyInput);
39    }
40    let cols = n + 1;
41    let neg_inf = i32::MIN / 2;
42    let mut mm = vec![neg_inf; (m + 1) * cols];
43    let mut x = vec![neg_inf; (m + 1) * cols];
44    let mut y = vec![neg_inf; (m + 1) * cols];
45
46    mm[0] = 0;
47    for i in 1..=m {
48        x[i * cols] = sc.gap_open + (i as i32 - 1) * sc.gap_extend;
49    }
50    for j in 1..=n {
51        y[j] = sc.gap_open + (j as i32 - 1) * sc.gap_extend;
52    }
53
54    for i in 1..=m {
55        for j in 1..=n {
56            let s = if a[i - 1] == b[j - 1] {
57                sc.match_score
58            } else {
59                sc.mismatch
60            };
61            let prev_diag = [
62                mm[(i - 1) * cols + (j - 1)],
63                x[(i - 1) * cols + (j - 1)],
64                y[(i - 1) * cols + (j - 1)],
65            ];
66            mm[i * cols + j] = prev_diag.iter().cloned().fold(neg_inf, i32::max) + s;
67            x[i * cols + j] =
68                (mm[(i - 1) * cols + j] + sc.gap_open).max(x[(i - 1) * cols + j] + sc.gap_extend);
69            y[i * cols + j] =
70                (mm[i * cols + (j - 1)] + sc.gap_open).max(y[i * cols + (j - 1)] + sc.gap_extend);
71        }
72    }
73    let score = [mm[m * cols + n], x[m * cols + n], y[m * cols + n]]
74        .iter()
75        .cloned()
76        .fold(neg_inf, i32::max);
77
78    // Trace back picking the layer with highest score.
79    let mut a_align = Vec::new();
80    let mut b_align = Vec::new();
81    let mut i = m;
82    let mut j = n;
83    let mut layer = if mm[i * cols + j] >= x[i * cols + j] && mm[i * cols + j] >= y[i * cols + j] {
84        0
85    } else if x[i * cols + j] >= y[i * cols + j] {
86        1
87    } else {
88        2
89    };
90
91    while i > 0 || j > 0 {
92        match layer {
93            0 => {
94                if i > 0 && j > 0 {
95                    a_align.push(Some(i - 1));
96                    b_align.push(Some(j - 1));
97                    // Pick which layer mm came from
98                    let prev = [
99                        mm[(i - 1) * cols + (j - 1)],
100                        x[(i - 1) * cols + (j - 1)],
101                        y[(i - 1) * cols + (j - 1)],
102                    ];
103                    let max_prev = prev.iter().cloned().fold(neg_inf, i32::max);
104                    layer = if prev[0] == max_prev {
105                        0
106                    } else if prev[1] == max_prev {
107                        1
108                    } else {
109                        2
110                    };
111                    i -= 1;
112                    j -= 1;
113                } else if i > 0 {
114                    layer = 1;
115                } else {
116                    layer = 2;
117                }
118            }
119            1 => {
120                if i > 0 {
121                    a_align.push(Some(i - 1));
122                    b_align.push(None);
123                    let open = mm[(i - 1) * cols + j] + sc.gap_open;
124                    let ext = x[(i - 1) * cols + j] + sc.gap_extend;
125                    layer = if open >= ext { 0 } else { 1 };
126                    i -= 1;
127                } else {
128                    layer = 2;
129                }
130            }
131            _ => {
132                if j > 0 {
133                    a_align.push(None);
134                    b_align.push(Some(j - 1));
135                    let open = mm[i * cols + (j - 1)] + sc.gap_open;
136                    let ext = y[i * cols + (j - 1)] + sc.gap_extend;
137                    layer = if open >= ext { 0 } else { 2 };
138                    j -= 1;
139                } else {
140                    layer = 1;
141                }
142            }
143        }
144    }
145    a_align.reverse();
146    b_align.reverse();
147    Ok(GotohAlignment {
148        a_aligned: a_align,
149        b_aligned: b_align,
150        score,
151    })
152}
153
154#[cfg(test)]
155mod tests {
156    use super::*;
157
158    #[test]
159    fn gotoh_identical_is_match_count() {
160        let a = b"AAAA";
161        let r = gotoh_align(a, a, &GotohScoring::default()).expect("ok");
162        assert_eq!(r.score, 2 * a.len() as i32);
163    }
164
165    #[test]
166    fn gotoh_handles_long_gap() {
167        let a = b"AAAA";
168        let b = b"AAAAGGGGAAAA";
169        let sc = GotohScoring {
170            match_score: 2,
171            mismatch: -1,
172            gap_open: -5,
173            gap_extend: -1,
174        };
175        let r = gotoh_align(a, b, &sc).expect("ok");
176        // Should achieve a finite score; not -inf.
177        assert!(r.score > i32::MIN / 4);
178    }
179}