Skip to main content

oxicuda_seq/alignment/
banded.rs

1//! Banded global (Needleman–Wunsch) alignment.
2//!
3//! Restricts the Needleman–Wunsch dynamic program to a diagonal band of
4//! half-width `band` — only cells with `|i − j| ≤ band` are evaluated. For two
5//! sequences that are near-identical (small edit distance) this finds the
6//! optimal *banded* global alignment by touching only the `O(n · band)` cells
7//! inside the band; every cell outside the band is held at `−∞` and can never
8//! lie on an optimal path. The score is therefore `≤` the unrestricted
9//! Needleman–Wunsch score, and equals it whenever the band is wide enough to
10//! contain an optimal path.
11
12use super::needleman_wunsch::{Alignment, ScoringMatrix};
13use crate::error::{SeqError, SeqResult};
14
15/// Sentinel for unreachable / off-band DP cells.
16const NEG_INF: i32 = i32::MIN / 2;
17
18/// Run banded global alignment of `a` against `b`.
19///
20/// The scoring scheme is the linear-gap [`ScoringMatrix`] shared with
21/// [`needleman_wunsch`](super::needleman_wunsch::needleman_wunsch). Only the
22/// diagonal band `|i − j| ≤ band` is computed; the returned [`Alignment`] holds
23/// the optimal score and traceback subject to that restriction.
24///
25/// # Errors
26/// * [`SeqError::EmptyInput`] if either sequence is empty.
27/// * [`SeqError::InvalidConfiguration`] if `band < |a.len() − b.len()|`: the end
28///   cell `(m, n)` then lies outside the band, so no global alignment fits and
29///   the request is ill-posed.
30pub fn banded_align<T: PartialEq>(
31    a: &[T],
32    b: &[T],
33    score: &ScoringMatrix,
34    band: usize,
35) -> SeqResult<Alignment> {
36    let m = a.len();
37    let n = b.len();
38    if m == 0 || n == 0 {
39        return Err(SeqError::EmptyInput);
40    }
41    let diff = m.abs_diff(n);
42    if band < diff {
43        return Err(SeqError::InvalidConfiguration(format!(
44            "band {band} is narrower than the length difference {diff}; no global alignment fits"
45        )));
46    }
47
48    let cols = n + 1;
49    let mut dp = vec![NEG_INF; (m + 1) * cols];
50    let mut trace = vec![0u8; (m + 1) * cols];
51    // trace codes: 0 = diag, 1 = up (gap in b), 2 = left (gap in a).
52
53    dp[0] = 0;
54    trace[0] = 0;
55    // First column inside the band (gaps in b).
56    let col_limit = m.min(band);
57    for i in 1..=col_limit {
58        dp[i * cols] = score.gap.saturating_mul(i as i32);
59        trace[i * cols] = 1;
60    }
61    // First row inside the band (gaps in a).
62    let row_limit = n.min(band);
63    for j in 1..=row_limit {
64        dp[j] = score.gap.saturating_mul(j as i32);
65        trace[j] = 2;
66    }
67
68    for i in 1..=m {
69        let lo = i.saturating_sub(band).max(1);
70        let hi = (i + band).min(n);
71        for j in lo..=hi {
72            let s = if a[i - 1] == b[j - 1] {
73                score.match_score
74            } else {
75                score.mismatch
76            };
77            let diag = dp[(i - 1) * cols + (j - 1)].saturating_add(s);
78            let up = dp[(i - 1) * cols + j].saturating_add(score.gap);
79            let left = dp[i * cols + (j - 1)].saturating_add(score.gap);
80            let (best, dir) = if diag >= up && diag >= left {
81                (diag, 0u8)
82            } else if up >= left {
83                (up, 1u8)
84            } else {
85                (left, 2u8)
86            };
87            dp[i * cols + j] = best;
88            trace[i * cols + j] = dir;
89        }
90    }
91
92    let final_score = dp[m * cols + n];
93
94    // Trace back from (m, n) following recorded directions.
95    let mut a_align = Vec::new();
96    let mut b_align = Vec::new();
97    let mut i = m;
98    let mut j = n;
99    while i > 0 || j > 0 {
100        match trace[i * cols + j] {
101            0 if i > 0 && j > 0 => {
102                a_align.push(Some(i - 1));
103                b_align.push(Some(j - 1));
104                i -= 1;
105                j -= 1;
106            }
107            1 if i > 0 => {
108                a_align.push(Some(i - 1));
109                b_align.push(None);
110                i -= 1;
111            }
112            2 if j > 0 => {
113                a_align.push(None);
114                b_align.push(Some(j - 1));
115                j -= 1;
116            }
117            _ => {
118                // Boundary safety: prefer up if rows remain, else left.
119                if i > 0 {
120                    a_align.push(Some(i - 1));
121                    b_align.push(None);
122                    i -= 1;
123                } else if j > 0 {
124                    a_align.push(None);
125                    b_align.push(Some(j - 1));
126                    j -= 1;
127                } else {
128                    break;
129                }
130            }
131        }
132    }
133    a_align.reverse();
134    b_align.reverse();
135    Ok(Alignment {
136        a_aligned: a_align,
137        b_aligned: b_align,
138        score: final_score,
139    })
140}
141
142#[cfg(test)]
143mod tests {
144    use super::super::needleman_wunsch::needleman_wunsch;
145    use super::*;
146
147    #[test]
148    fn banded_identical_matches_full_nw() {
149        let a = b"ACGTACGT";
150        let s = ScoringMatrix::default();
151        let full = needleman_wunsch(a, a, &s).expect("nw");
152        // diff = 0, so any band ≥ 0 admits the all-match diagonal.
153        let banded = banded_align(a, a, &s, 2).expect("banded");
154        assert_eq!(banded.score, full.score);
155        assert_eq!(banded.score, a.len() as i32 * s.match_score);
156        assert_eq!(banded.a_aligned.len(), banded.b_aligned.len());
157    }
158
159    #[test]
160    fn banded_score_le_full_nw() {
161        let a = b"ACGTACGTAC";
162        let b = b"ACGTCGTACG";
163        let s = ScoringMatrix::default();
164        let full = needleman_wunsch(a, b, &s).expect("nw");
165        let banded = banded_align(a, b, &s, 1).expect("banded");
166        assert!(
167            banded.score <= full.score,
168            "banded {} should be ≤ full NW {}",
169            banded.score,
170            full.score
171        );
172    }
173
174    #[test]
175    fn wide_band_reproduces_full_nw() {
176        let a = b"GATTACA";
177        let b = b"GCATGCT";
178        let s = ScoringMatrix::default();
179        let full = needleman_wunsch(a, b, &s).expect("nw");
180        // band ≥ max(m, n) covers the whole grid → identical to full NW.
181        let banded = banded_align(a, b, &s, a.len().max(b.len())).expect("banded");
182        assert_eq!(banded.score, full.score);
183    }
184
185    #[test]
186    fn one_substitution_recovered_with_narrow_band() {
187        let a = b"ACGTACGT";
188        let b = b"ACGTTCGT"; // single substitution at index 4 (A → T)
189        let s = ScoringMatrix::default();
190        let banded = banded_align(a, b, &s, 1).expect("banded");
191        let expected = 7 * s.match_score + s.mismatch;
192        assert_eq!(banded.score, expected);
193        let full = needleman_wunsch(a, b, &s).expect("nw");
194        assert_eq!(banded.score, full.score);
195    }
196
197    #[test]
198    fn band_too_small_for_length_diff_errors() {
199        let a = b"ACGTA"; // len 5
200        let b = b"ACGTACGTA"; // len 9, diff = 4
201        let s = ScoringMatrix::default();
202        assert!(banded_align(a, b, &s, 2).is_err());
203        // A band that just covers the difference succeeds.
204        assert!(banded_align(a, b, &s, 4).is_ok());
205    }
206
207    #[test]
208    fn empty_input_errors() {
209        let s = ScoringMatrix::default();
210        let empty: &[u8] = b"";
211        assert!(banded_align(empty, b"ACGT", &s, 3).is_err());
212        assert!(banded_align(b"ACGT", empty, &s, 3).is_err());
213    }
214}