Skip to main content

oxicuda_seq/alignment/
hirschberg.rs

1//! Hirschberg's O(min(m, n)) memory global alignment.
2//!
3//! Implements the classical divide-and-conquer reduction of NW.
4
5use super::needleman_wunsch::ScoringMatrix;
6use crate::error::{SeqError, SeqResult};
7
8/// Result of Hirschberg alignment.
9#[derive(Debug, Clone)]
10pub struct HirschbergAlignment {
11    pub a_aligned: Vec<Option<usize>>,
12    pub b_aligned: Vec<Option<usize>>,
13    pub score: i32,
14}
15
16/// Compute the NW score row using O(n) memory; offsets keep the original index space.
17fn nw_score(a: &[u8], b: &[u8], sc: &ScoringMatrix) -> Vec<i32> {
18    let n = b.len();
19    let mut prev = vec![0i32; n + 1];
20    let mut cur = vec![0i32; n + 1];
21    for j in 0..=n {
22        prev[j] = sc.gap * j as i32;
23    }
24    for i in 1..=a.len() {
25        cur[0] = sc.gap * i as i32;
26        for j in 1..=n {
27            let s = if a[i - 1] == b[j - 1] {
28                sc.match_score
29            } else {
30                sc.mismatch
31            };
32            cur[j] = (prev[j - 1] + s)
33                .max(prev[j] + sc.gap)
34                .max(cur[j - 1] + sc.gap);
35        }
36        std::mem::swap(&mut prev, &mut cur);
37    }
38    prev
39}
40
41/// Run Hirschberg global alignment.  Returns the same alignment shape as NW.
42pub fn hirschberg_align(a: &[u8], b: &[u8], sc: &ScoringMatrix) -> SeqResult<HirschbergAlignment> {
43    let m = a.len();
44    let n = b.len();
45    if m == 0 || n == 0 {
46        return Err(SeqError::EmptyInput);
47    }
48    // Use absolute indices via offsets to recover original positions.
49    let mut a_align: Vec<Option<usize>> = Vec::new();
50    let mut b_align: Vec<Option<usize>> = Vec::new();
51    hirschberg_rec(a, b, 0, 0, sc, &mut a_align, &mut b_align);
52
53    // Compute the score by walking the alignment with the scoring matrix.
54    let mut score = 0i32;
55    for (ai, bi) in a_align.iter().zip(b_align.iter()) {
56        match (ai, bi) {
57            (Some(i), Some(j)) => {
58                score += if a[*i] == b[*j] {
59                    sc.match_score
60                } else {
61                    sc.mismatch
62                };
63            }
64            _ => score += sc.gap,
65        }
66    }
67    Ok(HirschbergAlignment {
68        a_aligned: a_align,
69        b_aligned: b_align,
70        score,
71    })
72}
73
74/// Append a vertical gap-in-b run (only `a` characters with `b = None`).
75fn append_a_only(
76    a_off: usize,
77    len: usize,
78    a_align: &mut Vec<Option<usize>>,
79    b_align: &mut Vec<Option<usize>>,
80) {
81    for k in 0..len {
82        a_align.push(Some(a_off + k));
83        b_align.push(None);
84    }
85}
86
87/// Append a horizontal gap-in-a run.
88fn append_b_only(
89    b_off: usize,
90    len: usize,
91    a_align: &mut Vec<Option<usize>>,
92    b_align: &mut Vec<Option<usize>>,
93) {
94    for k in 0..len {
95        a_align.push(None);
96        b_align.push(Some(b_off + k));
97    }
98}
99
100/// Direct NW alignment for small sub-problems (m == 0 or 1).
101fn nw_base(
102    a: &[u8],
103    b: &[u8],
104    a_off: usize,
105    b_off: usize,
106    sc: &ScoringMatrix,
107    a_align: &mut Vec<Option<usize>>,
108    b_align: &mut Vec<Option<usize>>,
109) {
110    if a.is_empty() {
111        append_b_only(b_off, b.len(), a_align, b_align);
112        return;
113    }
114    if a.len() == 1 {
115        // Try matching `a[0]` at every position of b; pick the highest-scoring placement.
116        let n = b.len();
117        let mut best_j: Option<usize> = None;
118        let mut best_score = i32::MIN;
119        for j in 0..n {
120            let mut s = 0i32;
121            s += sc.gap * j as i32; // initial gaps in a
122            s += if a[0] == b[j] {
123                sc.match_score
124            } else {
125                sc.mismatch
126            };
127            s += sc.gap * (n - j - 1) as i32; // trailing gaps in a
128            if s > best_score {
129                best_score = s;
130                best_j = Some(j);
131            }
132        }
133        // Also consider the all-gaps option (a[0] as a gap, b entirely gaps in a)
134        let all_gaps_score = sc.gap + sc.gap * n as i32;
135        if all_gaps_score > best_score {
136            append_a_only(a_off, 1, a_align, b_align);
137            append_b_only(b_off, n, a_align, b_align);
138            return;
139        }
140        if let Some(j) = best_j {
141            append_b_only(b_off, j, a_align, b_align);
142            a_align.push(Some(a_off));
143            b_align.push(Some(b_off + j));
144            append_b_only(b_off + j + 1, n - j - 1, a_align, b_align);
145        } else {
146            append_a_only(a_off, 1, a_align, b_align);
147            append_b_only(b_off, n, a_align, b_align);
148        }
149        return;
150    }
151    // Should not happen — recursion handles it.
152    append_a_only(a_off, a.len(), a_align, b_align);
153    append_b_only(b_off, b.len(), a_align, b_align);
154}
155
156fn hirschberg_rec(
157    a: &[u8],
158    b: &[u8],
159    a_off: usize,
160    b_off: usize,
161    sc: &ScoringMatrix,
162    a_align: &mut Vec<Option<usize>>,
163    b_align: &mut Vec<Option<usize>>,
164) {
165    let m = a.len();
166    let n = b.len();
167    if m == 0 {
168        append_b_only(b_off, n, a_align, b_align);
169        return;
170    }
171    if n == 0 {
172        append_a_only(a_off, m, a_align, b_align);
173        return;
174    }
175    if m == 1 {
176        nw_base(a, b, a_off, b_off, sc, a_align, b_align);
177        return;
178    }
179    let i_mid = m / 2;
180    let score_l = nw_score(&a[..i_mid], b, sc);
181    let a_rev: Vec<u8> = a[i_mid..].iter().rev().cloned().collect();
182    let b_rev: Vec<u8> = b.iter().rev().cloned().collect();
183    let score_r = nw_score(&a_rev, &b_rev, sc);
184    // Find j* maximising score_l[j] + score_r[n - j]
185    let mut best_j = 0usize;
186    let mut best_v = i32::MIN;
187    for j in 0..=n {
188        let v = score_l[j] + score_r[n - j];
189        if v > best_v {
190            best_v = v;
191            best_j = j;
192        }
193    }
194    hirschberg_rec(
195        &a[..i_mid],
196        &b[..best_j],
197        a_off,
198        b_off,
199        sc,
200        a_align,
201        b_align,
202    );
203    hirschberg_rec(
204        &a[i_mid..],
205        &b[best_j..],
206        a_off + i_mid,
207        b_off + best_j,
208        sc,
209        a_align,
210        b_align,
211    );
212}
213
214#[cfg(test)]
215mod tests {
216    use super::*;
217    use crate::alignment::needleman_wunsch::needleman_wunsch;
218
219    #[test]
220    fn hirschberg_matches_nw_score() {
221        let cases: &[(&[u8], &[u8])] = &[
222            (b"GATTACA", b"GCATGCU"),
223            (b"ACGTACGT", b"ACGGACGT"),
224            (b"A", b"AC"),
225            (b"AC", b"A"),
226        ];
227        let sc = ScoringMatrix::default();
228        for (a, b) in cases {
229            let r1 = needleman_wunsch(a, b, &sc).expect("ok");
230            let r2 = hirschberg_align(a, b, &sc).expect("ok");
231            assert_eq!(r1.score, r2.score, "score mismatch on {:?}/{:?}", a, b);
232        }
233    }
234}