lib/
reference.rs

1//! Implements the reference gap-affine (SWG) alignment algorithm.
2
3use crate::alignment_lib::*;
4use std::cmp::min;
5
6#[derive(Debug)]
7struct AlignMat {
8    inserts: Vec<Vec<(Option<u32>, Option<AlignmentLayer>)>>,
9    matches: Vec<Vec<(Option<u32>, Option<AlignmentLayer>)>>,
10    deletes: Vec<Vec<(Option<u32>, Option<AlignmentLayer>)>>,
11}
12
13/// Performs the SWG alignemnt of two &str.
14pub fn affine_gap_align(a: &str, b: &str, pens: &Penalties) -> Result<Alignment, AlignmentError> {
15    let align_mat = affine_gap_mat(a, b, pens);
16    trace_back(&align_mat, a, b)
17}
18
19fn affine_gap_mat(a: &str, b: &str, pens: &Penalties) -> AlignMat {
20    let mut result = new_mat(a, b, pens);
21    let chars_a: Vec<char> = a.chars().collect();
22    let chars_b: Vec<char> = b.chars().collect();
23    for i in 1..chars_a.len() + 1 {
24        for j in 1..chars_b.len() + 1 {
25            result.inserts[i][j] = match (result.inserts[i - 1][j].0, result.matches[i - 1][j].0) {
26                (Some(a), Some(b)) => {
27                    if min(a + pens.extd_pen, b + pens.extd_pen + pens.open_pen)
28                        == a + pens.extd_pen
29                    {
30                        (Some(a + pens.extd_pen), Some(AlignmentLayer::Inserts))
31                    } else {
32                        (
33                            Some(b + pens.extd_pen + pens.open_pen),
34                            Some(AlignmentLayer::Matches),
35                        )
36                    }
37                }
38                (Some(a), None) => (Some(a + pens.extd_pen), Some(AlignmentLayer::Inserts)),
39                (None, Some(a)) => (
40                    Some(a + pens.extd_pen + pens.open_pen),
41                    Some(AlignmentLayer::Matches),
42                ),
43                (None, None) => panic!("(None, None), results.inserts"),
44            };
45
46            result.deletes[i][j] = match (result.deletes[i][j - 1].0, result.matches[i][j - 1].0) {
47                (Some(a), Some(b)) => {
48                    if min(a + pens.extd_pen, b + pens.extd_pen + pens.open_pen)
49                        == a + pens.extd_pen
50                    {
51                        (Some(a + pens.extd_pen), Some(AlignmentLayer::Deletes))
52                    } else {
53                        (
54                            Some(b + pens.extd_pen + pens.open_pen),
55                            Some(AlignmentLayer::Matches),
56                        )
57                    }
58                }
59                (Some(a), None) => (Some(a + pens.extd_pen), Some(AlignmentLayer::Deletes)),
60                (None, Some(a)) => (
61                    Some(a + pens.extd_pen + pens.open_pen),
62                    Some(AlignmentLayer::Matches),
63                ),
64                (None, None) => panic!("(None, None), results.deletes"),
65            };
66
67            let mismatch = if chars_a[i - 1] == chars_b[j - 1] {
68                0
69            } else {
70                pens.mismatch_pen
71            };
72
73            result.matches[i][j] = match (
74                result.matches[i - 1][j - 1].0,
75                result.deletes[i][j].0,
76                result.inserts[i][j].0,
77            ) {
78                (Some(a), Some(b), Some(c)) => {
79                    if a + mismatch < b {
80                        if a + mismatch < c {
81                            (Some(a + mismatch), Some(AlignmentLayer::Matches))
82                        } else {
83                            (Some(c), Some(AlignmentLayer::Inserts))
84                        }
85                    } else if b <= c {
86                        (Some(b), Some(AlignmentLayer::Deletes))
87                    } else {
88                        (Some(c), Some(AlignmentLayer::Inserts))
89                    }
90                }
91                (Some(a), Some(b), None) => {
92                    if a + mismatch < b {
93                        (Some(a + mismatch), Some(AlignmentLayer::Matches))
94                    } else {
95                        (Some(b), Some(AlignmentLayer::Deletes))
96                    }
97                }
98                (Some(a), None, Some(c)) => {
99                    if a + mismatch < c {
100                        (Some(a + mismatch), Some(AlignmentLayer::Matches))
101                    } else {
102                        (Some(c), Some(AlignmentLayer::Inserts))
103                    }
104                }
105                (None, Some(b), Some(c)) => {
106                    if b < c {
107                        (Some(b), Some(AlignmentLayer::Deletes))
108                    } else {
109                        (Some(c), Some(AlignmentLayer::Inserts))
110                    }
111                }
112                (Some(a), None, None) => (Some(a + mismatch), Some(AlignmentLayer::Matches)),
113                (None, Some(b), None) => (Some(b), Some(AlignmentLayer::Deletes)),
114                (None, None, Some(c)) => (Some(c), Some(AlignmentLayer::Inserts)),
115                (None, None, None) => panic!("(None, None, None), result.matches"),
116            };
117        }
118    }
119    result
120}
121
122fn new_mat(a: &str, b: &str, pens: &Penalties) -> AlignMat {
123    let a_length = a.len() + 1;
124    let b_length = b.len() + 1;
125
126    let mut inserts = vec![vec![(None, None); b_length]; a_length];
127    let mut matches = vec![vec![(None, None); b_length]; a_length];
128    let mut deletes = vec![vec![(None, None); b_length]; a_length];
129
130    matches[0][0] = (Some(0), None);
131
132    inserts[1][0] = (
133        Some(pens.extd_pen + pens.open_pen),
134        Some(AlignmentLayer::Matches),
135    );
136    matches[1][0] = inserts[1][0];
137    for i in 2..a_length {
138        inserts[i][0] = (
139            Some(inserts[i - 1][0].0.unwrap() + pens.extd_pen),
140            Some(AlignmentLayer::Inserts),
141        );
142        matches[i][0] = inserts[i][0];
143    }
144
145    deletes[0][1] = (
146        Some(pens.extd_pen + pens.open_pen),
147        Some(AlignmentLayer::Matches),
148    );
149    matches[0][1] = deletes[0][1];
150    for i in 2..b_length {
151        deletes[0][i] = (
152            Some(deletes[0][i - 1].0.unwrap() + pens.extd_pen),
153            Some(AlignmentLayer::Deletes),
154        );
155        matches[0][i] = deletes[0][i];
156    }
157
158    AlignMat {
159        inserts,
160        matches,
161        deletes,
162    }
163}
164
165fn trace_back(mat: &AlignMat, a: &str, b: &str) -> Result<Alignment, AlignmentError> {
166    let mut result = Alignment {
167        query_aligned: String::new(),
168        text_aligned: String::new(),
169        score: 0,
170    };
171
172    let mut a_pos = a.len();
173    let mut b_pos = b.len();
174
175    let a_chars: Vec<char> = a.chars().collect();
176    let b_chars: Vec<char> = b.chars().collect();
177
178    let mut layer = AlignmentLayer::Matches;
179    result.score = mat.matches[a_pos][b_pos].0.unwrap();
180
181    while (a_pos > 0) || (b_pos > 0) {
182        if a_pos == 0 {
183            b_pos -= 1;
184            result.query_aligned.push('-');
185            result.text_aligned.push(b_chars[b_pos]);
186        } else if b_pos == 0 {
187            a_pos -= 1;
188            result.query_aligned.push(a_chars[a_pos]);
189            result.text_aligned.push('-');
190        } else {
191            match &mut layer {
192                AlignmentLayer::Inserts => {
193                    result.query_aligned.push(a_chars[a_pos - 1]);
194                    result.text_aligned.push('-');
195                    if let Some(AlignmentLayer::Matches) = mat.inserts[a_pos][b_pos].1 {
196                        layer = AlignmentLayer::Matches;
197                    };
198                    a_pos -= 1;
199                }
200                AlignmentLayer::Matches => match mat.matches[a_pos][b_pos].1 {
201                    Some(AlignmentLayer::Matches) => {
202                        a_pos -= 1;
203                        b_pos -= 1;
204                        result.query_aligned.push(a_chars[a_pos]);
205                        result.text_aligned.push(b_chars[b_pos]);
206                    }
207                    Some(AlignmentLayer::Inserts) => {
208                        layer = AlignmentLayer::Inserts;
209                    }
210                    Some(AlignmentLayer::Deletes) => {
211                        layer = AlignmentLayer::Deletes;
212                    }
213                    _ => panic!(),
214                },
215                AlignmentLayer::Deletes => {
216                    result.query_aligned.push('-');
217                    result.text_aligned.push(b_chars[b_pos - 1]);
218                    if let Some(AlignmentLayer::Matches) = mat.deletes[a_pos][b_pos].1 {
219                        layer = AlignmentLayer::Matches;
220                    };
221                    b_pos -= 1;
222                }
223            }
224        }
225    }
226    result.query_aligned = result.query_aligned.chars().rev().collect();
227    result.text_aligned = result.text_aligned.chars().rev().collect();
228    Ok(result)
229}
230
231#[cfg(test)]
232mod tests {
233    use super::*;
234
235    #[test]
236    fn assert_align_score() {
237        assert_eq!(
238            affine_gap_align(
239                "CAT",
240                "CAT",
241                &Penalties {
242                    mismatch_pen: 1,
243                    extd_pen: 1,
244                    open_pen: 1,
245                }
246            ),
247            Ok(Alignment {
248                query_aligned: "CAT".to_string(),
249                text_aligned: "CAT".to_string(),
250                score: 0,
251            })
252        );
253        assert_eq!(
254            affine_gap_align(
255                "CAT",
256                "CATS",
257                &Penalties {
258                    mismatch_pen: 1,
259                    extd_pen: 1,
260                    open_pen: 1,
261                }
262            ),
263            Ok(Alignment {
264                query_aligned: "CAT-".to_string(),
265                text_aligned: "CATS".to_string(),
266                score: 2,
267            })
268        );
269        assert_eq!(
270            affine_gap_align(
271                "XX",
272                "YY",
273                &Penalties {
274                    mismatch_pen: 1,
275                    extd_pen: 100,
276                    open_pen: 100,
277                }
278            ),
279            Ok(Alignment {
280                query_aligned: "XX".to_string(),
281                text_aligned: "YY".to_string(),
282                score: 2,
283            })
284        );
285        assert_eq!(
286            affine_gap_align(
287                "XX",
288                "YY",
289                &Penalties {
290                    mismatch_pen: 100,
291                    extd_pen: 1,
292                    open_pen: 1,
293                }
294            ),
295            Ok(Alignment {
296                query_aligned: "XX--".to_string(),
297                text_aligned: "--YY".to_string(),
298                score: 6,
299            })
300        );
301        assert_eq!(
302            affine_gap_align(
303                "XX",
304                "YYYYYYYY",
305                &Penalties {
306                    mismatch_pen: 100,
307                    extd_pen: 1,
308                    open_pen: 1,
309                }
310            ),
311            Ok(Alignment {
312                query_aligned: "XX--------".to_string(),
313                text_aligned: "--YYYYYYYY".to_string(),
314                score: 12,
315            })
316        );
317        assert_eq!(
318            affine_gap_align(
319                "XXZZ",
320                "XXYZ",
321                &Penalties {
322                    mismatch_pen: 100,
323                    extd_pen: 1,
324                    open_pen: 1,
325                }
326            ),
327            Ok(Alignment {
328                query_aligned: "XX-ZZ".to_string(),
329                text_aligned: "XXYZ-".to_string(),
330                score: 4,
331            })
332        );
333        assert_eq!(
334            match affine_gap_align(
335                "TCTTTACTCGCGCGTTGGAGAAATACAATAGT",
336                "TCTATACTGCGCGTTTGGAGAAATAAAATAGT",
337                &Penalties {
338                    mismatch_pen: 1,
339                    extd_pen: 1,
340                    open_pen: 1,
341                }
342            ) {
343                Ok(s) => s.score,
344                _ => 1,
345            },
346            6
347        );
348
349        assert_eq!(
350            match affine_gap_align(
351                "TCTTTACTCGCGCGTTGGAGAAATACAATAGT",
352                "TCTATACTGCGCGTTTGGAGAAATAAAATAGT",
353                &Penalties {
354                    mismatch_pen: 135,
355                    extd_pen: 19,
356                    open_pen: 82,
357                }
358            ) {
359                Ok(s) => s.score,
360                _ => 1,
361            },
362            472
363        );
364    }
365}