lib/
alignment_lib.rs

1//! General types and functions that are useful for alignments.
2
3/// Holds penalties scores.
4/// There is no match penalty: matches do not change the score.
5/// The penalty for any gap is length * extd_pen + open_pen. The extension pen is also applied
6/// when a gap is opened.
7/// Penalties should be a positive int.
8use strum_macros::{Display, EnumString};
9
10/// The different alignment algorithms implemented in this crate.
11#[derive(Clone, Copy, Debug, EnumString, Display)]
12pub enum AlignmentAlgorithm {
13    /// Basic WFA.
14    Wavefront,
15    
16    WavefrontAdaptive,
17
18    /// DP matrix based, gap-affine, unoptimized alignment.
19    SWG,
20}
21
22/// Penalties used for WFA.
23#[derive(Debug, PartialEq, Eq, Clone)]
24pub struct Penalties {
25    /// There is a single mismatch penalty for every char combination.
26    /// WFA requires that the match penalty is set to 0.
27    pub mismatch_pen: u32,
28
29    /// Gap opening penalty.
30    pub open_pen: u32,
31
32    /// Gap extension penalty. It is also applied when a gap is opened.
33    pub extd_pen: u32,
34}
35
36/// This is the value returned by every alignment function after successfully aligning 2 strings.
37/// The aligned strings have '-' at gaps.
38#[derive(Debug, Eq, PartialEq, Clone)]
39pub struct Alignment {
40    pub score: u32,
41    pub query_aligned: String,
42    pub text_aligned: String,
43}
44
45/// Error type, for alignment errors.
46#[derive(Debug, Eq, PartialEq)]
47pub enum AlignmentError {
48    /// Both strings should have at least 1 character.
49    ZeroLength(String),
50
51    /// query.len() needs to be <= to text.len()
52    QueryTooLong(String),
53}
54
55/// Alignment layers. Used for tracking back.
56#[derive(Debug, Clone, Copy, PartialEq, Eq)]
57pub enum AlignmentLayer {
58    Matches,
59    Inserts,
60    Deletes,
61}
62
63/// The methods for every wavefront type.
64pub(crate) trait Wavefront {
65    fn extend(&mut self);
66    fn next(&mut self);
67    fn increment_score(&mut self);
68    fn is_finished(&self) -> bool;
69    fn backtrace(&self) -> Result<Alignment, AlignmentError>;
70}
71
72/// Used to store and access wavefronts efficiently.
73/// T is the type used to store the number of chars matched.
74/// U is the type used for diagonals.
75#[derive(Debug, Eq, PartialEq)]
76pub(crate) struct WavefrontGrid {
77    /// The vec of (lowest valid diag, highest valid diag) for each score.
78    /// Lowest is always a negative value, stored using an unsigned type.
79    diags: Vec<(i32, i32)>,
80
81    /// The vec that stores the offset at which each layer starts in the vector.
82    /// Each layer corresponds to a score.
83    offsets: Vec<usize>,
84
85    matches: Vec<Option<(u32, AlignmentLayer)>>,
86    inserts: Vec<Option<(u32, AlignmentLayer)>>,
87    deletes: Vec<Option<(u32, AlignmentLayer)>>,
88}
89
90/// Make a new wavefront grid with the first diagonal of (lo, hi)
91/// lo and hi = 0 for a 1-element initial diagonal.
92pub(crate) fn new_wavefront_grid() -> WavefrontGrid {
93    let diags = vec![(0, 0)];
94    // Stores the tuple of the (lowest, highest) diagonals for a given score.
95    // Initial value = (0, 0) => the last value is included.
96    // The first tuple item stores the lowest diagonal, and stores values <= 0.
97
98    let matches = vec![Some((0, AlignmentLayer::Matches)); 1];
99    let inserts = vec![None; 1];
100    let deletes = vec![None; 1];
101
102    let offsets = vec![0, 1];
103    // The furthest-reaching point will be stored in the previous 3 vecs.
104    // These vecs are 1D: instead of indicing them by 2D Vecs of v[score][diagonal],
105    // we'll indice them as:
106    //      v[offsets[score] + (diagonal - lowest_diag_at_that_score)]
107    //
108    // Thus, offsets stores the index at which a given score starts in the 3 previous vecs.
109    //
110    // Whenever we add a layer, we'll push n None values in the 3 vecs,
111    // with None = highest_diag - lowest_diag + 1
112    //      => We'll know in advance at which offset will the next score start.
113    //      Therefore, offsets' last value will always be in advance by 1.
114
115    WavefrontGrid {
116        diags,
117        offsets,
118        matches,
119        inserts,
120        deletes,
121    }
122}
123
124impl WavefrontGrid {
125    /// Add a new layer to the wavefronts.
126    /// lo and hi are the lowest/highest diagonals for this new layer.
127    pub(crate) fn add_layer(&mut self, lo: i32, hi: i32) {
128        self.diags.push((lo, hi));
129
130        let new_width: usize = (hi - lo + 1) as usize;
131        self.offsets
132            .push(self.offsets[self.offsets.len() - 1] + new_width);
133
134        for _ in lo..=hi {
135            self.matches.push(None);
136            self.inserts.push(None);
137            self.deletes.push(None);
138        }
139    }
140
141    /// Get a value.
142    pub(crate) fn get(
143        &self,
144        layer: AlignmentLayer,
145        score: u32,
146        diag: i32,
147    ) -> Option<(u32, AlignmentLayer)> {
148        let score = score as usize;
149        if score >= self.offsets.len() || diag < self.diags[score].0 || diag > self.diags[score].1 {
150            // offsets is always ahead by 1, since we know the len of a layer
151            // when it's created. Adding a new layer updates the offset of the next layer.
152            None
153        } else {
154            let diag_offset = (diag - self.diags[score].0) as usize;
155            let position: usize = self.offsets[score] + diag_offset;
156            match layer {
157                AlignmentLayer::Matches => self.matches[position],
158                AlignmentLayer::Inserts => self.inserts[position],
159                AlignmentLayer::Deletes => self.deletes[position],
160            }
161        }
162    }
163
164    pub(crate) fn set(
165        &mut self,
166        layer: AlignmentLayer,
167        score: u32,
168        diag: i32,
169        value: Option<(u32, AlignmentLayer)>,
170    ) {
171        let score = score as usize;
172        if score < self.offsets.len() && diag >= self.diags[score].0 && diag <= self.diags[score].1
173        {
174            let position = self.offsets[score] + (diag - self.diags[score].0) as usize;
175            match layer {
176                AlignmentLayer::Matches => self.matches[position] = value,
177                AlignmentLayer::Inserts => self.inserts[position] = value,
178                AlignmentLayer::Deletes => self.deletes[position] = value,
179            };
180        }
181    }
182
183    pub(crate) fn get_diag_range(&self, score: u32) -> Option<&(i32, i32)> {
184        self.diags.get(score as usize)
185    }
186
187    pub(crate) fn increment(&mut self, score: u32, diag: i32) {
188        let score = score as usize;
189        let position = self.offsets[score] + (diag - self.diags[score].0) as usize;
190        self.matches[position] = match self.matches[position] {
191            Some((score, direction)) => Some((score + 1, direction)),
192            None => Some((1, AlignmentLayer::Matches)),
193        };
194    }
195}
196
197#[cfg(test)]
198mod tests_wfgrid {
199    use super::*;
200
201    #[test]
202    fn test_new_wfgrid() {
203        let grid: WavefrontGrid = new_wavefront_grid();
204        assert_eq!(grid.diags[0], (0, 0));
205        assert_eq!(grid.offsets[0], 0);
206        assert_eq!(grid.offsets[1], 1);
207        assert_eq!(grid.matches[0], Some((0, AlignmentLayer::Matches)));
208        assert_eq!(grid.inserts[0], None);
209        assert_eq!(grid.deletes[0], None);
210    }
211
212    #[test]
213    fn test_add_layer() {
214        let mut grid: WavefrontGrid = new_wavefront_grid();
215        grid.add_layer(-3, 3);
216        assert_eq!(grid.diags[0], (0, 0));
217        assert_eq!(grid.diags[1], (-3, 3));
218        assert_eq!(grid.offsets[0], 0);
219        assert_eq!(grid.offsets[1], 1);
220        assert_eq!(grid.offsets[2], 8);
221        assert_eq!(grid.matches.len(), 8); // initial = 0, next cycle has 7 values
222        assert_eq!(grid.inserts.len(), 8);
223        assert_eq!(grid.deletes.len(), 8);
224    }
225}