1use std::collections::{HashSet, VecDeque};
2
3use crate::op::{Backtrack, BestCost, IndexedOperation, Operation};
4use crate::{Measure, SeqPair};
5
6pub trait Align<'a, M, T>
11where
12 M: Measure<T>,
13 T: Eq,
14{
15 fn align(&'a self, source: &'a [T], target: &'a [T]) -> Alignment<'a, M, T>;
19}
20
21impl<'a, M, T> Align<'a, M, T> for M
22where
23 M: Measure<T>,
24 T: Eq,
25{
26 fn align(&'a self, source: &'a [T], target: &'a [T]) -> Alignment<'a, M, T> {
27 let pair = SeqPair { source, target };
28
29 let source_len = pair.source.len() + 1;
30 let target_len = pair.target.len() + 1;
31
32 let mut cost_matrix = vec![vec![0; target_len]; source_len];
33
34 for target_idx in 1..target_len {
37 cost_matrix[0][target_idx] = self
38 .best_cost(&pair, &cost_matrix, 0, target_idx)
39 .expect("No applicable operation");
40 }
41
42 for source_idx in 1..source_len {
44 for target_idx in 0..target_len {
45 cost_matrix[source_idx][target_idx] = self
46 .best_cost(&pair, &cost_matrix, source_idx, target_idx)
47 .expect("No applicable operation");
48 }
49 }
50
51 Alignment {
52 measure: self,
53 pair,
54 cost_matrix,
55 }
56 }
57}
58
59pub struct Alignment<'a, M, T>
61where
62 M: Measure<T>,
63 T: Eq,
64{
65 measure: &'a M,
66 pair: SeqPair<'a, T>,
67 cost_matrix: Vec<Vec<usize>>,
68}
69
70impl<'a, M, T> Alignment<'a, M, T>
71where
72 M: Measure<T>,
73 T: Eq,
74{
75 pub fn distance(&self) -> usize {
77 self.cost_matrix[self.cost_matrix.len() - 1][self.cost_matrix[0].len() - 1]
78 }
79
80 pub fn edit_script(&self) -> Vec<IndexedOperation<M::Operation>> {
85 let mut source_idx = self.pair.source.len();
86 let mut target_idx = self.pair.target.len();
87 let mut script = Vec::new();
88
89 while let Some(op) =
90 self.measure
91 .backtrack(&self.pair, &self.cost_matrix, source_idx, target_idx)
92 {
93 let (new_source_idx, new_target_idx) = op
94 .backtrack(&self.pair, source_idx, target_idx)
95 .expect("Cannot backtrack");
96 source_idx = new_source_idx;
97 target_idx = new_target_idx;
98
99 script.push(IndexedOperation::new(op, source_idx, target_idx));
100
101 if source_idx == 0 && target_idx == 0 {
102 break;
103 }
104 }
105
106 assert_eq!(source_idx, 0, "Cannot backtrack to cell 0, 0");
107 assert_eq!(target_idx, 0, "Cannot backtrack to cell 0, 0");
108
109 script.reverse();
110
111 script
112 }
113
114 pub fn edit_scripts(&self) -> HashSet<Vec<IndexedOperation<M::Operation>>> {
118 let mut q: VecDeque<BacktrackState<M, T>> = VecDeque::new();
123 q.push_back(BacktrackState {
124 source_idx: self.pair.source.len(),
125 target_idx: self.pair.target.len(),
126 script: Vec::new(),
127 });
128
129 let mut scripts = HashSet::new();
130 while let Some(BacktrackState {
131 source_idx,
132 target_idx,
133 script,
134 }) = q.pop_front()
135 {
136 for op in self
139 .measure
140 .backtracks(&self.pair, &self.cost_matrix, source_idx, target_idx)
141 {
142 let (new_source_idx, new_target_idx) = op
143 .backtrack(&self.pair, source_idx, target_idx)
144 .expect("Cannot backtrack");
145 let mut new_script = script.clone();
146
147 new_script.push(IndexedOperation::new(op, new_source_idx, new_target_idx));
148
149 if new_source_idx == 0 && new_target_idx == 0 {
150 new_script.reverse();
152 scripts.insert(new_script);
153 } else {
154 q.push_back(BacktrackState {
156 source_idx: new_source_idx,
157 target_idx: new_target_idx,
158 script: new_script,
159 })
160 }
161 }
162 }
163
164 assert!(!scripts.is_empty(), "Cannot backtrack to cell 0, 0");
165
166 scripts
167 }
168
169 pub fn cost_matrix(&self) -> &Vec<Vec<usize>> {
171 &self.cost_matrix
172 }
173
174 pub fn seq_pair(&self) -> &SeqPair<T> {
176 &self.pair
177 }
178}
179
180struct BacktrackState<M, T>
181where
182 M: Measure<T>,
183{
184 source_idx: usize,
185 target_idx: usize,
186 script: Vec<IndexedOperation<M::Operation>>,
187}
188
189#[cfg(test)]
190mod tests {
191 use crate::measures::Levenshtein;
192 use crate::measures::LevenshteinOp::*;
193 use crate::op::IndexedOperation;
194
195 use super::Align;
196
197 #[test]
198 fn distance_test() {
199 let applet: Vec<char> = "applet".chars().collect();
200 let pineapple: Vec<char> = "pineapple".chars().collect();
201 let pen: Vec<char> = "pen".chars().collect();
202
203 let levenshtein = Levenshtein::new(1, 1, 1);
204
205 assert_eq!(levenshtein.align(&pineapple, &pen).distance(), 7);
206 assert_eq!(levenshtein.align(&pen, &pineapple).distance(), 7);
207 assert_eq!(levenshtein.align(&pineapple, &applet).distance(), 5);
208 assert_eq!(levenshtein.align(&applet, &pen).distance(), 4);
209 }
210
211 #[test]
212 fn edit_script_test() {
213 let applet: Vec<char> = "applet".chars().collect();
214 let pineapple: Vec<char> = "pineapple".chars().collect();
215 let pen: Vec<char> = "pen".chars().collect();
216
217 let levenshtein = Levenshtein::new(1, 1, 1);
218
219 assert_eq!(
220 vec![
221 IndexedOperation::new(Match, 0, 0),
222 IndexedOperation::new(Substitute(1), 1, 1),
223 IndexedOperation::new(Match, 2, 2),
224 IndexedOperation::new(Delete(1), 3, 3),
225 IndexedOperation::new(Delete(1), 4, 3),
226 IndexedOperation::new(Delete(1), 5, 3),
227 IndexedOperation::new(Delete(1), 6, 3),
228 IndexedOperation::new(Delete(1), 7, 3),
229 IndexedOperation::new(Delete(1), 8, 3),
230 ],
231 levenshtein.align(&pineapple, &pen).edit_script()
232 );
233
234 assert_eq!(
235 vec![
236 IndexedOperation::new(Match, 0, 0),
237 IndexedOperation::new(Substitute(1), 1, 1),
238 IndexedOperation::new(Match, 2, 2),
239 IndexedOperation::new(Insert(1), 3, 3),
240 IndexedOperation::new(Insert(1), 3, 4),
241 IndexedOperation::new(Insert(1), 3, 5),
242 IndexedOperation::new(Insert(1), 3, 6),
243 IndexedOperation::new(Insert(1), 3, 7),
244 IndexedOperation::new(Insert(1), 3, 8),
245 ],
246 levenshtein.align(&pen, &pineapple).edit_script()
247 );
248
249 assert_eq!(
250 vec![
251 IndexedOperation::new(Delete(1), 0, 0),
252 IndexedOperation::new(Delete(1), 1, 0),
253 IndexedOperation::new(Delete(1), 2, 0),
254 IndexedOperation::new(Delete(1), 3, 0),
255 IndexedOperation::new(Match, 4, 0),
256 IndexedOperation::new(Match, 5, 1),
257 IndexedOperation::new(Match, 6, 2),
258 IndexedOperation::new(Match, 7, 3),
259 IndexedOperation::new(Match, 8, 4),
260 IndexedOperation::new(Insert(1), 9, 5),
261 ],
262 levenshtein.align(&pineapple, &applet).edit_script()
263 );
264 }
265
266 #[test]
267 fn edit_script_tests() {
268 let applet: Vec<char> = "applet".chars().collect();
269 let pineapple: Vec<char> = "pineapple".chars().collect();
270 let aplpet: Vec<char> = "aplpet".chars().collect();
271
272 let levenshtein = Levenshtein::new(1, 1, 1);
273 assert_eq!(
274 hashset![vec![
275 IndexedOperation::new(Delete(1), 0, 0),
276 IndexedOperation::new(Delete(1), 1, 0),
277 IndexedOperation::new(Delete(1), 2, 0),
278 IndexedOperation::new(Delete(1), 3, 0),
279 IndexedOperation::new(Match, 4, 0),
280 IndexedOperation::new(Match, 5, 1),
281 IndexedOperation::new(Match, 6, 2),
282 IndexedOperation::new(Match, 7, 3),
283 IndexedOperation::new(Match, 8, 4),
284 IndexedOperation::new(Insert(1), 9, 5),
285 ],],
286 levenshtein.align(&pineapple, &applet).edit_scripts()
287 );
288
289 assert_eq!(
290 hashset![
291 vec![
292 IndexedOperation::new(Match, 0, 0),
293 IndexedOperation::new(Match, 1, 1),
294 IndexedOperation::new(Substitute(1), 2, 2),
295 IndexedOperation::new(Substitute(1), 3, 3),
296 IndexedOperation::new(Match, 4, 4),
297 IndexedOperation::new(Match, 5, 5),
298 ],
299 vec![
300 IndexedOperation::new(Match, 0, 0),
301 IndexedOperation::new(Match, 1, 1),
302 IndexedOperation::new(Delete(1), 2, 2),
303 IndexedOperation::new(Match, 3, 2),
304 IndexedOperation::new(Insert(1), 4, 3),
305 IndexedOperation::new(Match, 4, 4),
306 IndexedOperation::new(Match, 5, 5),
307 ],
308 vec![
309 IndexedOperation::new(Match, 0, 0),
310 IndexedOperation::new(Delete(1), 1, 1),
311 IndexedOperation::new(Match, 2, 1),
312 IndexedOperation::new(Match, 3, 2),
313 IndexedOperation::new(Insert(1), 4, 3),
314 IndexedOperation::new(Match, 4, 4),
315 IndexedOperation::new(Match, 5, 5),
316 ],
317 vec![
318 IndexedOperation::new(Match, 0, 0),
319 IndexedOperation::new(Match, 1, 1),
320 IndexedOperation::new(Insert(1), 2, 2),
321 IndexedOperation::new(Match, 2, 3),
322 IndexedOperation::new(Delete(1), 3, 4),
323 IndexedOperation::new(Match, 4, 4),
324 IndexedOperation::new(Match, 5, 5),
325 ],
326 ],
327 levenshtein.align(&applet, &aplpet).edit_scripts()
328 );
329
330 assert_eq!(
331 hashset![vec![
332 IndexedOperation::new(Match, 0, 0),
333 IndexedOperation::new(Match, 1, 1),
334 IndexedOperation::new(Match, 2, 2),
335 IndexedOperation::new(Match, 3, 3),
336 IndexedOperation::new(Match, 4, 4),
337 IndexedOperation::new(Match, 5, 5),
338 ]],
339 levenshtein.align(&applet, &applet).edit_scripts()
340 );
341 }
342
343 #[test]
344 fn align_empty_test() {
345 let empty: &[char] = &[];
346 let non_empty: Vec<char> = "hello".chars().collect();
347
348 let levenshtein = Levenshtein::new(1, 1, 1);
349
350 assert_eq!(levenshtein.align(empty, empty).distance(), 0);
351 assert_eq!(levenshtein.align(non_empty.as_slice(), empty).distance(), 5);
352 assert_eq!(levenshtein.align(empty, non_empty.as_slice()).distance(), 5);
353 }
354}