1use std::cmp::Eq;
2use std::cmp::Ordering;
3use std::fmt;
4use std::fmt::Debug;
5
6use lazy_static::lazy_static;
7use seqalign::measures::LCSOp;
8use seqalign::measures::LCS;
9use seqalign::op::IndexedOperation;
10use seqalign::Align;
11use serde::{Deserialize, Serialize};
12
13lazy_static! {
14 static ref MEASURE: LCS = LCS::new(1, 1);
15}
16
17#[derive(Debug, PartialEq, Hash, Eq, Clone, Serialize, Deserialize)]
20pub enum EditTree<T> {
21 MatchNode {
22 pre: usize,
23 suf: usize,
24 left: Option<Box<EditTree<T>>>,
25 right: Option<Box<EditTree<T>>>,
26 },
27
28 ReplaceNode {
29 replacee: Vec<T>,
30 replacement: Vec<T>,
31 },
32}
33
34impl<T> EditTree<T>
35where
36 T: PartialEq + Eq + Clone,
37{
38 pub fn create_tree(a: &[T], b: &[T]) -> Option<Self> {
44 build_tree(a, b).map(|tree| *tree)
45 }
46
47 pub fn pretty_print(&self, format_vec: impl (Fn(&[T]) -> String) + Copy) -> String {
53 match self {
54 EditTree::MatchNode {
55 pre,
56 suf,
57 left,
58 right,
59 } => {
60 let left_str = left
61 .as_ref()
62 .map(|left| left.pretty_print(format_vec))
63 .unwrap_or_else(|| "()".to_string());
64 let right_str = right
65 .as_ref()
66 .map(|right| right.pretty_print(format_vec))
67 .unwrap_or_else(|| "()".to_string());
68
69 format!("(match {} {} {} {})", pre, suf, left_str, right_str)
70 }
71 EditTree::ReplaceNode {
72 replacee,
73 replacement,
74 } => format!(
75 "(replace \"{}\" \"{}\")",
76 format_vec(replacee),
77 format_vec(replacement),
78 ),
79 }
80 }
81}
82
83#[derive(Debug, PartialEq, Eq, Hash)]
85pub struct LCSMatch {
86 start_src: usize,
87 start_targ: usize,
88 length: usize,
89}
90
91impl LCSMatch {
92 pub fn new(start_src: usize, start_targ: usize, length: usize) -> Self {
93 LCSMatch {
94 start_src,
95 start_targ,
96 length,
97 }
98 }
99 fn empty() -> Self {
100 LCSMatch::new(0, 0, 0)
101 }
102}
103
104impl PartialOrd for LCSMatch {
105 fn partial_cmp(&self, other: &LCSMatch) -> Option<Ordering> {
106 Some(self.length.cmp(&other.length))
107 }
108}
109
110fn longest_match(script: &[IndexedOperation<LCSOp>]) -> Option<LCSMatch> {
112 let mut longest = LCSMatch::empty();
113
114 let mut script_slice = script;
115 while !script_slice.is_empty() {
116 let op = &script_slice[0];
117
118 match op.operation() {
119 LCSOp::Match => {
120 let in_start = op.source_idx();
121 let o_start = op.target_idx();
122 let end = match script_slice
123 .iter()
124 .position(|x| !matches!(x.operation(), LCSOp::Match))
125 {
126 Some(idx) => idx,
127 None => script_slice.len(),
128 };
129 if end > longest.length {
130 longest = LCSMatch::new(in_start, o_start, end);
131 };
132
133 script_slice = &script_slice[end..];
134 }
135 _ => {
136 script_slice = &script_slice[1..];
137 }
138 }
139 }
140
141 if longest.length != 0 {
142 Some(longest)
143 } else {
144 None
145 }
146}
147
148fn build_tree<T: PartialEq + Eq + Clone>(form_ch: &[T], lem_ch: &[T]) -> Option<Box<EditTree<T>>> {
150 if form_ch.is_empty() && lem_ch.is_empty() {
151 return None;
152 }
153
154 let alignment = MEASURE.align(form_ch, lem_ch);
155 let root = match longest_match(&alignment.edit_script()[..]) {
156 Some(m) => EditTree::MatchNode {
157 pre: m.start_src,
158 suf: (form_ch.len() - m.start_src) - m.length,
159 left: build_tree(&form_ch[..m.start_src], &lem_ch[..m.start_targ]),
160 right: build_tree(
161 &form_ch[m.start_src + m.length..],
162 &lem_ch[m.start_targ + m.length..],
163 ),
164 },
165 None => EditTree::ReplaceNode {
166 replacee: form_ch.to_vec(),
167 replacement: lem_ch.to_vec(),
168 },
169 };
170 Some(Box::new(root))
171}
172
173pub trait Apply<T: PartialEq> {
175 fn apply(&self, form: &[T]) -> Option<Vec<T>>;
176}
177
178impl<T: PartialEq + Eq + Clone + Debug> Apply<T> for EditTree<T> {
179 fn apply(&self, form: &[T]) -> Option<Vec<T>> {
182 let form_len = form.len();
183 match self {
184 EditTree::MatchNode {
185 pre,
186 suf,
187 left,
188 right,
189 } => {
190 if pre + suf >= form_len {
191 return None;
192 }
193
194 let mut left = match left {
195 Some(left) => left.apply(&form[..*pre])?,
196 None => vec![],
197 };
198
199 left.extend(form[*pre..form_len - *suf].iter().cloned());
200
201 if let Some(right) = right {
202 left.extend(right.apply(&form[form_len - *suf..])?)
203 }
204
205 Some(left)
206 }
207
208 EditTree::ReplaceNode {
209 ref replacee,
210 ref replacement,
211 } => {
212 if form == &replacee[..] || replacee.is_empty() {
213 Some(replacement.clone())
214 } else {
215 None
216 }
217 }
218 }
219 }
220}
221
222impl fmt::Display for EditTree<char> {
223 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
224 write!(
225 f,
226 "{}",
227 self.pretty_print(|chars: &[char]| chars.iter().collect::<String>())
228 )
229 }
230}
231
232#[cfg(test)]
233mod tests {
234 use super::{Apply, EditTree};
235 use crate::ToLowerCharVec;
236 use std::collections::HashSet;
237
238 #[test]
239 fn test_graph_equality_outcome() {
240 let a = "hates".to_lower_char_vec();
241 let b = "hate".to_lower_char_vec();
242 let g = EditTree::create_tree(a.as_slice(), b.as_slice()).unwrap();
243
244 let a = "loves".to_lower_char_vec();
245 let b = "love".to_lower_char_vec();
246 let g1 = EditTree::create_tree(a.as_slice(), b.as_slice()).unwrap();
247
248 let f = "loves".to_lower_char_vec();
249 let f1 = "hates".to_lower_char_vec();
250 let exp = "love".to_lower_char_vec();
251 let exp1 = "hate".to_lower_char_vec();
252
253 assert_eq!(g.apply(&f1).unwrap(), exp1);
254 assert_eq!(g1.apply(&f).unwrap(), exp);
255 assert_eq!(g, g1);
256 }
257
258 #[test]
259 fn test_graph_equality_outcome_2() {
260 let g = EditTree::create_tree(
261 &"machen".to_lower_char_vec(),
262 &"gemacht".to_lower_char_vec(),
263 )
264 .unwrap();
265 let g1 = EditTree::create_tree(
266 &"lachen".to_lower_char_vec(),
267 &"gelacht".to_lower_char_vec(),
268 )
269 .unwrap();
270
271 let f = "machen".to_lower_char_vec();
272 let f1 = "lachen".to_lower_char_vec();
273 let exp = "gemacht".to_lower_char_vec();
274 let exp1 = "gelacht".to_lower_char_vec();
275
276 assert_eq!(g.apply(&f1).unwrap(), exp1);
277 assert_eq!(g1.apply(&f).unwrap(), exp);
278 assert_eq!(g, g1);
279 }
280
281 #[test]
282 fn test_graph_equality_outcome_3() {
283 let a = "aaaaaaaaen".to_lower_char_vec();
284 let b = "geaaaaaaaat".to_lower_char_vec();
285 let g = EditTree::create_tree(&a, &b).unwrap();
286
287 let a = "lachen".to_lower_char_vec();
288 let b = "gelacht".to_lower_char_vec();
289 let g1 = EditTree::create_tree(&a, &b).unwrap();
290
291 let f = "lachen".to_lower_char_vec();
292 let f1 = "aaaaaaaaen".to_lower_char_vec();
293 let exp = "gelacht".to_lower_char_vec();
294 let exp1 = "geaaaaaaaat".to_lower_char_vec();
295
296 assert_eq!(g.apply(&f).unwrap(), exp);
297 assert_eq!(g1.apply(&f1).unwrap(), exp1);
298 assert_eq!(g, g1);
299 }
300
301 #[test]
302 fn test_graph_equality_and_applicability() {
303 let mut set: HashSet<EditTree<char>> = HashSet::default();
304 let a = "abc".to_lower_char_vec();
305 let b = "ab".to_lower_char_vec();
306 let g1 = EditTree::create_tree(&a, &b).unwrap();
307
308 let a = "aaa".to_lower_char_vec();
309 let b = "aa".to_lower_char_vec();
310 let g2 = EditTree::create_tree(&a, &b).unwrap();
311
312 let a = "cba".to_lower_char_vec();
313 let b = "ba".to_lower_char_vec();
314 let g3 = EditTree::create_tree(&a, &b).unwrap();
315 let g4 = EditTree::create_tree(&a, &b).unwrap();
316
317 let a = "aaa".to_lower_char_vec();
318 let b = "aac".to_lower_char_vec();
319 let g5 = EditTree::create_tree(&a, &b).unwrap();
320
321 let a = "dec".to_lower_char_vec();
322 let b = "decc".to_lower_char_vec();
323 let g6 = EditTree::create_tree(&a, &a).unwrap();
324 let g7 = EditTree::create_tree(&a, &b).unwrap();
325
326 set.insert(g1);
327 assert_eq!(set.len(), 1);
328 set.insert(g2);
329 assert_eq!(set.len(), 2);
330 set.insert(g3);
331 assert_eq!(set.len(), 3);
332 set.insert(g4);
333 assert_eq!(set.len(), 3);
334 set.insert(g5);
335 assert_eq!(set.len(), 4);
336 set.insert(g6);
337 set.insert(g7);
338 assert_eq!(set.len(), 6);
339
340 let v = "yyyy".to_lower_char_vec();
341 let res: HashSet<String> = set
342 .iter()
343 .map(|x| x.apply(&v))
344 .filter(|x| x.is_some())
345 .map(|x| x.unwrap().iter().collect::<String>())
346 .collect();
347
348 assert_eq!(res.len(), 2);
349
350 let v = "yyy".to_lower_char_vec();
351 let res: HashSet<String> = set
352 .iter()
353 .map(|x| x.apply(&v))
354 .filter(|x| x.is_some())
355 .map(|x| x.unwrap().iter().collect::<String>())
356 .collect();
357 assert!(res.contains("yyyc"));
358 assert!(res.contains("yyy"));
359 assert_eq!(res.len(), 2);
360
361 let v = "bba".to_lower_char_vec();
362 let res: HashSet<String> = set
363 .iter()
364 .map(|x| x.apply(&v))
365 .filter(|x| x.is_some())
366 .map(|x| x.unwrap().iter().collect::<String>())
367 .collect();
368
369 assert!(res.contains("bbac"));
370 assert!(res.contains("bba"));
371 assert!(res.contains("bb"));
372 assert!(res.contains("bbc"));
373 assert_eq!(res.len(), 4);
374
375 let v = a.as_slice();
376 let res: HashSet<String> = set
377 .iter()
378 .map(|x| x.apply(&v))
379 .filter(|x| x.is_some())
380 .map(|x| x.unwrap().iter().collect::<String>())
381 .collect();
382 assert!(res.contains("dec"));
383 assert!(res.contains("decc"));
384 assert!(res.contains("de"));
385 assert_eq!(res.len(), 3);
386
387 let a = "die".to_lower_char_vec();
388 let b = "das".to_lower_char_vec();
389 let c = "die".to_lower_char_vec();
390 let g = EditTree::create_tree(a.as_slice(), b.as_slice()).unwrap();
391 assert!(g.apply(&c).is_some());
392 }
393 #[test]
394 fn test_graphs_inapplicable() {
395 let g = EditTree::create_tree(
396 "abcdefg".to_lower_char_vec().as_slice(),
397 "abc".to_lower_char_vec().as_slice(),
398 )
399 .unwrap();
400 assert!(g.apply(&"abc".to_lower_char_vec().as_slice()).is_none());
401
402 let g = EditTree::create_tree(
403 "abcdefg".to_lower_char_vec().as_slice(),
404 "efg".to_lower_char_vec().as_slice(),
405 )
406 .unwrap();
407 assert!(g.apply(&"efg".to_lower_char_vec().as_slice()).is_none());
408 }
409
410 #[test]
411 fn display_edit_tree() {
412 let tree =
413 EditTree::create_tree(&['l', 'o', 'o', 'p', 't'], &['l', 'o', 'p', 'e', 'n']).unwrap();
414
415 assert_eq!(
416 tree.to_string(),
417 "(match 0 3 () (match 1 1 (replace \"o\" \"\") (replace \"t\" \"en\")))"
418 );
419 }
420}