Skip to main content

similar/algorithms/
lcs.rs

1//! Classic LCS table diff algorithm.
2//!
3//! This implementation builds an LCS table for the compared ranges and then
4//! walks it forward to emit operations.
5//! * time: `O(N*M)`
6//! * space `O(N*M)`
7//!
8//! # Heuristics
9//!
10//! See [`crate::algorithms`] for shared heuristics and the
11//! `diff_deadline_raw` API.
12use alloc::collections::BTreeMap;
13#[cfg(test)]
14use alloc::vec;
15use core::hash::Hash;
16use core::ops::{Index, Range};
17
18use crate::algorithms::utils::{common_prefix_len, common_suffix_len, is_empty_range};
19use crate::algorithms::{DiffHook, preflight};
20use crate::deadline_support::{Instant, deadline_exceeded};
21
22/// Classic LCS table diff algorithm.
23///
24/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
25///
26/// This diff is done with an optional deadline that defines the maximal
27/// execution time permitted before it bails and falls back to an very bad
28/// approximation.  Deadlines with LCS do not make a lot of sense and should
29/// not be used.
30pub fn diff<Old, New, D>(
31    d: &mut D,
32    old: &Old,
33    old_range: Range<usize>,
34    new: &New,
35    new_range: Range<usize>,
36) -> Result<(), D::Error>
37where
38    Old: Index<usize> + ?Sized,
39    New: Index<usize> + ?Sized,
40    D: DiffHook,
41    Old::Output: Hash + Eq,
42    New::Output: PartialEq<Old::Output> + Hash + Eq,
43{
44    diff_deadline(d, old, old_range, new, new_range, None)
45}
46
47/// Classic LCS table diff algorithm.
48///
49/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
50///
51/// This diff is done with an optional deadline that defines the maximal
52/// execution time permitted before it bails and falls back to an approximation.
53pub fn diff_deadline<Old, New, D>(
54    d: &mut D,
55    old: &Old,
56    old_range: Range<usize>,
57    new: &New,
58    new_range: Range<usize>,
59    deadline: Option<Instant>,
60) -> Result<(), D::Error>
61where
62    Old: Index<usize> + ?Sized,
63    New: Index<usize> + ?Sized,
64    D: DiffHook,
65    Old::Output: Hash + Eq,
66    New::Output: PartialEq<Old::Output> + Hash + Eq,
67{
68    if preflight::maybe_emit_disjoint_fast_path(
69        d,
70        old,
71        old_range.clone(),
72        new,
73        new_range.clone(),
74        deadline,
75    )? {
76        return Ok(());
77    }
78
79    diff_deadline_impl(d, old, old_range, new, new_range, deadline)
80}
81
82/// Raw classic LCS table diff algorithm with deadline and without shared
83/// heuristics.
84pub fn diff_deadline_raw<Old, New, D>(
85    d: &mut D,
86    old: &Old,
87    old_range: Range<usize>,
88    new: &New,
89    new_range: Range<usize>,
90    deadline: Option<Instant>,
91) -> Result<(), D::Error>
92where
93    Old: Index<usize> + ?Sized,
94    New: Index<usize> + ?Sized,
95    D: DiffHook,
96    New::Output: PartialEq<Old::Output>,
97{
98    diff_deadline_impl(d, old, old_range, new, new_range, deadline)
99}
100
101fn diff_deadline_impl<Old, New, D>(
102    d: &mut D,
103    old: &Old,
104    old_range: Range<usize>,
105    new: &New,
106    new_range: Range<usize>,
107    deadline: Option<Instant>,
108) -> Result<(), D::Error>
109where
110    Old: Index<usize> + ?Sized,
111    New: Index<usize> + ?Sized,
112    D: DiffHook,
113    New::Output: PartialEq<Old::Output>,
114{
115    if is_empty_range(&new_range) {
116        d.delete(old_range.start, old_range.len(), new_range.start)?;
117        d.finish()?;
118        return Ok(());
119    } else if is_empty_range(&old_range) {
120        d.insert(old_range.start, new_range.start, new_range.len())?;
121        d.finish()?;
122        return Ok(());
123    }
124
125    let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
126    let common_suffix_len = common_suffix_len(
127        old,
128        old_range.start + common_prefix_len..old_range.end,
129        new,
130        new_range.start + common_prefix_len..new_range.end,
131    );
132
133    // If the sequences are not different then we're done
134    if common_prefix_len == old_range.len() && (old_range.len() == new_range.len()) {
135        d.equal(0, 0, old_range.len())?;
136        d.finish()?;
137        return Ok(());
138    }
139
140    let maybe_table = make_table(
141        old,
142        (old_range.start + common_prefix_len)..(old_range.end - common_suffix_len),
143        new,
144        (new_range.start + common_prefix_len)..(new_range.end - common_suffix_len),
145        deadline,
146    );
147    let mut old_idx = 0;
148    let mut new_idx = 0;
149    let new_len = new_range.len() - common_prefix_len - common_suffix_len;
150    let old_len = old_range.len() - common_prefix_len - common_suffix_len;
151
152    if common_prefix_len > 0 {
153        d.equal(old_range.start, new_range.start, common_prefix_len)?;
154    }
155
156    if let Some(table) = maybe_table {
157        while new_idx < new_len && old_idx < old_len {
158            let old_orig_idx = old_range.start + common_prefix_len + old_idx;
159            let new_orig_idx = new_range.start + common_prefix_len + new_idx;
160
161            if new[new_orig_idx] == old[old_orig_idx] {
162                d.equal(old_orig_idx, new_orig_idx, 1)?;
163                old_idx += 1;
164                new_idx += 1;
165            } else if table.get(&(new_idx, old_idx + 1)).unwrap_or(&0)
166                >= table.get(&(new_idx + 1, old_idx)).unwrap_or(&0)
167            {
168                d.delete(old_orig_idx, 1, new_orig_idx)?;
169                old_idx += 1;
170            } else {
171                d.insert(old_orig_idx, new_orig_idx, 1)?;
172                new_idx += 1;
173            }
174        }
175    } else {
176        let old_orig_idx = old_range.start + common_prefix_len + old_idx;
177        let new_orig_idx = new_range.start + common_prefix_len + new_idx;
178        d.delete(old_orig_idx, old_len, new_orig_idx)?;
179        d.insert(old_orig_idx, new_orig_idx, new_len)?;
180    }
181
182    if old_idx < old_len {
183        d.delete(
184            old_range.start + common_prefix_len + old_idx,
185            old_len - old_idx,
186            new_range.start + common_prefix_len + new_idx,
187        )?;
188        old_idx += old_len - old_idx;
189    }
190
191    if new_idx < new_len {
192        d.insert(
193            old_range.start + common_prefix_len + old_idx,
194            new_range.start + common_prefix_len + new_idx,
195            new_len - new_idx,
196        )?;
197    }
198
199    if common_suffix_len > 0 {
200        d.equal(
201            old_range.start + old_len + common_prefix_len,
202            new_range.start + new_len + common_prefix_len,
203            common_suffix_len,
204        )?;
205    }
206
207    d.finish()
208}
209
210fn make_table<Old, New>(
211    old: &Old,
212    old_range: Range<usize>,
213    new: &New,
214    new_range: Range<usize>,
215    deadline: Option<Instant>,
216) -> Option<BTreeMap<(usize, usize), u32>>
217where
218    Old: Index<usize> + ?Sized,
219    New: Index<usize> + ?Sized,
220    New::Output: PartialEq<Old::Output>,
221{
222    let old_len = old_range.len();
223    let new_len = new_range.len();
224    let mut table = BTreeMap::new();
225
226    for i in (0..new_len).rev() {
227        // are we running for too long?  give up on the table
228        if deadline_exceeded(deadline) {
229            return None;
230        }
231
232        for j in (0..old_len).rev() {
233            let val = if new[new_range.start + i] == old[old_range.start + j] {
234                table.get(&(i + 1, j + 1)).unwrap_or(&0) + 1
235            } else {
236                *table
237                    .get(&(i + 1, j))
238                    .unwrap_or(&0)
239                    .max(table.get(&(i, j + 1)).unwrap_or(&0))
240            };
241            if val > 0 {
242                table.insert((i, j), val);
243            }
244        }
245    }
246
247    Some(table)
248}
249
250#[test]
251fn test_table() {
252    let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3, None).unwrap();
253    let expected = {
254        let mut m = BTreeMap::new();
255        m.insert((1, 0), 1);
256        m.insert((0, 0), 1);
257        m.insert((2, 0), 1);
258        m
259    };
260    assert_eq!(table, expected);
261}
262
263#[test]
264fn test_diff() {
265    let a: &[usize] = &[0, 1, 2, 3, 4];
266    let b: &[usize] = &[0, 1, 2, 9, 4];
267
268    let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
269    diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
270    insta::assert_debug_snapshot!(d.into_inner().ops());
271}
272
273#[test]
274fn test_raw_accepts_partialeq_only_values() {
275    let old = [1.0f32, 2.0, 3.0];
276    let new = [1.0f32, 4.0, 3.0];
277
278    let mut d = crate::algorithms::Capture::new();
279    diff_deadline_raw(&mut d, &old, 0..old.len(), &new, 0..new.len(), None).unwrap();
280
281    assert!(!d.ops().is_empty());
282}
283
284#[test]
285fn test_contiguous() {
286    let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5];
287    let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
288
289    let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
290    diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
291    insta::assert_debug_snapshot!(d.into_inner().ops());
292}
293
294#[test]
295fn test_pat() {
296    let a: &[usize] = &[0, 1, 3, 4, 5];
297    let b: &[usize] = &[0, 1, 4, 5, 8, 9];
298
299    let mut d = crate::algorithms::Capture::new();
300    diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
301    insta::assert_debug_snapshot!(d.ops());
302}
303
304#[test]
305fn test_issue44_swapped_regression() {
306    use crate::DiffOp;
307
308    let a: &[usize] = &[0, 1, 4, 5, 8, 9];
309    let b: &[usize] = &[0, 1, 3, 4, 5];
310
311    let mut d = crate::algorithms::Capture::new();
312    diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
313    assert_eq!(
314        d.into_ops(),
315        vec![
316            DiffOp::Equal {
317                old_index: 0,
318                new_index: 0,
319                len: 2,
320            },
321            DiffOp::Insert {
322                old_index: 2,
323                new_index: 2,
324                new_len: 1,
325            },
326            DiffOp::Equal {
327                old_index: 2,
328                new_index: 3,
329                len: 1,
330            },
331            DiffOp::Equal {
332                old_index: 3,
333                new_index: 4,
334                len: 1,
335            },
336            DiffOp::Delete {
337                old_index: 4,
338                old_len: 2,
339                new_index: 5,
340            },
341        ]
342    );
343}
344
345#[test]
346fn test_subrange_regression() {
347    use crate::DiffOp;
348
349    let a: &[usize] = &[99, 0, 1, 4, 5, 8, 9, 88];
350    let b: &[usize] = &[77, 0, 1, 3, 4, 5, 66];
351
352    let mut d = crate::algorithms::Capture::new();
353    diff(&mut d, a, 1..7, b, 1..6).unwrap();
354    assert_eq!(
355        d.into_ops(),
356        vec![
357            DiffOp::Equal {
358                old_index: 1,
359                new_index: 1,
360                len: 2,
361            },
362            DiffOp::Insert {
363                old_index: 3,
364                new_index: 3,
365                new_len: 1,
366            },
367            DiffOp::Equal {
368                old_index: 3,
369                new_index: 4,
370                len: 1,
371            },
372            DiffOp::Equal {
373                old_index: 4,
374                new_index: 5,
375                len: 1,
376            },
377            DiffOp::Delete {
378                old_index: 5,
379                old_len: 2,
380                new_index: 6,
381            },
382        ]
383    );
384}
385
386#[test]
387fn test_same() {
388    let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5];
389    let b: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5];
390
391    let mut d = crate::algorithms::Capture::new();
392    diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
393    insta::assert_debug_snapshot!(d.ops());
394}
395
396#[test]
397fn test_finish_called() {
398    struct HasRunFinish(bool);
399
400    impl DiffHook for HasRunFinish {
401        type Error = ();
402        fn finish(&mut self) -> Result<(), Self::Error> {
403            self.0 = true;
404            Ok(())
405        }
406    }
407
408    let mut d = HasRunFinish(false);
409    let slice = &[1, 2];
410    let slice2 = &[1, 2, 3];
411    diff(&mut d, slice, 0..slice.len(), slice2, 0..slice2.len()).unwrap();
412    assert!(d.0);
413
414    let mut d = HasRunFinish(false);
415    let slice = &[1, 2];
416    diff(&mut d, slice, 0..slice.len(), slice, 0..slice.len()).unwrap();
417    assert!(d.0);
418
419    let mut d = HasRunFinish(false);
420    let slice: &[u8] = &[];
421    diff(&mut d, slice, 0..slice.len(), slice, 0..slice.len()).unwrap();
422    assert!(d.0);
423}
424
425#[test]
426fn test_bad_range_regression() {
427    use crate::DiffOp;
428    use crate::algorithms::Capture;
429    let mut d = Capture::new();
430    diff(&mut d, &[0], 0..1, &[0, 0], 0..2).unwrap();
431    assert_eq!(
432        d.into_ops(),
433        vec![
434            DiffOp::Equal {
435                old_index: 0,
436                new_index: 0,
437                len: 1
438            },
439            DiffOp::Insert {
440                old_index: 1,
441                new_index: 1,
442                new_len: 1
443            }
444        ]
445    );
446}