Skip to main content

similar/
common.rs

1use alloc::vec;
2use alloc::vec::Vec;
3use core::hash::Hash;
4use core::ops::{Index, Range};
5
6use crate::algorithms::{Capture, Compact, Replace, diff_deadline};
7use crate::deadline_support::Instant;
8use crate::{Algorithm, DiffOp};
9
10/// Creates a diff between old and new with the given algorithm capturing the ops.
11///
12/// This is like [`diff`](crate::algorithms::diff) but instead of using an
13/// arbitrary hook this will always use [`Compact`] + [`Replace`] + [`Capture`]
14/// and return the captured [`DiffOp`]s. For lazily computed values or diffing
15/// by a derived key, see [`crate::algorithms::CachedLookup`].
16pub fn capture_diff<Old, New>(
17    alg: Algorithm,
18    old: &Old,
19    old_range: Range<usize>,
20    new: &New,
21    new_range: Range<usize>,
22) -> Vec<DiffOp>
23where
24    Old: Index<usize> + ?Sized,
25    New: Index<usize> + ?Sized,
26    Old::Output: Hash + Eq,
27    New::Output: PartialEq<Old::Output> + Hash + Eq,
28{
29    capture_diff_deadline(alg, old, old_range, new, new_range, None)
30}
31
32/// Creates a diff between old and new with the given algorithm capturing the ops.
33///
34/// Works like [`capture_diff`] but with an optional deadline.
35pub fn capture_diff_deadline<Old, New>(
36    alg: Algorithm,
37    old: &Old,
38    old_range: Range<usize>,
39    new: &New,
40    new_range: Range<usize>,
41    deadline: Option<Instant>,
42) -> Vec<DiffOp>
43where
44    Old: Index<usize> + ?Sized,
45    New: Index<usize> + ?Sized,
46    Old::Output: Hash + Eq,
47    New::Output: PartialEq<Old::Output> + Hash + Eq,
48{
49    let mut d = Compact::new(Replace::new(Capture::new()), old, new);
50    diff_deadline(alg, &mut d, old, old_range, new, new_range, deadline).unwrap();
51    d.into_inner().into_inner().into_ops()
52}
53
54/// Creates a diff between old and new with the given algorithm capturing the ops.
55///
56/// For lazily computed values or diffing by a derived key, see
57/// [`crate::algorithms::CachedLookup`].
58pub fn capture_diff_slices<T>(alg: Algorithm, old: &[T], new: &[T]) -> Vec<DiffOp>
59where
60    T: Eq + Hash,
61{
62    capture_diff_slices_deadline(alg, old, new, None)
63}
64
65/// Creates a diff between old and new with the given algorithm capturing the ops.
66///
67/// Works like [`capture_diff_slices`] but with an optional deadline.
68pub fn capture_diff_slices_deadline<T>(
69    alg: Algorithm,
70    old: &[T],
71    new: &[T],
72    deadline: Option<Instant>,
73) -> Vec<DiffOp>
74where
75    T: Eq + Hash,
76{
77    capture_diff_deadline(alg, old, 0..old.len(), new, 0..new.len(), deadline)
78}
79
80/// Return a measure of similarity in the range `0..=1`.
81///
82/// A ratio of `1.0` means the two sequences are a complete match, a
83/// ratio of `0.0` would indicate completely distinct sequences.  The input
84/// is the sequence of diff operations and the length of the old and new
85/// sequence.
86pub fn diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 {
87    let matches = ops
88        .iter()
89        .map(|op| {
90            if let DiffOp::Equal { len, .. } = *op {
91                len
92            } else {
93                0
94            }
95        })
96        .sum::<usize>();
97    let len = old_len + new_len;
98    if len == 0 {
99        1.0
100    } else {
101        2.0 * matches as f32 / len as f32
102    }
103}
104
105/// Isolate change clusters by eliminating ranges with no changes.
106///
107/// This will leave holes behind in long periods of equal ranges so that
108/// you can build things like unified diffs.
109pub fn group_diff_ops(mut ops: Vec<DiffOp>, n: usize) -> Vec<Vec<DiffOp>> {
110    if ops.is_empty() {
111        return vec![];
112    }
113
114    let mut pending_group = Vec::new();
115    let mut rv = Vec::new();
116
117    if let Some(DiffOp::Equal {
118        old_index,
119        new_index,
120        len,
121    }) = ops.first_mut()
122    {
123        let offset = (*len).saturating_sub(n);
124        *old_index += offset;
125        *new_index += offset;
126        *len -= offset;
127    }
128
129    if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() {
130        *len -= (*len).saturating_sub(n);
131    }
132
133    for op in ops.into_iter() {
134        if let DiffOp::Equal {
135            old_index,
136            new_index,
137            len,
138        } = op
139        {
140            // End the current group and start a new one whenever
141            // there is a large range with no changes.
142            if len > n * 2 {
143                pending_group.push(DiffOp::Equal {
144                    old_index,
145                    new_index,
146                    len: n,
147                });
148                rv.push(pending_group);
149                let offset = len.saturating_sub(n);
150                pending_group = vec![DiffOp::Equal {
151                    old_index: old_index + offset,
152                    new_index: new_index + offset,
153                    len: len - offset,
154                }];
155                continue;
156            }
157        }
158        pending_group.push(op);
159    }
160
161    match &pending_group[..] {
162        &[] | &[DiffOp::Equal { .. }] => {}
163        _ => rv.push(pending_group),
164    }
165
166    rv
167}
168
169#[test]
170fn test_non_string_iter_change() {
171    use crate::ChangeTag;
172
173    let old = vec![1, 2, 3];
174    let new = vec![1, 2, 4];
175    let ops = capture_diff_slices(Algorithm::Myers, &old, &new);
176    let changes: Vec<_> = ops
177        .iter()
178        .flat_map(|x| x.iter_changes(&old, &new))
179        .map(|x| (x.tag(), x.value()))
180        .collect();
181
182    assert_eq!(
183        changes,
184        vec![
185            (ChangeTag::Equal, 1),
186            (ChangeTag::Equal, 2),
187            (ChangeTag::Delete, 3),
188            (ChangeTag::Insert, 4),
189        ]
190    );
191}
192
193#[test]
194fn test_myers_compacts_adjacent_deletes_issue_80() {
195    let a: Vec<u8> = vec![0, 1, 0, 0, 0, 1, 2];
196    let b: Vec<u8> = vec![1, 0, 1];
197
198    let ops = capture_diff_slices(Algorithm::Myers, &a, &b);
199
200    let delete_lengths = ops
201        .iter()
202        .filter_map(|op| match op {
203            DiffOp::Delete { old_len, .. } => Some(*old_len),
204            _ => None,
205        })
206        .collect::<Vec<_>>();
207
208    assert_eq!(delete_lengths, vec![1, 2, 1]);
209}
210
211#[test]
212fn test_myers_unbalanced_regressions() {
213    {
214        let mut old = (0..3_000u32).collect::<Vec<_>>();
215        let mut new = (0..2_999u32).collect::<Vec<_>>();
216
217        old[2_999] = 1_000_000;
218        new.push(2_000_000);
219        new.extend((0..100_000u32).map(|i| 3_000_000 + i));
220
221        let ops = capture_diff_slices(Algorithm::Myers, &old, &new);
222
223        assert_eq!(
224            ops,
225            vec![
226                DiffOp::Equal {
227                    old_index: 0,
228                    new_index: 0,
229                    len: 2_999,
230                },
231                DiffOp::Replace {
232                    old_index: 2_999,
233                    old_len: 1,
234                    new_index: 2_999,
235                    new_len: 100_001,
236                },
237            ]
238        );
239    }
240
241    {
242        let mut old = (0..3_008u32).collect::<Vec<_>>();
243        let mut new = (0..3_000u32).collect::<Vec<_>>();
244
245        // Make the old tail distinct from the new tail except for a single
246        // sparse overlap far into the new side.
247        for i in 0..8 {
248            old[3_000 + i] = 1_000_000 + i as u32;
249        }
250
251        new.extend((0..100_000u32).map(|i| 2_000_000 + i));
252        new[3_000 + 50_000] = 1_000_000;
253
254        let ops = capture_diff_slices(Algorithm::Myers, &old, &new);
255
256        // Ensure the sparse overlap is preserved (do not collapse into one
257        // large replace due to pathological fallback behavior).
258        assert!(ops.iter().any(|op| {
259            matches!(
260                op,
261                DiffOp::Equal {
262                    old_index: 3_000,
263                    new_index: 53_000,
264                    len: 1,
265                }
266            )
267        }));
268    }
269}