libdiffsitter/
diff.rs

1//! Structs and other convenience methods for handling logical concepts pertaining to diffs, such
2//! as hunks.
3
4use crate::input_processing::{EditType, Entry};
5use crate::neg_idx_vec::NegIdxVec;
6use anyhow::Result;
7use logging_timer::time;
8use serde::Serialize;
9use std::fmt::Debug;
10use std::iter::FromIterator;
11use std::ops::Range;
12use thiserror::Error;
13
14/// Find the length of the common prefix between the ranges specified for `a` and `b`.
15fn common_prefix_len<T: PartialEq>(
16    a: &[T],
17    a_range: Range<usize>,
18    b: &[T],
19    b_range: Range<usize>,
20) -> usize {
21    let mut l = 0;
22
23    unsafe {
24        while a_range.start + l < a_range.end
25            && b_range.start + l < b_range.end
26            && a.get_unchecked(a_range.start + l) == b.get_unchecked(b_range.start + l)
27        {
28            l += 1;
29        }
30    }
31    l
32}
33
34/// Coordinates for different inputs
35///
36/// A coordinate pair that corresponds to the two inputs in a diff algorithm.
37#[derive(Debug, PartialEq, Eq, Clone, Copy)]
38struct Coordinates<T>
39where
40    T: Debug + PartialEq + Eq + Clone + Copy,
41{
42    /// The index in the old input
43    pub old: T,
44
45    /// The index in the new input
46    pub new: T,
47}
48
49/// Convert a range with into another numeric type.
50///
51/// This will panic if the values cannot be converted to the target type. This is better than using
52/// `as` because it will explicitly panic instead of silently wrapping around.
53fn convert_range<FromType, IntoType>(range: Range<FromType>) -> Range<IntoType>
54where
55    FromType: TryInto<IntoType>,
56    <FromType as TryInto<IntoType>>::Error: std::fmt::Debug,
57{
58    range.start.try_into().unwrap()..range.end.try_into().unwrap()
59}
60
61/// Find the length of the common suffix between the ranges specified for `a` and `b`.
62/// The ranges are assumed to be [inclusive, exclusive).
63fn common_suffix_len<T: PartialEq>(
64    a: &[T],
65    a_range: Range<usize>,
66    b: &[T],
67    b_range: Range<usize>,
68) -> usize {
69    let mut l: isize = 1;
70    let a_range: Range<isize> = convert_range(a_range);
71    let b_range: Range<isize> = convert_range(b_range);
72    unsafe {
73        while a_range.end - l >= a_range.start
74            && b_range.end - l >= b_range.start
75            && a.get_unchecked::<usize>((a_range.end - l).try_into().unwrap())
76                == b.get_unchecked::<usize>((b_range.end - l).try_into().unwrap())
77        {
78            l += 1;
79        }
80    }
81    (l - 1).try_into().unwrap()
82}
83
84/// The edit information representing a line
85#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
86pub struct Line<'a> {
87    /// The index of the line in the original document
88    pub line_index: usize,
89    /// The entries corresponding to the line
90    pub entries: Vec<&'a Entry<'a>>,
91}
92
93impl<'a> Line<'a> {
94    #[must_use]
95    pub fn new(line_index: usize) -> Self {
96        Line {
97            line_index,
98            entries: Vec::new(),
99        }
100    }
101}
102
103/// A grouping of consecutive edit lines for a document
104///
105/// Every line in a hunk must be consecutive and in ascending order.
106#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
107pub struct Hunk<'a>(pub Vec<Line<'a>>);
108
109/// Types of errors that come up when inserting an entry to a hunk
110#[derive(Debug, Error)]
111pub enum HunkInsertionError {
112    #[error(
113        "Non-adjacent entry (line {incoming_line:?}) added to hunk (last line: {last_line:?})"
114    )]
115    NonAdjacentHunk {
116        incoming_line: usize,
117        last_line: usize,
118    },
119
120    #[error("Attempted to append an entry with a line index ({incoming_line:?}) less than the first line's index ({last_line:?})")]
121    PriorLine {
122        incoming_line: usize,
123        last_line: usize,
124    },
125
126    #[error("Attempted to append an entry with a column ({incoming_col:?}, line: {incoming_line:?}) less than the first entry's column ({last_col:?}, line: {last_line:?})")]
127    PriorColumn {
128        incoming_col: usize,
129        incoming_line: usize,
130        last_col: usize,
131        last_line: usize,
132    },
133}
134
135impl<'a> Hunk<'a> {
136    /// Create a new, empty hunk
137    #[must_use]
138    pub fn new() -> Self {
139        Hunk(Vec::new())
140    }
141
142    /// Returns the first line number of the hunk
143    ///
144    /// This will return [None] if the internal vector is empty
145    #[must_use]
146    pub fn first_line(&self) -> Option<usize> {
147        self.0.first().map(|x| x.line_index)
148    }
149
150    /// Returns the last line number of the hunk
151    ///
152    /// This will return [None] if the internal vector is empty
153    #[must_use]
154    pub fn last_line(&self) -> Option<usize> {
155        self.0.last().map(|x| x.line_index)
156    }
157
158    /// Returns whether an entry can be pushed onto the current hunk.
159    ///
160    /// This method is exposed so users can check if the push back operation would fail. This is
161    /// useful if you want to avoid needing to do extra copies in case an incoming entry is can't
162    /// be added to the entry and you don't want to have to deal with copying the entry for the
163    /// next hunk.
164    ///
165    /// This returns a result with the specific reason the entry couldn't be inserted.
166    pub fn can_push_back(&self, entry: &Entry<'a>) -> Result<(), HunkInsertionError> {
167        let incoming_line_idx = entry.start_position().row;
168
169        // Create a new line if the incoming entry is on the next line. This will throw an error
170        // if we have an entry on a non-adjacent line or an out-of-order insertion.
171        if let Some(last_line) = self.0.last() {
172            let last_line_idx = last_line.line_index;
173
174            if incoming_line_idx < last_line_idx {
175                return Err(HunkInsertionError::PriorLine {
176                    incoming_line: incoming_line_idx,
177                    last_line: last_line_idx,
178                });
179            } else if incoming_line_idx - last_line_idx > 1 {
180                return Err(HunkInsertionError::NonAdjacentHunk {
181                    incoming_line: incoming_line_idx,
182                    last_line: last_line_idx,
183                });
184            }
185            // Only check the incoming column if the new entry would be appended to the same line
186            if incoming_line_idx == last_line_idx && !last_line.entries.is_empty() {
187                let last_entry = last_line.entries.last().unwrap();
188                let last_col = last_entry.end_position().column;
189                let last_line = last_entry.end_position().row;
190                let incoming_col = entry.start_position().column;
191                let incoming_line = entry.end_position().row;
192                if incoming_col < last_col {
193                    return Err(HunkInsertionError::PriorColumn {
194                        incoming_col,
195                        last_col,
196                        incoming_line,
197                        last_line,
198                    });
199                }
200            }
201        }
202        Ok(())
203    }
204
205    /// Append an [entry](Entry) to a hunk.
206    ///
207    /// Entries can only be appended in ascending order (first to last). It is an error to append
208    /// entries out of order. For example, you can't insert an entry on line 1 after inserting an
209    /// entry on line 5.
210    pub fn push_back(&mut self, entry: &'a Entry<'a>) -> Result<(), HunkInsertionError> {
211        self.can_push_back(entry)?;
212        let incoming_line_idx = entry.start_position().row;
213
214        // Don't need to check the other conditions because other scenarios like adding a prior
215        // line or a non-adjacent line.
216        if self.0.is_empty() || incoming_line_idx - self.0.last().unwrap().line_index == 1 {
217            self.0.push(Line::new(incoming_line_idx));
218        }
219        // This doesn't need to be checked because we add a line if there isn't one already in the
220        // vector so the unwrap won't fail
221        let last_line = self.0.last_mut().unwrap();
222        last_line.entries.push(entry);
223        Ok(())
224    }
225}
226
227impl<'a> Default for Hunk<'a> {
228    fn default() -> Self {
229        Self::new()
230    }
231}
232
233/// A generic type for diffs that source from one of two documents.
234///
235/// A lot of items in the diff are delineated by whether they come from the old document or the new
236/// one. This enum generically defines an enum wrapper over those document types.
237#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
238pub enum DocumentType<T: Debug + PartialEq + Serialize + Clone> {
239    Old(T),
240    New(T),
241}
242
243impl<T> AsRef<T> for DocumentType<T>
244where
245    T: Debug + Clone + PartialEq + Serialize,
246{
247    fn as_ref(&self) -> &T {
248        match self {
249            Self::Old(x) | Self::New(x) => x,
250        }
251    }
252}
253
254impl<T> AsMut<T> for DocumentType<T>
255where
256    T: Debug + Clone + PartialEq + Serialize,
257{
258    fn as_mut(&mut self) -> &mut T {
259        match self {
260            Self::Old(x) | Self::New(x) => x,
261        }
262    }
263}
264
265impl<T: Debug + Clone + PartialEq + Serialize> DocumentType<T> {
266    /// Move the inner object out and consume it.
267    fn consume(self) -> T {
268        match self {
269            Self::Old(x) | Self::New(x) => x,
270        }
271    }
272}
273
274/// A hunk with metadata about which document it came from.
275pub type RichHunk<'a> = DocumentType<Hunk<'a>>;
276
277/// The hunks that correspond to a document
278///
279/// This type implements a helper builder function that can take
280#[derive(Debug, Clone, PartialEq, Eq)]
281pub struct Hunks<'a>(pub Vec<Hunk<'a>>);
282
283#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
284pub struct RichHunks<'a>(pub Vec<RichHunk<'a>>);
285
286/// A builder struct for [`RichHunks`].
287///
288/// The builder struct allows us to maintain some state as we build [`RichHunks`].
289pub struct RichHunksBuilder<'a> {
290    /// The hunks that we're trying to build
291    hunks: RichHunks<'a>,
292
293    /// The last old entry seen so far (if any).
294    last_old: Option<usize>,
295
296    /// The last new entry seen so far (if any).
297    last_new: Option<usize>,
298}
299
300impl<'a> RichHunksBuilder<'a> {
301    #[must_use]
302    pub fn new() -> Self {
303        Self {
304            hunks: RichHunks(Vec::new()),
305            last_old: None,
306            last_new: None,
307        }
308    }
309
310    /// Finalize building the hunks.
311    ///
312    /// This consumes the builder, which means you won't be able to add to it any more.
313    #[must_use]
314    pub fn build(self) -> RichHunks<'a> {
315        self.hunks
316    }
317
318    /// Initialize a new hunk struct for the incoming entry if necessary.
319    ///
320    /// This returns the hunk that the entry should be added to.
321    fn get_hunk_for_insertion(
322        &mut self,
323        incoming_entry: &DocumentType<&'a Entry<'a>>,
324    ) -> Result<usize, HunkInsertionError> {
325        let (mut last_idx, new_hunk) = match incoming_entry {
326            DocumentType::Old(_) => (self.last_old, DocumentType::Old(Hunk::new())),
327            DocumentType::New(_) => (self.last_new, DocumentType::New(Hunk::new())),
328        };
329
330        match last_idx {
331            // If there is no hunk for this type, we create a new one and will add the incoming
332            // entry to it.
333            None => {
334                self.hunks.0.push(new_hunk);
335                last_idx = Some(self.hunks.0.len() - 1);
336            }
337            // If there is a reference to the last hunk that corresponds to the incoming entry's
338            // document type, we check the line numbers and create a new hunk if necessary.
339            Some(idx) => {
340                let last_hunk = self.hunks.0[idx].as_ref();
341                let last_line = last_hunk.last_line();
342
343                // If the hunk is populated, we only add to it if the incoming entry is on the same
344                // line as the last hunk for the same type. Otherwise we break and create a new
345                // one. If the hunk is empty, we can obviously add to it, so we do nothing.
346                if let Some(last_line) = last_line {
347                    let incoming_line = incoming_entry.as_ref().end_position.row;
348
349                    if incoming_line < last_line {
350                        return Err(HunkInsertionError::PriorLine {
351                            incoming_line,
352                            last_line,
353                        });
354                    }
355
356                    // If we have a non-adjacent edit, we need to create a new hunk. If the last
357                    // hunk for this document type isn't the last edit, we need to create a new
358                    // hunk because the edit chain has been broken.
359                    if incoming_line - last_line > 1 {
360                        self.hunks.0.push(new_hunk);
361                        last_idx = Some(self.hunks.0.len() - 1);
362                    }
363
364                    // Otherwise, the line number must be equal
365                }
366            }
367        };
368
369        match incoming_entry {
370            DocumentType::Old(_) => self.last_old = last_idx,
371            DocumentType::New(_) => self.last_new = last_idx,
372        }
373        Ok(last_idx.unwrap())
374    }
375
376    /// Add an entry to the hunks.
377    pub fn push_back(&mut self, entry_wrapper: DocumentType<&'a Entry<'a>>) -> Result<()> {
378        let insertion_idx = self.get_hunk_for_insertion(&entry_wrapper)?;
379        self.hunks.0[insertion_idx]
380            .as_mut()
381            .push_back(entry_wrapper.consume())?;
382        Ok(())
383    }
384}
385
386impl<'a> Default for RichHunksBuilder<'a> {
387    fn default() -> Self {
388        Self::new()
389    }
390}
391
392impl<'a> Hunks<'a> {
393    #[must_use]
394    pub fn new() -> Self {
395        Hunks(Vec::new())
396    }
397
398    pub fn push_back(&mut self, entry: &'a Entry<'a>) -> Result<()> {
399        if let Some(hunk) = self.0.last_mut() {
400            match hunk.can_push_back(entry) {
401                Ok(()) => hunk.push_back(entry),
402                // If the entry is not contiguous with the current hunk then we need to start a new
403                // one
404                Err(HunkInsertionError::NonAdjacentHunk {
405                    incoming_line: _,
406                    last_line: _,
407                }) => {
408                    let mut hunk = Hunk::new();
409                    hunk.push_back(entry)?;
410                    self.0.push(hunk);
411                    Ok(())
412                }
413                Err(e) => Err(e),
414            }?;
415        } else {
416            self.0.push(Hunk::new());
417            self.0.last_mut().unwrap().push_back(entry)?;
418        }
419        Ok(())
420    }
421}
422
423impl<'a> Default for Hunks<'a> {
424    fn default() -> Self {
425        Self::new()
426    }
427}
428
429pub struct HunkAppender<'a>(pub Hunks<'a>);
430
431impl<'a> FromIterator<&'a Entry<'a>> for HunkAppender<'a> {
432    /// Create an instance of `Hunks` from an iterator over [entries](Entry).
433    ///
434    /// The user is responsible for making sure that the hunks are in proper order, otherwise this
435    /// constructor may panic.
436    fn from_iter<T>(iter: T) -> Self
437    where
438        T: IntoIterator<Item = &'a Entry<'a>>,
439    {
440        let mut hunks = Hunks::new();
441
442        for i in iter {
443            hunks.push_back(i).expect("Invalid iterator");
444        }
445        HunkAppender(hunks)
446    }
447}
448
449/// A difference engine provider
450///
451/// Any entity that implements this trait is responsible for providing a method
452/// that computes the diff between two inputs.
453pub trait Engine<'elem, T>
454where
455    T: Eq + 'elem,
456{
457    /// The container type to returned from the `diff` function
458    type Container;
459
460    /// Compute the shortest edit sequence that will turn `a` into `b`
461    fn diff(&self, a: &'elem [T], b: &'elem [T]) -> Self::Container;
462}
463
464#[derive(Eq, PartialEq, Copy, Clone, Debug, Default)]
465pub struct Myers {}
466
467impl<'elem, T> Engine<'elem, T> for Myers
468where
469    T: Eq + 'elem + std::fmt::Debug,
470{
471    type Container = Vec<EditType<&'elem T>>;
472
473    fn diff(&self, a: &'elem [T], b: &'elem [T]) -> Self::Container {
474        // We know the worst case is deleting everything from a and inserting everything from b
475        let mut res = Vec::with_capacity(a.len() + b.len());
476        let mut frontiers = MyersFrontiers::new(a.len(), b.len());
477        Myers::diff_impl(&mut res, a, 0..a.len(), b, 0..b.len(), &mut frontiers);
478        res
479    }
480}
481
482/// Information relevant for a middle snake calculation
483#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
484pub struct MidSnakeInfo {
485    /// The index in `a` that corresponds to the middle snake
486    pub a_split: i32,
487
488    /// The index in `b` that corresponds to the middle snake
489    pub b_split: i32,
490
491    /// the full length of the optimal path between the two inputs
492    pub optimal_len: u32,
493}
494
495/// Split a range at the specified index
496fn split_range(r: &Range<usize>, idx: usize) -> (Range<usize>, Range<usize>) {
497    (r.start..idx, idx..r.end)
498}
499
500/// The frontiers for the Myers diff algorithm
501///
502/// We define this externally to the recursive method so we can allocate once and reuse between
503/// recursive calls.
504pub struct MyersFrontiers {
505    /// Stores the longest path seen from the old input to the new input
506    pub forward: NegIdxVec<i32>,
507
508    /// Stores the longest path seen from the new input to the old input
509    pub reverse: NegIdxVec<i32>,
510}
511
512impl MyersFrontiers {
513    /// Construct frontiers for the given input sizes
514    fn new(old_len: usize, new_len: usize) -> Self {
515        let midpoint = ((old_len + new_len) as f32 / 2.00).ceil() as i32 + 1;
516
517        // The size of the frontier vector
518        let vec_length = (midpoint * 2) as usize;
519
520        MyersFrontiers {
521            forward: vec![0; vec_length].into(),
522            reverse: vec![0; vec_length].into(),
523        }
524    }
525}
526
527impl Myers {
528    /// A helper implementation function that handles the recursive end of finding a diff using
529    /// Myers' algorithm.
530    fn diff_impl<'elem, T: Eq + Debug + 'elem>(
531        res: &mut Vec<EditType<&'elem T>>,
532        old: &'elem [T],
533        mut old_range: Range<usize>,
534        new: &'elem [T],
535        mut new_range: Range<usize>,
536        frontiers: &mut MyersFrontiers,
537    ) {
538        // Initial optimizations: we can skip the common prefix + suffix
539        let common_pref_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
540        old_range.start += common_pref_len;
541        new_range.start += common_pref_len;
542
543        let common_suf_len = common_suffix_len(old, old_range.clone(), new, new_range.clone());
544        // Need to make sure our begin/end ranges don't overlap
545        old_range.end = old_range.start.max(old_range.end - common_suf_len);
546        new_range.end = new_range.start.max(new_range.end - common_suf_len);
547
548        // We know if either or both of the inputs are empty, we don't have to bother finding the
549        // middle snake
550        if old_range.is_empty() && new_range.is_empty() {
551            return;
552        }
553
554        if old_range.is_empty() {
555            for i in new_range {
556                res.push(EditType::Addition(&new[i]));
557            }
558            return;
559        }
560
561        if new_range.is_empty() {
562            for i in old_range {
563                res.push(EditType::Deletion(&old[i]));
564            }
565            return;
566        }
567
568        let Coordinates {
569            old: x_start,
570            new: y_start,
571        } = Myers::middle_snake(old, old_range.clone(), new, new_range.clone(), frontiers);
572
573        // divide and conquer along the middle snake
574        let (old_first_half, old_second_half) = split_range(&old_range, x_start);
575        let (new_first_half, new_second_half) = split_range(&new_range, y_start);
576
577        Myers::diff_impl(res, old, old_first_half, new, new_first_half, frontiers);
578        Myers::diff_impl(res, old, old_second_half, new, new_second_half, frontiers);
579    }
580
581    /// Calculate the (x, y) coordinates of the midpoint of the optimal path.
582    ///
583    /// This implementation directly derives from "An O(ND) Difference Algorithm and Its Variations"
584    /// by Myers. This will compute the location of the middle snake and the length of the optimal
585    /// shortest edit script.
586    fn middle_snake<T: Eq>(
587        old: &[T],
588        old_range: Range<usize>,
589        new: &[T],
590        new_range: Range<usize>,
591        frontiers: &mut MyersFrontiers,
592    ) -> Coordinates<usize> {
593        let n = old_range.len() as i32;
594        let m = new_range.len() as i32;
595        let delta = n - m;
596        let is_odd = delta % 2 == 1;
597        let midpoint = ((m + n) as f32 / 2.00).ceil() as i32 + 1;
598
599        let fwd_front = &mut frontiers.forward;
600        let rev_front = &mut frontiers.reverse;
601
602        fwd_front[1] = 0;
603        rev_front[1] = 0;
604
605        for d in 0..=midpoint {
606            // Find the end of the furthest reaching forward d-path
607            for k in (-d..=d).rev().step_by(2) {
608                // k == -d and k != d are just bounds checks to make sure we don't try to compare
609                // values outside of the [-d, d] range. We check for the furthest reaching forward
610                // frontier by seeing which diagonal has the highest x value.
611                let mut x = if k == -d || (k != d && fwd_front[k + 1] >= fwd_front[k - 1]) {
612                    // If the longest diagonal is from the vertically connected d - 1 path. The y
613                    // component is implicitly added when we compute y below with a different k value.
614                    fwd_front[k + 1]
615                } else {
616                    // If the longest diagonal is from the horizontally connected d - 1 path. We
617                    // add one here for the horizontal connection (x, y) -> (x + 1, y).
618                    fwd_front[k - 1] + 1
619                };
620                let y = x - k;
621
622                // Coordinates of the first point in the snake
623                let (x0, y0) = (x, y);
624
625                // Extend the snake
626                if x < n && y < m {
627                    debug_assert!(x >= 0);
628                    debug_assert!(y >= 0);
629
630                    let common_pref_len = common_prefix_len(
631                        old,
632                        old_range.start + (x as usize)..old_range.end,
633                        new,
634                        new_range.start + (y as usize)..new_range.end,
635                    );
636                    x += common_pref_len as i32;
637                }
638
639                fwd_front[k] = x;
640
641                // If delta is odd and k is in the defined range
642                if is_odd && (k - delta).abs() < d {
643                    // If the path overlaps the furthest reaching reverse d - 1 path in diagonal k
644                    // then the length of an SES is 2D - 1, and the last snake of the forward path
645                    // is the middle snake.
646                    let reverse_x = rev_front[-(k - delta)];
647
648                    // We convert everything over to signed integers first so we can check for any
649                    // overflow errors.
650                    let old = (old_range.start as i32) + x0;
651                    let new = (new_range.start as i32) + y0;
652
653                    debug_assert!(
654                        old >= (old_range.start as i32) && old <= (old_range.end as i32),
655                        "expected old={} in {}..{}",
656                        old,
657                        old_range.start,
658                        old_range.end,
659                    );
660                    debug_assert!(
661                        new >= (new_range.start as i32) && new <= (new_range.end as i32),
662                        "expected new={} in {}..{}",
663                        new,
664                        new_range.start,
665                        new_range.end,
666                    );
667
668                    // NOTE: we can convert x and y to `usize` because they are both within
669                    // the range of the length of the inputs, which are valid usize values. This property
670                    // is also checked with assertions in debug releases.
671                    if x + reverse_x >= n {
672                        return Coordinates {
673                            old: old as usize,
674                            new: new as usize,
675                        };
676                    }
677                }
678            }
679
680            // Find the end of the furthest reaching reverse d-path
681            for k in (-d..=d).rev().step_by(2) {
682                // k == d and k != -d are just bounds checks to make sure we don't try to compare
683                // anything out of range, as explained above. In the reverse path we check to see
684                // which diagonal has the smallest *real* x value because we're trying to go from
685                // the bottom-right to the top-left of the matrix. Note that we're looking for the
686                // biggest x value in the reverse frontier, which will be subtracted from the total
687                // length.
688                let mut x = if k == -d || (k != d && rev_front[k + 1] >= rev_front[k - 1]) {
689                    // If the longest diagonal is from the horizontally connected d - 1 path.
690                    rev_front[k + 1]
691                } else {
692                    // If the longest diagonal is from the vertically connected d - 1 path. The y
693                    // value is implicitly handled when we compute y with a different k value.
694                    rev_front[k - 1] + 1
695                };
696                let mut y = x - k;
697
698                // Advance the diagonal as far as possible
699                if x < n && y < m {
700                    debug_assert!(x >= 0);
701                    debug_assert!(y >= 0);
702                    debug_assert!(n - x >= 0);
703                    debug_assert!(m - y >= 0);
704
705                    let common_suf_len = common_suffix_len(
706                        old,
707                        old_range.start..old_range.start + (n as usize) - (x as usize),
708                        new,
709                        new_range.start..new_range.start + (m as usize) - (y as usize),
710                    );
711                    x += common_suf_len as i32;
712                    y += common_suf_len as i32;
713                }
714                rev_front[k] = x;
715
716                // If delta is even and k is in the defined range, check for an overlap
717                if !is_odd && (k - delta).abs() <= d {
718                    let forward_x = fwd_front[-(k - delta)];
719
720                    // If forward_x + reverse_x >= n, the forward and backward paths make up a full
721                    // path, so we have a possible overlap. So return the furthest reaching reverse
722                    // path as the middle snake.
723                    // NOTE: that we can convert x and y to `usize` because they are both within
724                    // the range of the length of the inputs, which are valid usize values.
725                    if forward_x + x >= n {
726                        let old = n - x + (old_range.start as i32);
727                        let new = m - y + (new_range.start as i32);
728
729                        debug_assert!(
730                            old >= (old_range.start as i32) && old <= (old_range.end as i32),
731                            "expected old={} in {}..{}",
732                            old,
733                            old_range.start,
734                            old_range.end,
735                        );
736                        debug_assert!(
737                            new >= (new_range.start as i32) && new <= (new_range.end as i32),
738                            "expected new={} in {}..{}",
739                            new,
740                            new_range.start,
741                            new_range.end,
742                        );
743
744                        return Coordinates {
745                            old: old as usize,
746                            new: new as usize,
747                        };
748                    }
749                }
750            }
751        }
752        unreachable!();
753    }
754}
755
756impl<'a> TryFrom<Vec<EditType<&'a Entry<'a>>>> for RichHunks<'a> {
757    type Error = anyhow::Error;
758
759    fn try_from(edits: Vec<EditType<&'a Entry<'a>>>) -> Result<Self, Self::Error> {
760        let mut builder = RichHunksBuilder::new();
761        for edit_wrapper in edits {
762            // TODO: deal with the clone here
763            let edit = match edit_wrapper {
764                EditType::Addition(edit) => DocumentType::New(edit),
765                EditType::Deletion(edit) => DocumentType::Old(edit),
766            };
767            builder.push_back(edit)?;
768        }
769        Ok(builder.build())
770    }
771}
772
773/// Compute the hunks corresponding to the minimum edit path between two documents.
774///
775/// This will process the the AST vectors with the user-provided settings.
776///
777/// This will return two groups of [hunks](diff::Hunks) in a tuple of the form
778/// `(old_hunks, new_hunks)`.
779#[time("info", "diff::{}")]
780pub fn compute_edit_script<'a>(
781    old: &'a [Entry<'a>],
782    new: &'a [Entry<'a>],
783) -> Result<RichHunks<'a>> {
784    let myers = Myers::default();
785    let edit_script = myers.diff(old, new);
786    RichHunks::try_from(edit_script)
787}
788
789#[cfg(test)]
790mod tests {
791    use super::*;
792    use pretty_assertions::assert_eq as p_assert_eq;
793    use test_case::test_case;
794
795    /// A convenience function to invoke the a Myers diff
796    fn myers_diff<'a, T>(a: &'a [T], b: &'a [T]) -> Vec<EditType<&'a T>>
797    where
798        T: 'a + Eq + Debug,
799    {
800        let myers = Myers::default();
801        myers.diff(a, b)
802    }
803
804    #[test]
805    fn mid_snake_empty_input() {
806        let input_a = b"";
807        let input_b = b"";
808
809        let mut frontiers = MyersFrontiers::new(input_a.len(), input_b.len());
810        let mid_snake = Myers::middle_snake(
811            &input_a[..],
812            0..input_a.len(),
813            &input_b[..],
814            0..input_b.len(),
815            &mut frontiers,
816        );
817        let expected = Coordinates { old: 0, new: 0 };
818        p_assert_eq!(expected, mid_snake);
819    }
820
821    #[test]
822    fn mid_snake() {
823        let input_a = &b"ABCABBA"[..];
824        let input_b = &b"CBABAC"[..];
825        let mut frontiers = MyersFrontiers::new(input_a.len(), input_b.len());
826        let mid_snake = Myers::middle_snake(
827            input_a,
828            0..input_a.len(),
829            input_b,
830            0..input_b.len(),
831            &mut frontiers,
832        );
833        let expected = Coordinates { old: 4, new: 1 };
834        p_assert_eq!(expected, mid_snake);
835    }
836
837    #[test]
838    fn myers_diff_empty_inputs() {
839        let input_a: Vec<i32> = vec![];
840        let input_b: Vec<i32> = vec![];
841        let edit_script = myers_diff(&input_a, &input_b);
842        assert!(edit_script.is_empty());
843    }
844
845    #[test]
846    fn myers_diff_no_diff() {
847        let input_a: Vec<i32> = vec![0; 4];
848        let input_b: Vec<i32> = vec![0; 4];
849        let edit_script = myers_diff(&input_a, &input_b);
850        assert!(edit_script.is_empty());
851    }
852
853    #[test]
854    fn myers_diff_one_addition() {
855        let input_a: Vec<i32> = Vec::new();
856        let input_b: Vec<i32> = vec![0];
857        let expected = vec![EditType::Addition(&input_b[0])];
858        let edit_script = myers_diff(&input_a, &input_b);
859        p_assert_eq!(expected, edit_script);
860    }
861
862    #[test]
863    fn myers_diff_one_deletion() {
864        let input_a: Vec<i32> = vec![0];
865        let input_b: Vec<i32> = Vec::new();
866        let expected = vec![EditType::Deletion(&input_a[0])];
867        let edit_script = myers_diff(&input_a, &input_b);
868        p_assert_eq!(expected, edit_script);
869    }
870
871    #[test]
872    fn myers_diff_single_substitution() {
873        let myers = Myers::default();
874        let input_a = [1];
875        let input_b = [2];
876        let edit_script = myers.diff(&input_a[..], &input_b[..]);
877        let expected = vec![
878            EditType::Addition(&input_b[0]),
879            EditType::Deletion(&input_a[0]),
880        ];
881        p_assert_eq!(expected, edit_script);
882    }
883
884    #[test]
885    fn myers_diff_single_substitution_with_common_elements() {
886        let myers = Myers::default();
887        let input_a = [0, 0, 0];
888        let input_b = [0, 1, 0];
889        let edit_script = myers.diff(&input_a[..], &input_b[..]);
890        let expected = vec![
891            EditType::Addition(&input_b[1]),
892            EditType::Deletion(&input_a[1]),
893        ];
894        p_assert_eq!(expected, edit_script);
895    }
896
897    #[test_case(b"BAAA", b"CAAA" => 0 ; "no common prefix")]
898    #[test_case(b"AAABA", b"AAACA" => 3 ; "with common prefix")]
899    fn common_prefix(a: &[u8], b: &[u8]) -> usize {
900        common_prefix_len(a, 0..a.len(), b, 0..b.len())
901    }
902
903    #[test_case(b"AAAB", b"AAAC" => 0 ; "no common suffix")]
904    #[test_case(b"ABAAA", b"ACAAA" => 3 ; "with common suffix")]
905    fn common_suffix(a: &[u8], b: &[u8]) -> usize {
906        common_suffix_len(a, 0..a.len(), b, 0..b.len())
907    }
908}
libdiffsitter/diff.rs

libdiffsitter/
diff.rs