diffy_imara/diff/
mod.rs

1use imara_diff::{
2    intern::{InternedInput, Token},
3    sources::{byte_lines_with_terminator, lines_with_terminator},
4};
5
6use crate::{
7    patch::{Hunk, HunkRange, Line, Patch},
8    range::{DiffRange, SliceLike},
9    sink::DiffyDiffRangeBuilder,
10    Algorithm,
11};
12use std::{borrow::Cow, cmp, hash::Hash, ops};
13
14mod cleanup;
15mod myers;
16
17#[cfg(test)]
18mod tests;
19
20// TODO determine if this should be exposed in the public API
21#[allow(dead_code)]
22#[derive(Debug, PartialEq, Eq)]
23enum Diff<'a, T: ?Sized> {
24    Equal(&'a T),
25    Delete(&'a T),
26    Insert(&'a T),
27}
28
29impl<T: ?Sized> Copy for Diff<'_, T> {}
30
31impl<T: ?Sized> Clone for Diff<'_, T> {
32    fn clone(&self) -> Self {
33        *self
34    }
35}
36
37impl<'a, T> From<DiffRange<'a, 'a, T>> for Diff<'a, T>
38where
39    T: ?Sized + SliceLike,
40{
41    fn from(diff: DiffRange<'a, 'a, T>) -> Self {
42        match diff {
43            DiffRange::Equal(range, _) => Diff::Equal(range.as_slice()),
44            DiffRange::Delete(range) => Diff::Delete(range.as_slice()),
45            DiffRange::Insert(range) => Diff::Insert(range.as_slice()),
46        }
47    }
48}
49
50/// A collection of options for modifying the way a diff is performed
51#[derive(Debug)]
52pub struct DiffOptions {
53    compact: bool,
54    context_len: usize,
55    algorithm: Algorithm,
56    original_filename: Option<Cow<'static, str>>,
57    modified_filename: Option<Cow<'static, str>>,
58}
59
60impl DiffOptions {
61    /// Construct a new `DiffOptions` with default settings
62    ///
63    /// ## Defaults
64    /// * context_len = 3
65    /// * algorithm = Algorithm::Histogram
66    pub fn new() -> Self {
67        Self {
68            compact: true,
69            context_len: 3,
70            algorithm: Algorithm::Histogram,
71            original_filename: Some("original".into()),
72            modified_filename: Some("modified".into()),
73        }
74    }
75
76    /// Set the number of context lines that should be used when producing a patch
77    pub fn set_context_len(&mut self, context_len: usize) -> &mut Self {
78        self.context_len = context_len;
79        self
80    }
81
82    /// Enable/Disable diff compaction. Compaction is a post-processing step which attempts to
83    /// produce a prettier diff by reducing the number of edited blocks by shifting and merging
84    /// edit blocks.
85    // TODO determine if this should be exposed in the public API
86    #[allow(dead_code)]
87    fn set_compact(&mut self, compact: bool) -> &mut Self {
88        self.compact = compact;
89        self
90    }
91
92    /// Set the algorithm used to perform the diff.
93    pub fn set_algorithm(&mut self, algorithm: Algorithm) -> &mut Self {
94        self.algorithm = algorithm;
95        self
96    }
97
98    /// Set the filename to be used in the patch for the original text
99    ///
100    /// If not set, the default value is "original".
101    pub fn set_original_filename<T>(&mut self, filename: T) -> &mut Self
102    where
103        T: Into<Cow<'static, str>>,
104    {
105        self.original_filename = Some(filename.into());
106        self
107    }
108
109    /// Set the filename to be used in the patch for the modified text
110    ///
111    /// If not set, the default value is "modified".
112    pub fn set_modified_filename<T>(&mut self, filename: T) -> &mut Self
113    where
114        T: Into<Cow<'static, str>>,
115    {
116        self.modified_filename = Some(filename.into());
117        self
118    }
119
120    // TODO determine if this should be exposed in the public API
121    #[allow(dead_code)]
122    fn diff<'a>(&self, original: &'a str, modified: &'a str) -> Vec<Diff<'a, str>> {
123        let solution = myers::diff(original.as_bytes(), modified.as_bytes());
124
125        let mut solution = solution
126            .into_iter()
127            .map(|diff_range| diff_range.to_str(original, modified))
128            .collect();
129
130        if self.compact {
131            cleanup::compact(&mut solution);
132        }
133
134        solution.into_iter().map(Diff::from).collect()
135    }
136
137    /// Produce a Patch between two texts based on the configured options
138    pub fn create_patch<'a>(&self, original: &'a str, modified: &'a str) -> Patch<'a, str> {
139        let input = InternedInput::new(original, modified);
140        let old_lines: Vec<_> = lines_with_terminator(original).collect();
141        let new_lines: Vec<_> = lines_with_terminator(modified).collect();
142
143        let solution = self.diff_interned(&input);
144
145        let hunks = to_hunks(&old_lines, &new_lines, &solution, self.context_len);
146        Patch::new(
147            self.original_filename.clone(),
148            self.modified_filename.clone(),
149            hunks,
150        )
151    }
152
153    /// Create a patch between two potentially non-utf8 texts
154    pub fn create_patch_bytes<'a>(
155        &self,
156        original: &'a [u8],
157        modified: &'a [u8],
158    ) -> Patch<'a, [u8]> {
159        let input = InternedInput::new(original, modified);
160        let old_lines: Vec<_> = byte_lines_with_terminator(original).collect();
161        let new_lines: Vec<_> = byte_lines_with_terminator(modified).collect();
162
163        let solution = self.diff_interned(&input);
164
165        let hunks = to_hunks(&old_lines, &new_lines, &solution, self.context_len);
166
167        // helper function to convert a utf8 cow to a bytes cow
168        fn cow_str_to_bytes(cow: Cow<'static, str>) -> Cow<'static, [u8]> {
169            match cow {
170                Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()),
171                Cow::Owned(o) => Cow::Owned(o.into_bytes()),
172            }
173        }
174
175        Patch::new(
176            self.original_filename.clone().map(cow_str_to_bytes),
177            self.modified_filename.clone().map(cow_str_to_bytes),
178            hunks,
179        )
180    }
181
182    pub(crate) fn diff_interned<'a, T: Eq + Hash>(
183        &self,
184        input: &'a InternedInput<T>,
185    ) -> Vec<DiffRange<'a, 'a, [Token]>> {
186        let sink = DiffyDiffRangeBuilder::new(input);
187
188        imara_diff::diff(self.algorithm, input, sink)
189    }
190
191    pub(crate) fn diff_tokens<'a>(
192        &self,
193        before: &'a [Token],
194        after: &'a [Token],
195        num_tokens: u32,
196    ) -> Vec<DiffRange<'a, 'a, [Token]>> {
197        let sink = DiffyDiffRangeBuilder::from_tokens(before, after);
198
199        imara_diff::diff_with_tokens(self.algorithm, before, after, num_tokens, sink)
200    }
201}
202
203impl Default for DiffOptions {
204    fn default() -> Self {
205        Self::new()
206    }
207}
208
209// TODO determine if this should be exposed in the public API
210#[allow(dead_code)]
211fn diff<'a>(original: &'a str, modified: &'a str) -> Vec<Diff<'a, str>> {
212    DiffOptions::default().diff(original, modified)
213}
214
215/// Create a patch between two texts.
216///
217/// ```
218/// # use diffy_imara::create_patch;
219/// let original = "\
220/// I am afraid, however, that all I have known - that my story - will be forgotten.
221/// I am afraid for the world that is to come.
222/// Afraid that my plans will fail.
223/// Afraid of a doom worse than the Deepness.
224/// ";
225///
226/// let modified = "\
227/// I am afraid, however, that all I have known - that my story - will be forgotten.
228/// I am afraid for the world that is to come.
229/// Afraid that Alendi will fail.
230/// Afraid of a doom brought by the Deepness.
231/// ";
232///
233/// let expected = "\
234/// --- original
235/// +++ modified
236/// @@ -1,4 +1,4 @@
237///  I am afraid, however, that all I have known - that my story - will be forgotten.
238///  I am afraid for the world that is to come.
239/// -Afraid that my plans will fail.
240/// -Afraid of a doom worse than the Deepness.
241/// +Afraid that Alendi will fail.
242/// +Afraid of a doom brought by the Deepness.
243/// ";
244///
245/// let patch = create_patch(original, modified);
246/// assert_eq!(patch.to_string(), expected);
247/// ```
248pub fn create_patch<'a>(original: &'a str, modified: &'a str) -> Patch<'a, str> {
249    DiffOptions::default().create_patch(original, modified)
250}
251
252/// Create a patch between two potentially non-utf8 texts
253pub fn create_patch_bytes<'a>(original: &'a [u8], modified: &'a [u8]) -> Patch<'a, [u8]> {
254    DiffOptions::default().create_patch_bytes(original, modified)
255}
256
257fn to_hunks<'a, T: ?Sized>(
258    lines1: &[&'a T],
259    lines2: &[&'a T],
260    solution: &[DiffRange<[Token]>],
261    context_len: usize,
262) -> Vec<Hunk<'a, T>> {
263    let edit_script = build_edit_script(solution);
264
265    let mut hunks = Vec::new();
266
267    let mut idx = 0;
268    while let Some(mut script) = edit_script.get(idx) {
269        let start1 = script.old.start.saturating_sub(context_len);
270        let start2 = script.new.start.saturating_sub(context_len);
271
272        let (mut end1, mut end2) = calc_end(
273            context_len,
274            lines1.len(),
275            lines2.len(),
276            script.old.end,
277            script.new.end,
278        );
279
280        let mut lines = Vec::new();
281
282        // Pre-context
283        for line in lines2.get(start2..script.new.start).into_iter().flatten() {
284            lines.push(Line::Context(*line));
285        }
286
287        loop {
288            // Delete lines from text1
289            for line in lines1.get(script.old.clone()).into_iter().flatten() {
290                lines.push(Line::Delete(*line));
291            }
292
293            // Insert lines from text2
294            for line in lines2.get(script.new.clone()).into_iter().flatten() {
295                lines.push(Line::Insert(*line));
296            }
297
298            if let Some(s) = edit_script.get(idx + 1) {
299                // Check to see if we can merge the hunks
300                let start1_next =
301                    cmp::min(s.old.start, lines1.len() - 1).saturating_sub(context_len);
302                if start1_next < end1 {
303                    // Context lines between hunks
304                    for (_i1, i2) in (script.old.end..s.old.start).zip(script.new.end..s.new.start)
305                    {
306                        if let Some(line) = lines2.get(i2) {
307                            lines.push(Line::Context(*line));
308                        }
309                    }
310
311                    // Calc the new end
312                    let (e1, e2) = calc_end(
313                        context_len,
314                        lines1.len(),
315                        lines2.len(),
316                        s.old.end,
317                        s.new.end,
318                    );
319
320                    end1 = e1;
321                    end2 = e2;
322                    script = s;
323                    idx += 1;
324                    continue;
325                }
326            }
327
328            break;
329        }
330
331        // Post-context
332        for line in lines2.get(script.new.end..end2).into_iter().flatten() {
333            lines.push(Line::Context(*line));
334        }
335
336        let len1 = end1 - start1;
337        let old_range = HunkRange::new(if len1 > 0 { start1 + 1 } else { start1 }, len1);
338
339        let len2 = end2 - start2;
340        let new_range = HunkRange::new(if len2 > 0 { start2 + 1 } else { start2 }, len2);
341
342        hunks.push(Hunk::new(old_range, new_range, None, lines));
343        idx += 1;
344    }
345
346    hunks
347}
348
349fn calc_end(
350    context_len: usize,
351    text1_len: usize,
352    text2_len: usize,
353    script1_end: usize,
354    script2_end: usize,
355) -> (usize, usize) {
356    let post_context_len = cmp::min(
357        context_len,
358        cmp::min(
359            text1_len.saturating_sub(script1_end),
360            text2_len.saturating_sub(script2_end),
361        ),
362    );
363
364    let end1 = script1_end + post_context_len;
365    let end2 = script2_end + post_context_len;
366
367    (end1, end2)
368}
369
370#[derive(Debug)]
371struct EditRange {
372    old: ops::Range<usize>,
373    new: ops::Range<usize>,
374}
375
376impl EditRange {
377    fn new(old: ops::Range<usize>, new: ops::Range<usize>) -> Self {
378        Self { old, new }
379    }
380}
381
382fn build_edit_script<T>(solution: &[DiffRange<[T]>]) -> Vec<EditRange> {
383    let mut idx_a = 0;
384    let mut idx_b = 0;
385
386    let mut edit_script: Vec<EditRange> = Vec::new();
387    let mut script = None;
388
389    for diff in solution {
390        match diff {
391            DiffRange::Equal(range1, range2) => {
392                idx_a += range1.len();
393                idx_b += range2.len();
394                if let Some(script) = script.take() {
395                    edit_script.push(script);
396                }
397            }
398            DiffRange::Delete(range) => {
399                match script {
400                    Some(ref mut s) => s.old.end += range.len(),
401                    None => {
402                        script = Some(EditRange::new(idx_a..idx_a + range.len(), idx_b..idx_b));
403                    }
404                }
405                idx_a += range.len();
406            }
407            DiffRange::Insert(range) => {
408                match script {
409                    Some(ref mut s) => s.new.end += range.len(),
410                    None => {
411                        script = Some(EditRange::new(idx_a..idx_a, idx_b..idx_b + range.len()));
412                    }
413                }
414                idx_b += range.len();
415            }
416        }
417    }
418
419    if let Some(script) = script.take() {
420        edit_script.push(script);
421    }
422
423    edit_script
424}
425
426#[cfg(test)]
427mod test {
428    use super::DiffOptions;
429
430    #[test]
431    fn set_original_and_modified_filenames() {
432        let original = "\
433I am afraid, however, that all I have known - that my story - will be forgotten.
434I am afraid for the world that is to come.
435Afraid that my plans will fail.
436Afraid of a doom worse than the Deepness.
437";
438        let modified = "\
439I am afraid, however, that all I have known - that my story - will be forgotten.
440I am afraid for the world that is to come.
441Afraid that Alendi will fail.
442Afraid of a doom brought by the Deepness.
443";
444        let expected = "\
445--- the old version
446+++ the better version
447@@ -1,4 +1,4 @@
448 I am afraid, however, that all I have known - that my story - will be forgotten.
449 I am afraid for the world that is to come.
450-Afraid that my plans will fail.
451-Afraid of a doom worse than the Deepness.
452+Afraid that Alendi will fail.
453+Afraid of a doom brought by the Deepness.
454";
455
456        let patch = DiffOptions::new()
457            .set_original_filename("the old version")
458            .set_modified_filename("the better version")
459            .create_patch(original, modified);
460
461        assert_eq!(patch.to_string(), expected);
462    }
463}