diffy_fork_filenames/diff/
mod.rs

1use crate::{
2    patch::{Hunk, HunkRange, Line, Patch},
3    range::{DiffRange, SliceLike},
4    utils::Classifier,
5};
6use std::{cmp, ops};
7
8mod cleanup;
9mod myers;
10
11#[cfg(test)]
12mod tests;
13
14// TODO determine if this should be exposed in the public API
15#[allow(dead_code)]
16#[derive(Debug, PartialEq, Eq)]
17enum Diff<'a, T: ?Sized> {
18    Equal(&'a T),
19    Delete(&'a T),
20    Insert(&'a T),
21}
22
23impl<T: ?Sized> Copy for Diff<'_, T> {}
24
25impl<T: ?Sized> Clone for Diff<'_, T> {
26    fn clone(&self) -> Self {
27        *self
28    }
29}
30
31impl<'a, T> From<DiffRange<'a, 'a, T>> for Diff<'a, T>
32where
33    T: ?Sized + SliceLike,
34{
35    fn from(diff: DiffRange<'a, 'a, T>) -> Self {
36        match diff {
37            DiffRange::Equal(range, _) => Diff::Equal(range.as_slice()),
38            DiffRange::Delete(range) => Diff::Delete(range.as_slice()),
39            DiffRange::Insert(range) => Diff::Insert(range.as_slice()),
40        }
41    }
42}
43
44/// A collection of options for modifying the way a diff is performed
45#[derive(Debug)]
46pub struct DiffOptions {
47    compact: bool,
48    context_len: usize,
49}
50
51impl DiffOptions {
52    /// Construct a new `DiffOptions` with default settings
53    ///
54    /// ## Defaults
55    /// * context_len = 3
56    pub fn new() -> Self {
57        Self {
58            compact: true,
59            context_len: 3,
60        }
61    }
62
63    /// Set the number of context lines that should be used when producing a patch
64    pub fn set_context_len(&mut self, context_len: usize) -> &mut Self {
65        self.context_len = context_len;
66        self
67    }
68
69    /// Enable/Disable diff compaction. Compaction is a post-processing step which attempts to
70    /// produce a prettier diff by reducing the number of edited blocks by shifting and merging
71    /// edit blocks.
72    // TODO determine if this should be exposed in the public API
73    #[allow(dead_code)]
74    fn set_compact(&mut self, compact: bool) -> &mut Self {
75        self.compact = compact;
76        self
77    }
78
79    // TODO determine if this should be exposed in the public API
80    #[allow(dead_code)]
81    fn diff<'a>(&self, original: &'a str, modified: &'a str) -> Vec<Diff<'a, str>> {
82        let solution = myers::diff(original.as_bytes(), modified.as_bytes());
83
84        let mut solution = solution
85            .into_iter()
86            .map(|diff_range| diff_range.to_str(original, modified))
87            .collect();
88
89        if self.compact {
90            cleanup::compact(&mut solution);
91        }
92
93        solution.into_iter().map(Diff::from).collect()
94    }
95
96    /// Produce a Patch between two texts based on the configured options
97    pub fn create_patch<'a>(&self, original: &'a str, modified: &'a str) -> Patch<'a, str> {
98        self.construct_patch(original, modified, "original", "modified")
99    }
100
101    /// Produce a Patch between two texts from files based on the configured options
102    pub fn create_file_patch<'a>(&self, original: &'a str, modified: &'a str, original_filename: &'a str, modified_filename: &'a str) -> Patch<'a, str> {
103        self.construct_patch(original, modified, original_filename, modified_filename)
104    }
105
106    /// Create a patch between two potentially non-utf8 texts
107    pub fn create_patch_bytes<'a>(
108        &self,
109        original: &'a [u8],
110        modified: &'a [u8],
111    ) -> Patch<'a, [u8]> {
112        let mut classifier = Classifier::default();
113        let (old_lines, old_ids) = classifier.classify_lines(original);
114        let (new_lines, new_ids) = classifier.classify_lines(modified);
115
116        let solution = self.diff_slice(&old_ids, &new_ids);
117
118        let hunks = to_hunks(&old_lines, &new_lines, &solution, self.context_len);
119        Patch::new(Some(&b"original"[..]), Some(&b"modified"[..]), hunks)
120    }
121
122    pub(crate) fn diff_slice<'a, T: PartialEq>(
123        &self,
124        old: &'a [T],
125        new: &'a [T],
126    ) -> Vec<DiffRange<'a, 'a, [T]>> {
127        let mut solution = myers::diff(old, new);
128
129        if self.compact {
130            cleanup::compact(&mut solution);
131        }
132
133        solution
134    }
135
136    fn construct_patch<'a>(&self, original: &'a str, modified: &'a str, original_filename: &'a str, modified_filename: &'a str) -> Patch<'a, str> {
137        let mut classifier = Classifier::default();
138        let (old_lines, old_ids) = classifier.classify_lines(original);
139        let (new_lines, new_ids) = classifier.classify_lines(modified);
140
141        let solution = self.diff_slice(&old_ids, &new_ids);
142
143        let hunks = to_hunks(&old_lines, &new_lines, &solution, self.context_len);
144        Patch::new(Some(original_filename), Some(modified_filename), hunks)
145    }
146}
147
148impl Default for DiffOptions {
149    fn default() -> Self {
150        Self::new()
151    }
152}
153
154// TODO determine if this should be exposed in the public API
155#[allow(dead_code)]
156fn diff<'a>(original: &'a str, modified: &'a str) -> Vec<Diff<'a, str>> {
157    DiffOptions::default().diff(original, modified)
158}
159
160/// Create a patch between two texts.
161///
162/// ```
163/// # use diffy::create_patch;
164/// let original = "\
165/// I am afraid, however, that all I have known - that my story - will be forgotten.
166/// I am afraid for the world that is to come.
167/// Afraid that my plans will fail.
168/// Afraid of a doom worse than the Deepness.
169/// ";
170///
171/// let modified = "\
172/// I am afraid, however, that all I have known - that my story - will be forgotten.
173/// I am afraid for the world that is to come.
174/// Afraid that Alendi will fail.
175/// Afraid of a doom brought by the Deepness.
176/// ";
177///
178/// let expected = "\
179/// --- original
180/// +++ modified
181/// @@ -1,4 +1,4 @@
182///  I am afraid, however, that all I have known - that my story - will be forgotten.
183///  I am afraid for the world that is to come.
184/// -Afraid that my plans will fail.
185/// -Afraid of a doom worse than the Deepness.
186/// +Afraid that Alendi will fail.
187/// +Afraid of a doom brought by the Deepness.
188/// ";
189///
190/// let patch = create_patch(original, modified);
191/// assert_eq!(patch.to_string(), expected);
192/// ```
193pub fn create_patch<'a>(original: &'a str, modified: &'a str) -> Patch<'a, str> {
194    DiffOptions::default().create_patch(original, modified)
195}
196
197pub fn create_file_patch<'a>(original: &'a str, modified: &'a str, original_filename: &'a str, modified_filename: &'a str) -> Patch<'a, str> {
198    DiffOptions::default().create_file_patch(original, modified, original_filename, modified_filename)
199}
200
201/// Create a patch between two potentially non-utf8 texts
202pub fn create_patch_bytes<'a>(original: &'a [u8], modified: &'a [u8]) -> Patch<'a, [u8]> {
203    DiffOptions::default().create_patch_bytes(original, modified)
204}
205
206fn to_hunks<'a, T: ?Sized>(
207    lines1: &[&'a T],
208    lines2: &[&'a T],
209    solution: &[DiffRange<[u64]>],
210    context_len: usize,
211) -> Vec<Hunk<'a, T>> {
212    let edit_script = build_edit_script(solution);
213
214    let mut hunks = Vec::new();
215
216    let mut idx = 0;
217    while let Some(mut script) = edit_script.get(idx) {
218        let start1 = script.old.start.saturating_sub(context_len);
219        let start2 = script.new.start.saturating_sub(context_len);
220
221        let (mut end1, mut end2) = calc_end(
222            context_len,
223            lines1.len(),
224            lines2.len(),
225            script.old.end,
226            script.new.end,
227        );
228
229        let mut lines = Vec::new();
230
231        // Pre-context
232        for line in lines2.get(start2..script.new.start).into_iter().flatten() {
233            lines.push(Line::Context(*line));
234        }
235
236        loop {
237            // Delete lines from text1
238            for line in lines1.get(script.old.clone()).into_iter().flatten() {
239                lines.push(Line::Delete(*line));
240            }
241
242            // Insert lines from text2
243            for line in lines2.get(script.new.clone()).into_iter().flatten() {
244                lines.push(Line::Insert(*line));
245            }
246
247            if let Some(s) = edit_script.get(idx + 1) {
248                // Check to see if we can merge the hunks
249                let start1_next =
250                    cmp::min(s.old.start, lines1.len() - 1).saturating_sub(context_len);
251                if start1_next < end1 {
252                    // Context lines between hunks
253                    for (_i1, i2) in (script.old.end..s.old.start).zip(script.new.end..s.new.start)
254                    {
255                        if let Some(line) = lines2.get(i2) {
256                            lines.push(Line::Context(*line));
257                        }
258                    }
259
260                    // Calc the new end
261                    let (e1, e2) = calc_end(
262                        context_len,
263                        lines1.len(),
264                        lines2.len(),
265                        s.old.end,
266                        s.new.end,
267                    );
268
269                    end1 = e1;
270                    end2 = e2;
271                    script = s;
272                    idx += 1;
273                    continue;
274                }
275            }
276
277            break;
278        }
279
280        // Post-context
281        for line in lines2.get(script.new.end..end2).into_iter().flatten() {
282            lines.push(Line::Context(*line));
283        }
284
285        let len1 = end1 - start1;
286        let old_range = HunkRange::new(if len1 > 0 { start1 + 1 } else { start1 }, len1);
287
288        let len2 = end2 - start2;
289        let new_range = HunkRange::new(if len2 > 0 { start2 + 1 } else { start2 }, len2);
290
291        hunks.push(Hunk::new(old_range, new_range, None, lines));
292        idx += 1;
293    }
294
295    hunks
296}
297
298fn calc_end(
299    context_len: usize,
300    text1_len: usize,
301    text2_len: usize,
302    script1_end: usize,
303    script2_end: usize,
304) -> (usize, usize) {
305    let post_context_len = cmp::min(
306        context_len,
307        cmp::min(
308            text1_len.saturating_sub(script1_end),
309            text2_len.saturating_sub(script2_end),
310        ),
311    );
312
313    let end1 = script1_end + post_context_len;
314    let end2 = script2_end + post_context_len;
315
316    (end1, end2)
317}
318
319#[derive(Debug)]
320struct EditRange {
321    old: ops::Range<usize>,
322    new: ops::Range<usize>,
323}
324
325impl EditRange {
326    fn new(old: ops::Range<usize>, new: ops::Range<usize>) -> Self {
327        Self { old, new }
328    }
329}
330
331fn build_edit_script<T>(solution: &[DiffRange<[T]>]) -> Vec<EditRange> {
332    let mut idx_a = 0;
333    let mut idx_b = 0;
334
335    let mut edit_script: Vec<EditRange> = Vec::new();
336    let mut script = None;
337
338    for diff in solution {
339        match diff {
340            DiffRange::Equal(range1, range2) => {
341                idx_a += range1.len();
342                idx_b += range2.len();
343                if let Some(script) = script.take() {
344                    edit_script.push(script);
345                }
346            }
347            DiffRange::Delete(range) => {
348                match &mut script {
349                    Some(s) => s.old.end += range.len(),
350                    None => {
351                        script = Some(EditRange::new(idx_a..idx_a + range.len(), idx_b..idx_b));
352                    }
353                }
354                idx_a += range.len();
355            }
356            DiffRange::Insert(range) => {
357                match &mut script {
358                    Some(s) => s.new.end += range.len(),
359                    None => {
360                        script = Some(EditRange::new(idx_a..idx_a, idx_b..idx_b + range.len()));
361                    }
362                }
363                idx_b += range.len();
364            }
365        }
366    }
367
368    if let Some(script) = script.take() {
369        edit_script.push(script);
370    }
371
372    edit_script
373}