jj_lib/
files.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::borrow::Borrow;
18use std::collections::VecDeque;
19use std::iter;
20use std::mem;
21
22use bstr::BStr;
23use bstr::BString;
24use itertools::Itertools as _;
25
26use crate::diff::Diff;
27use crate::diff::DiffHunk;
28use crate::diff::DiffHunkKind;
29use crate::merge::Merge;
30
31/// A diff line which may contain small hunks originating from both sides.
32#[derive(PartialEq, Eq, Clone, Debug)]
33pub struct DiffLine<'a> {
34    pub line_number: DiffLineNumber,
35    pub hunks: Vec<(DiffLineHunkSide, &'a BStr)>,
36}
37
38impl DiffLine<'_> {
39    pub fn has_left_content(&self) -> bool {
40        self.hunks
41            .iter()
42            .any(|&(side, _)| side != DiffLineHunkSide::Right)
43    }
44
45    pub fn has_right_content(&self) -> bool {
46        self.hunks
47            .iter()
48            .any(|&(side, _)| side != DiffLineHunkSide::Left)
49    }
50
51    pub fn is_unmodified(&self) -> bool {
52        self.hunks
53            .iter()
54            .all(|&(side, _)| side == DiffLineHunkSide::Both)
55    }
56
57    fn take(&mut self) -> Self {
58        DiffLine {
59            line_number: self.line_number,
60            hunks: mem::take(&mut self.hunks),
61        }
62    }
63}
64
65#[derive(Clone, Copy, Debug, Eq, PartialEq)]
66pub struct DiffLineNumber {
67    pub left: u32,
68    pub right: u32,
69}
70
71/// Which side a `DiffLine` hunk belongs to?
72#[derive(Clone, Copy, Debug, Eq, PartialEq)]
73pub enum DiffLineHunkSide {
74    Both,
75    Left,
76    Right,
77}
78
79pub struct DiffLineIterator<'a, I> {
80    diff_hunks: iter::Fuse<I>,
81    current_line: DiffLine<'a>,
82    queued_lines: VecDeque<DiffLine<'a>>,
83}
84
85impl<'a, I> DiffLineIterator<'a, I>
86where
87    I: Iterator,
88    I::Item: Borrow<DiffHunk<'a>>,
89{
90    /// Iterates `diff_hunks` by line. Each hunk should have exactly two inputs.
91    pub fn new(diff_hunks: I) -> Self {
92        let line_number = DiffLineNumber { left: 1, right: 1 };
93        Self::with_line_number(diff_hunks, line_number)
94    }
95
96    /// Iterates `diff_hunks` by line. Each hunk should have exactly two inputs.
97    /// Hunk's line numbers start from the given `line_number`.
98    pub fn with_line_number(diff_hunks: I, line_number: DiffLineNumber) -> Self {
99        let current_line = DiffLine {
100            line_number,
101            hunks: vec![],
102        };
103        DiffLineIterator {
104            diff_hunks: diff_hunks.fuse(),
105            current_line,
106            queued_lines: VecDeque::new(),
107        }
108    }
109}
110
111impl<I> DiffLineIterator<'_, I> {
112    /// Returns line number of the next hunk. After all hunks are consumed, this
113    /// returns the next line number if the last hunk ends with newline.
114    pub fn next_line_number(&self) -> DiffLineNumber {
115        let next_line = self.queued_lines.front().unwrap_or(&self.current_line);
116        next_line.line_number
117    }
118}
119
120impl<'a, I> Iterator for DiffLineIterator<'a, I>
121where
122    I: Iterator,
123    I::Item: Borrow<DiffHunk<'a>>,
124{
125    type Item = DiffLine<'a>;
126
127    fn next(&mut self) -> Option<Self::Item> {
128        // TODO: Should we attempt to interpret as utf-8 and otherwise break only at
129        // newlines?
130        while self.queued_lines.is_empty() {
131            let Some(hunk) = self.diff_hunks.next() else {
132                break;
133            };
134            let hunk = hunk.borrow();
135            match hunk.kind {
136                DiffHunkKind::Matching => {
137                    // TODO: add support for unmatched contexts?
138                    debug_assert!(hunk.contents.iter().all_equal());
139                    let text = hunk.contents[0];
140                    let lines = text.split_inclusive(|b| *b == b'\n').map(BStr::new);
141                    for line in lines {
142                        self.current_line.hunks.push((DiffLineHunkSide::Both, line));
143                        if line.ends_with(b"\n") {
144                            self.queued_lines.push_back(self.current_line.take());
145                            self.current_line.line_number.left += 1;
146                            self.current_line.line_number.right += 1;
147                        }
148                    }
149                }
150                DiffHunkKind::Different => {
151                    let [left_text, right_text] = hunk.contents[..]
152                        .try_into()
153                        .expect("hunk should have exactly two inputs");
154                    let left_lines = left_text.split_inclusive(|b| *b == b'\n').map(BStr::new);
155                    for left_line in left_lines {
156                        self.current_line
157                            .hunks
158                            .push((DiffLineHunkSide::Left, left_line));
159                        if left_line.ends_with(b"\n") {
160                            self.queued_lines.push_back(self.current_line.take());
161                            self.current_line.line_number.left += 1;
162                        }
163                    }
164                    let right_lines = right_text.split_inclusive(|b| *b == b'\n').map(BStr::new);
165                    for right_line in right_lines {
166                        self.current_line
167                            .hunks
168                            .push((DiffLineHunkSide::Right, right_line));
169                        if right_line.ends_with(b"\n") {
170                            self.queued_lines.push_back(self.current_line.take());
171                            self.current_line.line_number.right += 1;
172                        }
173                    }
174                }
175            }
176        }
177
178        if let Some(line) = self.queued_lines.pop_front() {
179            return Some(line);
180        }
181
182        if !self.current_line.hunks.is_empty() {
183            return Some(self.current_line.take());
184        }
185
186        None
187    }
188}
189
190#[derive(PartialEq, Eq, Clone, Debug)]
191pub enum MergeResult {
192    Resolved(BString),
193    Conflict(Vec<Merge<BString>>),
194}
195
196pub fn merge<T: AsRef<[u8]>>(slices: &Merge<T>) -> MergeResult {
197    // TODO: Using the first remove as base (first in the inputs) is how it's
198    // usually done for 3-way conflicts. Are there better heuristics when there are
199    // more than 3 parts?
200    let num_diffs = slices.removes().len();
201    let diff_inputs = slices.removes().chain(slices.adds());
202    merge_hunks(&Diff::by_line(diff_inputs), num_diffs)
203}
204
205fn merge_hunks(diff: &Diff, num_diffs: usize) -> MergeResult {
206    let mut resolved_hunk = BString::new(vec![]);
207    let mut merge_hunks: Vec<Merge<BString>> = vec![];
208    for diff_hunk in diff.hunks() {
209        match diff_hunk.kind {
210            DiffHunkKind::Matching => {
211                debug_assert!(diff_hunk.contents.iter().all_equal());
212                resolved_hunk.extend_from_slice(diff_hunk.contents[0]);
213            }
214            DiffHunkKind::Different => {
215                let merge = Merge::from_removes_adds(
216                    diff_hunk.contents[..num_diffs].iter().copied(),
217                    diff_hunk.contents[num_diffs..].iter().copied(),
218                );
219                if let Some(resolved) = merge.resolve_trivial() {
220                    resolved_hunk.extend_from_slice(resolved);
221                } else {
222                    if !resolved_hunk.is_empty() {
223                        merge_hunks.push(Merge::resolved(resolved_hunk));
224                        resolved_hunk = BString::new(vec![]);
225                    }
226                    merge_hunks.push(merge.map(|&s| s.to_owned()));
227                }
228            }
229        }
230    }
231
232    if merge_hunks.is_empty() {
233        MergeResult::Resolved(resolved_hunk)
234    } else {
235        if !resolved_hunk.is_empty() {
236            merge_hunks.push(Merge::resolved(resolved_hunk));
237        }
238        MergeResult::Conflict(merge_hunks)
239    }
240}
241
242#[cfg(test)]
243mod tests {
244    use super::*;
245
246    fn hunk(data: &[u8]) -> BString {
247        data.into()
248    }
249
250    fn merge(removes: &[&[u8]], adds: &[&[u8]]) -> MergeResult {
251        super::merge(&Merge::from_removes_adds(removes, adds))
252    }
253
254    #[test]
255    fn test_diff_line_iterator_line_numbers() {
256        let mut line_iter = DiffLineIterator::with_line_number(
257            [DiffHunk::different(["a\nb", "c\nd\n"])].into_iter(),
258            DiffLineNumber { left: 1, right: 10 },
259        );
260        // Nothing queued
261        assert_eq!(
262            line_iter.next_line_number(),
263            DiffLineNumber { left: 1, right: 10 }
264        );
265        assert_eq!(
266            line_iter.next().unwrap(),
267            DiffLine {
268                line_number: DiffLineNumber { left: 1, right: 10 },
269                hunks: vec![(DiffLineHunkSide::Left, "a\n".as_ref())],
270            }
271        );
272        // Multiple lines queued
273        assert_eq!(
274            line_iter.next_line_number(),
275            DiffLineNumber { left: 2, right: 10 }
276        );
277        assert_eq!(
278            line_iter.next().unwrap(),
279            DiffLine {
280                line_number: DiffLineNumber { left: 2, right: 10 },
281                hunks: vec![
282                    (DiffLineHunkSide::Left, "b".as_ref()),
283                    (DiffLineHunkSide::Right, "c\n".as_ref()),
284                ],
285            }
286        );
287        // Single line queued
288        assert_eq!(
289            line_iter.next_line_number(),
290            DiffLineNumber { left: 2, right: 11 }
291        );
292        assert_eq!(
293            line_iter.next().unwrap(),
294            DiffLine {
295                line_number: DiffLineNumber { left: 2, right: 11 },
296                hunks: vec![(DiffLineHunkSide::Right, "d\n".as_ref())],
297            }
298        );
299        // No more lines: left remains 2 as it lacks newline
300        assert_eq!(
301            line_iter.next_line_number(),
302            DiffLineNumber { left: 2, right: 12 }
303        );
304        assert!(line_iter.next().is_none());
305        assert_eq!(
306            line_iter.next_line_number(),
307            DiffLineNumber { left: 2, right: 12 }
308        );
309    }
310
311    #[test]
312    fn test_merge_single_hunk() {
313        // Unchanged and empty on all sides
314        assert_eq!(merge(&[b""], &[b"", b""]), MergeResult::Resolved(hunk(b"")));
315        // Unchanged on all sides
316        assert_eq!(
317            merge(&[b"a"], &[b"a", b"a"]),
318            MergeResult::Resolved(hunk(b"a"))
319        );
320        // One side removed, one side unchanged
321        assert_eq!(
322            merge(&[b"a\n"], &[b"", b"a\n"]),
323            MergeResult::Resolved(hunk(b""))
324        );
325        // One side unchanged, one side removed
326        assert_eq!(
327            merge(&[b"a\n"], &[b"a\n", b""]),
328            MergeResult::Resolved(hunk(b""))
329        );
330        // Both sides removed same line
331        assert_eq!(
332            merge(&[b"a\n"], &[b"", b""]),
333            MergeResult::Resolved(hunk(b""))
334        );
335        // One side modified, one side unchanged
336        assert_eq!(
337            merge(&[b"a"], &[b"a b", b"a"]),
338            MergeResult::Resolved(hunk(b"a b"))
339        );
340        // One side unchanged, one side modified
341        assert_eq!(
342            merge(&[b"a"], &[b"a", b"a b"]),
343            MergeResult::Resolved(hunk(b"a b"))
344        );
345        // All sides added same content
346        assert_eq!(
347            merge(&[b"", b""], &[b"a\n", b"a\n", b"a\n"]),
348            MergeResult::Resolved(hunk(b"a\n"))
349        );
350        // One side modified, two sides added
351        assert_eq!(
352            merge(&[b"a", b""], &[b"b", b"b", b"b"]),
353            MergeResult::Conflict(vec![Merge::from_removes_adds(
354                vec![hunk(b"a"), hunk(b"")],
355                vec![hunk(b"b"), hunk(b"b"), hunk(b"b")]
356            )])
357        );
358        // All sides removed same content
359        assert_eq!(
360            merge(&[b"a\n", b"a\n", b"a\n"], &[b"", b"", b"", b""]),
361            MergeResult::Resolved(hunk(b""))
362        );
363        // One side modified, two sides removed
364        assert_eq!(
365            merge(&[b"a\n", b"a\n"], &[b"b\n", b"", b""]),
366            MergeResult::Conflict(vec![Merge::from_removes_adds(
367                vec![hunk(b"a\n"), hunk(b"a\n")],
368                vec![hunk(b"b\n"), hunk(b""), hunk(b"")]
369            )])
370        );
371        // Three sides made the same change
372        assert_eq!(
373            merge(&[b"a", b"a"], &[b"b", b"b", b"b"]),
374            MergeResult::Resolved(hunk(b"b"))
375        );
376        // One side removed, one side modified
377        assert_eq!(
378            merge(&[b"a\n"], &[b"", b"b\n"]),
379            MergeResult::Conflict(vec![Merge::from_removes_adds(
380                vec![hunk(b"a\n")],
381                vec![hunk(b""), hunk(b"b\n")]
382            )])
383        );
384        // One side modified, one side removed
385        assert_eq!(
386            merge(&[b"a\n"], &[b"b\n", b""]),
387            MergeResult::Conflict(vec![Merge::from_removes_adds(
388                vec![hunk(b"a\n")],
389                vec![hunk(b"b\n"), hunk(b"")]
390            )])
391        );
392        // Two sides modified in different ways
393        assert_eq!(
394            merge(&[b"a"], &[b"b", b"c"]),
395            MergeResult::Conflict(vec![Merge::from_removes_adds(
396                vec![hunk(b"a")],
397                vec![hunk(b"b"), hunk(b"c")]
398            )])
399        );
400        // Two of three sides don't change, third side changes
401        assert_eq!(
402            merge(&[b"a", b"a"], &[b"a", b"", b"a"]),
403            MergeResult::Resolved(hunk(b""))
404        );
405        // One side unchanged, two other sides make the same change
406        assert_eq!(
407            merge(&[b"a", b"a"], &[b"b", b"a", b"b"]),
408            MergeResult::Resolved(hunk(b"b"))
409        );
410        // One side unchanged, two other sides make the different change
411        assert_eq!(
412            merge(&[b"a", b"a"], &[b"b", b"a", b"c"]),
413            MergeResult::Conflict(vec![Merge::from_removes_adds(
414                vec![hunk(b"a"), hunk(b"a")],
415                vec![hunk(b"b"), hunk(b"a"), hunk(b"c")]
416            )])
417        );
418        // Merge of an unresolved conflict and another branch, where the other branch
419        // undid the change from one of the inputs to the unresolved conflict in the
420        // first.
421        assert_eq!(
422            merge(&[b"a", b"b"], &[b"b", b"a", b"c"]),
423            MergeResult::Resolved(hunk(b"c"))
424        );
425        // Merge of an unresolved conflict and another branch.
426        assert_eq!(
427            merge(&[b"a", b"b"], &[b"c", b"d", b"e"]),
428            MergeResult::Conflict(vec![Merge::from_removes_adds(
429                vec![hunk(b"a"), hunk(b"b")],
430                vec![hunk(b"c"), hunk(b"d"), hunk(b"e")]
431            )])
432        );
433        // Two sides made the same change, third side made a different change
434        assert_eq!(
435            merge(&[b"a", b"b"], &[b"c", b"c", b"c"]),
436            MergeResult::Conflict(vec![Merge::from_removes_adds(
437                vec![hunk(b"a"), hunk(b"b")],
438                vec![hunk(b"c"), hunk(b"c"), hunk(b"c")]
439            )])
440        );
441    }
442
443    #[test]
444    fn test_merge_multi_hunk() {
445        // Two sides left one line unchanged, and added conflicting additional lines
446        assert_eq!(
447            merge(&[b"a\n"], &[b"a\nb\n", b"a\nc\n"]),
448            MergeResult::Conflict(vec![
449                Merge::resolved(hunk(b"a\n")),
450                Merge::from_removes_adds(vec![hunk(b"")], vec![hunk(b"b\n"), hunk(b"c\n")])
451            ])
452        );
453        // Two sides changed different lines: no conflict
454        assert_eq!(
455            merge(&[b"a\nb\nc\n"], &[b"a2\nb\nc\n", b"a\nb\nc2\n"]),
456            MergeResult::Resolved(hunk(b"a2\nb\nc2\n"))
457        );
458        // Conflict with non-conflicting lines around
459        assert_eq!(
460            merge(&[b"a\nb\nc\n"], &[b"a\nb1\nc\n", b"a\nb2\nc\n"]),
461            MergeResult::Conflict(vec![
462                Merge::resolved(hunk(b"a\n")),
463                Merge::from_removes_adds(vec![hunk(b"b\n")], vec![hunk(b"b1\n"), hunk(b"b2\n")]),
464                Merge::resolved(hunk(b"c\n"))
465            ])
466        );
467        // One side changes a line and adds a block after. The other side just adds the
468        // same block. You might expect the last block would be deduplicated. However,
469        // the changes in the first side can be parsed as follows:
470        // ```
471        //  a {
472        // -    p
473        // +    q
474        // +}
475        // +
476        // +b {
477        // +    x
478        //  }
479        // ```
480        // Therefore, the first side modifies the block `a { .. }`, and the second side
481        // adds `b { .. }`. Git and Mercurial both duplicate the block in the result.
482        assert_eq!(
483            merge(
484                &[b"\
485a {
486    p
487}
488"],
489                &[
490                    b"\
491a {
492    q
493}
494
495b {
496    x
497}
498",
499                    b"\
500a {
501    p
502}
503
504b {
505    x
506}
507"
508                ],
509            ),
510            MergeResult::Resolved(hunk(
511                b"\
512a {
513    q
514}
515
516b {
517    x
518}
519
520b {
521    x
522}
523"
524            ))
525        );
526    }
527}