jujutsu_lib/
files.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::{HashSet, VecDeque};
16use std::fmt::{Debug, Error, Formatter};
17use std::ops::Range;
18
19use itertools::Itertools;
20
21use crate::diff;
22use crate::diff::{Diff, DiffHunk};
23
24#[derive(PartialEq, Eq, Clone, Debug)]
25pub struct DiffLine<'a> {
26    pub left_line_number: u32,
27    pub right_line_number: u32,
28    pub has_left_content: bool,
29    pub has_right_content: bool,
30    pub hunks: Vec<DiffHunk<'a>>,
31}
32
33impl DiffLine<'_> {
34    fn reset_line(&mut self) {
35        self.has_left_content = false;
36        self.has_right_content = false;
37        self.hunks.clear();
38    }
39
40    pub fn is_unmodified(&self) -> bool {
41        self.hunks
42            .iter()
43            .all(|hunk| matches!(hunk, DiffHunk::Matching(_)))
44    }
45}
46
47pub fn diff<'a>(left: &'a [u8], right: &'a [u8]) -> DiffLineIterator<'a> {
48    let diff_hunks = diff::diff(left, right);
49    DiffLineIterator::new(diff_hunks)
50}
51
52pub struct DiffLineIterator<'a> {
53    diff_hunks: Vec<DiffHunk<'a>>,
54    current_pos: usize,
55    current_line: DiffLine<'a>,
56    queued_lines: VecDeque<DiffLine<'a>>,
57}
58
59impl<'a> DiffLineIterator<'a> {
60    fn new(diff_hunks: Vec<DiffHunk<'a>>) -> Self {
61        let current_line = DiffLine {
62            left_line_number: 1,
63            right_line_number: 1,
64            has_left_content: false,
65            has_right_content: false,
66            hunks: vec![],
67        };
68        DiffLineIterator {
69            diff_hunks,
70            current_pos: 0,
71            current_line,
72            queued_lines: VecDeque::new(),
73        }
74    }
75}
76
77impl<'a> Iterator for DiffLineIterator<'a> {
78    type Item = DiffLine<'a>;
79
80    fn next(&mut self) -> Option<Self::Item> {
81        // TODO: Should we attempt to interpret as utf-8 and otherwise break only at
82        // newlines?
83        while self.current_pos < self.diff_hunks.len() && self.queued_lines.is_empty() {
84            let hunk = &self.diff_hunks[self.current_pos];
85            self.current_pos += 1;
86            match hunk {
87                diff::DiffHunk::Matching(text) => {
88                    let lines = text.split_inclusive(|b| *b == b'\n');
89                    for line in lines {
90                        self.current_line.has_left_content = true;
91                        self.current_line.has_right_content = true;
92                        self.current_line.hunks.push(DiffHunk::Matching(line));
93                        if line.ends_with(b"\n") {
94                            self.queued_lines.push_back(self.current_line.clone());
95                            self.current_line.left_line_number += 1;
96                            self.current_line.right_line_number += 1;
97                            self.current_line.reset_line();
98                        }
99                    }
100                }
101                diff::DiffHunk::Different(contents) => {
102                    let left_lines = contents[0].split_inclusive(|b| *b == b'\n');
103                    for left_line in left_lines {
104                        self.current_line.has_left_content = true;
105                        self.current_line
106                            .hunks
107                            .push(DiffHunk::Different(vec![left_line, b""]));
108                        if left_line.ends_with(b"\n") {
109                            self.queued_lines.push_back(self.current_line.clone());
110                            self.current_line.left_line_number += 1;
111                            self.current_line.reset_line();
112                        }
113                    }
114                    let right_lines = contents[1].split_inclusive(|b| *b == b'\n');
115                    for right_line in right_lines {
116                        self.current_line.has_right_content = true;
117                        self.current_line
118                            .hunks
119                            .push(DiffHunk::Different(vec![b"", right_line]));
120                        if right_line.ends_with(b"\n") {
121                            self.queued_lines.push_back(self.current_line.clone());
122                            self.current_line.right_line_number += 1;
123                            self.current_line.reset_line();
124                        }
125                    }
126                }
127            }
128        }
129
130        if let Some(line) = self.queued_lines.pop_front() {
131            return Some(line);
132        }
133
134        if !self.current_line.hunks.is_empty() {
135            let line = self.current_line.clone();
136            self.current_line.reset_line();
137            return Some(line);
138        }
139
140        None
141    }
142}
143
144#[derive(PartialEq, Eq, Clone)]
145pub struct ConflictHunk {
146    pub removes: Vec<Vec<u8>>,
147    pub adds: Vec<Vec<u8>>,
148}
149
150#[derive(PartialEq, Eq, Clone)]
151pub enum MergeHunk {
152    Resolved(Vec<u8>),
153    Conflict(ConflictHunk),
154}
155
156impl Debug for MergeHunk {
157    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
158        match self {
159            MergeHunk::Resolved(data) => f
160                .debug_tuple("Resolved")
161                .field(&String::from_utf8_lossy(data))
162                .finish(),
163            MergeHunk::Conflict(ConflictHunk { removes, adds }) => f
164                .debug_struct("Conflict")
165                .field(
166                    "removes",
167                    &removes
168                        .iter()
169                        .map(|part| String::from_utf8_lossy(part))
170                        .collect_vec(),
171                )
172                .field(
173                    "adds",
174                    &adds
175                        .iter()
176                        .map(|part| String::from_utf8_lossy(part))
177                        .collect_vec(),
178                )
179                .finish(),
180        }
181    }
182}
183
184#[derive(PartialEq, Eq, Clone)]
185pub enum MergeResult {
186    Resolved(Vec<u8>),
187    Conflict(Vec<MergeHunk>),
188}
189
190impl Debug for MergeResult {
191    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
192        match self {
193            MergeResult::Resolved(data) => f
194                .debug_tuple("Resolved")
195                .field(&String::from_utf8_lossy(data))
196                .finish(),
197            MergeResult::Conflict(hunks) => f.debug_tuple("Conflict").field(hunks).finish(),
198        }
199    }
200}
201
202/// A region where the base and two sides match.
203#[derive(Debug, PartialEq, Eq, Clone)]
204struct SyncRegion {
205    base: Range<usize>,
206    left: Range<usize>,
207    right: Range<usize>,
208}
209
210// TODO: Should we require `add.len() == removes.len() + 1`? If that condition
211// is false, it effectively means that we should pretend that there are empty
212// strings in `removes` or `adds` to make it true. Maybe we should have to
213// caller make it explicitly that way.
214pub fn merge(removes: &[&[u8]], adds: &[&[u8]]) -> MergeResult {
215    let num_removes = removes.len();
216    // TODO: Using the first remove as base (first in the inputs) is how it's
217    // usually done for 3-way conflicts. Are there better heuristics when there are
218    // more than 3 parts?
219    let mut diff_inputs = removes.to_vec();
220    diff_inputs.extend(adds);
221
222    let diff = Diff::for_tokenizer(&diff_inputs, &diff::find_line_ranges);
223    let mut resolved_hunk: Vec<u8> = vec![];
224    let mut merge_hunks: Vec<MergeHunk> = vec![];
225    for diff_hunk in diff.hunks() {
226        match diff_hunk {
227            DiffHunk::Matching(content) => {
228                if adds.len() > removes.len() {
229                    resolved_hunk.extend(content);
230                }
231            }
232            DiffHunk::Different(parts) => {
233                let mut removed_parts = parts[..num_removes].to_vec();
234                let mut added_parts = parts[num_removes..].to_vec();
235                // Remove pairs of parts that match in the removes and adds.
236                let mut added_index = 0;
237                while added_index < added_parts.len() {
238                    let added_part = added_parts[added_index];
239                    added_index += 1;
240                    for (removed_index, removed_part) in removed_parts.iter().enumerate() {
241                        if *removed_part == added_part {
242                            added_index -= 1;
243                            added_parts.remove(added_index);
244                            removed_parts.remove(removed_index);
245                            break;
246                        }
247                    }
248                }
249                let distinct_removes: HashSet<&[u8]> = removed_parts.iter().copied().collect();
250                let distinct_adds: HashSet<&[u8]> = added_parts.iter().copied().collect();
251                if removed_parts.is_empty() && added_parts.is_empty() {
252                    // The same content was added and removed, so there's
253                    // nothing left.
254                } else if distinct_removes.is_empty() && distinct_adds.len() == 1 {
255                    // All sides added the same content
256                    resolved_hunk.extend(added_parts[0]);
257                } else if distinct_removes.len() == 1 && distinct_adds.is_empty() {
258                    // All sides removed the same content
259                } else if distinct_removes.len() == 1
260                    && distinct_adds.len() == 1
261                    && added_parts.len() == removed_parts.len() + 1
262                {
263                    // All sides made the same change, and there's a matching extra base to apply it
264                    // to
265                    resolved_hunk.extend(added_parts[0]);
266                } else {
267                    if !resolved_hunk.is_empty() {
268                        merge_hunks.push(MergeHunk::Resolved(resolved_hunk));
269                        resolved_hunk = vec![];
270                    }
271                    // Include the unfiltered lists of removed and added here, so the caller
272                    // knows which part corresponds to which input.
273                    merge_hunks.push(MergeHunk::Conflict(ConflictHunk {
274                        removes: parts[..num_removes]
275                            .iter()
276                            .map(|part| part.to_vec())
277                            .collect_vec(),
278                        adds: parts[num_removes..]
279                            .iter()
280                            .map(|part| part.to_vec())
281                            .collect_vec(),
282                    }));
283                }
284            }
285        }
286    }
287
288    if merge_hunks.is_empty() {
289        MergeResult::Resolved(resolved_hunk)
290    } else {
291        if !resolved_hunk.is_empty() {
292            merge_hunks.push(MergeHunk::Resolved(resolved_hunk));
293        }
294        MergeResult::Conflict(merge_hunks)
295    }
296}
297
298#[cfg(test)]
299mod tests {
300    use super::*;
301
302    #[test]
303    fn test_merge() {
304        // Unchanged and empty on all sides
305        assert_eq!(
306            merge(&[b""], &[b"", b""]),
307            MergeResult::Resolved(b"".to_vec())
308        );
309        // Unchanged on all sides
310        assert_eq!(
311            merge(&[b"a"], &[b"a", b"a"]),
312            MergeResult::Resolved(b"a".to_vec())
313        );
314        // One side removed, one side unchanged
315        assert_eq!(
316            merge(&[b"a\n"], &[b"", b"a\n"]),
317            MergeResult::Resolved(b"".to_vec())
318        );
319        // One side unchanged, one side removed
320        assert_eq!(
321            merge(&[b"a\n"], &[b"a\n", b""]),
322            MergeResult::Resolved(b"".to_vec())
323        );
324        // Both sides removed same line
325        assert_eq!(
326            merge(&[b"a\n"], &[b"", b""]),
327            MergeResult::Resolved(b"".to_vec())
328        );
329        // One side modified, one side unchanged
330        assert_eq!(
331            merge(&[b"a"], &[b"a b", b"a"]),
332            MergeResult::Resolved(b"a b".to_vec())
333        );
334        // One side unchanged, one side modified
335        assert_eq!(
336            merge(&[b"a"], &[b"a", b"a b"]),
337            MergeResult::Resolved(b"a b".to_vec())
338        );
339        // All sides added same content
340        assert_eq!(
341            merge(&[], &[b"a\n", b"a\n", b"a\n"]),
342            MergeResult::Resolved(b"a\n".to_vec())
343        );
344        // One side modified, two sides added
345        assert_eq!(
346            merge(&[b"a"], &[b"b", b"b", b"b"]),
347            MergeResult::Conflict(vec![MergeHunk::Conflict(ConflictHunk {
348                removes: vec![b"a".to_vec()],
349                adds: vec![b"b".to_vec(), b"b".to_vec(), b"b".to_vec()]
350            })])
351        );
352        // All sides removed same content
353        assert_eq!(
354            merge(&[b"a\n", b"a\n", b"a\n"], &[]),
355            MergeResult::Resolved(b"".to_vec())
356        );
357        // One side modified, two sides removed
358        assert_eq!(
359            merge(&[b"a\n", b"a\n", b"a\n"], &[b""]),
360            MergeResult::Conflict(vec![MergeHunk::Conflict(ConflictHunk {
361                removes: vec![b"a\n".to_vec(), b"a\n".to_vec(), b"a\n".to_vec()],
362                adds: vec![b"".to_vec()]
363            })])
364        );
365        // Three sides made the same change
366        assert_eq!(
367            merge(&[b"a", b"a"], &[b"b", b"b", b"b"]),
368            MergeResult::Resolved(b"b".to_vec())
369        );
370        // One side unchanged, one side added
371        assert_eq!(
372            merge(&[b"a\n"], &[b"a\nb\n"]),
373            MergeResult::Conflict(vec![MergeHunk::Conflict(ConflictHunk {
374                removes: vec![b"".to_vec()],
375                adds: vec![b"b\n".to_vec()]
376            })])
377        );
378        // Two sides left one line unchanged, and added conflicting additional lines
379        assert_eq!(
380            merge(&[b"a\n"], &[b"a\nb\n", b"a\nc\n"]),
381            MergeResult::Conflict(vec![
382                MergeHunk::Resolved(b"a\n".to_vec()),
383                MergeHunk::Conflict(ConflictHunk {
384                    removes: vec![b"".to_vec()],
385                    adds: vec![b"b\n".to_vec(), b"c\n".to_vec()]
386                })
387            ])
388        );
389        // One side removed, one side modified
390        assert_eq!(
391            merge(&[b"a\n"], &[b"", b"b\n"]),
392            MergeResult::Conflict(vec![MergeHunk::Conflict(ConflictHunk {
393                removes: vec![b"a\n".to_vec()],
394                adds: vec![b"".to_vec(), b"b\n".to_vec()]
395            })])
396        );
397        // One side modified, one side removed
398        assert_eq!(
399            merge(&[b"a\n"], &[b"b\n", b""]),
400            MergeResult::Conflict(vec![MergeHunk::Conflict(ConflictHunk {
401                removes: vec![b"a\n".to_vec()],
402                adds: vec![b"b\n".to_vec(), b"".to_vec()]
403            })])
404        );
405        // Two sides modified in different ways
406        assert_eq!(
407            merge(&[b"a"], &[b"b", b"c"]),
408            MergeResult::Conflict(vec![MergeHunk::Conflict(ConflictHunk {
409                removes: vec![b"a".to_vec()],
410                adds: vec![b"b".to_vec(), b"c".to_vec()]
411            })])
412        );
413        // Two of three sides don't change, third side changes
414        assert_eq!(
415            merge(&[b"a", b"a"], &[b"a", b"", b"a"]),
416            MergeResult::Resolved(b"".to_vec())
417        );
418        // One side unchanged, two other sides make the same change
419        assert_eq!(
420            merge(&[b"a", b"a"], &[b"", b"a", b""]),
421            MergeResult::Resolved(b"".to_vec())
422        );
423        // One side unchanged, two other sides make the different change
424        assert_eq!(
425            merge(&[b"a", b"a"], &[b"b", b"a", b"c"]),
426            MergeResult::Conflict(vec![MergeHunk::Conflict(ConflictHunk {
427                removes: vec![b"a".to_vec(), b"a".to_vec()],
428                adds: vec![b"b".to_vec(), b"a".to_vec(), b"c".to_vec()]
429            })])
430        );
431        // Merge of an unresolved conflict and another branch, where the other branch
432        // undid the change from one of the inputs to the unresolved conflict in the
433        // first.
434        assert_eq!(
435            merge(&[b"a", b"b"], &[b"b", b"a", b"c"]),
436            MergeResult::Resolved(b"c".to_vec())
437        );
438        // Merge of an unresolved conflict and another branch.
439        assert_eq!(
440            merge(&[b"a", b"b"], &[b"c", b"d", b"e"]),
441            MergeResult::Conflict(vec![MergeHunk::Conflict(ConflictHunk {
442                removes: vec![b"a".to_vec(), b"b".to_vec()],
443                adds: vec![b"c".to_vec(), b"d".to_vec(), b"e".to_vec()]
444            })])
445        );
446    }
447}