jujube_lib/
trees.rs

1// Copyright 2020 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use crate::files;
16use crate::files::MergeResult;
17use crate::matchers::Matcher;
18use crate::repo_path::{
19    DirRepoPath, DirRepoPathComponent, FileRepoPath, FileRepoPathComponent, RepoPathJoin,
20};
21use crate::store::{Conflict, ConflictPart, StoreError, TreeId, TreeValue};
22use crate::store_wrapper::StoreWrapper;
23use crate::tree::Tree;
24use std::cmp::Ordering;
25
26#[derive(Debug, PartialEq, Eq, Clone)]
27pub enum Diff<T> {
28    Modified(T, T),
29    Added(T),
30    Removed(T),
31}
32
33impl<T> Diff<T> {
34    pub fn as_options(&self) -> (Option<&T>, Option<&T>) {
35        match self {
36            Diff::Modified(left, right) => (Some(left), Some(right)),
37            Diff::Added(right) => (None, Some(right)),
38            Diff::Removed(left) => (Some(left), None),
39        }
40    }
41}
42
43pub type TreeValueDiff<'a> = Diff<&'a TreeValue>;
44
45fn diff_entries<'a, E>(
46    tree1: &'a Tree,
47    tree2: &'a Tree,
48    callback: &mut impl FnMut(&'a str, TreeValueDiff<'a>) -> Result<(), E>,
49) -> Result<(), E> {
50    let mut it1 = tree1.entries_non_recursive();
51    let mut it2 = tree2.entries_non_recursive();
52    let mut entry1 = it1.next();
53    let mut entry2 = it2.next();
54    loop {
55        let name: &'a str;
56        let mut value_before: Option<&'a TreeValue> = None;
57        let mut value_after: Option<&'a TreeValue> = None;
58        match (&entry1, &entry2) {
59            (Some(before), Some(after)) => {
60                match before.name().cmp(after.name()) {
61                    Ordering::Less => {
62                        // entry removed
63                        name = before.name();
64                        value_before = Some(before.value());
65                    }
66                    Ordering::Greater => {
67                        // entry added
68                        name = after.name();
69                        value_after = Some(after.value());
70                    }
71                    Ordering::Equal => {
72                        // entry modified
73                        name = before.name();
74                        value_before = Some(before.value());
75                        value_after = Some(after.value());
76                    }
77                }
78            }
79            (Some(before), None) => {
80                // second iterator exhausted
81                name = before.name();
82                value_before = Some(before.value());
83            }
84            (None, Some(after)) => {
85                // first iterator exhausted
86                name = after.name();
87                value_after = Some(after.value());
88            }
89            (None, None) => {
90                // both iterators exhausted
91                break;
92            }
93        }
94
95        match (value_before, value_after) {
96            (Some(before), Some(after)) => {
97                if before != after {
98                    callback(name, TreeValueDiff::Modified(before, after))?;
99                }
100                entry1 = it1.next();
101                entry2 = it2.next();
102            }
103            (Some(before), None) => {
104                callback(name, TreeValueDiff::Removed(before))?;
105                entry1 = it1.next();
106            }
107            (None, Some(after)) => {
108                callback(name, TreeValueDiff::Added(after))?;
109                entry2 = it2.next();
110            }
111            (None, None) => {
112                panic!("should have been handled above");
113            }
114        }
115    }
116    Ok(())
117}
118
119pub fn recursive_tree_diff<M>(
120    root1: Tree,
121    root2: Tree,
122    matcher: &M,
123    callback: &mut impl FnMut(&FileRepoPath, TreeValueDiff),
124) where
125    M: Matcher,
126{
127    internal_recursive_tree_diff(vec![(DirRepoPath::root(), root1, root2)], matcher, callback)
128}
129
130fn internal_recursive_tree_diff<M>(
131    work: Vec<(DirRepoPath, Tree, Tree)>,
132    _matcher: &M,
133    callback: &mut impl FnMut(&FileRepoPath, TreeValueDiff),
134) where
135    M: Matcher,
136{
137    let mut new_work = Vec::new();
138    // Diffs for which to invoke the callback after having visited subtrees. This is
139    // used for making sure that when a directory gets replaced by a file, we
140    // call the callback for the addition of the file after we call the callback
141    // for removing files in the directory.
142    let mut late_file_diffs = Vec::new();
143    for (dir, tree1, tree2) in &work {
144        diff_entries(tree1, tree2, &mut |name,
145                                         diff: TreeValueDiff|
146         -> Result<(), ()> {
147            let file_path = dir.join(&FileRepoPathComponent::from(name));
148            let subdir = DirRepoPathComponent::from(name);
149            let subdir_path = dir.join(&subdir);
150            // TODO: simplify this mess
151            match diff {
152                TreeValueDiff::Modified(TreeValue::Tree(id_before), TreeValue::Tree(id_after)) => {
153                    new_work.push((
154                        subdir_path,
155                        tree1.known_sub_tree(&subdir, &id_before),
156                        tree2.known_sub_tree(&subdir, &id_after),
157                    ));
158                }
159                TreeValueDiff::Modified(TreeValue::Tree(id_before), file_after) => {
160                    new_work.push((
161                        subdir_path.clone(),
162                        tree1.known_sub_tree(&subdir, &id_before),
163                        Tree::null(tree2.store().clone(), subdir_path),
164                    ));
165                    late_file_diffs.push((file_path, TreeValueDiff::Added(file_after)));
166                }
167                TreeValueDiff::Modified(file_before, TreeValue::Tree(id_after)) => {
168                    new_work.push((
169                        subdir_path.clone(),
170                        Tree::null(tree1.store().clone(), subdir_path),
171                        tree2.known_sub_tree(&subdir, &id_after),
172                    ));
173                    callback(&file_path, TreeValueDiff::Removed(file_before));
174                }
175                TreeValueDiff::Modified(_, _) => {
176                    callback(&file_path, diff);
177                }
178                TreeValueDiff::Added(TreeValue::Tree(id_after)) => {
179                    new_work.push((
180                        subdir_path.clone(),
181                        Tree::null(tree1.store().clone(), subdir_path),
182                        tree2.known_sub_tree(&subdir, &id_after),
183                    ));
184                }
185                TreeValueDiff::Added(_) => {
186                    callback(&file_path, diff);
187                }
188                TreeValueDiff::Removed(TreeValue::Tree(id_before)) => {
189                    new_work.push((
190                        subdir_path.clone(),
191                        tree1.known_sub_tree(&subdir, &id_before),
192                        Tree::null(tree2.store().clone(), subdir_path),
193                    ));
194                }
195                TreeValueDiff::Removed(_) => {
196                    callback(&file_path, diff);
197                }
198            };
199            Ok(())
200        })
201        .unwrap(); // safe because the callback always returns Ok
202    }
203    if !new_work.is_empty() {
204        internal_recursive_tree_diff(new_work, _matcher, callback)
205    }
206    for (file_path, diff) in late_file_diffs {
207        callback(&file_path, diff);
208    }
209}
210
211pub fn merge_trees(
212    side1_tree: &Tree,
213    base_tree: &Tree,
214    side2_tree: &Tree,
215) -> Result<TreeId, StoreError> {
216    let store = base_tree.store().as_ref();
217    let dir = base_tree.dir();
218    assert_eq!(side1_tree.dir(), dir);
219    assert_eq!(side2_tree.dir(), dir);
220
221    if base_tree.id() == side1_tree.id() {
222        return Ok(side2_tree.id().clone());
223    }
224    if base_tree.id() == side2_tree.id() || side1_tree.id() == side2_tree.id() {
225        return Ok(side1_tree.id().clone());
226    }
227
228    // Start with a tree identical to side 1 and modify based on changes from base
229    // to side 2.
230    let mut new_tree = side1_tree.data().clone();
231    diff_entries(base_tree, side2_tree, &mut |basename,
232                                              diff|
233     -> Result<(), StoreError> {
234        let maybe_side1 = side1_tree.value(basename);
235        let (maybe_base, maybe_side2) = match diff {
236            TreeValueDiff::Modified(base, side2) => (Some(base), Some(side2)),
237            TreeValueDiff::Added(side2) => (None, Some(side2)),
238            TreeValueDiff::Removed(base) => (Some(base), None),
239        };
240        if maybe_side1 == maybe_base {
241            // side 1 is unchanged: use the value from side 2
242            match maybe_side2 {
243                None => new_tree.remove(basename),
244                Some(side2) => new_tree.set(basename.to_owned(), side2.clone()),
245            };
246        } else if maybe_side1 == maybe_side2 {
247            // Both sides changed in the same way: new_tree already has the
248            // value
249        } else {
250            // The two sides changed in different ways
251            let new_value =
252                merge_tree_value(store, dir, basename, maybe_base, maybe_side1, maybe_side2)?;
253            match new_value {
254                None => new_tree.remove(basename),
255                Some(value) => new_tree.set(basename.to_owned(), value),
256            }
257        }
258        Ok(())
259    })?;
260    store.write_tree(dir, &new_tree)
261}
262
263fn merge_tree_value(
264    store: &StoreWrapper,
265    dir: &DirRepoPath,
266    basename: &str,
267    maybe_base: Option<&TreeValue>,
268    maybe_side1: Option<&TreeValue>,
269    maybe_side2: Option<&TreeValue>,
270) -> Result<Option<TreeValue>, StoreError> {
271    // Resolve non-trivial conflicts:
272    //   * resolve tree conflicts by recursing
273    //   * try to resolve file conflicts by merging the file contents
274    //   * leave other conflicts (e.g. file/dir conflicts, remove/modify conflicts)
275    //     unresolved
276    Ok(match (maybe_base, maybe_side1, maybe_side2) {
277        (
278            Some(TreeValue::Tree(base)),
279            Some(TreeValue::Tree(side1)),
280            Some(TreeValue::Tree(side2)),
281        ) => {
282            let subdir = dir.join(&DirRepoPathComponent::from(basename));
283            let merged_tree_id = merge_trees(
284                &store.get_tree(&subdir, &side1).unwrap(),
285                &store.get_tree(&subdir, &base).unwrap(),
286                &store.get_tree(&subdir, &side2).unwrap(),
287            )?;
288            if &merged_tree_id == store.empty_tree_id() {
289                None
290            } else {
291                Some(TreeValue::Tree(merged_tree_id))
292            }
293        }
294        _ => {
295            let maybe_merged = match (maybe_base, maybe_side1, maybe_side2) {
296                (
297                    Some(TreeValue::Normal {
298                        id: base_id,
299                        executable: base_executable,
300                    }),
301                    Some(TreeValue::Normal {
302                        id: side1_id,
303                        executable: side1_executable,
304                    }),
305                    Some(TreeValue::Normal {
306                        id: side2_id,
307                        executable: side2_executable,
308                    }),
309                ) => {
310                    let executable = if base_executable == side1_executable {
311                        *side2_executable
312                    } else if base_executable == side2_executable {
313                        *side1_executable
314                    } else {
315                        assert_eq!(side1_executable, side2_executable);
316                        *side1_executable
317                    };
318
319                    let filename = dir.join(&FileRepoPathComponent::from(basename));
320                    let mut base_content = vec![];
321                    store
322                        .read_file(&filename, &base_id)?
323                        .read_to_end(&mut base_content)?;
324                    let mut side1_content = vec![];
325                    store
326                        .read_file(&filename, &side1_id)?
327                        .read_to_end(&mut side1_content)?;
328                    let mut side2_content = vec![];
329                    store
330                        .read_file(&filename, &side2_id)?
331                        .read_to_end(&mut side2_content)?;
332
333                    let merge_result = files::merge(&base_content, &side1_content, &side2_content);
334                    match merge_result {
335                        MergeResult::Resolved(merged_content) => {
336                            let id = store.write_file(&filename, &mut merged_content.as_slice())?;
337                            Some(TreeValue::Normal { id, executable })
338                        }
339                        MergeResult::Conflict(_) => None,
340                    }
341                }
342                _ => None,
343            };
344            match maybe_merged {
345                Some(merged) => Some(merged),
346                None => {
347                    let mut conflict = Conflict::default();
348                    if let Some(base) = maybe_base {
349                        conflict.removes.push(ConflictPart {
350                            value: base.clone(),
351                        });
352                    }
353                    if let Some(side1) = maybe_side1 {
354                        conflict.adds.push(ConflictPart {
355                            value: side1.clone(),
356                        });
357                    }
358                    if let Some(side2) = maybe_side2 {
359                        conflict.adds.push(ConflictPart {
360                            value: side2.clone(),
361                        });
362                    }
363                    simplify_conflict(store, &conflict)?
364                }
365            }
366        }
367    })
368}
369
370fn conflict_part_to_conflict(
371    store: &StoreWrapper,
372    part: &ConflictPart,
373) -> Result<Conflict, StoreError> {
374    match &part.value {
375        TreeValue::Conflict(id) => {
376            let conflict = store.read_conflict(id)?;
377            Ok(conflict)
378        }
379        other => Ok(Conflict {
380            removes: vec![],
381            adds: vec![ConflictPart {
382                value: other.clone(),
383            }],
384        }),
385    }
386}
387
388fn simplify_conflict(
389    store: &StoreWrapper,
390    conflict: &Conflict,
391) -> Result<Option<TreeValue>, StoreError> {
392    // Important cases to simplify:
393    //
394    // D
395    // |
396    // B C
397    // |/
398    // A
399    //
400    // 1. rebase C to B, then back to A => there should be no conflict
401    // 2. rebase C to B, then to D => the conflict should not mention B
402    // 3. rebase B to C and D to B', then resolve the conflict in B' and rebase D'
403    // on top =>    the conflict should be between B'', B, and D; it should not
404    // mention the conflict in B'
405
406    // Case 1 above:
407    // After first rebase, the conflict is {+B-A+C}. After rebasing back,
408    // the unsimplified conflict is {+A-B+{+B-A+C}}. Since the
409    // inner conflict is positive, we can simply move it into the outer conflict. We
410    // thus get {+A-B+B-A+C}, which we can then simplify to just C (because {+C} ==
411    // C).
412    //
413    // Case 2 above:
414    // After first rebase, the conflict is {+B-A+C}. After rebasing to D,
415    // the unsimplified conflict is {+D-C+{+B-A+C}}. As in the
416    // previous case, the inner conflict can be moved into the outer one. We then
417    // get {+D-C+B-A+C}. That can be simplified to
418    // {+D+B-A}, which is the desired conflict.
419    //
420    // Case 3 above:
421    // TODO: describe this case
422
423    // First expand any diffs with nested conflicts.
424    let mut new_removes = vec![];
425    let mut new_adds = vec![];
426    for part in &conflict.adds {
427        match part.value {
428            TreeValue::Conflict(_) => {
429                let conflict = conflict_part_to_conflict(&store, part)?;
430                new_removes.extend_from_slice(&conflict.removes);
431                new_adds.extend_from_slice(&conflict.adds);
432            }
433            _ => {
434                new_adds.push(part.clone());
435            }
436        }
437    }
438    for part in &conflict.removes {
439        match part.value {
440            TreeValue::Conflict(_) => {
441                let conflict = conflict_part_to_conflict(&store, part)?;
442                new_removes.extend_from_slice(&conflict.adds);
443                new_adds.extend_from_slice(&conflict.removes);
444            }
445            _ => {
446                new_removes.push(part.clone());
447            }
448        }
449    }
450
451    // Remove pairs of entries that match in the removes and adds.
452    let mut add_index = 0;
453    while add_index < new_adds.len() {
454        let add = &new_adds[add_index];
455        add_index += 1;
456        for (remove_index, remove) in new_removes.iter().enumerate() {
457            if remove.value == add.value {
458                new_removes.remove(remove_index);
459                add_index -= 1;
460                new_adds.remove(add_index);
461                break;
462            }
463        }
464    }
465
466    // TODO: We should probably remove duplicate entries here too. So if we have
467    // {+A+A}, that would become just {+A}. Similarly {+B-A+B} would be just
468    // {+B-A}.
469
470    if new_adds.is_empty() {
471        // If there are no values to add, then the path doesn't exist (so return None to
472        // indicate that).
473        return Ok(None);
474    }
475
476    if new_removes.is_empty() && new_adds.len() == 1 {
477        // A single add means that the current state is that state.
478        return Ok(Some(new_adds[0].value.clone()));
479    }
480
481    let conflict_id = store.write_conflict(&Conflict {
482        adds: new_adds,
483        removes: new_removes,
484    })?;
485    Ok(Some(TreeValue::Conflict(conflict_id)))
486}