Skip to main content

grit_lib/
merge_trees.rs

1//! Rename-aware three-way tree merge for cherry-pick / revert style merges.
2//!
3//! Flattens trees to path → [`IndexEntry`] maps, aligns paths using rename detection
4//! between base↔ours and base↔theirs (same idea as Git's merge-ort rename paths), then
5//! runs path-by-path three-way rules with optional content merge.
6
7use std::collections::{BTreeMap, HashMap, HashSet};
8
9use crate::diff::{detect_renames, diff_trees, DiffStatus};
10use crate::index::{Index, IndexEntry};
11use crate::merge_file::{merge, ConflictStyle, MergeFavor, MergeInput};
12use crate::objects::{parse_tree, ObjectId, ObjectKind};
13use crate::odb::Odb;
14use crate::repo::Repository;
15use crate::write_tree::write_tree_from_index;
16
17/// Result of merging three trees with optional conflict-marker blobs for checkout.
18#[derive(Debug)]
19pub struct TreeMergeOutput {
20    /// Merged index (may include unmerged stages).
21    pub index: Index,
22    /// Conflict-marker blob OIDs keyed by path (stage-2 path bytes).
23    pub conflict_content: BTreeMap<Vec<u8>, ObjectId>,
24}
25
26#[derive(Clone, Copy, Debug, Default)]
27pub struct WhitespaceMergeOptions {
28    pub ignore_all_space: bool,
29    pub ignore_space_change: bool,
30    pub ignore_space_at_eol: bool,
31    pub ignore_cr_at_eol: bool,
32}
33
34/// How to label the "theirs" side in textual conflict markers.
35#[derive(Clone, Copy, Debug)]
36pub enum TheirsConflictLabel<'a> {
37    /// Use the UTF-8 file path (matches cherry-pick / revert).
38    PathUtf8,
39    /// Fixed label (e.g. `local` for `git checkout -m`).
40    Fixed(&'a str),
41}
42
43/// Labels and marker style for conflict output during tree merges.
44#[derive(Clone, Copy, Debug)]
45pub struct TreeMergeConflictPresentation<'a> {
46    /// Label after `<<<<<<<` (Git: ours side name).
47    pub label_ours: &'a str,
48    /// Label after `>>>>>>>` (path-based or fixed).
49    pub label_theirs: TheirsConflictLabel<'a>,
50    /// Label after `|||||||` in diff3 output.
51    pub label_base: &'a str,
52    /// Two-way vs diff3 markers.
53    pub style: ConflictStyle,
54    /// `git checkout -m`: always leave unmerged index entries when ours ≠ theirs.
55    pub checkout_merge: bool,
56}
57
58impl Default for TreeMergeConflictPresentation<'_> {
59    fn default() -> Self {
60        Self {
61            label_ours: "HEAD",
62            label_theirs: TheirsConflictLabel::PathUtf8,
63            label_base: "merged common ancestors",
64            style: ConflictStyle::Merge,
65            checkout_merge: false,
66        }
67    }
68}
69
70/// Merge `base` + `ours` + `their` trees (by OID) into an index using rename detection.
71///
72/// Paths in the result follow the **ours** tree naming. Rename detection uses a 50%
73/// similarity threshold, matching Git's default rename detection during merges.
74///
75/// # Parameters
76///
77/// - `base_tree` — common ancestor tree (parent tree for cherry-pick, reverted commit tree for revert).
78/// - `ours_tree` — current branch tree (HEAD during pick/revert).
79/// - `theirs_tree` — tree being applied (picked commit for cherry-pick, parent of reverted commit for revert).
80/// - `favor` / `ws` — merge strategy favour and whitespace options for textual merges.
81/// - `presentation` — conflict marker labels and style (`label_base` is the diff3 "base" name).
82///
83/// # Errors
84///
85/// Returns [`crate::error::Error`] on ODB read failures or corrupt trees.
86pub fn merge_trees_three_way(
87    repo: &Repository,
88    base_tree: ObjectId,
89    ours_tree: ObjectId,
90    theirs_tree: ObjectId,
91    favor: MergeFavor,
92    ws: WhitespaceMergeOptions,
93    presentation: TreeMergeConflictPresentation<'_>,
94) -> crate::error::Result<TreeMergeOutput> {
95    let odb = &repo.odb;
96    let base = tree_to_map(tree_to_index_entries(repo, &base_tree, "")?);
97    let ours = tree_to_map(tree_to_index_entries(repo, &ours_tree, "")?);
98    let theirs = tree_to_map(tree_to_index_entries(repo, &theirs_tree, "")?);
99
100    let ours_old_to_new = rename_pairs_base_to_other(odb, &base_tree, &ours_tree)?;
101    let theirs_pairs = rename_pairs_base_to_other(odb, &base_tree, &theirs_tree)?;
102
103    let mut ours_new_to_old: HashMap<Vec<u8>, Vec<u8>> = HashMap::new();
104    let mut ours_best_by_dest: HashMap<Vec<u8>, (Vec<u8>, u32)> = HashMap::new();
105    for (old, new, score) in &ours_old_to_new {
106        let new_b = new.clone();
107        let should_take = match ours_best_by_dest.get(&new_b) {
108            None => true,
109            Some((_, s)) => *score > *s,
110        };
111        if should_take {
112            ours_best_by_dest.insert(new_b, (old.clone(), *score));
113        }
114    }
115    for (new_path, (old_path, _)) in ours_best_by_dest {
116        ours_new_to_old.insert(new_path, old_path);
117    }
118    let mut theirs_old_to_new: HashMap<Vec<u8>, Vec<u8>> = HashMap::new();
119    let mut best_by_dest: HashMap<Vec<u8>, (Vec<u8>, u32)> = HashMap::new();
120    for (old, new, score) in theirs_pairs {
121        let new_b = new.clone();
122        let should_take = match best_by_dest.get(&new_b) {
123            None => true,
124            Some((_, s)) => score > *s,
125        };
126        if should_take {
127            best_by_dest.insert(new_b, (old.clone(), score));
128        }
129    }
130    for (new_path, (old_path, _)) in best_by_dest {
131        theirs_old_to_new.insert(old_path, new_path);
132    }
133
134    three_way_on_aligned_paths(
135        repo,
136        &base,
137        &ours,
138        &theirs,
139        &ours_new_to_old,
140        &theirs_old_to_new,
141        favor,
142        ws,
143        presentation,
144    )
145}
146
147fn rename_pairs_base_to_other(
148    odb: &Odb,
149    base_tree: &ObjectId,
150    other_tree: &ObjectId,
151) -> crate::error::Result<Vec<(Vec<u8>, Vec<u8>, u32)>> {
152    let mut entries = diff_trees(odb, Some(base_tree), Some(other_tree), "")?;
153    entries = detect_renames(odb, None, entries, 50);
154    let mut out = Vec::new();
155    for e in entries {
156        if e.status != DiffStatus::Renamed {
157            continue;
158        }
159        let Some(old) = e.old_path else {
160            continue;
161        };
162        let Some(new) = e.new_path else {
163            continue;
164        };
165        let score = e.score.unwrap_or(0);
166        out.push((old.into_bytes(), new.into_bytes(), score));
167    }
168    Ok(out)
169}
170
171fn three_way_on_aligned_paths(
172    repo: &Repository,
173    base: &HashMap<Vec<u8>, IndexEntry>,
174    ours: &HashMap<Vec<u8>, IndexEntry>,
175    theirs: &HashMap<Vec<u8>, IndexEntry>,
176    ours_new_to_old: &HashMap<Vec<u8>, Vec<u8>>,
177    theirs_old_to_new: &HashMap<Vec<u8>, Vec<u8>>,
178    favor: MergeFavor,
179    ws: WhitespaceMergeOptions,
180    presentation: TreeMergeConflictPresentation<'_>,
181) -> crate::error::Result<TreeMergeOutput> {
182    let mut out = Index::new();
183    let mut conflict_content = BTreeMap::new();
184    let mut handled_base: HashSet<Vec<u8>> = HashSet::new();
185    let mut handled_theirs: HashSet<Vec<u8>> = HashSet::new();
186
187    for op in sorted_paths(ours.keys()) {
188        let bp = if let Some(old) = ours_new_to_old.get(&op) {
189            Some(old.clone())
190        } else if base.contains_key(&op) {
191            Some(op.clone())
192        } else {
193            None
194        };
195
196        let tp = if let Some(ref bpath) = bp {
197            theirs_old_to_new
198                .get(bpath)
199                .cloned()
200                .unwrap_or_else(|| bpath.clone())
201        } else {
202            op.clone()
203        };
204
205        let b = bp.as_ref().and_then(|p| base.get(p));
206        let o = ours.get(&op);
207        let t = theirs.get(&tp);
208        if t.is_some() {
209            handled_theirs.insert(tp.clone());
210        }
211
212        if let Some(ref p) = bp {
213            handled_base.insert(p.clone());
214        }
215
216        // When our branch still has the file at the same path as the merge base, but the side
217        // being applied renamed that path, the result must use their pathname (matches Git
218        // cherry-pick / merge-ort: e.g. base+HEAD at `file.txt`, picked commit has `renamed.txt`).
219        let out_path = if bp.as_ref().is_some_and(|b| b == &op) && tp != op {
220            tp.clone()
221        } else {
222            op.clone()
223        };
224
225        merge_one_path(
226            repo,
227            &mut out,
228            &mut conflict_content,
229            &out_path,
230            b,
231            o,
232            t,
233            favor,
234            ws,
235            presentation,
236        )?;
237    }
238
239    for bp in sorted_paths(base.keys()) {
240        if handled_base.contains(&bp) {
241            continue;
242        }
243        let tp = theirs_old_to_new
244            .get(&bp)
245            .cloned()
246            .unwrap_or_else(|| bp.clone());
247        let b = base.get(&bp);
248        let o: Option<&IndexEntry> = None;
249        let t = theirs.get(&tp);
250        if t.is_some() {
251            handled_theirs.insert(tp.clone());
252        }
253        merge_one_path(
254            repo,
255            &mut out,
256            &mut conflict_content,
257            &bp,
258            b,
259            o,
260            t,
261            favor,
262            ws,
263            presentation,
264        )?;
265    }
266
267    for tp in sorted_paths(theirs.keys()) {
268        if handled_theirs.contains(&tp) {
269            continue;
270        }
271        let b: Option<&IndexEntry> = None;
272        let o: Option<&IndexEntry> = None;
273        let t = theirs.get(&tp);
274        merge_one_path(
275            repo,
276            &mut out,
277            &mut conflict_content,
278            &tp,
279            b,
280            o,
281            t,
282            favor,
283            ws,
284            presentation,
285        )?;
286    }
287
288    out.sort();
289    Ok(TreeMergeOutput {
290        index: out,
291        conflict_content,
292    })
293}
294
295fn sorted_paths<'a>(keys: impl Iterator<Item = &'a Vec<u8>>) -> Vec<Vec<u8>> {
296    let mut v: Vec<Vec<u8>> = keys.cloned().collect();
297    v.sort();
298    v
299}
300
301fn merge_one_path(
302    repo: &Repository,
303    index: &mut Index,
304    conflict_content: &mut BTreeMap<Vec<u8>, ObjectId>,
305    out_path: &[u8],
306    b: Option<&IndexEntry>,
307    o: Option<&IndexEntry>,
308    t: Option<&IndexEntry>,
309    favor: MergeFavor,
310    ws: WhitespaceMergeOptions,
311    presentation: TreeMergeConflictPresentation<'_>,
312) -> crate::error::Result<()> {
313    match (b, o, t) {
314        (_, Some(oe), Some(te)) if same_blob(oe, te) => {
315            let mut e = oe.clone();
316            e.path = out_path.to_vec();
317            e.flags = path_len_flags(out_path);
318            index.entries.push(e);
319        }
320        (Some(be), Some(oe), Some(te)) if same_blob(be, oe) => {
321            let mut e = te.clone();
322            e.path = out_path.to_vec();
323            e.flags = path_len_flags(out_path);
324            index.entries.push(e);
325        }
326        (Some(be), Some(oe), Some(te)) if same_blob(be, te) => {
327            let mut e = oe.clone();
328            e.path = out_path.to_vec();
329            e.flags = path_len_flags(out_path);
330            index.entries.push(e);
331        }
332        (Some(be), Some(oe), Some(te))
333            if be.mode == 0o160000 && oe.mode == 0o160000 && te.mode == 0o160000 =>
334        {
335            if same_blob(oe, te) {
336                let mut e = oe.clone();
337                e.path = out_path.to_vec();
338                e.flags = path_len_flags(out_path);
339                index.entries.push(e);
340            } else if same_blob(be, oe) {
341                let mut e = te.clone();
342                e.path = out_path.to_vec();
343                e.flags = path_len_flags(out_path);
344                index.entries.push(e);
345            } else if same_blob(be, te) {
346                let mut e = oe.clone();
347                e.path = out_path.to_vec();
348                e.flags = path_len_flags(out_path);
349                index.entries.push(e);
350            } else {
351                stage_entry(index, out_path, be, 1);
352                stage_entry(index, out_path, oe, 2);
353                stage_entry(index, out_path, te, 3);
354            }
355        }
356        (Some(be), Some(oe), Some(te)) => {
357            content_merge_or_conflict(
358                repo,
359                index,
360                conflict_content,
361                out_path,
362                be,
363                oe,
364                te,
365                favor,
366                ws,
367                presentation,
368            )?;
369        }
370        (None, Some(oe), None) => {
371            let mut e = oe.clone();
372            e.path = out_path.to_vec();
373            e.flags = path_len_flags(out_path);
374            index.entries.push(e);
375        }
376        (None, None, Some(te)) => {
377            let mut e = te.clone();
378            e.path = out_path.to_vec();
379            e.flags = path_len_flags(out_path);
380            index.entries.push(e);
381        }
382        (None, Some(oe), Some(te)) if same_blob(oe, te) => {
383            let mut e = oe.clone();
384            e.path = out_path.to_vec();
385            e.flags = path_len_flags(out_path);
386            index.entries.push(e);
387        }
388        (None, Some(oe), Some(te)) => {
389            stage_entry(index, out_path, oe, 2);
390            stage_entry(index, out_path, te, 3);
391        }
392        (Some(_), None, None) => {}
393        (Some(be), Some(oe), None) if same_blob(be, oe) => {}
394        (Some(be), None, Some(te)) if same_blob(be, te) => {}
395        (Some(be), Some(oe), None) => {
396            stage_entry(index, out_path, be, 1);
397            stage_entry(index, out_path, oe, 2);
398        }
399        (Some(be), None, Some(te)) => {
400            stage_entry(index, out_path, be, 1);
401            stage_entry(index, out_path, te, 3);
402        }
403        (None, None, None) => {}
404    }
405    Ok(())
406}
407
408fn path_len_flags(path: &[u8]) -> u16 {
409    path.len().min(0xFFF) as u16
410}
411
412fn same_blob(a: &IndexEntry, b: &IndexEntry) -> bool {
413    a.oid == b.oid && a.mode == b.mode
414}
415
416fn stage_entry(index: &mut Index, path: &[u8], src: &IndexEntry, stage: u8) {
417    let mut e = src.clone();
418    e.path = path.to_vec();
419    e.flags = path_len_flags(path) | ((stage as u16) << 12);
420    index.entries.push(e);
421}
422
423fn content_merge_or_conflict(
424    repo: &Repository,
425    index: &mut Index,
426    conflict_content: &mut BTreeMap<Vec<u8>, ObjectId>,
427    path: &[u8],
428    base: &IndexEntry,
429    ours: &IndexEntry,
430    theirs: &IndexEntry,
431    favor: MergeFavor,
432    ws: WhitespaceMergeOptions,
433    presentation: TreeMergeConflictPresentation<'_>,
434) -> crate::error::Result<()> {
435    if base.mode == 0o160000 || ours.mode == 0o160000 || theirs.mode == 0o160000 {
436        stage_entry(index, path, base, 1);
437        stage_entry(index, path, ours, 2);
438        stage_entry(index, path, theirs, 3);
439        return Ok(());
440    }
441
442    // `git checkout -m`: Git records unmerged index entries whenever the branch being checked
443    // out and the pre-checkout working tree disagree, even when a line merge would auto-resolve.
444    if presentation.checkout_merge && !same_blob(ours, theirs) {
445        let ours_obj = repo.odb.read(&ours.oid)?;
446        let theirs_obj = repo.odb.read(&theirs.oid)?;
447        let path_label = String::from_utf8_lossy(path);
448        let label_theirs: std::borrow::Cow<'_, str> = match presentation.label_theirs {
449            TheirsConflictLabel::PathUtf8 => path_label.clone(),
450            TheirsConflictLabel::Fixed(s) => std::borrow::Cow::Borrowed(s),
451        };
452        let mut out = Vec::new();
453        out.extend_from_slice(format!("<<<<<<< {}\n", presentation.label_ours).as_bytes());
454        out.extend_from_slice(&ours_obj.data);
455        if !out.ends_with(b"\n") {
456            out.push(b'\n');
457        }
458        out.extend_from_slice(b"=======\n");
459        out.extend_from_slice(&theirs_obj.data);
460        if !out.ends_with(b"\n") {
461            out.push(b'\n');
462        }
463        out.extend_from_slice(format!(">>>>>>> {}\n", label_theirs).as_bytes());
464        let conflict_oid = repo.odb.write(ObjectKind::Blob, &out)?;
465        conflict_content.insert(path.to_vec(), conflict_oid);
466        stage_entry(index, path, base, 1);
467        stage_entry(index, path, ours, 2);
468        stage_entry(index, path, theirs, 3);
469        return Ok(());
470    }
471
472    let base_obj = repo.odb.read(&base.oid)?;
473    let ours_obj = repo.odb.read(&ours.oid)?;
474    let theirs_obj = repo.odb.read(&theirs.oid)?;
475
476    if crate::merge_file::is_binary(&base_obj.data)
477        || crate::merge_file::is_binary(&ours_obj.data)
478        || crate::merge_file::is_binary(&theirs_obj.data)
479    {
480        match favor {
481            MergeFavor::Theirs => {
482                let mut e = theirs.clone();
483                e.path = path.to_vec();
484                e.flags = path_len_flags(path);
485                index.entries.push(e);
486                return Ok(());
487            }
488            MergeFavor::Ours => {
489                let mut e = ours.clone();
490                e.path = path.to_vec();
491                e.flags = path_len_flags(path);
492                index.entries.push(e);
493                return Ok(());
494            }
495            _ => {
496                stage_entry(index, path, base, 1);
497                stage_entry(index, path, ours, 2);
498                stage_entry(index, path, theirs, 3);
499                return Ok(());
500            }
501        }
502    }
503
504    let path_label = String::from_utf8_lossy(path);
505    let label_theirs: std::borrow::Cow<'_, str> = match presentation.label_theirs {
506        TheirsConflictLabel::PathUtf8 => path_label.clone(),
507        TheirsConflictLabel::Fixed(s) => std::borrow::Cow::Borrowed(s),
508    };
509    let input = MergeInput {
510        base: &base_obj.data,
511        ours: &ours_obj.data,
512        theirs: &theirs_obj.data,
513        label_ours: presentation.label_ours,
514        label_base: presentation.label_base,
515        label_theirs: label_theirs.as_ref(),
516        favor,
517        style: presentation.style,
518        marker_size: 7,
519        diff_algorithm: None,
520        ignore_all_space: ws.ignore_all_space,
521        ignore_space_change: ws.ignore_space_change,
522        ignore_space_at_eol: ws.ignore_space_at_eol,
523        ignore_cr_at_eol: ws.ignore_cr_at_eol,
524    };
525
526    let result = merge(&input)?;
527
528    if result.conflicts > 0 {
529        let conflict_oid = repo.odb.write(ObjectKind::Blob, &result.content)?;
530        conflict_content.insert(path.to_vec(), conflict_oid);
531        stage_entry(index, path, base, 1);
532        stage_entry(index, path, ours, 2);
533        stage_entry(index, path, theirs, 3);
534    } else {
535        let merged_oid = repo.odb.write(ObjectKind::Blob, &result.content)?;
536        let mut entry = ours.clone();
537        entry.path = path.to_vec();
538        entry.flags = path_len_flags(path);
539        entry.oid = merged_oid;
540        if base.mode == ours.mode && base.mode != theirs.mode {
541            entry.mode = theirs.mode;
542        }
543        index.entries.push(entry);
544    }
545
546    Ok(())
547}
548
549fn tree_to_index_entries(
550    repo: &Repository,
551    oid: &ObjectId,
552    prefix: &str,
553) -> crate::error::Result<Vec<IndexEntry>> {
554    let obj = repo.odb.read(oid)?;
555    if obj.kind != ObjectKind::Tree {
556        return Err(crate::error::Error::CorruptObject(format!(
557            "expected tree, got {}",
558            obj.kind.as_str()
559        )));
560    }
561    let entries = parse_tree(&obj.data)?;
562    let mut result = Vec::new();
563
564    for te in entries {
565        let name = String::from_utf8_lossy(&te.name).into_owned();
566        let path = if prefix.is_empty() {
567            name.clone()
568        } else {
569            format!("{prefix}/{name}")
570        };
571
572        if te.mode == 0o040000 {
573            let sub = tree_to_index_entries(repo, &te.oid, &path)?;
574            result.extend(sub);
575        } else {
576            let path_bytes = path.into_bytes();
577            result.push(IndexEntry {
578                ctime_sec: 0,
579                ctime_nsec: 0,
580                mtime_sec: 0,
581                mtime_nsec: 0,
582                dev: 0,
583                ino: 0,
584                mode: te.mode,
585                uid: 0,
586                gid: 0,
587                size: 0,
588                oid: te.oid,
589                flags: path_bytes.len().min(0xFFF) as u16,
590                flags_extended: None,
591                path: path_bytes,
592                base_index_pos: 0,
593            });
594        }
595    }
596    Ok(result)
597}
598
599fn tree_to_map(entries: Vec<IndexEntry>) -> HashMap<Vec<u8>, IndexEntry> {
600    let mut out = HashMap::new();
601    for e in entries {
602        out.insert(e.path.clone(), e);
603    }
604    out
605}
606
607/// True when the index tree matches `head_tree_oid` (used for empty pick detection).
608#[must_use]
609pub fn index_tree_oid_matches_head(
610    odb: &Odb,
611    index: &Index,
612    head_tree_oid: &ObjectId,
613) -> crate::error::Result<bool> {
614    let merged = write_tree_from_index(odb, index, "")?;
615    Ok(merged == *head_tree_oid)
616}