Skip to main content

grit_lib/
combined_tree_diff.rs

1//! Multi-parent combined tree diff (Git `diff_tree_paths` / `find_paths_multitree`).
2//!
3//! Implements the `D(T,P1...Pn)` walk from `tree-diff.c` for merge commits, producing
4//! per-path parent status letters used by `git diff-tree -c --name-status` and
5//! `--find-object` filtering (`combined_objfind` in `combine-diff.c`).
6
7use crate::diff::zero_oid;
8use crate::error::Result;
9use crate::objects::{parse_commit, parse_tree, tree_entry_cmp, ObjectId, ObjectKind, TreeEntry};
10use crate::odb::Odb;
11
12/// One character per parent for raw / name-status combined output (`A`/`M`/`D`).
13#[must_use]
14pub fn combined_parent_status_char(s: CombinedParentStatus) -> char {
15    match s {
16        CombinedParentStatus::Added => 'A',
17        CombinedParentStatus::Modified => 'M',
18        CombinedParentStatus::Deleted => 'D',
19        CombinedParentStatus::Renamed => 'R',
20    }
21}
22
23/// Git raw combined line (`::::modes... oids... MM\tpath`).
24///
25/// When `combined_all_paths` is set and a parent side is a rename, the source name for every
26/// parent is emitted (tab-separated) before the merge-result path, each C-quoted with `quote_fully`.
27#[must_use]
28pub fn format_combined_raw_line_all_paths(
29    p: &CombinedDiffPath,
30    abbrev_len: Option<usize>,
31    combined_all_paths: bool,
32    quote_fully: bool,
33) -> String {
34    if !combined_all_paths {
35        return format_combined_raw_line(p, abbrev_len);
36    }
37    // Emit metadata (modes/oids/status) followed by one source name per parent and the merge path.
38    // We can't string-split the single-path line on `\t` because funny names contain tabs.
39    let mut line = combined_raw_meta(p, abbrev_len);
40    for side in &p.parents {
41        let name = side.rename_from.as_deref().unwrap_or(&p.path);
42        line.push('\t');
43        line.push_str(&crate::quote_path::quote_c_style(name, quote_fully));
44    }
45    line.push('\t');
46    line.push_str(&crate::quote_path::quote_c_style(&p.path, quote_fully));
47    line
48}
49
50/// Git raw combined line (`::::modes... oids... MM\tpath`).
51#[must_use]
52pub fn format_combined_raw_line(p: &CombinedDiffPath, abbrev_len: Option<usize>) -> String {
53    format!("{}\t{}", combined_raw_meta(p, abbrev_len), p.path)
54}
55
56/// Metadata portion of a combined raw line: `::::modes... oids... <status>` (no trailing tab/path).
57#[must_use]
58pub fn combined_raw_meta(p: &CombinedDiffPath, abbrev_len: Option<usize>) -> String {
59    let n = p.parents.len();
60    let mut colons = String::with_capacity(n);
61    for _ in 0..n {
62        colons.push(':');
63    }
64    let mut modes = String::new();
65    for side in &p.parents {
66        modes.push_str(&format!("{:06o} ", side.mode));
67    }
68    modes.push_str(&format!("{:06o}", p.merge_mode));
69    // When OIDs are abbreviated, Git appends `...` if `GIT_PRINT_SHA1_ELLIPSIS=yes`
70    // (matches the non-combined raw format).
71    let ellipsis = if abbrev_len.is_some()
72        && std::env::var("GIT_PRINT_SHA1_ELLIPSIS").ok().as_deref() == Some("yes")
73    {
74        "..."
75    } else {
76        ""
77    };
78    let mut oids = String::new();
79    for side in &p.parents {
80        let h = format!("{}", side.oid);
81        let disp = if let Some(len) = abbrev_len {
82            &h[..len.min(h.len())]
83        } else {
84            h.as_str()
85        };
86        oids.push(' ');
87        oids.push_str(disp);
88        oids.push_str(ellipsis);
89    }
90    let rh = format!("{}", p.merge_oid);
91    let rdisp = if let Some(len) = abbrev_len {
92        &rh[..len.min(rh.len())]
93    } else {
94        rh.as_str()
95    };
96    oids.push(' ');
97    oids.push_str(rdisp);
98    oids.push_str(ellipsis);
99    oids.push(' ');
100    let status: String = p
101        .parents
102        .iter()
103        .map(|s| combined_parent_status_char(s.status))
104        .collect();
105    format!("{colons}{modes}{oids}{status}")
106}
107
108/// Per-parent coarse status in a combined diff (`A` / `M` / `D`).
109#[derive(Debug, Clone, Copy, PartialEq, Eq)]
110pub enum CombinedParentStatus {
111    /// Parent lacks the path; merge tree added it.
112    Added,
113    /// Parent has the path at the same name as the merge result.
114    Modified,
115    /// Path removed from merge (only in parents that had it at the min name).
116    Deleted,
117    /// Path renamed relative to this parent (`-M`/`--combined-all-paths`); the source name is
118    /// carried on [`CombinedParentSide::rename_from`].
119    Renamed,
120}
121
122/// One path in a combined diff: merge result tree vs each parent tree.
123#[derive(Debug, Clone)]
124pub struct CombinedDiffPath {
125    /// Path relative to repository root.
126    pub path: String,
127    /// Mode and OID on the merge result side (`0` / zero OID when deleted from merge).
128    pub merge_mode: u32,
129    pub merge_oid: ObjectId,
130    /// One slot per parent commit (same order as `parents` passed to [`combined_diff_paths_filtered`]).
131    pub parents: Vec<CombinedParentSide>,
132}
133
134/// One parent's contribution at a combined-diff path.
135#[derive(Debug, Clone)]
136pub struct CombinedParentSide {
137    pub mode: u32,
138    pub oid: ObjectId,
139    pub status: CombinedParentStatus,
140    /// Source path when this parent saw the result as a rename (`-M`); `None` otherwise.
141    pub rename_from: Option<String>,
142}
143
144/// Options for the multitree walk.
145#[derive(Debug, Clone, Default)]
146pub struct CombinedTreeDiffOptions {
147    /// Recurse into sub-trees (`diff_options.flags.recursive`).
148    pub recursive: bool,
149    /// Emit tree directory lines when recursing (`tree_in_recursive`, e.g. `--find-object`).
150    pub tree_in_recursive: bool,
151}
152
153fn is_tree_mode(mode: u32) -> bool {
154    (mode & 0o170000) == 0o040000
155}
156
157fn read_tree_entries(odb: &Odb, oid: Option<&ObjectId>) -> Result<Vec<TreeEntry>> {
158    let Some(oid) = oid else {
159        return Ok(Vec::new());
160    };
161    let obj = odb.read(oid)?;
162    if obj.kind != ObjectKind::Tree {
163        return Ok(Vec::new());
164    }
165    parse_tree(&obj.data)
166}
167
168fn tree_entry_pathcmp(a: Option<&TreeEntry>, b: Option<&TreeEntry>) -> std::cmp::Ordering {
169    match (a, b) {
170        (None, None) => std::cmp::Ordering::Equal,
171        (None, Some(_)) => std::cmp::Ordering::Greater,
172        (Some(_), None) => std::cmp::Ordering::Less,
173        (Some(e1), Some(e2)) => tree_entry_cmp(
174            &e1.name,
175            is_tree_mode(e1.mode),
176            &e2.name,
177            is_tree_mode(e2.mode),
178        ),
179    }
180}
181
182fn combined_matches_find_object(path: &CombinedDiffPath, find: Option<&ObjectId>) -> bool {
183    let Some(target) = find else {
184        return true;
185    };
186    if path.merge_oid == *target {
187        return true;
188    }
189    path.parents.iter().any(|p| p.oid == *target)
190}
191
192fn emit_combined_path(
193    path: String,
194    merge_entry: Option<&TreeEntry>,
195    tp: &[Vec<TreeEntry>],
196    tp_idx: &[usize],
197    parent_neq: &[bool],
198    find_object: Option<&ObjectId>,
199    out: &mut Vec<CombinedDiffPath>,
200) {
201    let nparent = tp.len();
202    let mut parents = Vec::with_capacity(nparent);
203
204    if let Some(te) = merge_entry {
205        for i in 0..nparent {
206            let tpi_valid = !parent_neq[i];
207            let (mode_i, oid_i, status) = if tpi_valid {
208                let e = &tp[i][tp_idx[i]];
209                (e.mode, e.oid, CombinedParentStatus::Modified)
210            } else {
211                (0u32, zero_oid(), CombinedParentStatus::Added)
212            };
213            parents.push(CombinedParentSide {
214                mode: mode_i,
215                oid: oid_i,
216                status,
217                rename_from: None,
218            });
219        }
220        let p = CombinedDiffPath {
221            path,
222            merge_mode: te.mode,
223            merge_oid: te.oid,
224            parents,
225        };
226        if combined_matches_find_object(&p, find_object) {
227            out.push(p);
228        }
229    } else {
230        for i in 0..nparent {
231            let tpi_valid = !parent_neq[i];
232            let (mode_i, oid_i, status) = if tpi_valid {
233                let e = &tp[i][tp_idx[i]];
234                (e.mode, e.oid, CombinedParentStatus::Deleted)
235            } else {
236                (0u32, zero_oid(), CombinedParentStatus::Added)
237            };
238            parents.push(CombinedParentSide {
239                mode: mode_i,
240                oid: oid_i,
241                status,
242                rename_from: None,
243            });
244        }
245        let p = CombinedDiffPath {
246            path,
247            merge_mode: 0,
248            merge_oid: zero_oid(),
249            parents,
250        };
251        if combined_matches_find_object(&p, find_object) {
252            out.push(p);
253        }
254    }
255}
256
257fn should_recurse_dir(_path: &str, _opt: &CombinedTreeDiffOptions) -> bool {
258    // Git's `diff_tree_paths` / combined merge diff always recurses into subtrees
259    // (see `diff_tree_combined`: `diffopts.flags.recursive = 1`).
260    true
261}
262
263fn ll_diff_tree_paths(
264    odb: &Odb,
265    out: &mut Vec<CombinedDiffPath>,
266    base_path: &str,
267    opt: &CombinedTreeDiffOptions,
268    merge_oid: Option<&ObjectId>,
269    parents_oid: &[Option<ObjectId>],
270    find_object: Option<&ObjectId>,
271) -> Result<()> {
272    let nparent = parents_oid.len();
273    let t_entries = read_tree_entries(odb, merge_oid)?;
274    let mut tp_entries: Vec<Vec<TreeEntry>> = Vec::with_capacity(nparent);
275    for po in parents_oid {
276        tp_entries.push(read_tree_entries(odb, po.as_ref())?);
277    }
278
279    let mut ti = 0usize;
280    let mut tp_idx = vec![0usize; nparent];
281
282    loop {
283        let t_cur = t_entries.get(ti);
284
285        if t_cur.is_none() && (0..nparent).all(|i| tp_idx[i] >= tp_entries[i].len()) {
286            break;
287        }
288
289        let mut imin = 0usize;
290        if nparent > 0 {
291            for i in 1..nparent {
292                let e_imin = tp_entries[imin].get(tp_idx[imin]);
293                let e_i = tp_entries[i].get(tp_idx[i]);
294                if tree_entry_pathcmp(e_i, e_imin) == std::cmp::Ordering::Less {
295                    imin = i;
296                }
297            }
298        }
299
300        let mut parent_neq = vec![false; nparent];
301        for i in 0..nparent {
302            let e_imin = tp_entries[imin].get(tp_idx[imin]);
303            let e_i = tp_entries[i].get(tp_idx[i]);
304            parent_neq[i] = tree_entry_pathcmp(e_i, e_imin) != std::cmp::Ordering::Equal;
305        }
306        for ne in parent_neq.iter_mut().take(imin) {
307            *ne = true;
308        }
309
310        let p_min = tp_entries[imin].get(tp_idx[imin]);
311
312        let cmp = tree_entry_pathcmp(t_cur, p_min);
313
314        if cmp == std::cmp::Ordering::Equal {
315            if let Some(te) = t_cur {
316                // Combined-diff intersection rule (git tree-diff.c ll_diff_tree_paths,
317                // cmp == 0 branch): skip the path when it is identical to *some* parent
318                // that has it at p[imin]. A path is only interesting for a combined diff
319                // if it differs from every parent. (find_copies_harder is not modelled
320                // here, matching the fast multitree path.)
321                let mut skip_emit = false;
322                for i in 0..nparent {
323                    // p[i] > p[imin]: path absent in this parent -> "+t" in D(T,Pi).
324                    if parent_neq[i] {
325                        continue;
326                    }
327                    if let Some(pe) = tp_entries[i].get(tp_idx[i]) {
328                        if pe.oid == te.oid && pe.mode == te.mode {
329                            // diff(t, pi) == empty: result matches this parent -> skip.
330                            skip_emit = true;
331                            break;
332                        }
333                    }
334                }
335
336                if !skip_emit {
337                    let name = std::str::from_utf8(&te.name).unwrap_or("");
338                    let full_path = if base_path.is_empty() {
339                        name.to_string()
340                    } else {
341                        format!("{base_path}/{name}")
342                    };
343
344                    let isdir = is_tree_mode(te.mode);
345                    let mut do_emit = true;
346                    if isdir && should_recurse_dir(&full_path, opt) {
347                        do_emit = opt.tree_in_recursive;
348                    }
349
350                    if do_emit {
351                        emit_combined_path(
352                            full_path.clone(),
353                            Some(te),
354                            &tp_entries,
355                            &tp_idx,
356                            &parent_neq,
357                            find_object,
358                            out,
359                        );
360                    }
361
362                    if isdir && should_recurse_dir(&full_path, opt) {
363                        let merge_child = te.oid;
364                        let mut child_parent_opts = vec![None; nparent];
365                        for i in 0..nparent {
366                            if parent_neq[i] {
367                                continue;
368                            }
369                            if let Some(pe) = tp_entries[i].get(tp_idx[i]) {
370                                if tree_entry_cmp(
371                                    &pe.name,
372                                    is_tree_mode(pe.mode),
373                                    &te.name,
374                                    is_tree_mode(te.mode),
375                                ) == std::cmp::Ordering::Equal
376                                {
377                                    child_parent_opts[i] = Some(pe.oid);
378                                }
379                            }
380                        }
381                        ll_diff_tree_paths(
382                            odb,
383                            out,
384                            &full_path,
385                            opt,
386                            Some(&merge_child),
387                            &child_parent_opts,
388                            find_object,
389                        )?;
390                    }
391                }
392            }
393
394            ti += 1;
395            for i in 0..nparent {
396                if !parent_neq[i] {
397                    tp_idx[i] += 1;
398                }
399            }
400        } else if cmp == std::cmp::Ordering::Less {
401            let Some(te) = t_cur else {
402                ti += 1;
403                continue;
404            };
405            let name = std::str::from_utf8(&te.name).unwrap_or("");
406            let full_path = if base_path.is_empty() {
407                name.to_string()
408            } else {
409                format!("{base_path}/{name}")
410            };
411            let isdir = is_tree_mode(te.mode);
412            let mut do_emit = true;
413            if isdir && should_recurse_dir(&full_path, opt) {
414                do_emit = opt.tree_in_recursive;
415            }
416            if do_emit {
417                let all_parents_absent: Vec<bool> = vec![true; nparent];
418                emit_combined_path(
419                    full_path.clone(),
420                    Some(te),
421                    &tp_entries,
422                    &tp_idx,
423                    &all_parents_absent,
424                    find_object,
425                    out,
426                );
427            }
428            if isdir && should_recurse_dir(&full_path, opt) {
429                let merge_child = te.oid;
430                let child_parent_opts = vec![None; nparent];
431                ll_diff_tree_paths(
432                    odb,
433                    out,
434                    &full_path,
435                    opt,
436                    Some(&merge_child),
437                    &child_parent_opts,
438                    find_object,
439                )?;
440            }
441            ti += 1;
442        } else {
443            // git tree-diff.c (t > p[imin] branch): "-p[imin]" is only in D(T,Pi)
444            // when *every* parent has the path at p[imin] (deleted from all of them).
445            // Skip when any parent lacks it (parent_neq[i] set).
446            let skip_emit_tp = (0..nparent).any(|i| parent_neq[i]);
447            if !skip_emit_tp {
448                if let Some(pe) = p_min {
449                    let name = std::str::from_utf8(&pe.name).unwrap_or("");
450                    let full_path = if base_path.is_empty() {
451                        name.to_string()
452                    } else {
453                        format!("{base_path}/{name}")
454                    };
455                    let isdir = is_tree_mode(pe.mode);
456                    let mut do_emit = true;
457                    if isdir && should_recurse_dir(&full_path, opt) {
458                        do_emit = opt.tree_in_recursive;
459                    }
460                    if do_emit {
461                        emit_combined_path(
462                            full_path.clone(),
463                            None,
464                            &tp_entries,
465                            &tp_idx,
466                            &parent_neq,
467                            find_object,
468                            out,
469                        );
470                    }
471                    if isdir && should_recurse_dir(&full_path, opt) {
472                        let mut child_parent_opts = vec![None; nparent];
473                        for i in 0..nparent {
474                            if !parent_neq[i] {
475                                if let Some(e) = tp_entries[i].get(tp_idx[i]) {
476                                    child_parent_opts[i] = Some(e.oid);
477                                }
478                            }
479                        }
480                        ll_diff_tree_paths(
481                            odb,
482                            out,
483                            &full_path,
484                            opt,
485                            None,
486                            &child_parent_opts,
487                            find_object,
488                        )?;
489                    }
490                }
491            }
492            for i in 0..nparent {
493                if !parent_neq[i] {
494                    tp_idx[i] += 1;
495                }
496            }
497        }
498    }
499
500    Ok(())
501}
502
503/// Build combined-diff paths for a merge commit's tree against each parent's tree.
504pub fn combined_diff_paths_filtered(
505    odb: &Odb,
506    commit_tree: &ObjectId,
507    parents: &[ObjectId],
508    walk: &CombinedTreeDiffOptions,
509    find_object: Option<&ObjectId>,
510) -> Result<Vec<CombinedDiffPath>> {
511    if parents.is_empty() {
512        return Ok(Vec::new());
513    }
514    let mut parent_trees = Vec::with_capacity(parents.len());
515    for p in parents {
516        let obj = odb.read(p)?;
517        if obj.kind != ObjectKind::Commit {
518            return Ok(Vec::new());
519        }
520        let c = parse_commit(&obj.data)?;
521        parent_trees.push(Some(c.tree));
522    }
523    let parent_opts: Vec<Option<ObjectId>> = parent_trees;
524    combined_diff_paths_trees(odb, commit_tree, &parent_opts, walk, find_object)
525}
526
527/// Combined diff paths when `parents` are already tree OIDs (plumbing `git diff --cc` with N+1 trees).
528pub fn combined_diff_paths_trees(
529    odb: &Odb,
530    merge_tree: &ObjectId,
531    parent_trees: &[Option<ObjectId>],
532    walk: &CombinedTreeDiffOptions,
533    find_object: Option<&ObjectId>,
534) -> Result<Vec<CombinedDiffPath>> {
535    if parent_trees.is_empty() {
536        return Ok(Vec::new());
537    }
538    let mut out = Vec::new();
539    ll_diff_tree_paths(
540        odb,
541        &mut out,
542        "",
543        walk,
544        Some(merge_tree),
545        parent_trees,
546        find_object,
547    )?;
548    Ok(out)
549}