Skip to main content

grit_lib/
merge_diff.rs

1//! Merge commit and combined (`--cc` / `-c`) diff helpers.
2//!
3//! These mirror the subset of Git's combine-diff output needed for porcelain
4//! commands (`git show`, `git diff` during conflicts, `git diff-tree -c`).
5
6use std::io::Write;
7use std::path::Path;
8use std::process::{Command, Stdio};
9
10use similar::{ChangeTag, TextDiff};
11use tempfile::NamedTempFile;
12
13use crate::combined_diff_patch::{format_combined_diff_body, CombinedDiffWsOptions};
14use crate::combined_tree_diff::CombinedParentSide;
15use crate::config::{parse_bool, ConfigSet};
16use crate::crlf::{get_file_attrs, load_gitattributes, DiffAttr, FileAttrs};
17use crate::diff::{detect_renames, diff_trees, DiffStatus};
18use crate::objects::{parse_commit, parse_tree, ObjectId, ObjectKind};
19use crate::odb::Odb;
20use crate::quote_path::format_diff_path_with_prefix;
21use crate::textconv_cache::{read_textconv_cache, write_textconv_cache};
22
23/// Paths that differ between the merge result tree and **every** parent tree.
24#[must_use]
25pub fn combined_diff_paths(odb: &Odb, commit_tree: &ObjectId, parents: &[ObjectId]) -> Vec<String> {
26    if parents.len() < 2 {
27        return Vec::new();
28    }
29    let mut per_parent: Vec<std::collections::HashSet<String>> = Vec::new();
30    for p in parents {
31        let Ok(po) = odb.read(p) else {
32            continue;
33        };
34        let Ok(pc) = parse_commit(&po.data) else {
35            continue;
36        };
37        let Ok(entries) = diff_trees(odb, Some(&pc.tree), Some(commit_tree), "") else {
38            continue;
39        };
40        let paths: std::collections::HashSet<String> =
41            entries.iter().map(|e| e.path().to_string()).collect();
42        per_parent.push(paths);
43    }
44    if per_parent.is_empty() {
45        return Vec::new();
46    }
47    let mut common = per_parent[0].clone();
48    for s in &per_parent[1..] {
49        common = common.intersection(s).cloned().collect();
50    }
51    if common.is_empty() {
52        return Vec::new();
53    }
54    let mut ordered = paths_in_tree_order(odb, commit_tree, "", &common);
55    // Paths removed from the merge result are not present in `commit_tree`, so a merge-tree walk
56    // alone would miss them. Git still lists them in combined diff when every parent changed
57    // (`t4057-diff-combined-paths` merge + `git rm` amend).
58    if ordered.len() < common.len() {
59        let seen: std::collections::HashSet<String> = ordered.iter().cloned().collect();
60        let mut rest: Vec<String> = common.difference(&seen).cloned().collect();
61        rest.sort();
62        ordered.extend(rest);
63    }
64    ordered
65}
66
67/// Per-parent blob paths for a combined merge path when rename detection is enabled.
68///
69/// Returns `None` when no special mapping is needed (each parent reads `merge_path`).
70#[must_use]
71pub fn combined_merge_parent_blob_paths(
72    odb: &Odb,
73    merge_path: &str,
74    parent_trees: &[ObjectId],
75    result_tree: &ObjectId,
76    rename_threshold: u32,
77) -> Option<Vec<String>> {
78    if parent_trees.len() < 2 {
79        return None;
80    }
81    let mut per_parent: Vec<String> = Vec::with_capacity(parent_trees.len());
82    for t in parent_trees {
83        if blob_oid_at_path(odb, t, merge_path).is_some() {
84            per_parent.push(merge_path.to_string());
85        } else {
86            per_parent.push(String::new());
87        }
88    }
89    if per_parent.iter().all(|p| !p.is_empty()) {
90        return None;
91    }
92    let mut any_rename = false;
93    for (i, t) in parent_trees.iter().enumerate() {
94        if !per_parent[i].is_empty() {
95            continue;
96        }
97        // Run the *full* per-parent diff (not restricted to `merge_path`): the rename source is a
98        // path that was deleted relative to this parent, so it lives outside `merge_path`. Git's
99        // `find_paths_generic` likewise runs a full rename-detecting diff against each parent and
100        // intersects on the result path.
101        let entries = diff_trees(odb, Some(t), Some(result_tree), "").ok()?;
102        let with_rn = detect_renames(odb, None, entries, rename_threshold);
103        let mut found: Option<String> = None;
104        for e in with_rn {
105            if e.status != DiffStatus::Renamed {
106                continue;
107            }
108            let new_p = e.new_path.as_deref().unwrap_or("");
109            if new_p != merge_path {
110                continue;
111            }
112            let old_p = e.old_path.clone()?;
113            if blob_oid_at_path(odb, t, &old_p).is_some() {
114                if found.is_some() {
115                    return None;
116                }
117                found = Some(old_p);
118            }
119        }
120        let p = found?;
121        per_parent[i] = p;
122        any_rename = true;
123    }
124    any_rename.then_some(per_parent)
125}
126
127/// Mode of the blob at `path` in `tree`, or `None` if missing / not a blob.
128fn blob_mode_at_path(odb: &Odb, tree: &ObjectId, path: &str) -> Option<u32> {
129    let mut current = *tree;
130    let parts: Vec<&str> = path.split('/').collect();
131    for (pi, part) in parts.iter().enumerate() {
132        let obj = odb.read(&current).ok()?;
133        let entries = crate::objects::parse_tree(&obj.data).ok()?;
134        let found = entries
135            .iter()
136            .find(|e| std::str::from_utf8(&e.name).ok() == Some(*part))?;
137        if pi + 1 == parts.len() {
138            return Some(found.mode);
139        }
140        if found.mode != 0o040000 {
141            return None;
142        }
143        current = found.oid;
144    }
145    None
146}
147
148/// Enrich a combined diff path with per-parent rename info (`-M`/`-C`).
149///
150/// For each parent that the merge-tree walk classified as `Added` (the result name does not exist
151/// in that parent), run a full rename-detecting diff against that parent. If the result path is the
152/// target of a rename, rewrite that parent side to `Renamed`, fill in the source blob's mode/OID,
153/// and record the source name. This mirrors git's `find_paths_generic`, which intersects per-parent
154/// rename-detecting diffs (producing `RR` with all source names under `--combined-all-paths`).
155pub fn enrich_combined_path_renames(
156    odb: &Odb,
157    path: &mut crate::combined_tree_diff::CombinedDiffPath,
158    parent_trees: &[ObjectId],
159    result_tree: &ObjectId,
160    rename_threshold: u32,
161) {
162    use crate::combined_tree_diff::CombinedParentStatus;
163    if parent_trees.len() != path.parents.len() {
164        return;
165    }
166    let Some(parent_paths) = combined_merge_parent_blob_paths(
167        odb,
168        &path.path,
169        parent_trees,
170        result_tree,
171        rename_threshold,
172    ) else {
173        return;
174    };
175    for (i, side) in path.parents.iter_mut().enumerate() {
176        if side.status != CombinedParentStatus::Added {
177            continue;
178        }
179        let src = &parent_paths[i];
180        if src.is_empty() || src == &path.path {
181            continue;
182        }
183        let (Some(oid), Some(mode)) = (
184            blob_oid_at_path(odb, &parent_trees[i], src),
185            blob_mode_at_path(odb, &parent_trees[i], src),
186        ) else {
187            continue;
188        };
189        side.status = CombinedParentStatus::Renamed;
190        side.oid = oid;
191        side.mode = mode;
192        side.rename_from = Some(src.clone());
193    }
194}
195
196/// All blob paths in `tree_oid`, depth-first in Git tree entry order (for `diff` / `log`
197/// `--rotate-to` / `--skip-to`).
198#[must_use]
199pub fn all_blob_paths_in_tree_order(odb: &Odb, tree_oid: &ObjectId) -> Vec<String> {
200    all_blob_paths_dfs(odb, tree_oid, "")
201}
202
203fn all_blob_paths_dfs(odb: &Odb, tree_oid: &ObjectId, prefix: &str) -> Vec<String> {
204    let Ok(obj) = odb.read(tree_oid) else {
205        return Vec::new();
206    };
207    if obj.kind != ObjectKind::Tree {
208        return Vec::new();
209    }
210    let Ok(entries) = parse_tree(&obj.data) else {
211        return Vec::new();
212    };
213    let mut out = Vec::new();
214    for e in entries {
215        let name = String::from_utf8_lossy(&e.name);
216        let path = if prefix.is_empty() {
217            name.into_owned()
218        } else {
219            format!("{prefix}/{name}")
220        };
221        if e.mode == 0o040000 {
222            out.extend(all_blob_paths_dfs(odb, &e.oid, &path));
223        } else {
224            out.push(path);
225        }
226    }
227    out
228}
229
230/// List paths under `prefix` that appear in `want`, following merge-tree entry order (Git
231/// `traverse_trees` order), not lexicographic sorting.
232fn paths_in_tree_order(
233    odb: &Odb,
234    tree_oid: &ObjectId,
235    prefix: &str,
236    want: &std::collections::HashSet<String>,
237) -> Vec<String> {
238    let Ok(obj) = odb.read(tree_oid) else {
239        return Vec::new();
240    };
241    if obj.kind != ObjectKind::Tree {
242        return Vec::new();
243    }
244    let Ok(entries) = parse_tree(&obj.data) else {
245        return Vec::new();
246    };
247    let mut out = Vec::new();
248    for e in entries {
249        let name = String::from_utf8_lossy(&e.name);
250        let path = if prefix.is_empty() {
251            name.into_owned()
252        } else {
253            format!("{prefix}/{name}")
254        };
255        if e.mode == 0o040000 {
256            out.extend(paths_in_tree_order(odb, &e.oid, &path, want));
257        } else if want.contains(&path) {
258            out.push(path);
259        }
260    }
261    out
262}
263
264/// Load attributes for `path` using root `.gitattributes` and `info/attributes`.
265fn attrs_for_repo_path(git_dir: &Path, path: &str) -> FileAttrs {
266    let work_tree = git_dir.parent().unwrap_or(git_dir);
267    let rules = load_gitattributes(work_tree);
268    let config = ConfigSet::load(Some(git_dir), true).unwrap_or_default();
269    get_file_attrs(&rules, path, false, &config)
270}
271
272/// True if diff should treat this path as binary (NUL in blob or `-diff` / `diff=unset`).
273#[must_use]
274pub fn is_binary_for_diff(git_dir: &Path, path: &str, blob: &[u8]) -> bool {
275    let fa = attrs_for_repo_path(git_dir, path);
276    if matches!(fa.diff_attr, DiffAttr::Unset) {
277        return true;
278    }
279    crate::crlf::is_binary(blob)
280}
281
282/// True when `diff.<driver>.binary` is set for this path's `diff=<driver>` attribute.
283fn diff_driver_binary_config(config: &ConfigSet, driver: &str) -> bool {
284    let key = format!("diff.{driver}.binary");
285    config
286        .get(&key)
287        .is_some_and(|v| parse_bool(v.as_str()).unwrap_or(false))
288}
289
290/// Force `Binary files ... differ` when the path's diff driver sets `binary`, except for symlinks.
291///
292/// Matches Git's `diff_filespec_is_binary` driver flag: `diff.<name>.binary` applies to paths
293/// using that driver, but symlink modes (`120000`) still emit textual symlink-target patches
294/// (t4011).
295#[must_use]
296pub fn diff_forced_binary_by_driver(
297    git_dir: &Path,
298    config: &ConfigSet,
299    path: &str,
300    old_mode: &str,
301    new_mode: &str,
302) -> bool {
303    let fa = attrs_for_repo_path(git_dir, path);
304    let DiffAttr::Driver(driver) = fa.diff_attr else {
305        return false;
306    };
307    if !diff_driver_binary_config(config, &driver) {
308        return false;
309    }
310    if old_mode == "120000" || new_mode == "120000" {
311        return false;
312    }
313    true
314}
315
/// Whether `cmd_line` contains a character that makes Git wrap the textconv command as
/// `sh -c 'cmd "$@"' -- ...` (`prepare_shell_cmd` in Git's `run-command.c`).
///
/// The trigger characters are the shell metacharacters, including space, tab and newline.
fn textconv_cmd_needs_shell_wrapper(cmd_line: &str) -> bool {
    // Same set, in the same order, as the `strcspn` argument used by Git.
    const SHELL_META: &str = "|&;<>()$`\\\"' \t\n*?[#~=%";
    cmd_line.chars().any(|c| SHELL_META.contains(c))
}
346
347/// Run `diff.<driver>.textconv` on `input`; returns raw stdout on success.
348///
349/// Matches Git's `run_textconv` / `prepare_shell_cmd`: by default the blob is written to a
350/// temporary file and passed as an argument after `--`. Commands that contain shell
351/// metacharacters (including spaces) use `sh -c 'pgm "$@"' -- pgm <tempfile>`. Config lines
352/// ending with ` <` use stdin instead of a tempfile.
353pub fn run_textconv_raw(
354    command_cwd: &Path,
355    config: &ConfigSet,
356    driver: &str,
357    input: &[u8],
358) -> Option<Vec<u8>> {
359    let mut cmd_line = config.get(&format!("diff.{driver}.textconv"))?;
360    cmd_line = cmd_line.trim_end().to_string();
361    let stdin_mode = if cmd_line.ends_with('<') {
362        let t = cmd_line.trim_end_matches('<').trim_end();
363        cmd_line = t.to_string();
364        true
365    } else {
366        false
367    };
368    if stdin_mode {
369        let mut child = Command::new("sh")
370            .arg("-c")
371            .arg(&cmd_line)
372            .current_dir(command_cwd)
373            .stdin(Stdio::piped())
374            .stdout(Stdio::piped())
375            .stderr(Stdio::null())
376            .spawn()
377            .ok()?;
378        let mut stdin = child.stdin.take()?;
379        stdin.write_all(input).ok()?;
380        drop(stdin);
381        let out = child.wait_with_output().ok()?;
382        return if out.status.success() {
383            Some(out.stdout)
384        } else {
385            None
386        };
387    }
388
389    let mut tmp = NamedTempFile::new().ok()?;
390    tmp.write_all(input).ok()?;
391    tmp.flush().ok()?;
392    let path = tmp.path().to_owned();
393
394    let out = if textconv_cmd_needs_shell_wrapper(&cmd_line) {
395        Command::new("sh")
396            .current_dir(command_cwd)
397            .arg("-c")
398            .arg(format!("{} \"$@\"", cmd_line))
399            .arg(&cmd_line)
400            .arg(&path)
401            .stdout(Stdio::piped())
402            .stderr(Stdio::null())
403            .output()
404            .ok()?
405    } else {
406        Command::new("sh")
407            .current_dir(command_cwd)
408            .arg(&cmd_line)
409            .arg(&path)
410            .stdout(Stdio::piped())
411            .stderr(Stdio::null())
412            .output()
413            .ok()?
414    };
415
416    if !out.status.success() {
417        return None;
418    }
419    Some(out.stdout)
420}
421
422/// Run `diff.<driver>.textconv` feeding `input` on stdin; returns UTF-8 lossy text on success.
423pub fn run_textconv(
424    command_cwd: &Path,
425    config: &ConfigSet,
426    driver: &str,
427    input: &[u8],
428) -> Option<String> {
429    run_textconv_raw(command_cwd, config, driver, input)
430        .map(|b| String::from_utf8_lossy(&b).into_owned())
431}
432
433pub fn diff_textconv_cmd_line(config: &ConfigSet, driver: &str) -> Option<String> {
434    let mut cmd_line = config.get(&format!("diff.{driver}.textconv"))?;
435    cmd_line = cmd_line.trim_end().to_string();
436    if cmd_line.ends_with('<') {
437        let t = cmd_line.trim_end_matches('<').trim_end();
438        cmd_line = t.to_string();
439    }
440    Some(cmd_line)
441}
442
443pub fn diff_cachetextconv_enabled(config: &ConfigSet, driver: &str) -> bool {
444    config
445        .get(&format!("diff.{driver}.cachetextconv"))
446        .map(|v| matches!(v.to_ascii_lowercase().as_str(), "true" | "yes" | "1" | "on"))
447        .unwrap_or(false)
448}
449
450/// Returns true when `path` has a `diff=<driver>` attribute and `diff.<driver>.textconv` is set.
451///
452/// When this holds, Git treats the path as textual for diff purposes (even if the blob contains
453/// NUL), running textconv instead of emitting `Binary files differ`.
454#[must_use]
455pub fn diff_textconv_active(git_dir: &Path, config: &ConfigSet, path: &str) -> bool {
456    let fa = attrs_for_repo_path(git_dir, path);
457    let DiffAttr::Driver(ref driver) = fa.diff_attr else {
458        return false;
459    };
460    diff_textconv_cmd_line(config, driver).is_some()
461}
462
463/// True when a path carries the `-diff` (or `diff=unset` / `binary`) attribute,
464/// which forces Git to emit `Binary files ... differ` regardless of content.
465#[must_use]
466pub fn diff_attr_forces_binary(git_dir: &Path, path: &str) -> bool {
467    matches!(attrs_for_repo_path(git_dir, path).diff_attr, DiffAttr::Unset)
468}
469
470/// True when a path carries a bare `diff` (set) attribute, which forces Git to
471/// produce a textual diff even when the blob contains NUL bytes (t4020 #65).
472#[must_use]
473pub fn diff_attr_forces_text(git_dir: &Path, path: &str) -> bool {
474    matches!(attrs_for_repo_path(git_dir, path).diff_attr, DiffAttr::Set)
475}
476
477/// Resolved external diff driver from a path's `diff=<name>` attribute.
478///
479/// Mirrors Git's `userdiff_find_by_path` → `drv->external`: when a path has a
480/// `diff=<name>` attribute and `diff.<name>.command` is configured, that driver
481/// takes precedence over `GIT_EXTERNAL_DIFF` / `diff.external`. The boolean is
482/// `diff.<name>.trustExitCode`.
483#[must_use]
484pub fn diff_attr_external_driver(
485    git_dir: &Path,
486    config: &ConfigSet,
487    path: &str,
488) -> Option<(String, bool)> {
489    let fa = attrs_for_repo_path(git_dir, path);
490    let DiffAttr::Driver(ref driver) = fa.diff_attr else {
491        return None;
492    };
493    let cmd = config.get(&format!("diff.{driver}.command"))?;
494    if cmd.trim().is_empty() {
495        return None;
496    }
497    let trust = config
498        .get(&format!("diff.{driver}.trustExitCode"))
499        .and_then(|v| parse_bool(v.as_str()).ok())
500        .unwrap_or(false);
501    Some((cmd, trust))
502}
503
/// Directory in which textconv commands run: the parent of `git_dir`, or `git_dir` itself when
/// it has no parent.
fn textconv_command_cwd(git_dir: &Path) -> std::path::PathBuf {
    match git_dir.parent() {
        Some(parent) => parent.to_path_buf(),
        None => git_dir.to_path_buf(),
    }
}
507
508fn blob_text_for_diff_inner(
509    odb: Option<&Odb>,
510    git_dir: &Path,
511    config: &ConfigSet,
512    path: &str,
513    blob: &[u8],
514    blob_oid: Option<&ObjectId>,
515    use_textconv: bool,
516) -> String {
517    if !use_textconv {
518        return String::from_utf8_lossy(blob).into_owned();
519    }
520    let fa = attrs_for_repo_path(git_dir, path);
521    let DiffAttr::Driver(ref driver) = fa.diff_attr else {
522        return String::from_utf8_lossy(blob).into_owned();
523    };
524    let Some(cmd_line) = diff_textconv_cmd_line(config, driver) else {
525        return String::from_utf8_lossy(blob).into_owned();
526    };
527    let want_cache = diff_cachetextconv_enabled(config, driver);
528    if want_cache {
529        if let (Some(odb), Some(oid)) = (odb, blob_oid) {
530            if let Some(bytes) = read_textconv_cache(odb, git_dir, driver, &cmd_line, oid) {
531                return String::from_utf8_lossy(&bytes).into_owned();
532            }
533        }
534    }
535    let cwd = textconv_command_cwd(git_dir);
536    let Some(t) = run_textconv(&cwd, config, driver, blob) else {
537        return String::from_utf8_lossy(blob).into_owned();
538    };
539    if want_cache {
540        if let (Some(odb), Some(oid)) = (odb, blob_oid) {
541            write_textconv_cache(odb, git_dir, driver, &cmd_line, oid, t.as_bytes());
542        }
543    }
544    t
545}
546
547/// Like [`blob_text_for_diff`], but uses `refs/notes/textconv/<driver>` when
548/// `diff.<driver>.cachetextconv` is true and `blob_oid` is known.
549#[must_use]
550pub fn blob_text_for_diff_with_oid(
551    odb: &Odb,
552    git_dir: &Path,
553    config: &ConfigSet,
554    path: &str,
555    blob: &[u8],
556    blob_oid: &ObjectId,
557    use_textconv: bool,
558) -> String {
559    blob_text_for_diff_inner(
560        Some(odb),
561        git_dir,
562        config,
563        path,
564        blob,
565        Some(blob_oid),
566        use_textconv,
567    )
568}
569
570/// Blob bytes after smudge/EOL conversion for `path`, using the same rules as checkout.
571///
572/// `index` is used to pick up `.gitattributes` from the index when the worktree file is
573/// missing; pass `None` to use only on-disk `.gitattributes` under `work_tree`.
574pub fn convert_blob_to_worktree_for_path(
575    git_dir: &Path,
576    work_tree: &Path,
577    index: Option<&crate::index::Index>,
578    odb: &Odb,
579    path: &str,
580    blob: &[u8],
581    oid_hex: Option<&str>,
582) -> std::io::Result<Vec<u8>> {
583    let config = ConfigSet::load(Some(git_dir), true).unwrap_or_default();
584    let conv = crate::crlf::ConversionConfig::from_config(&config);
585    let rules = match index {
586        Some(idx) => crate::crlf::load_gitattributes_for_checkout(work_tree, path, idx, odb),
587        None => crate::crlf::load_gitattributes(work_tree),
588    };
589    let file_attrs = crate::crlf::get_file_attrs(&rules, path, false, &config);
590    crate::crlf::convert_to_worktree_eager(blob, path, &conv, &file_attrs, oid_hex, None)
591        .map_err(std::io::Error::other)
592}
593
594/// Prepare blob bytes for diff: optional textconv when `use_textconv` and `diff=<driver>`.
595///
596/// Does not read or write the textconv notes cache; use [`blob_text_for_diff_with_oid`] when the
597/// blob OID is known (e.g. commit diffs with `cachetextconv`).
598pub fn blob_text_for_diff(
599    git_dir: &Path,
600    config: &ConfigSet,
601    path: &str,
602    blob: &[u8],
603    use_textconv: bool,
604) -> String {
605    blob_text_for_diff_inner(None, git_dir, config, path, blob, None, use_textconv)
606}
607
608/// `diff --git` against parent `p` for merge commit `-m` output.
609#[allow(clippy::too_many_arguments)]
610pub fn format_parent_patch(
611    git_dir: &Path,
612    config: &ConfigSet,
613    odb: &Odb,
614    path: &str,
615    parent_tree: &ObjectId,
616    result_tree: &ObjectId,
617    abbrev: usize,
618    context: usize,
619    use_textconv: bool,
620) -> Option<String> {
621    let entries = diff_trees(odb, Some(parent_tree), Some(result_tree), "").ok()?;
622    let entry = entries.iter().find(|e| e.path() == path)?;
623    if entry.status == DiffStatus::Unmerged {
624        return None;
625    }
626
627    let old_blob = read_blob(odb, &entry.old_oid);
628    let new_blob = read_blob(odb, &entry.new_oid);
629    let textconv_for_patch = use_textconv && diff_textconv_active(git_dir, config, path);
630    let binary = !textconv_for_patch
631        && (is_binary_for_diff(git_dir, path, &old_blob)
632            || is_binary_for_diff(git_dir, path, &new_blob));
633
634    let old_abbrev = abbrev_hex(&entry.old_oid, abbrev);
635    let new_abbrev = abbrev_hex(&entry.new_oid, abbrev);
636
637    let mut out = String::new();
638    out.push_str(&format!("diff --git a/{path} b/{path}\n"));
639    // Header lines depend on whether the file was added / deleted / mode-changed / modified,
640    // matching git's `diff --git` body (added => `new file mode`, deleted => `deleted file mode`).
641    let (old_disp, new_disp) = match entry.status {
642        DiffStatus::Added => {
643            out.push_str(&format!("new file mode {}\n", entry.new_mode));
644            out.push_str(&format!("index {old_abbrev}..{new_abbrev}\n"));
645            ("/dev/null".to_string(), format!("b/{path}"))
646        }
647        DiffStatus::Deleted => {
648            out.push_str(&format!("deleted file mode {}\n", entry.old_mode));
649            out.push_str(&format!("index {old_abbrev}..{new_abbrev}\n"));
650            (format!("a/{path}"), "/dev/null".to_string())
651        }
652        _ => {
653            if entry.old_mode != entry.new_mode {
654                out.push_str(&format!("old mode {}\n", entry.old_mode));
655                out.push_str(&format!("new mode {}\n", entry.new_mode));
656                out.push_str(&format!("index {old_abbrev}..{new_abbrev}\n"));
657            } else {
658                out.push_str(&format!(
659                    "index {old_abbrev}..{new_abbrev} {}\n",
660                    entry.new_mode
661                ));
662            }
663            (format!("a/{path}"), format!("b/{path}"))
664        }
665    };
666
667    if binary {
668        out.push_str(&format!("Binary files {old_disp} and {new_disp} differ\n"));
669        return Some(out);
670    }
671
672    let old_t = if textconv_for_patch {
673        blob_text_for_diff_with_oid(odb, git_dir, config, path, &old_blob, &entry.old_oid, true)
674    } else {
675        blob_text_for_diff(git_dir, config, path, &old_blob, use_textconv)
676    };
677    let new_t = if textconv_for_patch {
678        blob_text_for_diff_with_oid(odb, git_dir, config, path, &new_blob, &entry.new_oid, true)
679    } else {
680        blob_text_for_diff(git_dir, config, path, &new_blob, use_textconv)
681    };
682    // Empty prefixes here because `old_disp`/`new_disp` already carry the `a/`/`b/` (or
683    // `/dev/null`) decoration computed from the file status above.
684    let patch = crate::diff::unified_diff_with_prefix(
685        &old_t,
686        &new_t,
687        &old_disp,
688        &new_disp,
689        context,
690        0,
691        "",
692        "",
693        true,
694        config.quote_path_fully(),
695    );
696    out.push_str(&patch);
697    Some(out)
698}
699
700/// Combined diff header: `diff --combined` or `diff --cc`.
701pub fn format_combined_binary_header(
702    path: &str,
703    parent_oids: &[ObjectId],
704    result_oid: &ObjectId,
705    abbrev: usize,
706    use_cc_word: bool,
707) -> String {
708    format_combined_binary_header_n(path, parent_oids, result_oid, abbrev, use_cc_word)
709}
710
711/// `index` line for N-parent combined/binary diffs (`p1,p2,...pn..result`).
712#[must_use]
713pub fn format_combined_binary_header_n(
714    path: &str,
715    parent_oids: &[ObjectId],
716    result_oid: &ObjectId,
717    abbrev: usize,
718    use_cc_word: bool,
719) -> String {
720    let idx: Vec<String> = parent_oids.iter().map(|o| abbrev_hex(o, abbrev)).collect();
721    let res = abbrev_hex(result_oid, abbrev);
722    let kind = if use_cc_word { "cc" } else { "combined" };
723    format!(
724        "diff --{kind} {path}\nindex {}..{res}\nBinary files differ\n",
725        idx.join(",")
726    )
727}
728
729/// Full combined diff for a binary path (two parents).
730pub fn format_combined_binary(
731    path: &str,
732    parent_oids: &[ObjectId],
733    result_oid: &ObjectId,
734    abbrev: usize,
735    use_cc_word: bool,
736) -> String {
737    format_combined_binary_header_n(path, parent_oids, result_oid, abbrev, use_cc_word)
738}
739
740fn push_combined_file_headers(
741    out: &mut String,
742    merge_path: &str,
743    parent_paths: &[String],
744    parent_sides: &[CombinedParentSide],
745    combined_all_paths: bool,
746    quote_path_fully: bool,
747) {
748    let a_prefix = "a/";
749    let b_prefix = "b/";
750    if combined_all_paths {
751        for (i, p) in parent_paths.iter().enumerate() {
752            // Show `/dev/null` only when this parent genuinely lacks the path (a true add). When a
753            // per-parent rename was detected, `parent_paths[i]` holds the renamed-from path (which
754            // the parent *does* have), so emit that name even though the merge-tree walk classified
755            // the result side as "Added".
756            let added_no_rename = parent_sides.get(i).is_some_and(|s| {
757                s.status == crate::combined_tree_diff::CombinedParentStatus::Added
758            }) && (p.is_empty() || p == merge_path);
759            if added_no_rename {
760                out.push_str("--- /dev/null\n");
761            } else {
762                let line = format_diff_path_with_prefix(a_prefix, p, quote_path_fully);
763                out.push_str("--- ");
764                out.push_str(&line);
765                out.push('\n');
766            }
767        }
768        let line = format_diff_path_with_prefix(b_prefix, merge_path, quote_path_fully);
769        out.push_str("+++ ");
770        out.push_str(&line);
771        out.push('\n');
772    } else {
773        let la = format_diff_path_with_prefix(a_prefix, merge_path, quote_path_fully);
774        let lb = format_diff_path_with_prefix(b_prefix, merge_path, quote_path_fully);
775        out.push_str("--- ");
776        out.push_str(&la);
777        out.push('\n');
778        out.push_str("+++ ");
779        out.push_str(&lb);
780        out.push('\n');
781    }
782}
783
784/// Combined text diff with optional textconv (N parents, single merge path).
785///
786/// `parent_blob_paths` — when set, length must match `parent_trees`; each entry is the path
787/// used to read that parent's blob (for `--combined-all-paths` rename cases). When `None`,
788/// every parent uses `path`.
789#[allow(clippy::too_many_arguments)]
790pub fn format_combined_textconv_patch(
791    git_dir: &Path,
792    config: &ConfigSet,
793    odb: &Odb,
794    path: &str,
795    parent_trees: &[ObjectId],
796    result_tree: &ObjectId,
797    abbrev: usize,
798    context: usize,
799    use_cc_word: bool,
800    use_textconv: bool,
801    ws: CombinedDiffWsOptions,
802    combined_all_paths: bool,
803    parent_blob_paths: Option<&[String]>,
804    parent_sides: &[CombinedParentSide],
805    quote_path_fully: bool,
806) -> Option<String> {
807    if parent_trees.len() < 2 {
808        return None;
809    }
810    let parent_paths: Vec<&str> = if let Some(ps) = parent_blob_paths {
811        if ps.len() != parent_trees.len() {
812            return None;
813        }
814        ps.iter().map(|s| s.as_str()).collect()
815    } else {
816        vec![path; parent_trees.len()]
817    };
818
819    let mut parent_blobs = Vec::with_capacity(parent_trees.len());
820    let mut parent_oids = Vec::with_capacity(parent_trees.len());
821    for (i, t) in parent_trees.iter().enumerate() {
822        let p = parent_paths[i];
823        // A parent that lacks the path contributes /dev/null (empty content, zero OID): the path
824        // was added relative to that parent. Git still emits the combined patch (`AA`/`new file`),
825        // so do not bail out here.
826        match blob_oid_at_path(odb, t, p) {
827            Some(oid) => {
828                parent_blobs.push(read_blob(odb, &oid));
829                parent_oids.push(oid);
830            }
831            None => {
832                parent_blobs.push(Vec::new());
833                parent_oids.push(ObjectId::zero());
834            }
835        }
836    }
837    let result_blob = read_blob_at_path(odb, result_tree, path)?;
838    let roid = blob_oid_at_path(odb, result_tree, path)?;
839
840    let textconv_for_patch = use_textconv && diff_textconv_active(git_dir, config, path);
841    if !textconv_for_patch
842        && (parent_blobs
843            .iter()
844            .any(|b| is_binary_for_diff(git_dir, path, b))
845            || is_binary_for_diff(git_dir, path, &result_blob))
846    {
847        return Some(format_combined_binary(
848            path,
849            &parent_oids,
850            &roid,
851            abbrev,
852            use_cc_word,
853        ));
854    }
855
856    let mut parent_texts = Vec::with_capacity(parent_trees.len());
857    for (i, blob) in parent_blobs.iter().enumerate() {
858        let p = parent_paths[i];
859        let oid = &parent_oids[i];
860        let t = if textconv_for_patch {
861            blob_text_for_diff_with_oid(odb, git_dir, config, p, blob, oid, true)
862        } else {
863            blob_text_for_diff(git_dir, config, p, blob, use_textconv)
864        };
865        parent_texts.push(t);
866    }
867    let tr = if textconv_for_patch {
868        blob_text_for_diff_with_oid(odb, git_dir, config, path, &result_blob, &roid, true)
869    } else {
870        blob_text_for_diff(git_dir, config, path, &result_blob, use_textconv)
871    };
872
873    let idx: Vec<String> = parent_oids.iter().map(|o| abbrev_hex(o, abbrev)).collect();
874    let ra = abbrev_hex(&roid, abbrev);
875    let kind = if use_cc_word { "cc" } else { "combined" };
876
877    let header_paths: Vec<String> = if combined_all_paths {
878        parent_paths.iter().map(|s| (*s).to_string()).collect()
879    } else {
880        Vec::new()
881    };
882
883    let mut out = String::new();
884    out.push_str(&format!("diff --{kind} {path}\n"));
885    out.push_str(&format!("index {}..{ra}\n", idx.join(",")));
886    if combined_all_paths {
887        push_combined_file_headers(
888            &mut out,
889            path,
890            &header_paths,
891            parent_sides,
892            true,
893            quote_path_fully,
894        );
895    } else {
896        push_combined_file_headers(&mut out, path, &[], parent_sides, false, quote_path_fully);
897    }
898    out.push_str(&format_combined_diff_body(
899        &parent_texts,
900        &tr,
901        context,
902        use_cc_word,
903        ws,
904    ));
905    Some(out)
906}
907
908/// Combined `diff --cc` for an unmerged **gitlink** path when stage blobs are absent from the ODB
909/// (e.g. `t4027` synthetic `1ff…` / `2ff…` OIDs). Uses full hex in `Subproject commit` lines like Git.
910#[must_use]
911pub fn format_gitlink_unmerged_conflict_combined(
912    path: &str,
913    stage2_oid: &ObjectId,
914    stage3_oid: &ObjectId,
915    result_subproject_line: &str,
916    abbrev: usize,
917) -> String {
918    let p1a = abbrev_hex(stage2_oid, abbrev);
919    let p2a = abbrev_hex(stage3_oid, abbrev);
920    let z = crate::diff::zero_oid();
921    let za = abbrev_hex(&z, abbrev);
922
923    let t_ours = format!("Subproject commit {}", stage2_oid.to_hex());
924    let t_theirs = format!("Subproject commit {}", stage3_oid.to_hex());
925    let tr = result_subproject_line.trim_end_matches('\n').to_owned();
926
927    let mut out = String::new();
928    out.push_str(&format!("diff --cc {path}\n"));
929    out.push_str(&format!("index {p1a},{p2a}..{za}\n"));
930    out.push_str(&format!("--- a/{path}\n"));
931    out.push_str(&format!("+++ b/{path}\n"));
932    out.push_str(&combined_hunk_two_parents(&t_ours, &t_theirs, &tr));
933    out
934}
935
936/// `git diff` / `git diff --cc` during a conflict: worktree file with markers.
937#[allow(clippy::too_many_arguments)]
938pub fn format_worktree_conflict_combined(
939    git_dir: &Path,
940    config: &ConfigSet,
941    odb: &Odb,
942    path: &str,
943    stage1_oid: &ObjectId,
944    stage2_oid: &ObjectId,
945    stage3_oid: &ObjectId,
946    worktree_bytes: &[u8],
947    abbrev: usize,
948) -> String {
949    let ours_blob = read_blob(odb, stage2_oid);
950    let theirs_blob = read_blob(odb, stage3_oid);
951    let _base_blob = read_blob(odb, stage1_oid);
952
953    let use_conv = !worktree_bytes.contains(&0);
954    let textconv_cache_path = diff_textconv_active(git_dir, config, path);
955    let t_ours = if textconv_cache_path {
956        blob_text_for_diff_with_oid(odb, git_dir, config, path, &ours_blob, stage2_oid, true)
957    } else {
958        blob_text_for_diff(git_dir, config, path, &ours_blob, use_conv)
959    };
960    let t_theirs = if textconv_cache_path {
961        blob_text_for_diff_with_oid(odb, git_dir, config, path, &theirs_blob, stage3_oid, true)
962    } else {
963        blob_text_for_diff(git_dir, config, path, &theirs_blob, use_conv)
964    };
965    let wt_text = if textconv_cache_path || use_conv {
966        blob_text_for_diff(git_dir, config, path, worktree_bytes, true)
967    } else {
968        String::from_utf8_lossy(worktree_bytes).into_owned()
969    };
970    let wt_for_conflict = wt_text.clone();
971
972    let p1a = abbrev_hex(stage2_oid, abbrev);
973    let p2a = abbrev_hex(stage3_oid, abbrev);
974    let z = crate::diff::zero_oid();
975    let za = abbrev_hex(&z, abbrev);
976
977    let mut out = String::new();
978    out.push_str(&format!("diff --cc {path}\n"));
979    out.push_str(&format!("index {p1a},{p2a}..{za}\n"));
980    out.push_str(&format!("--- a/{path}\n"));
981    out.push_str(&format!("+++ b/{path}\n"));
982
983    if wt_text.contains("<<<<<<<") && wt_text.contains(">>>>>>>") {
984        out.push_str(&conflict_combined_body(&wt_for_conflict));
985    } else {
986        out.push_str(&format_combined_diff_body(
987            &[t_ours, t_theirs],
988            &wt_text,
989            3,
990            true,
991            CombinedDiffWsOptions::default(),
992        ));
993    }
994    out
995}
996
/// Render the combined-diff hunk for the first conflict region found in `wt`.
///
/// Scanning starts at the first line beginning with `<<<<<<< `; anything before it, and
/// anything after the closing `>>>>>>>` line, is not emitted. Row prefixes:
///
/// * `++` - the opening marker, the `=======` separator and the closing marker,
/// * ` +` - lines between the opening marker and the separator,
/// * `+ ` - lines between the separator and the closing marker.
///
/// The hunk header reports the two side counts (each at least 1) and the number of emitted
/// rows. Returns an empty string when no line starts with `<<<<<<< `.
fn conflict_combined_body(wt: &str) -> String {
    let mut rest = wt
        .lines()
        .skip_while(|l| !l.starts_with("<<<<<<< "))
        .peekable();
    let Some(opening) = rest.next() else {
        return String::new();
    };

    let mut rows = format!("++{opening}\n");
    let mut emitted = 1u32;

    // Everything up to the separator belongs to the first side.
    let mut first_side = 0u32;
    while let Some(l) = rest.next_if(|l| !l.starts_with("=======")) {
        rows.push_str(" +");
        rows.push_str(l);
        rows.push('\n');
        first_side += 1;
        emitted += 1;
    }

    // If input remains, the next line is the separator; it is always written as a bare
    // `=======` regardless of any trailing text on the original line.
    if rest.next().is_some() {
        rows.push_str("++=======\n");
        emitted += 1;
    }

    // Everything up to the closing marker belongs to the second side.
    let mut second_side = 0u32;
    while let Some(l) = rest.next_if(|l| !l.starts_with(">>>>>>>")) {
        rows.push_str("+ ");
        rows.push_str(l);
        rows.push('\n');
        second_side += 1;
        emitted += 1;
    }

    if let Some(closing) = rest.next() {
        rows.push_str("++");
        rows.push_str(closing);
        rows.push('\n');
        emitted += 1;
    }

    let mut hunk = format!(
        "@@@ -1,{} -1,{} +1,{} @@@\n",
        first_side.max(1),
        second_side.max(1),
        emitted
    );
    hunk.push_str(&rows);
    hunk
}
1045
1046/// For each line of `result`, whether that line is absent from `parent` per a line-oriented diff.
1047#[allow(dead_code)] // Reserved for tighter `--cc` hunk alignment with Git's `dump_sline`.
1048fn result_line_differs_from_parent(parent: &str, result: &str) -> Vec<bool> {
1049    let lr: Vec<&str> = result.lines().collect();
1050    let mut out = vec![false; lr.len()];
1051    let diff = TextDiff::configure().diff_lines(parent, result);
1052    for change in diff.iter_all_changes() {
1053        match change.tag() {
1054            ChangeTag::Equal => {}
1055            ChangeTag::Delete => {}
1056            ChangeTag::Insert => {
1057                let range = change.value().lines().count();
1058                let Some(start) = change.new_index() else {
1059                    continue;
1060                };
1061                for i in 0..range {
1062                    if let Some(slot) = out.get_mut(start + i) {
1063                        *slot = true;
1064                    }
1065                }
1066            }
1067        }
1068    }
1069    out
1070}
1071
1072/// Combined hunk body for two parents (Git `dump_sline` / `diff --cc` line prefixes).
1073///
1074/// Emits, like combine-diff.c `dump_sline`: first the per-parent deletion rows for any
1075/// parent line absent from the result (`-` in that parent's column, space in the other),
1076/// then the result rows with `+` in each column where the line differs from that parent.
1077fn combined_hunk_two_parents(a: &str, b: &str, result: &str) -> String {
1078    let la: Vec<&str> = a.lines().collect();
1079    let lb: Vec<&str> = b.lines().collect();
1080    let lr: Vec<&str> = result.lines().collect();
1081
1082    let old_a = la.len().max(1) as u32;
1083    let old_b = lb.len().max(1) as u32;
1084    let new_c = lr.len().max(1) as u32;
1085
1086    // Parent lines that do not survive into the result.
1087    let result_set: std::collections::HashSet<&&str> = lr.iter().collect();
1088
1089    let mut body = String::new();
1090    // Per-parent deletion rows: parent0 (column 0), then parent1 (column 1).
1091    for line in &la {
1092        if !result_set.contains(line) {
1093            body.push_str(&format!("- {line}\n"));
1094        }
1095    }
1096    for line in &lb {
1097        if !result_set.contains(line) {
1098            body.push_str(&format!(" -{line}\n"));
1099        }
1100    }
1101
1102    let d0 = result_line_differs_from_parent(a, result);
1103    let d1 = result_line_differs_from_parent(b, result);
1104    for (i, line) in lr.iter().enumerate() {
1105        let c0 = if d0.get(i).copied().unwrap_or(true) {
1106            '+'
1107        } else {
1108            ' '
1109        };
1110        let c1 = if d1.get(i).copied().unwrap_or(true) {
1111            '+'
1112        } else {
1113            ' '
1114        };
1115        body.push_str(&format!("{c0}{c1}{line}\n"));
1116    }
1117
1118    format!("@@@ -1,{old_a} -1,{old_b} +1,{new_c} @@@\n{body}")
1119}
1120
1121fn read_blob(odb: &Odb, oid: &ObjectId) -> Vec<u8> {
1122    if *oid == crate::diff::zero_oid() {
1123        return Vec::new();
1124    }
1125    odb.read(oid).map(|o| o.data).unwrap_or_default()
1126}
1127
1128/// Read the blob at `path` in `tree`, or `None` if missing.
1129#[must_use]
1130pub fn read_blob_at_path(odb: &Odb, tree: &ObjectId, path: &str) -> Option<Vec<u8>> {
1131    let oid = blob_oid_at_path(odb, tree, path)?;
1132    Some(read_blob(odb, &oid))
1133}
1134
1135/// OID of the blob at `path` in `tree`.
1136#[must_use]
1137pub fn blob_oid_at_path(odb: &Odb, tree: &ObjectId, path: &str) -> Option<ObjectId> {
1138    let mut current = *tree;
1139    let parts: Vec<&str> = path.split('/').collect();
1140    for (pi, part) in parts.iter().enumerate() {
1141        let obj = odb.read(&current).ok()?;
1142        let entries = crate::objects::parse_tree(&obj.data).ok()?;
1143        let found = entries
1144            .iter()
1145            .find(|e| std::str::from_utf8(&e.name).ok() == Some(*part))?;
1146        if pi + 1 == parts.len() {
1147            return Some(found.oid);
1148        }
1149        if found.mode != 0o040000 {
1150            return None;
1151        }
1152        current = found.oid;
1153    }
1154    None
1155}
1156
1157fn abbrev_hex(oid: &ObjectId, abbrev: usize) -> String {
1158    let hex = oid.to_hex();
1159    let len = abbrev.min(hex.len());
1160    hex[..len].to_owned()
1161}