Skip to main content

sley_diff_merge/
lib.rs

1use sley_core::{GitError, ObjectFormat, ObjectId, RepoPath, Result, object_id_for_bytes};
2
3pub mod range;
4pub mod render;
5pub mod ws;
6
7pub use sley_core::BString;
8use sley_index::{BorrowedIndex, Index, IndexStatCache};
9use sley_object::{Commit, EncodedObject, ObjectType, Tree, TreeEntries, TreeEntry};
10use sley_odb::{FileObjectDatabase, ObjectReader, ObjectWriter};
11use sley_refs::{FileRefStore, RefTarget};
12use std::collections::{BTreeMap, BTreeSet, HashMap};
13use std::fs;
14use std::path::{Path, PathBuf};
15
16// ===========================================================================
17// Gitlink (submodule) resolution helpers.
18//
19// A gitlink is a mode-160000 tree/index entry whose oid names the commit an
20// embedded repository has checked out. These helpers resolve, for a directory
21// in the working tree, (a) the embedded repository's git directory — either a
22// `.git` directory or a `.git` *file* carrying a `gitdir: <path>` pointer (the
23// layout `git submodule add`/`update` creates, pointing into the
24// superproject's `.git/modules/<name>`) — and (b) the commit its HEAD names.
25// They are the native equivalent of upstream's `resolve_gitlink_ref()`.
26// ===========================================================================
27
/// Locate the git directory backing the embedded repository checked out at
/// `sub_root`.
///
/// `sub_root/.git` is inspected without following a symlink at that path:
/// - a real directory is returned unchanged;
/// - a regular file must start with `gitdir:`; the remainder, trimmed of
///   surrounding whitespace, is the pointer. A relative pointer is resolved
///   against `sub_root`, an absolute one is used verbatim.
///
/// Returns `None` when `.git` is missing, is neither a directory nor a regular
/// file, cannot be read as UTF-8 text, lacks the `gitdir:` prefix, carries an
/// empty pointer, or the pointer does not name an existing directory.
pub fn gitlink_git_dir(sub_root: &Path) -> Option<PathBuf> {
    let dot_git = sub_root.join(".git");
    let kind = fs::symlink_metadata(&dot_git).ok()?.file_type();
    if kind.is_dir() {
        return Some(dot_git);
    }
    if !kind.is_file() {
        return None;
    }

    // A `.git` *file*: parse the `gitdir: <path>` pointer it carries.
    let text = fs::read_to_string(&dot_git).ok()?;
    let pointer = text
        .strip_prefix("gitdir:")
        .map(str::trim)
        .filter(|p| !p.is_empty())?;
    let pointer = Path::new(pointer);

    let resolved = if pointer.is_absolute() {
        pointer.to_path_buf()
    } else {
        sub_root.join(pointer)
    };
    let exists = resolved.is_dir();
    exists.then_some(resolved)
}
59
60/// Resolve the commit checked out in the embedded repository at `sub_root`
61/// (the value a gitlink entry for that path records): its git directory's
62/// HEAD, followed through symbolic refs. `None` when `sub_root` is not a
63/// repository or its HEAD does not resolve to a commit (e.g. an unborn
64/// branch) — upstream's `resolve_gitlink_ref() < 0` case.
65pub fn gitlink_head_oid(sub_root: &Path, format: ObjectFormat) -> Option<ObjectId> {
66    let git_dir = gitlink_git_dir(sub_root)?;
67    let store = FileRefStore::new(&git_dir, format);
68    let mut target = store.read_ref("HEAD").ok()??;
69    // Follow symbolic-ref chains defensively (git caps the depth too).
70    for _ in 0..10 {
71        match target {
72            RefTarget::Direct(oid) => return Some(oid),
73            RefTarget::Symbolic(name) => target = store.read_ref(&name).ok()??,
74        }
75    }
76    None
77}
78
79// ===========================================================================
80// Line-level diff (Myers O(ND)) and 3-way blob merge (diff3).
81//
82// These operate purely on in-memory blobs and never touch the ODB or the
83// filesystem. They are the engine the CLI layers `git merge`, `cherry-pick`,
84// and `revert` on top of.
85// ===========================================================================
86
/// One line of a blob, borrowed from the original buffer.
///
/// `content` carries the line's own terminating `\n` when it had one, and
/// `has_newline` records that fact. Only the last line of a blob can have
/// `has_newline == false` (the "no newline at end of file" case). Equality is
/// derived, so it compares both the bytes and the flag: a line that gained or
/// lost its terminator counts as changed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DiffLine<'a> {
    /// Raw bytes of the line, trailing `\n` included when present.
    pub content: &'a [u8],
    /// `true` when the line was newline-terminated in the source blob.
    pub has_newline: bool,
}

impl<'a> DiffLine<'a> {
    /// The line's bytes with the terminating newline removed.
    ///
    /// The final byte is dropped only when `has_newline` is set *and* that
    /// byte really is `\n`; in every other case `content` is returned as-is.
    pub fn bytes_without_newline(&self) -> &'a [u8] {
        match (self.has_newline, self.content.split_last()) {
            (true, Some((&b'\n', body))) => body,
            _ => self.content,
        }
    }
}
113
114/// Split a blob into lines, preserving the exact bytes of each line.
115///
116/// Each returned [`DiffLine`] borrows from `blob`; its `content` includes the
117/// terminating `\n`. The returned vector is empty for an empty blob. A blob
118/// whose final byte is not `\n` yields a final line with `has_newline ==
119/// false` — git's "\ No newline at end of file" case.
120pub fn split_lines(blob: &[u8]) -> Vec<DiffLine<'_>> {
121    let mut lines = Vec::new();
122    let mut start = 0usize;
123    let len = blob.len();
124    let mut idx = 0usize;
125    while idx < len {
126        if blob[idx] == b'\n' {
127            lines.push(DiffLine {
128                content: &blob[start..=idx],
129                has_newline: true,
130            });
131            idx += 1;
132            start = idx;
133        } else {
134            idx += 1;
135        }
136    }
137    if start < len {
138        lines.push(DiffLine {
139            content: &blob[start..len],
140            has_newline: false,
141        });
142    }
143    lines
144}
145
/// A run-length entry in a line-level edit script.
///
/// Each variant carries the number of consecutive lines it applies to:
/// - [`DiffOp::Equal`] — `n` lines common to both `old` and `new`.
/// - [`DiffOp::Delete`] — `n` lines present in `old` but not `new`.
/// - [`DiffOp::Insert`] — `n` lines present in `new` but not `old`.
///
/// Walking the script in order and consuming `old`/`new` lines accordingly
/// reconstructs `new` from `old`: an `Equal` run advances both sides, a
/// `Delete` run advances only `old`, and an `Insert` run advances only `new`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffOp {
    /// `n` lines are identical in both sequences (consumes `n` old and `n` new lines).
    Equal(usize),
    /// `n` lines are removed from the old sequence (consumes `n` old lines only).
    Delete(usize),
    /// `n` lines are added in the new sequence (consumes `n` new lines only).
    Insert(usize),
}
164
165/// Compute a minimal line-level edit script transforming `old` into `new`
166/// using Myers' O(ND) difference algorithm.
167///
168/// Lines are compared for equality by their full bytes (see [`DiffLine`]). The
169/// result is a coalesced sequence of [`DiffOp`] runs; consecutive ops of the
170/// same kind are merged so the script is compact. The script is a standard
171/// (shortest-edit-script) diff: the number of `Delete` + `Insert` lines is
172/// minimal.
173pub fn myers_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
174    // Trim a common prefix and suffix first. This keeps the O(ND) search small
175    // for the typical case of a localized edit and does not affect minimality.
176    let n_total = old.len();
177    let m_total = new.len();
178    let mut prefix = 0usize;
179    while prefix < n_total && prefix < m_total && old[prefix] == new[prefix] {
180        prefix += 1;
181    }
182    let mut suffix = 0usize;
183    while suffix < n_total - prefix
184        && suffix < m_total - prefix
185        && old[n_total - 1 - suffix] == new[m_total - 1 - suffix]
186    {
187        suffix += 1;
188    }
189
190    let old_mid = &old[prefix..n_total - suffix];
191    let new_mid = &new[prefix..m_total - suffix];
192
193    let mut ops: Vec<DiffOp> = Vec::new();
194    if prefix > 0 {
195        ops.push(DiffOp::Equal(prefix));
196    }
197    myers_core(old_mid, new_mid, &mut ops);
198    if suffix > 0 {
199        ops.push(DiffOp::Equal(suffix));
200    }
201    coalesce_ops(ops)
202}
203
/// Classic forward Myers O(ND) shortest-edit-script search over the trimmed
/// sub-problem, followed by a backtrack through the stored traces.
///
/// `old`/`new` are the line slices to diff (callers in this file pass ranges
/// whose common prefix/suffix has already been trimmed). Ops are appended to
/// `out` in forward order and are expected to be coalesced by the caller:
/// when one side is empty a single `Insert`/`Delete` run is pushed, otherwise
/// the ops are the per-line ops produced by [`backtrack`]. This is the
/// algorithm from Myers' 1986 paper, which yields a shortest edit script
/// (minimal number of insertions + deletions).
fn myers_core(old: &[DiffLine<'_>], new: &[DiffLine<'_>], out: &mut Vec<DiffOp>) {
    let n = old.len() as isize;
    let m = new.len() as isize;
    // Degenerate cases: with one side empty the script is a single run (or
    // nothing at all when both sides are empty).
    if n == 0 {
        if m > 0 {
            out.push(DiffOp::Insert(m as usize));
        }
        return;
    }
    if m == 0 {
        out.push(DiffOp::Delete(n as usize));
        return;
    }

    // `max` bounds the edit distance: deleting all of `old` and inserting all
    // of `new` always works.
    let max = (n + m) as usize;
    let offset = max as isize; // shift so diagonal k maps to index (k + offset)
    let width = 2 * max + 1;
    // v[k + offset] holds the furthest-reaching x on diagonal k for the current d.
    let mut v = vec![0isize; width];
    // Save a snapshot of v after each d so we can backtrack the chosen path.
    // The snapshot is taken at the *start* of round d, so trace[d] is the state
    // left behind by round d - 1 (all zeros for d == 0).
    let mut trace: Vec<Vec<isize>> = Vec::new();

    let mut found_d: Option<usize> = None;
    'search: for d in 0..=(max as isize) {
        trace.push(v.clone());
        // Only diagonals -d, -d + 2, ..., d are reachable with exactly d edits.
        let mut k = -d;
        while k <= d {
            let kidx = (k + offset) as usize;
            // Decide whether we arrived here by moving down (insert, from k+1)
            // or right (delete, from k-1). Prefer the move that reaches further.
            // The `k == -d` / `k != d` guards also keep the neighbour lookups
            // inside the band written so far.
            let mut x = if k == -d
                || (k != d && v[(k - 1 + offset) as usize] < v[(k + 1 + offset) as usize])
            {
                // Move down: x stays, y increases (insertion from new).
                v[(k + 1 + offset) as usize]
            } else {
                // Move right: x increases (deletion from old).
                v[(k - 1 + offset) as usize] + 1
            };
            let mut y = x - k;
            // Follow the diagonal (matching lines) as far as possible.
            while x < n && y < m && old[x as usize] == new[y as usize] {
                x += 1;
                y += 1;
            }
            v[kidx] = x;
            // Reached the bottom-right corner: d is the length of the script.
            if x >= n && y >= m {
                found_d = Some(d as usize);
                break 'search;
            }
            k += 2;
        }
    }

    // A shortest edit path always exists, so found_d is set; if somehow not,
    // fall back to a delete-all/insert-all script (still correct, not minimal).
    let Some(d_end) = found_d else {
        out.push(DiffOp::Delete(n as usize));
        out.push(DiffOp::Insert(m as usize));
        return;
    };

    backtrack(n, m, &trace, d_end, offset, out);
}
275
/// Reconstruct the edit script from the saved Myers traces.
///
/// Walks backward from `(n, m)` to `(0, 0)`, emitting per-line `Delete`,
/// `Insert`, and `Equal` ops, then reverses them into forward order before
/// appending to `out`. `n`/`m` are the lengths of the (trimmed) old/new slices.
///
/// - `trace[d]` is the furthest-reaching-x table as it stood at the start of
///   search round `d`, indexed by `k + offset` for diagonal `k`.
/// - `d_end` is the round in which the search reached `(n, m)`.
/// - `offset` is the same diagonal-to-index shift the forward search used.
fn backtrack(
    n: isize,
    m: isize,
    trace: &[Vec<isize>],
    d_end: usize,
    offset: isize,
    out: &mut Vec<DiffOp>,
) {
    // Current position on the edit graph, starting at the end point.
    let mut x = n;
    let mut y = m;
    // Ops are collected back-to-front and reversed once at the end.
    let mut rev: Vec<DiffOp> = Vec::new();

    for d in (0..=d_end).rev() {
        let v = &trace[d];
        let k = x - y;
        // Determine the predecessor diagonal, mirroring the forward step rule.
        let prev_k = if k == -(d as isize)
            || (k != d as isize && v[(k - 1 + offset) as usize] < v[(k + 1 + offset) as usize])
        {
            k + 1 // came from a down move (insert)
        } else {
            k - 1 // came from a right move (delete)
        };
        let prev_x = v[(prev_k + offset) as usize];
        let prev_y = prev_x - prev_k;

        // Emit the diagonal (equal) moves taken after reaching the predecessor.
        while x > prev_x && y > prev_y {
            rev.push(DiffOp::Equal(1));
            x -= 1;
            y -= 1;
        }
        // Round 0 has no edit step of its own: it consists only of the
        // initial diagonal handled by the loop above.
        if d > 0 {
            if x == prev_x {
                // Down move: an insertion of new[prev_y].
                rev.push(DiffOp::Insert(1));
            } else {
                // Right move: a deletion of old[prev_x].
                rev.push(DiffOp::Delete(1));
            }
            x = prev_x;
            y = prev_y;
        }
    }

    rev.reverse();
    out.extend(rev);
}
329
330/// Merge adjacent ops of the same kind so the script is compact.
331fn coalesce_ops(ops: Vec<DiffOp>) -> Vec<DiffOp> {
332    let mut out: Vec<DiffOp> = Vec::with_capacity(ops.len());
333    for op in ops {
334        match (out.last_mut(), op) {
335            (Some(DiffOp::Equal(prev)), DiffOp::Equal(n)) => *prev += n,
336            (Some(DiffOp::Delete(prev)), DiffOp::Delete(n)) => *prev += n,
337            (Some(DiffOp::Insert(prev)), DiffOp::Insert(n)) => *prev += n,
338            _ => out.push(op),
339        }
340    }
341    out
342}
343
344// ===========================================================================
345// Whitespace-ignoring line comparison (git xdiff's XDF_WHITESPACE_FLAGS).
346//
347// git's xdiff compares two records (lines, including the trailing `\n`) for
348// equality under whitespace-ignore flags via `xdl_recmatch`. Rather than
349// re-implement the Myers core to take a custom equality predicate, we map each
350// flavour to a *canonicalization* of the line bytes that produces identical
351// output iff `xdl_recmatch` would return 1, then diff on the canonicalized
352// lines while emitting the original bytes. This is exact: it is a behavioural
353// port of `xdiff/xutils.c:xdl_recmatch` and `xdl_blankline`.
354// ===========================================================================
355
/// Whitespace-ignore switches for line comparison, modelled on git's
/// `XDF_WHITESPACE_FLAGS` (`-w`, `-b`, `--ignore-space-at-eol`,
/// `--ignore-cr-at-eol`).
///
/// When several switches are set, only the strongest one is honoured, in the
/// order `-w` ⊃ `-b` ⊃ `--ignore-space-at-eol` ⊃ `--ignore-cr-at-eol`. The
/// default value has every switch off.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct WsIgnore {
    /// `-w` / `--ignore-all-space`: whitespace is ignored entirely.
    pub all_space: bool,
    /// `-b` / `--ignore-space-change`: the amount of whitespace is ignored.
    pub space_change: bool,
    /// `--ignore-space-at-eol`: whitespace at the end of a line is ignored.
    pub space_at_eol: bool,
    /// `--ignore-cr-at-eol`: a carriage return at the end of a line is ignored.
    pub cr_at_eol: bool,
}

impl WsIgnore {
    /// `true` when every switch is off, i.e. lines are compared byte-for-byte.
    pub fn is_empty(&self) -> bool {
        // Exhaustive destructuring: a newly added switch must be handled here.
        let Self {
            all_space,
            space_change,
            space_at_eol,
            cr_at_eol,
        } = *self;
        !all_space && !space_change && !space_at_eol && !cr_at_eol
    }
}
380
/// `XDL_ISSPACE` — the byte set of C `isspace`: space plus the contiguous
/// control range `\t` (0x09), `\n` (0x0a), vertical tab (0x0b), form feed
/// (0x0c) and `\r` (0x0d).
#[inline]
fn xdl_isspace(c: u8) -> bool {
    c == b' ' || (b'\t'..=b'\r').contains(&c)
}
387
/// Canonicalize a line's bytes (including any trailing `\n`) for whitespace-
/// insensitive comparison, intended to mirror `xdl_recmatch`'s acceptance set:
/// two original lines are treated as equal under `ignore` iff their canonical
/// forms are byte-identical.
///
/// * `all_space` (`-w`): drop every whitespace byte.
/// * `space_change` (`-b`): collapse each run of whitespace to a single `' '`
///   and strip trailing whitespace (a run on one side matches a run on the
///   other regardless of length; leading/internal whitespace must still align,
///   trailing whitespace is dropped entirely).
/// * `space_at_eol`: strip trailing whitespace only.
/// * `cr_at_eol`: drop a single `\r` immediately before a terminating `\n`.
///
/// This is a thin wrapper that forwards to the private [`canonicalize_line`]
/// and returns its freshly allocated result unchanged.
///
/// Exposed crate-internally so the change-compaction pass in [`crate::render`]
/// can compare lines for sliding under the exact same equality the line-level
/// diff uses (git's `recs_match` on the whitespace-canonicalized record).
pub(crate) fn canonicalize_line_for_match(line: &[u8], ignore: WsIgnore) -> Vec<u8> {
    canonicalize_line(line, ignore)
}
407
408fn canonicalize_line(line: &[u8], ignore: WsIgnore) -> Vec<u8> {
409    if ignore.all_space {
410        return line.iter().copied().filter(|&c| !xdl_isspace(c)).collect();
411    }
412    if ignore.space_change {
413        let mut out = Vec::with_capacity(line.len());
414        let mut i = 0usize;
415        while i < line.len() {
416            if xdl_isspace(line[i]) {
417                // Collapse the whole whitespace run to a single space.
418                while i < line.len() && xdl_isspace(line[i]) {
419                    i += 1;
420                }
421                out.push(b' ');
422            } else {
423                out.push(line[i]);
424                i += 1;
425            }
426        }
427        // Strip a trailing collapsed-space (trailing whitespace is ignored).
428        if out.last() == Some(&b' ') {
429            out.pop();
430        }
431        return out;
432    }
433    if ignore.space_at_eol {
434        let mut end = line.len();
435        while end > 0 && xdl_isspace(line[end - 1]) {
436            end -= 1;
437        }
438        return line[..end].to_vec();
439    }
440    if ignore.cr_at_eol {
441        // Drop a `\r` directly before a terminating `\n`.
442        if let Some(stripped) = line.strip_suffix(b"\n") {
443            if let Some(without_cr) = stripped.strip_suffix(b"\r") {
444                let mut out = without_cr.to_vec();
445                out.push(b'\n');
446                return out;
447            }
448        } else if let Some(without_cr) = line.strip_suffix(b"\r") {
449            // Incomplete final line: a bare trailing `\r` is also ignored.
450            return without_cr.to_vec();
451        }
452        return line.to_vec();
453    }
454    line.to_vec()
455}
456
457/// `xdl_blankline`: a line is "blank" when, after applying the active
458/// whitespace flags, it has no content. With no whitespace flags, git treats a
459/// record of size ≤ 1 (empty, or a lone `\n`) as blank; with flags, a line all
460/// of whose bytes are whitespace is blank.
461fn line_is_blank(line: &[u8], ignore: WsIgnore) -> bool {
462    if ignore.is_empty() {
463        line.len() <= 1
464    } else {
465        line.iter().all(|&c| xdl_isspace(c))
466    }
467}
468
469/// Compute a line-level edit script transforming `old` into `new`, comparing
470/// lines under the whitespace-ignore flags `ignore` while the returned ops
471/// still index the *original* lines position-for-position.
472///
473/// When `ignore.is_empty()`, this is identical to [`myers_diff_lines`]. With
474/// flags, lines are canonicalized (see [`canonicalize_line`]) for the equality
475/// test only; the ops consume the same number of old/new lines as the originals
476/// so the caller can render the original bytes.
477pub fn myers_diff_lines_ws(
478    old: &[DiffLine<'_>],
479    new: &[DiffLine<'_>],
480    ignore: WsIgnore,
481    algorithm: DiffAlgorithm,
482) -> Vec<DiffOp> {
483    if ignore.is_empty() {
484        return diff_lines_with_algorithm(old, new, algorithm);
485    }
486    let old_canon: Vec<Vec<u8>> = old
487        .iter()
488        .map(|l| canonicalize_line(l.content, ignore))
489        .collect();
490    let new_canon: Vec<Vec<u8>> = new
491        .iter()
492        .map(|l| canonicalize_line(l.content, ignore))
493        .collect();
494    let old_lines: Vec<DiffLine<'_>> = old_canon
495        .iter()
496        .map(|c| DiffLine {
497            content: c.as_slice(),
498            has_newline: true,
499        })
500        .collect();
501    let new_lines: Vec<DiffLine<'_>> = new_canon
502        .iter()
503        .map(|c| DiffLine {
504            content: c.as_slice(),
505            has_newline: true,
506        })
507        .collect();
508    diff_lines_with_algorithm(&old_lines, &new_lines, algorithm)
509}
510
511// ===========================================================================
512// Alternative diff algorithms: patience and histogram.
513//
514// Both share the recursive "anchor and recurse" shape used by git's xdiff
515// implementations of `--patience` and `--histogram`:
516//
517//   1. trim the common prefix and suffix of the current line range,
518//   2. pick one or more common lines that are confidently aligned (the
519//      "anchors") according to the algorithm's rule,
520//   3. recurse on the gaps to the left of, between, and to the right of the
521//      anchors,
522//   4. when no anchor can be found, fall back to the Myers shortest-edit-script
523//      search for that range so the result is still a valid LCS-correct diff.
524//
525// They operate purely on slices of [`DiffLine`]s and emit the same coalesced
526// [`DiffOp`] run sequence as [`myers_diff_lines`], so any caller can swap
527// algorithms freely. The two functions differ only in the anchor-selection
528// rule in steps 2/3.
529// ===========================================================================
530
531/// A hashable key for a line, used to bucket equal lines when finding anchors.
532///
533/// Mirrors [`DiffLine`]'s `PartialEq`: two lines are the same iff their bytes
534/// and their trailing-newline flag match. Keying on this tuple lets us hash
535/// lines without changing the public [`DiffLine`] type.
536type LineKey<'a> = (&'a [u8], bool);
537
538#[inline]
539fn line_key<'a>(line: &DiffLine<'a>) -> LineKey<'a> {
540    (line.content, line.has_newline)
541}
542
543/// Compute a line-level edit script transforming `old` into `new` using the
544/// patience diff algorithm (Bram Cohen's algorithm, as in `git diff
545/// --patience`).
546///
547/// Patience diff anchors on lines that occur *exactly once* in both `old` and
548/// `new`; it aligns those unique lines via a longest-increasing-subsequence
549/// ("patience sorting") pass and recurses into the gaps, falling back to Myers
550/// when a gap has no unique common line. The result is a valid LCS-correct edit
551/// script with the same shape as [`myers_diff_lines`]: walking it reconstructs
552/// `new` from `old`, and every [`DiffOp::Equal`] run covers genuinely equal
553/// lines. Patience tends to produce more human-readable hunks than Myers when
554/// blocks of lines are moved or repeated, though it is not guaranteed to be a
555/// shortest edit script.
556pub fn patience_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
557    let mut ops: Vec<DiffOp> = Vec::new();
558    patience_recurse(old, new, 0, old.len(), 0, new.len(), &mut ops);
559    coalesce_ops(ops)
560}
561
562/// Compute a line-level edit script transforming `old` into `new` using the
563/// histogram diff algorithm (as in `git diff --histogram`, derived from JGit).
564///
565/// Histogram diff is a patience-style unique-anchor algorithm with a fallback:
566/// it builds an occurrence histogram of `old` and, scanning `new`, picks the
567/// longest run of matching lines whose `old` line has the *fewest* occurrences
568/// (preferring truly unique lines, like patience, but still able to anchor on
569/// low-frequency lines when no globally-unique line exists). It then recurses
570/// on the regions on either side of that run, falling back to Myers only when
571/// no common line exists in a region. The result is a valid LCS-correct edit
572/// script with the same shape as [`myers_diff_lines`].
573pub fn histogram_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
574    let mut ops: Vec<DiffOp> = Vec::new();
575    histogram_recurse(old, new, 0, old.len(), 0, new.len(), &mut ops);
576    coalesce_ops(ops)
577}
578
/// Dispatch to the line-diff implementation selected by `algorithm`.
///
/// All variants return the same coalesced [`DiffOp`] run sequence as
/// [`myers_diff_lines`], so callers can switch algorithms without changing how
/// they consume the result.
///
/// - [`DiffAlgorithm::Myers`] and [`DiffAlgorithm::Minimal`] use the Myers
///   O(ND) shortest-edit-script search ([`myers_diff_lines`]); that search is
///   already minimal in deletions + insertions, so `Minimal` is an alias for
///   it here rather than a distinct slower mode.
/// - [`DiffAlgorithm::Patience`] uses [`patience_diff_lines`].
/// - [`DiffAlgorithm::Histogram`] uses [`histogram_diff_lines`].
///
/// The match is exhaustive on purpose: adding a variant to `DiffAlgorithm`
/// forces a decision here at compile time.
pub fn diff_lines_with_algorithm(
    old: &[DiffLine<'_>],
    new: &[DiffLine<'_>],
    algorithm: DiffAlgorithm,
) -> Vec<DiffOp> {
    match algorithm {
        // `Minimal` shares the Myers implementation (see the doc comment).
        DiffAlgorithm::Myers | DiffAlgorithm::Minimal => myers_diff_lines(old, new),
        DiffAlgorithm::Patience => patience_diff_lines(old, new),
        DiffAlgorithm::Histogram => histogram_diff_lines(old, new),
    }
}
602
603/// Emit ops for an empty-on-one-side range; returns `true` if it handled it.
604///
605/// Covers the recursion base cases where one side of `old[a0..a1]` /
606/// `new[b0..b1]` is empty: a pure deletion, a pure insertion, or nothing at
607/// all. Used by both the patience and histogram recursions before they look
608/// for an anchor.
609fn emit_trivial_range(a0: usize, a1: usize, b0: usize, b1: usize, out: &mut Vec<DiffOp>) -> bool {
610    let old_len = a1 - a0;
611    let new_len = b1 - b0;
612    if old_len == 0 && new_len == 0 {
613        return true;
614    }
615    if old_len == 0 {
616        out.push(DiffOp::Insert(new_len));
617        return true;
618    }
619    if new_len == 0 {
620        out.push(DiffOp::Delete(old_len));
621        return true;
622    }
623    false
624}
625
626/// Trim the common prefix/suffix of `old[a0..a1]` vs `new[b0..b1]`.
627///
628/// Emits an `Equal` for the matched prefix immediately, returns the inner
629/// (still-differing) range, and reports the matched-suffix length so the caller
630/// can emit its `Equal` *after* it has processed the inner range. This keeps
631/// the per-range work proportional to the actual edit, mirroring the prefix /
632/// suffix trim in [`myers_diff_lines`].
633fn trim_common(
634    old: &[DiffLine<'_>],
635    new: &[DiffLine<'_>],
636    mut a0: usize,
637    mut a1: usize,
638    mut b0: usize,
639    mut b1: usize,
640    out: &mut Vec<DiffOp>,
641) -> (usize, usize, usize, usize, usize) {
642    let mut prefix = 0usize;
643    while a0 < a1 && b0 < b1 && old[a0] == new[b0] {
644        a0 += 1;
645        b0 += 1;
646        prefix += 1;
647    }
648    if prefix > 0 {
649        out.push(DiffOp::Equal(prefix));
650    }
651    let mut suffix = 0usize;
652    while a1 > a0 && b1 > b0 && old[a1 - 1] == new[b1 - 1] {
653        a1 -= 1;
654        b1 -= 1;
655        suffix += 1;
656    }
657    (a0, a1, b0, b1, suffix)
658}
659
660/// Recursive patience-diff worker over `old[a0..a1]` vs `new[b0..b1]`.
661fn patience_recurse(
662    old: &[DiffLine<'_>],
663    new: &[DiffLine<'_>],
664    a0: usize,
665    a1: usize,
666    b0: usize,
667    b1: usize,
668    out: &mut Vec<DiffOp>,
669) {
670    if emit_trivial_range(a0, a1, b0, b1, out) {
671        return;
672    }
673    let (a0, a1, b0, b1, suffix) = trim_common(old, new, a0, a1, b0, b1, out);
674    if !emit_trivial_range(a0, a1, b0, b1, out) {
675        match patience_anchors(old, new, a0, a1, b0, b1) {
676            Some(anchors) => {
677                // Walk the aligned anchors in order, recursing into each gap
678                // before emitting the anchor line as Equal.
679                let mut cur_a = a0;
680                let mut cur_b = b0;
681                for (ai, bi) in anchors {
682                    patience_recurse(old, new, cur_a, ai, cur_b, bi, out);
683                    out.push(DiffOp::Equal(1));
684                    cur_a = ai + 1;
685                    cur_b = bi + 1;
686                }
687                // Tail after the last anchor.
688                patience_recurse(old, new, cur_a, a1, cur_b, b1, out);
689            }
690            // No unique common line in this range: defer to Myers, which always
691            // yields a valid (and minimal) script for the leftover block.
692            None => myers_core(&old[a0..a1], &new[b0..b1], out),
693        }
694    }
695    if suffix > 0 {
696        out.push(DiffOp::Equal(suffix));
697    }
698}
699
700/// Find the patience anchors for `old[a0..a1]` vs `new[b0..b1]`.
701///
702/// An anchor is a line that occurs exactly once in `old[a0..a1]` and exactly
703/// once in `new[b0..b1]`. The matched (old_index, new_index) pairs are reduced
704/// to their longest increasing subsequence by new-index (the patience-sort LCS)
705/// so the returned anchors are strictly increasing in *both* indices and can be
706/// used as split points. Returns `None` when there are no such unique common
707/// lines (the caller then falls back to Myers).
708fn patience_anchors(
709    old: &[DiffLine<'_>],
710    new: &[DiffLine<'_>],
711    a0: usize,
712    a1: usize,
713    b0: usize,
714    b1: usize,
715) -> Option<Vec<(usize, usize)>> {
716    // Count occurrences and remember the (single) position of each line in each
717    // side's range. `count > 1` poisons the position so we can ignore it.
718    struct Occ {
719        count: usize,
720        pos: usize,
721    }
722    let mut in_old: HashMap<LineKey<'_>, Occ> = HashMap::new();
723    for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
724        in_old
725            .entry(line_key(line))
726            .and_modify(|o| o.count += 1)
727            .or_insert(Occ { count: 1, pos: i });
728    }
729    let mut in_new: HashMap<LineKey<'_>, Occ> = HashMap::new();
730    for (j, line) in new.iter().enumerate().take(b1).skip(b0) {
731        in_new
732            .entry(line_key(line))
733            .and_modify(|o| o.count += 1)
734            .or_insert(Occ { count: 1, pos: j });
735    }
736
737    // Collect lines unique in both, ordered by their position in `old`.
738    let mut pairs: Vec<(usize, usize)> = Vec::new();
739    for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
740        let key = line_key(line);
741        let Some(o) = in_old.get(&key) else { continue };
742        if o.count != 1 || o.pos != i {
743            continue;
744        }
745        // A line unique in both ranges is a candidate anchor.
746        if let Some(n) = in_new.get(&key)
747            && n.count == 1
748        {
749            pairs.push((i, n.pos));
750        }
751    }
752    if pairs.is_empty() {
753        return None;
754    }
755
756    // Patience sort: longest increasing subsequence of new-indices. `pairs` is
757    // already sorted by old-index, so an LIS by new-index yields a set of
758    // anchors increasing in both coordinates.
759    let lis = longest_increasing_by_new(&pairs);
760    if lis.is_empty() { None } else { Some(lis) }
761}
762
/// Pick the longest subsequence of `pairs` whose new-index (second element)
/// is increasing, and return the chosen `(old_index, new_index)` pairs in
/// their original order.
///
/// This is the patience-sorting core: for every pair a binary search finds
/// the pile it lands on, and a back-pointer to the top of the previous pile is
/// recorded so the subsequence itself (not just its length) can be rebuilt.
/// An empty input yields an empty result.
fn longest_increasing_by_new(pairs: &[(usize, usize)]) -> Vec<(usize, usize)> {
    // tails[len - 1]: index into `pairs` of the smallest tail value among
    // increasing subsequences of length `len`.
    let mut tails: Vec<usize> = Vec::new();
    // back[i]: index into `pairs` of the element preceding pairs[i] in its chain.
    let mut back: Vec<Option<usize>> = vec![None; pairs.len()];

    for (i, &(_, new_idx)) in pairs.iter().enumerate() {
        // First pile whose tail value is not below `new_idx`.
        let slot = tails.partition_point(|&t| pairs[t].1 < new_idx);
        back[i] = slot.checked_sub(1).map(|below| tails[below]);
        if slot < tails.len() {
            tails[slot] = i;
        } else {
            tails.push(i);
        }
    }

    // Follow the back-pointers from the tail of the longest chain.
    let mut chain: Vec<(usize, usize)> =
        std::iter::successors(tails.last().copied(), |&i| back[i])
            .map(|i| pairs[i])
            .collect();
    chain.reverse();
    chain
}
813
814/// Recursive histogram-diff worker over `old[a0..a1]` vs `new[b0..b1]`.
815fn histogram_recurse(
816    old: &[DiffLine<'_>],
817    new: &[DiffLine<'_>],
818    a0: usize,
819    a1: usize,
820    b0: usize,
821    b1: usize,
822    out: &mut Vec<DiffOp>,
823) {
824    if emit_trivial_range(a0, a1, b0, b1, out) {
825        return;
826    }
827    let (a0, a1, b0, b1, suffix) = trim_common(old, new, a0, a1, b0, b1, out);
828    if !emit_trivial_range(a0, a1, b0, b1, out) {
829        match histogram_region(old, new, a0, a1, b0, b1) {
830            Some(region) => {
831                // Recurse left of the matched run, emit the run as Equal, then
832                // recurse right of it.
833                histogram_recurse(old, new, a0, region.old_start, b0, region.new_start, out);
834                out.push(DiffOp::Equal(region.len));
835                histogram_recurse(
836                    old,
837                    new,
838                    region.old_start + region.len,
839                    a1,
840                    region.new_start + region.len,
841                    b1,
842                    out,
843                );
844            }
845            // No common line at all in this range: hand it to Myers.
846            None => myers_core(&old[a0..a1], &new[b0..b1], out),
847        }
848    }
849    if suffix > 0 {
850        out.push(DiffOp::Equal(suffix));
851    }
852}
853
/// The longest common run chosen by the histogram heuristic for one range.
///
/// Produced by [`histogram_region`] and consumed by [`histogram_recurse`],
/// which emits the run as a single `Equal` op and recurses on the ranges to
/// its left and right.
struct HistogramRegion {
    /// Index into `old` of the first line of the matched run.
    old_start: usize,
    /// Index into `new` of the first line of the matched run.
    new_start: usize,
    /// Number of consecutive lines that match starting at both indices.
    len: usize,
}
860
861/// Choose the histogram anchor run for `old[a0..a1]` vs `new[b0..b1]`.
862///
863/// Builds an occurrence histogram of the `old` range, then scans the `new`
864/// range. For each `new` line that also appears in `old`, it extends a matching
865/// run backward and forward and scores candidate alignments, preferring the run
866/// whose anchoring `old` line has the *fewest* occurrences (ties broken by run
867/// length, then by earliest position). This is the JGit/`git --histogram`
868/// heuristic: rare lines make the most reliable anchors. Returns `None` if no
869/// `new` line appears in the `old` range.
870fn histogram_region(
871    old: &[DiffLine<'_>],
872    new: &[DiffLine<'_>],
873    a0: usize,
874    a1: usize,
875    b0: usize,
876    b1: usize,
877) -> Option<HistogramRegion> {
878    // Occurrence count and the list of positions of each line within old[a0..a1].
879    let mut buckets: HashMap<LineKey<'_>, Vec<usize>> = HashMap::new();
880    for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
881        buckets.entry(line_key(line)).or_default().push(i);
882    }
883
884    let mut best: Option<HistogramRegion> = None;
885    // Lower occurrence count is better; among equal counts, longer run wins.
886    let mut best_count = usize::MAX;
887    let mut best_len = 0usize;
888
889    let mut bj = b0;
890    while bj < b1 {
891        let key = line_key(&new[bj]);
892        let Some(positions) = buckets.get(&key) else {
893            bj += 1;
894            continue;
895        };
896        let occ = positions.len();
897        // For every place this line sits in `old`, measure the maximal matching
898        // run that passes through (positions[*], bj).
899        let mut next_bj = bj + 1;
900        for &ai in positions {
901            // Extend backward while lines keep matching and we stay in range.
902            let mut start_a = ai;
903            let mut start_b = bj;
904            while start_a > a0 && start_b > b0 && old[start_a - 1] == new[start_b - 1] {
905                start_a -= 1;
906                start_b -= 1;
907            }
908            // Extend forward from the run start.
909            let mut len = 0usize;
910            while start_a + len < a1
911                && start_b + len < b1
912                && old[start_a + len] == new[start_b + len]
913            {
914                len += 1;
915            }
916            // Score this run by the rarest occurrence count along it; using the
917            // anchor line's own count is the standard, cheaper approximation.
918            let run_count = occ;
919            let better = run_count < best_count || (run_count == best_count && len > best_len);
920            if better && len > 0 {
921                best_count = run_count;
922                best_len = len;
923                best = Some(HistogramRegion {
924                    old_start: start_a,
925                    new_start: start_b,
926                    len,
927                });
928                // Skip past this matched run in `new` so we do not re-evaluate
929                // every interior line of the same run from scratch.
930                if start_b + len > next_bj {
931                    next_bj = start_b + len;
932                }
933            }
934        }
935        bj = next_bj.max(bj + 1);
936    }
937
938    best
939}
940
/// Which conflict-marker style [`merge_blobs`] emits.
///
/// [`ConflictStyle::Merge`] is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ConflictStyle {
    /// Standard two-section markers (`<<<<<<<` / `=======` / `>>>>>>>`).
    #[default]
    Merge,
    /// `diff3` style: also include the common-ancestor section between `ours`
    /// and the `=======` divider, delimited by `|||||||`.
    Diff3,
}
951
/// Labels and style controlling [`merge_blobs`] conflict markers.
///
/// An empty label makes the corresponding marker line consist of the marker
/// characters only (no trailing space).
#[derive(Debug, Clone, Copy)]
pub struct MergeBlobOptions<'a> {
    /// Label after the opening `<<<<<<<` marker (typically the local branch).
    pub ours_label: &'a str,
    /// Label after the closing `>>>>>>>` marker (typically the other branch).
    pub theirs_label: &'a str,
    /// Label after the `|||||||` marker (only used for [`ConflictStyle::Diff3`]).
    pub base_label: &'a str,
    /// Which marker style to emit.
    pub style: ConflictStyle,
}

/// Defaults: labels `"ours"`, `"theirs"` and `"base"`, with the two-section
/// [`ConflictStyle::Merge`] marker style.
impl Default for MergeBlobOptions<'_> {
    fn default() -> Self {
        Self {
            ours_label: "ours",
            theirs_label: "theirs",
            base_label: "base",
            style: ConflictStyle::Merge,
        }
    }
}
975
/// The outcome of a 3-way blob merge, as returned by [`merge_blobs`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MergeBlobResult {
    /// The merged blob bytes, including any conflict markers.
    pub content: Vec<u8>,
    /// True when at least one region conflicted and markers were written.
    pub conflicted: bool,
}
984
985/// Perform a 3-way merge of three blobs using the diff3 algorithm.
986///
987/// `base` is the common ancestor; `ours` and `theirs` are the two sides. The
988/// merge diffs base→ours and base→theirs (with [`myers_diff_lines`]) and walks
989/// the base in lockstep:
990/// - regions unchanged on both sides emit the base lines unchanged;
991/// - regions changed on exactly one side take that side's lines;
992/// - regions changed on both sides emit the side lines if they are
993///   byte-identical, otherwise a conflict (and [`MergeBlobResult::conflicted`]
994///   is set).
995///
996/// An empty `base` is supported: every line is then "added on both sides", so
997/// the result is the shared content if `ours == theirs`, else a single
998/// conflict (add/add).
999pub fn merge_blobs(
1000    base: &[u8],
1001    ours: &[u8],
1002    theirs: &[u8],
1003    options: &MergeBlobOptions<'_>,
1004) -> MergeBlobResult {
1005    let base_lines = split_lines(base);
1006    let ours_lines = split_lines(ours);
1007    let theirs_lines = split_lines(theirs);
1008
1009    // Per-side matched (equal) base regions, paired with the corresponding side
1010    // ranges, computed via Myers.
1011    let ours_matches = matching_regions(&base_lines, &ours_lines);
1012    let theirs_matches = matching_regions(&base_lines, &theirs_lines);
1013
1014    // Intersect the two match lists to get segments of base that are unchanged
1015    // on BOTH sides, each carrying the exact aligned side indices. Between these
1016    // common-stable segments lie the (potentially conflicting) changed regions.
1017    let stable = common_stable_segments(&ours_matches, &theirs_matches);
1018
1019    let mut writer = MergeWriter::new(options);
1020    // Cursors: next unconsumed line in base, ours, theirs.
1021    let mut base_idx = 0usize;
1022    let mut our_idx = 0usize;
1023    let mut their_idx = 0usize;
1024
1025    for seg in &stable {
1026        // Unstable (changed) region preceding this stable segment.
1027        let base_region = &base_lines[base_idx..seg.base_start];
1028        let our_region = &ours_lines[our_idx..seg.ours_start];
1029        let their_region = &theirs_lines[their_idx..seg.theirs_start];
1030        emit_region(&mut writer, base_region, our_region, their_region);
1031
1032        // The stable segment itself is identical on all three: emit base lines.
1033        writer.emit_lines(&base_lines[seg.base_start..seg.base_start + seg.len]);
1034
1035        base_idx = seg.base_start + seg.len;
1036        our_idx = seg.ours_start + seg.len;
1037        their_idx = seg.theirs_start + seg.len;
1038    }
1039
1040    // Trailing unstable region after the last stable segment (or the whole input
1041    // when there are no common-stable segments).
1042    emit_region(
1043        &mut writer,
1044        &base_lines[base_idx..],
1045        &ours_lines[our_idx..],
1046        &theirs_lines[their_idx..],
1047    );
1048
1049    writer.finish()
1050}
1051
1052/// Resolve and emit one changed region (the gap between two common-stable
1053/// segments) according to diff3 rules.
1054fn emit_region(
1055    writer: &mut MergeWriter<'_>,
1056    base_region: &[DiffLine<'_>],
1057    our_region: &[DiffLine<'_>],
1058    their_region: &[DiffLine<'_>],
1059) {
1060    if our_region.is_empty() && their_region.is_empty() {
1061        return;
1062    }
1063    let our_changed = our_region != base_region;
1064    let their_changed = their_region != base_region;
1065    match (our_changed, their_changed) {
1066        (false, false) => writer.emit_lines(base_region),
1067        (true, false) => writer.emit_lines(our_region),
1068        (false, true) => writer.emit_lines(their_region),
1069        (true, true) => {
1070            if our_region == their_region {
1071                // Both sides made the same change: no conflict.
1072                writer.emit_lines(our_region);
1073            } else {
1074                writer.emit_conflict(our_region, base_region, their_region);
1075            }
1076        }
1077    }
1078}
1079
/// A matched (equal) region between `base` and one side: `base_start..+len`
/// lines of base equal `side_start..+len` lines of that side.
#[derive(Debug, Clone, Copy)]
struct MatchRegion {
    /// Index of the first matched line in `base`.
    base_start: usize,
    /// Index of the first matched line in the side being compared.
    side_start: usize,
    /// Number of consecutive lines that match from both starts.
    len: usize,
}
1088
/// A run of base lines unchanged on *both* sides, with the aligned side starts.
#[derive(Debug, Clone, Copy)]
struct StableSegment {
    /// Index of the first line of the run in `base`.
    base_start: usize,
    /// Index in `ours` of the line aligned with `base_start`.
    ours_start: usize,
    /// Index in `theirs` of the line aligned with `base_start`.
    theirs_start: usize,
    /// Number of lines in the run (the same in base, ours and theirs).
    len: usize,
}
1097
1098/// Compute the matched regions between base and a side using [`myers_diff_lines`].
1099///
1100/// Each `Equal(n)` run becomes a [`MatchRegion`]; the regions are returned in
1101/// increasing base order. (Equal runs are coalesced by the diff, so adjacent
1102/// regions are already maximal.)
1103fn matching_regions(base: &[DiffLine<'_>], side: &[DiffLine<'_>]) -> Vec<MatchRegion> {
1104    let ops = myers_diff_lines(base, side);
1105    let mut regions = Vec::new();
1106    let mut base_idx = 0usize;
1107    let mut side_idx = 0usize;
1108    for op in ops {
1109        match op {
1110            DiffOp::Equal(n) => {
1111                regions.push(MatchRegion {
1112                    base_start: base_idx,
1113                    side_start: side_idx,
1114                    len: n,
1115                });
1116                base_idx += n;
1117                side_idx += n;
1118            }
1119            DiffOp::Delete(n) => base_idx += n,
1120            DiffOp::Insert(n) => side_idx += n,
1121        }
1122    }
1123    regions
1124}
1125
1126/// Intersect the ours/theirs match lists (both in base coordinates) to find the
1127/// base ranges unchanged on both sides, recording the aligned side indices.
1128///
1129/// For each overlapping pair of base ranges `[bs, be)` the ours-side index of
1130/// `bs` is `o.side_start + (bs - o.base_start)` and likewise for theirs; both
1131/// map contiguously across the overlap. The returned segments are in increasing
1132/// base order and never overlap.
1133fn common_stable_segments(ours: &[MatchRegion], theirs: &[MatchRegion]) -> Vec<StableSegment> {
1134    let mut segments = Vec::new();
1135    let mut oi = 0usize;
1136    let mut ti = 0usize;
1137    while oi < ours.len() && ti < theirs.len() {
1138        let o = ours[oi];
1139        let t = theirs[ti];
1140        let o_end = o.base_start + o.len;
1141        let t_end = t.base_start + t.len;
1142        let lo = o.base_start.max(t.base_start);
1143        let hi = o_end.min(t_end);
1144        if lo < hi {
1145            segments.push(StableSegment {
1146                base_start: lo,
1147                ours_start: o.side_start + (lo - o.base_start),
1148                theirs_start: t.side_start + (lo - t.base_start),
1149                len: hi - lo,
1150            });
1151        }
1152        // Advance whichever range ends first.
1153        if o_end <= t_end {
1154            oi += 1;
1155        } else {
1156            ti += 1;
1157        }
1158    }
1159    segments
1160}
1161
1162/// Accumulates merged output and renders conflict markers byte-for-byte like
1163/// upstream git.
1164struct MergeWriter<'a> {
1165    out: Vec<u8>,
1166    conflicted: bool,
1167    options: &'a MergeBlobOptions<'a>,
1168}
1169
1170impl<'a> MergeWriter<'a> {
1171    fn new(options: &'a MergeBlobOptions<'a>) -> Self {
1172        Self {
1173            out: Vec::new(),
1174            conflicted: false,
1175            options,
1176        }
1177    }
1178
1179    /// Append raw line bytes (each line already carries its own newline, except
1180    /// possibly a final no-newline line).
1181    fn emit_lines(&mut self, lines: &[DiffLine<'_>]) {
1182        for line in lines {
1183            self.out.extend_from_slice(line.content);
1184        }
1185    }
1186
1187    /// Emit a conflict hunk. Conflict markers always begin on their own line,
1188    /// so if the preceding emitted content did not end in a newline (a
1189    /// no-newline-at-end side), insert one first — matching git, which prints
1190    /// the "\ No newline at end of file" content followed by a newline before
1191    /// the next marker.
1192    fn emit_conflict(
1193        &mut self,
1194        ours: &[DiffLine<'_>],
1195        base: &[DiffLine<'_>],
1196        theirs: &[DiffLine<'_>],
1197    ) {
1198        self.conflicted = true;
1199        self.write_marker(b'<', self.options.ours_label);
1200        self.emit_section(ours);
1201        if self.options.style == ConflictStyle::Diff3 {
1202            self.ensure_newline();
1203            self.write_marker(b'|', self.options.base_label);
1204            self.emit_section(base);
1205        }
1206        self.ensure_newline();
1207        self.write_divider();
1208        self.emit_section(theirs);
1209        self.ensure_newline();
1210        self.write_marker(b'>', self.options.theirs_label);
1211    }
1212
1213    /// Emit one side's lines inside a conflict, preserving their exact bytes.
1214    fn emit_section(&mut self, lines: &[DiffLine<'_>]) {
1215        for line in lines {
1216            self.out.extend_from_slice(line.content);
1217        }
1218    }
1219
1220    /// Ensure the buffer ends with a newline before writing the next marker, so
1221    /// markers always start a fresh line even after a no-newline final line.
1222    fn ensure_newline(&mut self) {
1223        if !self.out.is_empty() && self.out.last() != Some(&b'\n') {
1224            self.out.push(b'\n');
1225        }
1226    }
1227
1228    /// Write a marker line: 7 copies of `ch`, then (if the label is non-empty)
1229    /// a space and the label, then a newline. No trailing space for an empty
1230    /// label — byte-for-byte with upstream git.
1231    fn write_marker(&mut self, ch: u8, label: &str) {
1232        for _ in 0..7 {
1233            self.out.push(ch);
1234        }
1235        if !label.is_empty() {
1236            self.out.push(b' ');
1237            self.out.extend_from_slice(label.as_bytes());
1238        }
1239        self.out.push(b'\n');
1240    }
1241
1242    /// Write the `=======` divider line (never labelled).
1243    fn write_divider(&mut self) {
1244        for _ in 0..7 {
1245            self.out.push(b'=');
1246        }
1247        self.out.push(b'\n');
1248    }
1249
1250    fn finish(self) -> MergeBlobResult {
1251        MergeBlobResult {
1252            content: self.out,
1253            conflicted: self.conflicted,
1254        }
1255    }
1256}
1257
/// Selector for the line-diff algorithm.
///
/// NOTE(review): the variant names match the values accepted by git's
/// `--diff-algorithm` option; where this selector is consumed is not visible
/// from this part of the file — confirm against the callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffAlgorithm {
    /// The Myers algorithm.
    Myers,
    /// Presumably Myers tuned to produce the smallest possible diff — confirm.
    Minimal,
    /// Patience diff.
    Patience,
    /// Histogram diff.
    Histogram,
}
1265
/// A path-level change, identified by the repository path(s) involved.
///
/// NOTE(review): producers and consumers of this type are not visible from
/// this part of the file; the variant descriptions follow the variant and
/// field names — confirm against the callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileChange {
    /// `path` was added.
    Add { path: RepoPath },
    /// `path` was deleted.
    Delete { path: RepoPath },
    /// `path` was modified.
    Modify { path: RepoPath },
    /// The entry at `old` was renamed to `new`.
    Rename { old: RepoPath, new: RepoPath },
    /// The entry at `source` was copied to `dest`.
    Copy { source: RepoPath, dest: RepoPath },
}
1274
/// A conflict recorded for one path, carrying the bytes of both sides.
///
/// NOTE(review): producers of this type are not visible from this part of the
/// file; field meanings follow the field names — confirm against the callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Conflict {
    /// Repository path the conflict applies to.
    pub path: RepoPath,
    /// Bytes of "our" side.
    pub ours: Vec<u8>,
    /// Bytes of "their" side.
    pub theirs: Vec<u8>,
}
1281
/// The status column of a name-status diff row.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NameStatus {
    /// Status `A`.
    Added,
    /// Status `D`.
    Deleted,
    /// Status `M`.
    Modified,
    /// Status `R`; the payload is the score printed after the letter.
    Renamed(u8),
    /// Status `C`; the payload is the score printed after the letter.
    Copied(u8),
    /// An unmerged (conflicted) path: the index holds higher-stage entries.
    /// git emits a standalone `U <path>` pair (`diff_unmerge`) for it in
    /// addition to the regular worktree-vs-stage-2 modify.
    Unmerged,
}

impl NameStatus {
    /// The single status letter, without any score.
    pub const fn code(self) -> char {
        match self {
            Self::Added => 'A',
            Self::Deleted => 'D',
            Self::Modified => 'M',
            Self::Renamed(_) => 'R',
            Self::Copied(_) => 'C',
            Self::Unmerged => 'U',
        }
    }

    /// The status as it is printed: the letter alone, or for renames and
    /// copies the letter followed by the score zero-padded to three digits
    /// (for example `R075`).
    pub fn label(self) -> String {
        match self {
            Self::Renamed(score) => format!("R{score:03}"),
            Self::Copied(score) => format!("C{score:03}"),
            Self::Added | Self::Deleted | Self::Modified | Self::Unmerged => {
                String::from(self.code())
            }
        }
    }
}
1315
1316#[derive(Debug, Clone, PartialEq, Eq)]
1317pub struct NameStatusEntry {
1318    pub status: NameStatus,
1319    pub path: BString,
1320    pub old_path: Option<BString>,
1321    pub old_mode: Option<u32>,
1322    pub new_mode: Option<u32>,
1323    pub old_oid: Option<ObjectId>,
1324    pub new_oid: Option<ObjectId>,
1325}
1326
1327impl NameStatusEntry {
1328    pub fn line(&self) -> String {
1329        if let Some(old_path) = &self.old_path {
1330            format!(
1331                "{}\t{}\t{}",
1332                self.status.label(),
1333                String::from_utf8_lossy(old_path.as_bytes()),
1334                String::from_utf8_lossy(self.path.as_bytes())
1335            )
1336        } else {
1337            format!(
1338                "{}\t{}",
1339                self.status.label(),
1340                String::from_utf8_lossy(self.path.as_bytes())
1341            )
1342        }
1343    }
1344}
1345
/// A gitlink (submodule) entry as recorded in the index.
///
/// NOTE(review): construction sites are not visible from this part of the
/// file; the description follows the type and field names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexGitlinkEntry {
    /// Path of the gitlink entry.
    pub path: BString,
    /// Object id stored for the entry (for a gitlink, the commit the embedded
    /// repository is expected to have checked out).
    pub oid: ObjectId,
}
1351
/// Result of an index-vs-worktree comparison: the name-status rows plus the
/// gitlinks staged in the index.
///
/// NOTE(review): the function producing this value is not visible from this
/// part of the file; the description follows the type and field names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexWorktreeDiff {
    /// Name-status rows of the comparison.
    pub entries: Vec<NameStatusEntry>,
    /// Gitlink entries present in the index.
    pub staged_gitlinks: Vec<IndexGitlinkEntry>,
}
1357
/// Switches for rename/copy detection in the name-status diff entry points.
///
/// [`Default`] turns rename detection and `rename_empty` on and leaves both
/// copy-related switches off.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DiffNameStatusOptions {
    /// Enable rename detection.
    pub detect_renames: bool,
    /// Enable copy detection.
    pub detect_copies: bool,
    /// Presumably mirrors git's `--find-copies-harder` — TODO confirm.
    pub find_copies_harder: bool,
    /// Presumably whether empty blobs may take part in rename detection
    /// (git's `--rename-empty`) — TODO confirm.
    pub rename_empty: bool,
}

impl Default for DiffNameStatusOptions {
    fn default() -> Self {
        Self {
            // Switched on by default.
            detect_renames: true,
            rename_empty: true,
            // Opt-in only.
            detect_copies: false,
            find_copies_harder: false,
        }
    }
}
1376
/// git's default minimum similarity (as a percentage) for a pair of files to be
/// reported as a rename or copy. Matches `git`'s built-in `-M`/`-C` threshold
/// of 50% (`DEFAULT_RENAME_SCORE` is `MAX_SCORE / 2`).
///
/// Both `rename_threshold` and `copy_threshold` of [`RenameDetectionOptions`]
/// default to this value.
pub const DEFAULT_RENAME_THRESHOLD: u8 = 50;
1381
1382/// Options controlling inexact (similarity-based) rename and copy detection,
1383/// layered additively on top of [`DiffNameStatusOptions`].
1384///
1385/// This is a separate struct rather than new fields on [`DiffNameStatusOptions`]
1386/// so that existing callers — which build `DiffNameStatusOptions` with a struct
1387/// literal — keep compiling unchanged. Code that wants inexact detection uses
1388/// the `*_with_rename_options` entry points and this type instead.
1389///
1390/// [`Default`] preserves the existing behaviour exactly: `detect_inexact` is
1391/// `false`, so unless a caller opts in, only exact-OID rename/copy detection
1392/// runs (identical to the plain `*_with_options` functions). When
1393/// `detect_inexact` is enabled, files added on one side are paired with the most
1394/// similar deleted/modified file on the other side whose similarity meets the
1395/// relevant threshold; exact-OID matches still take priority and are always
1396/// scored 100.
1397#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1398pub struct RenameDetectionOptions {
1399    /// The base name-status options (rename/copy enable flags, find-copies-harder,
1400    /// rename-empty). Exact detection honours these exactly as before.
1401    pub base: DiffNameStatusOptions,
1402    /// Enable inexact (content-similarity) detection. When `false`, only exact
1403    /// OID matches are detected, matching the legacy `*_with_options` behaviour.
1404    pub detect_inexact: bool,
1405    /// Minimum similarity percentage (`0..=100`) for an inexact *rename*. Pairs
1406    /// scoring below this are not reported as renames. Defaults to
1407    /// [`DEFAULT_RENAME_THRESHOLD`].
1408    pub rename_threshold: u8,
1409    /// Minimum similarity percentage (`0..=100`) for an inexact *copy*. Defaults
1410    /// to [`DEFAULT_RENAME_THRESHOLD`]; git uses the same default for `-C` as for
1411    /// `-M` unless `-C<n>` overrides it.
1412    pub copy_threshold: u8,
1413}
1414
1415impl Default for RenameDetectionOptions {
1416    fn default() -> Self {
1417        Self {
1418            base: DiffNameStatusOptions::default(),
1419            detect_inexact: false,
1420            rename_threshold: DEFAULT_RENAME_THRESHOLD,
1421            copy_threshold: DEFAULT_RENAME_THRESHOLD,
1422        }
1423    }
1424}
1425
1426impl RenameDetectionOptions {
1427    /// Build inexact-enabled options from a base [`DiffNameStatusOptions`], using
1428    /// the default thresholds for both renames and copies.
1429    pub fn inexact(base: DiffNameStatusOptions) -> Self {
1430        Self {
1431            base,
1432            detect_inexact: true,
1433            ..Self::default()
1434        }
1435    }
1436}
1437
/// HEAD-vs-worktree name-status using [`DiffNameStatusOptions::default`].
///
/// Convenience wrapper: forwards `worktree_root`, `git_dir` and `format`
/// unchanged to [`diff_name_status_head_worktree_with_options`] and returns
/// its result.
pub fn diff_name_status_head_worktree(
    worktree_root: impl AsRef<Path>,
    git_dir: impl AsRef<Path>,
    format: ObjectFormat,
) -> Result<Vec<NameStatusEntry>> {
    diff_name_status_head_worktree_with_options(
        worktree_root,
        git_dir,
        format,
        DiffNameStatusOptions::default(),
    )
}
1450
1451pub fn diff_name_status_head_worktree_with_options(
1452    worktree_root: impl AsRef<Path>,
1453    git_dir: impl AsRef<Path>,
1454    format: ObjectFormat,
1455    options: DiffNameStatusOptions,
1456) -> Result<Vec<NameStatusEntry>> {
1457    let worktree_root = worktree_root.as_ref();
1458    let git_dir = git_dir.as_ref();
1459    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1460    let head = head_tree_entries(git_dir, format, &db)?;
1461    let IndexSnapshot {
1462        entries: index,
1463        stat_cache,
1464    } = read_index_snapshot(git_dir, format)?;
1465    let index_gitlinks = index_gitlinks(&index);
1466    let candidate_paths = candidate_path_set(head.keys().chain(index.keys()));
1467    let worktree = worktree_entries_for_path_set(
1468        worktree_root,
1469        format,
1470        &candidate_paths,
1471        &index_gitlinks,
1472        Some(&stat_cache),
1473    )?;
1474    let changes = diff_name_status_maps_for_path_set(&head, &worktree, &candidate_paths, options)?;
1475    Ok(mark_unstaged_worktree_oids_unresolved(
1476        changes, &index, &worktree,
1477    ))
1478}
1479
1480/// HEAD-vs-worktree name-status with full rename/copy options, including inexact
1481/// (similarity) detection when enabled. Worktree blob content is read directly
1482/// from the working tree; HEAD-side blobs come from the object database.
1483pub fn diff_name_status_head_worktree_with_rename_options(
1484    worktree_root: impl AsRef<Path>,
1485    git_dir: impl AsRef<Path>,
1486    format: ObjectFormat,
1487    options: RenameDetectionOptions,
1488) -> Result<Vec<NameStatusEntry>> {
1489    let worktree_root = worktree_root.as_ref();
1490    let git_dir = git_dir.as_ref();
1491    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1492    let head = head_tree_entries(git_dir, format, &db)?;
1493    let IndexSnapshot {
1494        entries: index,
1495        stat_cache,
1496    } = read_index_snapshot(git_dir, format)?;
1497    let index_gitlinks = index_gitlinks(&index);
1498    let candidate_paths = candidate_path_set(head.keys().chain(index.keys()));
1499    let worktree = worktree_entries_for_path_set(
1500        worktree_root,
1501        format,
1502        &candidate_paths,
1503        &index_gitlinks,
1504        Some(&stat_cache),
1505    )?;
1506    let cache = worktree_blob_cache_for_path_set(
1507        worktree_root,
1508        &head,
1509        &worktree,
1510        &candidate_paths,
1511        options,
1512    )?;
1513    let changes = diff_name_status_maps_with_renames_for_path_set(
1514        &head,
1515        &worktree,
1516        &candidate_paths,
1517        options,
1518        |oid| cache_or_odb_blob(&cache, &db, oid),
1519    )?;
1520    Ok(mark_unstaged_worktree_oids_unresolved(
1521        changes, &index, &worktree,
1522    ))
1523}
1524
/// HEAD-vs-index name-status using [`DiffNameStatusOptions::default`].
///
/// Convenience wrapper around [`diff_name_status_head_index_with_options`].
pub fn diff_name_status_head_index(
    git_dir: impl AsRef<Path>,
    format: ObjectFormat,
) -> Result<Vec<NameStatusEntry>> {
    diff_name_status_head_index_with_options(git_dir, format, DiffNameStatusOptions::default())
}
1531
1532pub fn diff_name_status_head_index_with_options(
1533    git_dir: impl AsRef<Path>,
1534    format: ObjectFormat,
1535    options: DiffNameStatusOptions,
1536) -> Result<Vec<NameStatusEntry>> {
1537    let git_dir = git_dir.as_ref();
1538    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1539    let head = head_tree_entries(git_dir, format, &db)?;
1540    let index = read_index_entries(git_dir, format)?;
1541    diff_name_status_maps(&head, &index, head.keys().chain(index.keys()), options)
1542}
1543
1544/// HEAD-vs-index name-status with full rename/copy options, including inexact
1545/// (similarity) detection when enabled. All blob content (both sides) comes from
1546/// the object database.
1547pub fn diff_name_status_head_index_with_rename_options(
1548    git_dir: impl AsRef<Path>,
1549    format: ObjectFormat,
1550    options: RenameDetectionOptions,
1551) -> Result<Vec<NameStatusEntry>> {
1552    let git_dir = git_dir.as_ref();
1553    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1554    let head = head_tree_entries(git_dir, format, &db)?;
1555    let index = read_index_entries(git_dir, format)?;
1556    diff_name_status_maps_with_renames(
1557        &head,
1558        &index,
1559        head.keys().chain(index.keys()),
1560        options,
1561        |oid| read_blob_bytes(&db, oid),
1562    )
1563}
1564
1565/// Read an arbitrary tree object's flattened blob entries (recursively) keyed by
1566/// repository-relative path. This is the tree-side counterpart used by
1567/// `git diff-index <tree-ish>`: unlike [`head_tree_entries`] it does not consult
1568/// `HEAD`, so any commit/tag (peeled to a tree) or tree oid can be compared.
1569///
1570/// The canonical empty tree (`git hash-object -t tree /dev/null`) is treated as
1571/// always present and yields no entries, even when the object was never written
1572/// to the database. git makes the same guarantee, which keeps the common idiom
1573/// `git diff-index --cached <empty-tree-sha>` working in a fresh repository.
1574fn tree_entries(
1575    tree_oid: &ObjectId,
1576    format: ObjectFormat,
1577    db: &FileObjectDatabase,
1578) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
1579    let mut entries = BTreeMap::new();
1580    if *tree_oid == empty_tree_oid(format)? {
1581        return Ok(entries);
1582    }
1583    collect_tree_entries(db, format, tree_oid, Vec::new(), &mut entries)?;
1584    Ok(entries)
1585}
1586
/// The well-known oid of the empty tree for `format` (the hash of a zero-length
/// tree object). git hard-codes this value and treats it as always existing.
///
/// The value is computed on each call by hashing an empty `tree` payload with
/// `object_id_for_bytes`; any error from that call is returned to the caller.
fn empty_tree_oid(format: ObjectFormat) -> Result<ObjectId> {
    object_id_for_bytes(format, "tree", b"")
}
1592
1593/// Name-status diff of an arbitrary tree against the index, the engine behind
1594/// `git diff-index --cached <tree-ish>`. Exact rename/copy detection follows
1595/// `options`; all blob content comes from the object database.
1596pub fn diff_name_status_tree_index_with_options(
1597    git_dir: impl AsRef<Path>,
1598    format: ObjectFormat,
1599    tree_oid: &ObjectId,
1600    options: DiffNameStatusOptions,
1601) -> Result<Vec<NameStatusEntry>> {
1602    let git_dir = git_dir.as_ref();
1603    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1604    let tree = tree_entries(tree_oid, format, &db)?;
1605    let index = read_index_entries(git_dir, format)?;
1606    diff_name_status_maps(&tree, &index, tree.keys().chain(index.keys()), options)
1607}
1608
1609/// Tree-vs-index name-status with full rename/copy options, including inexact
1610/// (similarity) detection when enabled. Both sides read blob content from the
1611/// object database. Counterpart of
1612/// [`diff_name_status_head_index_with_rename_options`] for an arbitrary tree.
1613pub fn diff_name_status_tree_index_with_rename_options(
1614    git_dir: impl AsRef<Path>,
1615    format: ObjectFormat,
1616    tree_oid: &ObjectId,
1617    options: RenameDetectionOptions,
1618) -> Result<Vec<NameStatusEntry>> {
1619    let git_dir = git_dir.as_ref();
1620    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1621    let tree = tree_entries(tree_oid, format, &db)?;
1622    let index = read_index_entries(git_dir, format)?;
1623    diff_name_status_maps_with_renames(
1624        &tree,
1625        &index,
1626        tree.keys().chain(index.keys()),
1627        options,
1628        |oid| read_blob_bytes(&db, oid),
1629    )
1630}
1631
1632/// Name-status diff of an arbitrary tree against the working tree, the engine
1633/// behind plain `git diff-index <tree-ish>` (no `--cached`). New-side oids for
1634/// paths whose worktree contents differ from the index are cleared (rendered as
1635/// zeros), matching git, which only reports the worktree blob oid when it is
1636/// known-clean against the index.
1637pub fn diff_name_status_tree_worktree_with_options(
1638    worktree_root: impl AsRef<Path>,
1639    git_dir: impl AsRef<Path>,
1640    format: ObjectFormat,
1641    tree_oid: &ObjectId,
1642    options: DiffNameStatusOptions,
1643) -> Result<Vec<NameStatusEntry>> {
1644    let worktree_root = worktree_root.as_ref();
1645    let git_dir = git_dir.as_ref();
1646    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1647    let tree = tree_entries(tree_oid, format, &db)?;
1648    let IndexSnapshot {
1649        entries: index,
1650        stat_cache,
1651    } = read_index_snapshot(git_dir, format)?;
1652    let index_gitlinks = index_gitlinks(&index);
1653    let candidate_paths = candidate_path_set(tree.keys().chain(index.keys()));
1654    let worktree = worktree_entries_for_path_set(
1655        worktree_root,
1656        format,
1657        &candidate_paths,
1658        &index_gitlinks,
1659        Some(&stat_cache),
1660    )?;
1661    let changes = diff_name_status_maps_for_path_set(&tree, &worktree, &candidate_paths, options)?;
1662    Ok(mark_unstaged_worktree_oids_unresolved(
1663        changes, &index, &worktree,
1664    ))
1665}
1666
1667/// Tree-vs-worktree name-status with full rename/copy options, including inexact
1668/// (similarity) detection when enabled. Worktree blob content is read directly
1669/// from the working tree (via an oid-keyed cache); tree-side blobs come from the
1670/// object database. As with [`diff_name_status_tree_worktree_with_options`],
1671/// new-side oids for paths that differ from the index are cleared.
1672pub fn diff_name_status_tree_worktree_with_rename_options(
1673    worktree_root: impl AsRef<Path>,
1674    git_dir: impl AsRef<Path>,
1675    format: ObjectFormat,
1676    tree_oid: &ObjectId,
1677    options: RenameDetectionOptions,
1678) -> Result<Vec<NameStatusEntry>> {
1679    let worktree_root = worktree_root.as_ref();
1680    let git_dir = git_dir.as_ref();
1681    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1682    let tree = tree_entries(tree_oid, format, &db)?;
1683    let IndexSnapshot {
1684        entries: index,
1685        stat_cache,
1686    } = read_index_snapshot(git_dir, format)?;
1687    let index_gitlinks = index_gitlinks(&index);
1688    let candidate_paths = candidate_path_set(tree.keys().chain(index.keys()));
1689    let worktree = worktree_entries_for_path_set(
1690        worktree_root,
1691        format,
1692        &candidate_paths,
1693        &index_gitlinks,
1694        Some(&stat_cache),
1695    )?;
1696    let cache = worktree_blob_cache_for_path_set(
1697        worktree_root,
1698        &tree,
1699        &worktree,
1700        &candidate_paths,
1701        options,
1702    )?;
1703    let changes = diff_name_status_maps_with_renames_for_path_set(
1704        &tree,
1705        &worktree,
1706        &candidate_paths,
1707        options,
1708        |oid| cache_or_odb_blob(&cache, &db, oid),
1709    )?;
1710    Ok(mark_unstaged_worktree_oids_unresolved(
1711        changes, &index, &worktree,
1712    ))
1713}
1714
1715pub fn diff_name_status_index_worktree(
1716    worktree_root: impl AsRef<Path>,
1717    git_dir: impl AsRef<Path>,
1718    format: ObjectFormat,
1719) -> Result<Vec<NameStatusEntry>> {
1720    diff_name_status_index_worktree_with_options(
1721        worktree_root,
1722        git_dir,
1723        format,
1724        DiffNameStatusOptions::default(),
1725    )
1726}
1727
1728pub fn diff_name_status_index_worktree_with_options(
1729    worktree_root: impl AsRef<Path>,
1730    git_dir: impl AsRef<Path>,
1731    format: ObjectFormat,
1732    options: DiffNameStatusOptions,
1733) -> Result<Vec<NameStatusEntry>> {
1734    Ok(diff_name_status_index_worktree_with_options_and_gitlinks(
1735        worktree_root,
1736        git_dir,
1737        format,
1738        options,
1739    )?
1740    .entries)
1741}
1742
1743pub fn diff_name_status_index_worktree_with_options_and_gitlinks(
1744    worktree_root: impl AsRef<Path>,
1745    git_dir: impl AsRef<Path>,
1746    format: ObjectFormat,
1747    options: DiffNameStatusOptions,
1748) -> Result<IndexWorktreeDiff> {
1749    let IndexWorktreeDiff {
1750        entries,
1751        staged_gitlinks,
1752    } = diff_name_status_index_worktree_changes(worktree_root.as_ref(), git_dir.as_ref(), format)?;
1753    let entries = apply_name_status_options_to_index_worktree_changes(entries, options)?;
1754    Ok(IndexWorktreeDiff {
1755        entries,
1756        staged_gitlinks,
1757    })
1758}
1759
1760/// Index-vs-worktree name-status with full rename/copy options, including inexact
1761/// (similarity) detection when enabled. Worktree blob content is read directly
1762/// from the working tree; index-side blobs come from the object database.
1763pub fn diff_name_status_index_worktree_with_rename_options(
1764    worktree_root: impl AsRef<Path>,
1765    git_dir: impl AsRef<Path>,
1766    format: ObjectFormat,
1767    options: RenameDetectionOptions,
1768) -> Result<Vec<NameStatusEntry>> {
1769    Ok(
1770        diff_name_status_index_worktree_with_rename_options_and_gitlinks(
1771            worktree_root,
1772            git_dir,
1773            format,
1774            options,
1775        )?
1776        .entries,
1777    )
1778}
1779
1780pub fn diff_name_status_index_worktree_with_rename_options_and_gitlinks(
1781    worktree_root: impl AsRef<Path>,
1782    git_dir: impl AsRef<Path>,
1783    format: ObjectFormat,
1784    options: RenameDetectionOptions,
1785) -> Result<IndexWorktreeDiff> {
1786    let IndexWorktreeDiff {
1787        entries,
1788        staged_gitlinks,
1789    } = diff_name_status_index_worktree_changes(worktree_root.as_ref(), git_dir.as_ref(), format)?;
1790    // Index-vs-worktree diffs only consider tracked index paths; untracked
1791    // worktree files are not additions, so rename/copy detection has no add
1792    // destinations to pair. Apply the base options for completeness.
1793    let entries = apply_name_status_options_to_index_worktree_changes(entries, options.base)?;
1794    Ok(IndexWorktreeDiff {
1795        entries,
1796        staged_gitlinks,
1797    })
1798}
1799
1800fn diff_name_status_index_worktree_changes(
1801    worktree_root: &Path,
1802    git_dir: &Path,
1803    format: ObjectFormat,
1804) -> Result<IndexWorktreeDiff> {
1805    let index_path = sley_index::repository_index_path(git_dir);
1806    let index_metadata = match fs::metadata(&index_path) {
1807        Ok(metadata) => metadata,
1808        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
1809            return Ok(IndexWorktreeDiff {
1810                entries: Vec::new(),
1811                staged_gitlinks: Vec::new(),
1812            });
1813        }
1814        Err(err) => return Err(err.into()),
1815    };
1816    let index_bytes = fs::read(&index_path)?;
1817    if let Ok(index) = BorrowedIndex::parse(&index_bytes, format)
1818        && index.extension(&sley_index::INDEX_EXT_LINK)?.is_none()
1819        && !index.entries.iter().any(borrowed_entry_is_sparse_dir)
1820    {
1821        let (has_non_normal_stage, staged_gitlinks) =
1822            index_worktree_metadata_for_entries(&index.entries);
1823        if has_non_normal_stage {
1824            return diff_name_status_index_worktree_changes_from_snapshot(
1825                worktree_root,
1826                git_dir,
1827                format,
1828            );
1829        }
1830        let stat_cache =
1831            IndexStatCache::from_index_mtime_only(sley_index::file_mtime_parts(&index_metadata));
1832        let entries = diff_name_status_index_worktree_changes_for_borrowed_entries(
1833            worktree_root,
1834            format,
1835            &index.entries,
1836            &stat_cache,
1837        )?;
1838        return Ok(IndexWorktreeDiff {
1839            entries,
1840            staged_gitlinks,
1841        });
1842    }
1843    let index = expand_sparse_index_for_worktree_diff(
1844        sley_index::read_repository_index(git_dir, format)?,
1845        git_dir,
1846        format,
1847    )?;
1848    let (has_non_normal_stage, staged_gitlinks) =
1849        index_worktree_metadata_for_entries(&index.entries);
1850    if has_non_normal_stage {
1851        return diff_name_status_index_worktree_changes_from_snapshot(
1852            worktree_root,
1853            git_dir,
1854            format,
1855        );
1856    }
1857    let stat_cache =
1858        IndexStatCache::from_index_mtime_only(sley_index::file_mtime_parts(&index_metadata));
1859    let entries = diff_name_status_index_worktree_changes_for_entries(
1860        worktree_root,
1861        format,
1862        &index.entries,
1863        &stat_cache,
1864    )?;
1865    Ok(IndexWorktreeDiff {
1866        entries,
1867        staged_gitlinks,
1868    })
1869}
1870
1871fn borrowed_entry_is_sparse_dir(entry: &sley_index::IndexEntryRef<'_>) -> bool {
1872    entry.mode == sley_index::SPARSE_DIR_MODE && entry.is_skip_worktree()
1873}
1874
1875fn expand_sparse_index_for_worktree_diff(
1876    mut index: Index,
1877    git_dir: &Path,
1878    format: ObjectFormat,
1879) -> Result<Index> {
1880    if !index
1881        .entries
1882        .iter()
1883        .any(sley_index::IndexEntry::is_sparse_dir)
1884    {
1885        return Ok(index);
1886    }
1887
1888    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1889    let mut expanded = Vec::with_capacity(index.entries.len());
1890    for entry in std::mem::take(&mut index.entries) {
1891        if !entry.is_sparse_dir() {
1892            expanded.push(entry);
1893            continue;
1894        }
1895
1896        let dir_prefix = entry.path.as_bytes();
1897        for (rel_path, (mode, oid)) in flatten_tree(&db, format, &entry.oid)? {
1898            let mut path = dir_prefix.to_vec();
1899            path.extend_from_slice(&rel_path);
1900            let mut expanded_entry = sley_index::IndexEntry {
1901                ctime_seconds: 0,
1902                ctime_nanoseconds: 0,
1903                mtime_seconds: 0,
1904                mtime_nanoseconds: 0,
1905                dev: 0,
1906                ino: 0,
1907                mode,
1908                uid: 0,
1909                gid: 0,
1910                size: 0,
1911                oid,
1912                flags: 0,
1913                flags_extended: 0,
1914                path: BString::from(path),
1915            };
1916            expanded_entry.set_skip_worktree(true);
1917            expanded_entry.refresh_name_length();
1918            expanded.push(expanded_entry);
1919        }
1920    }
1921
1922    expanded.sort_by(|left, right| left.path.as_bytes().cmp(right.path.as_bytes()));
1923    index.entries = expanded;
1924    index.clear_sparse_extension()?;
1925    Ok(index)
1926}
1927
1928fn diff_name_status_index_worktree_changes_for_borrowed_entries(
1929    worktree_root: &Path,
1930    format: ObjectFormat,
1931    entries: &[sley_index::IndexEntryRef<'_>],
1932    stat_cache: &IndexStatCache,
1933) -> Result<Vec<NameStatusEntry>> {
1934    const PARALLEL_SCAN_MIN_ENTRIES: usize = 2048;
1935    let workers = std::thread::available_parallelism()
1936        .map(|count| count.get())
1937        .unwrap_or(1)
1938        .min(8);
1939    if workers <= 1 || entries.len() < PARALLEL_SCAN_MIN_ENTRIES {
1940        return diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
1941            worktree_root,
1942            format,
1943            entries,
1944            stat_cache,
1945        );
1946    }
1947    let chunk_size = entries.len().div_ceil(workers);
1948    std::thread::scope(|scope| {
1949        let mut handles = Vec::new();
1950        for chunk in entries.chunks(chunk_size) {
1951            handles.push(scope.spawn(move || {
1952                diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
1953                    worktree_root,
1954                    format,
1955                    chunk,
1956                    stat_cache,
1957                )
1958            }));
1959        }
1960        let mut changes = Vec::new();
1961        for handle in handles {
1962            let chunk_changes = handle
1963                .join()
1964                .map_err(|_| GitError::Command("diff worker panicked".into()))??;
1965            changes.extend(chunk_changes);
1966        }
1967        Ok(changes)
1968    })
1969}
1970
1971fn diff_name_status_index_worktree_changes_for_entries(
1972    worktree_root: &Path,
1973    format: ObjectFormat,
1974    entries: &[sley_index::IndexEntry],
1975    stat_cache: &IndexStatCache,
1976) -> Result<Vec<NameStatusEntry>> {
1977    const PARALLEL_SCAN_MIN_ENTRIES: usize = 2048;
1978    let workers = std::thread::available_parallelism()
1979        .map(|count| count.get())
1980        .unwrap_or(1)
1981        .min(8);
1982    if workers <= 1 || entries.len() < PARALLEL_SCAN_MIN_ENTRIES {
1983        return diff_name_status_index_worktree_changes_for_entry_chunk(
1984            worktree_root,
1985            format,
1986            entries,
1987            stat_cache,
1988        );
1989    }
1990    let chunk_size = entries.len().div_ceil(workers);
1991    std::thread::scope(|scope| {
1992        let mut handles = Vec::new();
1993        for chunk in entries.chunks(chunk_size) {
1994            handles.push(scope.spawn(move || {
1995                diff_name_status_index_worktree_changes_for_entry_chunk(
1996                    worktree_root,
1997                    format,
1998                    chunk,
1999                    stat_cache,
2000                )
2001            }));
2002        }
2003        let mut changes = Vec::new();
2004        for handle in handles {
2005            let chunk_changes = handle
2006                .join()
2007                .map_err(|_| GitError::Command("diff worker panicked".into()))??;
2008            changes.extend(chunk_changes);
2009        }
2010        Ok(changes)
2011    })
2012}
2013
2014fn diff_name_status_index_worktree_changes_for_entry_chunk(
2015    worktree_root: &Path,
2016    format: ObjectFormat,
2017    entries: &[sley_index::IndexEntry],
2018    stat_cache: &IndexStatCache,
2019) -> Result<Vec<NameStatusEntry>> {
2020    let mut changes = Vec::new();
2021    let mut path = PathBuf::from(worktree_root);
2022    for entry in entries {
2023        worktree_path_for_repo_path_into(&mut path, worktree_root, entry.path.as_bytes());
2024        if let Some(change) = index_worktree_change_for_entry(&path, format, entry, stat_cache)? {
2025            changes.push(change);
2026        }
2027    }
2028    Ok(changes)
2029}
2030
2031fn diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
2032    worktree_root: &Path,
2033    format: ObjectFormat,
2034    entries: &[sley_index::IndexEntryRef<'_>],
2035    stat_cache: &IndexStatCache,
2036) -> Result<Vec<NameStatusEntry>> {
2037    let mut changes = Vec::new();
2038    let mut path = PathBuf::from(worktree_root);
2039    for entry in entries {
2040        worktree_path_for_repo_path_into(&mut path, worktree_root, entry.path);
2041        if let Some(change) = index_worktree_change_for_entry(&path, format, entry, stat_cache)? {
2042            changes.push(change);
2043        }
2044    }
2045    Ok(changes)
2046}
2047
2048fn index_worktree_metadata_for_entries(
2049    entries: &[impl WorktreeIndexEntry],
2050) -> (bool, Vec<IndexGitlinkEntry>) {
2051    let mut needs_snapshot = false;
2052    let mut staged_gitlinks = Vec::new();
2053    for entry in entries {
2054        if entry.stage() != sley_index::Stage::Normal {
2055            needs_snapshot = true;
2056        }
2057        // Intent-to-add entries (`git add -N`) must take the snapshot path, which
2058        // diffs them as new files rather than loading their empty-blob id.
2059        if entry.is_intent_to_add() {
2060            needs_snapshot = true;
2061        }
2062        if sley_index::is_gitlink(entry.mode()) {
2063            staged_gitlinks.push(IndexGitlinkEntry {
2064                path: BString::from_bytes(entry.git_path()),
2065                oid: entry.oid(),
2066            });
2067        }
2068    }
2069    (needs_snapshot, staged_gitlinks)
2070}
2071
2072fn diff_name_status_index_worktree_changes_from_snapshot(
2073    worktree_root: &Path,
2074    git_dir: &Path,
2075    format: ObjectFormat,
2076) -> Result<IndexWorktreeDiff> {
2077    let IndexSnapshot {
2078        entries: index,
2079        stat_cache,
2080    } = read_index_snapshot(git_dir, format)?;
2081    // Intent-to-add (`git add -N`) paths are placeholders: git's `run_diff_files`
2082    // diffs them as a brand-new file (`/dev/null` → worktree), never loading the
2083    // recorded empty-blob id. `read_index_snapshot` drops the ITA flag, so read
2084    // the set of ITA stage-0 paths separately and override their verdict below.
2085    let intent_to_add_paths = read_intent_to_add_paths(git_dir, format)?;
2086    // `read_index_snapshot` collapses each path to a single entry; for an
2087    // unmerged path it keeps the last-written stage. To match git's
2088    // `run_diff_files` we need the conflict stages, so read them separately:
2089    // git diffs the worktree against the "ours" stage (stage 2, the default
2090    // `diff_unmerged_stage`) and additionally emits a standalone `U <path>`
2091    // pair via `diff_unmerge` (diff-lib.c).
2092    let unmerged = read_unmerged_stages(git_dir, format)?;
2093    let index_gitlinks = index_gitlinks(&index);
2094    let staged_gitlinks = index_gitlinks
2095        .iter()
2096        .map(|(path, oid)| IndexGitlinkEntry {
2097            path: BString::from_bytes(path),
2098            oid: *oid,
2099        })
2100        .collect();
2101    let mut changes = Vec::new();
2102    for (git_path, left) in &index {
2103        // For a conflicted path git first queues the `U` pair, then compares the
2104        // worktree against stage 2 (ours). The snapshot's collapsed `left` may
2105        // be the wrong stage, so override it with the stage-2 entry when present.
2106        let conflict_stages = unmerged.get(git_path);
2107        let right = worktree_entry_for_path(
2108            worktree_root,
2109            format,
2110            git_path,
2111            &index_gitlinks,
2112            Some(&stat_cache),
2113        )?;
2114        if conflict_stages.is_some() {
2115            // git's `diff_unmerge` makes a pair with a null old side and the
2116            // worktree mode on the new side (diff-lib.c `wt_mode`); the oids stay
2117            // zero. The raw line is `:000000 <wt_mode> 0..0 0..0 U <path>`.
2118            changes.push(NameStatusEntry {
2119                status: NameStatus::Unmerged,
2120                path: git_path.clone().into(),
2121                old_path: None,
2122                old_mode: None,
2123                new_mode: right.as_ref().map(|entry| entry.mode),
2124                old_oid: None,
2125                new_oid: None,
2126            });
2127        }
2128        // The index side for the modify comparison: stage 2 (ours) for a
2129        // conflict, otherwise the normal stage-0 entry. If the conflict has no
2130        // stage-2 (deleted on our side / added by them), git has no entry to
2131        // diff the worktree against, so it emits only the `U` line.
2132        let left = match conflict_stages {
2133            Some(stages) => match stages.ours.as_ref() {
2134                Some(ours) => ours,
2135                None => continue,
2136            },
2137            None => left,
2138        };
2139        // Intent-to-add placeholder: git's `run_diff_files` diffs it as a new
2140        // file. With the worktree file present, queue an `Added` pair whose old
2141        // side is null (`/dev/null` → worktree blob); with the file gone, an ITA
2142        // entry yields no diff-files entry (there is nothing to add).
2143        if intent_to_add_paths.contains(git_path.as_slice()) {
2144            if let Some(right) = right {
2145                changes.push(NameStatusEntry {
2146                    status: NameStatus::Added,
2147                    path: git_path.clone().into(),
2148                    old_path: None,
2149                    old_mode: None,
2150                    new_mode: Some(right.mode),
2151                    old_oid: None,
2152                    new_oid: Some(right.oid),
2153                });
2154            }
2155            continue;
2156        }
2157        let Some(right) = right else {
2158            changes.push(NameStatusEntry {
2159                status: NameStatus::Deleted,
2160                path: git_path.clone().into(),
2161                old_path: None,
2162                old_mode: Some(left.mode),
2163                new_mode: None,
2164                old_oid: Some(left.oid),
2165                new_oid: None,
2166            });
2167            continue;
2168        };
2169        if right != *left {
2170            changes.push(NameStatusEntry {
2171                status: NameStatus::Modified,
2172                path: git_path.clone().into(),
2173                old_path: None,
2174                old_mode: Some(left.mode),
2175                new_mode: Some(right.mode),
2176                old_oid: Some(left.oid),
2177                new_oid: Some(right.oid),
2178            });
2179        }
2180    }
2181    Ok(IndexWorktreeDiff {
2182        entries: changes,
2183        staged_gitlinks,
2184    })
2185}
2186
2187/// The conflict stages recorded for one unmerged index path.
2188struct ConflictStages {
2189    ours: Option<TrackedEntry>,
2190}
2191
2192/// Read the higher-stage (conflict) index entries, keyed by path, recording the
2193/// "ours" (stage 2) entry git diffs the worktree against. Paths with only a
2194/// stage-0 entry are absent from the result.
2195fn read_unmerged_stages(
2196    git_dir: &Path,
2197    format: ObjectFormat,
2198) -> Result<BTreeMap<Vec<u8>, ConflictStages>> {
2199    let index_path = sley_index::repository_index_path(git_dir);
2200    if !index_path.exists() {
2201        return Ok(BTreeMap::new());
2202    }
2203    let index = sley_index::read_repository_index(git_dir, format)?;
2204    let mut out: BTreeMap<Vec<u8>, ConflictStages> = BTreeMap::new();
2205    for entry in &index.entries {
2206        let stage = entry.stage();
2207        if stage == sley_index::Stage::Normal {
2208            continue;
2209        }
2210        let path = entry.path.clone().into_bytes();
2211        let slot = out.entry(path).or_insert(ConflictStages { ours: None });
2212        if stage == sley_index::Stage::Ours {
2213            slot.ours = Some(TrackedEntry {
2214                mode: entry.mode,
2215                oid: entry.oid,
2216            });
2217        }
2218    }
2219    Ok(out)
2220}
2221
2222fn apply_name_status_options_to_index_worktree_changes(
2223    mut changes: Vec<NameStatusEntry>,
2224    options: DiffNameStatusOptions,
2225) -> Result<Vec<NameStatusEntry>> {
2226    if options.detect_renames {
2227        changes = detect_exact_renames_from_changes(changes, options.rename_empty);
2228    } else if options.detect_copies {
2229        changes.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2230    }
2231    Ok(changes)
2232}
2233
2234fn detect_exact_renames_from_changes(
2235    changes: Vec<NameStatusEntry>,
2236    rename_empty: bool,
2237) -> Vec<NameStatusEntry> {
2238    let added = changes
2239        .iter()
2240        .enumerate()
2241        .filter(|(_, entry)| entry.status == NameStatus::Added)
2242        .collect::<Vec<_>>();
2243    let deleted = changes
2244        .iter()
2245        .enumerate()
2246        .filter(|(_, entry)| entry.status == NameStatus::Deleted)
2247        .collect::<Vec<_>>();
2248    let mut consumed_added = BTreeSet::new();
2249    let mut consumed_deleted = BTreeSet::new();
2250    let mut result = Vec::new();
2251
2252    for (deleted_index, deleted_entry) in deleted {
2253        let Some(old_oid) = deleted_entry.old_oid else {
2254            continue;
2255        };
2256        if !rename_empty && is_empty_blob_oid(&old_oid) {
2257            continue;
2258        }
2259        if let Some((added_index, added_entry)) = added.iter().find(|(added_index, added_entry)| {
2260            !consumed_added.contains(added_index) && added_entry.new_oid == Some(old_oid)
2261        }) {
2262            consumed_deleted.insert(deleted_index);
2263            consumed_added.insert(*added_index);
2264            result.push(NameStatusEntry {
2265                status: NameStatus::Renamed(100),
2266                path: added_entry.path.clone(),
2267                old_path: Some(deleted_entry.path.clone()),
2268                old_mode: deleted_entry.old_mode,
2269                new_mode: added_entry.new_mode,
2270                old_oid: deleted_entry.old_oid,
2271                new_oid: added_entry.new_oid,
2272            });
2273        }
2274    }
2275
2276    for (index, entry) in changes.into_iter().enumerate() {
2277        if consumed_added.contains(&index) || consumed_deleted.contains(&index) {
2278            continue;
2279        }
2280        result.push(entry);
2281    }
2282    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2283    result
2284}
2285
2286/// Index-vs-worktree name-status for **`git diff-files`** (plumbing), which
2287/// selects changed paths by the cached *stat* rather than by content.
2288///
2289/// This is the crucial difference from [`diff_name_status_index_worktree_with_options`]
2290/// (the engine behind porcelain `git diff`): porcelain `git diff` refreshes the
2291/// index first, so a stat-dirty-but-content-identical entry (a `touch`ed file, or
2292/// a freshly `rm --cached`-then-`reset --no-refresh` entry with a zeroed cached
2293/// stat) is re-stamped clean and suppressed. `git diff-files` does **not** refresh
2294/// — it reports every entry whose cached stat fails to prove it clean as `M`,
2295/// without re-hashing the content to "rescue" it (`builtin/diff.c` →
2296/// `run_diff_files` → `ie_match_stat`). The raw / name-only / name-status output
2297/// and the `--quiet`/`--exit-code` status therefore list such entries even when
2298/// the content is byte-identical; patch/stat output, which diffs actual content,
2299/// renders them as an empty hunk.
2300///
2301/// We layer that stat-based selection on top of the content-based diff: the
2302/// content diff already catches adds/deletes/genuine-content modifies (with
2303/// rename detection), and we then append a `Modified` entry for any stage-0 path
2304/// whose worktree file is present and whose cached stat is dirty per
2305/// [`IndexStatCache::index_entry_worktree_stat_dirty`] but which the content diff
2306/// did not already report. Content-identical stat-dirty entries cannot be rename
2307/// sources/targets (their content is unchanged), so they never interact with the
2308/// rename machinery — they are plain `M`.
2309pub fn diff_name_status_index_worktree_for_diff_files_with_options(
2310    worktree_root: impl AsRef<Path>,
2311    git_dir: impl AsRef<Path>,
2312    format: ObjectFormat,
2313    options: DiffNameStatusOptions,
2314) -> Result<Vec<NameStatusEntry>> {
2315    let worktree_root = worktree_root.as_ref();
2316    let git_dir = git_dir.as_ref();
2317    let changes =
2318        diff_name_status_index_worktree_with_options(worktree_root, git_dir, format, options)?;
2319    augment_with_stat_dirty_entries(worktree_root, git_dir, format, changes)
2320}
2321
2322/// As [`diff_name_status_index_worktree_for_diff_files_with_options`], but with
2323/// full rename/copy options (the `git diff-files -M/-C` path). The stat-dirty
2324/// augmentation is identical; only the underlying content diff differs.
2325pub fn diff_name_status_index_worktree_for_diff_files_with_rename_options(
2326    worktree_root: impl AsRef<Path>,
2327    git_dir: impl AsRef<Path>,
2328    format: ObjectFormat,
2329    options: RenameDetectionOptions,
2330) -> Result<Vec<NameStatusEntry>> {
2331    let worktree_root = worktree_root.as_ref();
2332    let git_dir = git_dir.as_ref();
2333    let changes = diff_name_status_index_worktree_with_rename_options(
2334        worktree_root,
2335        git_dir,
2336        format,
2337        options,
2338    )?;
2339    augment_with_stat_dirty_entries(worktree_root, git_dir, format, changes)
2340}
2341
2342/// Append a `Modified` entry for every stage-0 index path whose worktree file is
2343/// present and whose cached stat is dirty (`ce_match_stat` "changed") but which
2344/// `content_changes` did not already report. The result is re-sorted by path so
2345/// the merged set keeps git's diff-queue ordering. New-side oids on the added
2346/// entries are left `None` (rendered as zeros in raw output), matching git, which
2347/// reports the worktree blob oid only for entries it has hashed.
2348fn augment_with_stat_dirty_entries(
2349    worktree_root: &Path,
2350    git_dir: &Path,
2351    format: ObjectFormat,
2352    mut content_changes: Vec<NameStatusEntry>,
2353) -> Result<Vec<NameStatusEntry>> {
2354    let IndexSnapshot {
2355        entries: index,
2356        stat_cache,
2357    } = read_index_snapshot(git_dir, format)?;
2358    // Paths the content diff already accounts for (by new-side path, the position
2359    // git queues a pair at — a rename's destination, a modify/add/delete's path).
2360    let already_reported: BTreeSet<&[u8]> = content_changes
2361        .iter()
2362        .map(|entry| entry.path.as_bytes())
2363        .collect();
2364    let mut extras = Vec::new();
2365    for (git_path, tracked) in &index {
2366        if already_reported.contains(git_path.as_slice()) {
2367            continue;
2368        }
2369        let Some(cached) = stat_cache.entry_for_git_path(git_path) else {
2370            continue;
2371        };
2372        // Gitlinks (submodules) have their own dirtiness model and are not stat-
2373        // compared here; the content diff already handles changed gitlink oids.
2374        if sley_index::is_gitlink(tracked.mode) {
2375            continue;
2376        }
2377        let path = worktree_path_for_repo_path(worktree_root, git_path);
2378        let Ok(metadata) = fs::symlink_metadata(&path) else {
2379            // A missing worktree file is a deletion, which the content diff
2380            // already reports; nothing to add here.
2381            continue;
2382        };
2383        if !(metadata.is_file() || metadata.file_type().is_symlink()) {
2384            continue;
2385        }
2386        match stat_cache.index_entry_worktree_stat_verdict(cached, &metadata) {
2387            sley_index::StatVerdict::Clean => continue,
2388            sley_index::StatVerdict::Dirty => {}
2389            // A racily-clean entry must be resolved by content: git re-hashes it
2390            // (`ce_compare_data`) and only reports `M` when the worktree bytes
2391            // actually differ from the cached oid — so a `touch`ed-then-re-`add`ed
2392            // file (same-second mtime as the index) stays clean.
2393            sley_index::StatVerdict::RacyNeedsContentCheck => {
2394                if worktree_oid_matches_index(worktree_root, git_path, &metadata, tracked, format)?
2395                {
2396                    continue;
2397                }
2398            }
2399        }
2400        extras.push(NameStatusEntry {
2401            status: NameStatus::Modified,
2402            path: git_path.clone().into(),
2403            old_path: None,
2404            old_mode: Some(tracked.mode),
2405            new_mode: Some(tracked.mode),
2406            old_oid: Some(tracked.oid),
2407            new_oid: None,
2408        });
2409    }
2410    if !extras.is_empty() {
2411        content_changes.extend(extras);
2412        content_changes
2413            .sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2414    }
2415    Ok(content_changes)
2416}
2417
2418/// Whether the worktree file at `git_path` hashes to the index entry's oid (mode
2419/// included). Used to resolve a racily-clean `diff-files` entry: git re-hashes the
2420/// content and only reports it changed when the bytes truly differ. Mirrors the
2421/// worktree-oid computation in [`worktree_entry_for_path`].
2422fn worktree_oid_matches_index(
2423    worktree_root: &Path,
2424    git_path: &[u8],
2425    metadata: &fs::Metadata,
2426    index_entry: &TrackedEntry,
2427    format: ObjectFormat,
2428) -> Result<bool> {
2429    let file_type = metadata.file_type();
2430    let path = worktree_path_for_repo_path(worktree_root, git_path);
2431    let body = if file_type.is_symlink() {
2432        symlink_target_bytes(&path)?
2433    } else {
2434        fs::read(&path)?
2435    };
2436    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
2437    let mode = if file_type.is_symlink() {
2438        0o120000
2439    } else {
2440        file_mode(metadata)
2441    };
2442    Ok(oid == index_entry.oid && mode == index_entry.mode)
2443}
2444
2445pub fn diff_name_status_trees_with_options(
2446    db: &FileObjectDatabase,
2447    format: ObjectFormat,
2448    left_tree: &ObjectId,
2449    right_tree: &ObjectId,
2450    options: DiffNameStatusOptions,
2451) -> Result<Vec<NameStatusEntry>> {
2452    // `--find-copies-harder` may pair an *unchanged* left-side file as a copy
2453    // source, so it needs the complete left map; every other mode only consults
2454    // changed paths, so the pruned simultaneous walk (which skips identical
2455    // subtrees) suffices and produces byte-identical output.
2456    let needs_full_maps = options.detect_copies && options.find_copies_harder;
2457    let (left_entries, right_entries) = if needs_full_maps {
2458        collect_full_tree_pair(db, format, left_tree, right_tree)?
2459    } else {
2460        changed_tree_entries(db, format, left_tree, right_tree)?
2461    };
2462    diff_name_status_maps(
2463        &left_entries,
2464        &right_entries,
2465        left_entries.keys().chain(right_entries.keys()),
2466        options,
2467    )
2468}
2469
2470pub fn diff_name_status_empty_tree_with_options(
2471    db: &FileObjectDatabase,
2472    format: ObjectFormat,
2473    right_tree: &ObjectId,
2474    options: DiffNameStatusOptions,
2475) -> Result<Vec<NameStatusEntry>> {
2476    let left_entries = BTreeMap::new();
2477    let mut right_entries = BTreeMap::new();
2478    collect_tree_entries(db, format, right_tree, Vec::new(), &mut right_entries)?;
2479    diff_name_status_maps(&left_entries, &right_entries, right_entries.keys(), options)
2480}
2481
2482/// Diff two trees with full rename/copy options, including inexact (similarity)
2483/// detection when [`RenameDetectionOptions::detect_inexact`] is set.
2484///
2485/// Blob bytes for similarity scoring are read from `db`. This is the inexact-
2486/// aware counterpart of [`diff_name_status_trees_with_options`]; passing
2487/// `RenameDetectionOptions::default()` (or `RenameDetectionOptions { base, ..
2488/// default }` with `detect_inexact: false`) reproduces the exact-only behaviour.
2489pub fn diff_name_status_trees_with_rename_options(
2490    db: &FileObjectDatabase,
2491    format: ObjectFormat,
2492    left_tree: &ObjectId,
2493    right_tree: &ObjectId,
2494    options: RenameDetectionOptions,
2495) -> Result<Vec<NameStatusEntry>> {
2496    // See `diff_name_status_trees_with_options`: only `--find-copies-harder`
2497    // needs unchanged left entries as copy sources; otherwise the pruned walk
2498    // (skipping identical subtrees) yields identical output far more cheaply.
2499    let needs_full_maps = options.base.detect_copies && options.base.find_copies_harder;
2500    let (left_entries, right_entries) = if needs_full_maps {
2501        collect_full_tree_pair(db, format, left_tree, right_tree)?
2502    } else {
2503        changed_tree_entries(db, format, left_tree, right_tree)?
2504    };
2505    diff_name_status_maps_with_renames(
2506        &left_entries,
2507        &right_entries,
2508        left_entries.keys().chain(right_entries.keys()),
2509        options,
2510        |oid| read_blob_bytes(db, oid),
2511    )
2512}
2513
2514/// Diff the empty tree against `right_tree` with full rename/copy options.
2515///
2516/// As with [`diff_name_status_trees_with_rename_options`], inexact detection is
2517/// gated on [`RenameDetectionOptions::detect_inexact`]; the left (empty) side
2518/// has no sources, so only copies among the right-side additions can match when
2519/// `find_copies_harder` is set.
2520pub fn diff_name_status_empty_tree_with_rename_options(
2521    db: &FileObjectDatabase,
2522    format: ObjectFormat,
2523    right_tree: &ObjectId,
2524    options: RenameDetectionOptions,
2525) -> Result<Vec<NameStatusEntry>> {
2526    let left_entries = BTreeMap::new();
2527    let mut right_entries = BTreeMap::new();
2528    collect_tree_entries(db, format, right_tree, Vec::new(), &mut right_entries)?;
2529    diff_name_status_maps_with_renames(
2530        &left_entries,
2531        &right_entries,
2532        right_entries.keys(),
2533        options,
2534        |oid| read_blob_bytes(db, oid),
2535    )
2536}
2537
2538/// Read a blob's raw bytes from the ODB, returning `None` if the object cannot
2539/// be read or is not a blob. Used as the similarity-scoring blob fetcher; a
2540/// missing object simply makes a candidate pair non-similar rather than failing
2541/// the whole diff.
2542fn read_blob_bytes(db: &FileObjectDatabase, oid: &ObjectId) -> Option<Vec<u8>> {
2543    match db.read_object(oid) {
2544        Ok(object) if object.object_type == ObjectType::Blob => Some(object.body.clone()),
2545        _ => None,
2546    }
2547}
2548
2549/// Build the raw per-path add/delete/modify change list (before any rename or
2550/// copy detection) from the two entry maps and the candidate path set.
2551fn raw_name_status_changes_for_unique_paths<'a>(
2552    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2553    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2554    paths: impl Iterator<Item = &'a Vec<u8>>,
2555) -> Vec<NameStatusEntry> {
2556    let mut changes = Vec::new();
2557    for path in paths {
2558        let left = left_entries.get(path);
2559        let right = right_entries.get(path);
2560        let status = match (left, right) {
2561            (None, Some(_)) => Some(NameStatus::Added),
2562            (Some(_), None) => Some(NameStatus::Deleted),
2563            (Some(left), Some(right)) if left != right => Some(NameStatus::Modified),
2564            _ => None,
2565        };
2566        if let Some(status) = status {
2567            changes.push(NameStatusEntry {
2568                status,
2569                path: path.clone().into(),
2570                old_path: None,
2571                old_mode: left.map(|entry| entry.mode),
2572                new_mode: right.map(|entry| entry.mode),
2573                old_oid: left.map(|entry| entry.oid),
2574                new_oid: right.map(|entry| entry.oid),
2575            });
2576        }
2577    }
2578    changes
2579}
2580
2581fn diff_name_status_maps<'a>(
2582    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2583    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2584    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2585    options: DiffNameStatusOptions,
2586) -> Result<Vec<NameStatusEntry>> {
2587    let paths = candidate_path_set(candidate_paths);
2588    diff_name_status_maps_for_path_set(left_entries, right_entries, &paths, options)
2589}
2590
2591fn diff_name_status_maps_for_path_set(
2592    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2593    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2594    candidate_paths: &BTreeSet<Vec<u8>>,
2595    options: DiffNameStatusOptions,
2596) -> Result<Vec<NameStatusEntry>> {
2597    diff_name_status_maps_for_unique_paths(
2598        left_entries,
2599        right_entries,
2600        candidate_paths.iter(),
2601        options,
2602    )
2603}
2604
2605fn diff_name_status_maps_for_unique_paths<'a>(
2606    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2607    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2608    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2609    options: DiffNameStatusOptions,
2610) -> Result<Vec<NameStatusEntry>> {
2611    let mut changes =
2612        raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
2613    if options.detect_renames {
2614        changes = detect_exact_renames(changes, left_entries, right_entries, options.rename_empty);
2615    }
2616    if options.detect_copies {
2617        changes = detect_exact_copies(
2618            changes,
2619            left_entries,
2620            right_entries,
2621            options.find_copies_harder,
2622            options.rename_empty,
2623        );
2624    }
2625    Ok(changes)
2626}
2627
2628/// Like [`diff_name_status_maps`], but additionally runs inexact (similarity)
2629/// rename/copy detection when `options.detect_inexact` is set.
2630///
2631/// `fetch_blob` resolves an [`ObjectId`] to that blob's raw bytes; it is only
2632/// consulted for the candidate pairs considered during inexact detection, and
2633/// only when inexact detection is enabled. A pair whose blob bytes cannot be
2634/// fetched is simply skipped (treated as not similar), so a missing object never
2635/// fails the whole diff.
2636fn diff_name_status_maps_with_renames<'a>(
2637    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2638    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2639    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2640    options: RenameDetectionOptions,
2641    fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
2642) -> Result<Vec<NameStatusEntry>> {
2643    let paths = candidate_path_set(candidate_paths);
2644    diff_name_status_maps_with_renames_for_path_set(
2645        left_entries,
2646        right_entries,
2647        &paths,
2648        options,
2649        fetch_blob,
2650    )
2651}
2652
2653fn diff_name_status_maps_with_renames_for_path_set(
2654    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2655    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2656    candidate_paths: &BTreeSet<Vec<u8>>,
2657    options: RenameDetectionOptions,
2658    fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
2659) -> Result<Vec<NameStatusEntry>> {
2660    diff_name_status_maps_with_renames_for_unique_paths(
2661        left_entries,
2662        right_entries,
2663        candidate_paths.iter(),
2664        options,
2665        fetch_blob,
2666    )
2667}
2668
2669fn diff_name_status_maps_with_renames_for_unique_paths<'a>(
2670    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2671    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2672    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2673    options: RenameDetectionOptions,
2674    fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
2675) -> Result<Vec<NameStatusEntry>> {
2676    let base = options.base;
2677    let mut changes =
2678        raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
2679    if base.detect_renames {
2680        changes = detect_exact_renames(changes, left_entries, right_entries, base.rename_empty);
2681    }
2682    // Inexact rename detection runs after exact renames so exact matches keep
2683    // priority (and their score of 100). It only fires when rename detection is
2684    // enabled at all, mirroring git's `-M`.
2685    if base.detect_renames && options.detect_inexact {
2686        changes = detect_inexact_renames(changes, &options, &fetch_blob);
2687    }
2688    if base.detect_copies {
2689        changes = detect_exact_copies(
2690            changes,
2691            left_entries,
2692            right_entries,
2693            base.find_copies_harder,
2694            base.rename_empty,
2695        );
2696    }
2697    if base.detect_copies && options.detect_inexact {
2698        changes = detect_inexact_copies(changes, left_entries, &options, &fetch_blob);
2699    }
2700    Ok(changes)
2701}
2702
2703fn detect_exact_renames(
2704    changes: Vec<NameStatusEntry>,
2705    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2706    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2707    rename_empty: bool,
2708) -> Vec<NameStatusEntry> {
2709    let added = changes
2710        .iter()
2711        .enumerate()
2712        .filter(|(_, entry)| entry.status == NameStatus::Added)
2713        .map(|(idx, entry)| (idx, entry.path.clone()))
2714        .collect::<Vec<_>>();
2715    let deleted = changes
2716        .iter()
2717        .filter(|entry| entry.status == NameStatus::Deleted)
2718        .map(|entry| entry.path.clone())
2719        .collect::<Vec<_>>();
2720    let mut consumed = BTreeSet::new();
2721    let mut renamed_old_paths = BTreeSet::new();
2722    let mut result = Vec::new();
2723
2724    for old_path in deleted {
2725        let Some(left) = left_entries.get(old_path.as_bytes()) else {
2726            continue;
2727        };
2728        if let Some((idx, new_path)) = added.iter().find(|(idx, new_path)| {
2729            !consumed.contains(idx)
2730                && right_entries.get(new_path.as_bytes()).is_some_and(|right| {
2731                    right.oid == left.oid && (rename_empty || !is_empty_blob_oid(&left.oid))
2732                })
2733        }) {
2734            consumed.insert(*idx);
2735            renamed_old_paths.insert(old_path.clone());
2736            let right = right_entries.get(new_path.as_bytes());
2737            result.push(NameStatusEntry {
2738                status: NameStatus::Renamed(100),
2739                path: new_path.clone(),
2740                old_path: Some(old_path),
2741                old_mode: Some(left.mode),
2742                new_mode: right.map(|entry| entry.mode),
2743                old_oid: Some(left.oid),
2744                new_oid: right.map(|entry| entry.oid),
2745            });
2746        }
2747    }
2748
2749    for (idx, entry) in changes.into_iter().enumerate() {
2750        if entry.status == NameStatus::Added && consumed.contains(&idx) {
2751            continue;
2752        }
2753        if entry.status == NameStatus::Deleted && renamed_old_paths.contains(&entry.path) {
2754            continue;
2755        }
2756        result.push(entry);
2757    }
2758    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2759    result
2760}
2761
2762fn detect_exact_copies(
2763    changes: Vec<NameStatusEntry>,
2764    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2765    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2766    find_copies_harder: bool,
2767    rename_empty: bool,
2768) -> Vec<NameStatusEntry> {
2769    let changed_sources = changes
2770        .iter()
2771        .filter(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified))
2772        .map(|entry| entry.path.clone())
2773        .collect::<BTreeSet<_>>();
2774    let source_paths = left_entries
2775        .keys()
2776        .filter(|path| find_copies_harder || changed_sources.contains(path.as_slice()))
2777        .cloned()
2778        .collect::<Vec<_>>();
2779
2780    let mut result = Vec::new();
2781    for entry in changes {
2782        if entry.status != NameStatus::Added {
2783            result.push(entry);
2784            continue;
2785        }
2786        let Some(right) = right_entries.get(entry.path.as_bytes()) else {
2787            result.push(entry);
2788            continue;
2789        };
2790        if let Some(old_path) = source_paths.iter().find(|old_path| {
2791            old_path.as_slice() != entry.path.as_bytes()
2792                && left_entries.get(*old_path).is_some_and(|left| {
2793                    left.oid == right.oid && (rename_empty || !is_empty_blob_oid(&left.oid))
2794                })
2795        }) {
2796            result.push(NameStatusEntry {
2797                status: NameStatus::Copied(100),
2798                path: entry.path,
2799                old_path: Some(old_path.clone().into()),
2800                old_mode: left_entries
2801                    .get(old_path.as_slice())
2802                    .map(|entry| entry.mode),
2803                new_mode: entry.new_mode,
2804                old_oid: left_entries.get(old_path.as_slice()).map(|entry| entry.oid),
2805                new_oid: entry.new_oid,
2806            });
2807        } else {
2808            result.push(entry);
2809        }
2810    }
2811    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2812    result
2813}
2814
/// Old-side metadata of a rename source, snapshotted before the source delete
/// entry is consumed so it can be attached to the renamed destination.
///
/// Inexact rename detection fills one of these per matched `Deleted` entry and
/// copies its fields into the `old_*` side of the resulting rename entry.
#[derive(Debug, Clone)]
struct RenameSourceMeta {
    /// Path of the deleted entry; becomes the rename's `old_path`.
    path: BString,
    /// The deleted entry's `old_mode`; becomes the rename's `old_mode`.
    mode: Option<u32>,
    /// The deleted entry's `old_oid`; becomes the rename's `old_oid`.
    oid: Option<ObjectId>,
}
2823
/// A scored candidate pairing of a deleted source with an added destination,
/// used to order inexact-rename assignment best-match-first.
///
/// `src` and `dst` index the per-call candidate lists built by inexact rename
/// detection, not the `changes` vector itself. Only pairs whose score meets the
/// rename threshold are recorded.
struct ScoredPair {
    /// Index into the `deleted` candidate list.
    src: usize,
    /// Index into the `added` candidate list.
    dst: usize,
    /// Similarity percentage in `0..=100`.
    score: u8,
}
2834
2835/// Inexact rename detection: pair still-unmatched deleted files with still-
2836/// unmatched added files by content similarity, replacing the best matches
2837/// (similarity >= `rename_threshold`) with [`NameStatus::Renamed`].
2838///
2839/// Exact renames have already run, so the only `Deleted`/`Added` entries left
2840/// here are ones with no identical-OID partner. Assignment is greedy by
2841/// descending score (then by source/destination order for determinism), and
2842/// each source and destination is used at most once — matching git's
2843/// `diffcore-rename` behaviour. Empty blobs are never used as a rename source
2844/// when `rename_empty` is false, mirroring exact detection.
2845fn detect_inexact_renames(
2846    changes: Vec<NameStatusEntry>,
2847    options: &RenameDetectionOptions,
2848    fetch_blob: &impl Fn(&ObjectId) -> Option<Vec<u8>>,
2849) -> Vec<NameStatusEntry> {
2850    let threshold = options.rename_threshold;
2851    // A threshold above 100 can never be met; nothing to do.
2852    if threshold > 100 {
2853        return changes;
2854    }
2855
2856    // Collect the candidate sources (Deletes) and destinations (Adds) with their
2857    // positions in `changes`, fetching blob bytes once each.
2858    let mut deleted: Vec<(usize, Vec<u8>)> = Vec::new();
2859    let mut added: Vec<(usize, Vec<u8>)> = Vec::new();
2860    for (idx, entry) in changes.iter().enumerate() {
2861        match entry.status {
2862            NameStatus::Deleted => {
2863                let Some(oid) = entry.old_oid.as_ref() else {
2864                    continue;
2865                };
2866                if !options.base.rename_empty && is_empty_blob_oid(oid) {
2867                    continue;
2868                }
2869                if let Some(bytes) = fetch_blob(oid) {
2870                    deleted.push((idx, bytes));
2871                }
2872            }
2873            NameStatus::Added => {
2874                let Some(oid) = entry.new_oid.as_ref() else {
2875                    continue;
2876                };
2877                if !options.base.rename_empty && is_empty_blob_oid(oid) {
2878                    continue;
2879                }
2880                if let Some(bytes) = fetch_blob(oid) {
2881                    added.push((idx, bytes));
2882                }
2883            }
2884            _ => {}
2885        }
2886    }
2887
2888    if deleted.is_empty() || added.is_empty() {
2889        return changes;
2890    }
2891
2892    // Score every (delete, add) pair; keep only those meeting the threshold.
2893    let mut pairs: Vec<ScoredPair> = Vec::new();
2894    for (si, (_, src_bytes)) in deleted.iter().enumerate() {
2895        for (di, (_, dst_bytes)) in added.iter().enumerate() {
2896            let score = blob_similarity(src_bytes, dst_bytes);
2897            if score >= threshold {
2898                pairs.push(ScoredPair {
2899                    src: si,
2900                    dst: di,
2901                    score,
2902                });
2903            }
2904        }
2905    }
2906    // Best score first; ties broken by source then destination order so the
2907    // result is deterministic regardless of input ordering.
2908    pairs.sort_by(|a, b| {
2909        b.score
2910            .cmp(&a.score)
2911            .then_with(|| a.src.cmp(&b.src))
2912            .then_with(|| a.dst.cmp(&b.dst))
2913    });
2914
2915    // Greedily assign each source/destination once.
2916    let mut src_used = vec![false; deleted.len()];
2917    let mut dst_used = vec![false; added.len()];
2918    // destination changes-index -> (source changes-index, score).
2919    let mut rename_of: BTreeMap<usize, (usize, u8)> = BTreeMap::new();
2920    for pair in pairs {
2921        if src_used[pair.src] || dst_used[pair.dst] {
2922            continue;
2923        }
2924        src_used[pair.src] = true;
2925        dst_used[pair.dst] = true;
2926        let src_change_idx = deleted[pair.src].0;
2927        let dst_change_idx = added[pair.dst].0;
2928        rename_of.insert(dst_change_idx, (src_change_idx, pair.score));
2929    }
2930
2931    if rename_of.is_empty() {
2932        return changes;
2933    }
2934
2935    // Snapshot the source (delete) entries' metadata before we consume them, so
2936    // each renamed destination can carry the correct old path/mode/oid.
2937    let consumed_sources: BTreeSet<usize> =
2938        rename_of.values().map(|(src_idx, _)| *src_idx).collect();
2939    let source_meta: BTreeMap<usize, RenameSourceMeta> = consumed_sources
2940        .iter()
2941        .map(|&src_idx| {
2942            let src = &changes[src_idx];
2943            (
2944                src_idx,
2945                RenameSourceMeta {
2946                    path: src.path.clone(),
2947                    mode: src.old_mode,
2948                    oid: src.old_oid,
2949                },
2950            )
2951        })
2952        .collect();
2953
2954    let mut result = Vec::with_capacity(changes.len());
2955    for (idx, entry) in changes.into_iter().enumerate() {
2956        if consumed_sources.contains(&idx) {
2957            // This delete became the source of a rename; drop it.
2958            continue;
2959        }
2960        if let Some((src_idx, score)) = rename_of.get(&idx) {
2961            // The destination becomes a rename from the matched source. Pull the
2962            // old-side metadata from the snapshot; the new-side metadata stays as
2963            // the destination's.
2964            let meta = source_meta
2965                .get(src_idx)
2966                .cloned()
2967                .unwrap_or(RenameSourceMeta {
2968                    path: BString::default(),
2969                    mode: None,
2970                    oid: None,
2971                });
2972            result.push(NameStatusEntry {
2973                status: NameStatus::Renamed(*score),
2974                path: entry.path,
2975                old_path: Some(meta.path),
2976                old_mode: meta.mode,
2977                new_mode: entry.new_mode,
2978                old_oid: meta.oid,
2979                new_oid: entry.new_oid,
2980            });
2981            continue;
2982        }
2983        result.push(entry);
2984    }
2985
2986    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2987    result
2988}
2989
2990/// Inexact copy detection: for each still-`Added` file, find the most similar
2991/// candidate *source* on the left side (similarity >= `copy_threshold`) and, if
2992/// found, report it as a [`NameStatus::Copied`]. The source is not removed
2993/// (copies leave the original in place).
2994///
2995/// Candidate sources follow the same rule as exact copy detection: with
2996/// `find_copies_harder` every left-side path is eligible; otherwise only paths
2997/// that were themselves changed (deleted or modified) on this diff. Exact copies
2998/// have already run, so any remaining `Added` here had no identical-OID source.
2999fn detect_inexact_copies(
3000    changes: Vec<NameStatusEntry>,
3001    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3002    options: &RenameDetectionOptions,
3003    fetch_blob: &impl Fn(&ObjectId) -> Option<Vec<u8>>,
3004) -> Vec<NameStatusEntry> {
3005    let threshold = options.copy_threshold;
3006    if threshold > 100 {
3007        return changes;
3008    }
3009
3010    let changed_sources = changes
3011        .iter()
3012        .filter(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified))
3013        .map(|entry| entry.path.clone())
3014        .collect::<BTreeSet<_>>();
3015    // Eligible source paths, paired with their bytes (fetched lazily/once).
3016    let mut sources: Vec<(Vec<u8>, &TrackedEntry, Vec<u8>)> = Vec::new();
3017    for (path, tracked) in left_entries {
3018        if !(options.base.find_copies_harder || changed_sources.contains(path.as_slice())) {
3019            continue;
3020        }
3021        if !options.base.rename_empty && is_empty_blob_oid(&tracked.oid) {
3022            continue;
3023        }
3024        if let Some(bytes) = fetch_blob(&tracked.oid) {
3025            sources.push((path.clone(), tracked, bytes));
3026        }
3027    }
3028    if sources.is_empty() {
3029        return changes;
3030    }
3031
3032    let mut result = Vec::with_capacity(changes.len());
3033    for entry in changes {
3034        if entry.status != NameStatus::Added {
3035            result.push(entry);
3036            continue;
3037        }
3038        let Some(new_oid) = entry.new_oid.as_ref() else {
3039            result.push(entry);
3040            continue;
3041        };
3042        let Some(dst_bytes) = fetch_blob(new_oid) else {
3043            result.push(entry);
3044            continue;
3045        };
3046
3047        // Pick the best-scoring source path that meets the threshold. Ties are
3048        // broken by path order (BTreeMap iteration is sorted) so the choice is
3049        // deterministic.
3050        let mut best: Option<(usize, u8)> = None;
3051        for (i, (src_path, _, src_bytes)) in sources.iter().enumerate() {
3052            if src_path.as_slice() == entry.path.as_bytes() {
3053                continue;
3054            }
3055            let score = blob_similarity(src_bytes, &dst_bytes);
3056            if score < threshold {
3057                continue;
3058            }
3059            match best {
3060                Some((_, best_score)) if best_score >= score => {}
3061                _ => best = Some((i, score)),
3062            }
3063        }
3064
3065        if let Some((src_idx, score)) = best {
3066            let (src_path, src_tracked, _) = &sources[src_idx];
3067            result.push(NameStatusEntry {
3068                status: NameStatus::Copied(score),
3069                path: entry.path,
3070                old_path: Some(src_path.clone().into()),
3071                old_mode: Some(src_tracked.mode),
3072                new_mode: entry.new_mode,
3073                old_oid: Some(src_tracked.oid),
3074                new_oid: entry.new_oid,
3075            });
3076        } else {
3077            result.push(entry);
3078        }
3079    }
3080    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
3081    result
3082}
3083
3084fn is_empty_blob_oid(oid: &ObjectId) -> bool {
3085    object_id_for_bytes(oid.format(), "blob", b"").is_ok_and(|empty| empty == *oid)
3086}
3087
3088// ===========================================================================
3089// Content similarity (the engine for inexact `-M`/`-C` rename/copy detection).
3090//
3091// This mirrors upstream git's similarity estimate from `diffcore-delta.c`
3092// (the span-hash counting) and `diffcore-rename.c` (the score formula), so the
3093// `R<score>`/`C<score>` we emit match git's percentages.
3094//
3095// The metric, precisely:
3096//
3097//   1. Each blob is broken into *spans*. Starting at a byte, we accumulate a
3098//      rolling hash of the bytes and end the span at the first `\n` (inclusive)
3099//      or once the span reaches `MAX_SPAN_BYTES` (64) bytes, whichever comes
3100//      first. (The 64-byte cap keeps a file with no/few newlines — e.g. a
3101//      binary blob or one very long line — from collapsing into a single span,
3102//      so similarity still tracks shared substrings.) Each span yields a
3103//      `(hash, byte_count)` pair, where `byte_count` is the span's length in
3104//      bytes. This is the exact loop git uses in `hash_chars()`.
3105//
3106//   2. The two blobs' spans are reduced to multisets keyed by hash: for each
3107//      hash we keep the total number of bytes spanned by entries with that
3108//      hash, on each side. `common_bytes` is then the sum over all hashes of
3109//      `min(bytes_on_src, bytes_on_dst)` — the bytes that exist on both sides.
3110//      This is git's `src_copied`.
3111//
//   3. The score is `common_bytes / max(size_src, size_dst)`, expressed as a
//      percentage using git's two-step *truncating* integer arithmetic rather
//      than a single rounded division:
//
//          internal = common_bytes * MAX_SCORE / max(size_src, size_dst)
//          score%   = internal * 100 / MAX_SCORE
//
//      with `MAX_SCORE == 60000` and both divisions truncating. The
//      intermediate truncation is what reproduces git's percentages: e.g.
//      `common_bytes = 4`, `max_size = 6` gives 40000 and then 66 (git's
//      `R066`), whereas a single rounded step would give 67.
3121//
3122// Edge cases match git: two empty blobs are 100% similar (identical content);
3123// an empty blob vs a non-empty one is 0%. Equal byte buffers are always 100%.
3124
/// Maximum number of bytes in a single similarity span before it is force-cut.
///
/// A span also ends at the first `\n` it contains, so this cap only comes into
/// play for runs of bytes that reach this length without a newline.
///
/// git uses 64 (`hash_chars()` breaks a span once `++chunks >= 64`).
const MAX_SPAN_BYTES: usize = 64;
3129
3130/// Compute the content similarity of two blobs as an integer percentage in
3131/// `0..=100`, using git's span-hash counting metric (see the module comment
3132/// above for the exact definition).
3133///
3134/// The result is symmetric (`blob_similarity(a, b) == blob_similarity(b, a)`)
3135/// because the score divides the common-byte count by the larger of the two
3136/// sizes. Byte-identical blobs return `100`; a non-empty blob compared against
3137/// an empty one returns `0`; two empty blobs return `100`.
3138///
3139/// This is the same number git prints as `similarity index N%` and uses to
3140/// decide `-M`/`-C` rename and copy detection.
3141pub fn blob_similarity(a: &[u8], b: &[u8]) -> u8 {
3142    // Fast paths that also pin down the empty-blob conventions.
3143    if a == b {
3144        return 100;
3145    }
3146    let max_size = a.len().max(b.len());
3147    if max_size == 0 {
3148        // Both empty (and not caught by `a == b` only if both are empty, which
3149        // they are here) -> identical.
3150        return 100;
3151    }
3152
3153    let src = span_hash_counts(a);
3154    let dst = span_hash_counts(b);
3155    let common = common_span_bytes(&src, &dst);
3156
3157    // Match git's diffcore-rename integer math exactly. git computes an internal
3158    // score `src_copied * MAX_SCORE / max_size` (MAX_SCORE == 60000) with integer
3159    // truncation, then reports the similarity index as `score * 100 / MAX_SCORE`,
3160    // truncated again. This two-step truncation -- *not* a single rounded
3161    // `common * 100 / max_size` -- is what yields git's exact percentages: e.g.
3162    // common=4, max_size=6 gives 4*60000/6=40000 then 40000*100/60000=66 (git's
3163    // `R066`), whereas a rounded single step would give 67.
3164    const MAX_SCORE: u64 = 60000;
3165    let internal = (common as u64 * MAX_SCORE) / max_size as u64;
3166    let score = internal * 100 / MAX_SCORE;
3167    score.min(100) as u8
3168}
3169
3170/// Break `data` into spans and return, per span hash, the total number of bytes
3171/// covered by spans with that hash. Spans end at a newline (inclusive) or once
3172/// they reach [`MAX_SPAN_BYTES`] bytes — exactly git's `hash_chars()` loop.
3173///
3174/// The returned map is `hash -> total_span_bytes`. Summing all values yields
3175/// `data.len()`, so the byte accounting is exact.
3176fn span_hash_counts(data: &[u8]) -> BTreeMap<u64, usize> {
3177    let mut counts: BTreeMap<u64, usize> = BTreeMap::new();
3178    let mut idx = 0usize;
3179    let len = data.len();
3180    while idx < len {
3181        // Roll a hash over the bytes of this span. The mixing mirrors git's
3182        // two-accumulator scheme from `diffcore-delta.c`; the exact constants do
3183        // not matter for correctness (any good per-span hash works), only that
3184        // identical spans collide and distinct spans rarely do.
3185        let mut accum1: u32 = 0;
3186        let mut accum2: u32 = 0;
3187        let mut span_len = 0usize;
3188        loop {
3189            let c = data[idx] as u32;
3190            idx += 1;
3191            span_len += 1;
3192            accum1 = (accum1 << 7) ^ (accum2 >> 25);
3193            accum2 = (accum2 << 7) ^ (accum1 >> 25);
3194            accum1 = accum1.wrapping_add(c);
3195            let newline = c == u32::from(b'\n');
3196            if span_len >= MAX_SPAN_BYTES || newline || idx >= len {
3197                break;
3198            }
3199        }
3200        // Fold the two accumulators (and the span length) into one 64-bit key.
3201        // Including the length keeps spans of different lengths from colliding
3202        // when their rolling-hash states happen to coincide.
3203        let hash = ((accum1 as u64) << 32) ^ (accum2 as u64) ^ ((span_len as u64) << 1);
3204        *counts.entry(hash).or_insert(0) += span_len;
3205    }
3206    counts
3207}
3208
3209/// Sum, over every hash present in both maps, the smaller of the two byte
3210/// counts. This is git's `src_copied`: the number of bytes that appear on both
3211/// sides (counting multiplicity via the per-hash byte totals).
3212/// git `diffcore_count_changes()`: span-hash byte accounting between two
3213/// blobs. Returns `(src_copied, literal_added)` — the bytes of `src` that
3214/// survive into `dst`, and the bytes of `dst` not accounted for by `src`.
3215/// `--dirstat`'s default "changes" damage is
3216/// `(src.len() - src_copied) + literal_added`.
3217pub fn count_changes(src: &[u8], dst: &[u8]) -> (usize, usize) {
3218    let src_counts = span_hash_counts(src);
3219    let dst_counts = span_hash_counts(dst);
3220    let copied = common_span_bytes(&src_counts, &dst_counts);
3221    (copied, dst.len() - copied)
3222}
3223
/// Total the bytes shared by two `hash -> span_bytes` maps.
///
/// For every hash present in both maps the smaller of the two byte counts is
/// taken, and those minimums are summed. Hashes found on only one side
/// contribute nothing. The result does not depend on argument order.
fn common_span_bytes(src: &BTreeMap<u64, usize>, dst: &BTreeMap<u64, usize>) -> usize {
    // Walk whichever map has fewer keys and probe the other one, which saves
    // a few lookups without affecting the sum.
    let (walked, probed) = if dst.len() < src.len() {
        (dst, src)
    } else {
        (src, dst)
    };
    walked
        .iter()
        .filter_map(|(hash, bytes)| probed.get(hash).map(|other| (*bytes).min(*other)))
        .sum()
}
3239
/// Return the byte path a diff entry is ordered by: always the entry's `path`
/// field, never its `old_path`.
fn diff_entry_sort_path(entry: &NameStatusEntry) -> &[u8] {
    // git's diffcore re-inserts rename/copy pairs at their *destination*'s
    // position, so the queue (raw, numstat, stat, ...) sorts by the new path.
    entry.path.as_bytes()
}
3245
3246fn mark_unstaged_worktree_oids_unresolved(
3247    changes: Vec<NameStatusEntry>,
3248    index_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3249    worktree_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3250) -> Vec<NameStatusEntry> {
3251    changes
3252        .into_iter()
3253        .map(|mut entry| {
3254            let worktree_entry = worktree_entries.get(entry.path.as_bytes());
3255            if worktree_entry != index_entries.get(entry.path.as_bytes()) {
3256                entry.new_oid = None;
3257            }
3258            entry
3259        })
3260        .collect()
3261}
3262
/// The `(mode, oid)` pair recorded for one path on one side of a comparison
/// (a tree, the index, or the working tree). Two entries are equal only when
/// both the mode and the object id match.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TrackedEntry {
    /// Git file mode of the entry.
    mode: u32,
    /// Object id recorded for the entry.
    oid: ObjectId,
}
3268
/// A path-keyed map of tracked entries: one flattened side of a tree (or index/
/// worktree) snapshot. Keys are raw path bytes; being a `BTreeMap`, iteration
/// is in byte-wise path order.
type TrackedEntryMap = BTreeMap<Vec<u8>, TrackedEntry>;

/// The `(left, right)` sides produced by a tree-vs-tree comparison.
type TrackedEntryPair = (TrackedEntryMap, TrackedEntryMap);
3275
/// Index contents together with the stat cache built from the same index
/// read, as produced by `read_index_snapshot`.
struct IndexSnapshot {
    /// Index entries keyed by raw path bytes.
    entries: BTreeMap<Vec<u8>, TrackedEntry>,
    /// Stat cache derived from the index and the index file's mtime.
    stat_cache: IndexStatCache,
}
3280
3281fn read_index_entries(
3282    git_dir: &Path,
3283    format: ObjectFormat,
3284) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
3285    let index_path = sley_index::repository_index_path(git_dir);
3286    if !index_path.exists() {
3287        return Ok(BTreeMap::new());
3288    }
3289    let index = expand_sparse_index_for_worktree_diff(
3290        sley_index::read_repository_index(git_dir, format)?,
3291        git_dir,
3292        format,
3293    )?;
3294    Ok(index
3295        .entries
3296        .into_iter()
3297        .filter(|entry| entry.stage() == sley_index::Stage::Normal && !entry.is_intent_to_add())
3298        .map(|entry| {
3299            (
3300                entry.path.into_bytes(),
3301                TrackedEntry {
3302                    mode: entry.mode,
3303                    oid: entry.oid,
3304                },
3305            )
3306        })
3307        .collect())
3308}
3309
3310/// Collect the set of stage-0 paths flagged intent-to-add (`git add -N`) in the
3311/// index. These diff as new files rather than as modifications of their recorded
3312/// empty-blob id.
3313fn read_intent_to_add_paths(
3314    git_dir: &Path,
3315    format: ObjectFormat,
3316) -> Result<std::collections::HashSet<Vec<u8>>> {
3317    let index_path = sley_index::repository_index_path(git_dir);
3318    if !index_path.exists() {
3319        return Ok(std::collections::HashSet::new());
3320    }
3321    let index = expand_sparse_index_for_worktree_diff(
3322        sley_index::read_repository_index(git_dir, format)?,
3323        git_dir,
3324        format,
3325    )?;
3326    Ok(index
3327        .entries
3328        .iter()
3329        .filter(|entry| entry.stage() == sley_index::Stage::Normal && entry.is_intent_to_add())
3330        .map(|entry| entry.path.as_bytes().to_vec())
3331        .collect())
3332}
3333
3334fn read_index_snapshot(git_dir: &Path, format: ObjectFormat) -> Result<IndexSnapshot> {
3335    let index_path = sley_index::repository_index_path(git_dir);
3336    let index_metadata = match fs::metadata(&index_path) {
3337        Ok(metadata) => metadata,
3338        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
3339            return Ok(IndexSnapshot {
3340                entries: BTreeMap::new(),
3341                stat_cache: IndexStatCache::default(),
3342            });
3343        }
3344        Err(err) => return Err(err.into()),
3345    };
3346    let index = expand_sparse_index_for_worktree_diff(
3347        sley_index::read_repository_index(git_dir, format)?,
3348        git_dir,
3349        format,
3350    )?;
3351    let stat_cache =
3352        IndexStatCache::from_index_mtime(&index, sley_index::file_mtime_parts(&index_metadata));
3353    let entries = index
3354        .entries
3355        .into_iter()
3356        .map(|entry| {
3357            (
3358                entry.path.into_bytes(),
3359                TrackedEntry {
3360                    mode: entry.mode,
3361                    oid: entry.oid,
3362                },
3363            )
3364        })
3365        .collect();
3366    Ok(IndexSnapshot {
3367        entries,
3368        stat_cache,
3369    })
3370}
3371
/// Read-only view of an index entry, implemented for both
/// `sley_index::IndexEntry` and `sley_index::IndexEntryRef` so the
/// index-vs-worktree comparison can be written once over either type.
trait WorktreeIndexEntry {
    /// Path of the entry as raw bytes.
    fn git_path(&self) -> &[u8];
    /// Stage the entry is recorded at.
    fn stage(&self) -> sley_index::Stage;
    /// Mode recorded for the entry in the index.
    fn mode(&self) -> u32;
    /// Object id recorded for the entry in the index.
    fn oid(&self) -> ObjectId;
    /// Whether the entry is flagged intent-to-add.
    fn is_intent_to_add(&self) -> bool;
    /// Whether the entry is flagged skip-worktree.
    fn is_skip_worktree(&self) -> bool;
    /// Whether `stat_cache` reports this entry as reusable for a file with the
    /// given `metadata`. `index_worktree_change_for_entry` treats a reusable
    /// entry as unchanged and does not read the file.
    fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool;
}
3381
/// `WorktreeIndexEntry` for the owned index entry type. Every method forwards
/// to a field or to a same-named function on `sley_index::IndexEntry`.
impl WorktreeIndexEntry for sley_index::IndexEntry {
    fn git_path(&self) -> &[u8] {
        self.path.as_bytes()
    }

    fn stage(&self) -> sley_index::Stage {
        // Spelled with the full type path so the call names the type's own
        // `stage` rather than this trait method.
        // NOTE(review): assumes `IndexEntry::stage` is an inherent method — confirm.
        sley_index::IndexEntry::stage(self)
    }

    fn mode(&self) -> u32 {
        self.mode
    }

    fn oid(&self) -> ObjectId {
        self.oid
    }

    fn is_intent_to_add(&self) -> bool {
        // Full type path for the same reason as in `stage` above.
        sley_index::IndexEntry::is_intent_to_add(self)
    }

    fn is_skip_worktree(&self) -> bool {
        // Full type path for the same reason as in `stage` above.
        sley_index::IndexEntry::is_skip_worktree(self)
    }

    fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool {
        // The cache lookup returns an `Option`; only its presence matters here.
        stat_cache.reusable_index_entry(self, metadata).is_some()
    }
}
3411
/// `WorktreeIndexEntry` for the borrowed index entry type. Every method
/// forwards to a field or to a same-named function on
/// `sley_index::IndexEntryRef`.
impl WorktreeIndexEntry for sley_index::IndexEntryRef<'_> {
    fn git_path(&self) -> &[u8] {
        // The borrowed entry stores its path directly as a byte slice.
        self.path
    }

    fn stage(&self) -> sley_index::Stage {
        // Spelled with the full type path so the call names the type's own
        // `stage` rather than this trait method.
        // NOTE(review): assumes `IndexEntryRef::stage` is an inherent method — confirm.
        sley_index::IndexEntryRef::stage(self)
    }

    fn mode(&self) -> u32 {
        self.mode
    }

    fn oid(&self) -> ObjectId {
        self.oid
    }

    fn is_intent_to_add(&self) -> bool {
        // Full type path for the same reason as in `stage` above.
        sley_index::IndexEntryRef::is_intent_to_add(self)
    }

    fn is_skip_worktree(&self) -> bool {
        // Full type path for the same reason as in `stage` above.
        sley_index::IndexEntryRef::is_skip_worktree(self)
    }

    fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool {
        // Unlike the owned-entry lookup, this cache method's result is
        // returned as the `bool` directly.
        stat_cache.reusable_index_entry_ref(self, metadata)
    }
}
3441
3442fn tracked_entry_from_index(entry: &impl WorktreeIndexEntry) -> TrackedEntry {
3443    TrackedEntry {
3444        mode: entry.mode(),
3445        oid: entry.oid(),
3446    }
3447}
3448
3449fn head_tree_entries(
3450    git_dir: &Path,
3451    format: ObjectFormat,
3452    db: &FileObjectDatabase,
3453) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
3454    let refs = FileRefStore::new(git_dir, format);
3455    let Some(head) = refs.read_ref("HEAD")? else {
3456        return Ok(BTreeMap::new());
3457    };
3458    let commit_oid = match head {
3459        RefTarget::Direct(oid) => Some(oid),
3460        RefTarget::Symbolic(name) => match refs.read_ref(&name)? {
3461            Some(RefTarget::Direct(oid)) => Some(oid),
3462            _ => None,
3463        },
3464    };
3465    let Some(commit_oid) = commit_oid else {
3466        return Ok(BTreeMap::new());
3467    };
3468    let object = db.read_object(&commit_oid)?;
3469    if object.object_type != ObjectType::Commit {
3470        return Err(GitError::InvalidObject(format!(
3471            "HEAD {commit_oid} is not a commit"
3472        )));
3473    }
3474    let commit = Commit::parse_ref(format, &object.body)?;
3475    let mut entries = BTreeMap::new();
3476    collect_tree_entries(db, format, &commit.tree, Vec::new(), &mut entries)?;
3477    Ok(entries)
3478}
3479
3480/// Flatten `tree_oid` into `entries` (keyed by `prefix`-rooted full paths),
3481/// adapting the canonical [`flatten_tree`] tuples into [`TrackedEntry`].
3482///
3483/// `flatten_tree` flattens from an empty prefix; each of its paths is rejoined
3484/// under `prefix` with [`join_tree_path`], reproducing the recursive
3485/// prefix-building this helper previously did inline. Used by the full
3486/// (non-pruned) flatten paths: `--find-copies-harder` and the changed-subtree
3487/// add/delete sides of the simultaneous diff walk.
3488fn collect_tree_entries(
3489    db: &FileObjectDatabase,
3490    format: ObjectFormat,
3491    tree_oid: &ObjectId,
3492    prefix: Vec<u8>,
3493    entries: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3494) -> Result<()> {
3495    for (rel_path, (mode, oid)) in flatten_tree(db, format, tree_oid)? {
3496        let path = join_tree_path(&prefix, &rel_path);
3497        entries.insert(path, TrackedEntry { mode, oid });
3498    }
3499    Ok(())
3500}
3501
/// Git's mode value for a subtree (directory) entry inside a tree object.
/// `merge_tree_entry` compares an entry's mode against this to decide whether
/// it is a subtree or something else.
const TREE_ENTRY_MODE: u32 = 0o040000;
3504
3505/// Read `tree_oid` and parse it as a tree, erroring if the object is some other
3506/// type. Shared by the simultaneous tree-diff walk so both sides validate the
3507/// object type identically to [`collect_tree_entries`].
3508fn read_tree_object(
3509    db: &FileObjectDatabase,
3510    format: ObjectFormat,
3511    tree_oid: &ObjectId,
3512) -> Result<Tree> {
3513    let object = db.read_object(tree_oid)?;
3514    if object.object_type != ObjectType::Tree {
3515        return Err(GitError::InvalidObject(format!(
3516            "expected tree {tree_oid}, found {}",
3517            object.object_type.as_str()
3518        )));
3519    }
3520    Tree::parse(format, &object.body)
3521}
3522
/// Join `prefix` and `name` into one path with a single `/` between them.
///
/// An empty `prefix` contributes nothing — the result is just `name`, with no
/// leading separator.
fn join_tree_path(prefix: &[u8], name: &[u8]) -> Vec<u8> {
    if prefix.is_empty() {
        return name.to_vec();
    }
    [prefix, &b"/"[..], name].concat()
}
3534
3535/// Fully flatten both trees into independent `left`/`right` maps (every blob on
3536/// each side, no pruning). Used only on the `--find-copies-harder` path, where
3537/// copy detection may reach into otherwise-unchanged subtrees for a source.
3538fn collect_full_tree_pair(
3539    db: &FileObjectDatabase,
3540    format: ObjectFormat,
3541    left_tree: &ObjectId,
3542    right_tree: &ObjectId,
3543) -> Result<TrackedEntryPair> {
3544    let mut left = BTreeMap::new();
3545    collect_tree_entries(db, format, left_tree, Vec::new(), &mut left)?;
3546    let mut right = BTreeMap::new();
3547    collect_tree_entries(db, format, right_tree, Vec::new(), &mut right)?;
3548    Ok((left, right))
3549}
3550
3551/// Walk two trees *simultaneously*, collecting into `left` and `right` only the
3552/// blob entries that differ between the two sides — every entry that is present
3553/// and byte-identical (same mode + same OID) on both sides is omitted, and any
3554/// subtree whose OID is identical on both sides is skipped wholesale without
3555/// being read or recursed into. This is the core optimization git relies on to
3556/// make tree diffs cheap: equal subtrees are pruned in O(1).
3557///
3558/// The resulting `left`/`right` maps are exactly the subset of the fully
3559/// flattened maps (as produced by [`collect_tree_entries`]) restricted to the
3560/// paths that participate in an Added/Deleted/Modified change. Because
3561/// [`raw_name_status_changes`] emits nothing for a path that is identical on both
3562/// sides, diffing these pruned maps yields byte-identical name-status output to
3563/// diffing the full maps. (Callers that need the *complete* left map — i.e.
3564/// `--find-copies-harder`, where an unchanged file may be a copy source — must
3565/// still use [`collect_tree_entries`]; see the tree-diff entry points.)
3566fn changed_tree_entries(
3567    db: &FileObjectDatabase,
3568    format: ObjectFormat,
3569    left_tree: &ObjectId,
3570    right_tree: &ObjectId,
3571) -> Result<TrackedEntryPair> {
3572    let mut left = BTreeMap::new();
3573    let mut right = BTreeMap::new();
3574    // Identical root trees produce no changes at all and need not be read.
3575    if left_tree != right_tree {
3576        diff_tree_pair(
3577            db,
3578            format,
3579            left_tree,
3580            right_tree,
3581            &[],
3582            &mut left,
3583            &mut right,
3584        )?;
3585    }
3586    Ok((left, right))
3587}
3588
3589/// Recursively diff two subtrees rooted at `prefix`, appending differing blob
3590/// entries to `left` / `right`. Invariant: the two OIDs are already known to
3591/// differ (identical subtrees are pruned by the caller before recursing).
3592fn diff_tree_pair(
3593    db: &FileObjectDatabase,
3594    format: ObjectFormat,
3595    left_tree: &ObjectId,
3596    right_tree: &ObjectId,
3597    prefix: &[u8],
3598    left: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3599    right: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3600) -> Result<()> {
3601    let left_entries = read_tree_object(db, format, left_tree)?.entries;
3602    let right_entries = read_tree_object(db, format, right_tree)?.entries;
3603
3604    // Index the right side by name so the union of names can be walked without
3605    // relying on git's directory-aware entry ordering. (Iterating the union of
3606    // names, rather than a positional merge, keeps correctness independent of
3607    // entry order.)
3608    let mut right_by_name: HashMap<&[u8], &TreeEntry> = HashMap::with_capacity(right_entries.len());
3609    for entry in &right_entries {
3610        right_by_name.insert(entry.name.as_bytes(), entry);
3611    }
3612
3613    for left_entry in &left_entries {
3614        match right_by_name.remove(left_entry.name.as_bytes()) {
3615            Some(right_entry) => {
3616                merge_tree_entry(
3617                    db,
3618                    format,
3619                    prefix,
3620                    Some(left_entry),
3621                    Some(right_entry),
3622                    left,
3623                    right,
3624                )?;
3625            }
3626            None => {
3627                merge_tree_entry(db, format, prefix, Some(left_entry), None, left, right)?;
3628            }
3629        }
3630    }
3631    // Names only present on the right are pure additions.
3632    for right_entry in &right_entries {
3633        if right_by_name.contains_key(right_entry.name.as_bytes()) {
3634            merge_tree_entry(db, format, prefix, None, Some(right_entry), left, right)?;
3635        }
3636    }
3637    Ok(())
3638}
3639
3640/// Reconcile a single name that may appear on the left side, the right side, or
3641/// both, recording any resulting blob change(s) into `left` / `right`. This
3642/// reproduces exactly the union-of-flattened-maps semantics:
3643///
3644/// * tree vs tree with equal OID -> pruned (no read, no recursion);
3645/// * tree vs tree with differing OID -> recurse;
3646/// * blob vs blob, equal mode+OID -> unchanged, emitted nowhere;
3647/// * blob vs blob, differing mode or OID -> both sides recorded (a Modify);
3648/// * a tree on one side and a non-tree on the other (or a name present on only
3649///   one side) -> the flattened paths differ (`name/...` vs `name`), so the two
3650///   are unrelated: the tree side is flattened wholesale and the blob side is
3651///   recorded independently (an Add and/or a Delete).
3652fn merge_tree_entry(
3653    db: &FileObjectDatabase,
3654    format: ObjectFormat,
3655    prefix: &[u8],
3656    left_entry: Option<&TreeEntry>,
3657    right_entry: Option<&TreeEntry>,
3658    left: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3659    right: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3660) -> Result<()> {
3661    let left_is_tree = left_entry.is_some_and(|entry| entry.mode == TREE_ENTRY_MODE);
3662    let right_is_tree = right_entry.is_some_and(|entry| entry.mode == TREE_ENTRY_MODE);
3663
3664    if let (Some(left_entry), Some(right_entry)) = (left_entry, right_entry) {
3665        if left_is_tree && right_is_tree {
3666            // Two subtrees under the same name: prune if identical, else recurse.
3667            if left_entry.oid == right_entry.oid {
3668                return Ok(());
3669            }
3670            let path = join_tree_path(prefix, left_entry.name.as_bytes());
3671            return diff_tree_pair(
3672                db,
3673                format,
3674                &left_entry.oid,
3675                &right_entry.oid,
3676                &path,
3677                left,
3678                right,
3679            );
3680        }
3681        if !left_is_tree && !right_is_tree {
3682            // Two blobs under the same name. Identical mode+OID means unchanged
3683            // (nothing emitted); otherwise both sides are recorded so the diff
3684            // sees a Modify, matching the full-map `left != right` comparison.
3685            if left_entry.mode == right_entry.mode && left_entry.oid == right_entry.oid {
3686                return Ok(());
3687            }
3688            let path = join_tree_path(prefix, left_entry.name.as_bytes());
3689            left.insert(
3690                path.clone(),
3691                TrackedEntry {
3692                    mode: left_entry.mode,
3693                    oid: left_entry.oid,
3694                },
3695            );
3696            right.insert(
3697                path,
3698                TrackedEntry {
3699                    mode: right_entry.mode,
3700                    oid: right_entry.oid,
3701                },
3702            );
3703            return Ok(());
3704        }
3705        // Mixed: tree on one side, blob on the other. Their flattened paths
3706        // never collide, so handle each side as if the name existed only there.
3707    }
3708
3709    // Left side (if any): record as deletions.
3710    if let Some(left_entry) = left_entry {
3711        let path = join_tree_path(prefix, left_entry.name.as_bytes());
3712        if left_is_tree {
3713            collect_tree_entries(db, format, &left_entry.oid, path, left)?;
3714        } else {
3715            left.insert(
3716                path,
3717                TrackedEntry {
3718                    mode: left_entry.mode,
3719                    oid: left_entry.oid,
3720                },
3721            );
3722        }
3723    }
3724    // Right side (if any): record as additions.
3725    if let Some(right_entry) = right_entry {
3726        let path = join_tree_path(prefix, right_entry.name.as_bytes());
3727        if right_is_tree {
3728            collect_tree_entries(db, format, &right_entry.oid, path, right)?;
3729        } else {
3730            right.insert(
3731                path,
3732                TrackedEntry {
3733                    mode: right_entry.mode,
3734                    oid: right_entry.oid,
3735                },
3736            );
3737        }
3738    }
3739    Ok(())
3740}
3741
3742fn index_gitlinks(index: &BTreeMap<Vec<u8>, TrackedEntry>) -> BTreeMap<Vec<u8>, ObjectId> {
3743    index
3744        .iter()
3745        .filter(|(_, entry)| sley_index::is_gitlink(entry.mode))
3746        .map(|(path, entry)| (path.clone(), entry.oid))
3747        .collect()
3748}
3749
/// Gather the given paths into an owned, de-duplicated, ordered set.
fn candidate_path_set<'a>(candidate_paths: impl Iterator<Item = &'a Vec<u8>>) -> BTreeSet<Vec<u8>> {
    let mut unique = BTreeSet::new();
    for path in candidate_paths {
        unique.insert(path.clone());
    }
    unique
}
3753
/// Look up the working-tree state of every path in `candidates`.
///
/// Thin wrapper: iterates the set and forwards all arguments unchanged to
/// `worktree_entries_for_unique_paths`, whose result it returns.
fn worktree_entries_for_path_set(
    worktree_root: &Path,
    format: ObjectFormat,
    candidates: &BTreeSet<Vec<u8>>,
    index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
    stat_cache: Option<&IndexStatCache>,
) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
    worktree_entries_for_unique_paths(
        worktree_root,
        format,
        candidates.iter(),
        index_gitlinks,
        stat_cache,
    )
}
3769
3770fn worktree_entries_for_unique_paths<'a>(
3771    worktree_root: &Path,
3772    format: ObjectFormat,
3773    candidates: impl Iterator<Item = &'a Vec<u8>>,
3774    index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
3775    stat_cache: Option<&IndexStatCache>,
3776) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
3777    let mut entries = BTreeMap::new();
3778    for git_path in candidates {
3779        if let Some(entry) =
3780            worktree_entry_for_path(worktree_root, format, git_path, index_gitlinks, stat_cache)?
3781        {
3782            entries.insert(git_path.clone(), entry);
3783        }
3784    }
3785    Ok(entries)
3786}
3787
3788fn worktree_entry_for_path(
3789    worktree_root: &Path,
3790    format: ObjectFormat,
3791    git_path: &[u8],
3792    index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
3793    stat_cache: Option<&IndexStatCache>,
3794) -> Result<Option<TrackedEntry>> {
3795    let path = worktree_path_for_repo_path(worktree_root, git_path);
3796    let metadata = match fs::symlink_metadata(&path) {
3797        Ok(metadata) => metadata,
3798        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
3799        Err(err) => return Err(GitError::Io(err.to_string())),
3800    };
3801    let file_type = metadata.file_type();
3802    if let Some(staged_oid) = index_gitlinks.get(git_path)
3803        && metadata.is_dir()
3804    {
3805        let oid = gitlink_head_oid(&path, format).unwrap_or(*staged_oid);
3806        return Ok(Some(TrackedEntry {
3807            mode: sley_index::GITLINK_MODE,
3808            oid,
3809        }));
3810    }
3811    if metadata.is_dir() {
3812        if let Some(oid) = gitlink_head_oid(&path, format) {
3813            return Ok(Some(TrackedEntry {
3814                mode: sley_index::GITLINK_MODE,
3815                oid,
3816            }));
3817        }
3818        return Ok(None);
3819    }
3820    if !(metadata.is_file() || file_type.is_symlink()) {
3821        return Ok(None);
3822    }
3823    if let Some(entry) = stat_cache.and_then(|cache| cache.reusable_entry(git_path, &metadata)) {
3824        return Ok(Some(tracked_entry_from_index(entry)));
3825    }
3826    let body = if file_type.is_symlink() {
3827        symlink_target_bytes(&path)?
3828    } else {
3829        fs::read(&path)?
3830    };
3831    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
3832    let mode = if file_type.is_symlink() {
3833        0o120000
3834    } else {
3835        file_mode(&metadata)
3836    };
3837    Ok(Some(TrackedEntry { mode, oid }))
3838}
3839
3840fn index_worktree_change_for_entry(
3841    path: &Path,
3842    format: ObjectFormat,
3843    index_entry: &impl WorktreeIndexEntry,
3844    stat_cache: &IndexStatCache,
3845) -> Result<Option<NameStatusEntry>> {
3846    let git_path = index_entry.git_path();
3847    let metadata = match fs::symlink_metadata(path) {
3848        Ok(metadata) => metadata,
3849        Err(err)
3850            if err.kind() == std::io::ErrorKind::NotFound && index_entry.is_skip_worktree() =>
3851        {
3852            return Ok(None);
3853        }
3854        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
3855            return Ok(Some(index_worktree_deleted_entry(index_entry)));
3856        }
3857        Err(err) => return Err(GitError::Io(err.to_string())),
3858    };
3859    let file_type = metadata.file_type();
3860    let right = if metadata.is_dir() {
3861        if sley_index::is_gitlink(index_entry.mode()) {
3862            let oid = gitlink_head_oid(path, format).unwrap_or(index_entry.oid());
3863            Some(TrackedEntry {
3864                mode: sley_index::GITLINK_MODE,
3865                oid,
3866            })
3867        } else {
3868            gitlink_head_oid(path, format).map(|oid| TrackedEntry {
3869                mode: sley_index::GITLINK_MODE,
3870                oid,
3871            })
3872        }
3873    } else if metadata.is_file() || file_type.is_symlink() {
3874        if index_entry.reusable_with(stat_cache, &metadata) {
3875            return Ok(None);
3876        }
3877        let body = if file_type.is_symlink() {
3878            symlink_target_bytes(path)?
3879        } else {
3880            fs::read(path)?
3881        };
3882        let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
3883        let mode = if file_type.is_symlink() {
3884            0o120000
3885        } else {
3886            file_mode(&metadata)
3887        };
3888        Some(TrackedEntry { mode, oid })
3889    } else {
3890        None
3891    };
3892    let Some(right) = right else {
3893        return Ok(Some(index_worktree_deleted_entry(index_entry)));
3894    };
3895    let left = tracked_entry_from_index(index_entry);
3896    if right == left {
3897        return Ok(None);
3898    }
3899    Ok(Some(NameStatusEntry {
3900        status: NameStatus::Modified,
3901        path: git_path.to_vec().into(),
3902        old_path: None,
3903        old_mode: Some(left.mode),
3904        new_mode: Some(right.mode),
3905        old_oid: Some(left.oid),
3906        new_oid: Some(right.oid),
3907    }))
3908}
3909
/// Build the `Deleted` name-status record for an index entry that has no
/// counterpart in the working tree.
///
/// The old side carries the entry's index mode and oid; the new side
/// (`new_mode`, `new_oid`) and `old_path` are left as `None`.
fn index_worktree_deleted_entry(index_entry: &impl WorktreeIndexEntry) -> NameStatusEntry {
    NameStatusEntry {
        status: NameStatus::Deleted,
        path: index_entry.git_path().to_vec().into(),
        old_path: None,
        old_mode: Some(index_entry.mode()),
        new_mode: None,
        old_oid: Some(index_entry.oid()),
        new_oid: None,
    }
}
3921
/// Build the worktree blob cache (a map from object id to bytes) for the
/// paths in `candidate_paths`.
///
/// Thin wrapper: iterates the set and forwards all arguments unchanged to
/// `worktree_blob_cache_for_unique_paths`, whose result it returns.
fn worktree_blob_cache_for_path_set(
    worktree_root: &Path,
    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
    candidate_paths: &BTreeSet<Vec<u8>>,
    options: RenameDetectionOptions,
) -> Result<HashMap<ObjectId, Vec<u8>>> {
    worktree_blob_cache_for_unique_paths(
        worktree_root,
        left_entries,
        right_entries,
        candidate_paths.iter(),
        options,
    )
}
3937
3938fn worktree_blob_cache_for_unique_paths<'a>(
3939    worktree_root: &Path,
3940    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3941    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3942    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
3943    options: RenameDetectionOptions,
3944) -> Result<HashMap<ObjectId, Vec<u8>>> {
3945    if !options.detect_inexact || !(options.base.detect_renames || options.base.detect_copies) {
3946        return Ok(HashMap::new());
3947    }
3948    let base = options.base;
3949    let mut changes =
3950        raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
3951    if base.detect_renames {
3952        changes = detect_exact_renames(changes, left_entries, right_entries, base.rename_empty);
3953    }
3954    if base.detect_copies {
3955        changes = detect_exact_copies(
3956            changes,
3957            left_entries,
3958            right_entries,
3959            base.find_copies_harder,
3960            base.rename_empty,
3961        );
3962    }
3963    let has_rename_source = base.detect_renames
3964        && changes.iter().any(|entry| {
3965            entry.status == NameStatus::Deleted
3966                && entry
3967                    .old_oid
3968                    .as_ref()
3969                    .is_some_and(|oid| base.rename_empty || !is_empty_blob_oid(oid))
3970        });
3971    let has_copy_source = base.detect_copies
3972        && (base.find_copies_harder
3973            || changes
3974                .iter()
3975                .any(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified)));
3976    if !has_rename_source && !has_copy_source {
3977        return Ok(HashMap::new());
3978    }
3979    let candidate_oids = changes
3980        .iter()
3981        .filter(|entry| entry.status == NameStatus::Added)
3982        .filter_map(|entry| entry.new_oid)
3983        .filter(|oid| base.rename_empty || !is_empty_blob_oid(oid))
3984        .collect::<BTreeSet<_>>();
3985    if candidate_oids.is_empty() {
3986        return Ok(HashMap::new());
3987    }
3988    let mut cache = HashMap::new();
3989    for (git_path, entry) in right_entries {
3990        if sley_index::is_gitlink(entry.mode) || !candidate_oids.contains(&entry.oid) {
3991            continue;
3992        }
3993        let path = worktree_path_for_repo_path(worktree_root, git_path);
3994        let body = if entry.mode == 0o120000 {
3995            symlink_target_bytes(&path)?
3996        } else {
3997            fs::read(&path)?
3998        };
3999        cache.entry(entry.oid).or_insert(body);
4000    }
4001    Ok(cache)
4002}
4003
4004/// A blob fetcher that consults an in-memory `oid -> bytes` cache first (e.g.
4005/// freshly-read worktree files) and falls back to the object database.
4006fn cache_or_odb_blob(
4007    cache: &HashMap<ObjectId, Vec<u8>>,
4008    db: &FileObjectDatabase,
4009    oid: &ObjectId,
4010) -> Option<Vec<u8>> {
4011    if let Some(bytes) = cache.get(oid) {
4012        return Some(bytes.clone());
4013    }
4014    read_blob_bytes(db, oid)
4015}
4016
/// Join a repository-relative path (raw bytes) onto `worktree_root`.
///
/// On Unix the bytes are used verbatim as an `OsStr`, so non-UTF-8 path names
/// survive unchanged.
#[cfg(unix)]
fn worktree_path_for_repo_path(worktree_root: &Path, path: &[u8]) -> PathBuf {
    use std::os::unix::ffi::OsStrExt;

    worktree_root.join(std::ffi::OsStr::from_bytes(path))
}
4026
/// Buffer-reusing variant of `worktree_path_for_repo_path`: overwrite `out`
/// with `worktree_root` joined with the raw repository-relative `path`.
///
/// Whatever `out` held before the call is discarded.
#[cfg(unix)]
fn worktree_path_for_repo_path_into(out: &mut PathBuf, worktree_root: &Path, path: &[u8]) {
    use std::os::unix::ffi::OsStrExt;

    let relative = std::ffi::OsStr::from_bytes(path);
    out.clear();
    // `extend` pushes each component in turn, root first.
    out.extend([worktree_root.as_os_str(), relative]);
}
4036
/// Join a repository-relative path (raw bytes) onto `worktree_root` on
/// non-Unix targets, converting the bytes with `repo_path_to_path` first.
#[cfg(not(unix))]
fn worktree_path_for_repo_path(worktree_root: &Path, path: &[u8]) -> PathBuf {
    let mut joined = worktree_root.to_path_buf();
    joined.push(repo_path_to_path(path));
    joined
}
4041
/// Buffer-reusing variant for non-Unix targets: overwrite `out` with
/// `worktree_root` joined with the converted repository-relative `path`.
#[cfg(not(unix))]
fn worktree_path_for_repo_path_into(out: &mut PathBuf, worktree_root: &Path, path: &[u8]) {
    let relative = repo_path_to_path(path);
    out.clear();
    out.push(worktree_root);
    out.push(relative);
}
4048
/// Convert a `/`-separated repository path into a native `PathBuf`.
///
/// The bytes are decoded lossily as UTF-8, split on `/`, and every non-empty
/// component is pushed in order; empty components (leading, trailing or
/// doubled slashes) are dropped.
#[cfg(not(unix))]
fn repo_path_to_path(path: &[u8]) -> PathBuf {
    String::from_utf8_lossy(path)
        .split('/')
        .filter(|segment| !segment.is_empty())
        .collect()
}
4059
/// Map filesystem permissions to one of the two regular-file modes:
/// `0o100755` when any execute bit (owner, group or other) is set, and
/// `0o100644` otherwise.
#[cfg(unix)]
fn file_mode(metadata: &fs::Metadata) -> u32 {
    use std::os::unix::fs::PermissionsExt;

    const ANY_EXECUTE_BIT: u32 = 0o111;
    match metadata.permissions().mode() & ANY_EXECUTE_BIT {
        0 => 0o100644,
        _ => 0o100755,
    }
}
4069
/// Non-Unix fallback: the metadata is not inspected and every file is
/// reported as a non-executable regular file.
#[cfg(not(unix))]
fn file_mode(_metadata: &fs::Metadata) -> u32 {
    const REGULAR_FILE_MODE: u32 = 0o100644;
    REGULAR_FILE_MODE
}
4074
4075#[cfg(unix)]
4076fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
4077    use std::os::unix::ffi::OsStrExt;
4078    let target = fs::read_link(path)?;
4079    Ok(target.as_os_str().as_bytes().to_vec())
4080}
4081
/// Read the target of the symlink at `path` on non-Unix targets.
///
/// The target is converted to text lossily and every backslash is replaced
/// with a forward slash before the bytes are returned.
///
/// # Errors
///
/// Propagates the error from `fs::read_link`.
#[cfg(not(unix))]
fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
    let link_target = fs::read_link(path)?;
    let forward_slashed = link_target.to_string_lossy().replace('\\', "/");
    Ok(forward_slashed.into_bytes())
}
4087
4088// ---------------------------------------------------------------------------
4089// Unified / git diff patch parsing and application (engine for `git apply`/`git am`).
4090//
4091// Operates purely on in-memory byte buffers; the caller is responsible for
4092// reading/writing blobs from the working tree or the object database. The
4093// parser understands the textual format git produces (`diff --git`, `---`/`+++`
4094// file headers, `@@` hunk headers, context/`+`/`-` body lines, the
// `\ No newline at end of file` marker, `/dev/null` for added/deleted files,
// file mode headers, `rename from`/`rename to` and `copy from`/`copy to`
// headers, and `similarity index`/`dissimilarity index` scores).
4097// ---------------------------------------------------------------------------
4098
/// One body line of a hunk, tagged with the side(s) of the diff it belongs to.
///
/// The payload is the line's bytes *without* its line terminator. Whether the
/// final line of a side lacks its `\n` is recorded on the owning [`Hunk`]
/// (see [`Hunk::old_no_newline`] / [`Hunk::new_no_newline`]), which is what
/// lets a file with no final newline be reproduced byte-for-byte.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HunkLine {
    /// Unchanged line: part of both the old and the new version.
    Context(Vec<u8>),
    /// Line introduced by the patch: part of the new version only.
    Insert(Vec<u8>),
    /// Line removed by the patch: part of the old version only.
    Delete(Vec<u8>),
}

impl HunkLine {
    /// Borrow the line's bytes (no trailing newline), whatever its kind.
    pub fn content(&self) -> &[u8] {
        // Every variant carries exactly one byte vector, so the or-pattern is
        // irrefutable.
        let (Self::Context(payload) | Self::Insert(payload) | Self::Delete(payload)) = self;
        payload
    }
}
4122
/// A single `@@ -old_start,old_len +new_start,new_len @@` hunk.
///
/// `old_start` / `new_start` are 1-based line numbers as they appear in the
/// patch header. The `*_no_newline` flags record that the final line on that
/// side of the hunk is *not* terminated by a newline (the `\ No newline at end
/// of file` marker).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Hunk {
    /// 1-based first line of the hunk on the old side, from the `@@` header.
    pub old_start: usize,
    /// Old-side line count (`old_len` in the `@@` header).
    pub old_len: usize,
    /// 1-based first line of the hunk on the new side, from the `@@` header.
    pub new_start: usize,
    /// New-side line count (`new_len` in the `@@` header).
    pub new_len: usize,
    /// Body lines (context, inserts and deletes) in patch order.
    pub lines: Vec<HunkLine>,
    /// The last context/deleted line of the old file lacks a trailing newline.
    pub old_no_newline: bool,
    /// The last context/inserted line of the new file lacks a trailing newline.
    pub new_no_newline: bool,
}
4141
/// A patch targeting a single file. Produced by [`parse_unified_patch`].
///
/// Paths start out as the pair taken from the `diff --git` line (when one is
/// present and parses) and are then refined by `rename`/`copy` headers and by
/// the `---`/`+++` file headers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilePatch {
    /// Path on the `a/` (old) side, or `None` for a newly created file.
    pub old_path: Option<Vec<u8>>,
    /// Path on the `b/` (new) side, or `None` for a deleted file.
    pub new_path: Option<Vec<u8>>,
    /// Mode of the old file, when a mode header was present.
    pub old_mode: Option<u32>,
    /// Mode of the new file, when a mode header was present.
    pub new_mode: Option<u32>,
    /// Hunks in the order they appear in the patch; empty for header-only
    /// patches (for example a pure rename or mode change).
    pub hunks: Vec<Hunk>,
    /// The patch creates a new file (`--- /dev/null` / `new file mode`).
    pub is_new: bool,
    /// The patch deletes the file (`+++ /dev/null` / `deleted file mode`).
    pub is_delete: bool,
    /// The patch renames the file (`rename from`/`rename to`).
    pub is_rename: bool,
    /// The patch copies the file (`copy from`/`copy to`).
    pub is_copy: bool,
    /// Similarity score from `similarity index N%`, used for rename/copy summaries.
    pub similarity: Option<u8>,
    /// Dissimilarity score from `dissimilarity index N%`, used for rewrite summaries.
    pub dissimilarity: Option<u8>,
}
4167
/// Outcome of applying a [`FilePatch`] to a base buffer.
///
/// Application is all-or-nothing: a rejection carries no partially patched
/// bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ApplyOutcome {
    /// The patch applied cleanly; carries the resulting file bytes.
    Applied(Vec<u8>),
    /// At least one hunk's context/deleted lines did not match the base.
    Rejected,
}
4176
/// The minimum number of context lines git's `apply` insists on keeping when
/// it tries to fuzz a hunk into place — git's `apply_state.p_context`, which is
/// initialised to `UINT_MAX` (the `-C<n>` option lowers it). The fuzz loop in
/// `apply_one_fragment` stops the moment both leading and trailing context have
/// been reduced to this floor; with the default `UINT_MAX` floor that test is
/// already satisfied on the first failure, so **the default `git apply` / `git
/// am` path does no context fuzz and no begin/end relaxation at all** — a hunk
/// whose full preimage does not match at a valid position is simply rejected.
/// We keep the floor configurable so the structure mirrors git's, but the
/// shared apply engine only ever runs with the default.
///
/// `apply_one_hunk` compares its leading and trailing context counts against
/// this value after every failed search; because no `usize` can exceed
/// `usize::MAX`, that comparison always succeeds and the hunk is rejected
/// without any retry.
const MIN_FUZZ_CONTEXT: usize = usize::MAX;
4188
4189/// Parse a unified/git diff into one [`FilePatch`] per file it touches.
4190///
4191/// The parser is intentionally lenient about leading commentary (commit
4192/// messages, `index <oid>..<oid>` lines, etc.): anything that is not part of a
4193/// recognised header or hunk body is skipped. It errors only on structurally
4194/// invalid hunks (bad `@@` headers, body lines that overflow the declared hunk
4195/// counts, or hunk bodies that appear with no preceding file header).
4196pub fn parse_unified_patch(input: &[u8]) -> Result<Vec<FilePatch>> {
4197    parse_unified_patch_with_recount(input, false)
4198}
4199
4200/// Parse a unified/git diff, optionally ignoring hunk header line counts and
4201/// recounting them from the hunk body. This mirrors `git apply --recount`.
4202pub fn parse_unified_patch_with_recount(input: &[u8], recount: bool) -> Result<Vec<FilePatch>> {
4203    let lines = split_patch_lines(input);
4204    let mut parser = PatchParser {
4205        lines: &lines,
4206        index: 0,
4207        recount,
4208    };
4209    parser.parse()
4210}
4211
4212/// Apply a single-file patch to `base`, returning the patched bytes.
4213///
4214/// This mirrors git's `apply.c` (`apply_one_fragment` / `find_pos` /
4215/// `match_fragment`) for the default, no-whitespace-fuzz settings `git am`
4216/// and `git apply` use:
4217///
4218/// * Each hunk builds a *preimage* (context + deleted lines) and *postimage*
4219///   (context + inserted lines).
4220/// * A hunk anchored at the file start (`old_start <= 1`) must match the
4221///   beginning of the file (`match_beginning`); a hunk with no trailing context
4222///   must match the end of the file (`match_end`).
4223/// * The full preimage is matched byte-for-byte; the search starts at the
4224///   recorded position and ping-pongs outward across the whole image.
4225/// * Fuzz is applied *only* by dropping leading/trailing context lines (never
4226///   by jumping to a spurious context-only match); if no position matches even
4227///   after dropping all context, the hunk — and thus the whole patch — is
4228///   [`ApplyOutcome::Rejected`].
4229///
4230/// Rejecting (rather than spuriously applying at a wrong offset) is what lets
4231/// `git am -3` correctly fall back to its 3-way merge path.
4232///
4233/// New-file patches (empty/ignored base) and the no-final-newline case are
4234/// handled byte-accurately. Clean exact-position applies are byte-identical to
4235/// the previous behaviour.
4236pub fn apply_file_patch(base: &[u8], patch: &FilePatch) -> ApplyOutcome {
4237    // A pure deletion with no hunks yields an empty file.
4238    if patch.is_delete && patch.hunks.is_empty() {
4239        return ApplyOutcome::Applied(Vec::new());
4240    }
4241    // A new file: the only sensible base is empty; ignore whatever was passed
4242    // and build the result from the inserted lines.
4243    let base_for_match: &[u8] = if patch.is_new { b"" } else { base };
4244
4245    // The "image" git mutates as each hunk applies. We splice in place so later
4246    // hunks see the effect of earlier ones (git carries the running offset for
4247    // the same reason).
4248    let mut image = split_blob_lines(base_for_match);
4249
4250    // git seeds the search for hunk N at `newpos-1` *plus* the offset earlier
4251    // hunks drifted by, so a uniform shift only costs the search once.
4252    let mut running_offset: isize = 0;
4253
4254    for hunk in &patch.hunks {
4255        match apply_one_hunk(&mut image, hunk, running_offset) {
4256            Some(drift) => running_offset += drift,
4257            None => return ApplyOutcome::Rejected,
4258        }
4259    }
4260
4261    ApplyOutcome::Applied(join_lines(&image))
4262}
4263
/// Splice a single hunk into `image`, returning the offset (applied position −
/// expected position) so later hunks can carry it forward, or `None` if the
/// hunk cannot be located (which rejects the whole patch).
///
/// Faithful to git's `apply_one_fragment`: build preimage/postimage, try the
/// full preimage at progressively-reduced context, and on a match replace the
/// matched preimage region with the postimage.
///
/// * `image` — the file as a list of lines; mutated in place on success and
///   left untouched when `None` is returned.
/// * `hunk` — the hunk to apply.
/// * `running_offset` — drift accumulated by earlier hunks, added to this
///   hunk's expected position before searching.
///
/// Note that with the default [`MIN_FUZZ_CONTEXT`] (`usize::MAX`) the floor
/// check inside the loop is always true, so the loop body runs exactly once:
/// either the full preimage matches or the function returns `None`. The
/// anchor-relaxation and context-peeling code below it is unreachable in that
/// configuration.
fn apply_one_hunk(image: &mut Vec<Line>, hunk: &Hunk, running_offset: isize) -> Option<isize> {
    // preimage = context + deletes (the old side we must find in the image).
    // postimage = context + inserts (what replaces it). They share their
    // leading/trailing *context* runs, which fuzz peels off symmetrically.
    let mut preimage: Vec<Line> = Vec::new();
    let mut postimage: Vec<Line> = Vec::new();
    let mut leading = 0usize; // context lines before the first +/-
    let mut trailing = 0usize; // context lines after the last +/-
    let mut seen_change = false;
    for hl in &hunk.lines {
        match hl {
            HunkLine::Context(bytes) => {
                preimage.push(Line {
                    content: bytes.clone(),
                    no_newline: false,
                });
                postimage.push(Line {
                    content: bytes.clone(),
                    no_newline: false,
                });
                if !seen_change {
                    leading += 1;
                }
                // Counts the current run of context; reset by any +/- line, so
                // after the loop it holds only the context after the last change.
                trailing += 1;
            }
            HunkLine::Delete(bytes) => {
                preimage.push(Line {
                    content: bytes.clone(),
                    no_newline: false,
                });
                seen_change = true;
                trailing = 0;
            }
            HunkLine::Insert(bytes) => {
                postimage.push(Line {
                    content: bytes.clone(),
                    no_newline: false,
                });
                seen_change = true;
                trailing = 0;
            }
        }
    }

    // Mark the no-final-newline state on the last preimage/postimage line so the
    // exact-match check and the spliced result reproduce a missing terminal
    // newline byte-for-byte.
    if hunk.old_no_newline
        && let Some(last) = preimage.last_mut()
    {
        last.no_newline = true;
    }
    if hunk.new_no_newline
        && let Some(last) = postimage.last_mut()
    {
        last.no_newline = true;
    }

    // A hunk that is `@@ -1,L ... @@` (or `@@ -0,0 ... @@` for an add-to-empty)
    // must match the beginning. A hunk with no trailing context must match the
    // end. (`git am`/`apply` do not pass `--unidiff-zero`, so old_start == 1
    // still implies match_beginning.)
    let mut match_beginning = hunk.old_start <= 1;
    let mut match_end = trailing == 0;

    // git anchors the search at `newpos-1` (0-based), carried by the running
    // offset from earlier hunks. The anchor (`pos` in git) shifts up whenever a
    // *leading* context line is peeled, because the preimage then begins one
    // line later in its own content.
    let mut expected = expected_position(hunk, running_offset);
    // The full hunk's expected position never moves, so the returned drift is
    // measured against it (not the context-reduced anchor).
    let hunk_expected = expected;

    loop {
        if let Some(pos) = find_hunk_pos(image, &preimage, expected, match_beginning, match_end) {
            // Splice: drop the matched preimage lines, insert the postimage.
            let take = preimage.len();
            let replacement: Vec<Line> = postimage.clone();
            image.splice(pos..pos + take, replacement);
            return Some(pos as isize - hunk_expected);
        }

        // No position matched. Mirror git's guard *order* exactly: it first
        // checks whether context is already at the floor (`p_context`) and, if
        // so, gives up BEFORE relaxing match_beginning/match_end or peeling
        // context. With the default `UINT_MAX` floor this fires on the very
        // first failure, so the default path never fuzzes and never relaxes the
        // begin/end anchors — it rejects. (The comparison is intentionally
        // against the floor so the structure stays faithful to git even though
        // the default floor makes it unconditionally true.)
        #[allow(clippy::absurd_extreme_comparisons)]
        if leading <= MIN_FUZZ_CONTEXT && trailing <= MIN_FUZZ_CONTEXT {
            return None;
        }

        // git relaxes the begin/end anchors before peeling context: a hunk that
        // "must match the start/end" but didn't is retried free-floating first.
        if match_beginning || match_end {
            match_beginning = false;
            match_end = false;
            continue;
        }

        // Reduce context: peel the larger side (both if equal), exactly as git.
        // Getting here means at least one of the two counts exceeds the floor,
        // so the side being peeled is non-empty.
        if leading >= trailing {
            // Drop the first context line from pre+post; the anchor slides up.
            preimage.remove(0);
            postimage.remove(0);
            expected -= 1;
            leading -= 1;
        }
        // Evaluated after the decrement above, so equal counts peel both sides.
        if trailing > leading {
            preimage.pop();
            postimage.pop();
            trailing -= 1;
        }
    }
}
4390
/// A line with its content (sans terminator) and a flag recording whether the
/// terminating newline is *missing*.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Line {
    /// The line's bytes, without the trailing `\n`.
    content: Vec<u8>,
    /// `true` when the line is NOT followed by `\n`; `false` for an ordinary
    /// newline-terminated line.
    no_newline: bool,
}
4397
4398/// Split a blob into [`Line`]s. A trailing `\n` does not produce an empty final
4399/// line; instead the last real line is marked `no_newline = false`. A file that
4400/// does not end in `\n` marks its final line `no_newline = true`. An empty blob
4401/// produces no lines.
4402fn split_blob_lines(data: &[u8]) -> Vec<Line> {
4403    let mut lines = Vec::new();
4404    let mut start = 0usize;
4405    while start < data.len() {
4406        match data[start..].iter().position(|&b| b == b'\n') {
4407            Some(rel) => {
4408                let end = start + rel;
4409                lines.push(Line {
4410                    content: data[start..end].to_vec(),
4411                    no_newline: false,
4412                });
4413                start = end + 1;
4414            }
4415            None => {
4416                lines.push(Line {
4417                    content: data[start..].to_vec(),
4418                    no_newline: true,
4419                });
4420                start = data.len();
4421            }
4422        }
4423    }
4424    lines
4425}
4426
4427/// Reassemble lines into a byte buffer, honouring per-line newline state.
4428fn join_lines(lines: &[Line]) -> Vec<u8> {
4429    let mut out = Vec::new();
4430    for line in lines {
4431        out.extend_from_slice(&line.content);
4432        if !line.no_newline {
4433            out.push(b'\n');
4434        }
4435    }
4436    out
4437}
4438
4439/// The naive 0-based position where a hunk expects to apply, given the running
4440/// offset accumulated from earlier hunks.
4441fn expected_position(hunk: &Hunk, running_offset: isize) -> isize {
4442    // `old_start` is 1-based; an empty old side (new-file hunk) uses 0.
4443    let base = if hunk.old_start == 0 {
4444        0
4445    } else {
4446        hunk.old_start as isize - 1
4447    };
4448    base + running_offset
4449}
4450
4451/// Find the 0-based line index in `image` where `preimage` (the hunk's context
4452/// + deleted lines, possibly already context-reduced by fuzz) matches.
4453///
4454/// Port of git's `find_pos`: start the search at `expected` (clamped, or forced
4455/// to 0/end when `match_beginning`/`match_end`), then ping-pong outward across
4456/// the *whole* image — backward and forward alternately — until both ends are
4457/// exhausted. Returns the first matching line index, or `None`.
4458fn find_hunk_pos(
4459    image: &[Line],
4460    preimage: &[Line],
4461    expected: isize,
4462    match_beginning: bool,
4463    match_end: bool,
4464) -> Option<usize> {
4465    let line_nr = image.len();
4466    let pre_nr = preimage.len();
4467
4468    // git: if we must match the beginning, start at 0; if we must match the
4469    // end, start where the preimage would end exactly at EOF.
4470    let mut line: isize = if match_beginning {
4471        0
4472    } else if match_end {
4473        line_nr as isize - pre_nr as isize
4474    } else {
4475        expected
4476    };
4477    if line < 0 {
4478        line = 0;
4479    }
4480    if line as usize > line_nr {
4481        line = line_nr as isize;
4482    }
4483
4484    let start = line as usize;
4485    let mut backwards = start;
4486    let mut forwards = start;
4487    let mut current = start;
4488
4489    let mut i: u64 = 0;
4490    loop {
4491        if preimage_matches_at(image, preimage, current, match_beginning, match_end) {
4492            return Some(current);
4493        }
4494
4495        loop {
4496            // Both ends exhausted: no match anywhere.
4497            if backwards == 0 && forwards == line_nr {
4498                return None;
4499            }
4500            if i & 1 == 1 {
4501                // Step backward.
4502                if backwards == 0 {
4503                    i += 1;
4504                    continue;
4505                }
4506                backwards -= 1;
4507                current = backwards;
4508            } else {
4509                // Step forward.
4510                if forwards == line_nr {
4511                    i += 1;
4512                    continue;
4513                }
4514                forwards += 1;
4515                current = forwards;
4516            }
4517            break;
4518        }
4519        i += 1;
4520    }
4521}
4522
4523/// Whether `preimage` matches `image` starting at line `pos`.
4524///
4525/// Port of git's `match_fragment` for the default (no whitespace-fuzz) path:
4526/// a byte-exact full-preimage match. Honours `match_beginning` (pos must be 0)
4527/// and `match_end` (the preimage must reach *exactly* the end of the image),
4528/// and reproduces git's terminal-newline semantics — a preimage line marked
4529/// "no newline" only matches when it is the image's final line and that line is
4530/// itself newline-free.
4531fn preimage_matches_at(
4532    image: &[Line],
4533    preimage: &[Line],
4534    pos: usize,
4535    match_beginning: bool,
4536    match_end: bool,
4537) -> bool {
4538    if match_beginning && pos != 0 {
4539        return false;
4540    }
4541    // The whole preimage must fall within the image.
4542    if pos + preimage.len() > image.len() {
4543        return false;
4544    }
4545    if match_end && pos + preimage.len() != image.len() {
4546        return false;
4547    }
4548    for (i, pre) in preimage.iter().enumerate() {
4549        let img = &image[pos + i];
4550        if img.content != pre.content {
4551            return false;
4552        }
4553        // git compares the raw byte buffers, so a missing terminal newline on
4554        // either side only matches the other when both agree. A preimage line
4555        // that lacks a newline can only sit on the image's final line (which
4556        // must itself lack one); a preimage line that *has* a newline cannot
4557        // match a newline-free image line.
4558        if pre.no_newline != img.no_newline {
4559            return false;
4560        }
4561    }
4562    true
4563}
4564
/// Split raw patch bytes into borrowed lines.
///
/// Each returned slice is one line's content with the terminating `\n`
/// removed; a final line that has no terminator is still returned. Carriage
/// returns are left in place so CRLF patch bodies round-trip. Empty input
/// yields no lines.
fn split_patch_lines(input: &[u8]) -> Vec<&[u8]> {
    input
        .split_inclusive(|&byte| byte == b'\n')
        .map(|chunk| match chunk.split_last() {
            Some((&b'\n', body)) => body,
            _ => chunk,
        })
        .collect()
}
4586
/// Cursor-based parser over the lines of a patch.
///
/// Constructed by `parse_unified_patch_with_recount` with `index` at 0; the
/// parsing methods advance `index` as they consume lines.
struct PatchParser<'a> {
    /// The patch split into lines (terminators already removed).
    lines: &'a [&'a [u8]],
    /// Index into `lines` of the next line to examine.
    index: usize,
    /// The `recount` flag the caller passed in.
    // NOTE(review): presumably consulted by `parse_hunk` to recompute header
    // counts from the body; the code that reads it is not shown here.
    recount: bool,
}
4592
4593impl<'a> PatchParser<'a> {
4594    fn parse(&mut self) -> Result<Vec<FilePatch>> {
4595        let mut patches = Vec::new();
4596        while self.index < self.lines.len() {
4597            let line = self.lines[self.index];
4598            if line.starts_with(b"diff --git ") {
4599                patches.push(self.parse_file(Some(line))?);
4600            } else if line.starts_with(b"--- ") {
4601                // A bare unified diff with no `diff --git` header.
4602                patches.push(self.parse_file(None)?);
4603            } else if line.starts_with(b"@@ ") {
4604                return Err(GitError::InvalidFormat(
4605                    "hunk header encountered before any file header".to_string(),
4606                ));
4607            } else {
4608                // Skip commentary / unrelated lines.
4609                self.index += 1;
4610            }
4611        }
4612        Ok(patches)
4613    }
4614
    /// Parse one file's headers and hunks. When `diff_line` is `Some`, the
    /// current line is the `diff --git` header (already inspected by the
    /// caller); otherwise parsing starts at a `--- ` line.
    ///
    /// Parsing runs in three phases: extended headers (mode, rename, copy and
    /// similarity lines) up to and including the `--- ` line, an optional
    /// `+++ ` line, then hunks. On return `self.index` points at the line
    /// that starts the next file section, or past the end of the input.
    ///
    /// # Errors
    ///
    /// Propagates any error from `parse_hunk`.
    fn parse_file(&mut self, diff_line: Option<&[u8]>) -> Result<FilePatch> {
        let mut patch = FilePatch {
            old_path: None,
            new_path: None,
            old_mode: None,
            new_mode: None,
            hunks: Vec::new(),
            is_new: false,
            is_delete: false,
            is_rename: false,
            is_copy: false,
            similarity: None,
            dissimilarity: None,
        };
        // Default paths from `diff --git a/x b/x` if present (overridden by
        // `---`/`+++` lines when those carry real paths).
        if let Some(diff_line) = diff_line {
            if let Some((a, b)) = parse_diff_git_paths(diff_line) {
                patch.old_path = Some(a);
                patch.new_path = Some(b);
            }
            // Step past the `diff --git` line itself.
            self.index += 1;
        }

        // Extended headers until the first `---`/`@@`/next `diff --git`.
        while self.index < self.lines.len() {
            let line = self.lines[self.index];
            if line.starts_with(b"--- ") {
                self.parse_old_file_header(line, &mut patch);
                self.index += 1;
                break;
            } else if line.starts_with(b"@@ ") {
                // No `---`/`+++` (e.g. pure rename or mode change with no body).
                break;
            } else if line.starts_with(b"diff --git ") {
                // Next file began with no body for this one.
                return Ok(patch);
            } else if let Some(rest) = strip_prefix(line, b"old mode ") {
                patch.old_mode = parse_octal(rest);
            } else if let Some(rest) = strip_prefix(line, b"new mode ") {
                patch.new_mode = parse_octal(rest);
            } else if let Some(rest) = strip_prefix(line, b"new file mode ") {
                patch.is_new = true;
                patch.new_mode = parse_octal(rest);
            } else if let Some(rest) = strip_prefix(line, b"deleted file mode ") {
                patch.is_delete = true;
                patch.old_mode = parse_octal(rest);
            } else if let Some(rest) = strip_prefix(line, b"rename from ") {
                patch.is_rename = true;
                patch.old_path = Some(rest.to_vec());
            } else if let Some(rest) = strip_prefix(line, b"rename to ") {
                patch.is_rename = true;
                patch.new_path = Some(rest.to_vec());
            } else if let Some(rest) = strip_prefix(line, b"copy from ") {
                patch.is_copy = true;
                patch.old_path = Some(rest.to_vec());
            } else if let Some(rest) = strip_prefix(line, b"copy to ") {
                patch.is_copy = true;
                patch.new_path = Some(rest.to_vec());
            } else if let Some(rest) = strip_prefix(line, b"similarity index ") {
                patch.similarity = parse_percent(rest);
            } else if let Some(rest) = strip_prefix(line, b"dissimilarity index ") {
                patch.dissimilarity = parse_percent(rest);
            } else {
                // `index ..` and any other unrecognised header line — ignore.
                self.index += 1;
                continue;
            }
            // A recognised header was consumed; move to the next line.
            self.index += 1;
        }

        // `+++` header (the old-file branch above already advanced past `---`).
        if self.index < self.lines.len() && self.lines[self.index].starts_with(b"+++ ") {
            self.parse_new_file_header(self.lines[self.index], &mut patch);
            self.index += 1;
        }

        // Hunks. `parse_hunk` is entered with `self.index` on the `@@` line and
        // advances the cursor itself.
        while self.index < self.lines.len() {
            let line = self.lines[self.index];
            if line.starts_with(b"@@ ") {
                let hunk = self.parse_hunk()?;
                patch.hunks.push(hunk);
            } else if line.starts_with(b"diff --git ") {
                break;
            } else if line.starts_with(b"--- ") {
                // Start of a subsequent bare diff.
                break;
            } else {
                // Trailing commentary between/after hunks.
                self.index += 1;
            }
        }

        Ok(patch)
    }
4714
4715    fn parse_old_file_header(&self, line: &[u8], patch: &mut FilePatch) {
4716        let rest = strip_prefix(line, b"--- ").unwrap_or(line);
4717        let path = strip_header_path(rest);
4718        match path {
4719            HeaderPath::DevNull => {
4720                patch.is_new = true;
4721                patch.old_path = None;
4722            }
4723            HeaderPath::Path(p) => {
4724                // Only override if we did not already learn a real path.
4725                if patch.old_path.is_none() || !(patch.is_rename || patch.is_copy) {
4726                    patch.old_path = Some(p);
4727                }
4728            }
4729        }
4730    }
4731
4732    fn parse_new_file_header(&self, line: &[u8], patch: &mut FilePatch) {
4733        let rest = strip_prefix(line, b"+++ ").unwrap_or(line);
4734        let path = strip_header_path(rest);
4735        match path {
4736            HeaderPath::DevNull => {
4737                patch.is_delete = true;
4738                patch.new_path = None;
4739            }
4740            HeaderPath::Path(p) => {
4741                if patch.new_path.is_none() || !(patch.is_rename || patch.is_copy) {
4742                    patch.new_path = Some(p);
4743                }
4744            }
4745        }
4746    }
4747
4748    fn parse_hunk(&mut self) -> Result<Hunk> {
4749        let header = self.lines[self.index];
4750        let (old_start, old_len, new_start, new_len) = parse_hunk_header(header)?;
4751        self.index += 1;
4752
4753        let mut hunk = Hunk {
4754            old_start,
4755            old_len,
4756            new_start,
4757            new_len,
4758            lines: Vec::new(),
4759            old_no_newline: false,
4760            new_no_newline: false,
4761        };
4762        let mut old_seen = 0usize;
4763        let mut new_seen = 0usize;
4764
4765        while self.index < self.lines.len() {
4766            // Stop when both sides are satisfied. In recount mode the header
4767            // counts are intentionally ignored; the next hunk/file header ends
4768            // the body.
4769            if !self.recount && old_seen >= old_len && new_seen >= new_len {
4770                break;
4771            }
4772            let line = self.lines[self.index];
4773            if self.recount
4774                && (line.starts_with(b"@@ ")
4775                    || line.starts_with(b"diff --git ")
4776                    || line.starts_with(b"diff a/")
4777                    || line.starts_with(b"--- "))
4778            {
4779                break;
4780            }
4781            if line.is_empty() {
4782                // A wholly empty line in a unified diff is a context line whose
4783                // content is the empty string (git emits a bare ` `, but some
4784                // tooling/email transport strips the trailing space).
4785                hunk.lines.push(HunkLine::Context(Vec::new()));
4786                old_seen += 1;
4787                new_seen += 1;
4788                self.index += 1;
4789                continue;
4790            }
4791            match line[0] {
4792                b' ' => {
4793                    hunk.lines.push(HunkLine::Context(line[1..].to_vec()));
4794                    old_seen += 1;
4795                    new_seen += 1;
4796                }
4797                b'+' => {
4798                    hunk.lines.push(HunkLine::Insert(line[1..].to_vec()));
4799                    new_seen += 1;
4800                }
4801                b'-' => {
4802                    hunk.lines.push(HunkLine::Delete(line[1..].to_vec()));
4803                    old_seen += 1;
4804                }
4805                b'\\' => {
4806                    // `\ No newline at end of file` — applies to the line just
4807                    // emitted. Set the appropriate side flag(s).
4808                    self.mark_no_newline(&mut hunk);
4809                    self.index += 1;
4810                    continue;
4811                }
4812                _ => {
4813                    // Anything else terminates the hunk body.
4814                    break;
4815                }
4816            }
4817            self.index += 1;
4818        }
4819
4820        // A trailing `\ No newline` may follow the final body line even after
4821        // the counts are satisfied; consume it.
4822        if self.index < self.lines.len() && self.lines[self.index].starts_with(b"\\") {
4823            self.mark_no_newline(&mut hunk);
4824            self.index += 1;
4825        }
4826
4827        if self.recount {
4828            hunk.old_len = old_seen;
4829            hunk.new_len = new_seen;
4830        } else if old_seen != old_len || new_seen != new_len {
4831            return Err(GitError::InvalidFormat(format!(
4832                "hunk body line counts mismatch: header declared -{old_len},+{new_len} \
4833                 but body had -{old_seen},+{new_seen}"
4834            )));
4835        }
4836
4837        Ok(hunk)
4838    }
4839
4840    /// Set the no-newline flag based on the kind of the most recently pushed
4841    /// hunk line.
4842    fn mark_no_newline(&self, hunk: &mut Hunk) {
4843        match hunk.lines.last() {
4844            Some(HunkLine::Context(_)) => {
4845                hunk.old_no_newline = true;
4846                hunk.new_no_newline = true;
4847            }
4848            Some(HunkLine::Insert(_)) => hunk.new_no_newline = true,
4849            Some(HunkLine::Delete(_)) => hunk.old_no_newline = true,
4850            None => {}
4851        }
4852    }
4853}
4854
/// What the path portion of a `---`/`+++` header line names, as classified by
/// `strip_header_path`.
enum HeaderPath {
    /// The header named `/dev/null`, i.e. that side of the patch has no file.
    DevNull,
    /// A real path, with any trailing timestamp and a leading `a/` or `b/`
    /// already removed.
    Path(Vec<u8>),
}
4859
4860/// Extract the path from a `---`/`+++` header tail, stripping a leading `a/` or
4861/// `b/` prefix, an optional trailing timestamp (separated by a tab), and
4862/// recognising `/dev/null`.
4863fn strip_header_path(rest: &[u8]) -> HeaderPath {
4864    // Cut a trailing tab-delimited timestamp if present.
4865    let path = match rest.iter().position(|&b| b == b'\t') {
4866        Some(tab) => &rest[..tab],
4867        None => rest,
4868    };
4869    let path = trim_ascii_end(path);
4870    if path == b"/dev/null" {
4871        return HeaderPath::DevNull;
4872    }
4873    // Strip a leading `a/` or `b/` (git's default prefixes).
4874    let stripped = if path.starts_with(b"a/") || path.starts_with(b"b/") {
4875        &path[2..]
4876    } else {
4877        path
4878    };
4879    HeaderPath::Path(stripped.to_vec())
4880}
4881
/// Parse the two paths out of `diff --git a/<x> b/<y>`, returning them with
/// their `a/`/`b/` prefixes stripped.
///
/// git does not quote spaces in these names, so a name that itself contains
/// ` b/` makes the line splittable in more than one place. Resolution:
///
/// * exactly one possible split: use it;
/// * several possible splits: use the one whose halves are identical (the
///   non-rename case, the same rule git's own header parser applies);
/// * several splits and none with identical halves: return `None`, so the
///   caller falls back to the `rename from/to`, `copy from/to` or `---`/`+++`
///   headers.
///
/// Also returns `None` when the line is not a `diff --git a/...` header, when
/// it has no ` b/` separator, or when the first path is C-quoted (starts with
/// `"`); quoted names are left to the later headers.
fn parse_diff_git_paths(line: &[u8]) -> Option<(Vec<u8>, Vec<u8>)> {
    const SEPARATOR: &[u8] = b" b/";

    let rest = line.strip_prefix(b"diff --git ")?;
    if rest.first() == Some(&b'"') {
        return None;
    }
    let names = rest.strip_prefix(b"a/")?;

    // Every position where ` b/` occurs is a candidate boundary.
    let splits: Vec<(&[u8], &[u8])> = names
        .windows(SEPARATOR.len())
        .enumerate()
        .filter(|(_, window)| *window == SEPARATOR)
        .map(|(at, _)| (&names[..at], &names[at + SEPARATOR.len()..]))
        .collect();

    match splits.as_slice() {
        [] => None,
        [(old, new)] => Some((old.to_vec(), new.to_vec())),
        // Ambiguous: only an identical-halves split can be trusted. Equal
        // halves force equal lengths, so at most one candidate qualifies.
        ambiguous => ambiguous
            .iter()
            .find(|(old, new)| old == new)
            .map(|(old, new)| (old.to_vec(), new.to_vec())),
    }
}
4903
4904/// Parse an `@@ -l,s +l,s @@` header into `(old_start, old_len, new_start,
4905/// new_len)`. A missing `,s` means a length of 1.
4906fn parse_hunk_header(line: &[u8]) -> Result<(usize, usize, usize, usize)> {
4907    let err = || GitError::InvalidFormat(format!("malformed hunk header: {}", lossy(line)));
4908    let rest = strip_prefix(line, b"@@ ").ok_or_else(err)?;
4909    // Up to the closing ` @@`.
4910    let close = find_subslice(rest, b" @@").ok_or_else(err)?;
4911    let ranges = &rest[..close];
4912    let mut parts = ranges.split(|&b| b == b' ').filter(|p| !p.is_empty());
4913    let old = parts.next().ok_or_else(err)?;
4914    let new = parts.next().ok_or_else(err)?;
4915    let old = strip_prefix(old, b"-").ok_or_else(err)?;
4916    let new = strip_prefix(new, b"+").ok_or_else(err)?;
4917    let (old_start, old_len) = parse_range(old).ok_or_else(err)?;
4918    let (new_start, new_len) = parse_range(new).ok_or_else(err)?;
4919    Ok((old_start, old_len, new_start, new_len))
4920}
4921
4922/// Parse `start[,len]` into `(start, len)`, defaulting `len` to 1.
4923fn parse_range(range: &[u8]) -> Option<(usize, usize)> {
4924    match range.iter().position(|&b| b == b',') {
4925        Some(comma) => {
4926            let start = parse_usize(&range[..comma])?;
4927            let len = parse_usize(&range[comma + 1..])?;
4928            Some((start, len))
4929        }
4930        None => Some((parse_usize(range)?, 1)),
4931    }
4932}
4933
/// Parse a non-empty run of ASCII decimal digits as a `usize`.
///
/// Returns `None` for empty input, for any non-digit byte (signs and
/// whitespace included), and when the value does not fit in `usize`.
fn parse_usize(bytes: &[u8]) -> Option<usize> {
    if bytes.is_empty() {
        return None;
    }
    bytes.iter().try_fold(0usize, |total, &byte| {
        if byte.is_ascii_digit() {
            total.checked_mul(10)?.checked_add(usize::from(byte - b'0'))
        } else {
            None
        }
    })
}
4947
4948fn parse_octal(bytes: &[u8]) -> Option<u32> {
4949    let trimmed = trim_ascii_end(bytes);
4950    if trimmed.is_empty() {
4951        return None;
4952    }
4953    let mut value: u32 = 0;
4954    for &b in trimmed {
4955        if !(b'0'..=b'7').contains(&b) {
4956            return None;
4957        }
4958        value = value.checked_mul(8)?.checked_add((b - b'0') as u32)?;
4959    }
4960    Some(value)
4961}
4962
4963fn parse_percent(bytes: &[u8]) -> Option<u8> {
4964    let trimmed = trim_ascii_end(bytes)
4965        .strip_suffix(b"%")
4966        .unwrap_or(trim_ascii_end(bytes));
4967    let value = parse_usize(trimmed)?;
4968    u8::try_from(value).ok().filter(|value| *value <= 100)
4969}
4970
/// Return what follows `prefix` in `line`, or `None` when `line` does not
/// start with `prefix`. An empty prefix matches and yields the whole line.
fn strip_prefix<'b>(line: &'b [u8], prefix: &[u8]) -> Option<&'b [u8]> {
    // Resolves to the inherent slice method, not a recursive call.
    line.strip_prefix(prefix)
}
4978
/// Offset of the first occurrence of `needle` in `haystack`.
///
/// Returns `None` when there is no occurrence, and also for an empty
/// `needle` (an empty needle never matches).
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    // `checked_sub` fails exactly when the needle is longer than the haystack.
    let last_start = match haystack.len().checked_sub(needle.len()) {
        Some(last) if !needle.is_empty() => last,
        _ => return None,
    };
    (0..=last_start).find(|&start| haystack[start..].starts_with(needle))
}
4987
/// Drop trailing spaces and carriage returns from `bytes`.
///
/// Only `b' '` and `b'\r'` are trimmed; tabs, newlines and other whitespace
/// are kept.
fn trim_ascii_end(bytes: &[u8]) -> &[u8] {
    let keep = bytes
        .iter()
        .rposition(|&byte| byte != b' ' && byte != b'\r')
        .map_or(0, |last_kept| last_kept + 1);
    &bytes[..keep]
}
4995
/// Render `bytes` as an owned `String` for diagnostics, substituting U+FFFD
/// for any invalid UTF-8 sequence.
fn lossy(bytes: &[u8]) -> String {
    match String::from_utf8_lossy(bytes) {
        std::borrow::Cow::Borrowed(valid) => valid.to_owned(),
        std::borrow::Cow::Owned(repaired) => repaired,
    }
}
4999
5000// ===========================================================================
5001// Library tree-merge seam (`merge_trees`).
5002//
5003// This is the single 3-way tree-merge engine that every merge porcelain calls.
5004// Before it existed the logic was duplicated across the CLI: `merge-tree
5005// --write-tree` had its own copy and `git merge` / `cherry-pick` / `revert`
5006// had a second copy. Both copies implemented the identical per-path diff3
5007// resolution; the only differences were *rendering* (write-tree emits a tree +
5008// stage list + messages; the porcelains stage an index + materialize a
5009// worktree). This seam computes the merge once and returns a per-path result
5010// rich enough for both renderings, so the resolution lives in exactly one
5011// place.
5012//
5013// The result is byte-identical to the old per-command copies on every cell
5014// they already handled (clean merges, content / add-add / modify-delete
5015// conflicts, mode merges). On top of that it adds rename-aware resolution: a
5016// file renamed on one side and modified on the other follows the rename,
5017// gated by [`MergeTreesOptions::detect_renames`] (the classic merge-ort
5018// non-recursive rename case).
5019// ===========================================================================
5020
/// Flattened tree: repository-relative path -> (mode, blob/symlink/gitlink oid).
///
/// As produced by [`flatten_tree`], only leaf entries appear: subtrees are
/// descended into and contribute their contents under a `/`-joined path
/// rather than an entry of their own.
pub type MergeEntryMap = BTreeMap<Vec<u8>, (u32, ObjectId)>;
5023
/// Whether to favour one side wholesale for textual conflicts (`-Xours` /
/// `-Xtheirs`), or to leave conflict markers in place.
///
/// Carried in [`MergeTreesOptions::favor`].
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum MergeFavor {
    /// Leave conflict markers in place (the default).
    None,
    /// On a textual conflict, take ours' content wholesale.
    Ours,
    /// On a textual conflict, take theirs' content wholesale.
    Theirs,
}
5035
/// Options controlling a [`merge_trees`] run.
///
/// The labels are borrowed for `'a`. The `Default` implementation gives the
/// generic labels `ours` / `theirs` / `merged common ancestors`, no
/// favouring, and both rename detection and directory-rename detection
/// switched off.
pub struct MergeTreesOptions<'a> {
    /// Conflict-marker label for ours (e.g. a branch name or `HEAD`).
    pub ours_label: &'a str,
    /// Conflict-marker label for theirs.
    pub theirs_label: &'a str,
    /// Diff3 ancestor label (the `|||||||` side); merge porcelains use
    /// `"merged common ancestors"`.
    pub ancestor_label: &'a str,
    /// `-Xours` / `-Xtheirs` favouring for textual conflicts.
    pub favor: MergeFavor,
    /// Enable rename-aware merging: a file renamed on one side and modified on
    /// the other follows the rename. When `false`, the merge is purely
    /// path-keyed (the historical behaviour).
    pub detect_renames: bool,
    /// Minimum similarity (`0..=100`) for inexact rename detection.
    pub rename_threshold: u8,
    /// Directory-rename detection mode. When [`DirectoryRenames::False`], a file
    /// added on one side under a directory that the *other* side renamed stays
    /// put. When enabled, such files are re-homed into the renamed directory,
    /// matching `merge.directoryRenames`. Requires `detect_renames` to have any
    /// effect (directory renames are inferred from the file renames it finds).
    pub directory_renames: DirectoryRenames,
    /// Conflict-marker style for textual conflicts (`merge.conflictStyle`).
    pub style: ConflictStyle,
}
5062
/// How directory-rename detection behaves, mirroring git's
/// `merge.directoryRenames` configuration.
///
/// Note that this type's `Default` is [`DirectoryRenames::False`], which
/// differs from git's own default (`conflict`); callers wanting git's
/// behaviour must select [`DirectoryRenames::Conflict`] explicitly.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)]
pub enum DirectoryRenames {
    /// Disable directory-rename detection (`merge.directoryRenames=false`).
    #[default]
    False,
    /// Apply directory renames silently (`merge.directoryRenames=true`).
    True,
    /// Detect directory renames but treat each re-homed path as a conflict
    /// requiring confirmation (`merge.directoryRenames=conflict`). git's default.
    Conflict,
}
5076
impl Default for MergeTreesOptions<'_> {
    /// Baseline options: generic `ours` / `theirs` labels, the
    /// `merged common ancestors` ancestor label, no `-X` favouring, rename and
    /// directory-rename detection both off, the default rename threshold, and
    /// the `merge` conflict style.
    fn default() -> Self {
        Self {
            ours_label: "ours",
            theirs_label: "theirs",
            ancestor_label: "merged common ancestors",
            favor: MergeFavor::None,
            // Rename detection is opt-in; with it off the merge is path-keyed.
            detect_renames: false,
            rename_threshold: DEFAULT_RENAME_THRESHOLD,
            directory_renames: DirectoryRenames::False,
            style: ConflictStyle::Merge,
        }
    }
}
5091
/// The kind of conflict recorded for a path, used to render the stable
/// conflict-type token and human message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeConflictKind {
    /// Both sides changed the file content differently (or both added it with
    /// differing content — an add/add).
    Content {
        /// True for the add/add case described above (both sides added the
        /// path with differing content).
        add_add: bool,
    },
    /// The file was deleted on one side and modified on the other.
    ModifyDelete {
        /// The side label that deleted the path.
        deleted_in: String,
        /// The side label that modified (and thus kept) the path.
        modified_in: String,
    },
    /// A file renamed on one side, with a content conflict against the other
    /// side's change at the destination.
    RenameContent {
        /// The original (pre-rename) path.
        old_path: Vec<u8>,
    },
    /// Two paths were renamed to the same destination, producing a
    /// rename/rename(2to1) conflict.
    RenameRenameTwoToOne {
        /// Ours' pre-destination path.
        ours_path: Vec<u8>,
        /// Theirs' pre-destination path.
        theirs_path: Vec<u8>,
    },
    /// One source path was renamed to different destinations on each side,
    /// producing a rename/rename(1to2) conflict.
    RenameRenameOneToTwo {
        /// The pre-rename source path.
        old_path: Vec<u8>,
        /// Ours' destination path.
        ours_path: Vec<u8>,
        /// Theirs' destination path.
        theirs_path: Vec<u8>,
        /// The label for our side.
        ours_label: String,
        /// The label for their side.
        theirs_label: String,
    },
    /// An auxiliary higher-stage entry for a rename/rename(1to2) conflict. The
    /// user-facing message is emitted by
    /// [`MergeConflictKind::RenameRenameOneToTwo`].
    RenameRenameOneToTwoStage,
    /// A directory was split evenly across multiple destinations, so no
    /// directory rename could be applied for paths the other side left there.
    DirRenameSplit {
        /// The original directory with no unique destination.
        source_dir: Vec<u8>,
    },
    /// A file renamed on one side whose source was deleted on the other side.
    RenameDelete {
        /// The pre-rename source path.
        old_path: Vec<u8>,
        /// The side label that performed the rename.
        renamed_in: String,
        /// The side label that deleted the source.
        deleted_in: String,
    },
    /// A file collides with a directory at the same path in the merged result:
    /// the directory wins at the original path and the file is moved aside to
    /// `path~<branch>` (merge-ort's D/F conflict, `unique_path`). git emits
    /// `CONFLICT (file/directory): directory in the way of <old> from <branch>;
    /// moving it to <new> instead.`
    FileDirectory {
        /// The original (pre-move) path now occupied by the directory.
        original_path: Vec<u8>,
        /// The side label whose file was moved aside.
        moved_from: String,
    },
    /// A path was added/renamed under a directory the other side renamed, so the
    /// merge silently moved it into the renamed directory but, in
    /// `merge.directoryRenames=conflict` mode, flags it for the user to confirm.
    /// git emits `CONFLICT (file location): ... suggesting it should perhaps be
    /// moved to <new_path>.` The tree still contains the re-homed content.
    DirRenameLocation {
        /// The pre-re-home path (`old_path` in git's message): where the side
        /// placed the file before directory-rename detection moved it.
        old_path: Vec<u8>,
        /// `Some(source)` when the file was *renamed* into `old_path` by this
        /// side (git's "renamed to" wording, naming the original `source`);
        /// `None` when it was a fresh add (git's "added in" wording).
        renamed_from: Option<Vec<u8>>,
        /// The side label that added/renamed the file (`branch_with_new_path`).
        added_in: String,
        /// The side label that renamed the directory (`branch_with_dir_rename`).
        dir_renamed_in: String,
    },
    /// A directory rename would have moved one or more paths onto this path, but
    /// it is already occupied (a file/dir in the way) or several sources map
    /// here. git emits `CONFLICT (implicit dir rename): Existing file/dir at
    /// <path> in the way of implicit directory rename(s) putting the following
    /// path(s) there: <sources>.` The path keeps its original content; the
    /// re-homed sources are left where they were.
    DirRenameImplicitCollision {
        /// The source path(s) the directory rename tried to move onto this path.
        sources: Vec<Vec<u8>>,
    },
}
5192
/// One resolved/conflicted path in the merged tree.
///
/// Whether the path is clean is decided solely by [`MergedPath::conflict`];
/// see [`MergedPath::is_clean`].
#[derive(Debug, Clone)]
pub struct MergedPath {
    /// Destination path in the merged tree.
    pub path: Vec<u8>,
    /// The per-stage (1=base, 2=ours, 3=theirs) entries when conflicted; all
    /// `None` for a clean resolution.
    pub stages: MergeStages,
    /// `Some((mode, oid))` is the final leaf written to the merged tree; `None`
    /// means the path is absent in the result (a clean delete).
    pub result: Option<(u32, ObjectId)>,
    /// When conflicted, the worktree bytes + mode to materialize (content with
    /// conflict markers, or the surviving side's bytes). `None` for a clean
    /// path.
    pub worktree: Option<(u32, Vec<u8>)>,
    /// `Some(..)` exactly when this path conflicted.
    pub conflict: Option<MergeConflictKind>,
    /// True when this path went through a textual 3-way content merge (both
    /// sides diverged and both were mergeable files). Drives the "Auto-merging
    /// <path>" informational message, which `git merge-tree` emits for every
    /// such path — clean or conflicted.
    pub auto_merged: bool,
}
5216
impl MergedPath {
    /// True when this path resolved cleanly (no conflict recorded).
    ///
    /// Only the `conflict` field is consulted; `stages`, `result` and
    /// `worktree` are not inspected.
    pub fn is_clean(&self) -> bool {
        self.conflict.is_none()
    }
}
5223
/// Per-stage higher-order index entries for a conflicted path.
///
/// Each populated slot is a `(mode, oid)` pair; `Default` leaves all three
/// slots empty.
#[derive(Debug, Clone, Default)]
pub struct MergeStages {
    /// Stage 1: the common-ancestor (base) entry, if any.
    pub base: Option<(u32, ObjectId)>,
    /// Stage 2: ours' entry, if any.
    pub ours: Option<(u32, ObjectId)>,
    /// Stage 3: theirs' entry, if any.
    pub theirs: Option<(u32, ObjectId)>,
}
5231
/// The outcome of a 3-way tree merge: the merged top-level tree plus per-path
/// detail and a clean/conflicted flag.
///
/// Use [`MergeTreesResult::conflicts`] to walk only the conflicted paths.
#[derive(Debug, Clone)]
pub struct MergeTreesResult {
    /// Object id of the merged top-level tree (always written, even on
    /// conflict — conflicted blobs go in with their marker content).
    pub tree: ObjectId,
    /// Per-path results, sorted by path.
    pub paths: Vec<MergedPath>,
    /// False if any path conflicted.
    pub clean: bool,
    /// Original paths removed by rename or directory-rename rewrites. These are
    /// cleanup-only paths for porcelains materializing a conflicted merge; they
    /// are absent from the merged tree.
    pub cleanup_paths: Vec<Vec<u8>>,
    /// Non-conflict informational messages produced while detecting renames.
    pub info_messages: Vec<MergeInfoMessage>,
}
5250
5251impl MergeTreesResult {
5252    /// Iterate over the paths that conflicted, in path order.
5253    pub fn conflicts(&self) -> impl Iterator<Item = &MergedPath> {
5254        self.paths.iter().filter(|entry| entry.conflict.is_some())
5255    }
5256}
5257
/// Non-conflict merge information that porcelain commands may print.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeInfoMessage {
    /// A directory rename was skipped because the suggested target directory was
    /// itself renamed away on this side.
    // NOTE(review): this variant is constructed outside this part of the file;
    // the field roles below are inferred from their names — confirm there.
    DirRenameSkippedDueToRerename {
        /// Presumably the directory whose rename was being followed.
        old_dir: Vec<u8>,
        /// Presumably the path that was left where it was.
        path: Vec<u8>,
        /// Presumably the suggested target directory that was itself renamed.
        new_dir: Vec<u8>,
    },
    /// A path was updated due to a directory rename in
    /// `merge.directoryRenames=true` mode.
    DirRenameApplied {
        /// Where the path sat before it was re-homed.
        old_path: Vec<u8>,
        /// The re-homed destination path.
        new_path: Vec<u8>,
        /// The re-home record's rename source, if it carried one; presumably
        /// `Some` when the file was renamed (rather than added) into
        /// `old_path`.
        renamed_from: Option<Vec<u8>>,
        /// Label of the side that added/renamed the file.
        added_in: String,
        /// Label of the other side, which renamed the directory.
        dir_renamed_in: String,
    },
    /// A directory-rename location conflict that overlaps another conflict at
    /// the same final path, such as a content conflict. The path's primary
    /// conflict kind remains attached to the path; this carries git's extra
    /// `CONFLICT (file location)` line.
    // NOTE(review): fields presumably carry the same meaning as in
    // `DirRenameApplied` — confirm at the construction site.
    DirRenameLocationConflict {
        old_path: Vec<u8>,
        new_path: Vec<u8>,
        renamed_from: Option<Vec<u8>>,
        added_in: String,
        dir_renamed_in: String,
    },
    /// A rename/delete conflict whose conflicted destination was later moved
    /// aside by directory/file conflict handling. The primary per-path conflict
    /// remains `FileDirectory`; this preserves git's extra rename/delete line.
    // NOTE(review): field roles inferred from names and from
    // `MergeConflictKind::RenameDelete` — confirm at the construction site.
    RenameDeleteConflict {
        /// Presumably the pre-rename source path.
        old_path: Vec<u8>,
        /// Presumably the rename destination.
        new_path: Vec<u8>,
        /// Presumably the side label that performed the rename.
        renamed_in: String,
        /// Presumably the side label that deleted the source.
        deleted_in: String,
    },
}
5298
5299/// Read a tree object (by oid) into a flattened path -> (mode, oid) map,
5300/// descending into subtrees. The canonical empty tree yields an empty map.
5301pub fn flatten_tree(
5302    reader: &impl ObjectReader,
5303    format: ObjectFormat,
5304    tree_oid: &ObjectId,
5305) -> Result<MergeEntryMap> {
5306    let mut entries = BTreeMap::new();
5307    if *tree_oid == empty_tree_oid(format)? {
5308        return Ok(entries);
5309    }
5310    collect_flat_tree(reader, format, tree_oid, Vec::new(), &mut entries)?;
5311    Ok(entries)
5312}
5313
5314fn collect_flat_tree(
5315    reader: &impl ObjectReader,
5316    format: ObjectFormat,
5317    tree_oid: &ObjectId,
5318    prefix: Vec<u8>,
5319    entries: &mut MergeEntryMap,
5320) -> Result<()> {
5321    let object = reader.read_object(tree_oid)?;
5322    if object.object_type != ObjectType::Tree {
5323        return Err(GitError::InvalidObject(format!(
5324            "expected tree {}, found {}",
5325            tree_oid,
5326            object.object_type.as_str()
5327        )));
5328    }
5329    for entry in TreeEntries::new(format, &object.body) {
5330        let entry = entry?;
5331        let mut path = prefix.clone();
5332        if !path.is_empty() {
5333            path.push(b'/');
5334        }
5335        path.extend_from_slice(entry.name);
5336        if entry.mode == 0o040000 {
5337            collect_flat_tree(reader, format, &entry.oid, path, entries)?;
5338        } else {
5339            entries.insert(path, (entry.mode, entry.oid));
5340        }
5341    }
5342    Ok(())
5343}
5344
/// Whether `mode` denotes a plain file blob, regular (`100644`) or executable
/// (`100755`), whose content can be textually 3-way merged. Every other
/// mode, including symlinks and gitlinks, yields `false`.
pub fn is_mergeable_file_mode(mode: u32) -> bool {
    matches!(mode, 0o100644 | 0o100755)
}
5350
5351/// 3-way merge of three trees into a single merged tree.
5352///
5353/// `base` is the common-ancestor tree (`None` for unrelated histories — every
5354/// path is then treated as added on both sides). `ours`/`theirs` are the two
5355/// sides. Cleanly-merged blob content and the resulting (sub)trees are written
5356/// to `db`; the returned [`MergeTreesResult`] carries the merged top-level tree
5357/// oid plus per-path detail.
5358///
5359/// This is the shared engine behind `git merge-tree --write-tree`, `git merge`,
5360/// `git cherry-pick`, and `git revert`. It is behaviour-preserving relative to
5361/// the per-command copies it replaced, and additionally resolves renames when
5362/// [`MergeTreesOptions::detect_renames`] is set.
5363pub fn merge_trees(
5364    db: &FileObjectDatabase,
5365    format: ObjectFormat,
5366    base: Option<&ObjectId>,
5367    ours: &ObjectId,
5368    theirs: &ObjectId,
5369    options: &MergeTreesOptions<'_>,
5370) -> Result<MergeTreesResult> {
5371    let base_map = match base {
5372        Some(tree) => flatten_tree(db, format, tree)?,
5373        None => MergeEntryMap::new(),
5374    };
5375    let ours_map = flatten_tree(db, format, ours)?;
5376    let theirs_map = flatten_tree(db, format, theirs)?;
5377    merge_entry_maps(db, format, &base_map, &ours_map, &theirs_map, options)
5378}
5379
5380/// [`merge_trees`] operating on already-flattened entry maps. The merge
5381/// porcelains often hold the flattened maps already (e.g. cherry-pick builds
5382/// `theirs` from a picked commit's tree), so this avoids re-reading them.
5383pub fn merge_entry_maps(
5384    db: &FileObjectDatabase,
5385    format: ObjectFormat,
5386    base_map: &MergeEntryMap,
5387    ours_map: &MergeEntryMap,
5388    theirs_map: &MergeEntryMap,
5389    options: &MergeTreesOptions<'_>,
5390) -> Result<MergeTreesResult> {
5391    // Rename-aware step: detect files renamed on exactly one side relative to
5392    // base, so a modification on the other side follows the rename. This is the
5393    // non-recursive merge-ort rename case. We compute a rewrite map that, for a
5394    // one-sided rename old->new, presents the *other* side's `old` content at
5395    // `new` (and drops `old`), letting the path-keyed core below do the 3-way
5396    // content merge at the destination.
5397    let (mut renames, side_renames) = if options.detect_renames {
5398        let (renames, ours_side, theirs_side) =
5399            detect_merge_renames(db, format, base_map, ours_map, theirs_map, options)?;
5400        (renames, Some((ours_side, theirs_side)))
5401    } else {
5402        (MergeRenames::default(), None)
5403    };
5404
5405    // Build the effective per-side maps with file renames applied.
5406    let (mut eff_base, mut eff_ours, mut eff_theirs) =
5407        apply_merge_renames(base_map, ours_map, theirs_map, &renames);
5408
5409    // Directory-rename detection: when one side renamed a whole directory and
5410    // the other side added a file under (or renamed a file into) the old
5411    // directory, re-home that path into the renamed directory — including
5412    // transitive renames (a file the other side renamed into a directory this
5413    // side renamed follows on into the final directory). This is the
5414    // merge.directoryRenames behaviour, applied as a rewrite of the rename/add
5415    // destination paths so every merged path consults directory renames.
5416    let mut dir_rename_dirty = false;
5417    let mut rehomed_paths: BTreeMap<Vec<u8>, RehomeSides> = BTreeMap::new();
5418    let mut dir_rename_two_to_one: Vec<DirRenameTwoToOne> = Vec::new();
5419    let mut dir_rename_collisions: Vec<DirRenameCollision> = Vec::new();
5420    let mut dir_rename_splits: BTreeSet<Vec<u8>> = BTreeSet::new();
5421    let mut info_messages = Vec::new();
5422    let mut cleanup_paths: BTreeSet<Vec<u8>> = renames
5423        .dest_to_source
5424        .values()
5425        .map(|rename| rename.source.clone())
5426        .collect();
5427    if options.directory_renames != DirectoryRenames::False
5428        && let Some((ours_side, theirs_side)) = &side_renames
5429    {
5430        let dir_renames = compute_directory_renames(ours_map, theirs_map, ours_side, theirs_side);
5431        let outcome = apply_directory_renames(
5432            base_map,
5433            &eff_base,
5434            &eff_ours,
5435            &eff_theirs,
5436            ours_side,
5437            theirs_side,
5438            &dir_renames,
5439            &renames.dest_to_source,
5440        );
5441        eff_base = outcome.base;
5442        eff_ours = outcome.ours;
5443        eff_theirs = outcome.theirs;
5444        rehomed_paths = outcome.rehomed;
5445        dir_rename_collisions = outcome.collisions;
5446        dir_rename_splits = outcome.splits;
5447        info_messages = outcome.info_messages;
5448        dir_rename_dirty = outcome.dirty;
5449        remap_rename_destinations(&mut renames, &rehomed_paths);
5450        drop_collapsed_rename_rename_conflicts(&mut renames);
5451        dir_rename_two_to_one = collect_dir_rename_two_to_one(&renames, &rehomed_paths);
5452    }
5453    for info in rehomed_paths
5454        .values()
5455        .flat_map(|sides| [&sides.ours, &sides.theirs])
5456        .flatten()
5457    {
5458        cleanup_paths.insert(info.old_path.clone());
5459    }
5460    if options.directory_renames == DirectoryRenames::True {
5461        for (dest, sides) in &rehomed_paths {
5462            for info in [&sides.ours, &sides.theirs].into_iter().flatten() {
5463                let (added_in, dir_renamed_in) = if info.added_on_ours {
5464                    (
5465                        options.ours_label.to_string(),
5466                        options.theirs_label.to_string(),
5467                    )
5468                } else {
5469                    (
5470                        options.theirs_label.to_string(),
5471                        options.ours_label.to_string(),
5472                    )
5473                };
5474                info_messages.push(MergeInfoMessage::DirRenameApplied {
5475                    old_path: info.old_path.clone(),
5476                    new_path: dest.clone(),
5477                    renamed_from: info.renamed_from.clone(),
5478                    added_in,
5479                    dir_renamed_in,
5480                });
5481            }
5482        }
5483    }
5484    // In =conflict mode, every re-homed path is reported as a location conflict
5485    // (the tree still gets the re-homed content, but the merge is marked dirty).
5486    let dir_rename_conflict_paths: BTreeMap<Vec<u8>, RehomeSides> =
5487        if options.directory_renames == DirectoryRenames::Conflict {
5488            rehomed_paths.clone()
5489        } else {
5490            BTreeMap::new()
5491        };
5492
5493    let mut all_paths = BTreeSet::new();
5494    all_paths.extend(eff_base.keys().cloned());
5495    all_paths.extend(eff_ours.keys().cloned());
5496    all_paths.extend(eff_theirs.keys().cloned());
5497
5498    let mut paths: Vec<MergedPath> = Vec::new();
5499    let mut leaves: MergeEntryMap = BTreeMap::new();
5500    let mut clean = true;
5501
5502    for path in all_paths {
5503        let base = eff_base.get(&path).cloned();
5504        let ours = eff_ours.get(&path).cloned();
5505        let theirs = eff_theirs.get(&path).cloned();
5506        let rename = renames.dest_to_source.get(&path);
5507        let old_path = rename.map(|r| r.source.clone());
5508
5509        // Trivial resolutions (identical to the historical per-command logic).
5510        if ours == theirs {
5511            if let Some(entry) = ours {
5512                leaves.insert(path.clone(), entry);
5513            }
5514            paths.push(clean_path(path, ours));
5515            continue;
5516        }
5517        if ours == base {
5518            if let Some(entry) = &theirs {
5519                leaves.insert(path.clone(), *entry);
5520            }
5521            paths.push(clean_path(path, theirs));
5522            continue;
5523        }
5524        if theirs == base {
5525            if let Some(entry) = &ours {
5526                leaves.insert(path.clone(), *entry);
5527            }
5528            paths.push(clean_path(path, ours));
5529            continue;
5530        }
5531
5532        // Both sides diverged. Decide how to combine.
5533        let content_mergeable = matches!(&ours, Some((mode, _)) if is_mergeable_file_mode(*mode))
5534            && matches!(&theirs, Some((mode, _)) if is_mergeable_file_mode(*mode))
5535            && match &base {
5536                Some((mode, _)) => is_mergeable_file_mode(*mode),
5537                None => true,
5538            };
5539
5540        if let (true, Some((ours_mode, ours_oid)), Some((theirs_mode, theirs_oid))) =
5541            (content_mergeable, &ours, &theirs)
5542        {
5543            let add_add = base.is_none();
5544            let base_bytes = match &base {
5545                Some((_, oid)) => merge_blob_bytes(db, oid)?,
5546                None => Vec::new(),
5547            };
5548            let ours_bytes = merge_blob_bytes(db, ours_oid)?;
5549            let theirs_bytes = merge_blob_bytes(db, theirs_oid)?;
5550            // When this destination came from a one-sided rename, git qualifies
5551            // the conflict-marker labels with the per-side path (the renaming
5552            // side shows the new path, the other side the old path), e.g.
5553            // `<<<<<<< HEAD:old.txt` / `>>>>>>> feature:new.txt`.
5554            let rehome = rehomed_paths.get(&path);
5555            let (ours_label, theirs_label) = match rename {
5556                Some(MergeRename { source, side }) => {
5557                    let (ours_path, theirs_path) = match side {
5558                        // theirs renamed -> ours kept the source path.
5559                        RenameSide::Theirs => (source.as_slice(), path.as_slice()),
5560                        // ours renamed -> theirs kept the source path.
5561                        RenameSide::Ours => (path.as_slice(), source.as_slice()),
5562                    };
5563                    (
5564                        qualify_label(options.ours_label, ours_path),
5565                        qualify_label(options.theirs_label, theirs_path),
5566                    )
5567                }
5568                None => {
5569                    let ours_path = rehome
5570                        .and_then(|info| info.ours.as_ref())
5571                        .map_or(path.as_slice(), |info| info.old_path.as_slice());
5572                    let theirs_path = rehome
5573                        .and_then(|info| info.theirs.as_ref())
5574                        .map_or(path.as_slice(), |info| info.old_path.as_slice());
5575                    if ours_path != path.as_slice() || theirs_path != path.as_slice() {
5576                        (
5577                            qualify_label(options.ours_label, ours_path),
5578                            qualify_label(options.theirs_label, theirs_path),
5579                        )
5580                    } else {
5581                        (
5582                            options.ours_label.to_string(),
5583                            options.theirs_label.to_string(),
5584                        )
5585                    }
5586                }
5587            };
5588            let result = merge_blobs(
5589                &base_bytes,
5590                &ours_bytes,
5591                &theirs_bytes,
5592                &MergeBlobOptions {
5593                    ours_label: &ours_label,
5594                    theirs_label: &theirs_label,
5595                    base_label: options.ancestor_label,
5596                    style: options.style,
5597                },
5598            );
5599
5600            let base_mode = base.as_ref().map(|(mode, _)| *mode);
5601            let (resolved_mode, mode_conflict) =
5602                merge_file_modes(base_mode, *ours_mode, *theirs_mode);
5603
5604            if !result.conflicted && !mode_conflict {
5605                let oid = db.write_object(EncodedObject::new(ObjectType::Blob, result.content))?;
5606                leaves.insert(path.clone(), (resolved_mode, oid));
5607                paths.push(clean_path_auto(path, Some((resolved_mode, oid)), true));
5608            } else if options.favor != MergeFavor::None && !mode_conflict {
5609                let chosen = if options.favor == MergeFavor::Ours {
5610                    ours
5611                } else {
5612                    theirs
5613                };
5614                if let Some(entry) = chosen {
5615                    leaves.insert(path.clone(), entry);
5616                }
5617                paths.push(clean_path_auto(path, chosen, true));
5618            } else {
5619                clean = false;
5620                let oid =
5621                    db.write_object(EncodedObject::new(ObjectType::Blob, result.content.clone()))?;
5622                leaves.insert(path.clone(), (resolved_mode, oid));
5623                let worktree_mode = if *ours_mode == *theirs_mode {
5624                    *ours_mode
5625                } else {
5626                    0o100644
5627                };
5628                let conflict = if let Some(old) = &old_path {
5629                    MergeConflictKind::RenameContent {
5630                        old_path: old.clone(),
5631                    }
5632                } else if add_add {
5633                    match rehome.and_then(|info| Some((info.ours.as_ref()?, info.theirs.as_ref()?)))
5634                    {
5635                        Some((ours_info, theirs_info)) => MergeConflictKind::RenameRenameTwoToOne {
5636                            ours_path: ours_info.old_path.clone(),
5637                            theirs_path: theirs_info.old_path.clone(),
5638                        },
5639                        None => MergeConflictKind::Content { add_add },
5640                    }
5641                } else {
5642                    MergeConflictKind::Content { add_add }
5643                };
5644                paths.push(MergedPath {
5645                    path: path.clone(),
5646                    stages: stages_for(&base, &ours, &theirs),
5647                    result: Some((resolved_mode, oid)),
5648                    worktree: Some((worktree_mode, result.content)),
5649                    conflict: Some(conflict),
5650                    auto_merged: true,
5651                });
5652            }
5653        } else if base.is_some() && (ours.is_none() || theirs.is_none()) {
5654            // modify/delete.
5655            clean = false;
5656            let (deleted_in, modified_in, surviving) = if ours.is_none() {
5657                (
5658                    options.ours_label.to_string(),
5659                    options.theirs_label.to_string(),
5660                    theirs,
5661                )
5662            } else {
5663                (
5664                    options.theirs_label.to_string(),
5665                    options.ours_label.to_string(),
5666                    ours,
5667                )
5668            };
5669            let worktree = match &surviving {
5670                Some((mode, oid)) => Some((*mode, merge_worktree_bytes(db, *mode, oid)?)),
5671                None => None,
5672            };
5673            if let Some(entry) = surviving {
5674                leaves.insert(path.clone(), entry);
5675            }
5676            paths.push(MergedPath {
5677                path: path.clone(),
5678                stages: stages_for(&base, &ours, &theirs),
5679                result: surviving,
5680                worktree,
5681                conflict: Some(MergeConflictKind::ModifyDelete {
5682                    deleted_in,
5683                    modified_in,
5684                }),
5685                auto_merged: false,
5686            });
5687        } else {
5688            // add/add of non-files, type changes, mode changes, etc. Keep the
5689            // surviving side's content and record a generic content conflict.
5690            clean = false;
5691            let add_add = base.is_none();
5692            let surviving = ours.or(theirs);
5693            let worktree = match &surviving {
5694                Some((mode, oid)) => Some((*mode, merge_worktree_bytes(db, *mode, oid)?)),
5695                None => None,
5696            };
5697            if let Some(entry) = surviving {
5698                leaves.insert(path.clone(), entry);
5699            }
5700            paths.push(MergedPath {
5701                path: path.clone(),
5702                stages: stages_for(&base, &ours, &theirs),
5703                result: surviving,
5704                worktree,
5705                conflict: Some(MergeConflictKind::Content { add_add }),
5706                auto_merged: false,
5707            });
5708        }
5709    }
5710
5711    if !renames.rename_rename_one_to_two.is_empty() {
5712        apply_rename_rename_one_to_two_conflicts(
5713            db,
5714            base_map,
5715            &eff_ours,
5716            &eff_theirs,
5717            &renames.rename_rename_one_to_two,
5718            &mut paths,
5719            &mut leaves,
5720            options,
5721        )?;
5722        clean = false;
5723    }
5724
5725    if !dir_rename_two_to_one.is_empty() {
5726        apply_dir_rename_two_to_one_conflicts(
5727            db,
5728            &eff_ours,
5729            &eff_theirs,
5730            &dir_rename_two_to_one,
5731            &mut paths,
5732            &mut leaves,
5733            options,
5734        )?;
5735        clean = false;
5736    }
5737
5738    // Rename/delete conflicts: a file renamed on one side whose source the other
5739    // side deleted. The merge core resolved the destination cleanly (only the
5740    // renaming side has it), but git flags this as a conflict — keep the renamed
5741    // content in the tree, record higher-order stages, and mark the merge dirty.
5742    if !renames.rename_deletes.is_empty() {
5743        for (dest, rd) in &renames.rename_deletes {
5744            // Skip if another conflict already claimed this destination.
5745            let Some(slot) = paths.iter_mut().find(|p| &p.path == dest) else {
5746                continue;
5747            };
5748            if slot.conflict.is_some() {
5749                continue;
5750            }
5751            let base_entry = base_map.get(&rd.source).copied();
5752            let renamed_entry = slot.result;
5753            // The renamed content sits on the renaming side; the deleting side
5754            // contributes no stage at the destination.
5755            let (ours_stage, theirs_stage) = match rd.side {
5756                RenameSide::Ours => (renamed_entry, None),
5757                RenameSide::Theirs => (None, renamed_entry),
5758            };
5759            let (renamed_in, deleted_in) = match rd.side {
5760                RenameSide::Ours => (
5761                    options.ours_label.to_string(),
5762                    options.theirs_label.to_string(),
5763                ),
5764                RenameSide::Theirs => (
5765                    options.theirs_label.to_string(),
5766                    options.ours_label.to_string(),
5767                ),
5768            };
5769            let worktree = match &renamed_entry {
5770                Some((mode, oid)) => Some((*mode, merge_worktree_bytes(db, *mode, oid)?)),
5771                None => None,
5772            };
5773            slot.stages = MergeStages {
5774                base: base_entry,
5775                ours: ours_stage,
5776                theirs: theirs_stage,
5777            };
5778            slot.worktree = worktree;
5779            slot.conflict = Some(MergeConflictKind::RenameDelete {
5780                old_path: rd.source.clone(),
5781                renamed_in,
5782                deleted_in,
5783            });
5784            clean = false;
5785        }
5786    }
5787
5788    // Directory-rename outcomes that make the merge dirty. A collision/split
5789    // detected while re-homing (two paths onto one destination, an ambiguous
5790    // split source, or a file in the way) marks the merge unclean regardless of
5791    // mode. In =conflict mode, every silently re-homed path is *also* reported
5792    // as a location conflict: the tree keeps the re-homed content but git wants
5793    // the user to confirm the suggested move.
5794    if dir_rename_dirty {
5795        clean = false;
5796    }
5797    // Implicit-directory-rename collisions (a directory rename would put a path
5798    // onto an existing file/dir, or N paths onto one destination). git emits
5799    // `CONFLICT (implicit dir rename): Existing file/dir at <dest> in the way ...`
5800    // regardless of mode, and the merge is unclean. Attach the conflict to the
5801    // blocked destination path (which keeps its original content).
5802    for collision in &dir_rename_collisions {
5803        clean = false;
5804        if let Some(slot) = paths.iter_mut().find(|p| p.path == collision.dest)
5805            && slot.conflict.is_none()
5806        {
5807            slot.conflict = Some(MergeConflictKind::DirRenameImplicitCollision {
5808                sources: collision.sources.clone(),
5809            });
5810        } else if !paths.iter().any(|p| p.path == collision.dest) {
5811            paths.push(MergedPath {
5812                path: collision.dest.clone(),
5813                stages: MergeStages::default(),
5814                result: None,
5815                worktree: None,
5816                conflict: Some(MergeConflictKind::DirRenameImplicitCollision {
5817                    sources: collision.sources.clone(),
5818                }),
5819                auto_merged: false,
5820            });
5821        }
5822    }
5823    for source_dir in &dir_rename_splits {
5824        clean = false;
5825        paths.push(MergedPath {
5826            path: source_dir.clone(),
5827            stages: MergeStages::default(),
5828            result: None,
5829            worktree: None,
5830            conflict: Some(MergeConflictKind::DirRenameSplit {
5831                source_dir: source_dir.clone(),
5832            }),
5833            auto_merged: false,
5834        });
5835    }
5836    if !dir_rename_conflict_paths.is_empty() {
5837        clean = false;
5838        for (dest, infos) in &dir_rename_conflict_paths {
5839            for info in [&infos.ours, &infos.theirs].into_iter().flatten() {
5840                let (added_in, dir_renamed_in) = if info.added_on_ours {
5841                    // The path was added/renamed by ours, into a dir theirs renamed.
5842                    (
5843                        options.ours_label.to_string(),
5844                        options.theirs_label.to_string(),
5845                    )
5846                } else {
5847                    (
5848                        options.theirs_label.to_string(),
5849                        options.ours_label.to_string(),
5850                    )
5851                };
5852                if let Some(slot) = paths.iter_mut().find(|p| &p.path == dest)
5853                    && slot.conflict.is_none()
5854                {
5855                    slot.conflict = Some(MergeConflictKind::DirRenameLocation {
5856                        old_path: info.old_path.clone(),
5857                        renamed_from: info.renamed_from.clone(),
5858                        added_in,
5859                        dir_renamed_in,
5860                    });
5861                } else {
5862                    info_messages.push(MergeInfoMessage::DirRenameLocationConflict {
5863                        old_path: info.old_path.clone(),
5864                        new_path: dest.clone(),
5865                        renamed_from: info.renamed_from.clone(),
5866                        added_in,
5867                        dir_renamed_in,
5868                    });
5869                }
5870            }
5871        }
5872    }
5873
5874    // Directory/file (D/F) conflict resolution (merge-ort `process_entry`): a
5875    // path that ends up as a *file* in the merged result while another result
5876    // path lives *under* it (so the path is simultaneously a directory) cannot
5877    // coexist. git keeps the directory at the original path and moves the file
5878    // aside to `path~<branch>` via `unique_path`, where `<branch>` is the side
5879    // that contributed the file. We resolve this on the flattened `leaves` after
5880    // every per-path decision is made, so renames/dir-renames have settled first.
5881    resolve_directory_file_conflicts(
5882        db,
5883        &mut paths,
5884        &mut leaves,
5885        &mut clean,
5886        &eff_ours,
5887        &eff_theirs,
5888        options,
5889        &mut info_messages,
5890    )?;
5891
5892    let tree = write_merged_tree(db, &leaves)?;
5893
5894    cleanup_paths.retain(|path| !leaves.contains_key(path));
5895
5896    Ok(MergeTreesResult {
5897        tree,
5898        paths,
5899        clean,
5900        cleanup_paths: cleanup_paths.into_iter().collect(),
5901        info_messages,
5902    })
5903}
5904
/// Turn a branch label into a single path-component-safe token.
///
/// Every `/` in `branch` is replaced with `_`, the same flattening git's
/// `add_flattened_path` applies before `unique_path` builds a `path~branch`
/// name. All other characters are passed through untouched.
fn flatten_branch_label(branch: &str) -> String {
    branch
        .chars()
        .map(|ch| if ch == '/' { '_' } else { ch })
        .collect()
}
5911
5912/// Pick a `path~<branch>` name not already present in `leaves` (or claimed by an
5913/// existing `paths` entry), mirroring merge-ort's `unique_path`: start from
5914/// `path~branch`, then append `_0`, `_1`, … on collision.
5915fn unique_df_path(
5916    path: &[u8],
5917    branch: &str,
5918    leaves: &MergeEntryMap,
5919    paths: &[MergedPath],
5920) -> Vec<u8> {
5921    let mut base = path.to_vec();
5922    base.push(b'~');
5923    base.extend_from_slice(flatten_branch_label(branch).as_bytes());
5924    let taken = |candidate: &[u8]| {
5925        leaves.contains_key(candidate) || paths.iter().any(|p| p.path == candidate)
5926    };
5927    if !taken(&base) {
5928        return base;
5929    }
5930    let mut suffix = 0usize;
5931    loop {
5932        let mut candidate = base.clone();
5933        candidate.push(b'_');
5934        candidate.extend_from_slice(suffix.to_string().as_bytes());
5935        if !taken(&candidate) {
5936            return candidate;
5937        }
5938        suffix += 1;
5939    }
5940}
5941
5942/// Resolve directory/file collisions in the merged leaf set. For every file leaf
5943/// whose path is also a directory (some other leaf lives under `path/`), move the
5944/// file to `path~<branch>` and record a [`MergeConflictKind::FileDirectory`].
5945#[allow(clippy::too_many_arguments)]
5946fn resolve_directory_file_conflicts(
5947    db: &FileObjectDatabase,
5948    paths: &mut Vec<MergedPath>,
5949    leaves: &mut MergeEntryMap,
5950    clean: &mut bool,
5951    eff_ours: &MergeEntryMap,
5952    eff_theirs: &MergeEntryMap,
5953    options: &MergeTreesOptions<'_>,
5954    info_messages: &mut Vec<MergeInfoMessage>,
5955) -> Result<()> {
5956    // A path is a "directory" in the result iff some leaf key has it as a strict
5957    // `path/` prefix. Collect every such directory prefix once.
5958    let mut directory_prefixes: BTreeSet<Vec<u8>> = BTreeSet::new();
5959    for key in leaves.keys() {
5960        let mut idx = 0;
5961        while let Some(pos) = key[idx..].iter().position(|b| *b == b'/') {
5962            let end = idx + pos;
5963            directory_prefixes.insert(key[..end].to_vec());
5964            idx = end + 1;
5965        }
5966    }
5967    if directory_prefixes.is_empty() {
5968        return Ok(());
5969    }
5970
5971    // File leaves that collide with a directory of the same name.
5972    let colliding: Vec<Vec<u8>> = leaves
5973        .keys()
5974        .filter(|key| directory_prefixes.contains(*key))
5975        .cloned()
5976        .collect();
5977
5978    for original in colliding {
5979        let Some(entry) = leaves.remove(&original) else {
5980            continue;
5981        };
5982        // The moved-aside file must be materialized in the worktree at its new
5983        // path; read its blob bytes once so the porcelain has worktree content.
5984        let moved_bytes = merge_worktree_bytes(db, entry.0, &entry.1)?;
5985        // Which side contributed the file? git keys off `dirmask`: the file lives
5986        // on the side that is NOT the directory. We read it off the effective side
5987        // maps — whichever side has this path as a plain file. When only theirs has
5988        // it, use the theirs label; otherwise (ours has it, or both do) ours wins,
5989        // matching git's index-1 bias for the moved-aside name.
5990        let ours_has_file = eff_ours.contains_key(&original);
5991        let theirs_has_file = eff_theirs.contains_key(&original);
5992        let from_ours = ours_has_file || !theirs_has_file;
5993        let branch = if from_ours {
5994            options.ours_label
5995        } else {
5996            options.theirs_label
5997        };
5998        let new_path = unique_df_path(&original, branch, leaves, paths);
5999        leaves.insert(new_path.clone(), entry);
6000        *clean = false;
6001
6002        // Relocate the path's MergedPath: update its destination and stamp the D/F
6003        // conflict. If the path had no MergedPath (defensive), synthesize one.
6004        if let Some(slot) = paths.iter_mut().find(|p| p.path == original) {
6005            if let Some(MergeConflictKind::RenameDelete {
6006                old_path,
6007                renamed_in,
6008                deleted_in,
6009            }) = &slot.conflict
6010            {
6011                info_messages.push(MergeInfoMessage::RenameDeleteConflict {
6012                    old_path: old_path.clone(),
6013                    new_path: original.clone(),
6014                    renamed_in: renamed_in.clone(),
6015                    deleted_in: deleted_in.clone(),
6016                });
6017            }
6018            slot.path = new_path.clone();
6019            slot.result = Some(entry);
6020            // Preserve any pre-existing higher-order stages; a clean file leaf has
6021            // none, so seed ours/theirs from the effective maps for `ls-files -u`.
6022            if slot.stages.base.is_none()
6023                && slot.stages.ours.is_none()
6024                && slot.stages.theirs.is_none()
6025            {
6026                slot.stages = MergeStages {
6027                    base: None,
6028                    ours: if from_ours { Some(entry) } else { None },
6029                    theirs: if from_ours { None } else { Some(entry) },
6030                };
6031            }
6032            // Keep the slot's existing `auto_merged`: git only emits
6033            // `Auto-merging <new_path>` for the moved file when a real content
6034            // merge ran (a rename or both-sides change drives filemask>=6 through
6035            // handle_content_merge). A plain one-sided add (filemask 2/4) is moved
6036            // aside silently, so we must NOT force the flag on here.
6037            slot.worktree = Some((entry.0, moved_bytes));
6038            slot.conflict = Some(MergeConflictKind::FileDirectory {
6039                original_path: original.clone(),
6040                moved_from: branch.to_string(),
6041            });
6042        } else {
6043            paths.push(MergedPath {
6044                path: new_path.clone(),
6045                stages: MergeStages {
6046                    base: None,
6047                    ours: if from_ours { Some(entry) } else { None },
6048                    theirs: if from_ours { None } else { Some(entry) },
6049                },
6050                result: Some(entry),
6051                worktree: Some((entry.0, moved_bytes)),
6052                conflict: Some(MergeConflictKind::FileDirectory {
6053                    original_path: original.clone(),
6054                    moved_from: branch.to_string(),
6055                }),
6056                auto_merged: false,
6057            });
6058        }
6059    }
6060
6061    // Keep `paths` sorted by destination path (callers and tests assume order).
6062    paths.sort_by(|a, b| a.path.cmp(&b.path));
6063    Ok(())
6064}
6065
6066/// Construct a clean (non-conflicted) [`MergedPath`].
6067fn clean_path(path: Vec<u8>, result: Option<(u32, ObjectId)>) -> MergedPath {
6068    clean_path_auto(path, result, false)
6069}
6070
6071/// Like [`clean_path`] but records whether the path went through a textual
6072/// 3-way content merge (for the "Auto-merging" message).
6073fn clean_path_auto(
6074    path: Vec<u8>,
6075    result: Option<(u32, ObjectId)>,
6076    auto_merged: bool,
6077) -> MergedPath {
6078    MergedPath {
6079        path,
6080        stages: MergeStages::default(),
6081        result,
6082        worktree: None,
6083        conflict: None,
6084        auto_merged,
6085    }
6086}
6087
6088/// Snapshot the present stages for a conflicted path.
6089fn stages_for(
6090    base: &Option<(u32, ObjectId)>,
6091    ours: &Option<(u32, ObjectId)>,
6092    theirs: &Option<(u32, ObjectId)>,
6093) -> MergeStages {
6094    MergeStages {
6095        base: *base,
6096        ours: *ours,
6097        theirs: *theirs,
6098    }
6099}
6100
6101/// Read a blob's raw bytes, requiring it to be a blob object.
6102fn merge_blob_bytes(reader: &impl ObjectReader, oid: &ObjectId) -> Result<Vec<u8>> {
6103    let object = reader.read_object(oid)?;
6104    if object.object_type != ObjectType::Blob {
6105        return Err(GitError::InvalidObject(format!(
6106            "expected blob {}, found {}",
6107            oid,
6108            object.object_type.as_str()
6109        )));
6110    }
6111    Ok(object.body.clone())
6112}
6113
6114fn merge_worktree_bytes(reader: &impl ObjectReader, mode: u32, oid: &ObjectId) -> Result<Vec<u8>> {
6115    if sley_index::is_gitlink(mode) {
6116        Ok(Vec::new())
6117    } else {
6118        merge_blob_bytes(reader, oid)
6119    }
6120}
6121
/// Three-way merge of a file mode.
///
/// Returns `(resolved_mode, conflicted)`:
/// * both sides agree: that mode, no conflict;
/// * exactly one side still equals `base`: the other side's mode, no conflict;
/// * otherwise (both changed differently, or there is no base): `ours`, with
///   the conflict flag set.
fn merge_file_modes(base: Option<u32>, ours: u32, theirs: u32) -> (u32, bool) {
    if ours == theirs {
        return (ours, false);
    }
    // A side is "unchanged" only when a base exists and the side equals it.
    let unchanged = |side: u32| base == Some(side);
    if unchanged(ours) {
        (theirs, false)
    } else {
        // Ours changed: keep it, and conflict unless theirs stayed at base.
        (ours, !unchanged(theirs))
    }
}
6134
6135/// Build a top-level tree object from a flat map of `path -> (mode, oid)`
6136/// leaves, writing every (sub)tree object to `db`.
6137fn write_merged_tree(db: &FileObjectDatabase, leaves: &MergeEntryMap) -> Result<ObjectId> {
6138    let mut root = MergeTreeNode::default();
6139    for (path, (mode, oid)) in leaves {
6140        root.insert(path, *mode, *oid);
6141    }
6142    root.write(db)
6143}
6144
6145#[derive(Default)]
6146struct MergeTreeNode {
6147    blobs: BTreeMap<Vec<u8>, (u32, ObjectId)>,
6148    subtrees: BTreeMap<Vec<u8>, MergeTreeNode>,
6149}
6150
6151impl MergeTreeNode {
6152    fn insert(&mut self, path: &[u8], mode: u32, oid: ObjectId) {
6153        match path.iter().position(|byte| *byte == b'/') {
6154            Some(slash) => {
6155                let component = path[..slash].to_vec();
6156                let rest = &path[slash + 1..];
6157                self.subtrees
6158                    .entry(component)
6159                    .or_default()
6160                    .insert(rest, mode, oid);
6161            }
6162            None => {
6163                self.blobs.insert(path.to_vec(), (mode, oid));
6164            }
6165        }
6166    }
6167
6168    fn write(&self, db: &FileObjectDatabase) -> Result<ObjectId> {
6169        let mut entries: Vec<TreeEntry> = Vec::new();
6170        for (name, (mode, oid)) in &self.blobs {
6171            entries.push(TreeEntry {
6172                mode: *mode,
6173                name: BString::from(name.clone()),
6174                oid: *oid,
6175            });
6176        }
6177        for (name, subtree) in &self.subtrees {
6178            let oid = subtree.write(db)?;
6179            entries.push(TreeEntry {
6180                mode: 0o040000,
6181                name: BString::from(name.clone()),
6182                oid,
6183            });
6184        }
6185        entries.sort_by_key(merge_tree_sort_key);
6186        let tree = Tree { entries };
6187        db.write_object(EncodedObject::new(ObjectType::Tree, tree.write()))
6188    }
6189}
6190
6191fn merge_tree_sort_key(entry: &TreeEntry) -> Vec<u8> {
6192    let mut key = entry.name.as_bytes().to_vec();
6193    if entry.mode == 0o040000 {
6194        key.push(b'/');
6195    }
6196    key
6197}
6198
6199// --- Rename-aware non-recursive merge -------------------------------------
6200
/// Which side of the merge performed a rename.
#[derive(Clone, Copy, PartialEq, Eq)]
enum RenameSide {
    /// The rename was found in the base -> ours diff.
    Ours,
    /// The rename was found in the base -> theirs diff.
    Theirs,
}
6207
/// One detected one-sided rename: its source path and which side renamed it.
/// Stored in [`MergeRenames::dest_to_source`], keyed by the destination path.
#[derive(Clone)]
struct MergeRename {
    /// The pre-rename path (as it exists in base).
    source: Vec<u8>,
    /// The side that performed the rename.
    side: RenameSide,
}
6214
/// A file renamed on one side whose source was *deleted* on the other side — a
/// rename/delete conflict. git keeps the renamed content at the destination but
/// flags the merge as conflicted.
///
/// Stored in [`MergeRenames::rename_deletes`], keyed by the rename destination.
#[derive(Clone)]
struct RenameDelete {
    /// The pre-rename source path (deleted on the other side).
    source: Vec<u8>,
    /// Which side performed the rename (the other side deleted the source).
    side: RenameSide,
}
6225
/// The rename pairings discovered for one merge: which destination paths came
/// from which source path, and which side renamed (so the other side's change
/// can follow the rename and conflict labels can be path-qualified like git).
///
/// A given rename lands in at most one of the three maps: a rename/rename(1to2)
/// pairing evicts its two destinations from the other two maps.
#[derive(Default)]
struct MergeRenames {
    /// One-sided renames keyed by *destination* path. Only renames where the
    /// OTHER side kept/modified the source in place (and does not itself have
    /// the destination path) are recorded — the case where the modification
    /// must follow the rename.
    dest_to_source: BTreeMap<Vec<u8>, MergeRename>,
    /// Rename/delete conflicts: a file renamed on one side whose source the
    /// other side deleted. Keyed by destination path.
    rename_deletes: BTreeMap<Vec<u8>, RenameDelete>,
    /// Rename/rename(1to2) conflicts keyed by source path: both sides renamed
    /// the same source, to different destinations.
    rename_rename_one_to_two: BTreeMap<Vec<u8>, RenameRenameOneToTwo>,
}
6241
/// The two diverging destinations of a rename/rename(1to2) conflict: both
/// sides renamed the same source path, but to different places. The source
/// path itself is the key in [`MergeRenames::rename_rename_one_to_two`].
#[derive(Clone)]
struct RenameRenameOneToTwo {
    /// Where ours renamed the source to.
    ours_dest: Vec<u8>,
    /// Where theirs renamed the source to.
    theirs_dest: Vec<u8>,
}
6247
/// Every file rename observed on one side (base->side), as `(old, new)` pairs.
/// Unlike [`MergeRenames`] this is the *complete* rename set on a side — it is
/// the input to directory-rename inference, which needs to see all the per-file
/// moves between directories, not just the ones the other side kept in place.
struct SideRenames {
    /// `(old_path, new_path)` for each detected rename, in the order the
    /// rename-detecting diff reported them.
    pairs: Vec<(Vec<u8>, Vec<u8>)>,
}
6255
6256/// Detect one-sided renames usable for a non-recursive merge: a path present in
6257/// `base`, deleted on one side and present (renamed) at a new path on that same
6258/// side, while the OTHER side still has the original path (modified or
6259/// unchanged). Such a rename lets the other side's change move to the
6260/// destination.
6261///
6262/// Also returns the complete per-side rename set so the caller can infer
6263/// directory renames (which need every file move, not just the merge-relevant
6264/// ones).
6265fn detect_merge_renames(
6266    db: &FileObjectDatabase,
6267    format: ObjectFormat,
6268    base_map: &MergeEntryMap,
6269    ours_map: &MergeEntryMap,
6270    theirs_map: &MergeEntryMap,
6271    options: &MergeTreesOptions<'_>,
6272) -> Result<(MergeRenames, SideRenames, SideRenames)> {
6273    let mut renames = MergeRenames::default();
6274
6275    // Renames on ours: the other side that must carry its change is theirs.
6276    let ours_side = collect_side_renames(
6277        db,
6278        format,
6279        base_map,
6280        ours_map,
6281        theirs_map,
6282        RenameSide::Ours,
6283        options.rename_threshold,
6284        &mut renames,
6285    )?;
6286    // Renames on theirs: the other side that carries its change is ours.
6287    let theirs_side = collect_side_renames(
6288        db,
6289        format,
6290        base_map,
6291        theirs_map,
6292        ours_map,
6293        RenameSide::Theirs,
6294        options.rename_threshold,
6295        &mut renames,
6296    )?;
6297
6298    collect_rename_rename_one_to_two(&mut renames, &ours_side, &theirs_side);
6299
6300    Ok((renames, ours_side, theirs_side))
6301}
6302
6303fn collect_rename_rename_one_to_two(
6304    renames: &mut MergeRenames,
6305    ours_side: &SideRenames,
6306    theirs_side: &SideRenames,
6307) {
6308    let ours_by_source: BTreeMap<&[u8], &[u8]> = ours_side
6309        .pairs
6310        .iter()
6311        .map(|(old, new)| (old.as_slice(), new.as_slice()))
6312        .collect();
6313    for (old, theirs_new) in &theirs_side.pairs {
6314        let Some(ours_new) = ours_by_source.get(old.as_slice()) else {
6315            continue;
6316        };
6317        if *ours_new == theirs_new.as_slice() {
6318            continue;
6319        }
6320        renames.rename_deletes.remove(*ours_new);
6321        renames.rename_deletes.remove(theirs_new);
6322        renames.dest_to_source.remove(*ours_new);
6323        renames.dest_to_source.remove(theirs_new);
6324        renames.rename_rename_one_to_two.insert(
6325            old.clone(),
6326            RenameRenameOneToTwo {
6327                ours_dest: (*ours_new).to_vec(),
6328                theirs_dest: theirs_new.clone(),
6329            },
6330        );
6331    }
6332}
6333
6334/// Collect renames that occurred on `side` (relative to `base`). Records the
6335/// merge-relevant subset (renames the `other` side still references) into
6336/// `renames`, and returns the *complete* per-side rename set for directory-rename
6337/// inference. `db`/`format` resolve blob bytes for similarity scoring.
6338#[allow(clippy::too_many_arguments)]
6339fn collect_side_renames(
6340    db: &FileObjectDatabase,
6341    format: ObjectFormat,
6342    base_map: &MergeEntryMap,
6343    side_map: &MergeEntryMap,
6344    other_map: &MergeEntryMap,
6345    side: RenameSide,
6346    threshold: u8,
6347    renames: &mut MergeRenames,
6348) -> Result<SideRenames> {
6349    // Diff base->side with inexact rename detection; the resulting `Renamed`
6350    // entries name (old_path -> new_path) pairs on this side.
6351    let base_tree = entry_map_as_tracked(base_map);
6352    let side_tree = entry_map_as_tracked(side_map);
6353    let options = RenameDetectionOptions {
6354        base: DiffNameStatusOptions {
6355            detect_renames: true,
6356            detect_copies: false,
6357            find_copies_harder: false,
6358            rename_empty: false,
6359        },
6360        detect_inexact: true,
6361        rename_threshold: threshold,
6362        copy_threshold: threshold,
6363    };
6364    let changes = diff_name_status_maps_with_renames(
6365        &base_tree,
6366        &side_tree,
6367        base_tree.keys().chain(side_tree.keys()),
6368        options,
6369        |oid| merge_blob_bytes(db, oid).ok(),
6370    )?;
6371
6372    let mut pairs = Vec::new();
6373    for change in changes {
6374        let NameStatus::Renamed(_) = change.status else {
6375            continue;
6376        };
6377        let Some(old_path) = change.old_path.as_ref() else {
6378            continue;
6379        };
6380        let old = old_path.as_bytes().to_vec();
6381        let new = change.path.as_bytes().to_vec();
6382        // Complete rename set, fed to directory-rename inference.
6383        pairs.push((old.clone(), new.clone()));
6384
6385        // Only act when the destination is genuinely new (not already present
6386        // in either side from a different origin) and the OTHER side still
6387        // references the source path — i.e. the other side modified/kept `old`,
6388        // and its change should follow the rename to `new`.
6389        if !other_map.contains_key(&old) {
6390            // The source path is gone on the other side. If it existed in base
6391            // (so the other side *deleted* it) and the other side did not also
6392            // produce `new`, this is a rename/delete conflict: this side renamed
6393            // the file, the other side deleted its source.
6394            if base_map.contains_key(&old) && !other_map.contains_key(&new) {
6395                renames
6396                    .rename_deletes
6397                    .entry(new.clone())
6398                    .or_insert(RenameDelete {
6399                        source: old.clone(),
6400                        side,
6401                    });
6402            }
6403            continue;
6404        }
6405        // If the other side ALSO renamed/created `new`, that is a rename/rename
6406        // or rename/add corner case we leave to the path-keyed core (stage-b).
6407        if other_map.contains_key(&new) {
6408            continue;
6409        }
6410        // Skip if both sides renamed the same source to the same dest (already
6411        // recorded) or to anything (first writer wins; the path-keyed core then
6412        // sees identical dest entries and resolves trivially).
6413        renames
6414            .dest_to_source
6415            .entry(new)
6416            .or_insert(MergeRename { source: old, side });
6417    }
6418
6419    let _ = format;
6420    Ok(SideRenames { pairs })
6421}
6422
6423/// Rewrite the three side maps so that each detected one-sided rename old->new
6424/// presents the OTHER side's `old` entry at `new`, and removes `old` from
6425/// every side. The path-keyed merge core then performs the 3-way content merge
6426/// at `new` with base=base[old], one side = the renaming side's new content,
6427/// the other side = the modifying side's old content.
6428fn apply_merge_renames(
6429    base_map: &MergeEntryMap,
6430    ours_map: &MergeEntryMap,
6431    theirs_map: &MergeEntryMap,
6432    renames: &MergeRenames,
6433) -> (MergeEntryMap, MergeEntryMap, MergeEntryMap) {
6434    if renames.dest_to_source.is_empty() {
6435        return (base_map.clone(), ours_map.clone(), theirs_map.clone());
6436    }
6437    let mut base = base_map.clone();
6438    let mut ours = ours_map.clone();
6439    let mut theirs = theirs_map.clone();
6440
6441    for (new, rename) in &renames.dest_to_source {
6442        let old = &rename.source;
6443        // Move base[old] to base[new] so the destination has a proper ancestor.
6444        if let Some(entry) = base.remove(old) {
6445            base.entry(new.clone()).or_insert(entry);
6446        }
6447        // For each side, if it still has `old`, move that entry to `new`.
6448        for side in [&mut ours, &mut theirs] {
6449            if let Some(entry) = side.remove(old) {
6450                side.entry(new.clone()).or_insert(entry);
6451            }
6452        }
6453    }
6454    (base, ours, theirs)
6455}
6456
6457// --- Directory-rename detection -------------------------------------------
6458
/// Everything before the last `/` of `path`, or `None` when `path` contains no
/// `/` at all (a top-level name).
fn parent_dir(path: &[u8]) -> Option<&[u8]> {
    let last_slash = path.iter().rposition(|byte| *byte == b'/')?;
    Some(&path[..last_slash])
}
6463
/// Rewrite `path` for the directory rename `old_dir -> new_dir`.
///
/// `path` must live under `old_dir`. The `old_dir` prefix is swapped for
/// `new_dir`, e.g. `old_dir=z`, `new_dir=y`, `path=z/d` gives `y/d`. When
/// `new_dir` is empty (a rename into the repository root) the separator after
/// `old_dir` is dropped as well, so `z/d` becomes `d` rather than `/d`.
///
/// # Panics
/// Panics if `path` is shorter than the prefix being removed.
fn apply_dir_rename(old_dir: &[u8], new_dir: &[u8], path: &[u8]) -> Vec<u8> {
    // Skip one extra byte (the '/') when there is no new directory to keep it.
    let skip = old_dir.len() + usize::from(new_dir.is_empty());
    [new_dir, &path[skip..]].concat()
}
6479
6480/// Find the longest renamed ancestor directory of `path`: walk parent dirs from
6481/// the deepest up and return the first one present in `dir_renames`. Mirrors
6482/// merge-ort's `check_dir_renamed`.
6483fn check_dir_renamed<'a>(
6484    path: &[u8],
6485    dir_renames: &'a BTreeMap<Vec<u8>, Vec<u8>>,
6486) -> Option<(&'a [u8], &'a [u8])> {
6487    let mut cur = parent_dir(path);
6488    while let Some(dir) = cur {
6489        if let Some((old_dir, new_dir)) = dir_renames.get_key_value(dir) {
6490            return Some((old_dir.as_slice(), new_dir.as_slice()));
6491        }
6492        cur = parent_dir(dir);
6493    }
6494    None
6495}
6496
/// The provisional directory renames computed for both sides, plus the source
/// directories whose rename was ambiguous (a "split").
///
/// A directory that appears as a rename source on BOTH sides is absent from
/// both `ours` and `theirs` (see `compute_directory_renames`).
struct DirectoryRenameMaps {
    /// `old_dir -> new_dir` directory renames detected on ours' side. A path
    /// added/renamed by theirs under `old_dir` re-homes into `new_dir`.
    ours: BTreeMap<Vec<u8>, Vec<u8>>,
    /// Directory renames detected on theirs' side.
    theirs: BTreeMap<Vec<u8>, Vec<u8>>,
    /// Source directories whose split was unclear on ours' side (no unique
    /// majority target); paths on theirs that would need to follow such a rename
    /// are a conflict, not silent.
    ours_split: BTreeSet<Vec<u8>>,
    /// Source directories whose split was unclear on theirs' side.
    theirs_split: BTreeSet<Vec<u8>>,
}
6512
6513/// Infer directory renames from the complete per-side file-rename sets, mirroring
6514/// merge-ort's `get_provisional_directory_renames` + `handle_directory_level_conflicts`.
6515/// For every file moved `.../old_dir/x -> .../new_dir/x`, the ancestor pairs are
6516/// tallied (`dir_rename_count`) and collapsed to `old_dir -> best_new_dir` where
6517/// `best` is the unique highest count. A tie marks the source directory as a
6518/// "split". A rename is only kept if the source directory was *entirely removed*
6519/// on that side (the `dirs_removed` gate). A directory renamed on BOTH sides is
6520/// dropped from both maps (ambiguous).
6521fn compute_directory_renames(
6522    ours_map: &MergeEntryMap,
6523    theirs_map: &MergeEntryMap,
6524    ours_side: &SideRenames,
6525    theirs_side: &SideRenames,
6526) -> DirectoryRenameMaps {
6527    let ours = compute_side_dir_renames(&ours_side.pairs, ours_map);
6528    let theirs = compute_side_dir_renames(&theirs_side.pairs, theirs_map);
6529
6530    // A directory renamed on BOTH sides (to whatever target) is ambiguous;
6531    // git's handle_directory_level_conflicts drops it from both maps so neither
6532    // side's directory rename is applied.
6533    let mut ours_map_out = ours.renames;
6534    let mut theirs_map_out = theirs.renames;
6535    let dup: Vec<Vec<u8>> = ours_map_out
6536        .keys()
6537        .filter(|k| theirs_map_out.contains_key(*k))
6538        .cloned()
6539        .collect();
6540    for k in dup {
6541        ours_map_out.remove(&k);
6542        theirs_map_out.remove(&k);
6543    }
6544
6545    DirectoryRenameMaps {
6546        ours: ours_map_out,
6547        theirs: theirs_map_out,
6548        ours_split: ours.split,
6549        theirs_split: theirs.split,
6550    }
6551}
6552
/// Per-side directory-rename computation result.
struct SideDirRenames {
    /// Accepted `old_dir -> new_dir` renames: a unique best target exists and
    /// the source directory is fully removed on this side.
    renames: BTreeMap<Vec<u8>, Vec<u8>>,
    /// Source directories whose best target count was tied (no unique winner).
    split: BTreeSet<Vec<u8>>,
}
6558
6559/// Compute one side's `old_dir -> new_dir` map from its file renames, gated on
6560/// the source directory being fully removed on that side.
6561fn compute_side_dir_renames(
6562    pairs: &[(Vec<u8>, Vec<u8>)],
6563    side_map: &MergeEntryMap,
6564) -> SideDirRenames {
6565    // dir_rename_count: count[old_dir][new_dir]. Built by walking every rename's
6566    // ancestor directories while the *trailing* path components match, exactly
6567    // as merge-ort's update_dir_rename_counts does. For
6568    //   a/b/c/d/e/foo.c -> a/b/some/thing/else/e/foo.c
6569    // this records both
6570    //   a/b/c/d/e => a/b/some/thing/else/e   AND   a/b/c/d => a/b/some/thing/else
6571    // but stops once the trailing components diverge.
6572    let mut counts: BTreeMap<Vec<u8>, BTreeMap<Vec<u8>, usize>> = BTreeMap::new();
6573    for (old, new) in pairs {
6574        update_dir_rename_counts(&mut counts, old, new);
6575    }
6576
6577    let mut renames = BTreeMap::new();
6578    let mut split = BTreeSet::new();
6579    for (old_dir, targets) in counts {
6580        let mut max = 0usize;
6581        let mut bad_max = 0usize;
6582        let mut best: Option<Vec<u8>> = None;
6583        for (target, count) in &targets {
6584            if *count == max {
6585                bad_max = max;
6586            } else if *count > max {
6587                max = *count;
6588                best = Some(target.clone());
6589            }
6590        }
6591        if max == 0 {
6592            continue;
6593        }
6594        if bad_max == max {
6595            split.insert(old_dir);
6596            continue;
6597        }
6598        // dirs_removed gate: the source directory must be entirely gone on this
6599        // side. New files that recreate the old directory count too; otherwise
6600        // cases like "both sides renamed z/ -> y/, but one side added z/d"
6601        // incorrectly look like both sides performed a whole-directory rename.
6602        if let Some(best) = best
6603            && directory_fully_removed(&old_dir, side_map)
6604        {
6605            renames.insert(old_dir, best);
6606        }
6607    }
6608
6609    SideDirRenames { renames, split }
6610}
6611
6612/// Tally the ancestor directory-rename pairs implied by a single file rename
6613/// `old -> new`, mirroring merge-ort's `update_dir_rename_counts`. Starting from
6614/// the immediate parent dirs, we strip one trailing component at a time and
6615/// record `old_ancestor -> new_ancestor` as long as the *remaining* trailing
6616/// suffix still matches between the two paths.
6617fn update_dir_rename_counts(
6618    counts: &mut BTreeMap<Vec<u8>, BTreeMap<Vec<u8>, usize>>,
6619    old: &[u8],
6620    new: &[u8],
6621) {
6622    // Work on owned copies we progressively truncate at each '/'.
6623    let mut old_dir = old.to_vec();
6624    let mut new_dir = new.to_vec();
6625    let mut first = true;
6626    loop {
6627        // Strip the trailing component (basename on the first pass, then a dir
6628        // each pass) to ascend one level.
6629        let old_has = dir_munge(&mut old_dir);
6630        let new_has = dir_munge(&mut new_dir);
6631
6632        // On the first pass we only stripped the basename; the dirs need not
6633        // match. On later passes the *trailing* components must agree, otherwise
6634        // the rename no longer implies this ancestor pairing.
6635        if !first {
6636            let old_sub = trailing_component(old, &old_dir);
6637            let new_sub = trailing_component(new, &new_dir);
6638            if old_sub != new_sub {
6639                break;
6640            }
6641        }
6642
6643        if old_dir == new_dir {
6644            // Same directory at this level — no rename implied, and no deeper
6645            // ancestor can differ usefully either.
6646            break;
6647        }
6648        *counts
6649            .entry(old_dir.clone())
6650            .or_default()
6651            .entry(new_dir.clone())
6652            .or_default() += 1;
6653
6654        first = false;
6655        // Hitting the toplevel ("") on either side ends the ascent.
6656        if old_dir.is_empty() || new_dir.is_empty() {
6657            break;
6658        }
6659        // If the two ancestors are identical from here up, stop (git stops once
6660        // the suffix-equal walk reaches a common prefix).
6661        if !old_has || !new_has {
6662            break;
6663        }
6664    }
6665}
6666
/// Shorten `buf` in place to its parent directory: everything from the last
/// `/` onwards is cut off. A buffer without any `/` (a top-level name) is
/// emptied.
///
/// Returns `true` when a `/` was found, `false` otherwise.
fn dir_munge(buf: &mut Vec<u8>) -> bool {
    let last_slash = buf.iter().rposition(|b| *b == b'/');
    // No slash: truncating to zero length empties the buffer.
    buf.truncate(last_slash.unwrap_or(0));
    last_slash.is_some()
}
6682
/// The single path component of `full` that directly follows `dir`, i.e. the
/// component that was stripped to ascend from that level to `dir`.
///
/// `dir` is expected to be an ancestor of `full` (`full = dir + "/" + rest`);
/// an empty `dir` denotes the top level, in which case the first component of
/// `full` is returned. Only ONE component is returned, never the remainder of
/// the path: callers compare the results for the old and new side of a rename
/// to decide whether the two paths share the same trailing directory, and the
/// file basename must not take part in that comparison (a rename may change
/// the basename and still imply a directory rename further up).
///
/// Returns an empty slice when nothing follows `dir` in `full`.
fn trailing_component<'a>(full: &'a [u8], dir: &[u8]) -> &'a [u8] {
    let rest: &'a [u8] = if dir.is_empty() {
        full
    } else {
        // Skip `dir` and the '/' that follows it.
        full.get(dir.len() + 1..).unwrap_or(&[])
    };
    let end = rest
        .iter()
        .position(|b| *b == b'/')
        .unwrap_or(rest.len());
    &rest[..end]
}
6694
6695/// True when no path under `dir/` exists on `side` (the directory was entirely
6696/// removed there). Mirrors merge-ort's `dirs_removed` precondition.
6697fn directory_fully_removed(dir: &[u8], side_map: &MergeEntryMap) -> bool {
6698    let mut prefix = dir.to_vec();
6699    prefix.push(b'/');
6700    for path in side_map.keys() {
6701        if path.starts_with(&prefix) {
6702            return false;
6703        }
6704    }
6705    true
6706}
6707
/// A path on one side whose location is rewritten by a directory rename the
/// *other* side performed. The rewrite applies equally to a freshly added file
/// and to a file the side itself renamed (a transitive rename).
///
/// Produced by `plan_rehome`, one per destination that exactly one path maps
/// to.
struct DirRenameMove {
    /// The path as it currently sits in the side's effective map (the side's own
    /// rename, if any, already applied).
    from: Vec<u8>,
    /// The re-homed destination, after applying the other side's directory rename.
    to: Vec<u8>,
    /// `Some(source)` when `from` is a rename destination produced by this side
    /// (transitive rename); `None` for a fresh add. Drives git's
    /// "renamed to"/"added in" message wording.
    renamed_from: Option<Vec<u8>>,
}
6722
/// Two sources, one per side, that end up at the same destination `dest`.
///
/// NOTE(review): this type is not constructed or read in the surrounding
/// code shown here; the field descriptions below are inferred from the field
/// names only and should be confirmed against the code that builds it.
struct DirRenameTwoToOne {
    /// The shared destination path.
    dest: Vec<u8>,
    /// Presumably the path ours moved onto `dest` — TODO confirm.
    ours_source: Vec<u8>,
    /// Presumably the path theirs moved onto `dest` — TODO confirm.
    theirs_source: Vec<u8>,
    /// Presumably the path used to qualify ours' side in conflict labels —
    /// TODO confirm.
    ours_label_path: Vec<u8>,
    /// Presumably the path used to qualify theirs' side in conflict labels —
    /// TODO confirm.
    theirs_label_path: Vec<u8>,
}
6730
/// Provenance of a re-homed path, for `=conflict`-mode `CONFLICT (file location)`
/// reporting.
#[derive(Clone)]
struct RehomeInfo {
    /// The pre-re-home path on the adding/renaming side.
    old_path: Vec<u8>,
    /// `Some(source)` for a transitive rename, `None` for a fresh add.
    renamed_from: Option<Vec<u8>>,
    /// Whether the *adding/renaming* side was ours (true) or theirs (false). The
    /// caller resolves this to a branch label.
    added_on_ours: bool,
}
6743
/// Per-side provenance for a destination created by directory-rename rehoming.
/// Either side may have re-homed a path onto the same destination, so each has
/// its own optional record.
#[derive(Clone, Default)]
struct RehomeSides {
    /// Provenance of the path ours re-homed here, if any.
    ours: Option<RehomeInfo>,
    /// Provenance of the path theirs re-homed here, if any.
    theirs: Option<RehomeInfo>,
}
6750
/// An implicit-directory-rename collision: one or more paths a directory rename
/// would re-home onto `dest`, which is blocked because `dest` is already
/// occupied (a file in the way) or because multiple sources map to it. git emits
/// `CONFLICT (implicit dir rename): Existing file/dir at <dest> in the way ...`.
struct DirRenameCollision {
    /// The blocked destination path (the file/dir already there).
    dest: Vec<u8>,
    /// The source path(s) the directory rename tried to move onto `dest`.
    sources: Vec<Vec<u8>>,
}
6761
/// Outcome of applying directory renames to all three effective maps.
struct DirRenameOutcome {
    /// Rewritten base/ours/theirs maps with re-homed paths moved to their
    /// destinations. `base` moves too so a re-homed content-merge keeps its
    /// ancestor at the new location.
    base: MergeEntryMap,
    ours: MergeEntryMap,
    theirs: MergeEntryMap,
    /// Re-homed destination path -> provenance (for `=conflict`-mode reporting).
    rehomed: BTreeMap<Vec<u8>, RehomeSides>,
    /// Implicit-dir-rename collisions (file in the way / N-to-1), for the
    /// `CONFLICT (implicit dir rename)` message; always conflicts regardless of
    /// mode.
    collisions: Vec<DirRenameCollision>,
    /// Split source dirs that were relevant to a path on the other side.
    splits: BTreeSet<Vec<u8>>,
    /// True if a directory-level collision or split made the merge dirty even in
    /// `=true` mode (e.g. two paths re-homed onto one destination).
    dirty: bool,
    /// Informational messages collected while planning the moves (e.g. a
    /// directory rename that was skipped because the target directory was
    /// itself renamed).
    info_messages: Vec<MergeInfoMessage>,
}
6783
6784/// Apply directory renames to both sides' effective maps.
6785///
6786/// This mirrors merge-ort's `collect_renames` + `check_for_directory_rename` +
6787/// `apply_directory_rename_modifications`: every path a side *added* or *renamed*
6788/// that lives under a directory the OTHER side renamed has its destination
6789/// rewritten to follow that rename — making the directory rename a property of
6790/// the rename-detection pass that every path consults, not a per-file special
6791/// case. Handles:
6792///   - transitive renames (a file the side renamed into a dir the other side
6793///     renamed follows on into the final directory),
6794///   - `dir_rename_exclusions` (never re-home into a directory THIS side itself
6795///     renamed — that would create a spurious rename/rename(1to2)),
6796///   - collisions (N paths mapping to one destination -> conflict),
6797///   - splits (a source dir with no majority target -> conflict, leave in place).
6798#[allow(clippy::too_many_arguments)]
6799fn apply_directory_renames(
6800    base_map: &MergeEntryMap,
6801    eff_base: &MergeEntryMap,
6802    eff_ours: &MergeEntryMap,
6803    eff_theirs: &MergeEntryMap,
6804    ours_side: &SideRenames,
6805    theirs_side: &SideRenames,
6806    dir_renames: &DirectoryRenameMaps,
6807    file_rename_dests: &BTreeMap<Vec<u8>, MergeRename>,
6808) -> DirRenameOutcome {
6809    let mut base = eff_base.clone();
6810    let mut ours = eff_ours.clone();
6811    let mut theirs = eff_theirs.clone();
6812    let mut rehomed = BTreeMap::new();
6813    let mut collisions = Vec::new();
6814    let mut splits = BTreeSet::new();
6815    let mut info_messages = Vec::new();
6816    let mut dirty = false;
6817
6818    // Ours' paths follow THEIRS' directory renames; the exclusions are OURS' own
6819    // renamed-into dirs (never re-home a path into a directory this same side
6820    // renamed). Symmetrically for theirs.
6821    let ours_excl = exclusion_dirs(&dir_renames.ours);
6822    let theirs_excl = exclusion_dirs(&dir_renames.theirs);
6823
6824    // Plan ours' moves (following theirs' dir-renames) and theirs' moves
6825    // (following ours' dir-renames). Planning before applying lets us detect
6826    // collisions (N paths onto one destination) across the whole side.
6827    let ours_moves = plan_rehome(
6828        base_map,
6829        &ours,
6830        ours_side,
6831        &dir_renames.theirs,
6832        &ours_excl,
6833        &dir_renames.theirs_split,
6834        &mut collisions,
6835        &mut splits,
6836        &mut info_messages,
6837        &mut dirty,
6838    );
6839    let theirs_moves = plan_rehome(
6840        base_map,
6841        &theirs,
6842        theirs_side,
6843        &dir_renames.ours,
6844        &theirs_excl,
6845        &dir_renames.ours_split,
6846        &mut collisions,
6847        &mut splits,
6848        &mut info_messages,
6849        &mut dirty,
6850    );
6851
6852    apply_rehome_moves(
6853        base_map,
6854        file_rename_dests,
6855        &mut base,
6856        &mut ours,
6857        &mut theirs,
6858        ours_moves,
6859        true,
6860        &mut rehomed,
6861        &mut collisions,
6862        &mut dirty,
6863    );
6864    apply_rehome_moves(
6865        base_map,
6866        file_rename_dests,
6867        &mut base,
6868        &mut ours,
6869        &mut theirs,
6870        theirs_moves,
6871        false,
6872        &mut rehomed,
6873        &mut collisions,
6874        &mut dirty,
6875    );
6876
6877    DirRenameOutcome {
6878        base,
6879        ours,
6880        theirs,
6881        rehomed,
6882        collisions,
6883        splits,
6884        dirty,
6885        info_messages,
6886    }
6887}
6888
/// The *source* directories of a side's own directory renames (the keys of
/// its `old_dir -> new_dir` map).
///
/// These serve as that side's exclusions: a directory rename from the other
/// side whose target is one of these directories is not applied to this side's
/// paths, since it would produce a spurious rename/rename(1to2). Corresponds
/// to git's `dir_rename_exclusions`.
fn exclusion_dirs(side_dir_renames: &BTreeMap<Vec<u8>, Vec<u8>>) -> BTreeSet<Vec<u8>> {
    let mut sources = BTreeSet::new();
    for old_dir in side_dir_renames.keys() {
        sources.insert(old_dir.clone());
    }
    sources
}
6895
/// Plan the directory-rename moves for one side: which of its added/renamed
/// paths re-home where, following `renamer_dirs` (the OTHER side's dir-renames,
/// `old_dir -> new_dir`).
///
/// Candidates are the paths of `side` that are either absent from `base_map`
/// (freshly added files) or a destination of one of this side's own renames in
/// `side_renames` (the latter give the transitive-rename behaviour). For each
/// candidate, in order:
///   - if an ancestor directory is in `split_dirs`, that directory is added to
///     `splits`, `dirty` is set, and the path stays put;
///   - if no ancestor directory is a key of `renamer_dirs`, the path stays put;
///   - if the target directory is in `exclusions` (a dir this side itself
///     renamed away) — or lies inside one and this side has no pure add under
///     the old directory — the move is skipped and a
///     `DirRenameSkippedDueToRerename` message is pushed to `info_messages`;
///   - otherwise the move `path -> apply_dir_rename(..)` is planned, unless it
///     would map the path onto itself.
///
/// Destinations wanted by more than one path are N-to-1 collisions: they set
/// `dirty`, record a `DirRenameCollision` in `collisions`, and yield no move.
/// Returns the surviving single moves (one per destination), ordered by
/// destination path.
///
/// Returns early with no moves when both `renamer_dirs` and `split_dirs` are
/// empty.
#[allow(clippy::too_many_arguments)]
fn plan_rehome(
    base_map: &MergeEntryMap,
    side: &MergeEntryMap,
    side_renames: &SideRenames,
    renamer_dirs: &BTreeMap<Vec<u8>, Vec<u8>>,
    exclusions: &BTreeSet<Vec<u8>>,
    split_dirs: &BTreeSet<Vec<u8>>,
    collisions: &mut Vec<DirRenameCollision>,
    splits: &mut BTreeSet<Vec<u8>>,
    info_messages: &mut Vec<MergeInfoMessage>,
    dirty: &mut bool,
) -> Vec<DirRenameMove> {
    if renamer_dirs.is_empty() && split_dirs.is_empty() {
        return Vec::new();
    }

    // This side's rename destinations -> sources; eligible for a transitive
    // rewrite and carry the original source for message wording.
    let side_rename_src: BTreeMap<&[u8], &[u8]> = side_renames
        .pairs
        .iter()
        .map(|(o, n)| (n.as_slice(), o.as_slice()))
        .collect();

    // Snapshot the candidate paths up front: new on this side, or produced by
    // one of this side's own renames.
    let candidates: Vec<Vec<u8>> = side
        .keys()
        .filter(|p| !base_map.contains_key(*p) || side_rename_src.contains_key(p.as_slice()))
        .cloned()
        .collect();

    // dest -> the moves wanting to land there (collision detection).
    let mut planned: BTreeMap<Vec<u8>, Vec<DirRenameMove>> = BTreeMap::new();
    for path in candidates {
        // A split is checked before a rename lookup: an ambiguous source
        // directory is a conflict and the path is left in place.
        if let Some(split_dir) = check_dir_split(&path, split_dirs) {
            splits.insert(split_dir.to_vec());
            *dirty = true;
            continue;
        }
        let Some((old_dir, new_dir)) = check_dir_renamed(&path, renamer_dirs) else {
            continue;
        };
        // dir_rename_exclusions: don't apply a rename INTO a directory this side
        // itself renamed; that would cause a spurious rename/rename(1to2). The
        // file instead follows this side's own rename, so leave it.
        let new_dir_is_exclusion = exclusions.contains(new_dir);
        let new_dir_inside_exclusion = exclusions
            .iter()
            .any(|dir| directory_contains_proper(dir, new_dir));
        if new_dir_is_exclusion
            || (new_dir_inside_exclusion
                && !side_has_pure_add_under_dir(side, base_map, &side_rename_src, old_dir))
        {
            info_messages.push(MergeInfoMessage::DirRenameSkippedDueToRerename {
                old_dir: old_dir.to_vec(),
                path: path.clone(),
                new_dir: new_dir.to_vec(),
            });
            continue;
        }
        let dest = apply_dir_rename(old_dir, new_dir, &path);
        if dest == path {
            // Directory rename causes a rename-to-self: already in place.
            continue;
        }
        // `Some(source)` marks a transitive rename; `None` a fresh add.
        let renamed_from = side_rename_src.get(path.as_slice()).map(|s| s.to_vec());
        planned
            .entry(dest.clone())
            .or_default()
            .push(DirRenameMove {
                from: path,
                to: dest,
                renamed_from,
            });
    }

    let mut moves = Vec::new();
    for (dest, group) in planned {
        if group.len() > 1 {
            // Multiple paths map to one destination: an implicit-dir-rename
            // collision. git leaves all of them in place and conflicts.
            *dirty = true;
            collisions.push(DirRenameCollision {
                dest,
                sources: group.into_iter().map(|m| m.from).collect(),
            });
            continue;
        }
        // Groups are only created by pushing a move, so each holds at least one.
        moves.push(group.into_iter().next().expect("non-empty"));
    }
    moves
}
7002
7003fn check_dir_split<'a>(path: &[u8], split_dirs: &'a BTreeSet<Vec<u8>>) -> Option<&'a [u8]> {
7004    let mut dir = parent_dir(path)?;
7005    loop {
7006        if let Some(split_dir) = split_dirs.get(dir) {
7007            return Some(split_dir);
7008        }
7009        dir = parent_dir(dir)?;
7010    }
7011}
7012
/// Report whether `child` lies strictly beneath the directory `parent`.
///
/// True only when `parent` is non-empty and `child` consists of `parent`
/// immediately followed by a `/` separator. Equal paths and plain byte-prefix
/// matches (`ab` vs `abc`) are rejected.
fn directory_contains_proper(parent: &[u8], child: &[u8]) -> bool {
    if parent.is_empty() {
        return false;
    }
    match child.strip_prefix(parent) {
        // The byte right after the shared prefix must be the separator.
        Some(rest) => rest.first() == Some(&b'/'),
        None => false,
    }
}
7019
7020fn side_has_pure_add_under_dir(
7021    side: &MergeEntryMap,
7022    base_map: &MergeEntryMap,
7023    side_rename_src: &BTreeMap<&[u8], &[u8]>,
7024    dir: &[u8],
7025) -> bool {
7026    side.keys().any(|path| {
7027        path_is_under_dir(path, dir)
7028            && !base_map.contains_key(path)
7029            && !side_rename_src.contains_key(path.as_slice())
7030    })
7031}
7032
/// Report whether `path` names something strictly inside the directory `dir`.
///
/// `dir` must be non-empty, and `path` must be `dir` followed by a `/`
/// separator; `dir` itself does not count as being under `dir`.
fn path_is_under_dir(path: &[u8], dir: &[u8]) -> bool {
    if dir.is_empty() || path.len() <= dir.len() {
        return false;
    }
    // `path` is longer than `dir`, so `tail` has at least one byte.
    let (head, tail) = path.split_at(dir.len());
    head == dir && tail.starts_with(b"/")
}
7036
7037/// Apply a side's planned re-home moves to all three effective maps.
7038///
7039/// `side_is_ours` says whether the moves originate from ours' (true) or theirs'
7040/// (false) paths — used both for `=conflict`-mode provenance and to decide which
7041/// side's entry the move primarily belongs to. A move whose source is a
7042/// content-merge path (present on the other side and in base too) re-homes
7043/// across `base`/`ours`/`theirs` together, so the 3-way merge follows it to the
7044/// new location; a pure add re-homes only its own side.
7045#[allow(clippy::too_many_arguments)]
7046fn apply_rehome_moves(
7047    original_base: &MergeEntryMap,
7048    file_rename_dests: &BTreeMap<Vec<u8>, MergeRename>,
7049    base: &mut MergeEntryMap,
7050    ours: &mut MergeEntryMap,
7051    theirs: &mut MergeEntryMap,
7052    moves: Vec<DirRenameMove>,
7053    side_is_ours: bool,
7054    rehomed: &mut BTreeMap<Vec<u8>, RehomeSides>,
7055    collisions: &mut Vec<DirRenameCollision>,
7056    dirty: &mut bool,
7057) {
7058    for mv in moves {
7059        // A file in the way at the destination is only a blocker when it is
7060        // present on this same side (or in base). If the other side already
7061        // occupies the destination, applying this move produces the normal
7062        // two-sided conflict at that path (e.g. t6423 1d's rename/rename(2to1)).
7063        let occupied_on_this_side = if side_is_ours {
7064            ours.contains_key(&mv.to) || map_has_directory_at(ours, &mv.to)
7065        } else {
7066            theirs.contains_key(&mv.to) || map_has_directory_at(theirs, &mv.to)
7067        };
7068        let occupied_by_cross_rename =
7069            file_rename_dests
7070                .get(&mv.to)
7071                .is_some_and(|rename| match (side_is_ours, rename.side) {
7072                    (true, RenameSide::Theirs) | (false, RenameSide::Ours) => true,
7073                    (true, RenameSide::Ours) | (false, RenameSide::Theirs) => false,
7074                });
7075        let base_entry_at_dest = original_base.get(&mv.to).copied();
7076        let base_entry_at_source = original_base.get(&mv.from).copied();
7077        let other_side_entry_at_dest = if side_is_ours {
7078            theirs.get(&mv.to).copied()
7079        } else {
7080            ours.get(&mv.to).copied()
7081        };
7082        let other_side_entry_at_source = if side_is_ours {
7083            theirs.get(&mv.from).copied()
7084        } else {
7085            ours.get(&mv.from).copied()
7086        };
7087        let base_entry_for_shifted_source = base_entry_at_source.or(base_entry_at_dest);
7088        let rename_back_to_modified_source = mv
7089            .renamed_from
7090            .as_ref()
7091            .is_some_and(|source| source == &mv.to)
7092            && base_entry_at_dest.is_some()
7093            && (other_side_entry_at_dest.is_some_and(|entry| Some(entry) != base_entry_at_dest)
7094                || other_side_entry_at_source
7095                    .is_some_and(|entry| Some(entry) != base_entry_for_shifted_source));
7096        if ((base_entry_at_dest.is_some() && !rename_back_to_modified_source)
7097            || (occupied_on_this_side && !occupied_by_cross_rename))
7098            && mv.to != mv.from
7099        {
7100            *dirty = true;
7101            collisions.push(DirRenameCollision {
7102                dest: mv.to.clone(),
7103                sources: vec![mv.from.clone()],
7104            });
7105            continue;
7106        }
7107        let mut moved = false;
7108        if occupied_by_cross_rename {
7109            base.remove(&mv.from);
7110            if side_is_ours {
7111                if let Some(entry) = ours.remove(&mv.from) {
7112                    ours.insert(mv.to.clone(), entry);
7113                    moved = true;
7114                }
7115                theirs.remove(&mv.from);
7116            } else {
7117                ours.remove(&mv.from);
7118                if let Some(entry) = theirs.remove(&mv.from) {
7119                    theirs.insert(mv.to.clone(), entry);
7120                    moved = true;
7121                }
7122            }
7123        } else {
7124            // Move the path on every map that holds it (base for the ancestor,
7125            // and whichever sides carry content at the path). This keeps a
7126            // content-merge keyed consistently at the re-homed destination.
7127            for m in [&mut *base, &mut *ours, &mut *theirs] {
7128                if let Some(entry) = m.remove(&mv.from) {
7129                    m.insert(mv.to.clone(), entry);
7130                    moved = true;
7131                }
7132            }
7133        }
7134        if moved {
7135            let info = RehomeInfo {
7136                old_path: mv.from.clone(),
7137                renamed_from: mv.renamed_from.clone(),
7138                added_on_ours: side_is_ours,
7139            };
7140            let entry = rehomed.entry(mv.to.clone()).or_default();
7141            if side_is_ours {
7142                entry.ours = Some(info);
7143            } else {
7144                entry.theirs = Some(info);
7145            }
7146        }
7147    }
7148}
7149
7150fn collect_dir_rename_two_to_one(
7151    renames: &MergeRenames,
7152    rehomed: &BTreeMap<Vec<u8>, RehomeSides>,
7153) -> Vec<DirRenameTwoToOne> {
7154    let mut conflicts = Vec::new();
7155    for (dest, sides) in rehomed {
7156        let Some(file_rename) = renames.dest_to_source.get(dest) else {
7157            continue;
7158        };
7159        match file_rename.side {
7160            RenameSide::Ours => {
7161                let Some(info) = sides.theirs.as_ref() else {
7162                    continue;
7163                };
7164                let Some(theirs_source) = info.renamed_from.as_ref() else {
7165                    continue;
7166                };
7167                conflicts.push(DirRenameTwoToOne {
7168                    dest: dest.clone(),
7169                    ours_source: file_rename.source.clone(),
7170                    theirs_source: theirs_source.clone(),
7171                    ours_label_path: dest.clone(),
7172                    theirs_label_path: info.old_path.clone(),
7173                });
7174            }
7175            RenameSide::Theirs => {
7176                let Some(info) = sides.ours.as_ref() else {
7177                    continue;
7178                };
7179                let Some(ours_source) = info.renamed_from.as_ref() else {
7180                    continue;
7181                };
7182                conflicts.push(DirRenameTwoToOne {
7183                    dest: dest.clone(),
7184                    ours_source: ours_source.clone(),
7185                    theirs_source: file_rename.source.clone(),
7186                    ours_label_path: info.old_path.clone(),
7187                    theirs_label_path: dest.clone(),
7188                });
7189            }
7190        }
7191    }
7192    conflicts
7193}
7194
7195fn map_has_directory_at(map: &MergeEntryMap, path: &[u8]) -> bool {
7196    let mut prefix = path.to_vec();
7197    prefix.push(b'/');
7198    map.keys().any(|candidate| candidate.starts_with(&prefix))
7199}
7200
7201fn remap_rename_destinations(renames: &mut MergeRenames, rehomed: &BTreeMap<Vec<u8>, RehomeSides>) {
7202    if rehomed.is_empty() {
7203        return;
7204    }
7205    let mut remapped_deletes = BTreeMap::new();
7206    for (dest, rd) in std::mem::take(&mut renames.rename_deletes) {
7207        let new_dest = rehomed
7208            .iter()
7209            .find_map(|(new_dest, sides)| {
7210                let moved = sides
7211                    .ours
7212                    .as_ref()
7213                    .is_some_and(|info| info.old_path == dest)
7214                    || sides
7215                        .theirs
7216                        .as_ref()
7217                        .is_some_and(|info| info.old_path == dest);
7218                moved.then(|| new_dest.clone())
7219            })
7220            .unwrap_or(dest);
7221        remapped_deletes.insert(new_dest, rd);
7222    }
7223    renames.rename_deletes = remapped_deletes;
7224
7225    for rename in renames.rename_rename_one_to_two.values_mut() {
7226        for (dest, sides) in rehomed {
7227            if sides
7228                .ours
7229                .as_ref()
7230                .is_some_and(|info| info.old_path == rename.ours_dest)
7231            {
7232                rename.ours_dest = dest.clone();
7233            }
7234            if sides
7235                .theirs
7236                .as_ref()
7237                .is_some_and(|info| info.old_path == rename.theirs_dest)
7238            {
7239                rename.theirs_dest = dest.clone();
7240            }
7241        }
7242    }
7243}
7244
7245fn drop_collapsed_rename_rename_conflicts(renames: &mut MergeRenames) {
7246    renames
7247        .rename_rename_one_to_two
7248        .retain(|_, rename| rename.ours_dest != rename.theirs_dest);
7249}
7250
7251fn apply_dir_rename_two_to_one_conflicts(
7252    db: &FileObjectDatabase,
7253    eff_ours: &MergeEntryMap,
7254    eff_theirs: &MergeEntryMap,
7255    conflicts: &[DirRenameTwoToOne],
7256    paths: &mut [MergedPath],
7257    leaves: &mut MergeEntryMap,
7258    options: &MergeTreesOptions<'_>,
7259) -> Result<()> {
7260    for conflict in conflicts {
7261        let Some(slot) = paths.iter_mut().find(|path| path.path == conflict.dest) else {
7262            continue;
7263        };
7264        let ours_entry = eff_ours.get(&conflict.dest).copied();
7265        let theirs_entry = eff_theirs.get(&conflict.dest).copied();
7266        let (Some((ours_mode, ours_oid)), Some((theirs_mode, theirs_oid))) =
7267            (ours_entry, theirs_entry)
7268        else {
7269            continue;
7270        };
7271        let ours_bytes = merge_blob_bytes(db, &ours_oid)?;
7272        let theirs_bytes = merge_blob_bytes(db, &theirs_oid)?;
7273        let (resolved_mode, mode_conflict) = merge_file_modes(None, ours_mode, theirs_mode);
7274        let result = if is_mergeable_file_mode(ours_mode) && is_mergeable_file_mode(theirs_mode) {
7275            merge_blobs(
7276                &[],
7277                &ours_bytes,
7278                &theirs_bytes,
7279                &MergeBlobOptions {
7280                    ours_label: &qualify_label(options.ours_label, &conflict.ours_label_path),
7281                    theirs_label: &qualify_label(options.theirs_label, &conflict.theirs_label_path),
7282                    base_label: options.ancestor_label,
7283                    style: options.style,
7284                },
7285            )
7286        } else {
7287            MergeBlobResult {
7288                content: ours_bytes.clone(),
7289                conflicted: true,
7290            }
7291        };
7292        let oid = db.write_object(EncodedObject::new(ObjectType::Blob, result.content.clone()))?;
7293        leaves.insert(conflict.dest.clone(), (resolved_mode, oid));
7294        slot.stages = MergeStages {
7295            base: None,
7296            ours: ours_entry,
7297            theirs: theirs_entry,
7298        };
7299        slot.result = Some((resolved_mode, oid));
7300        slot.worktree = Some((
7301            if ours_mode == theirs_mode {
7302                ours_mode
7303            } else {
7304                0o100644
7305            },
7306            result.content,
7307        ));
7308        slot.conflict = Some(MergeConflictKind::RenameRenameTwoToOne {
7309            ours_path: conflict.ours_source.clone(),
7310            theirs_path: conflict.theirs_source.clone(),
7311        });
7312        slot.auto_merged = !mode_conflict;
7313    }
7314    Ok(())
7315}
7316
7317#[allow(clippy::too_many_arguments)]
7318fn apply_rename_rename_one_to_two_conflicts(
7319    db: &FileObjectDatabase,
7320    base_map: &MergeEntryMap,
7321    eff_ours: &MergeEntryMap,
7322    eff_theirs: &MergeEntryMap,
7323    conflicts: &BTreeMap<Vec<u8>, RenameRenameOneToTwo>,
7324    paths: &mut Vec<MergedPath>,
7325    leaves: &mut MergeEntryMap,
7326    options: &MergeTreesOptions<'_>,
7327) -> Result<()> {
7328    for (old_path, conflict) in conflicts {
7329        let base_entry = base_map.get(old_path).copied();
7330        let ours_entry = eff_ours.get(&conflict.ours_dest).copied();
7331        let theirs_entry = eff_theirs.get(&conflict.theirs_dest).copied();
7332        let theirs_add_at_ours_dest = eff_theirs.get(&conflict.ours_dest).copied();
7333        let ours_add_at_theirs_dest = eff_ours.get(&conflict.theirs_dest).copied();
7334
7335        leaves.remove(old_path);
7336        leaves.remove(&conflict.ours_dest);
7337        leaves.remove(&conflict.theirs_dest);
7338        paths.retain(|path| {
7339            path.path != *old_path
7340                && path.path != conflict.ours_dest
7341                && path.path != conflict.theirs_dest
7342        });
7343
7344        paths.push(MergedPath {
7345            path: old_path.clone(),
7346            stages: MergeStages {
7347                base: base_entry,
7348                ours: None,
7349                theirs: None,
7350            },
7351            result: None,
7352            worktree: None,
7353            conflict: Some(MergeConflictKind::RenameRenameOneToTwo {
7354                old_path: old_path.clone(),
7355                ours_path: conflict.ours_dest.clone(),
7356                theirs_path: conflict.theirs_dest.clone(),
7357                ours_label: options.ours_label.to_string(),
7358                theirs_label: options.theirs_label.to_string(),
7359            }),
7360            auto_merged: false,
7361        });
7362
7363        let ours_worktree = match ours_entry {
7364            Some((mode, oid)) => Some((mode, merge_worktree_bytes(db, mode, &oid)?)),
7365            None => None,
7366        };
7367        paths.push(MergedPath {
7368            path: conflict.ours_dest.clone(),
7369            stages: MergeStages {
7370                base: None,
7371                ours: ours_entry,
7372                theirs: theirs_add_at_ours_dest,
7373            },
7374            result: None,
7375            worktree: ours_worktree,
7376            conflict: Some(MergeConflictKind::RenameRenameOneToTwoStage),
7377            auto_merged: false,
7378        });
7379
7380        let theirs_worktree = match theirs_entry {
7381            Some((mode, oid)) => Some((mode, merge_worktree_bytes(db, mode, &oid)?)),
7382            None => None,
7383        };
7384        paths.push(MergedPath {
7385            path: conflict.theirs_dest.clone(),
7386            stages: MergeStages {
7387                base: None,
7388                ours: ours_add_at_theirs_dest,
7389                theirs: theirs_entry,
7390            },
7391            result: None,
7392            worktree: theirs_worktree,
7393            conflict: Some(MergeConflictKind::RenameRenameOneToTwoStage),
7394            auto_merged: false,
7395        });
7396    }
7397    Ok(())
7398}
7399
/// Produce the path-qualified conflict-marker label `"<label>:<path>"`, the
/// form git uses for renamed files so each side of a conflict names its own
/// path. Bytes of `path` that are not valid UTF-8 are rendered lossily.
fn qualify_label(label: &str, path: &[u8]) -> String {
    let shown_path = String::from_utf8_lossy(path);
    let mut qualified = String::with_capacity(label.len() + 1 + shown_path.len());
    qualified.push_str(label);
    qualified.push(':');
    qualified.push_str(&shown_path);
    qualified
}
7405
7406/// Adapt a flat `path -> (mode, oid)` map into the `TrackedEntry` map the
7407/// name-status diff core consumes.
7408fn entry_map_as_tracked(map: &MergeEntryMap) -> BTreeMap<Vec<u8>, TrackedEntry> {
7409    map.iter()
7410        .map(|(path, (mode, oid))| {
7411            (
7412                path.clone(),
7413                TrackedEntry {
7414                    mode: *mode,
7415                    oid: *oid,
7416                },
7417            )
7418        })
7419        .collect()
7420}
7421
7422#[cfg(test)]
7423mod tests {
7424    use super::*;
7425    use sley_formats::RepositoryLayout;
7426    use sley_object::TreeEntry;
7427    use sley_odb::ObjectWriter;
7428    use std::path::PathBuf;
7429    use std::sync::atomic::{AtomicU64, Ordering};
7430
7431    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
7432
7433    #[test]
7434    fn name_status_reports_added_from_index() {
7435        let root = temp_root();
7436        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
7437            .expect("test operation should succeed");
7438        let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
7439        let oid = db
7440            .write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
7441            .expect("test operation should succeed");
7442        let index = Index {
7443            version: 2,
7444            entries: vec![sley_index::IndexEntry {
7445                ctime_seconds: 0,
7446                ctime_nanoseconds: 0,
7447                mtime_seconds: 0,
7448                mtime_nanoseconds: 0,
7449                dev: 0,
7450                ino: 0,
7451                mode: 0o100644,
7452                uid: 0,
7453                gid: 0,
7454                size: 6,
7455                oid,
7456                flags: "hello.txt".len() as u16,
7457                flags_extended: 0,
7458                path: BString::from(b"hello.txt"),
7459            }],
7460            extensions: Vec::new(),
7461            checksum: None,
7462        };
7463        fs::write(
7464            layout.git_dir.join("index"),
7465            index
7466                .write_v2_sha1()
7467                .expect("test operation should succeed"),
7468        )
7469        .expect("test operation should succeed");
7470        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
7471        let changes = diff_name_status_head_worktree(&root, &layout.git_dir, ObjectFormat::Sha1)
7472            .expect("test operation should succeed");
7473        assert_eq!(changes[0].line(), "A\thello.txt");
7474        fs::remove_dir_all(root).expect("test operation should succeed");
7475    }
7476
7477    #[test]
7478    fn index_worktree_diff_returns_staged_gitlinks() {
7479        let root = temp_root();
7480        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
7481            .expect("test operation should succeed");
7482        let oid = ObjectId::from_hex(
7483            ObjectFormat::Sha1,
7484            "1111111111111111111111111111111111111111",
7485        )
7486        .expect("test operation should succeed");
7487        let index = Index {
7488            version: 2,
7489            entries: vec![sley_index::IndexEntry {
7490                ctime_seconds: 0,
7491                ctime_nanoseconds: 0,
7492                mtime_seconds: 0,
7493                mtime_nanoseconds: 0,
7494                dev: 0,
7495                ino: 0,
7496                mode: sley_index::GITLINK_MODE,
7497                uid: 0,
7498                gid: 0,
7499                size: 0,
7500                oid,
7501                flags: "deps/sub".len() as u16,
7502                flags_extended: 0,
7503                path: BString::from(b"deps/sub"),
7504            }],
7505            extensions: Vec::new(),
7506            checksum: None,
7507        };
7508        fs::write(
7509            layout.git_dir.join("index"),
7510            index
7511                .write_v2_sha1()
7512                .expect("test operation should succeed"),
7513        )
7514        .expect("test operation should succeed");
7515
7516        let diff = diff_name_status_index_worktree_with_options_and_gitlinks(
7517            &root,
7518            &layout.git_dir,
7519            ObjectFormat::Sha1,
7520            DiffNameStatusOptions::default(),
7521        )
7522        .expect("test operation should succeed");
7523
7524        assert_eq!(diff.entries.len(), 1);
7525        let gitlinks = diff.staged_gitlinks;
7526        assert_eq!(gitlinks.len(), 1);
7527        assert_eq!(gitlinks[0].path.as_bytes(), b"deps/sub");
7528        assert_eq!(gitlinks[0].oid, oid);
7529        fs::remove_dir_all(root).expect("test operation should succeed");
7530    }
7531
7532    #[cfg(unix)]
7533    #[test]
7534    fn index_worktree_diff_ignores_untracked_dangling_symlink() {
7535        use std::os::unix::fs::symlink;
7536
7537        let root = temp_root();
7538        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
7539            .expect("test operation should succeed");
7540        let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
7541        let oid = db
7542            .write_object(EncodedObject::new(ObjectType::Blob, b"clean\n".to_vec()))
7543            .expect("test operation should succeed");
7544        let index = Index {
7545            version: 2,
7546            entries: vec![sley_index::IndexEntry {
7547                ctime_seconds: 0,
7548                ctime_nanoseconds: 0,
7549                mtime_seconds: 0,
7550                mtime_nanoseconds: 0,
7551                dev: 0,
7552                ino: 0,
7553                mode: 0o100644,
7554                uid: 0,
7555                gid: 0,
7556                size: 6,
7557                oid,
7558                flags: "tracked.txt".len() as u16,
7559                flags_extended: 0,
7560                path: BString::from(b"tracked.txt"),
7561            }],
7562            extensions: Vec::new(),
7563            checksum: None,
7564        };
7565        fs::write(
7566            layout.git_dir.join("index"),
7567            index
7568                .write_v2_sha1()
7569                .expect("test operation should succeed"),
7570        )
7571        .expect("test operation should succeed");
7572        fs::write(root.join("tracked.txt"), b"clean\n").expect("test operation should succeed");
7573        symlink("missing-target", root.join("untracked-link"))
7574            .expect("test operation should succeed");
7575
7576        let changes = diff_name_status_index_worktree_with_options(
7577            &root,
7578            &layout.git_dir,
7579            ObjectFormat::Sha1,
7580            DiffNameStatusOptions {
7581                detect_renames: false,
7582                detect_copies: false,
7583                find_copies_harder: false,
7584                rename_empty: true,
7585            },
7586        )
7587        .expect("untracked dangling symlink should be ignored");
7588        assert!(changes.is_empty());
7589        fs::remove_dir_all(root).expect("test operation should succeed");
7590    }
7591
7592    #[test]
7593    fn index_worktree_diff_trusts_non_racy_stat_cache() {
7594        let root = temp_root();
7595        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
7596            .expect("test operation should succeed");
7597        let worktree_path = root.join("tracked.txt");
7598        fs::write(&worktree_path, b"clean\n").expect("test operation should succeed");
7599        let metadata = fs::symlink_metadata(&worktree_path).expect("test operation should succeed");
7600        let (mtime_seconds, mtime_nanoseconds) =
7601            sley_index::file_mtime_parts(&metadata).expect("test operation should succeed");
7602        let bogus_oid = ObjectId::from_hex(
7603            ObjectFormat::Sha1,
7604            "1111111111111111111111111111111111111111",
7605        )
7606        .expect("test operation should succeed");
7607        let index = Index {
7608            version: 2,
7609            entries: vec![sley_index::IndexEntry {
7610                ctime_seconds: 0,
7611                ctime_nanoseconds: 0,
7612                mtime_seconds: mtime_seconds as u32,
7613                mtime_nanoseconds: mtime_nanoseconds as u32,
7614                dev: 0,
7615                ino: 0,
7616                mode: sley_index::worktree_metadata_mode(&metadata),
7617                uid: 0,
7618                gid: 0,
7619                size: metadata.len() as u32,
7620                oid: bogus_oid,
7621                flags: "tracked.txt".len() as u16,
7622                flags_extended: 0,
7623                path: BString::from(b"tracked.txt"),
7624            }],
7625            extensions: Vec::new(),
7626            checksum: None,
7627        };
7628        std::thread::sleep(std::time::Duration::from_millis(1100));
7629        fs::write(
7630            layout.git_dir.join("index"),
7631            index
7632                .write_v2_sha1()
7633                .expect("test operation should succeed"),
7634        )
7635        .expect("test operation should succeed");
7636
7637        let changes = diff_name_status_index_worktree(&root, &layout.git_dir, ObjectFormat::Sha1)
7638            .expect("test operation should succeed");
7639        assert!(
7640            changes.is_empty(),
7641            "a clean non-racy stat match must reuse the cached index oid"
7642        );
7643        fs::remove_dir_all(root).expect("test operation should succeed");
7644    }
7645
7646    fn temp_root() -> PathBuf {
7647        let path = std::env::temp_dir().join(format!(
7648            "sley-diff-{}-{}",
7649            std::process::id(),
7650            TEMP_COUNTER.fetch_add(1, Ordering::Relaxed)
7651        ));
7652        fs::create_dir_all(&path).expect("test operation should succeed");
7653        path
7654    }
7655
7656    // ---- line diff / blob merge tests ---------------------------------------
7657
7658    fn merge_opts() -> MergeBlobOptions<'static> {
7659        MergeBlobOptions {
7660            ours_label: "ours",
7661            theirs_label: "theirs",
7662            base_label: "base",
7663            style: ConflictStyle::Merge,
7664        }
7665    }
7666
7667    #[test]
7668    fn split_lines_preserves_content_and_newlines() {
7669        let lines = split_lines(b"a\nb\nc\n");
7670        assert_eq!(lines.len(), 3);
7671        assert_eq!(lines[0].content, b"a\n");
7672        assert!(lines[0].has_newline);
7673        assert_eq!(lines[2].content, b"c\n");
7674        assert!(lines[2].has_newline);
7675        assert!(split_lines(b"").is_empty());
7676    }
7677
7678    #[test]
7679    fn split_lines_tracks_missing_final_newline() {
7680        let lines = split_lines(b"a\nb");
7681        assert_eq!(lines.len(), 2);
7682        assert!(lines[0].has_newline);
7683        assert!(!lines[1].has_newline);
7684        assert_eq!(lines[1].content, b"b");
7685        assert_eq!(lines[1].bytes_without_newline(), b"b");
7686        // A line that lost its newline must not compare equal to one that has it.
7687        let with_nl = split_lines(b"b\n");
7688        assert_ne!(lines[1], with_nl[0]);
7689    }
7690
7691    #[test]
7692    fn myers_replace_single_line() {
7693        let old = split_lines(b"a\nb\nc\n");
7694        let new = split_lines(b"a\nx\nc\n");
7695        assert_eq!(
7696            myers_diff_lines(&old, &new),
7697            vec![
7698                DiffOp::Equal(1),
7699                DiffOp::Delete(1),
7700                DiffOp::Insert(1),
7701                DiffOp::Equal(1),
7702            ]
7703        );
7704    }
7705
7706    #[test]
7707    fn myers_identical_is_single_equal() {
7708        let old = split_lines(b"a\nb\nc\n");
7709        let new = split_lines(b"a\nb\nc\n");
7710        assert_eq!(myers_diff_lines(&old, &new), vec![DiffOp::Equal(3)]);
7711    }
7712
7713    #[test]
7714    fn myers_pure_insert_and_delete() {
7715        let empty = split_lines(b"");
7716        let two = split_lines(b"a\nb\n");
7717        assert_eq!(myers_diff_lines(&empty, &two), vec![DiffOp::Insert(2)]);
7718        assert_eq!(myers_diff_lines(&two, &empty), vec![DiffOp::Delete(2)]);
7719
7720        let old = split_lines(b"a\nb\nc\nd\n");
7721        let new = split_lines(b"a\nc\nd\n");
7722        assert_eq!(
7723            myers_diff_lines(&old, &new),
7724            vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Equal(2)]
7725        );
7726    }
7727
7728    #[test]
7729    fn myers_reconstructs_new_and_is_minimal() {
7730        // Apply the script to `old` and confirm it yields `new`; also count edits.
7731        let old = split_lines(b"the\nquick\nbrown\nfox\n");
7732        let new = split_lines(b"the\nlazy\nbrown\ncat\n");
7733        let ops = myers_diff_lines(&old, &new);
7734        let mut oi = 0usize;
7735        let mut ni = 0usize;
7736        let mut edits = 0usize;
7737        let mut rebuilt: Vec<u8> = Vec::new();
7738        for op in &ops {
7739            match *op {
7740                DiffOp::Equal(n) => {
7741                    for _ in 0..n {
7742                        assert_eq!(old[oi], new[ni]);
7743                        rebuilt.extend_from_slice(old[oi].content);
7744                        oi += 1;
7745                        ni += 1;
7746                    }
7747                }
7748                DiffOp::Delete(n) => {
7749                    oi += n;
7750                    edits += n;
7751                }
7752                DiffOp::Insert(n) => {
7753                    for _ in 0..n {
7754                        rebuilt.extend_from_slice(new[ni].content);
7755                        ni += 1;
7756                    }
7757                    edits += n;
7758                }
7759            }
7760        }
7761        assert_eq!(rebuilt, b"the\nlazy\nbrown\ncat\n");
7762        // Two lines changed -> 2 deletes + 2 inserts is the minimal SES here.
7763        assert_eq!(edits, 4);
7764    }
7765
7766    #[test]
7767    fn merge_non_overlapping_changes_is_clean() {
7768        let base = b"a\nb\nc\nd\ne\n";
7769        let ours = b"A\nb\nc\nd\ne\n";
7770        let theirs = b"a\nb\nc\nd\nE\n";
7771        let result = merge_blobs(base, ours, theirs, &merge_opts());
7772        assert!(!result.conflicted);
7773        assert_eq!(result.content, b"A\nb\nc\nd\nE\n");
7774    }
7775
7776    #[test]
7777    fn merge_identical_changes_no_conflict() {
7778        let base = b"a\nb\nc\n";
7779        let ours = b"a\nX\nc\n";
7780        let theirs = b"a\nX\nc\n";
7781        let result = merge_blobs(base, ours, theirs, &merge_opts());
7782        assert!(!result.conflicted);
7783        assert_eq!(result.content, b"a\nX\nc\n");
7784    }
7785
7786    #[test]
7787    fn merge_overlapping_change_emits_exact_markers() {
7788        let base = b"a\nb\nc\n";
7789        let ours = b"a\nOURS\nc\n";
7790        let theirs = b"a\nTHEIRS\nc\n";
7791        let result = merge_blobs(base, ours, theirs, &merge_opts());
7792        assert!(result.conflicted);
7793        assert_eq!(
7794            result.content,
7795            b"a\n<<<<<<< ours\nOURS\n=======\nTHEIRS\n>>>>>>> theirs\nc\n".to_vec(),
7796        );
7797    }
7798
7799    #[test]
7800    fn merge_diff3_style_includes_base_section() {
7801        let base = b"a\nb\nc\n";
7802        let ours = b"a\nOURS\nc\n";
7803        let theirs = b"a\nTHEIRS\nc\n";
7804        let options = MergeBlobOptions {
7805            style: ConflictStyle::Diff3,
7806            ..merge_opts()
7807        };
7808        let result = merge_blobs(base, ours, theirs, &options);
7809        assert!(result.conflicted);
7810        assert_eq!(
7811            result.content,
7812            b"a\n<<<<<<< ours\nOURS\n||||||| base\nb\n=======\nTHEIRS\n>>>>>>> theirs\nc\n"
7813                .to_vec(),
7814        );
7815    }
7816
7817    #[test]
7818    fn merge_empty_label_omits_trailing_space() {
7819        let base = b"a\nb\nc\n";
7820        let ours = b"a\nOURS\nc\n";
7821        let theirs = b"a\nTHEIRS\nc\n";
7822        let options = MergeBlobOptions {
7823            ours_label: "",
7824            theirs_label: "",
7825            base_label: "",
7826            style: ConflictStyle::Merge,
7827        };
7828        let result = merge_blobs(base, ours, theirs, &options);
7829        assert!(result.conflicted);
7830        // No trailing space after the 7 marker chars when the label is empty.
7831        assert_eq!(
7832            result.content,
7833            b"a\n<<<<<<<\nOURS\n=======\nTHEIRS\n>>>>>>>\nc\n".to_vec(),
7834        );
7835    }
7836
7837    #[test]
7838    fn merge_add_add_empty_base_conflicts() {
7839        let result = merge_blobs(b"", b"x\ny\n", b"p\nq\n", &merge_opts());
7840        assert!(result.conflicted);
7841        assert_eq!(
7842            result.content,
7843            b"<<<<<<< ours\nx\ny\n=======\np\nq\n>>>>>>> theirs\n".to_vec(),
7844        );
7845    }
7846
7847    #[test]
7848    fn merge_add_add_empty_base_identical_is_clean() {
7849        let result = merge_blobs(b"", b"x\ny\n", b"x\ny\n", &merge_opts());
7850        assert!(!result.conflicted);
7851        assert_eq!(result.content, b"x\ny\n");
7852    }
7853
7854    #[test]
7855    fn merge_deletion_one_side_takes_deletion() {
7856        // ours deletes line b; theirs leaves it -> clean, deletion wins.
7857        let result = merge_blobs(b"a\nb\nc\n", b"a\nc\n", b"a\nb\nc\n", &merge_opts());
7858        assert!(!result.conflicted);
7859        assert_eq!(result.content, b"a\nc\n");
7860    }
7861
7862    #[test]
7863    fn merge_deletion_vs_modification_conflicts() {
7864        // ours deletes b; theirs modifies b -> conflict.
7865        let result = merge_blobs(b"a\nb\nc\n", b"a\nc\n", b"a\nB!\nc\n", &merge_opts());
7866        assert!(result.conflicted);
7867        // ours side of the conflict is empty (the line was deleted).
7868        assert_eq!(
7869            result.content,
7870            b"a\n<<<<<<< ours\n=======\nB!\n>>>>>>> theirs\nc\n".to_vec(),
7871        );
7872    }
7873
7874    #[test]
7875    fn merge_missing_final_newline_marker_starts_on_own_line() {
7876        // Both sides drop the trailing newline AND conflict at the end. The
7877        // closing marker section must still begin on its own line.
7878        let base = b"a\nb";
7879        let ours = b"a\nOURS";
7880        let theirs = b"a\nTHEIRS";
7881        let result = merge_blobs(base, ours, theirs, &merge_opts());
7882        assert!(result.conflicted);
7883        assert_eq!(
7884            result.content,
7885            b"a\n<<<<<<< ours\nOURS\n=======\nTHEIRS\n>>>>>>> theirs\n".to_vec(),
7886        );
7887    }
7888
7889    #[test]
7890    fn merge_clean_preserves_missing_final_newline() {
7891        // ours removes the trailing newline; theirs is unchanged -> ours wins,
7892        // and the result keeps the missing newline.
7893        let result = merge_blobs(b"a\nb\n", b"a\nb", b"a\nb\n", &merge_opts());
7894        assert!(!result.conflicted);
7895        assert_eq!(result.content, b"a\nb");
7896    }
7897
7898    #[test]
7899    fn merge_both_append_identical_tail_is_clean() {
7900        let result = merge_blobs(b"a\n", b"a\nz\n", b"a\nz\n", &merge_opts());
7901        assert!(!result.conflicted);
7902        assert_eq!(result.content, b"a\nz\n");
7903    }
7904
7905    #[test]
7906    fn merge_when_ours_equals_base_yields_theirs() {
7907        // Regression: a side that did not change must not suppress the other
7908        // side's edits anywhere in the file.
7909        let base = b"b\na\n";
7910        let theirs = b"b\nb\nc\na\nc\n";
7911        let result = merge_blobs(base, base, theirs, &merge_opts());
7912        assert!(!result.conflicted);
7913        assert_eq!(result.content, theirs.to_vec());
7914    }
7915    fn applied(outcome: ApplyOutcome) -> Vec<u8> {
7916        match outcome {
7917            ApplyOutcome::Applied(bytes) => bytes,
7918            ApplyOutcome::Rejected => panic!("expected Applied, got Rejected"),
7919        }
7920    }
7921
7922    #[test]
7923    fn parse_multi_file_patch() {
7924        let patch = b"\
7925diff --git a/one.txt b/one.txt
7926index aaaaaaa..bbbbbbb 100644
7927--- a/one.txt
7928+++ b/one.txt
7929@@ -1,3 +1,3 @@
7930 alpha
7931-beta
7932+BETA
7933 gamma
7934diff --git a/two.txt b/two.txt
7935index ccccccc..ddddddd 100644
7936--- a/two.txt
7937+++ b/two.txt
7938@@ -1,2 +1,3 @@
7939 first
7940+inserted
7941 second
7942";
7943        let patches = parse_unified_patch(patch).expect("test operation should succeed");
7944        assert_eq!(patches.len(), 2);
7945
7946        assert_eq!(patches[0].old_path.as_deref(), Some(b"one.txt".as_slice()));
7947        assert_eq!(patches[0].new_path.as_deref(), Some(b"one.txt".as_slice()));
7948        assert_eq!(patches[0].old_mode, None);
7949        assert_eq!(patches[0].hunks.len(), 1);
7950        let h = &patches[0].hunks[0];
7951        assert_eq!(
7952            (h.old_start, h.old_len, h.new_start, h.new_len),
7953            (1, 3, 1, 3)
7954        );
7955        assert_eq!(
7956            h.lines,
7957            vec![
7958                HunkLine::Context(b"alpha".to_vec()),
7959                HunkLine::Delete(b"beta".to_vec()),
7960                HunkLine::Insert(b"BETA".to_vec()),
7961                HunkLine::Context(b"gamma".to_vec()),
7962            ]
7963        );
7964
7965        assert_eq!(patches[1].new_path.as_deref(), Some(b"two.txt".as_slice()));
7966        assert_eq!(patches[1].hunks[0].new_len, 3);
7967    }
7968
7969    #[test]
7970    fn parse_default_hunk_range_length() {
7971        // `@@ -1 +1,2 @@` (no comma) means a length of 1 on the old side.
7972        let patch = b"\
7973--- a/x
7974+++ b/x
7975@@ -1 +1,2 @@
7976 line
7977+added
7978";
7979        let patches = parse_unified_patch(patch).expect("test operation should succeed");
7980        let h = &patches[0].hunks[0];
7981        assert_eq!(
7982            (h.old_start, h.old_len, h.new_start, h.new_len),
7983            (1, 1, 1, 2)
7984        );
7985    }
7986
7987    #[test]
7988    fn parse_hunk_header_before_file_errors() {
7989        let patch = b"@@ -1,1 +1,1 @@\n context\n";
7990        assert!(parse_unified_patch(patch).is_err());
7991    }
7992
7993    #[test]
7994    fn parse_mismatched_counts_errors() {
7995        // Header promises two old lines but only one is present.
7996        let patch = b"--- a/x\n+++ b/x\n@@ -1,2 +1,2 @@\n only\n+new\n";
7997        assert!(parse_unified_patch(patch).is_err());
7998    }
7999
8000    #[test]
8001    fn apply_clean_hunk() {
8002        let base = b"alpha\nbeta\ngamma\n";
8003        let patch = parse_unified_patch(
8004            b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
8005        )
8006        .expect("test operation should succeed");
8007        let out = applied(apply_file_patch(base, &patch[0]));
8008        assert_eq!(out, b"alpha\nBETA\ngamma\n");
8009    }
8010
8011    #[test]
8012    fn apply_with_line_offset() {
8013        // The hunk's recorded position (line 2) is a couple of lines above where
8014        // the matching context actually lives (line 4); the outward search must
8015        // find it. The hunk is NOT anchored at the file start (old_start > 1, so
8016        // no match_beginning) and has trailing context (`tail`, so no
8017        // match_end), which is exactly the shape a real drifted patch takes —
8018        // verified against `git apply` ("Hunk #1 succeeded at 4 (offset 2)").
8019        let base = b"pre1\npre2\npre3\nalpha\nbeta\ngamma\ntail\n";
8020        let patch = parse_unified_patch(
8021            b"--- a/x\n+++ b/x\n@@ -2,4 +2,4 @@\n alpha\n-beta\n+BETA\n gamma\n tail\n",
8022        )
8023        .expect("test operation should succeed");
8024        let out = applied(apply_file_patch(base, &patch[0]));
8025        assert_eq!(out, b"pre1\npre2\npre3\nalpha\nBETA\ngamma\ntail\n");
8026    }
8027
8028    #[test]
8029    fn apply_with_negative_line_offset() {
8030        // Recorded position is well past the real location; search backward.
8031        let base = b"alpha\nbeta\ngamma\n";
8032        let patch = parse_unified_patch(
8033            b"--- a/x\n+++ b/x\n@@ -50,3 +50,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
8034        )
8035        .expect("test operation should succeed");
8036        let out = applied(apply_file_patch(base, &patch[0]));
8037        assert_eq!(out, b"alpha\nBETA\ngamma\n");
8038    }
8039
8040    #[test]
8041    fn apply_multiple_hunks() {
8042        let base = b"a\nb\nc\nd\ne\nf\ng\nh\n";
8043        let patch = parse_unified_patch(
8044            b"--- a/x\n+++ b/x\n\
8045@@ -1,3 +1,3 @@\n a\n-b\n+B\n c\n\
8046@@ -6,3 +6,3 @@\n f\n-g\n+G\n h\n",
8047        )
8048        .expect("test operation should succeed");
8049        let out = applied(apply_file_patch(base, &patch[0]));
8050        assert_eq!(out, b"a\nB\nc\nd\ne\nf\nG\nh\n");
8051    }
8052
8053    #[test]
8054    fn reject_on_context_mismatch() {
8055        let base = b"alpha\nDIFFERENT\ngamma\n";
8056        let patch = parse_unified_patch(
8057            b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
8058        )
8059        .expect("test operation should succeed");
8060        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
8061    }
8062
8063    #[test]
8064    fn reject_when_match_end_required_but_not_at_eof() {
8065        // git's `apply.c`: a hunk with NO trailing context must match the END of
8066        // the file (`match_end`). Here the leading context (`tail`/`anchor`)
8067        // matches at the middle of the base, but there are further lines after
8068        // it, so the preimage does not reach EOF. git rejects this; the old
8069        // sley matcher wrongly applied it (duplicating the appended block). This
8070        // is the t4150-am cell-34 lever: rejection forces `am -3`'s 3-way path.
8071        let base = b"one\ntwo\nanchor\nalready\nappended\n";
8072        // Hunk: context `anchor`, then append `added1`/`added2`. No trailing
8073        // context => match_end. At line 3 (`anchor`) the preimage is just one
8074        // line and does not reach EOF, so it must be rejected.
8075        let patch =
8076            parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -3,1 +3,3 @@\n anchor\n+added1\n+added2\n")
8077                .expect("test operation should succeed");
8078        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
8079    }
8080
8081    #[test]
8082    fn append_at_eof_matches_when_context_reaches_end() {
8083        // The mirror of the rejection case: the same shape applies cleanly when
8084        // the matching context IS the last line of the file (preimage reaches
8085        // EOF), so `match_end` is satisfied.
8086        let base = b"one\ntwo\nanchor\n";
8087        let patch =
8088            parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -3,1 +3,3 @@\n anchor\n+added1\n+added2\n")
8089                .expect("test operation should succeed");
8090        let out = applied(apply_file_patch(base, &patch[0]));
8091        assert_eq!(out, b"one\ntwo\nanchor\nadded1\nadded2\n");
8092    }
8093
8094    #[test]
8095    fn reject_when_match_beginning_required_but_not_at_start() {
8096        // A hunk anchored at line 1 (`old_start <= 1`) must match the START of
8097        // the file (`match_beginning`). If the matching context only appears
8098        // later, git rejects rather than wandering to it.
8099        let base = b"junk\nalpha\nbeta\ngamma\n";
8100        let patch =
8101            parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,2 +1,3 @@\n alpha\n+INSERT\n beta\n")
8102                .expect("test operation should succeed");
8103        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
8104    }
8105
8106    #[test]
8107    fn no_default_fuzz_rejects_on_trailing_context_mismatch() {
8108        // `git apply` / `git am` keep `p_context = UINT_MAX` by default, so they
8109        // do NOT fuzz a hunk in by dropping context. Here the trailing context
8110        // line (`gamma`) differs from the base (`DIVERGED`), and because the
8111        // anchor is line 1 the hunk must match the beginning with its FULL
8112        // preimage. Verified against real `git apply`: this is rejected.
8113        let base = b"alpha\nbeta\nDIVERGED\n";
8114        let patch = parse_unified_patch(
8115            b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
8116        )
8117        .expect("test operation should succeed");
8118        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
8119    }
8120
8121    #[test]
8122    fn parse_and_apply_new_file() {
8123        let patch = parse_unified_patch(
8124            b"\
8125diff --git a/new.txt b/new.txt
8126new file mode 100644
8127index 0000000..1111111
8128--- /dev/null
8129+++ b/new.txt
8130@@ -0,0 +1,2 @@
8131+hello
8132+world
8133",
8134        )
8135        .expect("test operation should succeed");
8136        assert!(patches_first_is_new(&patch));
8137        assert_eq!(patch[0].old_path, None);
8138        assert_eq!(patch[0].new_path.as_deref(), Some(b"new.txt".as_slice()));
8139        assert_eq!(patch[0].new_mode, Some(0o100644));
8140        // Base is ignored for a new file.
8141        let out = applied(apply_file_patch(b"garbage that is ignored", &patch[0]));
8142        assert_eq!(out, b"hello\nworld\n");
8143    }
8144
8145    fn patches_first_is_new(patches: &[FilePatch]) -> bool {
8146        patches.first().map(|p| p.is_new).unwrap_or(false)
8147    }
8148
8149    #[test]
8150    fn parse_and_apply_delete_file() {
8151        let patch = parse_unified_patch(
8152            b"\
8153diff --git a/gone.txt b/gone.txt
8154deleted file mode 100644
8155index 1111111..0000000
8156--- a/gone.txt
8157+++ /dev/null
8158@@ -1,2 +0,0 @@
8159-hello
8160-world
8161",
8162        )
8163        .expect("test operation should succeed");
8164        assert!(patch[0].is_delete);
8165        assert_eq!(patch[0].old_path.as_deref(), Some(b"gone.txt".as_slice()));
8166        assert_eq!(patch[0].new_path, None);
8167        assert_eq!(patch[0].old_mode, Some(0o100644));
8168        let out = applied(apply_file_patch(b"hello\nworld\n", &patch[0]));
8169        assert_eq!(out, b"");
8170    }
8171
8172    #[test]
8173    fn parse_rename_headers() {
8174        let patch = parse_unified_patch(
8175            b"\
8176diff --git a/old/name.txt b/new/name.txt
8177similarity index 100%
8178rename from old/name.txt
8179rename to new/name.txt
8180",
8181        )
8182        .expect("test operation should succeed");
8183        assert!(patch[0].is_rename);
8184        assert_eq!(
8185            patch[0].old_path.as_deref(),
8186            Some(b"old/name.txt".as_slice())
8187        );
8188        assert_eq!(
8189            patch[0].new_path.as_deref(),
8190            Some(b"new/name.txt".as_slice())
8191        );
8192        assert!(patch[0].hunks.is_empty());
8193    }
8194
8195    #[test]
8196    fn parse_mode_change_headers() {
8197        let patch = parse_unified_patch(
8198            b"\
8199diff --git a/script.sh b/script.sh
8200old mode 100644
8201new mode 100755
8202",
8203        )
8204        .expect("test operation should succeed");
8205        assert_eq!(patch[0].old_mode, Some(0o100644));
8206        assert_eq!(patch[0].new_mode, Some(0o100755));
8207        assert!(!patch[0].is_new);
8208        assert!(!patch[0].is_delete);
8209    }
8210
8211    #[test]
8212    fn no_final_newline_base_preserved_when_untouched() {
8213        // The change is on line 1; the final line has no newline and is not
8214        // modified, so its no-newline state must survive. This uses the patch
8215        // shape real `git diff` emits for such a change — `@@ -1,3 +1,3 @@` with
8216        // the two unchanged lines as trailing context (the `\ No newline`
8217        // marker rides the last context line). A hand-rolled `@@ -1,1 +1,1 @@`
8218        // with NO trailing context would (correctly) be rejected by git, since
8219        // a no-trailing-context hunk anchored at line 1 must span the whole
8220        // file (`match_beginning` && `match_end`).
8221        let base = b"alpha\nbeta\nnotail"; // "notail" has no trailing \n
8222        let patch = parse_unified_patch(
8223            b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n-alpha\n+ALPHA\n beta\n notail\n\\ No newline at end of file\n",
8224        )
8225        .expect("test operation should succeed");
8226        let out = applied(apply_file_patch(base, &patch[0]));
8227        assert_eq!(out, b"ALPHA\nbeta\nnotail");
8228    }
8229
8230    #[test]
8231    fn no_final_newline_added_by_patch() {
8232        // Old file ends with a newline; patch rewrites the last line to one
8233        // without a trailing newline.
8234        let base = b"alpha\nbeta\n";
8235        let patch = parse_unified_patch(
8236            b"--- a/x\n+++ b/x\n@@ -2,1 +2,1 @@\n-beta\n+beta-notail\n\\ No newline at end of file\n",
8237        )
8238        .expect("test operation should succeed");
8239        assert!(patch[0].hunks[0].new_no_newline);
8240        assert!(!patch[0].hunks[0].old_no_newline);
8241        let out = applied(apply_file_patch(base, &patch[0]));
8242        assert_eq!(out, b"alpha\nbeta-notail");
8243    }
8244
8245    #[test]
8246    fn no_final_newline_in_base_matched_and_kept() {
8247        // Both sides lack a trailing newline; context match must require the
8248        // base's final line to itself be newline-free.
8249        let base = b"alpha\nbeta"; // no trailing newline
8250        let patch = parse_unified_patch(
8251            b"--- a/x\n+++ b/x\n@@ -1,2 +1,2 @@\n-alpha\n+ALPHA\n beta\n\\ No newline at end of file\n",
8252        )
8253        .expect("test operation should succeed");
8254        assert!(patch[0].hunks[0].old_no_newline);
8255        assert!(patch[0].hunks[0].new_no_newline);
8256        let out = applied(apply_file_patch(base, &patch[0]));
8257        assert_eq!(out, b"ALPHA\nbeta");
8258    }
8259
8260    #[test]
8261    fn no_final_newline_mismatch_rejected() {
8262        // Patch asserts the old file has no trailing newline, but the base does.
8263        // That must be rejected rather than silently mis-applied.
8264        let base = b"alpha\nbeta\n"; // HAS trailing newline
8265        let patch = parse_unified_patch(
8266            b"--- a/x\n+++ b/x\n@@ -2,1 +2,1 @@\n-beta\n\\ No newline at end of file\n+beta2\n",
8267        )
8268        .expect("test operation should succeed");
8269        assert!(patch[0].hunks[0].old_no_newline);
8270        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
8271    }
8272
8273    #[test]
8274    fn delete_with_no_final_newline() {
8275        // Deleting the entire content of a file that had no trailing newline.
8276        let base = b"only line no newline";
8277        let patch = parse_unified_patch(
8278            b"--- a/x\n+++ /dev/null\n@@ -1,1 +0,0 @@\n-only line no newline\n\\ No newline at end of file\n",
8279        )
8280        .expect("test operation should succeed");
8281        assert!(patch[0].is_delete);
8282        let out = applied(apply_file_patch(base, &patch[0]));
8283        assert_eq!(out, b"");
8284    }
8285
8286    #[test]
8287    fn apply_pure_insertion_hunk() {
8288        let base = b"first\nsecond\n";
8289        let patch =
8290            parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,2 +1,3 @@\n first\n+middle\n second\n")
8291                .expect("test operation should succeed");
8292        let out = applied(apply_file_patch(base, &patch[0]));
8293        assert_eq!(out, b"first\nmiddle\nsecond\n");
8294    }
8295
8296    #[test]
8297    fn apply_pure_deletion_hunk() {
8298        let base = b"first\nmiddle\nsecond\n";
8299        let patch =
8300            parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,3 +1,2 @@\n first\n-middle\n second\n")
8301                .expect("test operation should succeed");
8302        let out = applied(apply_file_patch(base, &patch[0]));
8303        assert_eq!(out, b"first\nsecond\n");
8304    }
8305
8306    #[test]
8307    fn apply_then_reparse_round_trip() {
8308        // Hand-written unified diff -> apply -> the result is exactly the new
8309        // file content the diff describes. Re-parsing the same patch yields an
8310        // identical structure (idempotent parse).
8311        let base = b"l1\nl2\nl3\nl4\nl5\n";
8312        let text = b"--- a/f\n+++ b/f\n@@ -2,3 +2,4 @@\n l2\n-l3\n+L3\n+L3b\n l4\n";
8313        let p1 = parse_unified_patch(text).expect("test operation should succeed");
8314        let p2 = parse_unified_patch(text).expect("test operation should succeed");
8315        assert_eq!(p1, p2);
8316        let out = applied(apply_file_patch(base, &p1[0]));
8317        assert_eq!(out, b"l1\nl2\nL3\nL3b\nl4\nl5\n");
8318    }
8319
8320    #[test]
8321    fn empty_context_line_without_trailing_space() {
8322        // Some transports strip the single leading space from blank context
8323        // lines; the parser treats a wholly empty body line as blank context.
8324        let base = b"a\n\nb\n";
8325        let patch = parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n a\n\n-b\n+B\n")
8326            .expect("test operation should succeed");
8327        assert_eq!(patch[0].hunks[0].lines[1], HunkLine::Context(Vec::new()));
8328        let out = applied(apply_file_patch(base, &patch[0]));
8329        assert_eq!(out, b"a\n\nB\n");
8330    }
8331
8332    #[test]
8333    fn split_blob_lines_handles_edge_cases() {
8334        assert!(split_blob_lines(b"").is_empty());
8335        let single = split_blob_lines(b"abc");
8336        assert_eq!(single.len(), 1);
8337        assert!(single[0].no_newline);
8338        let terminated = split_blob_lines(b"abc\n");
8339        assert_eq!(terminated.len(), 1);
8340        assert!(!terminated[0].no_newline);
8341        let blank_then_eof = split_blob_lines(b"x\n");
8342        assert_eq!(blank_then_eof.len(), 1);
8343    }
8344
8345    // ---- content similarity & inexact rename/copy detection -----------------
8346
8347    #[test]
8348    fn similarity_identical_and_empty_conventions() {
8349        // Byte-identical blobs are always 100% similar.
8350        assert_eq!(blob_similarity(b"hello\nworld\n", b"hello\nworld\n"), 100);
8351        // Two empty blobs are identical -> 100.
8352        assert_eq!(blob_similarity(b"", b""), 100);
8353        // An empty blob vs a non-empty one shares nothing -> 0.
8354        assert_eq!(blob_similarity(b"", b"hello\n"), 0);
8355        assert_eq!(blob_similarity(b"hello\n", b""), 0);
8356    }
8357
8358    #[test]
8359    fn similarity_one_changed_line_is_75_and_symmetric() {
8360        // A = one/two/three/four/five (bytes: 4+4+6+5+5 = 24).
8361        // B changes "three\n" -> "THREE\n" (same total size 24).
8362        // Common spans: one,two,four,five = 4+4+5+5 = 18 bytes.
8363        // score = round(18 * 100 / max(24, 24)) = round(75) = 75.
8364        // Verified against `git diff -M` which reports "similarity index 75%".
8365        let a = b"one\ntwo\nthree\nfour\nfive\n";
8366        let b = b"one\ntwo\nTHREE\nfour\nfive\n";
8367        assert_eq!(blob_similarity(a, b), 75);
8368        // The metric is symmetric.
8369        assert_eq!(blob_similarity(b, a), 75);
8370    }
8371
8372    #[test]
8373    fn similarity_one_edited_line_of_three_is_66_not_67() {
8374        // "a\nb\nc\n" -> "a\nB\nc\n": one of three lines edited (4 common bytes of
8375        // 6). git reports `R066` / "similarity index 66%". git's two-step integer
8376        // math is `4 * 60000 / 6 = 40000`, then `40000 * 100 / 60000 = 66` (both
8377        // truncated); a single rounded `4 * 100 / 6` would give 67. This pins the
8378        // MAX_SCORE-based rounding so it stays aligned with diffcore-rename.
8379        assert_eq!(blob_similarity(b"a\nb\nc\n", b"a\nB\nc\n"), 66);
8380        assert_eq!(blob_similarity(b"a\nB\nc\n", b"a\nb\nc\n"), 66);
8381    }
8382
8383    #[test]
8384    fn similarity_small_append_is_88() {
8385        // A: 8 lines totalling 46 bytes. B: same 8 lines + "ADDED\n" (6 bytes) = 52.
8386        // Common = the 46 original bytes; score = round(46*100/52) = 88.
8387        // Verified against `git diff -M` -> "similarity index 88%".
8388        let a = b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\n";
8389        let b = b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\nADDED\n";
8390        assert_eq!(blob_similarity(a, b), 88);
8391    }
8392
8393    #[test]
8394    fn similarity_half_rewrite_is_50() {
8395        // 6 lines, last 3 rewritten. Common = l1,l2,l3 = 9 bytes; total each 18.
8396        // score = round(9*100/18) = 50. Verified against `git diff -M`.
8397        let a = b"l1\nl2\nl3\nl4\nl5\nl6\n";
8398        let b = b"l1\nl2\nl3\nX4\nX5\nX6\n";
8399        assert_eq!(blob_similarity(a, b), 50);
8400    }
8401
8402    // ---- tree-diff based inexact detection ----------------------------------
8403
8404    /// Write a blob and return its oid.
8405    fn write_blob(db: &mut FileObjectDatabase, bytes: &[u8]) -> ObjectId {
8406        db.write_object(EncodedObject::new(ObjectType::Blob, bytes.to_vec()))
8407            .expect("test operation should succeed")
8408    }
8409
8410    /// Write a tree from `(name, mode, oid)` entries (sorted by name as git
8411    /// requires) and return its oid.
8412    fn write_tree(db: &mut FileObjectDatabase, entries: &[(&[u8], u32, ObjectId)]) -> ObjectId {
8413        let mut tree_entries: Vec<TreeEntry> = entries
8414            .iter()
8415            .map(|(name, mode, oid)| TreeEntry {
8416                mode: *mode,
8417                name: BString::from(*name),
8418                oid: *oid,
8419            })
8420            .collect();
8421        tree_entries.sort_by(|a, b| a.name.cmp(&b.name));
8422        let tree = Tree {
8423            entries: tree_entries,
8424        };
8425        db.write_object(EncodedObject::new(ObjectType::Tree, tree.write()))
8426            .expect("test operation should succeed")
8427    }
8428
8429    #[test]
8430    fn inexact_rename_detected_with_plausible_score() {
8431        // a.txt (one changed line vs the new b.txt) should be detected as a
8432        // rename with score 75 (see `similarity_one_changed_line_is_75`).
8433        let root = temp_root();
8434        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
8435            .expect("test operation should succeed");
8436        let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
8437
8438        let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
8439        let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
8440        let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
8441        let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
8442
8443        let opts = RenameDetectionOptions {
8444            base: DiffNameStatusOptions {
8445                detect_renames: true,
8446                detect_copies: false,
8447                find_copies_harder: false,
8448                rename_empty: true,
8449            },
8450            detect_inexact: true,
8451            rename_threshold: DEFAULT_RENAME_THRESHOLD,
8452            copy_threshold: DEFAULT_RENAME_THRESHOLD,
8453        };
8454        let entries = diff_name_status_trees_with_rename_options(
8455            &db,
8456            ObjectFormat::Sha1,
8457            &left,
8458            &right,
8459            opts,
8460        )
8461        .expect("test operation should succeed");
8462
8463        assert_eq!(
8464            entries.len(),
8465            1,
8466            "expected a single rename entry: {entries:?}"
8467        );
8468        assert_eq!(entries[0].status, NameStatus::Renamed(75));
8469        assert_eq!(
8470            entries[0].old_path.as_ref().map(|p| p.as_bytes()),
8471            Some(b"a.txt".as_slice())
8472        );
8473        assert_eq!(entries[0].path, b"b.txt");
8474        assert_eq!(entries[0].line(), "R075\ta.txt\tb.txt");
8475        fs::remove_dir_all(root).expect("test operation should succeed");
8476    }
8477
8478    #[test]
8479    fn inexact_rename_below_threshold_not_detected() {
8480        // A half-rewrite scores 50%. With a 60% threshold it must NOT be paired;
8481        // the change shows up as a separate Add + Delete instead.
8482        let root = temp_root();
8483        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
8484            .expect("test operation should succeed");
8485        let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
8486
8487        let old = write_blob(&mut db, b"l1\nl2\nl3\nl4\nl5\nl6\n");
8488        let new = write_blob(&mut db, b"l1\nl2\nl3\nX4\nX5\nX6\n");
8489        let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
8490        let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
8491
8492        let opts = RenameDetectionOptions {
8493            base: DiffNameStatusOptions {
8494                detect_renames: true,
8495                detect_copies: false,
8496                find_copies_harder: false,
8497                rename_empty: true,
8498            },
8499            detect_inexact: true,
8500            rename_threshold: 60,
8501            copy_threshold: 60,
8502        };
8503        let entries = diff_name_status_trees_with_rename_options(
8504            &db,
8505            ObjectFormat::Sha1,
8506            &left,
8507            &right,
8508            opts,
8509        )
8510        .expect("test operation should succeed");
8511
8512        let statuses: Vec<_> = entries.iter().map(|e| e.status).collect();
8513        assert!(
8514            statuses.contains(&NameStatus::Added) && statuses.contains(&NameStatus::Deleted),
8515            "expected separate add/delete below threshold, got {entries:?}"
8516        );
8517        assert!(
8518            !statuses.iter().any(|s| matches!(s, NameStatus::Renamed(_))),
8519            "no rename should be reported below threshold: {entries:?}"
8520        );
8521
8522        // Sanity: lowering the threshold to 50 *does* detect it (boundary is
8523        // inclusive), and the score is exactly 50.
8524        let opts_low = RenameDetectionOptions {
8525            rename_threshold: 50,
8526            ..opts
8527        };
8528        let entries_low = diff_name_status_trees_with_rename_options(
8529            &db,
8530            ObjectFormat::Sha1,
8531            &left,
8532            &right,
8533            opts_low,
8534        )
8535        .expect("test operation should succeed");
8536        assert_eq!(entries_low.len(), 1);
8537        assert_eq!(entries_low[0].status, NameStatus::Renamed(50));
8538        fs::remove_dir_all(root).expect("test operation should succeed");
8539    }
8540
8541    #[test]
8542    fn exact_rename_scores_100_and_takes_priority() {
8543        // Identical content moved to a new path is an exact rename: score 100,
8544        // detected even with inexact disabled, and still 100 with it enabled.
8545        let root = temp_root();
8546        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
8547            .expect("test operation should succeed");
8548        let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
8549
8550        let oid = write_blob(&mut db, b"identical\ncontent\nhere\n");
8551        let left = write_tree(&mut db, &[(b"old.txt", 0o100644, oid)]);
8552        let right = write_tree(&mut db, &[(b"new.txt", 0o100644, oid)]);
8553
8554        for inexact in [false, true] {
8555            let opts = RenameDetectionOptions {
8556                base: DiffNameStatusOptions {
8557                    detect_renames: true,
8558                    detect_copies: false,
8559                    find_copies_harder: false,
8560                    rename_empty: true,
8561                },
8562                detect_inexact: inexact,
8563                rename_threshold: DEFAULT_RENAME_THRESHOLD,
8564                copy_threshold: DEFAULT_RENAME_THRESHOLD,
8565            };
8566            let entries = diff_name_status_trees_with_rename_options(
8567                &db,
8568                ObjectFormat::Sha1,
8569                &left,
8570                &right,
8571                opts,
8572            )
8573            .expect("test operation should succeed");
8574            assert_eq!(entries.len(), 1, "inexact={inexact}: {entries:?}");
8575            assert_eq!(entries[0].status, NameStatus::Renamed(100));
8576            assert_eq!(
8577                entries[0].old_path.as_ref().map(|p| p.as_bytes()),
8578                Some(b"old.txt".as_slice())
8579            );
8580            assert_eq!(entries[0].path, b"new.txt");
8581        }
8582        fs::remove_dir_all(root).expect("test operation should succeed");
8583    }
8584
8585    #[test]
8586    fn inexact_copy_detected_with_score() {
8587        // orig.txt is unchanged and a near-copy (one line differs, 80% similar)
8588        // is added. With copy detection + find_copies_harder + inexact, the new
8589        // file is reported as a copy with score 80 (matches `git diff -C
8590        // --find-copies-harder`).
8591        let root = temp_root();
8592        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
8593            .expect("test operation should succeed");
8594        let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
8595
8596        let orig = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\neee\n");
8597        let copy = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\nEEE\n");
8598        let left = write_tree(&mut db, &[(b"orig.txt", 0o100644, orig.clone())]);
8599        let right = write_tree(
8600            &mut db,
8601            &[(b"orig.txt", 0o100644, orig), (b"copy.txt", 0o100644, copy)],
8602        );
8603
8604        let opts = RenameDetectionOptions {
8605            base: DiffNameStatusOptions {
8606                detect_renames: true,
8607                detect_copies: true,
8608                find_copies_harder: true,
8609                rename_empty: true,
8610            },
8611            detect_inexact: true,
8612            rename_threshold: DEFAULT_RENAME_THRESHOLD,
8613            copy_threshold: DEFAULT_RENAME_THRESHOLD,
8614        };
8615        let entries = diff_name_status_trees_with_rename_options(
8616            &db,
8617            ObjectFormat::Sha1,
8618            &left,
8619            &right,
8620            opts,
8621        )
8622        .expect("test operation should succeed");
8623
8624        let copy_entry = entries
8625            .iter()
8626            .find(|e| e.path == b"copy.txt")
8627            .unwrap_or_else(|| panic!("no copy.txt entry: {entries:?}"));
8628        assert_eq!(copy_entry.status, NameStatus::Copied(80));
8629        assert_eq!(
8630            copy_entry.old_path.as_ref().map(|p| p.as_bytes()),
8631            Some(b"orig.txt".as_slice())
8632        );
8633        // The source remains present (copies do not consume the original).
8634        assert!(
8635            entries.iter().all(|e| e.status != NameStatus::Deleted),
8636            "copy must not delete the source: {entries:?}"
8637        );
8638        fs::remove_dir_all(root).expect("test operation should succeed");
8639    }
8640
8641    #[test]
8642    fn inexact_rename_with_small_edit_scores_88() {
8643        // A rename that also appends a single line scores 88% (see
8644        // `similarity_small_append_is_88`).
8645        let root = temp_root();
8646        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
8647            .expect("test operation should succeed");
8648        let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
8649
8650        let old = write_blob(
8651            &mut db,
8652            b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\n",
8653        );
8654        let new = write_blob(
8655            &mut db,
8656            b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\nADDED\n",
8657        );
8658        let left = write_tree(&mut db, &[(b"src.txt", 0o100644, old)]);
8659        let right = write_tree(&mut db, &[(b"dst.txt", 0o100644, new)]);
8660
8661        let opts = RenameDetectionOptions::inexact(DiffNameStatusOptions {
8662            detect_renames: true,
8663            detect_copies: false,
8664            find_copies_harder: false,
8665            rename_empty: true,
8666        });
8667        let entries = diff_name_status_trees_with_rename_options(
8668            &db,
8669            ObjectFormat::Sha1,
8670            &left,
8671            &right,
8672            opts,
8673        )
8674        .expect("test operation should succeed");
8675
8676        assert_eq!(entries.len(), 1, "{entries:?}");
8677        assert_eq!(entries[0].status, NameStatus::Renamed(88));
8678        assert_eq!(
8679            entries[0].old_path.as_ref().map(|p| p.as_bytes()),
8680            Some(b"src.txt".as_slice())
8681        );
8682        assert_eq!(entries[0].path, b"dst.txt");
8683        fs::remove_dir_all(root).expect("test operation should succeed");
8684    }
8685
8686    #[test]
8687    fn inexact_disabled_default_preserves_exact_only_behavior() {
8688        // With RenameDetectionOptions::default() (detect_inexact == false), a
8689        // similar-but-not-identical pair is NOT a rename — identical to the
8690        // legacy exact-only path. Defaults must not silently turn on inexact.
8691        assert!(!RenameDetectionOptions::default().detect_inexact);
8692        assert_eq!(
8693            RenameDetectionOptions::default().rename_threshold,
8694            DEFAULT_RENAME_THRESHOLD
8695        );
8696
8697        let root = temp_root();
8698        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
8699            .expect("test operation should succeed");
8700        let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
8701
8702        let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
8703        let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
8704        let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
8705        let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
8706
8707        let entries = diff_name_status_trees_with_rename_options(
8708            &db,
8709            ObjectFormat::Sha1,
8710            &left,
8711            &right,
8712            RenameDetectionOptions::default(),
8713        )
8714        .expect("test operation should succeed");
8715        let statuses: Vec<_> = entries.iter().map(|e| e.status).collect();
8716        assert!(statuses.contains(&NameStatus::Added));
8717        assert!(statuses.contains(&NameStatus::Deleted));
8718        assert!(!statuses.iter().any(|s| matches!(s, NameStatus::Renamed(_))));
8719        fs::remove_dir_all(root).expect("test operation should succeed");
8720    }
8721
8722    // ---- patience / histogram diff tests ------------------------------------
8723
8724    /// Apply an edit script to `old` and return the reconstructed `new` bytes.
8725    ///
8726    /// Panics (test-only) if the script ever references a line out of range or
8727    /// claims a line is `Equal` when the corresponding `old`/`new` lines differ
8728    /// — that is exactly the invariant a correct LCS diff must uphold.
8729    fn apply_ops(old: &[DiffLine<'_>], new: &[DiffLine<'_>], ops: &[DiffOp]) -> Vec<u8> {
8730        let mut oi = 0usize;
8731        let mut ni = 0usize;
8732        let mut rebuilt: Vec<u8> = Vec::new();
8733        for op in ops {
8734            match *op {
8735                DiffOp::Equal(n) => {
8736                    for _ in 0..n {
8737                        // Equal must mean genuinely-equal lines (LCS-correct).
8738                        assert_eq!(old[oi], new[ni], "Equal op covered unequal lines");
8739                        rebuilt.extend_from_slice(old[oi].content);
8740                        oi += 1;
8741                        ni += 1;
8742                    }
8743                }
8744                DiffOp::Delete(n) => oi += n,
8745                DiffOp::Insert(n) => {
8746                    for _ in 0..n {
8747                        rebuilt.extend_from_slice(new[ni].content);
8748                        ni += 1;
8749                    }
8750                }
8751            }
8752        }
8753        // The script must consume every line of both sides exactly once.
8754        assert_eq!(oi, old.len(), "script did not consume all of old");
8755        assert_eq!(ni, new.len(), "script did not consume all of new");
8756        rebuilt
8757    }
8758
8759    /// Assert that `ops` is a valid LCS-correct script: it reconstructs `new`
8760    /// from `old`, and consecutive ops are coalesced (no two same-kind in a row).
8761    fn assert_valid_script(old_bytes: &[u8], new_bytes: &[u8], ops: &[DiffOp]) {
8762        let old = split_lines(old_bytes);
8763        let new = split_lines(new_bytes);
8764        let rebuilt = apply_ops(&old, &new, ops);
8765        assert_eq!(rebuilt, new_bytes, "script did not rebuild new");
8766        for pair in ops.windows(2) {
8767            let same_kind = matches!(
8768                (pair[0], pair[1]),
8769                (DiffOp::Equal(_), DiffOp::Equal(_))
8770                    | (DiffOp::Delete(_), DiffOp::Delete(_))
8771                    | (DiffOp::Insert(_), DiffOp::Insert(_))
8772            );
8773            assert!(!same_kind, "ops not coalesced: {:?}", ops);
8774        }
8775    }
8776
8777    /// Run all three real algorithms over a byte pair and assert each produces a
8778    /// valid, coalesced, LCS-correct script.
8779    fn check_all_algorithms(old_bytes: &[u8], new_bytes: &[u8]) {
8780        let old = split_lines(old_bytes);
8781        let new = split_lines(new_bytes);
8782        for algo in [
8783            DiffAlgorithm::Myers,
8784            DiffAlgorithm::Minimal,
8785            DiffAlgorithm::Patience,
8786            DiffAlgorithm::Histogram,
8787        ] {
8788            let ops = diff_lines_with_algorithm(&old, &new, algo);
8789            assert_valid_script(old_bytes, new_bytes, &ops);
8790        }
8791    }
8792
8793    #[test]
8794    fn patience_and_histogram_match_myers_on_simple_cases() {
8795        // For localized single-line edits with no repeated lines, all three
8796        // algorithms agree with the canonical Myers script.
8797        let cases: &[(&[u8], &[u8], Vec<DiffOp>)] = &[
8798            (
8799                b"a\nb\nc\n",
8800                b"a\nx\nc\n",
8801                vec![
8802                    DiffOp::Equal(1),
8803                    DiffOp::Delete(1),
8804                    DiffOp::Insert(1),
8805                    DiffOp::Equal(1),
8806                ],
8807            ),
8808            (b"a\nb\nc\n", b"a\nb\nc\n", vec![DiffOp::Equal(3)]),
8809            (b"", b"a\nb\n", vec![DiffOp::Insert(2)]),
8810            (b"a\nb\n", b"", vec![DiffOp::Delete(2)]),
8811            (
8812                b"a\nb\nc\nd\n",
8813                b"a\nc\nd\n",
8814                vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Equal(2)],
8815            ),
8816        ];
8817        for (old_bytes, new_bytes, expected) in cases {
8818            let old = split_lines(old_bytes);
8819            let new = split_lines(new_bytes);
8820            assert_eq!(&patience_diff_lines(&old, &new), expected);
8821            assert_eq!(&histogram_diff_lines(&old, &new), expected);
8822            assert_eq!(&myers_diff_lines(&old, &new), expected);
8823        }
8824    }
8825
8826    #[test]
8827    fn patience_handles_both_empty() {
8828        let empty = split_lines(b"");
8829        assert!(patience_diff_lines(&empty, &empty).is_empty());
8830        assert!(histogram_diff_lines(&empty, &empty).is_empty());
8831    }
8832
8833    #[test]
8834    fn patience_aligns_unique_anchors_across_moved_block() {
8835        // Reordering two unique blocks: patience anchors on the unique lines and
8836        // produces a delete-then-insert (or insert-then-delete) that still
8837        // reconstructs `new`. Validity is the contract; exact shape may differ
8838        // from Myers, so we only assert reconstruction here.
8839        check_all_algorithms(
8840            b"alpha\nbeta\ngamma\ndelta\n",
8841            b"gamma\ndelta\nalpha\nbeta\n",
8842        );
8843    }
8844
8845    #[test]
8846    fn histogram_differs_from_myers_keeping_block_contiguous() {
8847        // A case where histogram diverges from Myers. With old = "b a" and a new
8848        // that surrounds an intact "b a" with inserted "b" lines, Myers splits
8849        // the common run into two single-line Equals (matching the leading and
8850        // trailing `b`/`a` separately), while histogram anchors on the rare line
8851        // and keeps the original two lines together as one Equal(2) block.
8852        let old = b"b\na\n";
8853        let new = b"a\nb\nb\na\nb\n";
8854        let old_l = split_lines(old);
8855        let new_l = split_lines(new);
8856
8857        let myers = myers_diff_lines(&old_l, &new_l);
8858        let histogram = histogram_diff_lines(&old_l, &new_l);
8859
8860        // All variants must reconstruct `new`.
8861        assert_valid_script(old, new, &myers);
8862        assert_valid_script(old, new, &histogram);
8863
8864        // Exact, pinned shapes: Myers interleaves single-line equals; histogram
8865        // keeps "b\na\n" contiguous.
8866        assert_eq!(
8867            myers,
8868            vec![
8869                DiffOp::Insert(1),
8870                DiffOp::Equal(1),
8871                DiffOp::Insert(1),
8872                DiffOp::Equal(1),
8873                DiffOp::Insert(1),
8874            ]
8875        );
8876        assert_eq!(
8877            histogram,
8878            vec![DiffOp::Insert(2), DiffOp::Equal(2), DiffOp::Insert(1)]
8879        );
8880        // The contract the task calls out: histogram differs from Myers here.
8881        assert_ne!(myers, histogram);
8882    }
8883
8884    #[test]
8885    fn patience_differs_from_myers_on_repeated_lines() {
8886        // A case where patience diverges from Myers. old = "b a", new = "a a b".
8887        // Myers deletes the leading `b` and appends; patience anchors on the
8888        // single unique-in-both line `a`... but `a` occurs twice in `new`, so it
8889        // is NOT unique there; patience instead falls through to its recursive
8890        // structure and produces the mirror script. Both reconstruct `new`.
8891        let old = b"b\na\n";
8892        let new = b"a\na\nb\n";
8893        let old_l = split_lines(old);
8894        let new_l = split_lines(new);
8895
8896        let myers = myers_diff_lines(&old_l, &new_l);
8897        let patience = patience_diff_lines(&old_l, &new_l);
8898
8899        assert_valid_script(old, new, &myers);
8900        assert_valid_script(old, new, &patience);
8901
8902        assert_eq!(
8903            myers,
8904            vec![DiffOp::Delete(1), DiffOp::Equal(1), DiffOp::Insert(2)]
8905        );
8906        assert_eq!(
8907            patience,
8908            vec![DiffOp::Insert(2), DiffOp::Equal(1), DiffOp::Delete(1)]
8909        );
8910        assert_ne!(myers, patience);
8911    }
8912
8913    #[test]
8914    fn realistic_function_insertion_all_valid() {
8915        // A more lifelike example: a new function is inserted ahead of an
8916        // existing one that shares structural lines ("}", blank line). We don't
8917        // pin exact shapes (they depend on trim interactions) but every
8918        // algorithm must produce a valid LCS-correct script.
8919        let old = b"int f() {\n    return 1;\n}\n";
8920        let new = b"int g() {\n    return 2;\n}\n\nint f() {\n    return 1;\n}\n";
8921        check_all_algorithms(old, new);
8922    }
8923
8924    #[test]
8925    fn histogram_anchors_on_rare_line_when_no_unique_line_exists() {
8926        // No line is globally unique on both sides (every distinct line repeats
8927        // on at least one side), so plain patience would fall straight to Myers.
8928        // Histogram still anchors on the least-frequent shared line. We assert
8929        // both produce valid, reconstructing scripts.
8930        check_all_algorithms(b"x\nx\nmid\nx\nx\n", b"x\nmid\nx\nx\nx\n");
8931        check_all_algorithms(
8932            b"dup\ndup\nrare\ndup\ndup\n",
8933            b"dup\nrare\ndup\ndup\ndup\ndup\n",
8934        );
8935    }
8936
8937    #[test]
8938    fn all_algorithms_treat_missing_final_newline_as_change() {
8939        // "b" (no newline) vs "b\n" is a real change for every algorithm.
8940        let old = split_lines(b"a\nb");
8941        let new = split_lines(b"a\nb\n");
8942        for algo in [
8943            DiffAlgorithm::Myers,
8944            DiffAlgorithm::Minimal,
8945            DiffAlgorithm::Patience,
8946            DiffAlgorithm::Histogram,
8947        ] {
8948            let ops = diff_lines_with_algorithm(&old, &new, algo);
8949            assert_eq!(
8950                ops,
8951                vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Insert(1)],
8952                "algorithm {:?} mishandled missing final newline",
8953                algo
8954            );
8955        }
8956    }
8957
8958    #[test]
8959    fn dispatcher_routes_each_variant() {
8960        let old = split_lines(b"a\nb\nc\n");
8961        let new = split_lines(b"a\nx\nc\n");
8962        assert_eq!(
8963            diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Myers),
8964            myers_diff_lines(&old, &new)
8965        );
8966        // Minimal aliases Myers (the Myers search is already a minimal SES).
8967        assert_eq!(
8968            diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Minimal),
8969            myers_diff_lines(&old, &new)
8970        );
8971        assert_eq!(
8972            diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Patience),
8973            patience_diff_lines(&old, &new)
8974        );
8975        assert_eq!(
8976            diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Histogram),
8977            histogram_diff_lines(&old, &new)
8978        );
8979    }
8980
8981    #[test]
8982    fn patience_recurses_into_gaps_between_anchors() {
8983        // Unique anchors `head`/`tail` bracket an inner edit; patience must
8984        // recurse into the middle gap and diff `mid1`->`MID` there.
8985        let old = b"head\nmid1\nmid2\ntail\n";
8986        let new = b"head\nMID\nmid2\ntail\n";
8987        let old_l = split_lines(old);
8988        let new_l = split_lines(new);
8989        let ops = patience_diff_lines(&old_l, &new_l);
8990        assert_eq!(
8991            ops,
8992            vec![
8993                DiffOp::Equal(1),
8994                DiffOp::Delete(1),
8995                DiffOp::Insert(1),
8996                DiffOp::Equal(2),
8997            ]
8998        );
8999        assert_valid_script(old, new, &ops);
9000    }
9001
9002    #[test]
9003    fn patience_falls_back_to_myers_with_no_unique_lines() {
9004        // Every line is duplicated within its own side, so there are no
9005        // unique-in-both anchors; patience must defer to Myers but still return
9006        // a valid script.
9007        let old = b"a\na\nb\nb\n";
9008        let new = b"a\na\na\nb\n";
9009        let old_l = split_lines(old);
9010        let new_l = split_lines(new);
9011        let ops = patience_diff_lines(&old_l, &new_l);
9012        // The contract for the fallback path is validity, not minimality: after
9013        // the greedy prefix/suffix trim (which git's patience does too) the
9014        // leftover block is handed to Myers, and the whole script must still
9015        // reconstruct `new`.
9016        assert_valid_script(old, new, &ops);
9017    }
9018
9019    #[test]
9020    fn algorithms_agree_with_myers_when_all_lines_distinct() {
9021        // When every line is globally unique, patience's anchor set is the full
9022        // LCS, so patience and histogram must produce exactly the Myers script.
9023        let cases: &[(&[u8], &[u8])] = &[
9024            (b"a\nb\nc\nd\ne\n", b"a\nc\nd\nf\ne\n"),
9025            (b"1\n2\n3\n4\n5\n6\n", b"1\n3\n2\n4\n6\n5\n"),
9026            (b"q\nw\ne\nr\nt\ny\n", b"q\nw\nx\nr\nt\nz\n"),
9027        ];
9028        for (old_bytes, new_bytes) in cases {
9029            let old = split_lines(old_bytes);
9030            let new = split_lines(new_bytes);
9031            let myers = myers_diff_lines(&old, &new);
9032            assert_eq!(
9033                patience_diff_lines(&old, &new),
9034                myers,
9035                "patience must equal Myers when all lines are distinct: {:?}",
9036                old_bytes
9037            );
9038            assert_eq!(
9039                histogram_diff_lines(&old, &new),
9040                myers,
9041                "histogram must equal Myers when all lines are distinct: {:?}",
9042                old_bytes
9043            );
9044        }
9045    }
9046
9047    #[test]
9048    fn fuzz_all_algorithms_reconstruct_new() {
9049        // A small deterministic LCG drives many random small inputs over a tiny
9050        // alphabet (so lines repeat and exercise the anchor/fallback paths).
9051        // Every algorithm must produce a valid LCS-correct script for each pair.
9052        let mut state: u64 = 0x9E37_79B9_7F4A_7C15;
9053        let mut next = || {
9054            state = state
9055                .wrapping_mul(6364136223846793005)
9056                .wrapping_add(1442695040888963407);
9057            (state >> 33) as u32
9058        };
9059        let alphabet = [b"a\n", b"b\n", b"c\n", b"d\n"];
9060        let build = |rng: &mut dyn FnMut() -> u32| -> Vec<u8> {
9061            let len = (rng() % 9) as usize; // 0..=8 lines
9062            let mut buf = Vec::new();
9063            for _ in 0..len {
9064                let pick = (rng() % alphabet.len() as u32) as usize;
9065                buf.extend_from_slice(alphabet[pick]);
9066            }
9067            // Occasionally drop the trailing newline to exercise that path.
9068            if !buf.is_empty() && rng().is_multiple_of(4) {
9069                buf.pop();
9070            }
9071            buf
9072        };
9073        for _ in 0..400 {
9074            let old_bytes = build(&mut next);
9075            let new_bytes = build(&mut next);
9076            check_all_algorithms(&old_bytes, &new_bytes);
9077        }
9078    }
9079
9080    #[test]
9081    fn exhaustive_small_inputs_all_algorithms_reconstruct() {
9082        // Brute force over a 3-symbol alphabet up to 5 lines per side: every
9083        // algorithm must produce a valid LCS-correct script for *every* pair.
9084        // This is the strongest correctness net for the recursion/fallback
9085        // paths; apply_ops asserts both reconstruction and Equal-correctness.
9086        let syms = [b"a\n".to_vec(), b"b\n".to_vec(), b"c\n".to_vec()];
9087        let make = |n: usize, mut code: usize| -> Vec<u8> {
9088            let mut v = Vec::new();
9089            for _ in 0..n {
9090                v.extend_from_slice(&syms[code % 3]);
9091                code /= 3;
9092            }
9093            v
9094        };
9095        for la in 0..=5usize {
9096            for lb in 0..=5usize {
9097                for ca in 0..3usize.pow(la as u32) {
9098                    for cb in 0..3usize.pow(lb as u32) {
9099                        let ob = make(la, ca);
9100                        let nb = make(lb, cb);
9101                        let ol = split_lines(&ob);
9102                        let nl = split_lines(&nb);
9103                        assert_eq!(apply_ops(&ol, &nl, &myers_diff_lines(&ol, &nl)), nb);
9104                        assert_eq!(apply_ops(&ol, &nl, &patience_diff_lines(&ol, &nl)), nb);
9105                        assert_eq!(apply_ops(&ol, &nl, &histogram_diff_lines(&ol, &nl)), nb);
9106                    }
9107                }
9108            }
9109        }
9110    }
9111
9112    #[test]
9113    fn fuzz_distinct_lines_patience_histogram_equal_myers() {
9114        // When inputs are permutations/subsequences of globally-unique lines,
9115        // patience and histogram must match Myers exactly. We generate sequences
9116        // of distinct tokens to guarantee global uniqueness on both sides.
9117        let mut state: u64 = 0x1234_5678_9ABC_DEF0;
9118        let mut next = || {
9119            state = state
9120                .wrapping_mul(6364136223846793005)
9121                .wrapping_add(1442695040888963407);
9122            (state >> 33) as u32
9123        };
9124        for _ in 0..200 {
9125            // Random subset+order of tokens "0\n".."9\n" for each side; tokens
9126            // are globally unique, so any common line is unique in both.
9127            let pick_subseq = |rng: &mut dyn FnMut() -> u32| -> Vec<u8> {
9128                let mut buf = Vec::new();
9129                for t in 0..10u32 {
9130                    if rng().is_multiple_of(2) {
9131                        buf.extend_from_slice(format!("{t}\n").as_bytes());
9132                    }
9133                }
9134                buf
9135            };
9136            let old_bytes = pick_subseq(&mut next);
9137            let new_bytes = pick_subseq(&mut next);
9138            let old = split_lines(&old_bytes);
9139            let new = split_lines(&new_bytes);
9140            let myers = myers_diff_lines(&old, &new);
9141            assert_eq!(patience_diff_lines(&old, &new), myers);
9142            assert_eq!(histogram_diff_lines(&old, &new), myers);
9143        }
9144    }
9145
9146    // ===================================================================
9147    // Subtree-skip-by-OID tree-diff optimization: the pruned simultaneous
9148    // walk (`changed_tree_entries`) must produce byte-identical name-status
9149    // output to the legacy "flatten both sides fully" walk
9150    // (`collect_full_tree_pair`) on every representative diff shape.
9151    // ===================================================================
9152
9153    /// Format a name-status result into stable, comparable lines.
9154    fn status_lines(entries: &[NameStatusEntry]) -> Vec<String> {
9155        entries.iter().map(|entry| entry.line()).collect()
9156    }
9157
9158    /// Assert the pruned walk and the full flatten agree, both as raw map diffs
9159    /// and through the public tree-diff entry points, for the given options.
9160    fn assert_tree_diff_matches_full(
9161        db: &FileObjectDatabase,
9162        left: &ObjectId,
9163        right: &ObjectId,
9164        options: DiffNameStatusOptions,
9165    ) {
9166        // Reference ("old") behaviour: fully flatten both trees, then diff.
9167        let (full_left, full_right) = collect_full_tree_pair(db, ObjectFormat::Sha1, left, right)
9168            .expect("test operation should succeed");
9169        let reference = diff_name_status_maps(
9170            &full_left,
9171            &full_right,
9172            full_left.keys().chain(full_right.keys()),
9173            options,
9174        )
9175        .expect("test operation should succeed");
9176
9177        // Optimized ("new") behaviour: prune identical subtrees, then diff.
9178        let (pruned_left, pruned_right) = changed_tree_entries(db, ObjectFormat::Sha1, left, right)
9179            .expect("test operation should succeed");
9180        let pruned = diff_name_status_maps(
9181            &pruned_left,
9182            &pruned_right,
9183            pruned_left.keys().chain(pruned_right.keys()),
9184            options,
9185        )
9186        .expect("test operation should succeed");
9187
9188        assert_eq!(
9189            status_lines(&reference),
9190            status_lines(&pruned),
9191            "pruned map diff diverged from full map diff for {options:?}"
9192        );
9193
9194        // And the public entry point (which itself selects pruned vs full) must
9195        // match the reference too.
9196        let public =
9197            diff_name_status_trees_with_options(db, ObjectFormat::Sha1, left, right, options)
9198                .expect("test operation should succeed");
9199        assert_eq!(
9200            status_lines(&reference),
9201            status_lines(&public),
9202            "public tree diff diverged from full map diff for {options:?}"
9203        );
9204
9205        // The pruned maps must be a subset of the full maps and must contain
9206        // exactly the paths that actually changed (no identical entries leak in,
9207        // no changed entries get dropped).
9208        for (path, tracked) in &pruned_left {
9209            assert_eq!(
9210                full_left.get(path),
9211                Some(tracked),
9212                "pruned left entry not present (or differs) in full left map: {:?}",
9213                String::from_utf8_lossy(path)
9214            );
9215        }
9216        for (path, tracked) in &pruned_right {
9217            assert_eq!(
9218                full_right.get(path),
9219                Some(tracked),
9220                "pruned right entry not present (or differs) in full right map: {:?}",
9221                String::from_utf8_lossy(path)
9222            );
9223        }
9224        // Every path the full diff reports as changed must survive pruning on
9225        // whichever side(s) it lives.
9226        for entry in &reference {
9227            let path = entry.path.as_bytes();
9228            match entry.status {
9229                NameStatus::Added => assert!(
9230                    pruned_right.contains_key(path),
9231                    "added path dropped by pruning: {:?}",
9232                    String::from_utf8_lossy(path)
9233                ),
9234                NameStatus::Deleted => assert!(
9235                    pruned_left.contains_key(path),
9236                    "deleted path dropped by pruning: {:?}",
9237                    String::from_utf8_lossy(path)
9238                ),
9239                NameStatus::Modified => {
9240                    assert!(
9241                        pruned_left.contains_key(path) && pruned_right.contains_key(path),
9242                        "modified path dropped by pruning: {:?}",
9243                        String::from_utf8_lossy(path)
9244                    );
9245                }
9246                _ => {}
9247            }
9248        }
9249    }
9250
9251    /// Run the equivalence assertion across the option matrix that the pruned
9252    /// path serves (everything except `--find-copies-harder`, which uses the
9253    /// full maps and is checked separately).
9254    fn assert_tree_diff_matches_full_all_modes(
9255        db: &FileObjectDatabase,
9256        left: &ObjectId,
9257        right: &ObjectId,
9258    ) {
9259        for detect_renames in [false, true] {
9260            for detect_copies in [false, true] {
9261                let options = DiffNameStatusOptions {
9262                    detect_renames,
9263                    detect_copies,
9264                    find_copies_harder: false,
9265                    rename_empty: true,
9266                };
9267                assert_tree_diff_matches_full(db, left, right, options);
9268            }
9269        }
9270    }
9271
9272    /// Build a DB pre-seeded with a fixed bank of blobs for the structural tests.
9273    fn structural_db() -> (PathBuf, FileObjectDatabase) {
9274        let root = temp_root();
9275        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
9276            .expect("test operation should succeed");
9277        let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
9278        (root, db)
9279    }
9280
9281    #[test]
9282    fn pruned_walk_skips_identical_subtree_and_matches_full() {
9283        // A large shared subtree (`shared/`) is byte-identical on both sides; the
9284        // only change lives in `app/`. The pruned walk must skip `shared/`
9285        // entirely yet still produce the exact same diff as flattening it.
9286        let (root, mut db) = structural_db();
9287
9288        // shared/ — identical on both sides, several nested files.
9289        let s1 = write_blob(&mut db, b"shared one\n");
9290        let s2 = write_blob(&mut db, b"shared two\n");
9291        let s3 = write_blob(&mut db, b"deep nested\n");
9292        let shared_inner = write_tree(&mut db, &[(b"c.txt", 0o100644, s3.clone())]);
9293        let shared = write_tree(
9294            &mut db,
9295            &[
9296                (b"a.txt", 0o100644, s1.clone()),
9297                (b"b.txt", 0o100644, s2.clone()),
9298                (b"inner", 0o040000, shared_inner.clone()),
9299            ],
9300        );
9301
9302        // app/ — one file modified between sides.
9303        let app_old = write_blob(&mut db, b"version 1\n");
9304        let app_new = write_blob(&mut db, b"version 2\n");
9305        let app_left = write_tree(&mut db, &[(b"main.rs", 0o100644, app_old)]);
9306        let app_right = write_tree(&mut db, &[(b"main.rs", 0o100644, app_new)]);
9307
9308        let left = write_tree(
9309            &mut db,
9310            &[
9311                (b"app", 0o040000, app_left),
9312                (b"shared", 0o040000, shared.clone()),
9313            ],
9314        );
9315        let right = write_tree(
9316            &mut db,
9317            &[(b"app", 0o040000, app_right), (b"shared", 0o040000, shared)],
9318        );
9319
9320        // Sanity: the only change is the nested app/main.rs modification.
9321        let (pruned_left, pruned_right) =
9322            changed_tree_entries(&db, ObjectFormat::Sha1, &left, &right)
9323                .expect("test operation should succeed");
9324        assert_eq!(
9325            pruned_left.keys().collect::<Vec<_>>(),
9326            vec![&b"app/main.rs".to_vec()],
9327            "pruning should leave only the changed path on the left"
9328        );
9329        assert_eq!(
9330            pruned_right.keys().collect::<Vec<_>>(),
9331            vec![&b"app/main.rs".to_vec()],
9332            "pruning should leave only the changed path on the right"
9333        );
9334        assert!(
9335            !pruned_left.contains_key(b"shared/a.txt".as_slice()),
9336            "identical shared subtree must not appear in pruned maps"
9337        );
9338
9339        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
9340        fs::remove_dir_all(root).expect("test operation should succeed");
9341    }
9342
9343    #[test]
9344    fn pruned_walk_matches_full_for_add_delete_modify_nested() {
9345        // Mixed shape: a top-level add, a top-level delete, a nested modify, and
9346        // an untouched nested subtree that must be skipped.
9347        let (root, mut db) = structural_db();
9348
9349        let keep = write_blob(&mut db, b"unchanged\n");
9350        let untouched_dir = write_tree(&mut db, &[(b"keep.txt", 0o100644, keep.clone())]);
9351
9352        let nested_old = write_blob(&mut db, b"nested old\n");
9353        let nested_new = write_blob(&mut db, b"nested new\n");
9354        let dir_left = write_tree(
9355            &mut db,
9356            &[
9357                (b"changed.txt", 0o100644, nested_old),
9358                (b"stable.txt", 0o100644, keep.clone()),
9359            ],
9360        );
9361        let dir_right = write_tree(
9362            &mut db,
9363            &[
9364                (b"changed.txt", 0o100644, nested_new),
9365                (b"stable.txt", 0o100644, keep.clone()),
9366            ],
9367        );
9368
9369        let only_left = write_blob(&mut db, b"will be deleted\n");
9370        let only_right = write_blob(&mut db, b"freshly added\n");
9371
9372        let left = write_tree(
9373            &mut db,
9374            &[
9375                (b"dir", 0o040000, dir_left),
9376                (b"gone.txt", 0o100644, only_left),
9377                (b"untouched", 0o040000, untouched_dir.clone()),
9378            ],
9379        );
9380        let right = write_tree(
9381            &mut db,
9382            &[
9383                (b"dir", 0o040000, dir_right),
9384                (b"new.txt", 0o100644, only_right),
9385                (b"untouched", 0o040000, untouched_dir),
9386            ],
9387        );
9388
9389        let entries = diff_name_status_trees_with_options(
9390            &db,
9391            ObjectFormat::Sha1,
9392            &left,
9393            &right,
9394            DiffNameStatusOptions {
9395                detect_renames: false,
9396                detect_copies: false,
9397                find_copies_harder: false,
9398                rename_empty: true,
9399            },
9400        )
9401        .expect("test operation should succeed");
9402        assert_eq!(
9403            status_lines(&entries),
9404            vec![
9405                "M\tdir/changed.txt".to_string(),
9406                "D\tgone.txt".to_string(),
9407                "A\tnew.txt".to_string(),
9408            ],
9409            "unexpected raw status for mixed nested diff"
9410        );
9411
9412        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
9413        fs::remove_dir_all(root).expect("test operation should succeed");
9414    }
9415
9416    #[test]
9417    fn pruned_walk_matches_full_for_rename_across_dirs() {
9418        // An exact rename (same blob oid) moving between directories. Rename
9419        // detection runs on the pruned add/delete set and must match the full
9420        // walk's result exactly.
9421        let (root, mut db) = structural_db();
9422
9423        let moved = write_blob(&mut db, b"i get moved across directories\n");
9424        let companion = write_blob(&mut db, b"i stay put\n");
9425        let stable_dir = write_tree(&mut db, &[(b"keep.txt", 0o100644, companion.clone())]);
9426
9427        let src_dir = write_tree(&mut db, &[(b"file.txt", 0o100644, moved.clone())]);
9428        let dst_dir = write_tree(&mut db, &[(b"renamed.txt", 0o100644, moved.clone())]);
9429
9430        let left = write_tree(
9431            &mut db,
9432            &[
9433                (b"src", 0o040000, src_dir),
9434                (b"stable", 0o040000, stable_dir.clone()),
9435            ],
9436        );
9437        let right = write_tree(
9438            &mut db,
9439            &[
9440                (b"dst", 0o040000, dst_dir),
9441                (b"stable", 0o040000, stable_dir),
9442            ],
9443        );
9444
9445        let entries = diff_name_status_trees_with_options(
9446            &db,
9447            ObjectFormat::Sha1,
9448            &left,
9449            &right,
9450            DiffNameStatusOptions {
9451                detect_renames: true,
9452                detect_copies: false,
9453                find_copies_harder: false,
9454                rename_empty: true,
9455            },
9456        )
9457        .expect("test operation should succeed");
9458        assert_eq!(
9459            status_lines(&entries),
9460            vec!["R100\tsrc/file.txt\tdst/renamed.txt".to_string()],
9461            "rename across dirs should be detected on pruned set"
9462        );
9463
9464        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
9465        fs::remove_dir_all(root).expect("test operation should succeed");
9466    }
9467
9468    #[test]
9469    fn pruned_walk_matches_full_for_binary_and_mode_change() {
9470        // Binary blob modification plus an executable-bit (mode) change on an
9471        // otherwise-identical blob. Mode-only changes must still register as a
9472        // Modify (the pruned walk compares mode + oid, like the full map).
9473        let (root, mut db) = structural_db();
9474
9475        let bin_old = write_blob(&mut db, &[0u8, 159, 146, 150, 0, 255, 1, 2, 3]);
9476        let bin_new = write_blob(&mut db, &[0u8, 159, 146, 150, 0, 254, 9, 8, 7]);
9477        let script = write_blob(&mut db, b"#!/bin/sh\necho hi\n");
9478
9479        let left = write_tree(
9480            &mut db,
9481            &[
9482                (b"image.bin", 0o100644, bin_old),
9483                (b"run.sh", 0o100644, script.clone()),
9484            ],
9485        );
9486        let right = write_tree(
9487            &mut db,
9488            &[
9489                (b"image.bin", 0o100644, bin_new),
9490                // same blob oid, executable bit flipped on
9491                (b"run.sh", 0o100755, script),
9492            ],
9493        );
9494
9495        let entries = diff_name_status_trees_with_options(
9496            &db,
9497            ObjectFormat::Sha1,
9498            &left,
9499            &right,
9500            DiffNameStatusOptions {
9501                detect_renames: false,
9502                detect_copies: false,
9503                find_copies_harder: false,
9504                rename_empty: true,
9505            },
9506        )
9507        .expect("test operation should succeed");
9508        assert_eq!(
9509            status_lines(&entries),
9510            vec!["M\timage.bin".to_string(), "M\trun.sh".to_string()],
9511            "binary edit and mode-only change should both be Modify"
9512        );
9513
9514        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
9515        fs::remove_dir_all(root).expect("test operation should succeed");
9516    }
9517
9518    #[test]
9519    fn pruned_walk_matches_full_for_dir_replaced_by_file() {
9520        // A name that is a directory on the left and a regular file on the right
9521        // (and vice versa). The flattened paths differ (`thing/...` vs `thing`),
9522        // so the pruned walk must treat them as unrelated add/delete pairs,
9523        // exactly as the full flatten does.
9524        let (root, mut db) = structural_db();
9525
9526        let inner_a = write_blob(&mut db, b"inner a\n");
9527        let inner_b = write_blob(&mut db, b"inner b\n");
9528        let thing_dir = write_tree(
9529            &mut db,
9530            &[(b"a.txt", 0o100644, inner_a), (b"b.txt", 0o100644, inner_b)],
9531        );
9532        let thing_file = write_blob(&mut db, b"now i am a file\n");
9533
9534        // other/ is a file on the left, a directory on the right.
9535        let other_file = write_blob(&mut db, b"i was a file\n");
9536        let other_inner = write_blob(&mut db, b"now nested\n");
9537        let other_dir = write_tree(&mut db, &[(b"x.txt", 0o100644, other_inner)]);
9538
9539        let left = write_tree(
9540            &mut db,
9541            &[
9542                (b"other", 0o100644, other_file),
9543                (b"thing", 0o040000, thing_dir),
9544            ],
9545        );
9546        let right = write_tree(
9547            &mut db,
9548            &[
9549                (b"other", 0o040000, other_dir),
9550                (b"thing", 0o100644, thing_file),
9551            ],
9552        );
9553
9554        let entries = diff_name_status_trees_with_options(
9555            &db,
9556            ObjectFormat::Sha1,
9557            &left,
9558            &right,
9559            DiffNameStatusOptions {
9560                detect_renames: false,
9561                detect_copies: false,
9562                find_copies_harder: false,
9563                rename_empty: true,
9564            },
9565        )
9566        .expect("test operation should succeed");
9567        assert_eq!(
9568            status_lines(&entries),
9569            vec![
9570                "D\tother".to_string(),
9571                "A\tother/x.txt".to_string(),
9572                "A\tthing".to_string(),
9573                "D\tthing/a.txt".to_string(),
9574                "D\tthing/b.txt".to_string(),
9575            ],
9576            "dir<->file swap should flatten to independent adds/deletes"
9577        );
9578
9579        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
9580        fs::remove_dir_all(root).expect("test operation should succeed");
9581    }
9582
9583    #[test]
9584    fn pruned_walk_matches_full_for_identical_trees() {
9585        // Two identical root trees: zero changes, and the root must be skipped
9586        // without reading anything below it.
9587        let (root, mut db) = structural_db();
9588
9589        let blob = write_blob(&mut db, b"same\n");
9590        let sub = write_tree(&mut db, &[(b"f.txt", 0o100644, blob.clone())]);
9591        let tree = write_tree(
9592            &mut db,
9593            &[(b"sub", 0o040000, sub), (b"top.txt", 0o100644, blob)],
9594        );
9595
9596        let (pruned_left, pruned_right) =
9597            changed_tree_entries(&db, ObjectFormat::Sha1, &tree, &tree)
9598                .expect("test operation should succeed");
9599        assert!(
9600            pruned_left.is_empty() && pruned_right.is_empty(),
9601            "identical trees must produce no changed entries"
9602        );
9603
9604        let entries = diff_name_status_trees_with_options(
9605            &db,
9606            ObjectFormat::Sha1,
9607            &tree,
9608            &tree,
9609            DiffNameStatusOptions::default(),
9610        )
9611        .expect("test operation should succeed");
9612        assert!(entries.is_empty(), "identical trees must produce no diff");
9613
9614        assert_tree_diff_matches_full_all_modes(&db, &tree, &tree);
9615        fs::remove_dir_all(root).expect("test operation should succeed");
9616    }
9617
9618    #[test]
9619    fn find_copies_harder_uses_full_left_map_and_finds_unchanged_source() {
9620        // `--find-copies-harder` must still see an *unchanged* file as a copy
9621        // source. This is the case where the public entry point deliberately
9622        // falls back to the full flatten; verify the full-map fallback both
9623        // behaves correctly and matches a direct full-map computation.
9624        let (root, mut db) = structural_db();
9625
9626        // `template.txt` is unchanged between sides (lives in an untouched
9627        // subtree), and `copy.txt` is added on the right with the same content.
9628        let template = write_blob(&mut db, b"reusable boilerplate content\n");
9629        let lib_dir = write_tree(&mut db, &[(b"template.txt", 0o100644, template.clone())]);
9630
9631        let trigger_old = write_blob(&mut db, b"trigger old\n");
9632        let trigger_new = write_blob(&mut db, b"trigger new\n");
9633
9634        let left = write_tree(
9635            &mut db,
9636            &[
9637                (b"lib", 0o040000, lib_dir.clone()),
9638                (b"trigger.txt", 0o100644, trigger_old),
9639            ],
9640        );
9641        let right = write_tree(
9642            &mut db,
9643            &[
9644                (b"copy.txt", 0o100644, template.clone()),
9645                (b"lib", 0o040000, lib_dir),
9646                (b"trigger.txt", 0o100644, trigger_new),
9647            ],
9648        );
9649
9650        let options = DiffNameStatusOptions {
9651            detect_renames: true,
9652            detect_copies: true,
9653            find_copies_harder: true,
9654            rename_empty: true,
9655        };
9656
9657        // Reference via the full flatten (the old algorithm).
9658        let (full_left, full_right) =
9659            collect_full_tree_pair(&db, ObjectFormat::Sha1, &left, &right)
9660                .expect("test operation should succeed");
9661        let reference = diff_name_status_maps(
9662            &full_left,
9663            &full_right,
9664            full_left.keys().chain(full_right.keys()),
9665            options,
9666        )
9667        .expect("test operation should succeed");
9668
9669        let public =
9670            diff_name_status_trees_with_options(&db, ObjectFormat::Sha1, &left, &right, options)
9671                .expect("test operation should succeed");
9672        assert_eq!(
9673            status_lines(&reference),
9674            status_lines(&public),
9675            "find-copies-harder public diff must match full-map reference"
9676        );
9677        // The copy must be detected from the unchanged template source.
9678        assert!(
9679            public
9680                .iter()
9681                .any(|entry| matches!(entry.status, NameStatus::Copied(_))
9682                    && entry.old_path.as_ref().map(|p| p.as_bytes())
9683                        == Some(b"lib/template.txt".as_slice())
9684                    && entry.path == b"copy.txt"),
9685            "copy from unchanged source must be found with find_copies_harder: {public:?}"
9686        );
9687        fs::remove_dir_all(root).expect("test operation should succeed");
9688    }
9689
9690    #[test]
9691    fn pruned_walk_matches_full_with_inexact_rename_options() {
9692        // Exercise the rename-options entry point (which also selects pruned vs
9693        // full) with inexact detection enabled, across an untouched subtree.
9694        let (root, mut db) = structural_db();
9695
9696        let untouched = write_blob(&mut db, b"untouched file\n");
9697        let untouched_dir = write_tree(&mut db, &[(b"u.txt", 0o100644, untouched.clone())]);
9698
9699        // a.txt -> b.txt with one changed line (a 75% inexact rename).
9700        let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
9701        let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
9702
9703        let left = write_tree(
9704            &mut db,
9705            &[
9706                (b"a.txt", 0o100644, old),
9707                (b"keep", 0o040000, untouched_dir.clone()),
9708            ],
9709        );
9710        let right = write_tree(
9711            &mut db,
9712            &[
9713                (b"b.txt", 0o100644, new),
9714                (b"keep", 0o040000, untouched_dir),
9715            ],
9716        );
9717
9718        let options = RenameDetectionOptions {
9719            base: DiffNameStatusOptions {
9720                detect_renames: true,
9721                detect_copies: false,
9722                find_copies_harder: false,
9723                rename_empty: true,
9724            },
9725            detect_inexact: true,
9726            rename_threshold: DEFAULT_RENAME_THRESHOLD,
9727            copy_threshold: DEFAULT_RENAME_THRESHOLD,
9728        };
9729
9730        // Reference: full flatten + same detection.
9731        let (full_left, full_right) =
9732            collect_full_tree_pair(&db, ObjectFormat::Sha1, &left, &right)
9733                .expect("test operation should succeed");
9734        let reference = diff_name_status_maps_with_renames(
9735            &full_left,
9736            &full_right,
9737            full_left.keys().chain(full_right.keys()),
9738            options,
9739            |oid| read_blob_bytes(&db, oid),
9740        )
9741        .expect("test operation should succeed");
9742
9743        let public = diff_name_status_trees_with_rename_options(
9744            &db,
9745            ObjectFormat::Sha1,
9746            &left,
9747            &right,
9748            options,
9749        )
9750        .expect("test operation should succeed");
9751
9752        assert_eq!(
9753            status_lines(&reference),
9754            status_lines(&public),
9755            "inexact rename via pruned walk must match full-map reference"
9756        );
9757        assert_eq!(
9758            status_lines(&public),
9759            vec!["R075\ta.txt\tb.txt".to_string()],
9760            "expected a 75% inexact rename"
9761        );
9762        fs::remove_dir_all(root).expect("test operation should succeed");
9763    }
9764}