Skip to main content

sley_diff_merge/
lib.rs

1use sley_core::{GitError, ObjectFormat, ObjectId, RepoPath, Result, object_id_for_bytes};
2
3mod name;
4pub mod range;
5pub mod render;
6pub mod ws;
7
8pub use sley_core::BString;
9use sley_index::{BorrowedIndex, Index, IndexStatCache};
10use sley_object::{Commit, EncodedObject, ObjectType, Tree, TreeEntries, TreeEntry};
11use sley_odb::{FileObjectDatabase, ObjectReader, ObjectWriter};
12use sley_refs::{FileRefStore, RefTarget};
13use std::collections::{BTreeMap, BTreeSet, HashMap};
14use std::fs;
15use std::path::{Path, PathBuf};
16
17// ===========================================================================
18// Gitlink (submodule) resolution helpers.
19//
20// A gitlink is a mode-160000 tree/index entry whose oid names the commit an
21// embedded repository has checked out. These helpers resolve, for a directory
22// in the working tree, (a) the embedded repository's git directory — either a
23// `.git` directory or a `.git` *file* carrying a `gitdir: <path>` pointer (the
24// layout `git submodule add`/`update` creates, pointing into the
25// superproject's `.git/modules/<name>`) — and (b) the commit its HEAD names.
26// They are the native equivalent of upstream's `resolve_gitlink_ref()`.
27// ===========================================================================
28
/// Locate the git directory of the embedded repository whose working tree is
/// rooted at `sub_root`.
///
/// * If `sub_root/.git` is a directory, that directory is the answer.
/// * If it is a regular file, its contents must start with `gitdir:`; the
///   remainder (whitespace-trimmed) is the pointer. A relative pointer is
///   resolved against `sub_root`, an absolute one is used verbatim.
///
/// Returns `None` when `.git` cannot be stat'ed, is neither a directory nor a
/// regular file, cannot be read as UTF-8, lacks the `gitdir:` prefix, carries
/// an empty pointer, or points at something that is not an existing directory.
pub fn gitlink_git_dir(sub_root: &Path) -> Option<PathBuf> {
    let marker = sub_root.join(".git");
    // `symlink_metadata` inspects the entry itself; it does not follow links.
    let info = fs::symlink_metadata(&marker).ok()?;
    if info.is_dir() {
        return Some(marker);
    }
    if !info.is_file() {
        return None;
    }

    let text = fs::read_to_string(&marker).ok()?;
    let pointer = text.strip_prefix("gitdir:").map(str::trim)?;
    if pointer.is_empty() {
        return None;
    }

    let pointer = Path::new(pointer);
    let resolved = match pointer.is_absolute() {
        true => pointer.to_path_buf(),
        false => sub_root.join(pointer),
    };
    // Only a pointer that names a live directory counts as resolved.
    Some(resolved).filter(|dir| dir.is_dir())
}
60
/// Detect a *broken* gitlink at `sub_root` and report the dangling gitdir.
///
/// A gitlink is broken when `sub_root/.git` is a regular file whose
/// `gitdir:` pointer (relative pointers resolve against `sub_root`) does not
/// name an existing directory — for instance after the submodule's git
/// directory was moved out of `.git/modules/`. In that case the unresolved
/// path is returned so the caller can report it.
///
/// Returns `None` when there is nothing to complain about from this
/// function's point of view: no `.git` entry at all (unpopulated), a `.git`
/// that is not a regular file (e.g. a real directory), an unreadable or
/// prefix-less `.git` file, an empty pointer, or a pointer that resolves to a
/// live directory.
pub fn gitlink_broken_gitdir(sub_root: &Path) -> Option<PathBuf> {
    let marker = sub_root.join(".git");
    let is_pointer_file = fs::symlink_metadata(&marker).ok()?.is_file();
    if !is_pointer_file {
        // Missing `.git` or a real `.git` directory: not a broken pointer.
        return None;
    }

    let text = fs::read_to_string(&marker).ok()?;
    let pointer = text.strip_prefix("gitdir:").map(str::trim)?;
    if pointer.is_empty() {
        return None;
    }

    let pointer = Path::new(pointer);
    let resolved = match pointer.is_absolute() {
        true => pointer.to_path_buf(),
        false => sub_root.join(pointer),
    };
    // Report the path only when it does NOT name an existing directory.
    Some(resolved).filter(|dir| !dir.is_dir())
}
91
92/// Resolve the commit checked out in the embedded repository at `sub_root`
93/// (the value a gitlink entry for that path records): its git directory's
94/// HEAD, followed through symbolic refs. `None` when `sub_root` is not a
95/// repository or its HEAD does not resolve to a commit (e.g. an unborn
96/// branch) — upstream's `resolve_gitlink_ref() < 0` case.
97pub fn gitlink_head_oid(sub_root: &Path, format: ObjectFormat) -> Option<ObjectId> {
98    let git_dir = gitlink_git_dir(sub_root)?;
99    let store = FileRefStore::new(&git_dir, format);
100    let mut target = store.read_ref("HEAD").ok()??;
101    // Follow symbolic-ref chains defensively (git caps the depth too).
102    for _ in 0..10 {
103        match target {
104            RefTarget::Direct(oid) => return Some(oid),
105            RefTarget::Symbolic(name) => target = store.read_ref(&name).ok()??,
106        }
107    }
108    None
109}
110
111// ===========================================================================
112// Line-level diff (Myers O(ND)) and 3-way blob merge (diff3).
113//
114// These operate purely on in-memory blobs and never touch the ODB or the
115// filesystem. They are the engine the CLI layers `git merge`, `cherry-pick`,
116// and `revert` on top of.
117// ===========================================================================
118
/// One line of a blob, borrowed from the buffer it was split out of.
///
/// `content` holds the line's bytes *including* its terminating `\n` when it
/// has one, and `has_newline` records whether that terminator was present.
/// Equality (derived) compares both fields, so two lines that differ only in
/// whether they end with a newline are not equal.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DiffLine<'a> {
    /// Raw line bytes; the trailing `\n` is included when the line had one.
    pub content: &'a [u8],
    /// `true` when the line was newline-terminated in the source blob.
    pub has_newline: bool,
}

impl<'a> DiffLine<'a> {
    /// Return the line's bytes with its terminating `\n` removed.
    ///
    /// The newline is only stripped when `has_newline` is set *and* the last
    /// byte really is `\n`; otherwise `content` is returned unchanged.
    pub fn bytes_without_newline(&self) -> &'a [u8] {
        match self.content.split_last() {
            Some((&b'\n', body)) if self.has_newline => body,
            _ => self.content,
        }
    }
}
145
146/// Split a blob into lines, preserving the exact bytes of each line.
147///
148/// Each returned [`DiffLine`] borrows from `blob`; its `content` includes the
149/// terminating `\n`. The returned vector is empty for an empty blob. A blob
150/// whose final byte is not `\n` yields a final line with `has_newline ==
151/// false` — git's "\ No newline at end of file" case.
152pub fn split_lines(blob: &[u8]) -> Vec<DiffLine<'_>> {
153    let mut lines = Vec::new();
154    let mut start = 0usize;
155    let len = blob.len();
156    let mut idx = 0usize;
157    while idx < len {
158        if blob[idx] == b'\n' {
159            lines.push(DiffLine {
160                content: &blob[start..=idx],
161                has_newline: true,
162            });
163            idx += 1;
164            start = idx;
165        } else {
166            idx += 1;
167        }
168    }
169    if start < len {
170        lines.push(DiffLine {
171            content: &blob[start..len],
172            has_newline: false,
173        });
174    }
175    lines
176}
177
/// One run in a line-level edit script.
///
/// The payload of each variant is the number of consecutive lines the run
/// covers. Consuming the runs in order — advancing through `old` for
/// [`DiffOp::Equal`] and [`DiffOp::Delete`], and through `new` for
/// [`DiffOp::Equal`] and [`DiffOp::Insert`] — rebuilds `new` from `old`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffOp {
    /// The next `n` lines are the same in `old` and `new`.
    Equal(usize),
    /// The next `n` lines of `old` do not appear in `new`.
    Delete(usize),
    /// The next `n` lines of `new` do not appear in `old`.
    Insert(usize),
}
196
197/// Compute a minimal line-level edit script transforming `old` into `new`
198/// using Myers' O(ND) difference algorithm.
199///
200/// Lines are compared for equality by their full bytes (see [`DiffLine`]). The
201/// result is a coalesced sequence of [`DiffOp`] runs; consecutive ops of the
202/// same kind are merged so the script is compact. The script is a standard
203/// (shortest-edit-script) diff: the number of `Delete` + `Insert` lines is
204/// minimal.
205pub fn myers_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
206    // Trim a common prefix and suffix first. This keeps the O(ND) search small
207    // for the typical case of a localized edit and does not affect minimality.
208    let n_total = old.len();
209    let m_total = new.len();
210    let mut prefix = 0usize;
211    while prefix < n_total && prefix < m_total && old[prefix] == new[prefix] {
212        prefix += 1;
213    }
214    let mut suffix = 0usize;
215    while suffix < n_total - prefix
216        && suffix < m_total - prefix
217        && old[n_total - 1 - suffix] == new[m_total - 1 - suffix]
218    {
219        suffix += 1;
220    }
221
222    let old_mid = &old[prefix..n_total - suffix];
223    let new_mid = &new[prefix..m_total - suffix];
224
225    let mut ops: Vec<DiffOp> = Vec::new();
226    if prefix > 0 {
227        ops.push(DiffOp::Equal(prefix));
228    }
229    myers_core(old_mid, new_mid, &mut ops);
230    if suffix > 0 {
231        ops.push(DiffOp::Equal(suffix));
232    }
233    coalesce_ops(ops)
234}
235
236/// Classic forward Myers O(ND) shortest-edit-script search over the trimmed
237/// sub-problem, followed by a backtrack through the stored traces.
238///
239/// `old`/`new` are the trimmed (no common prefix/suffix) line slices. Per-line
240/// ops are appended to `out` in order; they are coalesced by the caller. This
241/// is the algorithm from Myers' 1986 paper, which yields a shortest edit script
242/// (minimal number of insertions + deletions).
243fn myers_core(old: &[DiffLine<'_>], new: &[DiffLine<'_>], out: &mut Vec<DiffOp>) {
244    let n = old.len() as isize;
245    let m = new.len() as isize;
246    if n == 0 {
247        if m > 0 {
248            out.push(DiffOp::Insert(m as usize));
249        }
250        return;
251    }
252    if m == 0 {
253        out.push(DiffOp::Delete(n as usize));
254        return;
255    }
256
257    let max = (n + m) as usize;
258    let offset = max as isize; // shift so diagonal k maps to index (k + offset)
259    let width = 2 * max + 1;
260    // v[k + offset] holds the furthest-reaching x on diagonal k for the current d.
261    let mut v = vec![0isize; width];
262    // Save a snapshot of v after each d so we can backtrack the chosen path.
263    let mut trace: Vec<Vec<isize>> = Vec::new();
264
265    let mut found_d: Option<usize> = None;
266    'search: for d in 0..=(max as isize) {
267        trace.push(v.clone());
268        let mut k = -d;
269        while k <= d {
270            let kidx = (k + offset) as usize;
271            // Decide whether we arrived here by moving down (insert, from k+1)
272            // or right (delete, from k-1). Prefer the move that reaches further.
273            let mut x = if k == -d
274                || (k != d && v[(k - 1 + offset) as usize] < v[(k + 1 + offset) as usize])
275            {
276                // Move down: x stays, y increases (insertion from new).
277                v[(k + 1 + offset) as usize]
278            } else {
279                // Move right: x increases (deletion from old).
280                v[(k - 1 + offset) as usize] + 1
281            };
282            let mut y = x - k;
283            // Follow the diagonal (matching lines) as far as possible.
284            while x < n && y < m && old[x as usize] == new[y as usize] {
285                x += 1;
286                y += 1;
287            }
288            v[kidx] = x;
289            if x >= n && y >= m {
290                found_d = Some(d as usize);
291                break 'search;
292            }
293            k += 2;
294        }
295    }
296
297    // A shortest edit path always exists, so found_d is set; if somehow not,
298    // fall back to a delete-all/insert-all script (still correct, not minimal).
299    let Some(d_end) = found_d else {
300        out.push(DiffOp::Delete(n as usize));
301        out.push(DiffOp::Insert(m as usize));
302        return;
303    };
304
305    backtrack(n, m, &trace, d_end, offset, out);
306}
307
308/// Reconstruct the edit script from the saved Myers traces.
309///
310/// Walks backward from `(n, m)` to `(0, 0)`, emitting per-line `Delete`,
311/// `Insert`, and `Equal` ops, then reverses them into forward order before
312/// appending to `out`. `n`/`m` are the lengths of the (trimmed) old/new slices.
313fn backtrack(
314    n: isize,
315    m: isize,
316    trace: &[Vec<isize>],
317    d_end: usize,
318    offset: isize,
319    out: &mut Vec<DiffOp>,
320) {
321    let mut x = n;
322    let mut y = m;
323    let mut rev: Vec<DiffOp> = Vec::new();
324
325    for d in (0..=d_end).rev() {
326        let v = &trace[d];
327        let k = x - y;
328        // Determine the predecessor diagonal, mirroring the forward step rule.
329        let prev_k = if k == -(d as isize)
330            || (k != d as isize && v[(k - 1 + offset) as usize] < v[(k + 1 + offset) as usize])
331        {
332            k + 1 // came from a down move (insert)
333        } else {
334            k - 1 // came from a right move (delete)
335        };
336        let prev_x = v[(prev_k + offset) as usize];
337        let prev_y = prev_x - prev_k;
338
339        // Emit the diagonal (equal) moves taken after reaching the predecessor.
340        while x > prev_x && y > prev_y {
341            rev.push(DiffOp::Equal(1));
342            x -= 1;
343            y -= 1;
344        }
345        if d > 0 {
346            if x == prev_x {
347                // Down move: an insertion of new[prev_y].
348                rev.push(DiffOp::Insert(1));
349            } else {
350                // Right move: a deletion of old[prev_x].
351                rev.push(DiffOp::Delete(1));
352            }
353            x = prev_x;
354            y = prev_y;
355        }
356    }
357
358    rev.reverse();
359    out.extend(rev);
360}
361
362/// Merge adjacent ops of the same kind so the script is compact.
363fn coalesce_ops(ops: Vec<DiffOp>) -> Vec<DiffOp> {
364    let mut out: Vec<DiffOp> = Vec::with_capacity(ops.len());
365    for op in ops {
366        match (out.last_mut(), op) {
367            (Some(DiffOp::Equal(prev)), DiffOp::Equal(n)) => *prev += n,
368            (Some(DiffOp::Delete(prev)), DiffOp::Delete(n)) => *prev += n,
369            (Some(DiffOp::Insert(prev)), DiffOp::Insert(n)) => *prev += n,
370            _ => out.push(op),
371        }
372    }
373    out
374}
375
376// ===========================================================================
377// Whitespace-ignoring line comparison (git xdiff's XDF_WHITESPACE_FLAGS).
378//
379// git's xdiff compares two records (lines, including the trailing `\n`) for
380// equality under whitespace-ignore flags via `xdl_recmatch`. Rather than
381// re-implement the Myers core to take a custom equality predicate, we map each
382// flavour to a *canonicalization* of the line bytes that produces identical
383// output iff `xdl_recmatch` would return 1, then diff on the canonicalized
384// lines while emitting the original bytes. This is exact: it is a behavioural
385// port of `xdiff/xutils.c:xdl_recmatch` and `xdl_blankline`.
386// ===========================================================================
387
/// Which whitespace differences to disregard when comparing lines; the
/// counterpart of git's `XDF_WHITESPACE_FLAGS` (`-w`, `-b`,
/// `--ignore-space-at-eol`, `--ignore-cr-at-eol`).
///
/// Several flags may be set at once, but only the strongest one takes effect,
/// in the order `all_space`, `space_change`, `space_at_eol`, `cr_at_eol`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct WsIgnore {
    /// `-w` / `--ignore-all-space`: whitespace never matters.
    pub all_space: bool,
    /// `-b` / `--ignore-space-change`: the amount of whitespace does not matter.
    pub space_change: bool,
    /// `--ignore-space-at-eol`: whitespace at the end of a line does not matter.
    pub space_at_eol: bool,
    /// `--ignore-cr-at-eol`: a carriage return at the end of a line does not matter.
    pub cr_at_eol: bool,
}

impl WsIgnore {
    /// The all-flags-off value: lines are compared byte for byte.
    pub const EMPTY: Self = Self {
        all_space: false,
        space_change: false,
        space_at_eol: false,
        cr_at_eol: false,
    };

    /// Whether every flag is off, i.e. this value equals [`WsIgnore::EMPTY`].
    pub fn is_empty(&self) -> bool {
        *self == Self::EMPTY
    }
}
420
/// `XDL_ISSPACE`: the byte classes C's `isspace` accepts — the space
/// character plus the contiguous control range `\t` (0x09), `\n` (0x0a),
/// vertical tab (0x0b), form feed (0x0c) and `\r` (0x0d).
#[inline]
fn xdl_isspace(c: u8) -> bool {
    matches!(c, b' ' | b'\t'..=b'\r')
}
427
428/// Canonicalize a line's bytes (including any trailing `\n`) for whitespace-
429/// insensitive comparison, exactly mirroring `xdl_recmatch`'s acceptance set:
430/// two original lines are equal under `ignore` iff their canonical forms are
431/// byte-identical.
432///
433/// * `all_space` (`-w`): drop every whitespace byte.
434/// * `space_change` (`-b`): collapse each run of whitespace to a single `' '`
435///   and strip trailing whitespace (a run on one side matches a run on the
436///   other regardless of length; leading/internal whitespace must still align,
437///   trailing whitespace is dropped entirely).
438/// * `space_at_eol`: strip trailing whitespace only.
439/// * `cr_at_eol`: drop a single `\r` immediately before a terminating `\n`.
440///
441/// Exposed crate-internally so the change-compaction pass in [`crate::render`]
442/// can compare lines for sliding under the exact same equality the line-level
443/// diff uses (git's `recs_match` on the whitespace-canonicalized record).
444pub(crate) fn canonicalize_line_for_match(line: &[u8], ignore: WsIgnore) -> Vec<u8> {
445    canonicalize_line(line, ignore)
446}
447
448fn canonicalize_line(line: &[u8], ignore: WsIgnore) -> Vec<u8> {
449    if ignore.all_space {
450        return line.iter().copied().filter(|&c| !xdl_isspace(c)).collect();
451    }
452    if ignore.space_change {
453        let mut out = Vec::with_capacity(line.len());
454        let mut i = 0usize;
455        while i < line.len() {
456            if xdl_isspace(line[i]) {
457                // Collapse the whole whitespace run to a single space.
458                while i < line.len() && xdl_isspace(line[i]) {
459                    i += 1;
460                }
461                out.push(b' ');
462            } else {
463                out.push(line[i]);
464                i += 1;
465            }
466        }
467        // Strip a trailing collapsed-space (trailing whitespace is ignored).
468        if out.last() == Some(&b' ') {
469            out.pop();
470        }
471        return out;
472    }
473    if ignore.space_at_eol {
474        let mut end = line.len();
475        while end > 0 && xdl_isspace(line[end - 1]) {
476            end -= 1;
477        }
478        return line[..end].to_vec();
479    }
480    if ignore.cr_at_eol {
481        // Drop a `\r` directly before a terminating `\n`.
482        if let Some(stripped) = line.strip_suffix(b"\n") {
483            if let Some(without_cr) = stripped.strip_suffix(b"\r") {
484                let mut out = without_cr.to_vec();
485                out.push(b'\n');
486                return out;
487            }
488        } else if let Some(without_cr) = line.strip_suffix(b"\r") {
489            // Incomplete final line: a bare trailing `\r` is also ignored.
490            return without_cr.to_vec();
491        }
492        return line.to_vec();
493    }
494    line.to_vec()
495}
496
497/// `xdl_blankline`: a line is "blank" when, after applying the active
498/// whitespace flags, it has no content. With no whitespace flags, git treats a
499/// record of size ≤ 1 (empty, or a lone `\n`) as blank; with flags, a line all
500/// of whose bytes are whitespace is blank.
501fn line_is_blank(line: &[u8], ignore: WsIgnore) -> bool {
502    if ignore.is_empty() {
503        line.len() <= 1
504    } else {
505        line.iter().all(|&c| xdl_isspace(c))
506    }
507}
508
509/// Compute a line-level edit script transforming `old` into `new`, comparing
510/// lines under the whitespace-ignore flags `ignore` while the returned ops
511/// still index the *original* lines position-for-position.
512///
513/// When `ignore.is_empty()`, this is identical to [`myers_diff_lines`]. With
514/// flags, lines are canonicalized (see [`canonicalize_line`]) for the equality
515/// test only; the ops consume the same number of old/new lines as the originals
516/// so the caller can render the original bytes.
517pub fn myers_diff_lines_ws(
518    old: &[DiffLine<'_>],
519    new: &[DiffLine<'_>],
520    ignore: WsIgnore,
521    algorithm: DiffAlgorithm,
522) -> Vec<DiffOp> {
523    if ignore.is_empty() {
524        return diff_lines_with_algorithm(old, new, algorithm);
525    }
526    let old_canon: Vec<Vec<u8>> = old
527        .iter()
528        .map(|l| canonicalize_line(l.content, ignore))
529        .collect();
530    let new_canon: Vec<Vec<u8>> = new
531        .iter()
532        .map(|l| canonicalize_line(l.content, ignore))
533        .collect();
534    let old_lines: Vec<DiffLine<'_>> = old_canon
535        .iter()
536        .map(|c| DiffLine {
537            content: c.as_slice(),
538            has_newline: true,
539        })
540        .collect();
541    let new_lines: Vec<DiffLine<'_>> = new_canon
542        .iter()
543        .map(|c| DiffLine {
544            content: c.as_slice(),
545            has_newline: true,
546        })
547        .collect();
548    diff_lines_with_algorithm(&old_lines, &new_lines, algorithm)
549}
550
551// ===========================================================================
552// Alternative diff algorithms: patience and histogram.
553//
554// Both share the recursive "anchor and recurse" shape used by git's xdiff
555// implementations of `--patience` and `--histogram`:
556//
557//   1. trim the common prefix and suffix of the current line range,
558//   2. pick one or more common lines that are confidently aligned (the
559//      "anchors") according to the algorithm's rule,
560//   3. recurse on the gaps to the left of, between, and to the right of the
561//      anchors,
562//   4. when no anchor can be found, fall back to the Myers shortest-edit-script
563//      search for that range so the result is still a valid LCS-correct diff.
564//
565// They operate purely on slices of [`DiffLine`]s and emit the same coalesced
566// [`DiffOp`] run sequence as [`myers_diff_lines`], so any caller can swap
567// algorithms freely. The two functions differ only in the anchor-selection
568// rule in steps 2/3.
569// ===========================================================================
570
571/// A hashable key for a line, used to bucket equal lines when finding anchors.
572///
573/// Mirrors [`DiffLine`]'s `PartialEq`: two lines are the same iff their bytes
574/// and their trailing-newline flag match. Keying on this tuple lets us hash
575/// lines without changing the public [`DiffLine`] type.
576type LineKey<'a> = (&'a [u8], bool);
577
578#[inline]
579fn line_key<'a>(line: &DiffLine<'a>) -> LineKey<'a> {
580    (line.content, line.has_newline)
581}
582
583/// Compute a line-level edit script transforming `old` into `new` using the
584/// patience diff algorithm (Bram Cohen's algorithm, as in `git diff
585/// --patience`).
586///
587/// Patience diff anchors on lines that occur *exactly once* in both `old` and
588/// `new`; it aligns those unique lines via a longest-increasing-subsequence
589/// ("patience sorting") pass and recurses into the gaps, falling back to Myers
590/// when a gap has no unique common line. The result is a valid LCS-correct edit
591/// script with the same shape as [`myers_diff_lines`]: walking it reconstructs
592/// `new` from `old`, and every [`DiffOp::Equal`] run covers genuinely equal
593/// lines. Patience tends to produce more human-readable hunks than Myers when
594/// blocks of lines are moved or repeated, though it is not guaranteed to be a
595/// shortest edit script.
596pub fn patience_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
597    patience_diff_lines_anchored(old, new, &[])
598}
599
600/// As [`patience_diff_lines`], but pins lines whose content has any of `anchors`
601/// as a byte prefix into the common subsequence (git's `--anchored=<text>`).
602///
603/// Mirrors xdiff's `xpatience.c`: an anchor line that is unique in both ranges is
604/// forced to remain aligned (so *other* lines are moved instead), taken greedily
605/// in old-side order; an anchor that would break the increasing order with an
606/// already-pinned anchor is dropped. Anchors that are non-unique or absent have
607/// no effect, exactly as in git. With `anchors` empty this is plain patience.
608pub fn patience_diff_lines_anchored(
609    old: &[DiffLine<'_>],
610    new: &[DiffLine<'_>],
611    anchors: &[Vec<u8>],
612) -> Vec<DiffOp> {
613    let mut ops: Vec<DiffOp> = Vec::new();
614    patience_recurse(old, new, 0, old.len(), 0, new.len(), anchors, &mut ops);
615    coalesce_ops(ops)
616}
617
618/// Compute a line-level edit script transforming `old` into `new` using the
619/// histogram diff algorithm (as in `git diff --histogram`, derived from JGit).
620///
621/// Histogram diff is a patience-style unique-anchor algorithm with a fallback:
622/// it builds an occurrence histogram of `old` and, scanning `new`, picks the
623/// longest run of matching lines whose `old` line has the *fewest* occurrences
624/// (preferring truly unique lines, like patience, but still able to anchor on
625/// low-frequency lines when no globally-unique line exists). It then recurses
626/// on the regions on either side of that run, falling back to Myers only when
627/// no common line exists in a region. The result is a valid LCS-correct edit
628/// script with the same shape as [`myers_diff_lines`].
629pub fn histogram_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
630    let mut ops: Vec<DiffOp> = Vec::new();
631    histogram_recurse(old, new, 0, old.len(), 0, new.len(), &mut ops);
632    coalesce_ops(ops)
633}
634
635/// Dispatch to the line-diff implementation selected by `algorithm`.
636///
637/// All variants return the same coalesced [`DiffOp`] run sequence as
638/// [`myers_diff_lines`], so callers can switch algorithms without changing how
639/// they consume the result.
640///
641/// - [`DiffAlgorithm::Myers`] and [`DiffAlgorithm::Minimal`] use the Myers
642///   O(ND) shortest-edit-script search ([`myers_diff_lines`]); that search is
643///   already minimal in deletions + insertions, so `Minimal` is an alias for
644///   it here rather than a distinct slower mode.
645/// - [`DiffAlgorithm::Patience`] uses [`patience_diff_lines`].
646/// - [`DiffAlgorithm::Histogram`] uses [`histogram_diff_lines`].
647pub fn diff_lines_with_algorithm(
648    old: &[DiffLine<'_>],
649    new: &[DiffLine<'_>],
650    algorithm: DiffAlgorithm,
651) -> Vec<DiffOp> {
652    match algorithm {
653        DiffAlgorithm::Myers | DiffAlgorithm::Minimal => myers_diff_lines(old, new),
654        DiffAlgorithm::Patience => patience_diff_lines(old, new),
655        DiffAlgorithm::Histogram => histogram_diff_lines(old, new),
656    }
657}
658
659/// Emit ops for an empty-on-one-side range; returns `true` if it handled it.
660///
661/// Covers the recursion base cases where one side of `old[a0..a1]` /
662/// `new[b0..b1]` is empty: a pure deletion, a pure insertion, or nothing at
663/// all. Used by both the patience and histogram recursions before they look
664/// for an anchor.
665fn emit_trivial_range(a0: usize, a1: usize, b0: usize, b1: usize, out: &mut Vec<DiffOp>) -> bool {
666    let old_len = a1 - a0;
667    let new_len = b1 - b0;
668    if old_len == 0 && new_len == 0 {
669        return true;
670    }
671    if old_len == 0 {
672        out.push(DiffOp::Insert(new_len));
673        return true;
674    }
675    if new_len == 0 {
676        out.push(DiffOp::Delete(old_len));
677        return true;
678    }
679    false
680}
681
682/// Trim the common prefix/suffix of `old[a0..a1]` vs `new[b0..b1]`.
683///
684/// Emits an `Equal` for the matched prefix immediately, returns the inner
685/// (still-differing) range, and reports the matched-suffix length so the caller
686/// can emit its `Equal` *after* it has processed the inner range. This keeps
687/// the per-range work proportional to the actual edit, mirroring the prefix /
688/// suffix trim in [`myers_diff_lines`].
689fn trim_common(
690    old: &[DiffLine<'_>],
691    new: &[DiffLine<'_>],
692    mut a0: usize,
693    mut a1: usize,
694    mut b0: usize,
695    mut b1: usize,
696    out: &mut Vec<DiffOp>,
697) -> (usize, usize, usize, usize, usize) {
698    let mut prefix = 0usize;
699    while a0 < a1 && b0 < b1 && old[a0] == new[b0] {
700        a0 += 1;
701        b0 += 1;
702        prefix += 1;
703    }
704    if prefix > 0 {
705        out.push(DiffOp::Equal(prefix));
706    }
707    let mut suffix = 0usize;
708    while a1 > a0 && b1 > b0 && old[a1 - 1] == new[b1 - 1] {
709        a1 -= 1;
710        b1 -= 1;
711        suffix += 1;
712    }
713    (a0, a1, b0, b1, suffix)
714}
715
716/// Recursive patience-diff worker over `old[a0..a1]` vs `new[b0..b1]`.
717///
718/// `anchors` carries the `--anchored=<text>` prefixes (empty for plain
719/// patience); they are re-evaluated at every recursion level, since a line that
720/// is non-unique in the whole file can become unique within a sub-range.
721#[allow(clippy::too_many_arguments)]
722fn patience_recurse(
723    old: &[DiffLine<'_>],
724    new: &[DiffLine<'_>],
725    a0: usize,
726    a1: usize,
727    b0: usize,
728    b1: usize,
729    anchors: &[Vec<u8>],
730    out: &mut Vec<DiffOp>,
731) {
732    if emit_trivial_range(a0, a1, b0, b1, out) {
733        return;
734    }
735    let (a0, a1, b0, b1, suffix) = trim_common(old, new, a0, a1, b0, b1, out);
736    if !emit_trivial_range(a0, a1, b0, b1, out) {
737        match patience_anchors(old, new, a0, a1, b0, b1, anchors) {
738            Some(aligned) => {
739                // Walk the aligned anchors in order, recursing into each gap
740                // before emitting the anchor line as Equal.
741                let mut cur_a = a0;
742                let mut cur_b = b0;
743                for (ai, bi) in aligned {
744                    patience_recurse(old, new, cur_a, ai, cur_b, bi, anchors, out);
745                    out.push(DiffOp::Equal(1));
746                    cur_a = ai + 1;
747                    cur_b = bi + 1;
748                }
749                // Tail after the last anchor.
750                patience_recurse(old, new, cur_a, a1, cur_b, b1, anchors, out);
751            }
752            // No unique common line in this range: defer to Myers, which always
753            // yields a valid (and minimal) script for the leftover block.
754            None => myers_core(&old[a0..a1], &new[b0..b1], out),
755        }
756    }
757    if suffix > 0 {
758        out.push(DiffOp::Equal(suffix));
759    }
760}
761
762/// Find the patience anchors for `old[a0..a1]` vs `new[b0..b1]`.
763///
764/// An anchor is a line that occurs exactly once in `old[a0..a1]` and exactly
765/// once in `new[b0..b1]`. The matched (old_index, new_index) pairs are reduced
766/// to their longest increasing subsequence by new-index (the patience-sort LCS)
767/// so the returned anchors are strictly increasing in *both* indices and can be
768/// used as split points. Returns `None` when there are no such unique common
769/// lines (the caller then falls back to Myers).
770fn patience_anchors(
771    old: &[DiffLine<'_>],
772    new: &[DiffLine<'_>],
773    a0: usize,
774    a1: usize,
775    b0: usize,
776    b1: usize,
777    anchors: &[Vec<u8>],
778) -> Option<Vec<(usize, usize)>> {
779    // Count occurrences and remember the (single) position of each line in each
780    // side's range. `count > 1` poisons the position so we can ignore it.
781    struct Occ {
782        count: usize,
783        pos: usize,
784    }
785    let mut in_old: HashMap<LineKey<'_>, Occ> = HashMap::new();
786    for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
787        in_old
788            .entry(line_key(line))
789            .and_modify(|o| o.count += 1)
790            .or_insert(Occ { count: 1, pos: i });
791    }
792    let mut in_new: HashMap<LineKey<'_>, Occ> = HashMap::new();
793    for (j, line) in new.iter().enumerate().take(b1).skip(b0) {
794        in_new
795            .entry(line_key(line))
796            .and_modify(|o| o.count += 1)
797            .or_insert(Occ { count: 1, pos: j });
798    }
799
800    // Collect lines unique in both, ordered by their position in `old`.
801    let mut pairs: Vec<(usize, usize)> = Vec::new();
802    for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
803        let key = line_key(line);
804        let Some(o) = in_old.get(&key) else { continue };
805        if o.count != 1 || o.pos != i {
806            continue;
807        }
808        // A line unique in both ranges is a candidate anchor.
809        if let Some(n) = in_new.get(&key)
810            && n.count == 1
811        {
812            pairs.push((i, n.pos));
813        }
814    }
815    if pairs.is_empty() {
816        return None;
817    }
818
819    // Patience sort: longest increasing subsequence of new-indices. `pairs` is
820    // already sorted by old-index, so an LIS by new-index yields a set of
821    // anchors increasing in both coordinates. With `--anchored` text(s) present,
822    // pin the matching (unique-in-both) lines into the subsequence instead.
823    let lis = if anchors.is_empty() {
824        longest_increasing_by_new(&pairs)
825    } else {
826        let is_anchor: Vec<bool> = pairs
827            .iter()
828            .map(|&(_, nj)| line_matches_anchor(new[nj].content, anchors))
829            .collect();
830        longest_increasing_by_new_anchored(&pairs, &is_anchor)
831    };
832    if lis.is_empty() { None } else { Some(lis) }
833}
834
/// Reports whether `line` starts with at least one of the `--anchored`
/// prefixes in `anchors`.
///
/// The comparison is a plain byte-prefix test over the line content as given
/// (whatever trailing newline the caller's slice carries is part of it). An
/// empty prefix is a prefix of every line, so it matches everything; an empty
/// `anchors` list matches nothing.
fn line_matches_anchor(line: &[u8], anchors: &[Vec<u8>]) -> bool {
    for prefix in anchors {
        if line.starts_with(prefix.as_slice()) {
            return true;
        }
    }
    false
}
841
/// Longest increasing subsequence of `pairs`, keyed on the new-index (`.1`),
/// returned as the selected `(old_index, new_index)` pairs in input order.
///
/// `pairs` is expected to be sorted by old-index. When the new-indices are
/// distinct, the result is therefore strictly increasing in both coordinates.
/// An empty input yields an empty vector.
///
/// This is the patience-sorting core: each element is dropped onto the
/// leftmost "pile" whose top is not smaller than it, found by binary search.
/// Predecessor links are recorded so the subsequence itself (not just its
/// length) can be rebuilt.
fn longest_increasing_by_new(pairs: &[(usize, usize)]) -> Vec<(usize, usize)> {
    // tails[k] = index into `pairs` of the smallest possible tail of an
    // increasing subsequence of length k + 1. The tails' new-indices are
    // strictly increasing, which is what makes `partition_point` valid here.
    let mut tails: Vec<usize> = Vec::new();
    // prev[i] = index into `pairs` of the element preceding pairs[i] in the
    // best subsequence ending at pairs[i].
    let mut prev: Vec<Option<usize>> = Vec::with_capacity(pairs.len());

    for (idx, &(_, new_idx)) in pairs.iter().enumerate() {
        // First pile whose tail has a new-index >= this one.
        let slot = tails.partition_point(|&tail| pairs[tail].1 < new_idx);
        prev.push(slot.checked_sub(1).map(|left| tails[left]));
        if slot < tails.len() {
            tails[slot] = idx;
        } else {
            tails.push(idx);
        }
    }

    // Follow the predecessor links back from the tail of the longest pile.
    let mut chain: Vec<(usize, usize)> = Vec::with_capacity(tails.len());
    let mut cursor = tails.last().copied();
    while let Some(idx) = cursor {
        chain.push(pairs[idx]);
        cursor = prev[idx];
    }
    chain.reverse();
    chain
}
892
/// Increasing subsequence of `pairs` (sorted by old-index, keyed on the
/// new-index `.1`) that is forced through every anchor it can still include.
///
/// `is_anchor[e]` marks `pairs[e]` as an anchor; it must be at least as long as
/// `pairs`. The procedure follows git's anchored
/// `find_longest_common_sequence` (xdiff/xpatience.c): entries are visited in
/// old-index order and placed into the patience-sort `sequence` by new-index.
/// Placing an anchor at position `k` pins `k` and forces the running length to
/// `k + 1`. From then on no position `<= k` may be overwritten, so the result
/// has to contain that anchor. A later entry (anchor or not) that would land
/// at or before the pinned position is skipped, which is how mutually
/// incompatible anchors are resolved greedily.
///
/// Returns the chosen `(old_index, new_index)` pairs in order; empty input
/// yields an empty vector.
fn longest_increasing_by_new_anchored(
    pairs: &[(usize, usize)],
    is_anchor: &[bool],
) -> Vec<(usize, usize)> {
    // sequence[k] = index into `pairs` of the current tail for subsequences of
    // length k + 1. Only sequence[..longest] is live: pinning an anchor can
    // shrink `longest`, leaving stale slots behind that are overwritten later.
    let mut sequence: Vec<usize> = Vec::with_capacity(pairs.len());
    let mut prev: Vec<Option<usize>> = vec![None; pairs.len()];
    let mut longest = 0usize;
    // Highest position that holds an anchor and must never be replaced.
    let mut pinned: Option<usize> = None;

    for (e, &(_, val)) in pairs.iter().enumerate() {
        // Position this entry would occupy: one past the last live tail whose
        // new-index is below `val`. The first arm is the general binary search;
        // the fallback is git's fast path (empty sequence, or `val` beyond the
        // current last tail).
        let pos = match longest.checked_sub(1) {
            Some(last) if val <= pairs[sequence[last]].1 => {
                sequence[..longest].partition_point(|&tail| pairs[tail].1 < val)
            }
            _ => longest,
        };
        // The predecessor link is recorded even if the entry ends up skipped.
        prev[e] = pos.checked_sub(1).map(|left| sequence[left]);
        if pinned.is_some_and(|limit| pos <= limit) {
            continue;
        }
        if pos == sequence.len() {
            sequence.push(e);
        } else {
            sequence[pos] = e;
        }
        if is_anchor[e] {
            pinned = Some(pos);
            longest = pos + 1;
        } else if pos == longest {
            longest += 1;
        }
    }

    // Rebuild the chain from the last live tail via the predecessor links.
    let mut chain: Vec<(usize, usize)> = Vec::with_capacity(longest);
    let mut cursor = longest.checked_sub(1).map(|last| sequence[last]);
    while let Some(idx) = cursor {
        chain.push(pairs[idx]);
        cursor = prev[idx];
    }
    chain.reverse();
    chain
}
968
969/// Recursive histogram-diff worker over `old[a0..a1]` vs `new[b0..b1]`.
970fn histogram_recurse(
971    old: &[DiffLine<'_>],
972    new: &[DiffLine<'_>],
973    a0: usize,
974    a1: usize,
975    b0: usize,
976    b1: usize,
977    out: &mut Vec<DiffOp>,
978) {
979    if emit_trivial_range(a0, a1, b0, b1, out) {
980        return;
981    }
982    let (a0, a1, b0, b1, suffix) = trim_common(old, new, a0, a1, b0, b1, out);
983    if !emit_trivial_range(a0, a1, b0, b1, out) {
984        match histogram_region(old, new, a0, a1, b0, b1) {
985            Some(region) => {
986                // Recurse left of the matched run, emit the run as Equal, then
987                // recurse right of it.
988                histogram_recurse(old, new, a0, region.old_start, b0, region.new_start, out);
989                out.push(DiffOp::Equal(region.len));
990                histogram_recurse(
991                    old,
992                    new,
993                    region.old_start + region.len,
994                    a1,
995                    region.new_start + region.len,
996                    b1,
997                    out,
998                );
999            }
1000            // No common line at all in this range: hand it to Myers.
1001            None => myers_core(&old[a0..a1], &new[b0..b1], out),
1002        }
1003    }
1004    if suffix > 0 {
1005        out.push(DiffOp::Equal(suffix));
1006    }
1007}
1008
/// The matching run that the histogram heuristic selected for one window.
///
/// The run covers `old[old_start..old_start + len]` and
/// `new[new_start..new_start + len]`.
struct HistogramRegion {
    /// Index of the run's first line in `old`.
    old_start: usize,
    /// Index of the run's first line in `new`.
    new_start: usize,
    /// Number of consecutive matching lines in the run.
    len: usize,
}
1015
1016/// Choose the histogram anchor run for `old[a0..a1]` vs `new[b0..b1]`.
1017///
1018/// Builds an occurrence histogram of the `old` range, then scans the `new`
1019/// range. For each `new` line that also appears in `old`, it extends a matching
1020/// run backward and forward and scores candidate alignments, preferring the run
1021/// whose anchoring `old` line has the *fewest* occurrences (ties broken by run
1022/// length, then by earliest position). This is the JGit/`git --histogram`
1023/// heuristic: rare lines make the most reliable anchors. Returns `None` if no
1024/// `new` line appears in the `old` range.
1025fn histogram_region(
1026    old: &[DiffLine<'_>],
1027    new: &[DiffLine<'_>],
1028    a0: usize,
1029    a1: usize,
1030    b0: usize,
1031    b1: usize,
1032) -> Option<HistogramRegion> {
1033    // Occurrence count and the list of positions of each line within old[a0..a1].
1034    let mut buckets: HashMap<LineKey<'_>, Vec<usize>> = HashMap::new();
1035    for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
1036        buckets.entry(line_key(line)).or_default().push(i);
1037    }
1038
1039    let mut best: Option<HistogramRegion> = None;
1040    // Lower occurrence count is better; among equal counts, longer run wins.
1041    let mut best_count = usize::MAX;
1042    let mut best_len = 0usize;
1043
1044    let mut bj = b0;
1045    while bj < b1 {
1046        let key = line_key(&new[bj]);
1047        let Some(positions) = buckets.get(&key) else {
1048            bj += 1;
1049            continue;
1050        };
1051        let occ = positions.len();
1052        // For every place this line sits in `old`, measure the maximal matching
1053        // run that passes through (positions[*], bj).
1054        let mut next_bj = bj + 1;
1055        for &ai in positions {
1056            // Extend backward while lines keep matching and we stay in range.
1057            let mut start_a = ai;
1058            let mut start_b = bj;
1059            while start_a > a0 && start_b > b0 && old[start_a - 1] == new[start_b - 1] {
1060                start_a -= 1;
1061                start_b -= 1;
1062            }
1063            // Extend forward from the run start.
1064            let mut len = 0usize;
1065            while start_a + len < a1
1066                && start_b + len < b1
1067                && old[start_a + len] == new[start_b + len]
1068            {
1069                len += 1;
1070            }
1071            // Score this run by the rarest occurrence count along it; using the
1072            // anchor line's own count is the standard, cheaper approximation.
1073            let run_count = occ;
1074            let better = run_count < best_count || (run_count == best_count && len > best_len);
1075            if better && len > 0 {
1076                best_count = run_count;
1077                best_len = len;
1078                best = Some(HistogramRegion {
1079                    old_start: start_a,
1080                    new_start: start_b,
1081                    len,
1082                });
1083                // Skip past this matched run in `new` so we do not re-evaluate
1084                // every interior line of the same run from scratch.
1085                if start_b + len > next_bj {
1086                    next_bj = start_b + len;
1087                }
1088            }
1089        }
1090        bj = next_bj.max(bj + 1);
1091    }
1092
1093    best
1094}
1095
/// Conflict-marker layout produced by [`merge_blobs`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ConflictStyle {
    /// Two-section layout: `<<<<<<<` ours, `=======`, theirs `>>>>>>>`.
    /// This is the default.
    #[default]
    Merge,
    /// `diff3` layout: the common-ancestor section is additionally written
    /// after ours, introduced by `|||||||`, ahead of the `=======` divider.
    Diff3,
}
1106
1107/// Labels and style controlling [`merge_blobs`] conflict markers.
1108#[derive(Debug, Clone, Copy)]
1109pub struct MergeBlobOptions<'a> {
1110    /// Label after the opening `<<<<<<<` marker (typically the local branch).
1111    pub ours_label: &'a str,
1112    /// Label after the closing `>>>>>>>` marker (typically the other branch).
1113    pub theirs_label: &'a str,
1114    /// Label after the `|||||||` marker (only used for [`ConflictStyle::Diff3`]).
1115    pub base_label: &'a str,
1116    /// Which marker style to emit.
1117    pub style: ConflictStyle,
1118    /// How to resolve a textual conflict. [`MergeFavor::Union`] keeps both sides'
1119    /// lines with no markers (and a non-conflicted result); other values leave
1120    /// markers (favouring ours/theirs is applied by the caller at the file level).
1121    pub favor: MergeFavor,
1122    /// Whitespace-insensitivity for the 3-way line matching, mirroring
1123    /// `-Xignore-space-change`/`-Xignore-all-space`/`-Xignore-space-at-eol` (git's
1124    /// `ll_opts.xdl_opts`). When non-empty, regions that differ only by ignored
1125    /// whitespace are not conflicts, and unchanged spans emit ours' actual bytes
1126    /// (xdl_merge copies the common parts from file1). Empty (the default) is the
1127    /// exact, byte-for-byte merge.
1128    pub ws_ignore: WsIgnore,
1129}
1130
1131impl Default for MergeBlobOptions<'_> {
1132    fn default() -> Self {
1133        Self {
1134            ours_label: "ours",
1135            theirs_label: "theirs",
1136            base_label: "base",
1137            style: ConflictStyle::Merge,
1138            favor: MergeFavor::None,
1139            ws_ignore: WsIgnore::EMPTY,
1140        }
1141    }
1142}
1143
/// What a 3-way blob merge produced.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MergeBlobResult {
    /// Bytes of the merged blob; contains conflict markers wherever a conflict
    /// was written out.
    pub content: Vec<u8>,
    /// `true` if at least one region conflicted and markers were emitted.
    pub conflicted: bool,
}
1152
1153/// Perform a 3-way merge of three blobs using the diff3 algorithm.
1154///
1155/// `base` is the common ancestor; `ours` and `theirs` are the two sides. The
1156/// merge diffs base→ours and base→theirs (with [`myers_diff_lines`]) and walks
1157/// the base in lockstep:
1158/// - regions unchanged on both sides emit the base lines unchanged;
1159/// - regions changed on exactly one side take that side's lines;
1160/// - regions changed on both sides emit the side lines if they are
1161///   byte-identical, otherwise a conflict (and [`MergeBlobResult::conflicted`]
1162///   is set).
1163///
1164/// An empty `base` is supported: every line is then "added on both sides", so
1165/// the result is the shared content if `ours == theirs`, else a single
1166/// conflict (add/add).
1167pub fn merge_blobs(
1168    base: &[u8],
1169    ours: &[u8],
1170    theirs: &[u8],
1171    options: &MergeBlobOptions<'_>,
1172) -> MergeBlobResult {
1173    let base_lines = split_lines(base);
1174    let ours_lines = split_lines(ours);
1175    let theirs_lines = split_lines(theirs);
1176
1177    // Per-side matched (equal) base regions, paired with the corresponding side
1178    // ranges, computed via Myers. Under `ws_ignore`, lines that differ only by
1179    // ignored whitespace match, so whitespace-only changes are absorbed into the
1180    // stable spans rather than surfacing as conflicts.
1181    let ours_matches = matching_regions(&base_lines, &ours_lines, options.ws_ignore);
1182    let theirs_matches = matching_regions(&base_lines, &theirs_lines, options.ws_ignore);
1183
1184    // Intersect the two match lists to get segments of base that are unchanged
1185    // on BOTH sides, each carrying the exact aligned side indices. Between these
1186    // common-stable segments lie the (potentially conflicting) changed regions.
1187    let stable = common_stable_segments(&ours_matches, &theirs_matches);
1188
1189    let mut writer = MergeWriter::new(options);
1190    // Cursors: next unconsumed line in base, ours, theirs.
1191    let mut base_idx = 0usize;
1192    let mut our_idx = 0usize;
1193    let mut their_idx = 0usize;
1194
1195    for seg in &stable {
1196        // Unstable (changed) region preceding this stable segment.
1197        let base_region = &base_lines[base_idx..seg.base_start];
1198        let our_region = &ours_lines[our_idx..seg.ours_start];
1199        let their_region = &theirs_lines[their_idx..seg.theirs_start];
1200        emit_region(
1201            &mut writer,
1202            base_region,
1203            our_region,
1204            their_region,
1205            options.ws_ignore,
1206        );
1207
1208        // The stable segment matched on both sides. Emit ours' actual bytes
1209        // (xdl_merge copies common spans from file1): identical to base under an
1210        // exact match, and ours' whitespace under `ws_ignore`.
1211        writer.emit_lines(&ours_lines[seg.ours_start..seg.ours_start + seg.len]);
1212
1213        base_idx = seg.base_start + seg.len;
1214        our_idx = seg.ours_start + seg.len;
1215        their_idx = seg.theirs_start + seg.len;
1216    }
1217
1218    // Trailing unstable region after the last stable segment (or the whole input
1219    // when there are no common-stable segments).
1220    emit_region(
1221        &mut writer,
1222        &base_lines[base_idx..],
1223        &ours_lines[our_idx..],
1224        &theirs_lines[their_idx..],
1225        options.ws_ignore,
1226    );
1227
1228    writer.finish()
1229}
1230
1231/// Resolve and emit one changed region (the gap between two common-stable
1232/// segments) according to diff3 rules.
1233fn emit_region(
1234    writer: &mut MergeWriter<'_>,
1235    base_region: &[DiffLine<'_>],
1236    our_region: &[DiffLine<'_>],
1237    their_region: &[DiffLine<'_>],
1238    ws_ignore: WsIgnore,
1239) {
1240    if our_region.is_empty() && their_region.is_empty() {
1241        return;
1242    }
1243    // Under `ws_ignore`, "changed" means changed beyond ignored whitespace; with
1244    // the empty default the comparison is exact byte equality.
1245    let our_changed = !regions_match(our_region, base_region, ws_ignore);
1246    let their_changed = !regions_match(their_region, base_region, ws_ignore);
1247    match (our_changed, their_changed) {
1248        (false, false) => writer.emit_lines(our_region),
1249        (true, false) => writer.emit_lines(our_region),
1250        (false, true) => writer.emit_lines(their_region),
1251        (true, true) => {
1252            if regions_match(our_region, their_region, ws_ignore) {
1253                // Both sides made the same change (up to ignored whitespace): no
1254                // conflict. xdl_merge keeps ours' bytes.
1255                writer.emit_lines(our_region);
1256            } else {
1257                writer.emit_conflict_refined(our_region, base_region, their_region);
1258            }
1259        }
1260    }
1261}
1262
1263/// Whether two line slices are equal, exactly when `ws_ignore` is empty and up to
1264/// the active whitespace-ignore canonicalization otherwise.
1265fn regions_match(a: &[DiffLine<'_>], b: &[DiffLine<'_>], ws_ignore: WsIgnore) -> bool {
1266    if ws_ignore.is_empty() {
1267        return a == b;
1268    }
1269    a.len() == b.len()
1270        && a.iter().zip(b).all(|(x, y)| {
1271            canonicalize_line(x.content, ws_ignore) == canonicalize_line(y.content, ws_ignore)
1272        })
1273}
1274
/// One piece of a conflict after zealous refinement.
enum RefineItem {
    /// Lines both sides share, written out verbatim. The range indexes into
    /// ours.
    Context(std::ops::Range<usize>),
    /// A minimal conflict: the first range indexes into ours, the second into
    /// theirs.
    Conflict(std::ops::Range<usize>, std::ops::Range<usize>),
}
1282
1283/// git's `xdl_refine_conflicts` + `xdl_simplify_non_conflicts` (level
1284/// `XDL_MERGE_ZEALOUS`): re-diff the two conflicting sides against each other,
1285/// factor the lines they share out of the conflict as context, and split the
1286/// remainder into the minimal set of conflicting hunks — then re-merge any two
1287/// conflicts separated by 3 or fewer context lines (the smaller-output rule).
1288///
1289/// Ranges index into `ours`/`theirs`; `Context` ranges are in ours coordinates
1290/// (the shared lines are identical on both sides).
1291fn refine_conflict_items(ours: &[DiffLine<'_>], theirs: &[DiffLine<'_>]) -> Vec<RefineItem> {
1292    // Coalesce the ours-vs-theirs diff into alternating context (equal) and
1293    // conflict (changed) runs.
1294    let ops = myers_diff_lines(ours, theirs);
1295    let mut raw: Vec<RefineItem> = Vec::new();
1296    let mut oi = 0usize;
1297    let mut ti = 0usize;
1298    let mut pending: Option<(usize, usize, usize, usize)> = None; // o0,o1,t0,t1
1299    for op in ops {
1300        match op {
1301            DiffOp::Equal(n) => {
1302                if let Some((o0, o1, t0, t1)) = pending.take() {
1303                    raw.push(RefineItem::Conflict(o0..o1, t0..t1));
1304                }
1305                raw.push(RefineItem::Context(oi..oi + n));
1306                oi += n;
1307                ti += n;
1308            }
1309            DiffOp::Delete(n) => {
1310                let entry = pending.get_or_insert((oi, oi, ti, ti));
1311                entry.1 = oi + n;
1312                oi += n;
1313            }
1314            DiffOp::Insert(n) => {
1315                let entry = pending.get_or_insert((oi, oi, ti, ti));
1316                entry.3 = ti + n;
1317                ti += n;
1318            }
1319        }
1320    }
1321    if let Some((o0, o1, t0, t1)) = pending.take() {
1322        raw.push(RefineItem::Conflict(o0..o1, t0..t1));
1323    }
1324
1325    // Merge two conflicts when the context between them is <= 3 lines: the
1326    // absorbed context lines are identical on both sides, so they fold into the
1327    // combined conflict's ours and theirs ranges alike.
1328    let mut out: Vec<RefineItem> = Vec::new();
1329    let mut idx = 0usize;
1330    while idx < raw.len() {
1331        match &raw[idx] {
1332            RefineItem::Context(range) => {
1333                let small = range.len() <= 3;
1334                let prev_conflict = matches!(out.last(), Some(RefineItem::Conflict(..)));
1335                let next_conflict = matches!(raw.get(idx + 1), Some(RefineItem::Conflict(..)));
1336                if small && prev_conflict && next_conflict {
1337                    let Some(RefineItem::Conflict(po, pt)) = out.pop() else {
1338                        unreachable!()
1339                    };
1340                    let RefineItem::Conflict(no, nt) = &raw[idx + 1] else {
1341                        unreachable!()
1342                    };
1343                    out.push(RefineItem::Conflict(po.start..no.end, pt.start..nt.end));
1344                    idx += 2;
1345                } else {
1346                    out.push(RefineItem::Context(range.clone()));
1347                    idx += 1;
1348                }
1349            }
1350            RefineItem::Conflict(o, t) => {
1351                out.push(RefineItem::Conflict(o.clone(), t.clone()));
1352                idx += 1;
1353            }
1354        }
1355    }
1356    out
1357}
1358
/// A run of lines that `base` and one side have in common:
/// `base[base_start..base_start + len]` matches
/// `side[side_start..side_start + len]`.
#[derive(Debug, Clone, Copy)]
struct MatchRegion {
    /// First matching line in `base`.
    base_start: usize,
    /// First matching line in the side.
    side_start: usize,
    /// Number of matching lines.
    len: usize,
}
1367
/// A span of `base` that *neither* side changed, together with where the same
/// lines sit in ours and in theirs.
#[derive(Debug, Clone, Copy)]
struct StableSegment {
    /// First line of the span in `base`.
    base_start: usize,
    /// Index in ours of the line aligned with `base_start`.
    ours_start: usize,
    /// Index in theirs of the line aligned with `base_start`.
    theirs_start: usize,
    /// Number of lines in the span (identical on all three coordinates).
    len: usize,
}
1376
1377/// Compute the matched regions between base and a side using [`myers_diff_lines`].
1378///
1379/// Each `Equal(n)` run becomes a [`MatchRegion`]; the regions are returned in
1380/// increasing base order. (Equal runs are coalesced by the diff, so adjacent
1381/// regions are already maximal.)
1382fn matching_regions(
1383    base: &[DiffLine<'_>],
1384    side: &[DiffLine<'_>],
1385    ws_ignore: WsIgnore,
1386) -> Vec<MatchRegion> {
1387    let ops = if ws_ignore.is_empty() {
1388        myers_diff_lines(base, side)
1389    } else {
1390        // The 3-way content merge uses the Myers line diff (git's ll-merge xdl
1391        // default); the whitespace flags affect only the equality test.
1392        myers_diff_lines_ws(base, side, ws_ignore, DiffAlgorithm::Myers)
1393    };
1394    let mut regions = Vec::new();
1395    let mut base_idx = 0usize;
1396    let mut side_idx = 0usize;
1397    for op in ops {
1398        match op {
1399            DiffOp::Equal(n) => {
1400                regions.push(MatchRegion {
1401                    base_start: base_idx,
1402                    side_start: side_idx,
1403                    len: n,
1404                });
1405                base_idx += n;
1406                side_idx += n;
1407            }
1408            DiffOp::Delete(n) => base_idx += n,
1409            DiffOp::Insert(n) => side_idx += n,
1410        }
1411    }
1412    regions
1413}
1414
1415/// Intersect the ours/theirs match lists (both in base coordinates) to find the
1416/// base ranges unchanged on both sides, recording the aligned side indices.
1417///
1418/// For each overlapping pair of base ranges `[bs, be)` the ours-side index of
1419/// `bs` is `o.side_start + (bs - o.base_start)` and likewise for theirs; both
1420/// map contiguously across the overlap. The returned segments are in increasing
1421/// base order and never overlap.
1422fn common_stable_segments(ours: &[MatchRegion], theirs: &[MatchRegion]) -> Vec<StableSegment> {
1423    let mut segments = Vec::new();
1424    let mut oi = 0usize;
1425    let mut ti = 0usize;
1426    while oi < ours.len() && ti < theirs.len() {
1427        let o = ours[oi];
1428        let t = theirs[ti];
1429        let o_end = o.base_start + o.len;
1430        let t_end = t.base_start + t.len;
1431        let lo = o.base_start.max(t.base_start);
1432        let hi = o_end.min(t_end);
1433        if lo < hi {
1434            segments.push(StableSegment {
1435                base_start: lo,
1436                ours_start: o.side_start + (lo - o.base_start),
1437                theirs_start: t.side_start + (lo - t.base_start),
1438                len: hi - lo,
1439            });
1440        }
1441        // Advance whichever range ends first.
1442        if o_end <= t_end {
1443            oi += 1;
1444        } else {
1445            ti += 1;
1446        }
1447    }
1448    segments
1449}
1450
1451/// Accumulates merged output and renders conflict markers byte-for-byte like
1452/// upstream git.
1453struct MergeWriter<'a> {
1454    out: Vec<u8>,
1455    conflicted: bool,
1456    options: &'a MergeBlobOptions<'a>,
1457}
1458
1459impl<'a> MergeWriter<'a> {
1460    fn new(options: &'a MergeBlobOptions<'a>) -> Self {
1461        Self {
1462            out: Vec::new(),
1463            conflicted: false,
1464            options,
1465        }
1466    }
1467
1468    /// Append raw line bytes (each line already carries its own newline, except
1469    /// possibly a final no-newline line).
1470    fn emit_lines(&mut self, lines: &[DiffLine<'_>]) {
1471        for line in lines {
1472            self.out.extend_from_slice(line.content);
1473        }
1474    }
1475
1476    /// Emit a conflict hunk. Conflict markers always begin on their own line,
1477    /// so if the preceding emitted content did not end in a newline (a
1478    /// no-newline-at-end side), insert one first — matching git, which prints
1479    /// the "\ No newline at end of file" content followed by a newline before
1480    /// the next marker.
1481    fn emit_conflict(
1482        &mut self,
1483        ours: &[DiffLine<'_>],
1484        base: &[DiffLine<'_>],
1485        theirs: &[DiffLine<'_>],
1486    ) {
1487        // Union: keep both sides' lines (ours then theirs) with no markers, and do
1488        // NOT flag a conflict — git's `XDL_MERGE_FAVOR_UNION`.
1489        if self.options.favor == MergeFavor::Union {
1490            self.emit_section(ours);
1491            self.ensure_newline();
1492            self.emit_section(theirs);
1493            return;
1494        }
1495        self.conflicted = true;
1496        self.write_marker(b'<', self.options.ours_label);
1497        self.emit_section(ours);
1498        if self.options.style == ConflictStyle::Diff3 {
1499            self.ensure_newline();
1500            self.write_marker(b'|', self.options.base_label);
1501            self.emit_section(base);
1502        }
1503        self.ensure_newline();
1504        self.write_divider();
1505        self.emit_section(theirs);
1506        self.ensure_newline();
1507        self.write_marker(b'>', self.options.theirs_label);
1508    }
1509
1510    /// Emit a conflict with git's zealous refinement applied. The default
1511    /// (non-diff3) merge re-diffs the two sides to shrink the conflict to the
1512    /// lines that genuinely differ (`xdl_refine_conflicts`); diff3-style output
1513    /// keeps the conflict whole (the base section straddles it), a favored merge
1514    /// resolves at a coarser granularity, and an empty side cannot be refined —
1515    /// all three fall back to a single unrefined conflict hunk.
1516    fn emit_conflict_refined(
1517        &mut self,
1518        ours: &[DiffLine<'_>],
1519        base: &[DiffLine<'_>],
1520        theirs: &[DiffLine<'_>],
1521    ) {
1522        if self.options.style == ConflictStyle::Diff3
1523            || self.options.favor != MergeFavor::None
1524            || ours.is_empty()
1525            || theirs.is_empty()
1526        {
1527            self.emit_conflict(ours, base, theirs);
1528            return;
1529        }
1530        for item in refine_conflict_items(ours, theirs) {
1531            match item {
1532                RefineItem::Context(range) => self.emit_lines(&ours[range]),
1533                RefineItem::Conflict(o, t) => self.emit_conflict(&ours[o], &[], &theirs[t]),
1534            }
1535        }
1536    }
1537
1538    /// Emit one side's lines inside a conflict, preserving their exact bytes.
1539    fn emit_section(&mut self, lines: &[DiffLine<'_>]) {
1540        for line in lines {
1541            self.out.extend_from_slice(line.content);
1542        }
1543    }
1544
1545    /// Ensure the buffer ends with a newline before writing the next marker, so
1546    /// markers always start a fresh line even after a no-newline final line.
1547    fn ensure_newline(&mut self) {
1548        if !self.out.is_empty() && self.out.last() != Some(&b'\n') {
1549            self.out.push(b'\n');
1550        }
1551    }
1552
1553    /// Write a marker line: 7 copies of `ch`, then (if the label is non-empty)
1554    /// a space and the label, then a newline. No trailing space for an empty
1555    /// label — byte-for-byte with upstream git.
1556    fn write_marker(&mut self, ch: u8, label: &str) {
1557        for _ in 0..7 {
1558            self.out.push(ch);
1559        }
1560        if !label.is_empty() {
1561            self.out.push(b' ');
1562            self.out.extend_from_slice(label.as_bytes());
1563        }
1564        self.out.push(b'\n');
1565    }
1566
1567    /// Write the `=======` divider line (never labelled).
1568    fn write_divider(&mut self) {
1569        for _ in 0..7 {
1570            self.out.push(b'=');
1571        }
1572        self.out.push(b'\n');
1573    }
1574
1575    fn finish(self) -> MergeBlobResult {
1576        MergeBlobResult {
1577            content: self.out,
1578            conflicted: self.conflicted,
1579        }
1580    }
1581}
1582
/// Selects a line-diff algorithm.
///
/// NOTE(review): the variant names match git's `--diff-algorithm` values
/// (`myers`, `minimal`, `patience`, `histogram`); how each variant is honoured
/// is decided by the code that consumes this enum — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffAlgorithm {
    Myers,
    Minimal,
    Patience,
    Histogram,
}
1590
/// A path-level change, carrying the repository path(s) it involves.
///
/// `Add`, `Delete` and `Modify` carry the single affected path; `Rename` and
/// `Copy` carry both the originating path and the resulting path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileChange {
    Add { path: RepoPath },
    Delete { path: RepoPath },
    Modify { path: RepoPath },
    Rename { old: RepoPath, new: RepoPath },
    Copy { source: RepoPath, dest: RepoPath },
}
1599
/// A conflicted path together with one byte buffer per side.
///
/// NOTE(review): `ours` / `theirs` presumably hold each side's file content —
/// confirm against the code that constructs this value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Conflict {
    pub path: RepoPath,
    pub ours: Vec<u8>,
    pub theirs: Vec<u8>,
}
1606
/// The status of one path in a name-status diff. [`NameStatus::code`] gives the
/// single-letter form and [`NameStatus::label`] the printable form.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NameStatus {
    Added,
    Deleted,
    Modified,
    /// The file type (`S_IFMT` bits of the mode) differs between the two sides:
    /// regular↔symlink, regular↔gitlink, symlink↔gitlink. git renders this as
    /// `T` (`DIFF_STATUS_TYPE_CHANGED`, set by `diffcore.h`'s
    /// `DIFF_PAIR_TYPE_CHANGED` before rename/modify resolution). A change of
    /// only the exec bit (100644↔100755) keeps the same `S_IFMT` and is NOT a
    /// typechange.
    TypeChanged,
    /// A rename; the payload is the score appended to the letter by
    /// [`NameStatus::label`].
    Renamed(u8),
    /// A copy; the payload is the score appended to the letter by
    /// [`NameStatus::label`].
    Copied(u8),
    /// An unmerged (conflicted) path: the index holds higher-stage entries.
    /// git emits a standalone `U <path>` pair (`diff_unmerge`) for it, in
    /// addition to the regular worktree-vs-stage-2 modify.
    Unmerged,
}

impl NameStatus {
    /// The single status letter, without any score.
    pub const fn code(self) -> char {
        match self {
            Self::Renamed(_) => 'R',
            Self::Copied(_) => 'C',
            Self::TypeChanged => 'T',
            Self::Unmerged => 'U',
            Self::Added => 'A',
            Self::Deleted => 'D',
            Self::Modified => 'M',
        }
    }

    /// The printable status: the letter alone, or for renames and copies the
    /// letter followed by the score zero-padded to three digits (`R075`,
    /// `C100`).
    pub fn label(self) -> String {
        let letter = self.code();
        match self {
            Self::Renamed(score) | Self::Copied(score) => format!("{letter}{score:03}"),
            Self::Added
            | Self::Deleted
            | Self::Modified
            | Self::TypeChanged
            | Self::Unmerged => letter.to_string(),
        }
    }
}
1647
/// Mask selecting the file-type bits of a git mode (`S_IFMT`). Under this mask
/// a regular file is `0o100000`, a symlink `0o120000`, a gitlink `0o160000`
/// and a tree `0o040000`.
pub const S_IFMT: u32 = 0o170000;

/// Whether two (non-zero) modes form a git "typechange", i.e. their `S_IFMT`
/// bits differ. Mirrors `diffcore.h`'s `DIFF_PAIR_TYPE_CHANGED`
/// (`(S_IFMT & one->mode) != (S_IFMT & two->mode)`). A change of only the exec
/// bit (`0o100644` ↔ `0o100755`) leaves `S_IFMT` equal and is NOT a typechange.
#[must_use]
pub const fn is_type_change(old_mode: u32, new_mode: u32) -> bool {
    // XOR keeps exactly the bits that differ; any survivor under the mask means
    // the two file types are not the same.
    let differing_bits = old_mode ^ new_mode;
    (differing_bits & S_IFMT) != 0
}
1661
1662/// Classify a both-sides-present change whose entries already differ: a
1663/// [`NameStatus::TypeChanged`] when the modes' `S_IFMT` bits differ, otherwise a
1664/// plain [`NameStatus::Modified`]. git sets `DIFF_STATUS_TYPE_CHANGED` before any
1665/// rename/modify resolution (`diff.c` ~6650).
1666#[must_use]
1667pub const fn modify_or_type_change(old_mode: u32, new_mode: u32) -> NameStatus {
1668    if is_type_change(old_mode, new_mode) {
1669        NameStatus::TypeChanged
1670    } else {
1671        NameStatus::Modified
1672    }
1673}
1674
1675#[derive(Debug, Clone, PartialEq, Eq)]
1676pub struct NameStatusEntry {
1677    pub status: NameStatus,
1678    pub path: BString,
1679    pub old_path: Option<BString>,
1680    pub old_mode: Option<u32>,
1681    pub new_mode: Option<u32>,
1682    pub old_oid: Option<ObjectId>,
1683    pub new_oid: Option<ObjectId>,
1684}
1685
1686impl NameStatusEntry {
1687    pub fn line(&self) -> String {
1688        if let Some(old_path) = &self.old_path {
1689            format!(
1690                "{}\t{}\t{}",
1691                self.status.label(),
1692                String::from_utf8_lossy(old_path.as_bytes()),
1693                String::from_utf8_lossy(self.path.as_bytes())
1694            )
1695        } else {
1696            format!(
1697                "{}\t{}",
1698                self.status.label(),
1699                String::from_utf8_lossy(self.path.as_bytes())
1700            )
1701        }
1702    }
1703}
1704
/// A path paired with an object id.
///
/// NOTE(review): going by the name and by its use in
/// [`IndexWorktreeDiff::staged_gitlinks`], this is presumably a gitlink
/// (mode-160000) index entry and the commit oid it records — confirm against
/// the code that builds these values.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexGitlinkEntry {
    pub path: BString,
    pub oid: ObjectId,
}
1710
/// Result of an index-vs-worktree comparison: the name-status entries plus the
/// gitlink entries gathered from the index during the same pass.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexWorktreeDiff {
    pub entries: Vec<NameStatusEntry>,
    pub staged_gitlinks: Vec<IndexGitlinkEntry>,
}
1716
/// Flags controlling rename and copy detection in name-status diffs.
///
/// The [`Default`] value turns on `detect_renames` and `rename_empty` and
/// leaves `detect_copies` and `find_copies_harder` off.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DiffNameStatusOptions {
    pub detect_renames: bool,
    pub detect_copies: bool,
    pub find_copies_harder: bool,
    pub rename_empty: bool,
}

impl Default for DiffNameStatusOptions {
    fn default() -> Self {
        // Rename handling is on by default; both copy-related flags are opt-in.
        let (detect_renames, rename_empty) = (true, true);
        let (detect_copies, find_copies_harder) = (false, false);
        Self {
            detect_renames,
            detect_copies,
            find_copies_harder,
            rename_empty,
        }
    }
}
1735
/// git's default minimum similarity (as a percentage) for a pair of files to be
/// reported as a rename or copy. Matches `git`'s built-in `-M`/`-C` threshold
/// of 50% (`DEFAULT_RENAME_SCORE` is `MAX_SCORE / 2`).
///
/// [`RenameDetectionOptions::default`] uses this value for both
/// `rename_threshold` and `copy_threshold`.
pub const DEFAULT_RENAME_THRESHOLD: u8 = 50;
1740
1741/// Options controlling inexact (similarity-based) rename and copy detection,
1742/// layered additively on top of [`DiffNameStatusOptions`].
1743///
1744/// This is a separate struct rather than new fields on [`DiffNameStatusOptions`]
1745/// so that existing callers — which build `DiffNameStatusOptions` with a struct
1746/// literal — keep compiling unchanged. Code that wants inexact detection uses
1747/// the `*_with_rename_options` entry points and this type instead.
1748///
1749/// [`Default`] preserves the existing behaviour exactly: `detect_inexact` is
1750/// `false`, so unless a caller opts in, only exact-OID rename/copy detection
1751/// runs (identical to the plain `*_with_options` functions). When
1752/// `detect_inexact` is enabled, files added on one side are paired with the most
1753/// similar deleted/modified file on the other side whose similarity meets the
1754/// relevant threshold; exact-OID matches still take priority and are always
1755/// scored 100.
1756#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1757pub struct RenameDetectionOptions {
1758    /// The base name-status options (rename/copy enable flags, find-copies-harder,
1759    /// rename-empty). Exact detection honours these exactly as before.
1760    pub base: DiffNameStatusOptions,
1761    /// Enable inexact (content-similarity) detection. When `false`, only exact
1762    /// OID matches are detected, matching the legacy `*_with_options` behaviour.
1763    pub detect_inexact: bool,
1764    /// Minimum similarity percentage (`0..=100`) for an inexact *rename*. Pairs
1765    /// scoring below this are not reported as renames. Defaults to
1766    /// [`DEFAULT_RENAME_THRESHOLD`].
1767    pub rename_threshold: u8,
1768    /// Minimum similarity percentage (`0..=100`) for an inexact *copy*. Defaults
1769    /// to [`DEFAULT_RENAME_THRESHOLD`]; git uses the same default for `-C` as for
1770    /// `-M` unless `-C<n>` overrides it.
1771    pub copy_threshold: u8,
1772    /// Cap on the inexact rename matrix (git's `diff.renameLimit` /
1773    /// `merge.renameLimit`): when the number of candidate sources times the
1774    /// number of candidate destinations exceeds `rename_limit²`, inexact
1775    /// detection is skipped entirely (only exact-OID renames survive). `0` means
1776    /// unlimited — git's `too_many_rename_candidates` treats a non-positive limit
1777    /// the same way.
1778    pub rename_limit: usize,
1779}
1780
1781impl Default for RenameDetectionOptions {
1782    fn default() -> Self {
1783        Self {
1784            base: DiffNameStatusOptions::default(),
1785            detect_inexact: false,
1786            rename_threshold: DEFAULT_RENAME_THRESHOLD,
1787            copy_threshold: DEFAULT_RENAME_THRESHOLD,
1788            rename_limit: 0,
1789        }
1790    }
1791}
1792
1793impl RenameDetectionOptions {
1794    /// Build inexact-enabled options from a base [`DiffNameStatusOptions`], using
1795    /// the default thresholds for both renames and copies.
1796    pub fn inexact(base: DiffNameStatusOptions) -> Self {
1797        Self {
1798            base,
1799            detect_inexact: true,
1800            ..Self::default()
1801        }
1802    }
1803}
1804
1805pub fn diff_name_status_head_worktree(
1806    worktree_root: impl AsRef<Path>,
1807    git_dir: impl AsRef<Path>,
1808    format: ObjectFormat,
1809) -> Result<Vec<NameStatusEntry>> {
1810    diff_name_status_head_worktree_with_options(
1811        worktree_root,
1812        git_dir,
1813        format,
1814        DiffNameStatusOptions::default(),
1815    )
1816}
1817
1818pub fn diff_name_status_head_worktree_with_options(
1819    worktree_root: impl AsRef<Path>,
1820    git_dir: impl AsRef<Path>,
1821    format: ObjectFormat,
1822    options: DiffNameStatusOptions,
1823) -> Result<Vec<NameStatusEntry>> {
1824    let worktree_root = worktree_root.as_ref();
1825    let git_dir = git_dir.as_ref();
1826    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1827    let head = head_tree_entries(git_dir, format, &db)?;
1828    let IndexSnapshot {
1829        entries: index,
1830        stat_cache,
1831    } = read_index_snapshot(git_dir, format)?;
1832    let index_gitlinks = index_gitlinks(&index);
1833    let candidate_paths = candidate_path_set(head.keys().chain(index.keys()));
1834    let worktree = worktree_entries_for_path_set(
1835        worktree_root,
1836        format,
1837        &candidate_paths,
1838        &index_gitlinks,
1839        Some(&stat_cache),
1840    )?;
1841    let changes = diff_name_status_maps_for_path_set(&head, &worktree, &candidate_paths, options)?;
1842    Ok(mark_unstaged_worktree_oids_unresolved(
1843        changes, &index, &worktree,
1844    ))
1845}
1846
1847/// HEAD-vs-worktree name-status with full rename/copy options, including inexact
1848/// (similarity) detection when enabled. Worktree blob content is read directly
1849/// from the working tree; HEAD-side blobs come from the object database.
1850pub fn diff_name_status_head_worktree_with_rename_options(
1851    worktree_root: impl AsRef<Path>,
1852    git_dir: impl AsRef<Path>,
1853    format: ObjectFormat,
1854    options: RenameDetectionOptions,
1855) -> Result<Vec<NameStatusEntry>> {
1856    let worktree_root = worktree_root.as_ref();
1857    let git_dir = git_dir.as_ref();
1858    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1859    let head = head_tree_entries(git_dir, format, &db)?;
1860    let IndexSnapshot {
1861        entries: index,
1862        stat_cache,
1863    } = read_index_snapshot(git_dir, format)?;
1864    let index_gitlinks = index_gitlinks(&index);
1865    let candidate_paths = candidate_path_set(head.keys().chain(index.keys()));
1866    let worktree = worktree_entries_for_path_set(
1867        worktree_root,
1868        format,
1869        &candidate_paths,
1870        &index_gitlinks,
1871        Some(&stat_cache),
1872    )?;
1873    let cache = worktree_blob_cache_for_path_set(
1874        worktree_root,
1875        &head,
1876        &worktree,
1877        &candidate_paths,
1878        options,
1879    )?;
1880    let changes = diff_name_status_maps_with_renames_for_path_set(
1881        &head,
1882        &worktree,
1883        &candidate_paths,
1884        options,
1885        |oid| cache_or_odb_blob(&cache, &db, oid),
1886    )?;
1887    Ok(mark_unstaged_worktree_oids_unresolved(
1888        changes, &index, &worktree,
1889    ))
1890}
1891
1892pub fn diff_name_status_head_index(
1893    git_dir: impl AsRef<Path>,
1894    format: ObjectFormat,
1895) -> Result<Vec<NameStatusEntry>> {
1896    diff_name_status_head_index_with_options(git_dir, format, DiffNameStatusOptions::default())
1897}
1898
1899pub fn diff_name_status_head_index_with_options(
1900    git_dir: impl AsRef<Path>,
1901    format: ObjectFormat,
1902    options: DiffNameStatusOptions,
1903) -> Result<Vec<NameStatusEntry>> {
1904    let git_dir = git_dir.as_ref();
1905    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1906    let head = head_tree_entries(git_dir, format, &db)?;
1907    let index = read_index_entries(git_dir, format)?;
1908    diff_name_status_maps(&head, &index, head.keys().chain(index.keys()), options)
1909}
1910
1911/// HEAD-vs-index name-status with full rename/copy options, including inexact
1912/// (similarity) detection when enabled. All blob content (both sides) comes from
1913/// the object database.
1914pub fn diff_name_status_head_index_with_rename_options(
1915    git_dir: impl AsRef<Path>,
1916    format: ObjectFormat,
1917    options: RenameDetectionOptions,
1918) -> Result<Vec<NameStatusEntry>> {
1919    let git_dir = git_dir.as_ref();
1920    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1921    let head = head_tree_entries(git_dir, format, &db)?;
1922    let index = read_index_entries(git_dir, format)?;
1923    diff_name_status_maps_with_renames(
1924        &head,
1925        &index,
1926        head.keys().chain(index.keys()),
1927        options,
1928        |oid| read_blob_bytes(&db, oid),
1929    )
1930}
1931
1932/// Read an arbitrary tree object's flattened blob entries (recursively) keyed by
1933/// repository-relative path. This is the tree-side counterpart used by
1934/// `git diff-index <tree-ish>`: unlike [`head_tree_entries`] it does not consult
1935/// `HEAD`, so any commit/tag (peeled to a tree) or tree oid can be compared.
1936///
1937/// The canonical empty tree (`git hash-object -t tree /dev/null`) is treated as
1938/// always present and yields no entries, even when the object was never written
1939/// to the database. git makes the same guarantee, which keeps the common idiom
1940/// `git diff-index --cached <empty-tree-sha>` working in a fresh repository.
1941fn tree_entries(
1942    tree_oid: &ObjectId,
1943    format: ObjectFormat,
1944    db: &FileObjectDatabase,
1945) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
1946    let mut entries = BTreeMap::new();
1947    if *tree_oid == empty_tree_oid(format)? {
1948        return Ok(entries);
1949    }
1950    collect_tree_entries(db, format, tree_oid, Vec::new(), &mut entries)?;
1951    Ok(entries)
1952}
1953
1954/// The well-known oid of the empty tree for `format` (the hash of a zero-length
1955/// tree object). git hard-codes this value and treats it as always existing.
1956fn empty_tree_oid(format: ObjectFormat) -> Result<ObjectId> {
1957    object_id_for_bytes(format, "tree", b"")
1958}
1959
1960/// Name-status diff of an arbitrary tree against the index, the engine behind
1961/// `git diff-index --cached <tree-ish>`. Exact rename/copy detection follows
1962/// `options`; all blob content comes from the object database.
1963pub fn diff_name_status_tree_index_with_options(
1964    git_dir: impl AsRef<Path>,
1965    format: ObjectFormat,
1966    tree_oid: &ObjectId,
1967    options: DiffNameStatusOptions,
1968) -> Result<Vec<NameStatusEntry>> {
1969    let git_dir = git_dir.as_ref();
1970    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1971    let tree = tree_entries(tree_oid, format, &db)?;
1972    let index = read_index_entries(git_dir, format)?;
1973    diff_name_status_maps(&tree, &index, tree.keys().chain(index.keys()), options)
1974}
1975
1976/// Tree-vs-index name-status with full rename/copy options, including inexact
1977/// (similarity) detection when enabled. Both sides read blob content from the
1978/// object database. Counterpart of
1979/// [`diff_name_status_head_index_with_rename_options`] for an arbitrary tree.
1980pub fn diff_name_status_tree_index_with_rename_options(
1981    git_dir: impl AsRef<Path>,
1982    format: ObjectFormat,
1983    tree_oid: &ObjectId,
1984    options: RenameDetectionOptions,
1985) -> Result<Vec<NameStatusEntry>> {
1986    let git_dir = git_dir.as_ref();
1987    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1988    let tree = tree_entries(tree_oid, format, &db)?;
1989    let index = read_index_entries(git_dir, format)?;
1990    diff_name_status_maps_with_renames(
1991        &tree,
1992        &index,
1993        tree.keys().chain(index.keys()),
1994        options,
1995        |oid| read_blob_bytes(&db, oid),
1996    )
1997}
1998
1999/// Name-status diff of an arbitrary tree against the working tree, the engine
2000/// behind plain `git diff-index <tree-ish>` (no `--cached`). New-side oids for
2001/// paths whose worktree contents differ from the index are cleared (rendered as
2002/// zeros), matching git, which only reports the worktree blob oid when it is
2003/// known-clean against the index.
2004pub fn diff_name_status_tree_worktree_with_options(
2005    worktree_root: impl AsRef<Path>,
2006    git_dir: impl AsRef<Path>,
2007    format: ObjectFormat,
2008    tree_oid: &ObjectId,
2009    options: DiffNameStatusOptions,
2010) -> Result<Vec<NameStatusEntry>> {
2011    let worktree_root = worktree_root.as_ref();
2012    let git_dir = git_dir.as_ref();
2013    let db = FileObjectDatabase::from_git_dir(git_dir, format);
2014    let tree = tree_entries(tree_oid, format, &db)?;
2015    let IndexSnapshot {
2016        entries: index,
2017        stat_cache,
2018    } = read_index_snapshot(git_dir, format)?;
2019    let index_gitlinks = index_gitlinks(&index);
2020    let candidate_paths = candidate_path_set(tree.keys().chain(index.keys()));
2021    let worktree = worktree_entries_for_path_set(
2022        worktree_root,
2023        format,
2024        &candidate_paths,
2025        &index_gitlinks,
2026        Some(&stat_cache),
2027    )?;
2028    let changes = diff_name_status_maps_for_path_set(&tree, &worktree, &candidate_paths, options)?;
2029    Ok(mark_unstaged_worktree_oids_unresolved(
2030        changes, &index, &worktree,
2031    ))
2032}
2033
2034/// Tree-vs-worktree name-status with full rename/copy options, including inexact
2035/// (similarity) detection when enabled. Worktree blob content is read directly
2036/// from the working tree (via an oid-keyed cache); tree-side blobs come from the
2037/// object database. As with [`diff_name_status_tree_worktree_with_options`],
2038/// new-side oids for paths that differ from the index are cleared.
2039pub fn diff_name_status_tree_worktree_with_rename_options(
2040    worktree_root: impl AsRef<Path>,
2041    git_dir: impl AsRef<Path>,
2042    format: ObjectFormat,
2043    tree_oid: &ObjectId,
2044    options: RenameDetectionOptions,
2045) -> Result<Vec<NameStatusEntry>> {
2046    let worktree_root = worktree_root.as_ref();
2047    let git_dir = git_dir.as_ref();
2048    let db = FileObjectDatabase::from_git_dir(git_dir, format);
2049    let tree = tree_entries(tree_oid, format, &db)?;
2050    let IndexSnapshot {
2051        entries: index,
2052        stat_cache,
2053    } = read_index_snapshot(git_dir, format)?;
2054    let index_gitlinks = index_gitlinks(&index);
2055    let candidate_paths = candidate_path_set(tree.keys().chain(index.keys()));
2056    let worktree = worktree_entries_for_path_set(
2057        worktree_root,
2058        format,
2059        &candidate_paths,
2060        &index_gitlinks,
2061        Some(&stat_cache),
2062    )?;
2063    let cache = worktree_blob_cache_for_path_set(
2064        worktree_root,
2065        &tree,
2066        &worktree,
2067        &candidate_paths,
2068        options,
2069    )?;
2070    let changes = diff_name_status_maps_with_renames_for_path_set(
2071        &tree,
2072        &worktree,
2073        &candidate_paths,
2074        options,
2075        |oid| cache_or_odb_blob(&cache, &db, oid),
2076    )?;
2077    Ok(mark_unstaged_worktree_oids_unresolved(
2078        changes, &index, &worktree,
2079    ))
2080}
2081
2082pub fn diff_name_status_index_worktree(
2083    worktree_root: impl AsRef<Path>,
2084    git_dir: impl AsRef<Path>,
2085    format: ObjectFormat,
2086) -> Result<Vec<NameStatusEntry>> {
2087    diff_name_status_index_worktree_with_options(
2088        worktree_root,
2089        git_dir,
2090        format,
2091        DiffNameStatusOptions::default(),
2092    )
2093}
2094
2095pub fn diff_name_status_index_worktree_with_options(
2096    worktree_root: impl AsRef<Path>,
2097    git_dir: impl AsRef<Path>,
2098    format: ObjectFormat,
2099    options: DiffNameStatusOptions,
2100) -> Result<Vec<NameStatusEntry>> {
2101    Ok(diff_name_status_index_worktree_with_options_and_gitlinks(
2102        worktree_root,
2103        git_dir,
2104        format,
2105        options,
2106    )?
2107    .entries)
2108}
2109
2110pub fn diff_name_status_index_worktree_with_options_and_gitlinks(
2111    worktree_root: impl AsRef<Path>,
2112    git_dir: impl AsRef<Path>,
2113    format: ObjectFormat,
2114    options: DiffNameStatusOptions,
2115) -> Result<IndexWorktreeDiff> {
2116    let IndexWorktreeDiff {
2117        entries,
2118        staged_gitlinks,
2119    } = diff_name_status_index_worktree_changes(worktree_root.as_ref(), git_dir.as_ref(), format)?;
2120    let entries = apply_name_status_options_to_index_worktree_changes(entries, options)?;
2121    Ok(IndexWorktreeDiff {
2122        entries,
2123        staged_gitlinks,
2124    })
2125}
2126
2127/// Index-vs-worktree name-status with full rename/copy options, including inexact
2128/// (similarity) detection when enabled. Worktree blob content is read directly
2129/// from the working tree; index-side blobs come from the object database.
2130pub fn diff_name_status_index_worktree_with_rename_options(
2131    worktree_root: impl AsRef<Path>,
2132    git_dir: impl AsRef<Path>,
2133    format: ObjectFormat,
2134    options: RenameDetectionOptions,
2135) -> Result<Vec<NameStatusEntry>> {
2136    Ok(
2137        diff_name_status_index_worktree_with_rename_options_and_gitlinks(
2138            worktree_root,
2139            git_dir,
2140            format,
2141            options,
2142        )?
2143        .entries,
2144    )
2145}
2146
2147pub fn diff_name_status_index_worktree_with_rename_options_and_gitlinks(
2148    worktree_root: impl AsRef<Path>,
2149    git_dir: impl AsRef<Path>,
2150    format: ObjectFormat,
2151    options: RenameDetectionOptions,
2152) -> Result<IndexWorktreeDiff> {
2153    let IndexWorktreeDiff {
2154        entries,
2155        staged_gitlinks,
2156    } = diff_name_status_index_worktree_changes(worktree_root.as_ref(), git_dir.as_ref(), format)?;
2157    // Index-vs-worktree diffs only consider tracked index paths; untracked
2158    // worktree files are not additions, so rename/copy detection has no add
2159    // destinations to pair. Apply the base options for completeness.
2160    let entries = apply_name_status_options_to_index_worktree_changes(entries, options.base)?;
2161    Ok(IndexWorktreeDiff {
2162        entries,
2163        staged_gitlinks,
2164    })
2165}
2166
/// Compute the raw index-vs-worktree changes and the index's gitlink entries,
/// before any rename/copy options are applied.
///
/// A missing index file yields an empty diff. Otherwise the index bytes are
/// first parsed as a [`BorrowedIndex`]; that borrowed path is taken only when
/// parsing succeeds, the index carries no `INDEX_EXT_LINK` extension, and no
/// entry is a sparse-directory entry. In every other case the index is re-read
/// through `sley_index::read_repository_index` and sparse directories are
/// expanded before scanning.
///
/// On either path, when `index_worktree_metadata_for_entries` reports a
/// non-normal stage the work is delegated to
/// `diff_name_status_index_worktree_changes_from_snapshot` instead.
fn diff_name_status_index_worktree_changes(
    worktree_root: &Path,
    git_dir: &Path,
    format: ObjectFormat,
) -> Result<IndexWorktreeDiff> {
    let index_path = sley_index::repository_index_path(git_dir);
    let index_metadata = match fs::metadata(&index_path) {
        Ok(metadata) => metadata,
        // No index file on disk: report an empty diff rather than an error.
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
            return Ok(IndexWorktreeDiff {
                entries: Vec::new(),
                staged_gitlinks: Vec::new(),
            });
        }
        Err(err) => return Err(err.into()),
    };
    let index_bytes = fs::read(&index_path)?;
    // Borrowed path: entries reference `index_bytes` directly. A parse failure
    // is not reported here; control falls through to the owned reader below.
    if let Ok(index) = BorrowedIndex::parse(&index_bytes, format)
        && index.extension(&sley_index::INDEX_EXT_LINK)?.is_none()
        && !index.entries.iter().any(borrowed_entry_is_sparse_dir)
    {
        let (has_non_normal_stage, staged_gitlinks) =
            index_worktree_metadata_for_entries(&index.entries);
        if has_non_normal_stage {
            return diff_name_status_index_worktree_changes_from_snapshot(
                worktree_root,
                git_dir,
                format,
            );
        }
        // The stat cache is seeded only from the index file's mtime, taken from
        // the metadata read at the top of the function.
        let stat_cache =
            IndexStatCache::from_index_mtime_only(sley_index::file_mtime_parts(&index_metadata));
        let entries = diff_name_status_index_worktree_changes_for_borrowed_entries(
            worktree_root,
            format,
            &index.entries,
            &stat_cache,
        )?;
        return Ok(IndexWorktreeDiff {
            entries,
            staged_gitlinks,
        });
    }
    // Owned path: read the index through the repository reader and replace any
    // sparse-directory entries with the files of the trees they name.
    let index = expand_sparse_index_for_worktree_diff(
        sley_index::read_repository_index(git_dir, format)?,
        git_dir,
        format,
    )?;
    let (has_non_normal_stage, staged_gitlinks) =
        index_worktree_metadata_for_entries(&index.entries);
    if has_non_normal_stage {
        return diff_name_status_index_worktree_changes_from_snapshot(
            worktree_root,
            git_dir,
            format,
        );
    }
    let stat_cache =
        IndexStatCache::from_index_mtime_only(sley_index::file_mtime_parts(&index_metadata));
    let entries = diff_name_status_index_worktree_changes_for_entries(
        worktree_root,
        format,
        &index.entries,
        &stat_cache,
    )?;
    Ok(IndexWorktreeDiff {
        entries,
        staged_gitlinks,
    })
}
2237
2238fn borrowed_entry_is_sparse_dir(entry: &sley_index::IndexEntryRef<'_>) -> bool {
2239    entry.mode == sley_index::SPARSE_DIR_MODE && entry.is_skip_worktree()
2240}
2241
2242fn expand_sparse_index_for_worktree_diff(
2243    mut index: Index,
2244    git_dir: &Path,
2245    format: ObjectFormat,
2246) -> Result<Index> {
2247    if !index
2248        .entries
2249        .iter()
2250        .any(sley_index::IndexEntry::is_sparse_dir)
2251    {
2252        return Ok(index);
2253    }
2254
2255    let db = FileObjectDatabase::from_git_dir(git_dir, format);
2256    let mut expanded = Vec::with_capacity(index.entries.len());
2257    for entry in std::mem::take(&mut index.entries) {
2258        if !entry.is_sparse_dir() {
2259            expanded.push(entry);
2260            continue;
2261        }
2262
2263        let dir_prefix = entry.path.as_bytes();
2264        for (rel_path, (mode, oid)) in flatten_tree(&db, format, &entry.oid)? {
2265            let mut path = dir_prefix.to_vec();
2266            path.extend_from_slice(&rel_path);
2267            let mut expanded_entry = sley_index::IndexEntry {
2268                ctime_seconds: 0,
2269                ctime_nanoseconds: 0,
2270                mtime_seconds: 0,
2271                mtime_nanoseconds: 0,
2272                dev: 0,
2273                ino: 0,
2274                mode,
2275                uid: 0,
2276                gid: 0,
2277                size: 0,
2278                oid,
2279                flags: 0,
2280                flags_extended: 0,
2281                path: BString::from(path),
2282            };
2283            expanded_entry.set_skip_worktree(true);
2284            expanded_entry.refresh_name_length();
2285            expanded.push(expanded_entry);
2286        }
2287    }
2288
2289    expanded.sort_by(|left, right| left.path.as_bytes().cmp(right.path.as_bytes()));
2290    index.entries = expanded;
2291    index.clear_sparse_extension()?;
2292    Ok(index)
2293}
2294
2295fn diff_name_status_index_worktree_changes_for_borrowed_entries(
2296    worktree_root: &Path,
2297    format: ObjectFormat,
2298    entries: &[sley_index::IndexEntryRef<'_>],
2299    stat_cache: &IndexStatCache,
2300) -> Result<Vec<NameStatusEntry>> {
2301    const PARALLEL_SCAN_MIN_ENTRIES: usize = 2048;
2302    let workers = std::thread::available_parallelism()
2303        .map(|count| count.get())
2304        .unwrap_or(1)
2305        .min(8);
2306    if workers <= 1 || entries.len() < PARALLEL_SCAN_MIN_ENTRIES {
2307        return diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
2308            worktree_root,
2309            format,
2310            entries,
2311            stat_cache,
2312        );
2313    }
2314    let chunk_size = entries.len().div_ceil(workers);
2315    std::thread::scope(|scope| {
2316        let mut handles = Vec::new();
2317        for chunk in entries.chunks(chunk_size) {
2318            handles.push(scope.spawn(move || {
2319                diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
2320                    worktree_root,
2321                    format,
2322                    chunk,
2323                    stat_cache,
2324                )
2325            }));
2326        }
2327        let mut changes = Vec::new();
2328        for handle in handles {
2329            let chunk_changes = handle
2330                .join()
2331                .map_err(|_| GitError::Command("diff worker panicked".into()))??;
2332            changes.extend(chunk_changes);
2333        }
2334        Ok(changes)
2335    })
2336}
2337
2338fn diff_name_status_index_worktree_changes_for_entries(
2339    worktree_root: &Path,
2340    format: ObjectFormat,
2341    entries: &[sley_index::IndexEntry],
2342    stat_cache: &IndexStatCache,
2343) -> Result<Vec<NameStatusEntry>> {
2344    const PARALLEL_SCAN_MIN_ENTRIES: usize = 2048;
2345    let workers = std::thread::available_parallelism()
2346        .map(|count| count.get())
2347        .unwrap_or(1)
2348        .min(8);
2349    if workers <= 1 || entries.len() < PARALLEL_SCAN_MIN_ENTRIES {
2350        return diff_name_status_index_worktree_changes_for_entry_chunk(
2351            worktree_root,
2352            format,
2353            entries,
2354            stat_cache,
2355        );
2356    }
2357    let chunk_size = entries.len().div_ceil(workers);
2358    std::thread::scope(|scope| {
2359        let mut handles = Vec::new();
2360        for chunk in entries.chunks(chunk_size) {
2361            handles.push(scope.spawn(move || {
2362                diff_name_status_index_worktree_changes_for_entry_chunk(
2363                    worktree_root,
2364                    format,
2365                    chunk,
2366                    stat_cache,
2367                )
2368            }));
2369        }
2370        let mut changes = Vec::new();
2371        for handle in handles {
2372            let chunk_changes = handle
2373                .join()
2374                .map_err(|_| GitError::Command("diff worker panicked".into()))??;
2375            changes.extend(chunk_changes);
2376        }
2377        Ok(changes)
2378    })
2379}
2380
2381fn diff_name_status_index_worktree_changes_for_entry_chunk(
2382    worktree_root: &Path,
2383    format: ObjectFormat,
2384    entries: &[sley_index::IndexEntry],
2385    stat_cache: &IndexStatCache,
2386) -> Result<Vec<NameStatusEntry>> {
2387    let mut changes = Vec::new();
2388    let mut path = PathBuf::from(worktree_root);
2389    for entry in entries {
2390        worktree_path_for_repo_path_into(&mut path, worktree_root, entry.path.as_bytes());
2391        if let Some(change) = index_worktree_change_for_entry(&path, format, entry, stat_cache)? {
2392            changes.push(change);
2393        }
2394    }
2395    Ok(changes)
2396}
2397
2398fn diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
2399    worktree_root: &Path,
2400    format: ObjectFormat,
2401    entries: &[sley_index::IndexEntryRef<'_>],
2402    stat_cache: &IndexStatCache,
2403) -> Result<Vec<NameStatusEntry>> {
2404    let mut changes = Vec::new();
2405    let mut path = PathBuf::from(worktree_root);
2406    for entry in entries {
2407        worktree_path_for_repo_path_into(&mut path, worktree_root, entry.path);
2408        if let Some(change) = index_worktree_change_for_entry(&path, format, entry, stat_cache)? {
2409            changes.push(change);
2410        }
2411    }
2412    Ok(changes)
2413}
2414
2415fn index_worktree_metadata_for_entries(
2416    entries: &[impl WorktreeIndexEntry],
2417) -> (bool, Vec<IndexGitlinkEntry>) {
2418    let mut needs_snapshot = false;
2419    let mut staged_gitlinks = Vec::new();
2420    for entry in entries {
2421        if entry.stage() != sley_index::Stage::Normal {
2422            needs_snapshot = true;
2423        }
2424        // Intent-to-add entries (`git add -N`) must take the snapshot path, which
2425        // diffs them as new files rather than loading their empty-blob id.
2426        if entry.is_intent_to_add() {
2427            needs_snapshot = true;
2428        }
2429        if sley_index::is_gitlink(entry.mode()) {
2430            staged_gitlinks.push(IndexGitlinkEntry {
2431                path: BString::from_bytes(entry.git_path()),
2432                oid: entry.oid(),
2433            });
2434        }
2435    }
2436    (needs_snapshot, staged_gitlinks)
2437}
2438
2439fn diff_name_status_index_worktree_changes_from_snapshot(
2440    worktree_root: &Path,
2441    git_dir: &Path,
2442    format: ObjectFormat,
2443) -> Result<IndexWorktreeDiff> {
2444    let IndexSnapshot {
2445        entries: index,
2446        stat_cache,
2447    } = read_index_snapshot(git_dir, format)?;
2448    // Intent-to-add (`git add -N`) paths are placeholders: git's `run_diff_files`
2449    // diffs them as a brand-new file (`/dev/null` → worktree), never loading the
2450    // recorded empty-blob id. `read_index_snapshot` drops the ITA flag, so read
2451    // the set of ITA stage-0 paths separately and override their verdict below.
2452    let intent_to_add_paths = read_intent_to_add_paths(git_dir, format)?;
2453    // `read_index_snapshot` collapses each path to a single entry; for an
2454    // unmerged path it keeps the last-written stage. To match git's
2455    // `run_diff_files` we need the conflict stages, so read them separately:
2456    // git diffs the worktree against the "ours" stage (stage 2, the default
2457    // `diff_unmerged_stage`) and additionally emits a standalone `U <path>`
2458    // pair via `diff_unmerge` (diff-lib.c).
2459    let unmerged = read_unmerged_stages(git_dir, format)?;
2460    let index_gitlinks = index_gitlinks(&index);
2461    let staged_gitlinks = index_gitlinks
2462        .iter()
2463        .map(|(path, oid)| IndexGitlinkEntry {
2464            path: BString::from_bytes(path),
2465            oid: *oid,
2466        })
2467        .collect();
2468    let mut changes = Vec::new();
2469    for (git_path, left) in &index {
2470        // For a conflicted path git first queues the `U` pair, then compares the
2471        // worktree against stage 2 (ours). The snapshot's collapsed `left` may
2472        // be the wrong stage, so override it with the stage-2 entry when present.
2473        let conflict_stages = unmerged.get(git_path);
2474        let right = worktree_entry_for_path(
2475            worktree_root,
2476            format,
2477            git_path,
2478            &index_gitlinks,
2479            Some(&stat_cache),
2480        )?;
2481        if conflict_stages.is_some() {
2482            // git's `diff_unmerge` makes a pair with a null old side and the
2483            // worktree mode on the new side (diff-lib.c `wt_mode`); the oids stay
2484            // zero. The raw line is `:000000 <wt_mode> 0..0 0..0 U <path>`.
2485            changes.push(NameStatusEntry {
2486                status: NameStatus::Unmerged,
2487                path: git_path.clone().into(),
2488                old_path: None,
2489                old_mode: None,
2490                new_mode: right.as_ref().map(|entry| entry.mode),
2491                old_oid: None,
2492                new_oid: None,
2493            });
2494        }
2495        // The index side for the modify comparison: stage 2 (ours) for a
2496        // conflict, otherwise the normal stage-0 entry. If the conflict has no
2497        // stage-2 (deleted on our side / added by them), git has no entry to
2498        // diff the worktree against, so it emits only the `U` line.
2499        let left = match conflict_stages {
2500            Some(stages) => match stages.ours.as_ref() {
2501                Some(ours) => ours,
2502                None => continue,
2503            },
2504            None => left,
2505        };
2506        // Intent-to-add placeholder: git's `run_diff_files` diffs it as a new
2507        // file. With the worktree file present, queue an `Added` pair whose old
2508        // side is null (`/dev/null` → worktree blob); with the file gone, an ITA
2509        // entry yields no diff-files entry (there is nothing to add).
2510        if intent_to_add_paths.contains(git_path.as_slice()) {
2511            if let Some(right) = right {
2512                changes.push(NameStatusEntry {
2513                    status: NameStatus::Added,
2514                    path: git_path.clone().into(),
2515                    old_path: None,
2516                    old_mode: None,
2517                    new_mode: Some(right.mode),
2518                    old_oid: None,
2519                    new_oid: Some(right.oid),
2520                });
2521            }
2522            continue;
2523        }
2524        let Some(right) = right else {
2525            changes.push(NameStatusEntry {
2526                status: NameStatus::Deleted,
2527                path: git_path.clone().into(),
2528                old_path: None,
2529                old_mode: Some(left.mode),
2530                new_mode: None,
2531                old_oid: Some(left.oid),
2532                new_oid: None,
2533            });
2534            continue;
2535        };
2536        if right != *left {
2537            changes.push(NameStatusEntry {
2538                status: modify_or_type_change(left.mode, right.mode),
2539                path: git_path.clone().into(),
2540                old_path: None,
2541                old_mode: Some(left.mode),
2542                new_mode: Some(right.mode),
2543                old_oid: Some(left.oid),
2544                new_oid: Some(right.oid),
2545            });
2546        }
2547    }
2548    Ok(IndexWorktreeDiff {
2549        entries: changes,
2550        staged_gitlinks,
2551    })
2552}
2553
/// The conflict stages recorded for one unmerged index path.
///
/// Only the stage the index-vs-worktree diff compares against is kept; the
/// presence of a `ConflictStages` value is itself the "this path is unmerged"
/// marker.
struct ConflictStages {
    /// The stage-2 ("ours") entry, or `None` when the path has no stage-2
    /// entry in the index.
    ours: Option<TrackedEntry>,
}
2558
2559/// Read the higher-stage (conflict) index entries, keyed by path, recording the
2560/// "ours" (stage 2) entry git diffs the worktree against. Paths with only a
2561/// stage-0 entry are absent from the result.
2562fn read_unmerged_stages(
2563    git_dir: &Path,
2564    format: ObjectFormat,
2565) -> Result<BTreeMap<Vec<u8>, ConflictStages>> {
2566    let index_path = sley_index::repository_index_path(git_dir);
2567    if !index_path.exists() {
2568        return Ok(BTreeMap::new());
2569    }
2570    let index = sley_index::read_repository_index(git_dir, format)?;
2571    let mut out: BTreeMap<Vec<u8>, ConflictStages> = BTreeMap::new();
2572    for entry in &index.entries {
2573        let stage = entry.stage();
2574        if stage == sley_index::Stage::Normal {
2575            continue;
2576        }
2577        let path = entry.path.clone().into_bytes();
2578        let slot = out.entry(path).or_insert(ConflictStages { ours: None });
2579        if stage == sley_index::Stage::Ours {
2580            slot.ours = Some(TrackedEntry {
2581                mode: entry.mode,
2582                oid: entry.oid,
2583            });
2584        }
2585    }
2586    Ok(out)
2587}
2588
2589fn apply_name_status_options_to_index_worktree_changes(
2590    mut changes: Vec<NameStatusEntry>,
2591    options: DiffNameStatusOptions,
2592) -> Result<Vec<NameStatusEntry>> {
2593    if options.detect_renames {
2594        changes = detect_exact_renames_from_changes(changes, options.rename_empty);
2595    } else if options.detect_copies {
2596        changes.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2597    }
2598    Ok(changes)
2599}
2600
2601fn detect_exact_renames_from_changes(
2602    changes: Vec<NameStatusEntry>,
2603    rename_empty: bool,
2604) -> Vec<NameStatusEntry> {
2605    let added = changes
2606        .iter()
2607        .enumerate()
2608        .filter(|(_, entry)| entry.status == NameStatus::Added)
2609        .collect::<Vec<_>>();
2610    let deleted = changes
2611        .iter()
2612        .enumerate()
2613        .filter(|(_, entry)| entry.status == NameStatus::Deleted)
2614        .collect::<Vec<_>>();
2615    let mut consumed_added = BTreeSet::new();
2616    let mut consumed_deleted = BTreeSet::new();
2617    let mut result = Vec::new();
2618
2619    for (deleted_index, deleted_entry) in deleted {
2620        let Some(old_oid) = deleted_entry.old_oid else {
2621            continue;
2622        };
2623        if !rename_empty && is_empty_blob_oid(&old_oid) {
2624            continue;
2625        }
2626        if let Some((added_index, added_entry)) = added.iter().find(|(added_index, added_entry)| {
2627            !consumed_added.contains(added_index) && added_entry.new_oid == Some(old_oid)
2628        }) {
2629            consumed_deleted.insert(deleted_index);
2630            consumed_added.insert(*added_index);
2631            result.push(NameStatusEntry {
2632                status: NameStatus::Renamed(100),
2633                path: added_entry.path.clone(),
2634                old_path: Some(deleted_entry.path.clone()),
2635                old_mode: deleted_entry.old_mode,
2636                new_mode: added_entry.new_mode,
2637                old_oid: deleted_entry.old_oid,
2638                new_oid: added_entry.new_oid,
2639            });
2640        }
2641    }
2642
2643    for (index, entry) in changes.into_iter().enumerate() {
2644        if consumed_added.contains(&index) || consumed_deleted.contains(&index) {
2645            continue;
2646        }
2647        result.push(entry);
2648    }
2649    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2650    result
2651}
2652
2653/// Index-vs-worktree name-status for **`git diff-files`** (plumbing), which
2654/// selects changed paths by the cached *stat* rather than by content.
2655///
2656/// This is the crucial difference from [`diff_name_status_index_worktree_with_options`]
2657/// (the engine behind porcelain `git diff`): porcelain `git diff` refreshes the
2658/// index first, so a stat-dirty-but-content-identical entry (a `touch`ed file, or
2659/// a freshly `rm --cached`-then-`reset --no-refresh` entry with a zeroed cached
2660/// stat) is re-stamped clean and suppressed. `git diff-files` does **not** refresh
2661/// — it reports every entry whose cached stat fails to prove it clean as `M`,
2662/// without re-hashing the content to "rescue" it (`builtin/diff.c` →
2663/// `run_diff_files` → `ie_match_stat`). The raw / name-only / name-status output
2664/// and the `--quiet`/`--exit-code` status therefore list such entries even when
2665/// the content is byte-identical; patch/stat output, which diffs actual content,
2666/// renders them as an empty hunk.
2667///
2668/// We layer that stat-based selection on top of the content-based diff: the
2669/// content diff already catches adds/deletes/genuine-content modifies (with
2670/// rename detection), and we then append a `Modified` entry for any stage-0 path
2671/// whose worktree file is present and whose cached stat is dirty per
2672/// [`IndexStatCache::index_entry_worktree_stat_dirty`] but which the content diff
2673/// did not already report. Content-identical stat-dirty entries cannot be rename
2674/// sources/targets (their content is unchanged), so they never interact with the
2675/// rename machinery — they are plain `M`.
2676pub fn diff_name_status_index_worktree_for_diff_files_with_options(
2677    worktree_root: impl AsRef<Path>,
2678    git_dir: impl AsRef<Path>,
2679    format: ObjectFormat,
2680    options: DiffNameStatusOptions,
2681) -> Result<Vec<NameStatusEntry>> {
2682    let worktree_root = worktree_root.as_ref();
2683    let git_dir = git_dir.as_ref();
2684    let changes =
2685        diff_name_status_index_worktree_with_options(worktree_root, git_dir, format, options)?;
2686    augment_with_stat_dirty_entries(worktree_root, git_dir, format, changes)
2687}
2688
2689/// As [`diff_name_status_index_worktree_for_diff_files_with_options`], but with
2690/// full rename/copy options (the `git diff-files -M/-C` path). The stat-dirty
2691/// augmentation is identical; only the underlying content diff differs.
2692pub fn diff_name_status_index_worktree_for_diff_files_with_rename_options(
2693    worktree_root: impl AsRef<Path>,
2694    git_dir: impl AsRef<Path>,
2695    format: ObjectFormat,
2696    options: RenameDetectionOptions,
2697) -> Result<Vec<NameStatusEntry>> {
2698    let worktree_root = worktree_root.as_ref();
2699    let git_dir = git_dir.as_ref();
2700    let changes = diff_name_status_index_worktree_with_rename_options(
2701        worktree_root,
2702        git_dir,
2703        format,
2704        options,
2705    )?;
2706    augment_with_stat_dirty_entries(worktree_root, git_dir, format, changes)
2707}
2708
2709/// Append a `Modified` entry for every stage-0 index path whose worktree file is
2710/// present and whose cached stat is dirty (`ce_match_stat` "changed") but which
2711/// `content_changes` did not already report. The result is re-sorted by path so
2712/// the merged set keeps git's diff-queue ordering. New-side oids on the added
2713/// entries are left `None` (rendered as zeros in raw output), matching git, which
2714/// reports the worktree blob oid only for entries it has hashed.
2715fn augment_with_stat_dirty_entries(
2716    worktree_root: &Path,
2717    git_dir: &Path,
2718    format: ObjectFormat,
2719    mut content_changes: Vec<NameStatusEntry>,
2720) -> Result<Vec<NameStatusEntry>> {
2721    let IndexSnapshot {
2722        entries: index,
2723        stat_cache,
2724    } = read_index_snapshot(git_dir, format)?;
2725    // Paths the content diff already accounts for (by new-side path, the position
2726    // git queues a pair at — a rename's destination, a modify/add/delete's path).
2727    let already_reported: BTreeSet<&[u8]> = content_changes
2728        .iter()
2729        .map(|entry| entry.path.as_bytes())
2730        .collect();
2731    let mut extras = Vec::new();
2732    for (git_path, tracked) in &index {
2733        if already_reported.contains(git_path.as_slice()) {
2734            continue;
2735        }
2736        let Some(cached) = stat_cache.entry_for_git_path(git_path) else {
2737            continue;
2738        };
2739        // Gitlinks (submodules) have their own dirtiness model and are not stat-
2740        // compared here; the content diff already handles changed gitlink oids.
2741        if sley_index::is_gitlink(tracked.mode) {
2742            continue;
2743        }
2744        let path = worktree_path_for_repo_path(worktree_root, git_path);
2745        let Ok(metadata) = fs::symlink_metadata(&path) else {
2746            // A missing worktree file is a deletion, which the content diff
2747            // already reports; nothing to add here.
2748            continue;
2749        };
2750        if !(metadata.is_file() || metadata.file_type().is_symlink()) {
2751            continue;
2752        }
2753        match stat_cache.index_entry_worktree_stat_verdict(cached, &metadata) {
2754            sley_index::StatVerdict::Clean => continue,
2755            sley_index::StatVerdict::Dirty => {}
2756            // A racily-clean entry must be resolved by content: git re-hashes it
2757            // (`ce_compare_data`) and only reports `M` when the worktree bytes
2758            // actually differ from the cached oid — so a `touch`ed-then-re-`add`ed
2759            // file (same-second mtime as the index) stays clean.
2760            sley_index::StatVerdict::RacyNeedsContentCheck => {
2761                if worktree_oid_matches_index(worktree_root, git_path, &metadata, tracked, format)?
2762                {
2763                    continue;
2764                }
2765            }
2766        }
2767        extras.push(NameStatusEntry {
2768            status: NameStatus::Modified,
2769            path: git_path.clone().into(),
2770            old_path: None,
2771            old_mode: Some(tracked.mode),
2772            new_mode: Some(tracked.mode),
2773            old_oid: Some(tracked.oid),
2774            new_oid: None,
2775        });
2776    }
2777    if !extras.is_empty() {
2778        content_changes.extend(extras);
2779        content_changes
2780            .sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2781    }
2782    Ok(content_changes)
2783}
2784
2785/// Hash the worktree file or symlink at `path` into the `(mode, oid)` blob entry
2786/// git would record for it. `metadata` must already describe a regular file or a
2787/// symlink — gitlink and directory classification is the caller's concern, since
2788/// those need the index/HEAD context this leaf does not have. This is the single
2789/// owner of the symlink-vs-regular body-source and mode split that `diff-files`
2790/// ([`worktree_oid_matches_index`]), the candidate-path collector
2791/// ([`worktree_entry_for_path`]), and the index↔worktree walk
2792/// ([`index_worktree_change_for_entry`]) all share; before consolidation each
2793/// open-coded it with a bare `0o120000`.
2794fn classify_worktree_entry(
2795    path: &Path,
2796    metadata: &fs::Metadata,
2797    format: ObjectFormat,
2798) -> Result<TrackedEntry> {
2799    let is_symlink = metadata.file_type().is_symlink();
2800    let body = if is_symlink {
2801        symlink_target_bytes(path)?
2802    } else {
2803        fs::read(path)?
2804    };
2805    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
2806    let mode = if is_symlink {
2807        sley_index::SYMLINK_MODE
2808    } else {
2809        file_mode(metadata)
2810    };
2811    Ok(TrackedEntry { mode, oid })
2812}
2813
2814/// Whether the worktree file at `git_path` hashes to the index entry's oid (mode
2815/// included). Used to resolve a racily-clean `diff-files` entry: git re-hashes the
2816/// content and only reports it changed when the bytes truly differ. Shares the
2817/// worktree-oid computation with [`worktree_entry_for_path`] via
2818/// [`classify_worktree_entry`].
2819fn worktree_oid_matches_index(
2820    worktree_root: &Path,
2821    git_path: &[u8],
2822    metadata: &fs::Metadata,
2823    index_entry: &TrackedEntry,
2824    format: ObjectFormat,
2825) -> Result<bool> {
2826    let path = worktree_path_for_repo_path(worktree_root, git_path);
2827    let entry = classify_worktree_entry(&path, metadata, format)?;
2828    Ok(entry.oid == index_entry.oid && entry.mode == index_entry.mode)
2829}
2830
2831pub fn diff_name_status_trees_with_options(
2832    db: &FileObjectDatabase,
2833    format: ObjectFormat,
2834    left_tree: &ObjectId,
2835    right_tree: &ObjectId,
2836    options: DiffNameStatusOptions,
2837) -> Result<Vec<NameStatusEntry>> {
2838    // `--find-copies-harder` may pair an *unchanged* left-side file as a copy
2839    // source, so it needs the complete left map; every other mode only consults
2840    // changed paths, so the pruned simultaneous walk (which skips identical
2841    // subtrees) suffices and produces byte-identical output.
2842    let needs_full_maps = options.detect_copies && options.find_copies_harder;
2843    let (left_entries, right_entries) = if needs_full_maps {
2844        collect_full_tree_pair(db, format, left_tree, right_tree)?
2845    } else {
2846        changed_tree_entries(db, format, left_tree, right_tree)?
2847    };
2848    diff_name_status_maps(
2849        &left_entries,
2850        &right_entries,
2851        left_entries.keys().chain(right_entries.keys()),
2852        options,
2853    )
2854}
2855
2856pub fn diff_name_status_empty_tree_with_options(
2857    db: &FileObjectDatabase,
2858    format: ObjectFormat,
2859    right_tree: &ObjectId,
2860    options: DiffNameStatusOptions,
2861) -> Result<Vec<NameStatusEntry>> {
2862    let left_entries = BTreeMap::new();
2863    let mut right_entries = BTreeMap::new();
2864    collect_tree_entries(db, format, right_tree, Vec::new(), &mut right_entries)?;
2865    diff_name_status_maps(&left_entries, &right_entries, right_entries.keys(), options)
2866}
2867
2868/// Diff two trees with full rename/copy options, including inexact (similarity)
2869/// detection when [`RenameDetectionOptions::detect_inexact`] is set.
2870///
2871/// Blob bytes for similarity scoring are read from `db`. This is the inexact-
2872/// aware counterpart of [`diff_name_status_trees_with_options`]; passing
2873/// `RenameDetectionOptions::default()` (or `RenameDetectionOptions { base, ..
2874/// default }` with `detect_inexact: false`) reproduces the exact-only behaviour.
2875pub fn diff_name_status_trees_with_rename_options(
2876    db: &FileObjectDatabase,
2877    format: ObjectFormat,
2878    left_tree: &ObjectId,
2879    right_tree: &ObjectId,
2880    options: RenameDetectionOptions,
2881) -> Result<Vec<NameStatusEntry>> {
2882    // See `diff_name_status_trees_with_options`: only `--find-copies-harder`
2883    // needs unchanged left entries as copy sources; otherwise the pruned walk
2884    // (skipping identical subtrees) yields identical output far more cheaply.
2885    let needs_full_maps = options.base.detect_copies && options.base.find_copies_harder;
2886    let (left_entries, right_entries) = if needs_full_maps {
2887        collect_full_tree_pair(db, format, left_tree, right_tree)?
2888    } else {
2889        changed_tree_entries(db, format, left_tree, right_tree)?
2890    };
2891    diff_name_status_maps_with_renames(
2892        &left_entries,
2893        &right_entries,
2894        left_entries.keys().chain(right_entries.keys()),
2895        options,
2896        |oid| read_blob_bytes(db, oid),
2897    )
2898}
2899
2900/// Diff the empty tree against `right_tree` with full rename/copy options.
2901///
2902/// As with [`diff_name_status_trees_with_rename_options`], inexact detection is
2903/// gated on [`RenameDetectionOptions::detect_inexact`]; the left (empty) side
2904/// has no sources, so only copies among the right-side additions can match when
2905/// `find_copies_harder` is set.
2906pub fn diff_name_status_empty_tree_with_rename_options(
2907    db: &FileObjectDatabase,
2908    format: ObjectFormat,
2909    right_tree: &ObjectId,
2910    options: RenameDetectionOptions,
2911) -> Result<Vec<NameStatusEntry>> {
2912    let left_entries = BTreeMap::new();
2913    let mut right_entries = BTreeMap::new();
2914    collect_tree_entries(db, format, right_tree, Vec::new(), &mut right_entries)?;
2915    diff_name_status_maps_with_renames(
2916        &left_entries,
2917        &right_entries,
2918        right_entries.keys(),
2919        options,
2920        |oid| read_blob_bytes(db, oid),
2921    )
2922}
2923
2924/// Read a blob's raw bytes from the ODB, returning `None` if the object cannot
2925/// be read or is not a blob. Used as the similarity-scoring blob fetcher; a
2926/// missing object simply makes a candidate pair non-similar rather than failing
2927/// the whole diff.
2928fn read_blob_bytes(db: &FileObjectDatabase, oid: &ObjectId) -> Option<Vec<u8>> {
2929    match db.read_object(oid) {
2930        Ok(object) if object.object_type == ObjectType::Blob => Some(object.body.clone()),
2931        _ => None,
2932    }
2933}
2934
2935/// Build the raw per-path add/delete/modify change list (before any rename or
2936/// copy detection) from the two entry maps and the candidate path set.
2937fn raw_name_status_changes_for_unique_paths<'a>(
2938    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2939    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2940    paths: impl Iterator<Item = &'a Vec<u8>>,
2941) -> Vec<NameStatusEntry> {
2942    let mut changes = Vec::new();
2943    for path in paths {
2944        let left = left_entries.get(path);
2945        let right = right_entries.get(path);
2946        let status = match (left, right) {
2947            (None, Some(_)) => Some(NameStatus::Added),
2948            (Some(_), None) => Some(NameStatus::Deleted),
2949            (Some(left), Some(right)) if left != right => {
2950                Some(modify_or_type_change(left.mode, right.mode))
2951            }
2952            _ => None,
2953        };
2954        if let Some(status) = status {
2955            changes.push(NameStatusEntry {
2956                status,
2957                path: path.clone().into(),
2958                old_path: None,
2959                old_mode: left.map(|entry| entry.mode),
2960                new_mode: right.map(|entry| entry.mode),
2961                old_oid: left.map(|entry| entry.oid),
2962                new_oid: right.map(|entry| entry.oid),
2963            });
2964        }
2965    }
2966    changes
2967}
2968
2969fn diff_name_status_maps<'a>(
2970    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2971    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2972    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2973    options: DiffNameStatusOptions,
2974) -> Result<Vec<NameStatusEntry>> {
2975    let paths = candidate_path_set(candidate_paths);
2976    diff_name_status_maps_for_path_set(left_entries, right_entries, &paths, options)
2977}
2978
2979fn diff_name_status_maps_for_path_set(
2980    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2981    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2982    candidate_paths: &BTreeSet<Vec<u8>>,
2983    options: DiffNameStatusOptions,
2984) -> Result<Vec<NameStatusEntry>> {
2985    diff_name_status_maps_for_unique_paths(
2986        left_entries,
2987        right_entries,
2988        candidate_paths.iter(),
2989        options,
2990    )
2991}
2992
2993fn diff_name_status_maps_for_unique_paths<'a>(
2994    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2995    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2996    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2997    options: DiffNameStatusOptions,
2998) -> Result<Vec<NameStatusEntry>> {
2999    let mut changes =
3000        raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
3001    if options.detect_renames {
3002        changes = detect_exact_renames(changes, left_entries, right_entries, options.rename_empty);
3003    }
3004    if options.detect_copies {
3005        changes = detect_exact_copies(
3006            changes,
3007            left_entries,
3008            right_entries,
3009            options.find_copies_harder,
3010            options.rename_empty,
3011        );
3012    }
3013    Ok(changes)
3014}
3015
3016/// Like [`diff_name_status_maps`], but additionally runs inexact (similarity)
3017/// rename/copy detection when `options.detect_inexact` is set.
3018///
3019/// `fetch_blob` resolves an [`ObjectId`] to that blob's raw bytes; it is only
3020/// consulted for the candidate pairs considered during inexact detection, and
3021/// only when inexact detection is enabled. A pair whose blob bytes cannot be
3022/// fetched is simply skipped (treated as not similar), so a missing object never
3023/// fails the whole diff.
3024fn diff_name_status_maps_with_renames<'a>(
3025    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3026    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3027    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
3028    options: RenameDetectionOptions,
3029    fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
3030) -> Result<Vec<NameStatusEntry>> {
3031    let paths = candidate_path_set(candidate_paths);
3032    diff_name_status_maps_with_renames_for_path_set(
3033        left_entries,
3034        right_entries,
3035        &paths,
3036        options,
3037        fetch_blob,
3038    )
3039}
3040
3041fn diff_name_status_maps_with_renames_for_path_set(
3042    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3043    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3044    candidate_paths: &BTreeSet<Vec<u8>>,
3045    options: RenameDetectionOptions,
3046    fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
3047) -> Result<Vec<NameStatusEntry>> {
3048    diff_name_status_maps_with_renames_for_unique_paths(
3049        left_entries,
3050        right_entries,
3051        candidate_paths.iter(),
3052        options,
3053        fetch_blob,
3054    )
3055}
3056
3057fn diff_name_status_maps_with_renames_for_unique_paths<'a>(
3058    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3059    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3060    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
3061    options: RenameDetectionOptions,
3062    fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
3063) -> Result<Vec<NameStatusEntry>> {
3064    let base = options.base;
3065    let mut changes =
3066        raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
3067    if base.detect_renames {
3068        changes = detect_exact_renames(changes, left_entries, right_entries, base.rename_empty);
3069    }
3070    // Inexact rename detection runs after exact renames so exact matches keep
3071    // priority (and their score of 100). It only fires when rename detection is
3072    // enabled at all, mirroring git's `-M`.
3073    if base.detect_renames && options.detect_inexact {
3074        changes = detect_inexact_renames(changes, &options, &fetch_blob);
3075    }
3076    if base.detect_copies {
3077        changes = detect_exact_copies(
3078            changes,
3079            left_entries,
3080            right_entries,
3081            base.find_copies_harder,
3082            base.rename_empty,
3083        );
3084    }
3085    if base.detect_copies && options.detect_inexact {
3086        changes = detect_inexact_copies(changes, left_entries, &options, &fetch_blob);
3087    }
3088    Ok(changes)
3089}
3090
3091fn detect_exact_renames(
3092    changes: Vec<NameStatusEntry>,
3093    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3094    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3095    rename_empty: bool,
3096) -> Vec<NameStatusEntry> {
3097    let added = changes
3098        .iter()
3099        .enumerate()
3100        .filter(|(_, entry)| entry.status == NameStatus::Added)
3101        .map(|(idx, entry)| (idx, entry.path.clone()))
3102        .collect::<Vec<_>>();
3103    // Candidate sources in path order (git's `rename_src` ordering), so the
3104    // best-source search tie-breaks deterministically.
3105    let mut sources = changes
3106        .iter()
3107        .filter(|entry| entry.status == NameStatus::Deleted)
3108        .filter_map(|entry| {
3109            left_entries
3110                .get(entry.path.as_bytes())
3111                .map(|left| (entry.path.clone(), left.oid))
3112        })
3113        .collect::<Vec<_>>();
3114    sources.sort_by(|a, b| a.0.cmp(&b.0));
3115    let mut src_used = vec![false; sources.len()];
3116    let mut consumed = BTreeSet::new();
3117    let mut renamed_old_paths = BTreeSet::new();
3118    let mut result = Vec::new();
3119
3120    // git's `find_identical_files`: for each destination, among the still-unused
3121    // sources with the identical OID, prefer one that shares the destination's
3122    // basename (score 2 short-circuits; otherwise the first such source wins).
3123    // Iterating destinations (not sources) is what lets a same-basename source
3124    // win over an alphabetically-earlier different-basename one.
3125    for (idx, new_path) in &added {
3126        let Some(right) = right_entries.get(new_path.as_bytes()) else {
3127            continue;
3128        };
3129        if !rename_empty && is_empty_blob_oid(&right.oid) {
3130            continue;
3131        }
3132        let mut best: Option<usize> = None;
3133        let mut best_score = -1i32;
3134        for (si, (src_path, src_oid)) in sources.iter().enumerate() {
3135            if src_used[si] || *src_oid != right.oid {
3136                continue;
3137            }
3138            let score = 1 + i32::from(path_basename(src_path) == path_basename(new_path));
3139            if score > best_score {
3140                best = Some(si);
3141                best_score = score;
3142                if score == 2 {
3143                    break;
3144                }
3145            }
3146        }
3147        if let Some(si) = best {
3148            src_used[si] = true;
3149            consumed.insert(*idx);
3150            let old_path = sources[si].0.clone();
3151            let left = &left_entries[old_path.as_bytes()];
3152            renamed_old_paths.insert(old_path.clone());
3153            result.push(NameStatusEntry {
3154                status: NameStatus::Renamed(100),
3155                path: new_path.clone(),
3156                old_path: Some(old_path),
3157                old_mode: Some(left.mode),
3158                new_mode: Some(right.mode),
3159                old_oid: Some(left.oid),
3160                new_oid: Some(right.oid),
3161            });
3162        }
3163    }
3164
3165    for (idx, entry) in changes.into_iter().enumerate() {
3166        if entry.status == NameStatus::Added && consumed.contains(&idx) {
3167            continue;
3168        }
3169        if entry.status == NameStatus::Deleted && renamed_old_paths.contains(&entry.path) {
3170            continue;
3171        }
3172        result.push(entry);
3173    }
3174    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
3175    result
3176}
3177
3178fn detect_exact_copies(
3179    changes: Vec<NameStatusEntry>,
3180    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3181    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3182    find_copies_harder: bool,
3183    rename_empty: bool,
3184) -> Vec<NameStatusEntry> {
3185    let changed_sources = changes
3186        .iter()
3187        .filter(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified))
3188        .map(|entry| entry.path.clone())
3189        .collect::<BTreeSet<_>>();
3190    let source_paths = left_entries
3191        .keys()
3192        .filter(|path| find_copies_harder || changed_sources.contains(path.as_slice()))
3193        .cloned()
3194        .collect::<Vec<_>>();
3195
3196    let mut result = Vec::new();
3197    for entry in changes {
3198        if entry.status != NameStatus::Added {
3199            result.push(entry);
3200            continue;
3201        }
3202        let Some(right) = right_entries.get(entry.path.as_bytes()) else {
3203            result.push(entry);
3204            continue;
3205        };
3206        if let Some(old_path) = source_paths.iter().find(|old_path| {
3207            old_path.as_slice() != entry.path.as_bytes()
3208                && left_entries.get(*old_path).is_some_and(|left| {
3209                    left.oid == right.oid && (rename_empty || !is_empty_blob_oid(&left.oid))
3210                })
3211        }) {
3212            result.push(NameStatusEntry {
3213                status: NameStatus::Copied(100),
3214                path: entry.path,
3215                old_path: Some(old_path.clone().into()),
3216                old_mode: left_entries
3217                    .get(old_path.as_slice())
3218                    .map(|entry| entry.mode),
3219                new_mode: entry.new_mode,
3220                old_oid: left_entries.get(old_path.as_slice()).map(|entry| entry.oid),
3221                new_oid: entry.new_oid,
3222            });
3223        } else {
3224            result.push(entry);
3225        }
3226    }
3227    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
3228    result
3229}
3230
/// Old-side metadata of a rename source, snapshotted before the source delete
/// entry is consumed so it can be attached to the renamed destination.
///
/// `detect_inexact_renames` fills one of these per matched `Deleted` entry and
/// copies the fields into the destination's `old_path`/`old_mode`/`old_oid`.
#[derive(Debug, Clone)]
struct RenameSourceMeta {
    /// Path of the deleted entry (becomes the rename's `old_path`).
    path: BString,
    /// The deleted entry's `old_mode`, if it had one.
    mode: Option<u32>,
    /// The deleted entry's `old_oid`, if it had one.
    oid: Option<ObjectId>,
}
3239
/// A scored candidate pairing of a deleted source with an added destination,
/// used to order inexact-rename assignment best-match-first.
///
/// `src` and `dst` are positions in the local candidate lists built by
/// `detect_inexact_renames`, not positions in the full change list.
struct ScoredPair {
    /// Index into the `deleted` candidate list.
    src: usize,
    /// Index into the `added` candidate list.
    dst: usize,
    /// Similarity percentage in `0..=100`.
    score: u8,
}
3250
3251/// Inexact rename detection: pair still-unmatched deleted files with still-
3252/// unmatched added files by content similarity, replacing the best matches
3253/// (similarity >= `rename_threshold`) with [`NameStatus::Renamed`].
3254///
3255/// Exact renames have already run, so the only `Deleted`/`Added` entries left
3256/// here are ones with no identical-OID partner. Assignment is greedy by
3257/// descending score (then by source/destination order for determinism), and
3258/// each source and destination is used at most once — matching git's
3259/// `diffcore-rename` behaviour. Empty blobs are never used as a rename source
3260/// when `rename_empty` is false, mirroring exact detection.
3261fn detect_inexact_renames(
3262    changes: Vec<NameStatusEntry>,
3263    options: &RenameDetectionOptions,
3264    fetch_blob: &impl Fn(&ObjectId) -> Option<Vec<u8>>,
3265) -> Vec<NameStatusEntry> {
3266    let threshold = options.rename_threshold;
3267    // A threshold above 100 can never be met; nothing to do.
3268    if threshold > 100 {
3269        return changes;
3270    }
3271
3272    // Collect the candidate sources (Deletes) and destinations (Adds) with their
3273    // positions in `changes`, fetching blob bytes once each.
3274    let mut deleted: Vec<(usize, Vec<u8>)> = Vec::new();
3275    let mut added: Vec<(usize, Vec<u8>)> = Vec::new();
3276    for (idx, entry) in changes.iter().enumerate() {
3277        match entry.status {
3278            NameStatus::Deleted => {
3279                let Some(oid) = entry.old_oid.as_ref() else {
3280                    continue;
3281                };
3282                if !options.base.rename_empty && is_empty_blob_oid(oid) {
3283                    continue;
3284                }
3285                if let Some(bytes) = fetch_blob(oid) {
3286                    deleted.push((idx, bytes));
3287                }
3288            }
3289            NameStatus::Added => {
3290                let Some(oid) = entry.new_oid.as_ref() else {
3291                    continue;
3292                };
3293                if !options.base.rename_empty && is_empty_blob_oid(oid) {
3294                    continue;
3295                }
3296                if let Some(bytes) = fetch_blob(oid) {
3297                    added.push((idx, bytes));
3298                }
3299            }
3300            _ => {}
3301        }
3302    }
3303
3304    if deleted.is_empty() || added.is_empty() {
3305        return changes;
3306    }
3307
3308    // git's `too_many_rename_candidates`: if the rename matrix would exceed a
3309    // `rename_limit` square, skip inexact detection wholesale (exact-OID renames
3310    // were already resolved upstream). A non-positive limit is unlimited.
3311    if options.rename_limit > 0
3312        && deleted
3313            .len()
3314            .saturating_mul(added.len())
3315            .gt(&options.rename_limit.saturating_mul(options.rename_limit))
3316    {
3317        return changes;
3318    }
3319
3320    let mut src_used = vec![false; deleted.len()];
3321    let mut dst_used = vec![false; added.len()];
3322    // destination changes-index -> (source changes-index, score).
3323    let mut rename_of: BTreeMap<usize, (usize, u8)> = BTreeMap::new();
3324
3325    // Basename pre-pass (git's `find_basename_matches`): before the global
3326    // matrix, pair unique-basename src/dst at the stricter basename score, so a
3327    // same-basename rename wins over a globally-more-similar different basename.
3328    // git only does this for pure rename detection (`!want_copies`); when copies
3329    // are also wanted it culls differently and skips the basename heuristic.
3330    if !options.base.detect_copies {
3331        let src_paths: Vec<&[u8]> = deleted
3332            .iter()
3333            .map(|(idx, _)| &changes[*idx].path[..])
3334            .collect();
3335        let dst_paths: Vec<&[u8]> = added
3336            .iter()
3337            .map(|(idx, _)| &changes[*idx].path[..])
3338            .collect();
3339        let basename_pairs = basename_rename_matches(
3340            &src_paths,
3341            &dst_paths,
3342            &src_used,
3343            &dst_used,
3344            threshold,
3345            |si, di| Some(blob_similarity(&deleted[si].1, &added[di].1)),
3346        );
3347        for (si, di, score) in basename_pairs {
3348            src_used[si] = true;
3349            dst_used[di] = true;
3350            rename_of.insert(added[di].0, (deleted[si].0, score));
3351        }
3352    }
3353
3354    // Score every remaining (delete, add) pair; keep only those meeting the
3355    // threshold.
3356    let mut pairs: Vec<ScoredPair> = Vec::new();
3357    for (si, (_, src_bytes)) in deleted.iter().enumerate() {
3358        if src_used[si] {
3359            continue;
3360        }
3361        for (di, (_, dst_bytes)) in added.iter().enumerate() {
3362            if dst_used[di] {
3363                continue;
3364            }
3365            let score = blob_similarity(src_bytes, dst_bytes);
3366            if score >= threshold {
3367                pairs.push(ScoredPair {
3368                    src: si,
3369                    dst: di,
3370                    score,
3371                });
3372            }
3373        }
3374    }
3375    // Best score first; ties broken by source then destination order so the
3376    // result is deterministic regardless of input ordering.
3377    pairs.sort_by(|a, b| {
3378        b.score
3379            .cmp(&a.score)
3380            .then_with(|| a.src.cmp(&b.src))
3381            .then_with(|| a.dst.cmp(&b.dst))
3382    });
3383
3384    for pair in pairs {
3385        if src_used[pair.src] || dst_used[pair.dst] {
3386            continue;
3387        }
3388        src_used[pair.src] = true;
3389        dst_used[pair.dst] = true;
3390        let src_change_idx = deleted[pair.src].0;
3391        let dst_change_idx = added[pair.dst].0;
3392        rename_of.insert(dst_change_idx, (src_change_idx, pair.score));
3393    }
3394
3395    if rename_of.is_empty() {
3396        return changes;
3397    }
3398
3399    // Snapshot the source (delete) entries' metadata before we consume them, so
3400    // each renamed destination can carry the correct old path/mode/oid.
3401    let consumed_sources: BTreeSet<usize> =
3402        rename_of.values().map(|(src_idx, _)| *src_idx).collect();
3403    let source_meta: BTreeMap<usize, RenameSourceMeta> = consumed_sources
3404        .iter()
3405        .map(|&src_idx| {
3406            let src = &changes[src_idx];
3407            (
3408                src_idx,
3409                RenameSourceMeta {
3410                    path: src.path.clone(),
3411                    mode: src.old_mode,
3412                    oid: src.old_oid,
3413                },
3414            )
3415        })
3416        .collect();
3417
3418    let mut result = Vec::with_capacity(changes.len());
3419    for (idx, entry) in changes.into_iter().enumerate() {
3420        if consumed_sources.contains(&idx) {
3421            // This delete became the source of a rename; drop it.
3422            continue;
3423        }
3424        if let Some((src_idx, score)) = rename_of.get(&idx) {
3425            // The destination becomes a rename from the matched source. Pull the
3426            // old-side metadata from the snapshot; the new-side metadata stays as
3427            // the destination's.
3428            let meta = source_meta
3429                .get(src_idx)
3430                .cloned()
3431                .unwrap_or(RenameSourceMeta {
3432                    path: BString::default(),
3433                    mode: None,
3434                    oid: None,
3435                });
3436            result.push(NameStatusEntry {
3437                status: NameStatus::Renamed(*score),
3438                path: entry.path,
3439                old_path: Some(meta.path),
3440                old_mode: meta.mode,
3441                new_mode: entry.new_mode,
3442                old_oid: meta.oid,
3443                new_oid: entry.new_oid,
3444            });
3445            continue;
3446        }
3447        result.push(entry);
3448    }
3449
3450    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
3451    result
3452}
3453
3454/// Inexact copy detection: for each still-`Added` file, find the most similar
3455/// candidate *source* on the left side (similarity >= `copy_threshold`) and, if
3456/// found, report it as a [`NameStatus::Copied`]. The source is not removed
3457/// (copies leave the original in place).
3458///
3459/// Candidate sources follow the same rule as exact copy detection: with
3460/// `find_copies_harder` every left-side path is eligible; otherwise only paths
3461/// that were themselves changed (deleted or modified) on this diff. Exact copies
3462/// have already run, so any remaining `Added` here had no identical-OID source.
3463fn detect_inexact_copies(
3464    changes: Vec<NameStatusEntry>,
3465    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3466    options: &RenameDetectionOptions,
3467    fetch_blob: &impl Fn(&ObjectId) -> Option<Vec<u8>>,
3468) -> Vec<NameStatusEntry> {
3469    let threshold = options.copy_threshold;
3470    if threshold > 100 {
3471        return changes;
3472    }
3473
3474    let changed_sources = changes
3475        .iter()
3476        .filter(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified))
3477        .map(|entry| entry.path.clone())
3478        .collect::<BTreeSet<_>>();
3479    // Eligible source paths, paired with their bytes (fetched lazily/once).
3480    let mut sources: Vec<(Vec<u8>, &TrackedEntry, Vec<u8>)> = Vec::new();
3481    for (path, tracked) in left_entries {
3482        if !(options.base.find_copies_harder || changed_sources.contains(path.as_slice())) {
3483            continue;
3484        }
3485        if !options.base.rename_empty && is_empty_blob_oid(&tracked.oid) {
3486            continue;
3487        }
3488        if let Some(bytes) = fetch_blob(&tracked.oid) {
3489            sources.push((path.clone(), tracked, bytes));
3490        }
3491    }
3492    if sources.is_empty() {
3493        return changes;
3494    }
3495
3496    // git's `too_many_rename_candidates`: when the copy matrix would exceed a
3497    // `rename_limit` square, skip inexact copy detection wholesale. Under
3498    // `--find-copies-harder` the source set is every left-side path (not just the
3499    // changed ones), so this O(sources × dests) matrix is the one most likely to
3500    // blow up — yet the rename gate above guards only `detect_inexact_renames`.
3501    // A non-positive limit is unlimited. Mirrors the identical gate there.
3502    let dest_count = changes
3503        .iter()
3504        .filter(|entry| entry.status == NameStatus::Added)
3505        .count();
3506    if options.rename_limit > 0
3507        && sources
3508            .len()
3509            .saturating_mul(dest_count)
3510            .gt(&options.rename_limit.saturating_mul(options.rename_limit))
3511    {
3512        return changes;
3513    }
3514
3515    let mut result = Vec::with_capacity(changes.len());
3516    for entry in changes {
3517        if entry.status != NameStatus::Added {
3518            result.push(entry);
3519            continue;
3520        }
3521        let Some(new_oid) = entry.new_oid.as_ref() else {
3522            result.push(entry);
3523            continue;
3524        };
3525        let Some(dst_bytes) = fetch_blob(new_oid) else {
3526            result.push(entry);
3527            continue;
3528        };
3529
3530        // Pick the best-scoring source path that meets the threshold. Ties are
3531        // broken by path order (BTreeMap iteration is sorted) so the choice is
3532        // deterministic.
3533        let mut best: Option<(usize, u8)> = None;
3534        for (i, (src_path, _, src_bytes)) in sources.iter().enumerate() {
3535            if src_path.as_slice() == entry.path.as_bytes() {
3536                continue;
3537            }
3538            let score = blob_similarity(src_bytes, &dst_bytes);
3539            if score < threshold {
3540                continue;
3541            }
3542            match best {
3543                Some((_, best_score)) if best_score >= score => {}
3544                _ => best = Some((i, score)),
3545            }
3546        }
3547
3548        if let Some((src_idx, score)) = best {
3549            let (src_path, src_tracked, _) = &sources[src_idx];
3550            result.push(NameStatusEntry {
3551                status: NameStatus::Copied(score),
3552                path: entry.path,
3553                old_path: Some(src_path.clone().into()),
3554                old_mode: Some(src_tracked.mode),
3555                new_mode: entry.new_mode,
3556                old_oid: Some(src_tracked.oid),
3557                new_oid: entry.new_oid,
3558            });
3559        } else {
3560            result.push(entry);
3561        }
3562    }
3563    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
3564    result
3565}
3566
3567fn is_empty_blob_oid(oid: &ObjectId) -> bool {
3568    object_id_for_bytes(oid.format(), "blob", b"").is_ok_and(|empty| empty == *oid)
3569}
3570
3571// ===========================================================================
3572// Content similarity (the engine for inexact `-M`/`-C` rename/copy detection).
3573//
3574// This mirrors upstream git's similarity estimate from `diffcore-delta.c`
3575// (the span-hash counting) and `diffcore-rename.c` (the score formula), so the
3576// `R<score>`/`C<score>` we emit match git's percentages.
3577//
3578// The metric, precisely:
3579//
3580//   1. Each blob is broken into *spans*. Starting at a byte, we accumulate a
3581//      rolling hash of the bytes and end the span at the first `\n` (inclusive)
3582//      or once the span reaches `MAX_SPAN_BYTES` (64) bytes, whichever comes
3583//      first. (The 64-byte cap keeps a file with no/few newlines — e.g. a
3584//      binary blob or one very long line — from collapsing into a single span,
3585//      so similarity still tracks shared substrings.) Each span yields a
3586//      `(hash, byte_count)` pair, where `byte_count` is the span's length in
3587//      bytes. This is the exact loop git uses in `hash_chars()`.
3588//
3589//   2. The two blobs' spans are reduced to multisets keyed by hash: for each
3590//      hash we keep the total number of bytes spanned by entries with that
3591//      hash, on each side. `common_bytes` is then the sum over all hashes of
3592//      `min(bytes_on_src, bytes_on_dst)` — the bytes that exist on both sides.
3593//      This is git's `src_copied`.
3594//
//   3. The score is `common_bytes / max(size_src, size_dst)`, scaled to a
//      percentage using git's two-step integer arithmetic, in which both
//      divisions truncate:
//
//          internal = common_bytes * MAX_SCORE / max(size_src, size_dst)
//          score%   = internal * 100 / MAX_SCORE
//
//      with `MAX_SCORE == 60000`. The result is *not* rounded to the nearest
//      integer: `common_bytes = 4`, `max_size = 6` gives `internal = 40000`
//      and a score of 66 (git's `R066`), where rounding would give 67.
3604//
3605// Edge cases match git: two empty blobs are 100% similar (identical content);
3606// an empty blob vs a non-empty one is 0%. Equal byte buffers are always 100%.
3607
/// Maximum number of hashed bytes in a single similarity span before it is
/// force-cut, even if no newline has been seen.
///
/// Matches the 64-byte span cap git applies in `hash_chars()`
/// (`diffcore-delta.c`).
const MAX_SPAN_BYTES: usize = 64;
3612
3613/// Compute the content similarity of two blobs as an integer percentage in
3614/// `0..=100`, using git's span-hash counting metric (see the module comment
3615/// above for the exact definition).
3616///
3617/// The result is symmetric (`blob_similarity(a, b) == blob_similarity(b, a)`)
3618/// because the score divides the common-byte count by the larger of the two
3619/// sizes. Byte-identical blobs return `100`; a non-empty blob compared against
3620/// an empty one returns `0`; two empty blobs return `100`.
3621///
3622/// This is the same number git prints as `similarity index N%` and uses to
3623/// decide `-M`/`-C` rename and copy detection.
3624pub fn blob_similarity(a: &[u8], b: &[u8]) -> u8 {
3625    // Fast paths that also pin down the empty-blob conventions.
3626    if a == b {
3627        return 100;
3628    }
3629    let max_size = a.len().max(b.len());
3630    if max_size == 0 {
3631        // Both empty (and not caught by `a == b` only if both are empty, which
3632        // they are here) -> identical.
3633        return 100;
3634    }
3635
3636    let src = span_hash_counts(a, blob_is_text(a));
3637    let dst = span_hash_counts(b, blob_is_text(b));
3638    let common = common_span_bytes(&src, &dst);
3639
3640    // Match git's diffcore-rename integer math exactly. git computes an internal
3641    // score `src_copied * MAX_SCORE / max_size` (MAX_SCORE == 60000) with integer
3642    // truncation, then reports the similarity index as `score * 100 / MAX_SCORE`,
3643    // truncated again. This two-step truncation -- *not* a single rounded
3644    // `common * 100 / max_size` -- is what yields git's exact percentages: e.g.
3645    // common=4, max_size=6 gives 4*60000/6=40000 then 40000*100/60000=66 (git's
3646    // `R066`), whereas a rounded single step would give 67.
3647    const MAX_SCORE: u64 = 60000;
3648    let internal = (common as u64 * MAX_SCORE) / max_size as u64;
3649    let score = internal * 100 / MAX_SCORE;
3650    score.min(100) as u8
3651}
3652
/// The basename of a slash-separated path: everything after the last `/`, or
/// the whole path when it contains no `/` (git's `get_basename`). A path that
/// ends in `/` has an empty basename.
pub fn path_basename(path: &[u8]) -> &[u8] {
    // Splitting from the right always yields at least one piece, and the first
    // piece is the tail after the final separator.
    path.rsplit(|&byte| byte == b'/').next().unwrap_or(path)
}
3661
/// The stricter score a basename match must reach: halfway between the rename
/// threshold and 100% (git's `min_basename_score` with the default basename
/// factor of 0.5). Thresholds above 100 are clamped to 100 first; the halving
/// truncates. For the default 50% threshold the result is 75%.
pub fn basename_min_score(threshold: u8) -> u8 {
    let capped = if threshold > 100 { 100 } else { threshold };
    let headroom = 100 - capped;
    capped + headroom / 2
}
3669
3670/// git's `find_basename_matches`: among the still-unmatched rename sources and
3671/// destinations, pair those whose basename is UNIQUE on *both* sides and whose
3672/// similarity meets [`basename_min_score`]. Returns the `(src_local, dst_local,
3673/// score)` pairings to apply *before* the full O(n·m) similarity matrix, so a
3674/// same-basename rename wins over a globally-more-similar different-basename
3675/// candidate (diffcore-rename.c).
3676///
3677/// `src_paths`/`dst_paths` are the candidate paths, indexed in parallel with the
3678/// `src_used`/`dst_used` flags (entries already consumed by exact-OID matching).
3679/// `similarity(src_local, dst_local)` returns the blob similarity for a pair, or
3680/// `None` when a blob is unreadable / ineligible. Only unique basenames are
3681/// considered: git's plain-diff path has no directory-rename fallback, so an
3682/// ambiguous basename is skipped entirely.
3683pub fn basename_rename_matches(
3684    src_paths: &[&[u8]],
3685    dst_paths: &[&[u8]],
3686    src_used: &[bool],
3687    dst_used: &[bool],
3688    threshold: u8,
3689    mut similarity: impl FnMut(usize, usize) -> Option<u8>,
3690) -> Vec<(usize, usize, u8)> {
3691    let min_score = basename_min_score(threshold);
3692    // basename -> Some(unique local index), or None once a second candidate with
3693    // the same basename appears (ambiguous).
3694    let mut src_by_base: HashMap<&[u8], Option<usize>> = HashMap::new();
3695    for (si, path) in src_paths.iter().enumerate() {
3696        if src_used.get(si).copied().unwrap_or(false) {
3697            continue;
3698        }
3699        src_by_base
3700            .entry(path_basename(path))
3701            .and_modify(|slot| *slot = None)
3702            .or_insert(Some(si));
3703    }
3704    let mut dst_by_base: HashMap<&[u8], Option<usize>> = HashMap::new();
3705    for (di, path) in dst_paths.iter().enumerate() {
3706        if dst_used.get(di).copied().unwrap_or(false) {
3707            continue;
3708        }
3709        dst_by_base
3710            .entry(path_basename(path))
3711            .and_modify(|slot| *slot = None)
3712            .or_insert(Some(di));
3713    }
3714    let mut matches = Vec::new();
3715    let mut dst_taken = vec![false; dst_paths.len()];
3716    for (si, path) in src_paths.iter().enumerate() {
3717        if src_used.get(si).copied().unwrap_or(false) {
3718            continue;
3719        }
3720        let base = path_basename(path);
3721        // Both basenames must be unique among the remaining candidates.
3722        let Some(Some(src_idx)) = src_by_base.get(base).copied() else {
3723            continue;
3724        };
3725        if src_idx != si {
3726            continue;
3727        }
3728        let Some(Some(dst_idx)) = dst_by_base.get(base).copied() else {
3729            continue;
3730        };
3731        if dst_used.get(dst_idx).copied().unwrap_or(false) || dst_taken[dst_idx] {
3732            continue;
3733        }
3734        let Some(score) = similarity(si, dst_idx) else {
3735            continue;
3736        };
3737        if score < min_score {
3738            continue;
3739        }
3740        dst_taken[dst_idx] = true;
3741        matches.push((si, dst_idx, score));
3742    }
3743    matches
3744}
3745
3746/// Break `data` into spans and return, per span hash, the total number of bytes
3747/// covered by spans with that hash. Spans end at a newline (inclusive) or once
3748/// they reach [`MAX_SPAN_BYTES`] bytes — exactly git's `hash_chars()` loop.
3749///
3750/// The returned map is `hash -> total_span_bytes`. Summing all values yields
3751/// `data.len()`, so the byte accounting is exact.
3752fn span_hash_counts(data: &[u8], is_text: bool) -> BTreeMap<u64, usize> {
3753    let mut counts: BTreeMap<u64, usize> = BTreeMap::new();
3754    let mut idx = 0usize;
3755    let len = data.len();
3756    while idx < len {
3757        // Roll a hash over the bytes of this span. The mixing mirrors git's
3758        // two-accumulator scheme from `diffcore-delta.c`; the exact constants do
3759        // not matter for correctness (any good per-span hash works), only that
3760        // identical spans collide and distinct spans rarely do.
3761        let mut accum1: u32 = 0;
3762        let mut accum2: u32 = 0;
3763        let mut span_len = 0usize;
3764        loop {
3765            let c = data[idx] as u32;
3766            idx += 1;
3767            // Ignore CR in a CRLF sequence for text blobs, so a file that only
3768            // differs by LF<->CRLF is still scored as (near-)identical — git's
3769            // `hash_chars()` does the same, which is what makes a CRLF-only
3770            // rename detectable.
3771            if is_text && c == u32::from(b'\r') && idx < len && data[idx] == b'\n' {
3772                continue;
3773            }
3774            span_len += 1;
3775            accum1 = (accum1 << 7) ^ (accum2 >> 25);
3776            accum2 = (accum2 << 7) ^ (accum1 >> 25);
3777            accum1 = accum1.wrapping_add(c);
3778            let newline = c == u32::from(b'\n');
3779            if span_len >= MAX_SPAN_BYTES || newline || idx >= len {
3780                break;
3781            }
3782        }
3783        // Fold the two accumulators (and the span length) into one 64-bit key.
3784        // Including the length keeps spans of different lengths from colliding
3785        // when their rolling-hash states happen to coincide.
3786        let hash = ((accum1 as u64) << 32) ^ (accum2 as u64) ^ ((span_len as u64) << 1);
3787        *counts.entry(hash).or_insert(0) += span_len;
3788    }
3789    counts
3790}
3791
3792/// Sum, over every hash present in both maps, the smaller of the two byte
3793/// counts. This is git's `src_copied`: the number of bytes that appear on both
3794/// sides (counting multiplicity via the per-hash byte totals).
3795/// git `diffcore_count_changes()`: span-hash byte accounting between two
3796/// blobs. Returns `(src_copied, literal_added)` — the bytes of `src` that
3797/// survive into `dst`, and the bytes of `dst` not accounted for by `src`.
3798/// `--dirstat`'s default "changes" damage is
3799/// `(src.len() - src_copied) + literal_added`.
3800pub fn count_changes(src: &[u8], dst: &[u8]) -> (usize, usize) {
3801    let src_counts = span_hash_counts(src, blob_is_text(src));
3802    let dst_counts = span_hash_counts(dst, blob_is_text(dst));
3803    let copied = common_span_bytes(&src_counts, &dst_counts);
3804    (copied, dst.len() - copied)
3805}
3806
/// Whether a blob is treated as text for span hashing (git's
/// `buffer_is_binary` heuristic, negated): the blob is text unless a NUL byte
/// occurs within its first 8000 bytes. An empty blob is text.
fn blob_is_text(data: &[u8]) -> bool {
    const FIRST_FEW_BYTES: usize = 8000;
    let probe = &data[..data.len().min(FIRST_FEW_BYTES)];
    !probe.contains(&0)
}
3814
/// Sum, over every span hash present in both maps, the smaller of the two byte
/// totals recorded for that hash. This is git's `src_copied`: the number of
/// bytes that appear on both sides, counting multiplicity via the per-hash
/// totals.
fn common_span_bytes(src: &BTreeMap<u64, usize>, dst: &BTreeMap<u64, usize>) -> usize {
    // The result is symmetric, so walk whichever map is smaller and probe the
    // other one: that needs the fewest lookups.
    let (walked, probed) = if dst.len() < src.len() {
        (dst, src)
    } else {
        (src, dst)
    };
    walked
        .iter()
        .filter_map(|(hash, &bytes)| probed.get(hash).map(|&other| bytes.min(other)))
        .sum()
}
3830
3831fn diff_entry_sort_path(entry: &NameStatusEntry) -> &[u8] {
3832    // git's diffcore re-inserts rename/copy pairs at their *destination*'s
3833    // position, so the queue (raw, numstat, stat, ...) sorts by the new path.
3834    entry.path.as_bytes()
3835}
3836
3837fn mark_unstaged_worktree_oids_unresolved(
3838    changes: Vec<NameStatusEntry>,
3839    index_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3840    worktree_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3841) -> Vec<NameStatusEntry> {
3842    changes
3843        .into_iter()
3844        .map(|mut entry| {
3845            let worktree_entry = worktree_entries.get(entry.path.as_bytes());
3846            if worktree_entry != index_entries.get(entry.path.as_bytes()) {
3847                entry.new_oid = None;
3848            }
3849            entry
3850        })
3851        .collect()
3852}
3853
/// What is recorded for one path on one side of a comparison (tree, index or
/// worktree): its git file mode and the id of the object it names. Two entries
/// compare equal only when both fields match.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TrackedEntry {
    /// Git file mode as stored in tree/index entries (e.g. the gitlink mode
    /// for an embedded repository).
    mode: u32,
    /// Object id recorded for the path; for a gitlink this is a commit id.
    oid: ObjectId,
}
3859
3860/// A path-keyed map of tracked entries: one flattened side of a tree (or index/
3861/// worktree) snapshot.
3862type TrackedEntryMap = BTreeMap<Vec<u8>, TrackedEntry>;
3863
3864/// The `(left, right)` sides produced by a tree-vs-tree comparison.
3865type TrackedEntryPair = (TrackedEntryMap, TrackedEntryMap);
3866
/// The result of one read of the repository index: the path-keyed entries
/// together with the stat cache built from that same read.
struct IndexSnapshot {
    /// Mode and object id of each index entry, keyed by repository path bytes.
    entries: BTreeMap<Vec<u8>, TrackedEntry>,
    /// Stat cache derived from the index and the index file's mtime; it is
    /// consulted to decide whether an index entry can be reused for a worktree
    /// file without re-examining it.
    stat_cache: IndexStatCache,
}
3871
3872fn read_index_entries(
3873    git_dir: &Path,
3874    format: ObjectFormat,
3875) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
3876    let index_path = sley_index::repository_index_path(git_dir);
3877    if !index_path.exists() {
3878        return Ok(BTreeMap::new());
3879    }
3880    let index = expand_sparse_index_for_worktree_diff(
3881        sley_index::read_repository_index(git_dir, format)?,
3882        git_dir,
3883        format,
3884    )?;
3885    Ok(index
3886        .entries
3887        .into_iter()
3888        .filter(|entry| entry.stage() == sley_index::Stage::Normal && !entry.is_intent_to_add())
3889        .map(|entry| {
3890            (
3891                entry.path.into_bytes(),
3892                TrackedEntry {
3893                    mode: entry.mode,
3894                    oid: entry.oid,
3895                },
3896            )
3897        })
3898        .collect())
3899}
3900
3901/// Collect the set of stage-0 paths flagged intent-to-add (`git add -N`) in the
3902/// index. These diff as new files rather than as modifications of their recorded
3903/// empty-blob id.
3904fn read_intent_to_add_paths(
3905    git_dir: &Path,
3906    format: ObjectFormat,
3907) -> Result<std::collections::HashSet<Vec<u8>>> {
3908    let index_path = sley_index::repository_index_path(git_dir);
3909    if !index_path.exists() {
3910        return Ok(std::collections::HashSet::new());
3911    }
3912    let index = expand_sparse_index_for_worktree_diff(
3913        sley_index::read_repository_index(git_dir, format)?,
3914        git_dir,
3915        format,
3916    )?;
3917    Ok(index
3918        .entries
3919        .iter()
3920        .filter(|entry| entry.stage() == sley_index::Stage::Normal && entry.is_intent_to_add())
3921        .map(|entry| entry.path.as_bytes().to_vec())
3922        .collect())
3923}
3924
3925fn read_index_snapshot(git_dir: &Path, format: ObjectFormat) -> Result<IndexSnapshot> {
3926    let index_path = sley_index::repository_index_path(git_dir);
3927    let index_metadata = match fs::metadata(&index_path) {
3928        Ok(metadata) => metadata,
3929        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
3930            return Ok(IndexSnapshot {
3931                entries: BTreeMap::new(),
3932                stat_cache: IndexStatCache::default(),
3933            });
3934        }
3935        Err(err) => return Err(err.into()),
3936    };
3937    let index = expand_sparse_index_for_worktree_diff(
3938        sley_index::read_repository_index(git_dir, format)?,
3939        git_dir,
3940        format,
3941    )?;
3942    let stat_cache =
3943        IndexStatCache::from_index_mtime(&index, sley_index::file_mtime_parts(&index_metadata));
3944    let entries = index
3945        .entries
3946        .into_iter()
3947        .map(|entry| {
3948            (
3949                entry.path.into_bytes(),
3950                TrackedEntry {
3951                    mode: entry.mode,
3952                    oid: entry.oid,
3953                },
3954            )
3955        })
3956        .collect();
3957    Ok(IndexSnapshot {
3958        entries,
3959        stat_cache,
3960    })
3961}
3962
/// The view of an index entry needed by the index-vs-worktree comparison.
/// Implemented for both `sley_index::IndexEntry` and
/// `sley_index::IndexEntryRef`, so the same comparison code can run over
/// either representation.
trait WorktreeIndexEntry {
    /// Repository-relative path of the entry, as raw bytes.
    fn git_path(&self) -> &[u8];
    /// Index stage of the entry.
    fn stage(&self) -> sley_index::Stage;
    /// Git file mode recorded in the index.
    fn mode(&self) -> u32;
    /// Object id recorded in the index.
    fn oid(&self) -> ObjectId;
    /// Whether the entry is flagged intent-to-add.
    fn is_intent_to_add(&self) -> bool;
    /// Whether the entry is flagged skip-worktree.
    fn is_skip_worktree(&self) -> bool;
    /// Whether `stat_cache` considers this entry reusable for a worktree file
    /// with the given `metadata`; callers treat a reusable entry as unchanged.
    fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool;
}
3972
/// [`WorktreeIndexEntry`] for `sley_index::IndexEntry`: field reads plus
/// delegation to the type's own methods.
impl WorktreeIndexEntry for sley_index::IndexEntry {
    fn git_path(&self) -> &[u8] {
        self.path.as_bytes()
    }

    fn stage(&self) -> sley_index::Stage {
        // Fully qualified call to `IndexEntry::stage` — presumably so it
        // cannot resolve back to this trait method. NOTE(review): confirm.
        sley_index::IndexEntry::stage(self)
    }

    fn mode(&self) -> u32 {
        self.mode
    }

    fn oid(&self) -> ObjectId {
        self.oid
    }

    fn is_intent_to_add(&self) -> bool {
        sley_index::IndexEntry::is_intent_to_add(self)
    }

    fn is_skip_worktree(&self) -> bool {
        sley_index::IndexEntry::is_skip_worktree(self)
    }

    fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool {
        // The cache lookup returns an `Option`; only its presence matters here.
        stat_cache.reusable_index_entry(self, metadata).is_some()
    }
}
4002
/// [`WorktreeIndexEntry`] for `sley_index::IndexEntryRef`: field reads plus
/// delegation to the type's own methods.
impl WorktreeIndexEntry for sley_index::IndexEntryRef<'_> {
    fn git_path(&self) -> &[u8] {
        // The path field is already a byte slice; no conversion is needed.
        self.path
    }

    fn stage(&self) -> sley_index::Stage {
        // Fully qualified call to `IndexEntryRef::stage` — presumably so it
        // cannot resolve back to this trait method. NOTE(review): confirm.
        sley_index::IndexEntryRef::stage(self)
    }

    fn mode(&self) -> u32 {
        self.mode
    }

    fn oid(&self) -> ObjectId {
        self.oid
    }

    fn is_intent_to_add(&self) -> bool {
        sley_index::IndexEntryRef::is_intent_to_add(self)
    }

    fn is_skip_worktree(&self) -> bool {
        sley_index::IndexEntryRef::is_skip_worktree(self)
    }

    fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool {
        // Unlike the `IndexEntry` lookup, this one is used directly as a bool.
        stat_cache.reusable_index_entry_ref(self, metadata)
    }
}
4032
4033fn tracked_entry_from_index(entry: &impl WorktreeIndexEntry) -> TrackedEntry {
4034    TrackedEntry {
4035        mode: entry.mode(),
4036        oid: entry.oid(),
4037    }
4038}
4039
4040fn head_tree_entries(
4041    git_dir: &Path,
4042    format: ObjectFormat,
4043    db: &FileObjectDatabase,
4044) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
4045    let refs = FileRefStore::new(git_dir, format);
4046    let Some(head) = refs.read_ref("HEAD")? else {
4047        return Ok(BTreeMap::new());
4048    };
4049    let commit_oid = match head {
4050        RefTarget::Direct(oid) => Some(oid),
4051        RefTarget::Symbolic(name) => match refs.read_ref(&name)? {
4052            Some(RefTarget::Direct(oid)) => Some(oid),
4053            _ => None,
4054        },
4055    };
4056    let Some(commit_oid) = commit_oid else {
4057        return Ok(BTreeMap::new());
4058    };
4059    let object = db.read_object(&commit_oid)?;
4060    if object.object_type != ObjectType::Commit {
4061        return Err(GitError::InvalidObject(format!(
4062            "HEAD {commit_oid} is not a commit"
4063        )));
4064    }
4065    let commit = Commit::parse_ref(format, &object.body)?;
4066    let mut entries = BTreeMap::new();
4067    collect_tree_entries(db, format, &commit.tree, Vec::new(), &mut entries)?;
4068    Ok(entries)
4069}
4070
4071/// Flatten `tree_oid` into `entries` (keyed by `prefix`-rooted full paths),
4072/// adapting the canonical [`flatten_tree`] tuples into [`TrackedEntry`].
4073///
4074/// `flatten_tree` flattens from an empty prefix; each of its paths is rejoined
4075/// under `prefix` with [`join_tree_path`], reproducing the recursive
4076/// prefix-building this helper previously did inline. Used by the full
4077/// (non-pruned) flatten paths: `--find-copies-harder` and the changed-subtree
4078/// add/delete sides of the simultaneous diff walk.
4079fn collect_tree_entries(
4080    db: &FileObjectDatabase,
4081    format: ObjectFormat,
4082    tree_oid: &ObjectId,
4083    prefix: Vec<u8>,
4084    entries: &mut BTreeMap<Vec<u8>, TrackedEntry>,
4085) -> Result<()> {
4086    for (rel_path, (mode, oid)) in flatten_tree(db, format, tree_oid)? {
4087        let path = join_tree_path(&prefix, &rel_path);
4088        entries.insert(path, TrackedEntry { mode, oid });
4089    }
4090    Ok(())
4091}
4092
4093/// Git's mode value for a subtree (directory) entry inside a tree object.
4094const TREE_ENTRY_MODE: u32 = 0o040000;
4095
4096/// Read `tree_oid` and parse it as a tree, erroring if the object is some other
4097/// type. Shared by the simultaneous tree-diff walk so both sides validate the
4098/// object type identically to [`collect_tree_entries`].
4099fn read_tree_object(
4100    db: &FileObjectDatabase,
4101    format: ObjectFormat,
4102    tree_oid: &ObjectId,
4103) -> Result<Tree> {
4104    let object = db.read_object(tree_oid)?;
4105    if object.object_type != ObjectType::Tree {
4106        return Err(GitError::InvalidObject(format!(
4107            "expected tree {tree_oid}, found {}",
4108            object.object_type.as_str()
4109        )));
4110    }
4111    Tree::parse(format, &object.body)
4112}
4113
/// Build the path `prefix/name`. An empty `prefix` contributes neither bytes
/// nor a separator, so the result is just `name`.
fn join_tree_path(prefix: &[u8], name: &[u8]) -> Vec<u8> {
    let mut joined = Vec::with_capacity(prefix.len() + 1 + name.len());
    if !prefix.is_empty() {
        joined.extend_from_slice(prefix);
        joined.push(b'/');
    }
    joined.extend_from_slice(name);
    joined
}
4125
4126/// Fully flatten both trees into independent `left`/`right` maps (every blob on
4127/// each side, no pruning). Used only on the `--find-copies-harder` path, where
4128/// copy detection may reach into otherwise-unchanged subtrees for a source.
4129fn collect_full_tree_pair(
4130    db: &FileObjectDatabase,
4131    format: ObjectFormat,
4132    left_tree: &ObjectId,
4133    right_tree: &ObjectId,
4134) -> Result<TrackedEntryPair> {
4135    let mut left = BTreeMap::new();
4136    collect_tree_entries(db, format, left_tree, Vec::new(), &mut left)?;
4137    let mut right = BTreeMap::new();
4138    collect_tree_entries(db, format, right_tree, Vec::new(), &mut right)?;
4139    Ok((left, right))
4140}
4141
4142/// Walk two trees *simultaneously*, collecting into `left` and `right` only the
4143/// blob entries that differ between the two sides — every entry that is present
4144/// and byte-identical (same mode + same OID) on both sides is omitted, and any
4145/// subtree whose OID is identical on both sides is skipped wholesale without
4146/// being read or recursed into. This is the core optimization git relies on to
4147/// make tree diffs cheap: equal subtrees are pruned in O(1).
4148///
4149/// The resulting `left`/`right` maps are exactly the subset of the fully
4150/// flattened maps (as produced by [`collect_tree_entries`]) restricted to the
4151/// paths that participate in an Added/Deleted/Modified change. Because
4152/// [`raw_name_status_changes`] emits nothing for a path that is identical on both
4153/// sides, diffing these pruned maps yields byte-identical name-status output to
4154/// diffing the full maps. (Callers that need the *complete* left map — i.e.
4155/// `--find-copies-harder`, where an unchanged file may be a copy source — must
4156/// still use [`collect_tree_entries`]; see the tree-diff entry points.)
4157fn changed_tree_entries(
4158    db: &FileObjectDatabase,
4159    format: ObjectFormat,
4160    left_tree: &ObjectId,
4161    right_tree: &ObjectId,
4162) -> Result<TrackedEntryPair> {
4163    let mut left = BTreeMap::new();
4164    let mut right = BTreeMap::new();
4165    // Identical root trees produce no changes at all and need not be read.
4166    if left_tree != right_tree {
4167        diff_tree_pair(
4168            db,
4169            format,
4170            left_tree,
4171            right_tree,
4172            &[],
4173            &mut left,
4174            &mut right,
4175        )?;
4176    }
4177    Ok((left, right))
4178}
4179
4180/// Recursively diff two subtrees rooted at `prefix`, appending differing blob
4181/// entries to `left` / `right`. Invariant: the two OIDs are already known to
4182/// differ (identical subtrees are pruned by the caller before recursing).
4183fn diff_tree_pair(
4184    db: &FileObjectDatabase,
4185    format: ObjectFormat,
4186    left_tree: &ObjectId,
4187    right_tree: &ObjectId,
4188    prefix: &[u8],
4189    left: &mut BTreeMap<Vec<u8>, TrackedEntry>,
4190    right: &mut BTreeMap<Vec<u8>, TrackedEntry>,
4191) -> Result<()> {
4192    let left_entries = read_tree_object(db, format, left_tree)?.entries;
4193    let right_entries = read_tree_object(db, format, right_tree)?.entries;
4194
4195    // Index the right side by name so the union of names can be walked without
4196    // relying on git's directory-aware entry ordering. (Iterating the union of
4197    // names, rather than a positional merge, keeps correctness independent of
4198    // entry order.)
4199    let mut right_by_name: HashMap<&[u8], &TreeEntry> = HashMap::with_capacity(right_entries.len());
4200    for entry in &right_entries {
4201        right_by_name.insert(entry.name.as_bytes(), entry);
4202    }
4203
4204    for left_entry in &left_entries {
4205        match right_by_name.remove(left_entry.name.as_bytes()) {
4206            Some(right_entry) => {
4207                merge_tree_entry(
4208                    db,
4209                    format,
4210                    prefix,
4211                    Some(left_entry),
4212                    Some(right_entry),
4213                    left,
4214                    right,
4215                )?;
4216            }
4217            None => {
4218                merge_tree_entry(db, format, prefix, Some(left_entry), None, left, right)?;
4219            }
4220        }
4221    }
4222    // Names only present on the right are pure additions.
4223    for right_entry in &right_entries {
4224        if right_by_name.contains_key(right_entry.name.as_bytes()) {
4225            merge_tree_entry(db, format, prefix, None, Some(right_entry), left, right)?;
4226        }
4227    }
4228    Ok(())
4229}
4230
4231/// Reconcile a single name that may appear on the left side, the right side, or
4232/// both, recording any resulting blob change(s) into `left` / `right`. This
4233/// reproduces exactly the union-of-flattened-maps semantics:
4234///
4235/// * tree vs tree with equal OID -> pruned (no read, no recursion);
4236/// * tree vs tree with differing OID -> recurse;
4237/// * blob vs blob, equal mode+OID -> unchanged, emitted nowhere;
4238/// * blob vs blob, differing mode or OID -> both sides recorded (a Modify);
4239/// * a tree on one side and a non-tree on the other (or a name present on only
4240///   one side) -> the flattened paths differ (`name/...` vs `name`), so the two
4241///   are unrelated: the tree side is flattened wholesale and the blob side is
4242///   recorded independently (an Add and/or a Delete).
4243fn merge_tree_entry(
4244    db: &FileObjectDatabase,
4245    format: ObjectFormat,
4246    prefix: &[u8],
4247    left_entry: Option<&TreeEntry>,
4248    right_entry: Option<&TreeEntry>,
4249    left: &mut BTreeMap<Vec<u8>, TrackedEntry>,
4250    right: &mut BTreeMap<Vec<u8>, TrackedEntry>,
4251) -> Result<()> {
4252    let left_is_tree = left_entry.is_some_and(|entry| entry.mode == TREE_ENTRY_MODE);
4253    let right_is_tree = right_entry.is_some_and(|entry| entry.mode == TREE_ENTRY_MODE);
4254
4255    if let (Some(left_entry), Some(right_entry)) = (left_entry, right_entry) {
4256        if left_is_tree && right_is_tree {
4257            // Two subtrees under the same name: prune if identical, else recurse.
4258            if left_entry.oid == right_entry.oid {
4259                return Ok(());
4260            }
4261            let path = join_tree_path(prefix, left_entry.name.as_bytes());
4262            return diff_tree_pair(
4263                db,
4264                format,
4265                &left_entry.oid,
4266                &right_entry.oid,
4267                &path,
4268                left,
4269                right,
4270            );
4271        }
4272        if !left_is_tree && !right_is_tree {
4273            // Two blobs under the same name. Identical mode+OID means unchanged
4274            // (nothing emitted); otherwise both sides are recorded so the diff
4275            // sees a Modify, matching the full-map `left != right` comparison.
4276            if left_entry.mode == right_entry.mode && left_entry.oid == right_entry.oid {
4277                return Ok(());
4278            }
4279            let path = join_tree_path(prefix, left_entry.name.as_bytes());
4280            left.insert(
4281                path.clone(),
4282                TrackedEntry {
4283                    mode: left_entry.mode,
4284                    oid: left_entry.oid,
4285                },
4286            );
4287            right.insert(
4288                path,
4289                TrackedEntry {
4290                    mode: right_entry.mode,
4291                    oid: right_entry.oid,
4292                },
4293            );
4294            return Ok(());
4295        }
4296        // Mixed: tree on one side, blob on the other. Their flattened paths
4297        // never collide, so handle each side as if the name existed only there.
4298    }
4299
4300    // Left side (if any): record as deletions.
4301    if let Some(left_entry) = left_entry {
4302        let path = join_tree_path(prefix, left_entry.name.as_bytes());
4303        if left_is_tree {
4304            collect_tree_entries(db, format, &left_entry.oid, path, left)?;
4305        } else {
4306            left.insert(
4307                path,
4308                TrackedEntry {
4309                    mode: left_entry.mode,
4310                    oid: left_entry.oid,
4311                },
4312            );
4313        }
4314    }
4315    // Right side (if any): record as additions.
4316    if let Some(right_entry) = right_entry {
4317        let path = join_tree_path(prefix, right_entry.name.as_bytes());
4318        if right_is_tree {
4319            collect_tree_entries(db, format, &right_entry.oid, path, right)?;
4320        } else {
4321            right.insert(
4322                path,
4323                TrackedEntry {
4324                    mode: right_entry.mode,
4325                    oid: right_entry.oid,
4326                },
4327            );
4328        }
4329    }
4330    Ok(())
4331}
4332
4333fn index_gitlinks(index: &BTreeMap<Vec<u8>, TrackedEntry>) -> BTreeMap<Vec<u8>, ObjectId> {
4334    index
4335        .iter()
4336        .filter(|(_, entry)| sley_index::is_gitlink(entry.mode))
4337        .map(|(path, entry)| (path.clone(), entry.oid))
4338        .collect()
4339}
4340
/// Copy the given paths into an ordered set, dropping duplicates.
fn candidate_path_set<'a>(candidate_paths: impl Iterator<Item = &'a Vec<u8>>) -> BTreeSet<Vec<u8>> {
    let mut unique = BTreeSet::new();
    for path in candidate_paths {
        unique.insert(path.clone());
    }
    unique
}
4344
4345fn worktree_entries_for_path_set(
4346    worktree_root: &Path,
4347    format: ObjectFormat,
4348    candidates: &BTreeSet<Vec<u8>>,
4349    index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
4350    stat_cache: Option<&IndexStatCache>,
4351) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
4352    worktree_entries_for_unique_paths(
4353        worktree_root,
4354        format,
4355        candidates.iter(),
4356        index_gitlinks,
4357        stat_cache,
4358    )
4359}
4360
4361fn worktree_entries_for_unique_paths<'a>(
4362    worktree_root: &Path,
4363    format: ObjectFormat,
4364    candidates: impl Iterator<Item = &'a Vec<u8>>,
4365    index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
4366    stat_cache: Option<&IndexStatCache>,
4367) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
4368    let mut entries = BTreeMap::new();
4369    for git_path in candidates {
4370        if let Some(entry) =
4371            worktree_entry_for_path(worktree_root, format, git_path, index_gitlinks, stat_cache)?
4372        {
4373            entries.insert(git_path.clone(), entry);
4374        }
4375    }
4376    Ok(entries)
4377}
4378
4379fn worktree_entry_for_path(
4380    worktree_root: &Path,
4381    format: ObjectFormat,
4382    git_path: &[u8],
4383    index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
4384    stat_cache: Option<&IndexStatCache>,
4385) -> Result<Option<TrackedEntry>> {
4386    let path = worktree_path_for_repo_path(worktree_root, git_path);
4387    let metadata = match fs::symlink_metadata(&path) {
4388        Ok(metadata) => metadata,
4389        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
4390        Err(err) => return Err(GitError::Io(err.to_string())),
4391    };
4392    let file_type = metadata.file_type();
4393    if let Some(staged_oid) = index_gitlinks.get(git_path)
4394        && metadata.is_dir()
4395    {
4396        let oid = gitlink_head_oid(&path, format).unwrap_or(*staged_oid);
4397        return Ok(Some(TrackedEntry {
4398            mode: sley_index::GITLINK_MODE,
4399            oid,
4400        }));
4401    }
4402    if metadata.is_dir() {
4403        if let Some(oid) = gitlink_head_oid(&path, format) {
4404            return Ok(Some(TrackedEntry {
4405                mode: sley_index::GITLINK_MODE,
4406                oid,
4407            }));
4408        }
4409        return Ok(None);
4410    }
4411    if !(metadata.is_file() || file_type.is_symlink()) {
4412        return Ok(None);
4413    }
4414    if let Some(entry) = stat_cache.and_then(|cache| cache.reusable_entry(git_path, &metadata)) {
4415        return Ok(Some(tracked_entry_from_index(entry)));
4416    }
4417    Ok(Some(classify_worktree_entry(&path, &metadata, format)?))
4418}
4419
4420fn index_worktree_change_for_entry(
4421    path: &Path,
4422    format: ObjectFormat,
4423    index_entry: &impl WorktreeIndexEntry,
4424    stat_cache: &IndexStatCache,
4425) -> Result<Option<NameStatusEntry>> {
4426    let git_path = index_entry.git_path();
4427    let metadata = match fs::symlink_metadata(path) {
4428        Ok(metadata) => metadata,
4429        Err(err)
4430            if err.kind() == std::io::ErrorKind::NotFound && index_entry.is_skip_worktree() =>
4431        {
4432            return Ok(None);
4433        }
4434        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
4435            return Ok(Some(index_worktree_deleted_entry(index_entry)));
4436        }
4437        Err(err) => return Err(GitError::Io(err.to_string())),
4438    };
4439    let file_type = metadata.file_type();
4440    let right = if metadata.is_dir() {
4441        if sley_index::is_gitlink(index_entry.mode()) {
4442            let oid = gitlink_head_oid(path, format).unwrap_or(index_entry.oid());
4443            Some(TrackedEntry {
4444                mode: sley_index::GITLINK_MODE,
4445                oid,
4446            })
4447        } else {
4448            gitlink_head_oid(path, format).map(|oid| TrackedEntry {
4449                mode: sley_index::GITLINK_MODE,
4450                oid,
4451            })
4452        }
4453    } else if metadata.is_file() || file_type.is_symlink() {
4454        if index_entry.reusable_with(stat_cache, &metadata) {
4455            return Ok(None);
4456        }
4457        Some(classify_worktree_entry(path, &metadata, format)?)
4458    } else {
4459        None
4460    };
4461    let Some(right) = right else {
4462        return Ok(Some(index_worktree_deleted_entry(index_entry)));
4463    };
4464    let left = tracked_entry_from_index(index_entry);
4465    if right == left {
4466        return Ok(None);
4467    }
4468    Ok(Some(NameStatusEntry {
4469        status: modify_or_type_change(left.mode, right.mode),
4470        path: git_path.to_vec().into(),
4471        old_path: None,
4472        old_mode: Some(left.mode),
4473        new_mode: Some(right.mode),
4474        old_oid: Some(left.oid),
4475        new_oid: Some(right.oid),
4476    }))
4477}
4478
4479fn index_worktree_deleted_entry(index_entry: &impl WorktreeIndexEntry) -> NameStatusEntry {
4480    NameStatusEntry {
4481        status: NameStatus::Deleted,
4482        path: index_entry.git_path().to_vec().into(),
4483        old_path: None,
4484        old_mode: Some(index_entry.mode()),
4485        new_mode: None,
4486        old_oid: Some(index_entry.oid()),
4487        new_oid: None,
4488    }
4489}
4490
4491fn worktree_blob_cache_for_path_set(
4492    worktree_root: &Path,
4493    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
4494    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
4495    candidate_paths: &BTreeSet<Vec<u8>>,
4496    options: RenameDetectionOptions,
4497) -> Result<HashMap<ObjectId, Vec<u8>>> {
4498    worktree_blob_cache_for_unique_paths(
4499        worktree_root,
4500        left_entries,
4501        right_entries,
4502        candidate_paths.iter(),
4503        options,
4504    )
4505}
4506
4507fn worktree_blob_cache_for_unique_paths<'a>(
4508    worktree_root: &Path,
4509    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
4510    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
4511    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
4512    options: RenameDetectionOptions,
4513) -> Result<HashMap<ObjectId, Vec<u8>>> {
4514    if !options.detect_inexact || !(options.base.detect_renames || options.base.detect_copies) {
4515        return Ok(HashMap::new());
4516    }
4517    let base = options.base;
4518    let mut changes =
4519        raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
4520    if base.detect_renames {
4521        changes = detect_exact_renames(changes, left_entries, right_entries, base.rename_empty);
4522    }
4523    if base.detect_copies {
4524        changes = detect_exact_copies(
4525            changes,
4526            left_entries,
4527            right_entries,
4528            base.find_copies_harder,
4529            base.rename_empty,
4530        );
4531    }
4532    let has_rename_source = base.detect_renames
4533        && changes.iter().any(|entry| {
4534            entry.status == NameStatus::Deleted
4535                && entry
4536                    .old_oid
4537                    .as_ref()
4538                    .is_some_and(|oid| base.rename_empty || !is_empty_blob_oid(oid))
4539        });
4540    let has_copy_source = base.detect_copies
4541        && (base.find_copies_harder
4542            || changes
4543                .iter()
4544                .any(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified)));
4545    if !has_rename_source && !has_copy_source {
4546        return Ok(HashMap::new());
4547    }
4548    let candidate_oids = changes
4549        .iter()
4550        .filter(|entry| entry.status == NameStatus::Added)
4551        .filter_map(|entry| entry.new_oid)
4552        .filter(|oid| base.rename_empty || !is_empty_blob_oid(oid))
4553        .collect::<BTreeSet<_>>();
4554    if candidate_oids.is_empty() {
4555        return Ok(HashMap::new());
4556    }
4557    let mut cache = HashMap::new();
4558    for (git_path, entry) in right_entries {
4559        if sley_index::is_gitlink(entry.mode) || !candidate_oids.contains(&entry.oid) {
4560            continue;
4561        }
4562        let path = worktree_path_for_repo_path(worktree_root, git_path);
4563        let body = if sley_index::is_symlink_mode(entry.mode) {
4564            symlink_target_bytes(&path)?
4565        } else {
4566            fs::read(&path)?
4567        };
4568        cache.entry(entry.oid).or_insert(body);
4569    }
4570    Ok(cache)
4571}
4572
4573/// A blob fetcher that consults an in-memory `oid -> bytes` cache first (e.g.
4574/// freshly-read worktree files) and falls back to the object database.
4575fn cache_or_odb_blob(
4576    cache: &HashMap<ObjectId, Vec<u8>>,
4577    db: &FileObjectDatabase,
4578    oid: &ObjectId,
4579) -> Option<Vec<u8>> {
4580    if let Some(bytes) = cache.get(oid) {
4581        return Some(bytes.clone());
4582    }
4583    read_blob_bytes(db, oid)
4584}
4585
/// Map a repository path (raw bytes, `/`-separated) onto the filesystem path
/// of the same file under `worktree_root`. On unix the bytes are used verbatim
/// as an `OsStr`, so non-UTF-8 names round-trip unchanged.
#[cfg(unix)]
fn worktree_path_for_repo_path(worktree_root: &Path, path: &[u8]) -> PathBuf {
    use std::os::unix::ffi::OsStrExt;

    worktree_root.join(std::ffi::OsStr::from_bytes(path))
}
4595
/// Like [`worktree_path_for_repo_path`], but writes the result into the
/// caller-supplied `out` buffer: its previous contents are discarded and its
/// allocation is reused.
#[cfg(unix)]
fn worktree_path_for_repo_path_into(out: &mut PathBuf, worktree_root: &Path, path: &[u8]) {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    out.clear();
    // `extend` pushes each component in turn: the root first, then the path.
    out.extend([worktree_root.as_os_str(), OsStr::from_bytes(path)]);
}
4605
/// Map a repository path (raw bytes, `/`-separated) onto the filesystem path
/// of the same file under `worktree_root`. On non-unix platforms the bytes are
/// first converted component by component with `repo_path_to_path`.
#[cfg(not(unix))]
fn worktree_path_for_repo_path(worktree_root: &Path, path: &[u8]) -> PathBuf {
    let mut out = PathBuf::from(worktree_root);
    out.push(repo_path_to_path(path));
    out
}
4610
/// Like [`worktree_path_for_repo_path`], but writes the result into the
/// caller-supplied `out` buffer: its previous contents are discarded and its
/// allocation is reused.
#[cfg(not(unix))]
fn worktree_path_for_repo_path_into(out: &mut PathBuf, worktree_root: &Path, path: &[u8]) {
    let relative = repo_path_to_path(path);
    out.clear();
    out.push(worktree_root);
    out.push(relative);
}
4617
/// Convert a `/`-separated repository path into a relative platform path by
/// pushing each non-empty component. Bytes that are not valid UTF-8 are
/// replaced lossily, and empty components (leading, trailing or doubled `/`)
/// are dropped.
#[cfg(not(unix))]
fn repo_path_to_path(path: &[u8]) -> PathBuf {
    String::from_utf8_lossy(path)
        .split('/')
        .filter(|component| !component.is_empty())
        .collect()
}
4628
/// Git file mode for a regular worktree file: `100755` when any execute bit
/// (owner, group or other) is set, `100644` otherwise.
#[cfg(unix)]
fn file_mode(metadata: &fs::Metadata) -> u32 {
    use std::os::unix::fs::PermissionsExt;

    const ANY_EXECUTE_BIT: u32 = 0o111;
    let executable = metadata.permissions().mode() & ANY_EXECUTE_BIT != 0;
    match executable {
        true => 0o100755,
        false => 0o100644,
    }
}
4638
/// Git file mode for a regular worktree file. This platform has no execute
/// bit to inspect, so every file is reported as a plain `100644` file.
#[cfg(not(unix))]
fn file_mode(_metadata: &fs::Metadata) -> u32 {
    const REGULAR_FILE_MODE: u32 = 0o100644;
    REGULAR_FILE_MODE
}
4643
/// Read the target of the symbolic link at `path` exactly as git stores it in
/// a symlink blob (mode `120000`): the raw target path bytes with no trailing
/// newline. The link itself is read, never dereferenced — git's
/// `diff_populate_filespec` likewise uses `strbuf_readlink` for a worktree
/// symlink.
///
/// # Errors
///
/// Fails when `path` cannot be read as a symbolic link.
#[cfg(unix)]
pub fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
    use std::os::unix::ffi::OsStringExt;

    let target = fs::read_link(path)?;
    Ok(target.into_os_string().into_vec())
}
4654
/// Read the target of the symbolic link at `path` as the bytes git stores in
/// a symlink blob. On non-unix platforms the target is converted to text
/// lossily and its backslash separators are rewritten to `/`.
///
/// # Errors
///
/// Fails when `path` cannot be read as a symbolic link.
#[cfg(not(unix))]
pub fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
    let target = fs::read_link(path)?;
    let normalized = target.to_string_lossy().replace('\\', "/");
    Ok(normalized.into_bytes())
}
4661
4662// ---------------------------------------------------------------------------
4663// Unified / git diff patch parsing and application (engine for `git apply`/`git am`).
4664//
4665// Operates purely on in-memory byte buffers; the caller is responsible for
4666// reading/writing blobs from the working tree or the object database. The
4667// parser understands the textual format git produces (`diff --git`, `---`/`+++`
4668// file headers, `@@` hunk headers, context/`+`/`-` body lines, the
4669// `\ No newline at end of file` marker, `/dev/null` for added/deleted files,
4670// file mode headers, and `rename from`/`rename to` headers).
4671// ---------------------------------------------------------------------------
4672
/// One body line of a hunk.
///
/// The payload never contains the line terminator. Whether the final line of a
/// side ends in `\n` is recorded on the owning [`Hunk`] instead (see
/// [`Hunk::old_no_newline`] / [`Hunk::new_no_newline`]), which lets the
/// no-final-newline case round-trip byte-for-byte.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HunkLine {
    /// Unchanged line: present in both the old and the new version.
    Context(Vec<u8>),
    /// Line the patch adds: present only in the new version.
    Insert(Vec<u8>),
    /// Line the patch removes: present only in the old version.
    Delete(Vec<u8>),
}

impl HunkLine {
    /// Borrow the line's bytes (no trailing newline), whatever its kind.
    pub fn content(&self) -> &[u8] {
        // All three variants carry the same payload type, so one irrefutable
        // or-pattern extracts it.
        let (Self::Context(payload) | Self::Insert(payload) | Self::Delete(payload)) = self;
        payload.as_slice()
    }
}
4696
/// A single `@@ -old_start,old_len +new_start,new_len @@` hunk.
///
/// `old_start` / `new_start` are 1-based line numbers as they appear in the
/// patch header. The `*_no_newline` flags record that the final line on that
/// side of the hunk is *not* terminated by a newline (the `\ No newline at end
/// of file` marker).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Hunk {
    /// Start of the old-side range (`old_start` in the `@@` header).
    pub old_start: usize,
    /// Length of the old-side range (`old_len` in the `@@` header).
    pub old_len: usize,
    /// Start of the new-side range (`new_start` in the `@@` header).
    pub new_start: usize,
    /// Length of the new-side range (`new_len` in the `@@` header).
    pub new_len: usize,
    /// The hunk body in patch order: context, inserted, and deleted lines.
    pub lines: Vec<HunkLine>,
    /// The last context/deleted line of the old file lacks a trailing newline.
    pub old_no_newline: bool,
    /// The last context/inserted line of the new file lacks a trailing newline.
    pub new_no_newline: bool,
    /// The 1-based line number (in the patch input) of each entry in `lines`,
    /// used by `git apply`'s whitespace-error reporting (git's `state->linenr`).
    /// Empty when the patch was not parsed from input (e.g. synthesised hunks).
    pub line_input_lines: Vec<usize>,
}
4719
/// A patch targeting a single file. Produced by [`parse_unified_patch`].
///
/// Besides the textual hunks it carries the header metadata of the file diff:
/// paths, modes, add/delete/rename/copy status, similarity scores, the `index`
/// line object ids, and any binary payload.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilePatch {
    /// Path on the `a/` (old) side, or `None` for a newly created file.
    pub old_path: Option<Vec<u8>>,
    /// Path on the `b/` (new) side, or `None` for a deleted file.
    pub new_path: Option<Vec<u8>>,
    /// Mode of the old file, when a mode header was present.
    pub old_mode: Option<u32>,
    /// Mode of the new file, when a mode header was present.
    pub new_mode: Option<u32>,
    /// The textual hunks; the apply functions process them in this order.
    pub hunks: Vec<Hunk>,
    /// The patch creates a new file (`--- /dev/null` / `new file mode`).
    pub is_new: bool,
    /// The patch deletes the file (`+++ /dev/null` / `deleted file mode`).
    pub is_delete: bool,
    /// The patch renames the file (`rename from`/`rename to`).
    pub is_rename: bool,
    /// The patch copies the file (`copy from`/`copy to`).
    pub is_copy: bool,
    /// Similarity score from `similarity index N%`, used for rename/copy summaries.
    pub similarity: Option<u8>,
    /// Dissimilarity score from `dissimilarity index N%`, used for rewrite summaries.
    pub dissimilarity: Option<u8>,
    /// Hex object id prefixes from the `index <old>..<new>[ mode]` line, if any.
    /// Carried verbatim (abbreviated or full); the binary apply and the `-3`
    /// fallback need these to resolve the pre-/post-image blobs.
    pub old_oid_hex: Option<Vec<u8>>,
    /// The `<new>` half of the `index <old>..<new>` line; see `old_oid_hex`.
    pub new_oid_hex: Option<Vec<u8>>,
    /// True when the patch is binary: either a `GIT binary patch` block (with
    /// `binary` payload) or a metadata-only `Binary files ... differ` line
    /// (no payload — the postimage must be reconstructed from the object store).
    pub is_binary: bool,
    /// The `GIT binary patch` payload, when this is a binary file patch. The
    /// fragment bytes are still zlib-deflated (the caller inflates them with
    /// the recorded original length), matching git's two-hunk forward/reverse
    /// layout.
    pub binary: Option<BinaryPatch>,
    /// True for git (`diff --git`) patches, whose names are relative to the
    /// repository top-level; false for traditional diffs, whose names are
    /// relative to the current directory (git's `is_toplevel_relative`). The
    /// `apply` cwd-prefix is only prepended to non-toplevel-relative patches.
    pub is_toplevel_relative: bool,
}
4764
/// A `GIT binary patch` payload: a mandatory forward hunk (preimage → postimage)
/// and an optional reverse hunk (postimage → preimage), mirroring git's
/// `parse_binary`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BinaryPatch {
    /// The forward hunk (preimage → postimage); always present.
    pub forward: BinaryHunk,
    /// The reverse hunk (postimage → preimage), when the patch carries one.
    pub reverse: Option<BinaryHunk>,
}
4773
/// One binary hunk: the encoding method and the still-deflated data, plus the
/// declared original (inflated) length.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BinaryHunk {
    /// How the inflated bytes are to be interpreted (see [`BinaryMethod`]).
    pub method: BinaryMethod,
    /// Length of the data *after* inflation (the `literal <N>` / `delta <N>`
    /// number). The caller inflates `deflated` to exactly this many bytes.
    pub origlen: usize,
    /// base85-decoded, still zlib-deflated bytes.
    pub deflated: Vec<u8>,
}
4785
/// How a binary hunk encodes the postimage. Stored per hunk in
/// [`BinaryHunk::method`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryMethod {
    /// The inflated bytes ARE the postimage (`literal <N>`).
    Literal,
    /// The inflated bytes are a git delta to apply to the preimage (`delta <N>`).
    Delta,
}
4794
/// Outcome of applying a [`FilePatch`] to a base buffer.
///
/// Returned by [`apply_file_patch`] and [`apply_file_patch_with_options`],
/// which stop at the first hunk that cannot be placed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ApplyOutcome {
    /// The patch applied cleanly; carries the resulting file bytes.
    Applied(Vec<u8>),
    /// At least one hunk's context/deleted lines did not match the base.
    Rejected,
}
4803
/// The minimum number of context lines git's `apply` insists on keeping when
/// it tries to fuzz a hunk into place — git's `apply_state.p_context`, which is
/// initialised to `UINT_MAX` (the `-C<n>` option lowers it). The fuzz loop in
/// `apply_one_fragment` stops the moment both leading and trailing context have
/// been reduced to this floor; with the default `UINT_MAX` floor that test is
/// already satisfied on the first failure, so **the default `git apply` / `git
/// am` path does no context fuzz and no begin/end relaxation at all** — a hunk
/// whose full preimage does not match at a valid position is simply rejected.
/// We keep the floor as a named constant so the structure mirrors git's, but the
/// shared apply engine only ever runs with the default.
///
/// In this file the whitespace-aware path (`apply_one_fragment_ws`) compares
/// its leading/trailing context counts against this value; because every
/// `usize` is `<= usize::MAX`, that comparison always holds.
const MIN_FUZZ_CONTEXT: usize = usize::MAX;
4815
4816/// Parse a unified/git diff into one [`FilePatch`] per file it touches.
4817///
4818/// The parser is intentionally lenient about leading commentary (commit
4819/// messages, `index <oid>..<oid>` lines, etc.): anything that is not part of a
4820/// recognised header or hunk body is skipped. It errors only on structurally
4821/// invalid hunks (bad `@@` headers, body lines that overflow the declared hunk
4822/// counts, or hunk bodies that appear with no preceding file header).
4823pub fn parse_unified_patch(input: &[u8]) -> Result<Vec<FilePatch>> {
4824    parse_unified_patch_with_recount(input, false)
4825}
4826
4827/// Parse a unified/git diff, optionally ignoring hunk header line counts and
4828/// recounting them from the hunk body. This mirrors `git apply --recount`.
4829pub fn parse_unified_patch_with_recount(input: &[u8], recount: bool) -> Result<Vec<FilePatch>> {
4830    parse_unified_patch_with_options(input, recount, &PatchPathOptions::default())
4831}
4832
/// Path-resolution options for [`parse_unified_patch_with_options`], mirroring
/// `git apply`'s `-p<n>` strip (`p_value`) and `--directory=<root>` prefix.
///
/// Derives `Debug`, `PartialEq`, and `Eq` in addition to `Clone` so the options
/// can be logged and compared like the other public patch types in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PatchPathOptions {
    /// Number of leading path components to strip (`-p<n>`); default 1.
    pub p_value: usize,
    /// Whether `p_value` was given explicitly. When false, traditional (non-git)
    /// diffs guess it from the `---`/`+++` lines.
    pub p_value_known: bool,
    /// `--directory=<dir>` root, normalised with a trailing slash (or empty).
    pub root: Vec<u8>,
    /// The cwd prefix (`state->prefix`): the current directory relative to the
    /// work tree, with a trailing slash (empty at the top level). Used only to
    /// guess `-p<n>` for traditional patches run from a subdirectory; the prefix
    /// itself is prepended to names by the caller, not here.
    pub prefix: Vec<u8>,
}

impl Default for PatchPathOptions {
    /// The defaults: strip one component (`-p1`, not marked as explicit), with
    /// an empty `--directory` root and an empty cwd prefix.
    fn default() -> Self {
        Self {
            p_value: 1,
            p_value_known: false,
            root: Vec::new(),
            prefix: Vec::new(),
        }
    }
}
4861
4862/// Parse a unified/git diff, applying `-p<n>` strip and `--directory` prefix to
4863/// every resolved pathname exactly as `git apply` does.
4864pub fn parse_unified_patch_with_options(
4865    input: &[u8],
4866    recount: bool,
4867    options: &PatchPathOptions,
4868) -> Result<Vec<FilePatch>> {
4869    let lines = split_patch_lines(input);
4870    let mut parser = PatchParser {
4871        lines: &lines,
4872        index: 0,
4873        recount,
4874        p_value: options.p_value,
4875        p_value_known: options.p_value_known,
4876        root: options.root.clone(),
4877        prefix: options.prefix.clone(),
4878    };
4879    parser.parse()
4880}
4881
4882/// Apply a single-file patch to `base`, returning the patched bytes.
4883///
4884/// This mirrors git's `apply.c` (`apply_one_fragment` / `find_pos` /
4885/// `match_fragment`) for the default, no-whitespace-fuzz settings `git am`
4886/// and `git apply` use:
4887///
4888/// * Each hunk builds a *preimage* (context + deleted lines) and *postimage*
4889///   (context + inserted lines).
4890/// * A hunk anchored at the file start (`old_start <= 1`) must match the
4891///   beginning of the file (`match_beginning`); a hunk with no trailing context
4892///   must match the end of the file (`match_end`).
4893/// * The full preimage is matched byte-for-byte; the search starts at the
4894///   recorded position and ping-pongs outward across the whole image.
4895/// * Fuzz is applied *only* by dropping leading/trailing context lines (never
4896///   by jumping to a spurious context-only match); if no position matches even
4897///   after dropping all context, the hunk — and thus the whole patch — is
4898///   [`ApplyOutcome::Rejected`].
4899///
4900/// Rejecting (rather than spuriously applying at a wrong offset) is what lets
4901/// `git am -3` correctly fall back to its 3-way merge path.
4902///
4903/// New-file patches (empty/ignored base) and the no-final-newline case are
4904/// handled byte-accurately. Clean exact-position applies are byte-identical to
4905/// the previous behaviour.
4906pub fn apply_file_patch(base: &[u8], patch: &FilePatch) -> ApplyOutcome {
4907    apply_file_patch_with_options(base, patch, &ApplyFileOptions::default())
4908}
4909
/// Options for [`apply_file_patch_with_options`], mirroring the `git apply`
/// flags that change fragment placement.
///
/// Derives `Debug`, `PartialEq`, and `Eq` alongside `Clone`/`Default` so the
/// options can be logged and compared like the other public patch types.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ApplyFileOptions {
    /// `--unidiff-zero`: trust the line numbers of context-free hunks instead of
    /// forcing them to anchor at the file's beginning/end. Defaults to `false`.
    pub unidiff_zero: bool,
}
4918
4919/// Reverse a file patch (`git apply -R`): swap the old/new names, modes, hunk
4920/// ranges, and no-newline flags, exchange add↔delete status, and flip every
4921/// `Insert`/`Delete` line. Applying the result undoes the original patch.
4922pub fn reverse_file_patch(patch: &FilePatch) -> FilePatch {
4923    let hunks = patch
4924        .hunks
4925        .iter()
4926        .map(|hunk| {
4927            let lines = hunk
4928                .lines
4929                .iter()
4930                .map(|line| match line {
4931                    HunkLine::Context(b) => HunkLine::Context(b.clone()),
4932                    HunkLine::Insert(b) => HunkLine::Delete(b.clone()),
4933                    HunkLine::Delete(b) => HunkLine::Insert(b.clone()),
4934                })
4935                .collect();
4936            Hunk {
4937                old_start: hunk.new_start,
4938                old_len: hunk.new_len,
4939                new_start: hunk.old_start,
4940                new_len: hunk.old_len,
4941                lines,
4942                old_no_newline: hunk.new_no_newline,
4943                new_no_newline: hunk.old_no_newline,
4944                // Reversal keeps the line order (only the +/- sense flips), so the
4945                // per-line patch-input line numbers carry over unchanged.
4946                line_input_lines: hunk.line_input_lines.clone(),
4947            }
4948        })
4949        .collect();
4950    // git's `reverse_patches` only swaps the modes when the patch actually
4951    // carries a new mode (a mode change) or is a deletion; a content-only patch
4952    // keeps its (old) mode so the type-mismatch check still compares against it.
4953    let (old_mode, new_mode) = if patch.new_mode.is_some() || patch.is_delete {
4954        (patch.new_mode, patch.old_mode)
4955    } else {
4956        (patch.old_mode, patch.new_mode)
4957    };
4958    FilePatch {
4959        old_path: patch.new_path.clone(),
4960        new_path: patch.old_path.clone(),
4961        old_mode,
4962        new_mode,
4963        hunks,
4964        is_new: patch.is_delete,
4965        is_delete: patch.is_new,
4966        is_rename: patch.is_rename,
4967        is_copy: patch.is_copy,
4968        similarity: patch.similarity,
4969        dissimilarity: patch.dissimilarity,
4970        // Swap the index OIDs so a reverse-applied binary patch resolves the
4971        // (formerly new) preimage and (formerly old) postimage correctly.
4972        old_oid_hex: patch.new_oid_hex.clone(),
4973        new_oid_hex: patch.old_oid_hex.clone(),
4974        is_binary: patch.is_binary,
4975        binary: patch.binary.as_ref().map(|binary| BinaryPatch {
4976            // `-R` swaps forward and reverse hunks (git's apply_in_reverse).
4977            forward: binary
4978                .reverse
4979                .clone()
4980                .unwrap_or_else(|| binary.forward.clone()),
4981            reverse: Some(binary.forward.clone()),
4982        }),
4983        is_toplevel_relative: patch.is_toplevel_relative,
4984    }
4985}
4986
4987/// Apply a single-file patch with explicit fragment-placement options.
4988pub fn apply_file_patch_with_options(
4989    base: &[u8],
4990    patch: &FilePatch,
4991    options: &ApplyFileOptions,
4992) -> ApplyOutcome {
4993    // A pure deletion with no hunks yields an empty file.
4994    if patch.is_delete && patch.hunks.is_empty() {
4995        return ApplyOutcome::Applied(Vec::new());
4996    }
4997    // A new file: the only sensible base is empty; ignore whatever was passed
4998    // and build the result from the inserted lines.
4999    let base_for_match: &[u8] = if patch.is_new { b"" } else { base };
5000
5001    // The "image" git mutates as each hunk applies. We splice in place so later
5002    // hunks see the effect of earlier ones (git carries the running offset for
5003    // the same reason).
5004    let mut image = split_blob_lines(base_for_match);
5005
5006    // git seeds the search for hunk N at `newpos-1` *plus* the offset earlier
5007    // hunks drifted by, so a uniform shift only costs the search once.
5008    let mut running_offset: isize = 0;
5009
5010    for hunk in &patch.hunks {
5011        match apply_one_hunk(&mut image, hunk, running_offset, options.unidiff_zero) {
5012            Some(drift) => running_offset += drift,
5013            None => return ApplyOutcome::Rejected,
5014        }
5015    }
5016
5017    ApplyOutcome::Applied(join_lines(&image))
5018}
5019
/// The outcome of a hunk-by-hunk apply (`git apply --reject`), as returned by
/// [`apply_file_patch_rejecting`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RejectApply {
    /// The bytes after every hunk that applied (rejected hunks are skipped).
    pub content: Vec<u8>,
    /// Indices into `patch.hunks` of the hunks that did not apply.
    pub rejected: Vec<usize>,
}
5028
5029/// Apply a single-file patch hunk-by-hunk, collecting the hunks that do not
5030/// apply rather than rejecting the whole patch — `git apply --reject`.
5031///
5032/// Each hunk is tried independently against the running image; an applied hunk
5033/// contributes its offset to later hunks (git's `apply_fragments` carries the
5034/// running line shift), a rejected hunk is recorded and left out. The returned
5035/// `content` is the image after all applicable hunks; `rejected` lists the
5036/// 0-based indices of the hunks the caller must write to `<file>.rej`.
5037pub fn apply_file_patch_rejecting(
5038    base: &[u8],
5039    patch: &FilePatch,
5040    options: &ApplyFileOptions,
5041) -> RejectApply {
5042    if patch.is_delete && patch.hunks.is_empty() {
5043        return RejectApply {
5044            content: Vec::new(),
5045            rejected: Vec::new(),
5046        };
5047    }
5048    let base_for_match: &[u8] = if patch.is_new { b"" } else { base };
5049    let mut image = split_blob_lines(base_for_match);
5050    let mut running_offset: isize = 0;
5051    let mut rejected = Vec::new();
5052    for (index, hunk) in patch.hunks.iter().enumerate() {
5053        match apply_one_hunk(&mut image, hunk, running_offset, options.unidiff_zero) {
5054            Some(drift) => running_offset += drift,
5055            None => rejected.push(index),
5056        }
5057    }
5058    RejectApply {
5059        content: join_lines(&image),
5060        rejected,
5061    }
5062}
5063
5064/// Reconstruct the unified-diff text of one hunk for a `.rej` file. Mirrors the
5065/// raw fragment text git copies into `<file>.rej`: the `@@ -os[,oc] +ns[,nc] @@`
5066/// header (the `,1` count is omitted, matching git) followed by each line with
5067/// its ` `/`+`/`-` prefix, plus the `\ No newline at end of file` note where the
5068/// old/new side's final line is unterminated.
5069pub fn render_reject_hunk(hunk: &Hunk) -> Vec<u8> {
5070    fn range(start: usize, count: usize) -> String {
5071        if count == 1 {
5072            start.to_string()
5073        } else {
5074            format!("{start},{count}")
5075        }
5076    }
5077    let mut out = Vec::new();
5078    out.extend_from_slice(b"@@ -");
5079    out.extend_from_slice(range(hunk.old_start, hunk.old_len).as_bytes());
5080    out.extend_from_slice(b" +");
5081    out.extend_from_slice(range(hunk.new_start, hunk.new_len).as_bytes());
5082    out.extend_from_slice(b" @@\n");
5083    // The last old-side line is the last Context/Delete; the last new-side line
5084    // is the last Context/Insert. Their no-newline state drives the markers.
5085    let last_old = hunk
5086        .lines
5087        .iter()
5088        .rposition(|line| matches!(line, HunkLine::Context(_) | HunkLine::Delete(_)));
5089    let last_new = hunk
5090        .lines
5091        .iter()
5092        .rposition(|line| matches!(line, HunkLine::Context(_) | HunkLine::Insert(_)));
5093    for (index, line) in hunk.lines.iter().enumerate() {
5094        let (prefix, content) = match line {
5095            HunkLine::Context(bytes) => (b' ', bytes),
5096            HunkLine::Insert(bytes) => (b'+', bytes),
5097            HunkLine::Delete(bytes) => (b'-', bytes),
5098        };
5099        out.push(prefix);
5100        out.extend_from_slice(content);
5101        out.push(b'\n');
5102        let old_incomplete = hunk.old_no_newline && Some(index) == last_old;
5103        let new_incomplete = hunk.new_no_newline && Some(index) == last_new;
5104        if old_incomplete || new_incomplete {
5105            out.extend_from_slice(b"\\ No newline at end of file\n");
5106        }
5107    }
5108    out
5109}
5110
5111// ---------------------------------------------------------------------------
5112// Whitespace-aware apply (`git apply --whitespace=fix` / `--ignore-space-change`)
5113//
5114// A faithful port of git's `apply_one_fragment` matching path (`match_fragment`,
5115// `find_pos`, `line_by_line_fuzzy_match`, `update_pre_post_images`,
5116// `update_image`). It adds the matching concerns that need the whitespace rule —
5117// blank-at-EOF tolerance, whitespace-corrected / whitespace-ignoring context
5118// matching, and removal of newly-added blank lines at EOF — on top of the plain
5119// exact-match engine above. The patch's `+` lines are expected to already carry
5120// their whitespace fixes (the apply command's whitespace pass applies them);
5121// this routine fixes the *context* lines as part of matching.
5122// ---------------------------------------------------------------------------
5123
/// Options for [`apply_file_patch_ws`].
///
/// The struct is `Copy`; the apply routines take it by shared reference.
#[derive(Clone, Copy)]
pub struct WsApplyOptions {
    /// `--unidiff-zero`. When set, a hunk starting at line 1 is not forced to
    /// match at the beginning of the image and a hunk without trailing context
    /// is not forced to match at its end.
    pub unidiff_zero: bool,
    /// The per-path whitespace rule. The apply path tests its
    /// `ws::WS_BLANK_AT_EOF` bit to decide on blank-at-EOF handling.
    pub ws_rule: ws::WsRule,
    /// `--whitespace=fix` (git's `correct_ws_error`).
    pub ws_fix: bool,
    /// `--ignore-space-change` / `--ignore-whitespace` (git's `ignore_ws_change`).
    pub ws_ignore_change: bool,
}
5136
/// Outcome of [`apply_file_patch_ws`].
///
/// Derives `Debug`, `Clone`, `PartialEq`, and `Eq`, matching [`ApplyOutcome`],
/// so results can be logged and asserted on directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WsApplyOutcome {
    /// Applied; carries the bytes and the count of blank lines removed at EOF.
    Applied {
        /// The patched file bytes.
        content: Vec<u8>,
        /// Number of newly added blank lines dropped at the end of the file.
        blank_at_eof_removed: usize,
    },
    /// At least one hunk could not be placed.
    Rejected,
}
5147
/// One line of a hunk's preimage or postimage, tagged with git's `LINE_COMMON`
/// flag: `common` is set for context lines and clear for added/deleted lines.
#[derive(Clone)]
struct WsImageLine {
    /// Line bytes without the trailing newline.
    content: Vec<u8>,
    /// True when the line is not terminated by `\n`.
    no_newline: bool,
    /// True for context lines (git's `LINE_COMMON`).
    common: bool,
}

impl WsImageLine {
    /// The line as it appears in the file: the content followed by `\n`,
    /// unless the line is flagged as unterminated.
    fn bytes(&self) -> Vec<u8> {
        let terminator: &[u8] = if self.no_newline { b"" } else { b"\n" };
        [self.content.as_slice(), terminator].concat()
    }
}
5166
5167fn line_bytes(line: &Line) -> Vec<u8> {
5168    let mut out = line.content.clone();
5169    if !line.no_newline {
5170        out.push(b'\n');
5171    }
5172    out
5173}
5174
5175/// Split ws-fixed line bytes back into a [`WsImageLine`] (content sans trailing
5176/// newline, plus the no-newline flag).
5177fn ws_line_from_bytes(bytes: Vec<u8>, common: bool) -> WsImageLine {
5178    if bytes.last() == Some(&b'\n') {
5179        WsImageLine {
5180            content: bytes[..bytes.len() - 1].to_vec(),
5181            no_newline: false,
5182            common,
5183        }
5184    } else {
5185        WsImageLine {
5186            content: bytes,
5187            no_newline: true,
5188            common,
5189        }
5190    }
5191}
5192
5193/// Whitespace-aware single-file apply — git's `apply_one_fragment` matching path.
5194pub fn apply_file_patch_ws(
5195    base: &[u8],
5196    patch: &FilePatch,
5197    opts: &WsApplyOptions,
5198) -> WsApplyOutcome {
5199    if patch.is_delete && patch.hunks.is_empty() {
5200        return WsApplyOutcome::Applied {
5201            content: Vec::new(),
5202            blank_at_eof_removed: 0,
5203        };
5204    }
5205    let base_for_match: &[u8] = if patch.is_new { b"" } else { base };
5206    let mut image = split_blob_lines(base_for_match);
5207    let mut running_offset: isize = 0;
5208    let mut blank_removed = 0usize;
5209    for hunk in &patch.hunks {
5210        match apply_one_fragment_ws(&mut image, hunk, running_offset, opts, &mut blank_removed) {
5211            Some(drift) => running_offset += drift,
5212            None => return WsApplyOutcome::Rejected,
5213        }
5214    }
5215    WsApplyOutcome::Applied {
5216        content: join_lines(&image),
5217        blank_at_eof_removed: blank_removed,
5218    }
5219}
5220
/// Place a single hunk into `image` with the whitespace-aware matcher.
///
/// Builds the hunk's preimage (context + deleted lines) and postimage
/// (context + inserted lines), locates a matching position with
/// [`find_pos_ws`], and splices the postimage over the matched preimage lines.
///
/// * `running_offset` — drift accumulated by earlier hunks; it feeds the
///   expected start position.
/// * `blank_removed` — incremented by the number of newly added blank lines
///   dropped at EOF (only when the blank-at-EOF rule and `ws_fix` are on).
///
/// Returns `Some(drift)`, the difference between the position the hunk was
/// applied at and the position it was first expected at, or `None` when no
/// position matches.
fn apply_one_fragment_ws(
    image: &mut Vec<Line>,
    hunk: &Hunk,
    running_offset: isize,
    opts: &WsApplyOptions,
    blank_removed: &mut usize,
) -> Option<isize> {
    let blank_eof = opts.ws_rule & ws::WS_BLANK_AT_EOF != 0;
    let mut preimage: Vec<WsImageLine> = Vec::new();
    let mut postimage: Vec<WsImageLine> = Vec::new();
    // `leading` counts context lines before the first change; `trailing`
    // counts context lines since the most recent change.
    let mut leading = 0usize;
    let mut trailing = 0usize;
    let mut seen_change = false;
    // git's `new_blank_lines_at_end`: blank lines added at the end, where a blank
    // *context* line does not reset the run but a non-blank line does.
    let mut new_blank_lines_at_end = 0usize;
    for hl in &hunk.lines {
        let mut added_blank_line = false;
        let mut is_blank_context = false;
        match hl {
            HunkLine::Context(bytes) => {
                if blank_eof && ws::ws_blank_line(bytes) {
                    is_blank_context = true;
                }
                // Context lines belong to both images and carry `common`.
                preimage.push(WsImageLine {
                    content: bytes.clone(),
                    no_newline: false,
                    common: true,
                });
                postimage.push(WsImageLine {
                    content: bytes.clone(),
                    no_newline: false,
                    common: true,
                });
                if !seen_change {
                    leading += 1;
                }
                trailing += 1;
            }
            HunkLine::Delete(bytes) => {
                // Deleted lines exist only in the preimage.
                preimage.push(WsImageLine {
                    content: bytes.clone(),
                    no_newline: false,
                    common: false,
                });
                seen_change = true;
                trailing = 0;
            }
            HunkLine::Insert(bytes) => {
                // Inserted lines exist only in the postimage.
                postimage.push(WsImageLine {
                    content: bytes.clone(),
                    no_newline: false,
                    common: false,
                });
                if blank_eof && ws::ws_blank_line(bytes) {
                    added_blank_line = true;
                }
                seen_change = true;
                trailing = 0;
            }
        }
        if added_blank_line {
            new_blank_lines_at_end += 1;
        } else if is_blank_context {
            // leave the running count alone
        } else {
            new_blank_lines_at_end = 0;
        }
    }
    // The hunk-level no-newline flags apply to the last line of each image.
    if hunk.old_no_newline
        && let Some(last) = preimage.last_mut()
    {
        last.no_newline = true;
    }
    if hunk.new_no_newline
        && let Some(last) = postimage.last_mut()
    {
        last.no_newline = true;
    }

    // Anchoring: a hunk starting at line 0, or at line 1 without
    // `--unidiff-zero`, must match at the beginning; a hunk without trailing
    // context (and without `--unidiff-zero`) must match at the end.
    let mut match_beginning = hunk.old_start == 0 || (hunk.old_start == 1 && !opts.unidiff_zero);
    let mut match_end = !opts.unidiff_zero && trailing == 0;

    // With an empty preimage the position is derived from the new side.
    let mut expected = if preimage.is_empty() {
        new_side_position(hunk, running_offset)
    } else {
        expected_position(hunk, running_offset)
    };
    // Remember the original expectation; the returned drift is relative to it.
    let hunk_expected = expected;
    let mut leading_v = leading;
    let mut trailing_v = trailing;

    let applied_pos = loop {
        if let Some(pos) = find_pos_ws(
            image,
            &mut preimage,
            &mut postimage,
            expected,
            opts,
            match_beginning,
            match_end,
        ) {
            break pos;
        }
        // `MIN_FUZZ_CONTEXT` is `usize::MAX`, so this comparison always holds:
        // the first failed search rejects the hunk and the relaxation code
        // below is never reached with the current constant.
        #[allow(clippy::absurd_extreme_comparisons)]
        if leading_v <= MIN_FUZZ_CONTEXT && trailing_v <= MIN_FUZZ_CONTEXT {
            return None;
        }
        // First relaxation: give up the beginning/end anchoring and retry.
        if match_beginning || match_end {
            match_beginning = false;
            match_end = false;
            continue;
        }
        // Then shed one context line from the larger side (or from both when
        // the sides are equal) before searching again.
        if leading_v >= trailing_v {
            preimage.remove(0);
            postimage.remove(0);
            expected -= 1;
            leading_v -= 1;
        }
        if trailing_v > leading_v {
            preimage.pop();
            postimage.pop();
            trailing_v -= 1;
        }
    };

    // Remove the blank lines added at EOF when the hunk lands at (or beyond) the
    // end of the image — git's `--whitespace=fix` blank-at-EOF correction.
    if new_blank_lines_at_end > 0
        && preimage.len() + applied_pos >= image.len()
        && blank_eof
        && opts.ws_fix
    {
        for _ in 0..new_blank_lines_at_end {
            postimage.pop();
        }
        *blank_removed += new_blank_lines_at_end;
    }

    // git's `update_image`: the preimage may extend beyond EOF, so only the part
    // that falls within the image is removed.
    // `find_pos_ws` only returns positions in `0..=image.len()`, so the
    // subtraction cannot underflow.
    let preimage_limit = preimage.len().min(image.len() - applied_pos);
    let replacement: Vec<Line> = postimage
        .iter()
        .map(|line| Line {
            content: line.content.clone(),
            no_newline: line.no_newline,
        })
        .collect();
    image.splice(applied_pos..applied_pos + preimage_limit, replacement);
    Some(applied_pos as isize - hunk_expected)
}
5373
/// Port of git's `find_pos`: ping-pong outward from `expected` calling
/// [`match_fragment_ws`] at each candidate line. On a match the preimage and the
/// common lines of the postimage may be rewritten in place (whitespace fix).
///
/// The starting line is 0 when `match_beginning` is set, `image.len() -
/// preimage.len()` when only `match_end` is set, and `expected` otherwise; it
/// is then clamped into `0..=image.len()`.
///
/// Returns the matching line index (in `0..=image.len()`), or `None` once
/// every candidate in both directions has been tried.
fn find_pos_ws(
    image: &[Line],
    preimage: &mut Vec<WsImageLine>,
    postimage: &mut Vec<WsImageLine>,
    expected: isize,
    opts: &WsApplyOptions,
    match_beginning: bool,
    match_end: bool,
) -> Option<usize> {
    let line_nr = image.len();
    let pre_nr = preimage.len();
    let mut line: isize = if match_beginning {
        0
    } else if match_end {
        line_nr as isize - pre_nr as isize
    } else {
        expected
    };
    // NOTE(review): a negative start is clamped to 0 here. git's C `find_pos`
    // appears to rely on an unsigned comparison that sends a negative start to
    // the end of the image instead — confirm the clamp-to-0 is intentional.
    if line < 0 {
        line = 0;
    }
    if line as usize > line_nr {
        line = line_nr as isize;
    }
    let start = line as usize;
    // `backwards` / `forwards` are the furthest lines tried in each direction;
    // `current` is the candidate being tested.
    let mut backwards = start;
    let mut forwards = start;
    let mut current = start;
    // Step counter whose parity selects the direction: even steps move
    // forwards, odd steps move backwards.
    let mut i: u64 = 0;
    loop {
        if match_fragment_ws(
            image,
            preimage,
            postimage,
            current,
            opts,
            match_beginning,
            match_end,
        ) {
            return Some(current);
        }
        // Pick the next candidate, skipping a direction that is exhausted.
        loop {
            // Both directions exhausted: nothing matched.
            if backwards == 0 && forwards == line_nr {
                return None;
            }
            if i & 1 == 1 {
                if backwards == 0 {
                    // Cannot go further back; flip the parity to go forwards.
                    i += 1;
                    continue;
                }
                backwards -= 1;
                current = backwards;
            } else {
                if forwards == line_nr {
                    // Cannot go further forward; flip the parity to go back.
                    i += 1;
                    continue;
                }
                forwards += 1;
                current = forwards;
            }
            break;
        }
        i += 1;
    }
}
5442
5443/// Port of git's `match_fragment`. Returns whether `preimage` matches `image` at
5444/// `current_lno`, trying (in order) an exact match, then — when `--whitespace=fix`
5445/// or `--ignore-space-change` is in effect — a whitespace-corrected or
5446/// whitespace-ignoring match, rewriting the preimage and the postimage's common
5447/// lines to the matched whitespace on success.
5448fn match_fragment_ws(
5449    image: &[Line],
5450    preimage: &mut Vec<WsImageLine>,
5451    postimage: &mut Vec<WsImageLine>,
5452    current_lno: usize,
5453    opts: &WsApplyOptions,
5454    match_beginning: bool,
5455    match_end: bool,
5456) -> bool {
5457    let blank_eof = opts.ws_rule & ws::WS_BLANK_AT_EOF != 0;
5458    let preimage_limit: usize;
5459    if preimage.len() + current_lno <= image.len() {
5460        preimage_limit = preimage.len();
5461        if match_end && (preimage.len() + current_lno != image.len()) {
5462            return false;
5463        }
5464    } else if opts.ws_fix && blank_eof {
5465        // The hunk extends beyond EOF and we are removing blank lines there; only
5466        // the in-image prefix must match, the rest of the preimage must be blank.
5467        preimage_limit = image.len() - current_lno;
5468    } else {
5469        return false;
5470    }
5471
5472    if match_beginning && current_lno != 0 {
5473        return false;
5474    }
5475
5476    if preimage_limit == preimage.len() {
5477        // Try an exact byte match of the whole preimage.
5478        let mut exact = true;
5479        if match_end && current_lno + preimage_limit != image.len() {
5480            exact = false;
5481        }
5482        if exact {
5483            for i in 0..preimage_limit {
5484                let img = &image[current_lno + i];
5485                let pre = &preimage[i];
5486                if img.content != pre.content || img.no_newline != pre.no_newline {
5487                    exact = false;
5488                    break;
5489                }
5490            }
5491        }
5492        if exact {
5493            return true;
5494        }
5495    } else {
5496        // The preimage extends beyond EOF: there must be at least one non-blank
5497        // context line within the in-image prefix.
5498        let mut all_blank = true;
5499        for line in preimage.iter().take(preimage_limit) {
5500            if !line.content.iter().all(|&b| ws::is_space(b)) {
5501                all_blank = false;
5502                break;
5503            }
5504        }
5505        if all_blank {
5506            return false;
5507        }
5508    }
5509
5510    // No exact match. Try fuzzy / whitespace-corrected matching.
5511    if opts.ws_ignore_change {
5512        return fuzzy_match_ws(image, preimage, postimage, current_lno, preimage_limit);
5513    }
5514    if !opts.ws_fix {
5515        return false;
5516    }
5517
5518    // Whitespace-corrected match: fix the in-image preimage lines and the target
5519    // lines, requiring equality; the beyond-EOF preimage lines must become blank.
5520    let mut fixed: Vec<WsImageLine> = Vec::with_capacity(preimage.len());
5521    for i in 0..preimage_limit {
5522        let fixed_pre = ws::ws_fix_bytes(&preimage[i].bytes(), opts.ws_rule);
5523        let fixed_tgt = ws::ws_fix_bytes(&line_bytes(&image[current_lno + i]), opts.ws_rule);
5524        if fixed_pre != fixed_tgt {
5525            return false;
5526        }
5527        fixed.push(ws_line_from_bytes(fixed_pre, preimage[i].common));
5528    }
5529    for line in preimage.iter().skip(preimage_limit) {
5530        let fixed_pre = ws::ws_fix_bytes(&line.bytes(), opts.ws_rule);
5531        if !fixed_pre.iter().all(|&b| ws::is_space(b)) {
5532            return false;
5533        }
5534        fixed.push(ws_line_from_bytes(fixed_pre, line.common));
5535    }
5536    update_pre_post_images_ws(preimage, postimage, fixed);
5537    true
5538}
5539
5540/// Port of git's `line_by_line_fuzzy_match` (the `--ignore-space-change` path):
5541/// compare each line ignoring whitespace runs; on success the matched lines use
5542/// the *target's* whitespace in-image and the *preimage's* whitespace beyond EOF.
5543fn fuzzy_match_ws(
5544    image: &[Line],
5545    preimage: &mut Vec<WsImageLine>,
5546    postimage: &mut Vec<WsImageLine>,
5547    current_lno: usize,
5548    preimage_limit: usize,
5549) -> bool {
5550    for i in 0..preimage_limit {
5551        if !fuzzy_matchlines(&line_bytes(&image[current_lno + i]), &preimage[i].bytes()) {
5552            return false;
5553        }
5554    }
5555    // The beyond-EOF preimage lines must be all whitespace.
5556    for line in preimage.iter().skip(preimage_limit) {
5557        if !line.bytes().iter().all(|&b| ws::is_space(b)) {
5558            return false;
5559        }
5560    }
5561    // Build the fixed preimage: in-image lines take the target's whitespace, the
5562    // beyond-EOF lines keep the preimage's whitespace.
5563    let mut fixed: Vec<WsImageLine> = Vec::with_capacity(preimage.len());
5564    for i in 0..preimage_limit {
5565        let img = &image[current_lno + i];
5566        fixed.push(WsImageLine {
5567            content: img.content.clone(),
5568            no_newline: img.no_newline,
5569            common: preimage[i].common,
5570        });
5571    }
5572    for line in preimage.iter().skip(preimage_limit) {
5573        fixed.push(line.clone());
5574    }
5575    update_pre_post_images_ws(preimage, postimage, fixed);
5576    true
5577}
5578
5579/// Port of git's `fuzzy_matchlines`: compare two lines ignoring whitespace
5580/// differences (any whitespace run matches any other; line endings are ignored).
5581fn fuzzy_matchlines(s1: &[u8], s2: &[u8]) -> bool {
5582    let trim = |s: &[u8]| {
5583        let mut end = s.len();
5584        while end > 0 && (s[end - 1] == b'\r' || s[end - 1] == b'\n') {
5585            end -= 1;
5586        }
5587        end
5588    };
5589    let end1 = trim(s1);
5590    let end2 = trim(s2);
5591    let (mut i, mut j) = (0usize, 0usize);
5592    while i < end1 && j < end2 {
5593        if ws::is_space(s1[i]) {
5594            if !ws::is_space(s2[j]) {
5595                return false;
5596            }
5597            while i < end1 && ws::is_space(s1[i]) {
5598                i += 1;
5599            }
5600            while j < end2 && ws::is_space(s2[j]) {
5601                j += 1;
5602            }
5603        } else if s1[i] != s2[j] {
5604            return false;
5605        } else {
5606            i += 1;
5607            j += 1;
5608        }
5609    }
5610    i == end1 && j == end2
5611}
5612
5613/// Port of git's `update_pre_post_images`: replace the preimage with the fixed
5614/// lines (carrying the original common flags), then rewrite each *common* line of
5615/// the postimage to use the fixed preimage's content. A common postimage line
5616/// whose fixed-preimage counterpart ran out (a trailing blank trimmed at EOF) is
5617/// dropped (git's `reduced`).
5618fn update_pre_post_images_ws(
5619    preimage: &mut Vec<WsImageLine>,
5620    postimage: &mut Vec<WsImageLine>,
5621    fixed: Vec<WsImageLine>,
5622) {
5623    *preimage = fixed;
5624    let mut new_post: Vec<WsImageLine> = Vec::with_capacity(postimage.len());
5625    let mut ctx = 0usize;
5626    for line in postimage.iter() {
5627        if !line.common {
5628            new_post.push(line.clone());
5629            continue;
5630        }
5631        while ctx < preimage.len() && !preimage[ctx].common {
5632            ctx += 1;
5633        }
5634        if ctx >= preimage.len() {
5635            // preimage ran out (a fixed-away trailing blank): drop this line.
5636            continue;
5637        }
5638        new_post.push(WsImageLine {
5639            content: preimage[ctx].content.clone(),
5640            no_newline: preimage[ctx].no_newline,
5641            common: true,
5642        });
5643        ctx += 1;
5644    }
5645    *postimage = new_post;
5646}
5647
5648/// Splice a single hunk into `image`, returning the offset (applied position −
5649/// expected position) so later hunks can carry it forward, or `None` if the
5650/// hunk cannot be located (which rejects the whole patch).
5651///
5652/// Faithful to git's `apply_one_fragment`: build preimage/postimage, try the
5653/// full preimage at progressively-reduced context, and on a match replace the
5654/// matched preimage region with the postimage.
5655fn apply_one_hunk(
5656    image: &mut Vec<Line>,
5657    hunk: &Hunk,
5658    running_offset: isize,
5659    unidiff_zero: bool,
5660) -> Option<isize> {
5661    // preimage = context + deletes (the old side we must find in the image).
5662    // postimage = context + inserts (what replaces it). They share their
5663    // leading/trailing *context* runs, which fuzz peels off symmetrically.
5664    let mut preimage: Vec<Line> = Vec::new();
5665    let mut postimage: Vec<Line> = Vec::new();
5666    let mut leading = 0usize; // context lines before the first +/-
5667    let mut trailing = 0usize; // context lines after the last +/-
5668    let mut seen_change = false;
5669    for hl in &hunk.lines {
5670        match hl {
5671            HunkLine::Context(bytes) => {
5672                preimage.push(Line {
5673                    content: bytes.clone(),
5674                    no_newline: false,
5675                });
5676                postimage.push(Line {
5677                    content: bytes.clone(),
5678                    no_newline: false,
5679                });
5680                if !seen_change {
5681                    leading += 1;
5682                }
5683                trailing += 1;
5684            }
5685            HunkLine::Delete(bytes) => {
5686                preimage.push(Line {
5687                    content: bytes.clone(),
5688                    no_newline: false,
5689                });
5690                seen_change = true;
5691                trailing = 0;
5692            }
5693            HunkLine::Insert(bytes) => {
5694                postimage.push(Line {
5695                    content: bytes.clone(),
5696                    no_newline: false,
5697                });
5698                seen_change = true;
5699                trailing = 0;
5700            }
5701        }
5702    }
5703
5704    // Mark the no-final-newline state on the last preimage/postimage line so the
5705    // exact-match check and the spliced result reproduce a missing terminal
5706    // newline byte-for-byte.
5707    if hunk.old_no_newline
5708        && let Some(last) = preimage.last_mut()
5709    {
5710        last.no_newline = true;
5711    }
5712    if hunk.new_no_newline
5713        && let Some(last) = postimage.last_mut()
5714    {
5715        last.no_newline = true;
5716    }
5717
5718    // A hunk that is `@@ -1,L ... @@` (or `@@ -0,0 ... @@` for an add-to-empty)
5719    // must match the beginning, and a hunk with no trailing context must match
5720    // the end — UNLESS `--unidiff-zero` was given, which tells apply to trust the
5721    // line numbers of a context-free hunk (`match_beginning = !oldpos ||
5722    // (oldpos == 1 && !unidiff_zero)`, `match_end = !unidiff_zero && !trailing`).
5723    let mut match_beginning = hunk.old_start == 0 || (hunk.old_start == 1 && !unidiff_zero);
5724    let mut match_end = !unidiff_zero && trailing == 0;
5725
5726    // git anchors the search at `newpos-1` (0-based), carried by the running
5727    // offset from earlier hunks. The anchor (`pos` in git) shifts up whenever a
5728    // *leading* context line is peeled, because the preimage then begins one
5729    // line later in its own content. For a context-free pure insertion the
5730    // preimage is empty and matches anywhere, so the anchor alone decides the
5731    // result — there we must use the new-side line number exactly as git's
5732    // `newpos - 1` does (the old-side `oldpos` differs for `@@ -1,0 +2,1 @@`
5733    // inserts).
5734    let mut expected = if preimage.is_empty() {
5735        new_side_position(hunk, running_offset)
5736    } else {
5737        expected_position(hunk, running_offset)
5738    };
5739    // The full hunk's expected position never moves, so the returned drift is
5740    // measured against it (not the context-reduced anchor).
5741    let hunk_expected = expected;
5742
5743    loop {
5744        if let Some(pos) = find_hunk_pos(image, &preimage, expected, match_beginning, match_end) {
5745            // Splice: drop the matched preimage lines, insert the postimage.
5746            let take = preimage.len();
5747            let replacement: Vec<Line> = postimage.clone();
5748            image.splice(pos..pos + take, replacement);
5749            return Some(pos as isize - hunk_expected);
5750        }
5751
5752        // No position matched. Mirror git's guard *order* exactly: it first
5753        // checks whether context is already at the floor (`p_context`) and, if
5754        // so, gives up BEFORE relaxing match_beginning/match_end or peeling
5755        // context. With the default `UINT_MAX` floor this fires on the very
5756        // first failure, so the default path never fuzzes and never relaxes the
5757        // begin/end anchors — it rejects. (The comparison is intentionally
5758        // against the floor so the structure stays faithful to git even though
5759        // the default floor makes it unconditionally true.)
5760        #[allow(clippy::absurd_extreme_comparisons)]
5761        if leading <= MIN_FUZZ_CONTEXT && trailing <= MIN_FUZZ_CONTEXT {
5762            return None;
5763        }
5764
5765        // git relaxes the begin/end anchors before peeling context: a hunk that
5766        // "must match the start/end" but didn't is retried free-floating first.
5767        if match_beginning || match_end {
5768            match_beginning = false;
5769            match_end = false;
5770            continue;
5771        }
5772
5773        // Reduce context: peel the larger side (both if equal), exactly as git.
5774        if leading >= trailing {
5775            // Drop the first context line from pre+post; the anchor slides up.
5776            preimage.remove(0);
5777            postimage.remove(0);
5778            expected -= 1;
5779            leading -= 1;
5780        }
5781        if trailing > leading {
5782            preimage.pop();
5783            postimage.pop();
5784            trailing -= 1;
5785        }
5786    }
5787}
5788
/// One line of a blob or hunk image, stored without its line terminator.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Line {
    /// The line's bytes, excluding any trailing `\n`.
    content: Vec<u8>,
    /// `true` when the line is NOT followed by a `\n` (only possible for the
    /// final line of a file that lacks a terminating newline).
    no_newline: bool,
}
5795
5796/// Split a blob into [`Line`]s. A trailing `\n` does not produce an empty final
5797/// line; instead the last real line is marked `no_newline = false`. A file that
5798/// does not end in `\n` marks its final line `no_newline = true`. An empty blob
5799/// produces no lines.
5800fn split_blob_lines(data: &[u8]) -> Vec<Line> {
5801    let mut lines = Vec::new();
5802    let mut start = 0usize;
5803    while start < data.len() {
5804        match data[start..].iter().position(|&b| b == b'\n') {
5805            Some(rel) => {
5806                let end = start + rel;
5807                lines.push(Line {
5808                    content: data[start..end].to_vec(),
5809                    no_newline: false,
5810                });
5811                start = end + 1;
5812            }
5813            None => {
5814                lines.push(Line {
5815                    content: data[start..].to_vec(),
5816                    no_newline: true,
5817                });
5818                start = data.len();
5819            }
5820        }
5821    }
5822    lines
5823}
5824
5825/// Reassemble lines into a byte buffer, honouring per-line newline state.
5826fn join_lines(lines: &[Line]) -> Vec<u8> {
5827    let mut out = Vec::new();
5828    for line in lines {
5829        out.extend_from_slice(&line.content);
5830        if !line.no_newline {
5831            out.push(b'\n');
5832        }
5833    }
5834    out
5835}
5836
5837/// The naive 0-based position where a hunk expects to apply, given the running
5838/// offset accumulated from earlier hunks.
5839fn expected_position(hunk: &Hunk, running_offset: isize) -> isize {
5840    // `old_start` is 1-based; an empty old side (new-file hunk) uses 0.
5841    let base = if hunk.old_start == 0 {
5842        0
5843    } else {
5844        hunk.old_start as isize - 1
5845    };
5846    base + running_offset
5847}
5848
5849/// git's `pos = frag->newpos ? newpos - 1 : 0` anchor, used for a context-free
5850/// pure insertion whose empty preimage matches anywhere.
5851fn new_side_position(hunk: &Hunk, running_offset: isize) -> isize {
5852    let base = if hunk.new_start == 0 {
5853        0
5854    } else {
5855        hunk.new_start as isize - 1
5856    };
5857    base + running_offset
5858}
5859
5860/// Find the 0-based line index in `image` where `preimage` (the hunk's context
5861/// + deleted lines, possibly already context-reduced by fuzz) matches.
5862///
5863/// Port of git's `find_pos`: start the search at `expected` (clamped, or forced
5864/// to 0/end when `match_beginning`/`match_end`), then ping-pong outward across
5865/// the *whole* image — backward and forward alternately — until both ends are
5866/// exhausted. Returns the first matching line index, or `None`.
5867fn find_hunk_pos(
5868    image: &[Line],
5869    preimage: &[Line],
5870    expected: isize,
5871    match_beginning: bool,
5872    match_end: bool,
5873) -> Option<usize> {
5874    let line_nr = image.len();
5875    let pre_nr = preimage.len();
5876
5877    // git: if we must match the beginning, start at 0; if we must match the
5878    // end, start where the preimage would end exactly at EOF.
5879    let mut line: isize = if match_beginning {
5880        0
5881    } else if match_end {
5882        line_nr as isize - pre_nr as isize
5883    } else {
5884        expected
5885    };
5886    if line < 0 {
5887        line = 0;
5888    }
5889    if line as usize > line_nr {
5890        line = line_nr as isize;
5891    }
5892
5893    let start = line as usize;
5894    let mut backwards = start;
5895    let mut forwards = start;
5896    let mut current = start;
5897
5898    let mut i: u64 = 0;
5899    loop {
5900        if preimage_matches_at(image, preimage, current, match_beginning, match_end) {
5901            return Some(current);
5902        }
5903
5904        loop {
5905            // Both ends exhausted: no match anywhere.
5906            if backwards == 0 && forwards == line_nr {
5907                return None;
5908            }
5909            if i & 1 == 1 {
5910                // Step backward.
5911                if backwards == 0 {
5912                    i += 1;
5913                    continue;
5914                }
5915                backwards -= 1;
5916                current = backwards;
5917            } else {
5918                // Step forward.
5919                if forwards == line_nr {
5920                    i += 1;
5921                    continue;
5922                }
5923                forwards += 1;
5924                current = forwards;
5925            }
5926            break;
5927        }
5928        i += 1;
5929    }
5930}
5931
5932/// Whether `preimage` matches `image` starting at line `pos`.
5933///
5934/// Port of git's `match_fragment` for the default (no whitespace-fuzz) path:
5935/// a byte-exact full-preimage match. Honours `match_beginning` (pos must be 0)
5936/// and `match_end` (the preimage must reach *exactly* the end of the image),
5937/// and reproduces git's terminal-newline semantics — a preimage line marked
5938/// "no newline" only matches when it is the image's final line and that line is
5939/// itself newline-free.
5940fn preimage_matches_at(
5941    image: &[Line],
5942    preimage: &[Line],
5943    pos: usize,
5944    match_beginning: bool,
5945    match_end: bool,
5946) -> bool {
5947    if match_beginning && pos != 0 {
5948        return false;
5949    }
5950    // The whole preimage must fall within the image.
5951    if pos + preimage.len() > image.len() {
5952        return false;
5953    }
5954    if match_end && pos + preimage.len() != image.len() {
5955        return false;
5956    }
5957    for (i, pre) in preimage.iter().enumerate() {
5958        let img = &image[pos + i];
5959        if img.content != pre.content {
5960            return false;
5961        }
5962        // git compares the raw byte buffers, so a missing terminal newline on
5963        // either side only matches the other when both agree. A preimage line
5964        // that lacks a newline can only sit on the image's final line (which
5965        // must itself lack one); a preimage line that *has* a newline cannot
5966        // match a newline-free image line.
5967        if pre.no_newline != img.no_newline {
5968            return false;
5969        }
5970    }
5971    true
5972}
5973
/// Cut raw patch bytes into lines at every `\n`, dropping the `\n` itself.
///
/// A final line without a terminator is kept; a trailing `\n` does not produce
/// an extra empty line, and empty input yields no lines. Carriage returns are
/// left in place so CRLF patch bodies survive unchanged.
fn split_patch_lines(input: &[u8]) -> Vec<&[u8]> {
    let mut lines: Vec<&[u8]> = input.split(|&b| b == b'\n').collect();
    // `split` reports the (empty) piece after a trailing `\n`, and a single
    // empty piece for empty input; neither is a line.
    if lines.last().is_some_and(|last| last.is_empty()) {
        lines.pop();
    }
    lines
}
5995
/// Cursor-based parser over the lines of a patch.
struct PatchParser<'a> {
    /// The patch input, one entry per line, without line terminators.
    lines: &'a [&'a [u8]],
    /// 0-based index into `lines` of the line currently being looked at.
    index: usize,
    /// Not read in this part of the file; presumably the `--recount` switch —
    /// confirm at the use site.
    recount: bool,
    /// `-p<n>` strip count (git's `state->p_value`). Shared across the whole
    /// input so that a guessed value sticks for later traditional patches.
    p_value: usize,
    /// Not read in this part of the file; presumably records whether `p_value`
    /// was given or guessed already — confirm at the use site.
    p_value_known: bool,
    /// `--directory` root (normalised, with trailing slash), prepended to
    /// every name.
    root: Vec<u8>,
    /// The cwd prefix (git's `state->prefix`); used only to guess `-p<n>` for
    /// traditional patches run from a subdirectory.
    prefix: Vec<u8>,
}
6010
6011impl<'a> PatchParser<'a> {
6012    fn parse(&mut self) -> Result<Vec<FilePatch>> {
6013        let mut patches = Vec::new();
6014        while self.index < self.lines.len() {
6015            let line = self.lines[self.index];
6016            if line.starts_with(b"diff --git ") {
6017                patches.push(self.parse_file(Some(line))?);
6018            } else if line.starts_with(b"--- ") {
6019                // A bare unified diff with no `diff --git` header.
6020                patches.push(self.parse_file(None)?);
6021            } else if line.starts_with(b"@@ ") {
6022                return Err(GitError::InvalidFormat(
6023                    "hunk header encountered before any file header".to_string(),
6024                ));
6025            } else {
6026                // Skip commentary / unrelated lines.
6027                self.index += 1;
6028            }
6029        }
6030        Ok(patches)
6031    }
6032
6033    /// Parse one file's headers and hunks. When `diff_line` is `Some`, the
6034    /// current line is the `diff --git` header (already inspected by the
6035    /// caller); otherwise parsing starts at a `--- ` line of a traditional diff.
6036    fn parse_file(&mut self, diff_line: Option<&[u8]>) -> Result<FilePatch> {
6037        match diff_line {
6038            Some(diff_line) => self.parse_git_file(diff_line),
6039            None => self.parse_traditional_file(),
6040        }
6041    }
6042
6043    /// p_value with one component removed — git uses `p_value - 1` for the
6044    /// `rename`/`copy from`/`to` extended headers, whose names lack the `a/`/`b/`
6045    /// prefix the `---`/`+++` lines carry.
6046    fn p_minus_one(&self) -> usize {
6047        self.p_value.saturating_sub(1)
6048    }
6049
6050    /// Parse a git (`diff --git`) file section, resolving every pathname through
6051    /// git's `git_header_name` / `find_name` with the active `-p<n>`/`--directory`.
6052    fn parse_git_file(&mut self, diff_line: &[u8]) -> Result<FilePatch> {
6053        let mut patch = empty_file_patch();
6054        // `def_name`: the common name from the `diff --git` line, used when the
6055        // section carries no explicit `---`/`+++`/rename names.
6056        let rest = &diff_line[b"diff --git ".len()..];
6057        let mut def_name = name::git_header_name(self.p_value, rest);
6058        if let (Some(d), false) = (def_name.as_mut(), self.root.is_empty()) {
6059            let mut s = self.root.clone();
6060            s.extend_from_slice(d);
6061            *d = s;
6062        }
6063        self.index += 1;
6064
6065        // Git patches name files relative to the repository top-level, so the
6066        // `apply` cwd-prefix is never prepended to them (git's is_toplevel_relative).
6067        patch.is_toplevel_relative = true;
6068
6069        // Set once a `GIT binary patch` / `Binary files … differ` body is seen,
6070        // so the file is not run through the textual hunk parser afterwards.
6071        let mut binary_seen = false;
6072
6073        // Extended headers until the first `---`/`@@`/next `diff --git`.
6074        while self.index < self.lines.len() {
6075            let line = self.lines[self.index];
6076            if line.starts_with(b"--- ") {
6077                self.parse_git_old_header(&line[b"--- ".len()..], &mut patch);
6078                self.index += 1;
6079                break;
6080            } else if line.starts_with(b"@@ ") {
6081                // No `---`/`+++` (e.g. pure rename or mode change with no body).
6082                break;
6083            } else if line.starts_with(b"diff --git ") {
6084                // Next file began with no body for this one.
6085                break;
6086            } else if let Some(rest) = strip_prefix(line, b"old mode ") {
6087                patch.old_mode = Some(self.parse_mode_line(rest)?);
6088            } else if let Some(rest) = strip_prefix(line, b"new mode ") {
6089                patch.new_mode = Some(self.parse_mode_line(rest)?);
6090            } else if let Some(rest) = strip_prefix(line, b"new file mode ") {
6091                patch.is_new = true;
6092                patch.new_mode = Some(self.parse_mode_line(rest)?);
6093                patch.new_path = def_name.clone();
6094            } else if let Some(rest) = strip_prefix(line, b"deleted file mode ") {
6095                patch.is_delete = true;
6096                patch.old_mode = Some(self.parse_mode_line(rest)?);
6097                patch.old_path = def_name.clone();
6098            } else if let Some(rest) = strip_prefix(line, b"index ") {
6099                // `index <old>..<new>[ <mode>]`: capture the blob OIDs (needed by
6100                // the binary apply and the `-3` fallback) and the unchanged-file
6101                // mode (git's gitdiff_index → gitdiff_oldmode).
6102                self.parse_index_line(rest, &mut patch)?;
6103            } else if let Some(rest) = strip_prefix(line, b"rename from ") {
6104                patch.is_rename = true;
6105                patch.old_path = name::find_name(rest, None, self.p_minus_one(), 0, &self.root);
6106            } else if let Some(rest) = strip_prefix(line, b"rename to ") {
6107                patch.is_rename = true;
6108                patch.new_path = name::find_name(rest, None, self.p_minus_one(), 0, &self.root);
6109            } else if let Some(rest) = strip_prefix(line, b"copy from ") {
6110                patch.is_copy = true;
6111                patch.old_path = name::find_name(rest, None, self.p_minus_one(), 0, &self.root);
6112            } else if let Some(rest) = strip_prefix(line, b"copy to ") {
6113                patch.is_copy = true;
6114                patch.new_path = name::find_name(rest, None, self.p_minus_one(), 0, &self.root);
6115            } else if let Some(rest) = strip_prefix(line, b"similarity index ") {
6116                patch.similarity = parse_percent(rest);
6117            } else if let Some(rest) = strip_prefix(line, b"dissimilarity index ") {
6118                patch.dissimilarity = parse_percent(rest);
6119            } else if line == b"GIT binary patch" {
6120                // The binary payload follows (no `---`/`+++`, no `@@` hunks).
6121                let gitbin_line = self.index + 1;
6122                patch.is_binary = true;
6123                patch.binary = Some(self.parse_binary_block(gitbin_line)?);
6124                binary_seen = true;
6125                break;
6126            } else if apply_is_binary_files_differ(line) {
6127                // A `--binary`-less diff records only `Binary files … differ`;
6128                // the postimage has to come from the object store at apply time.
6129                patch.is_binary = true;
6130                binary_seen = true;
6131                self.index += 1;
6132                break;
6133            } else {
6134                // Unrecognised commentary line — ignore.
6135                self.index += 1;
6136                continue;
6137            }
6138            self.index += 1;
6139        }
6140
6141        // `+++` header (the old-file branch above already advanced past `---`).
6142        if !binary_seen
6143            && self.index < self.lines.len()
6144            && self.lines[self.index].starts_with(b"+++ ")
6145        {
6146            let line = self.lines[self.index];
6147            self.parse_git_new_header(&line[b"+++ ".len()..], &mut patch);
6148            self.index += 1;
6149        }
6150
6151        // No explicit names anywhere: fall back to `def_name`, or fail like git
6152        // when `-p<n>` stripped every component away.
6153        if patch.old_path.is_none() && patch.new_path.is_none() {
6154            match &def_name {
6155                Some(d) => {
6156                    patch.old_path = Some(d.clone());
6157                    patch.new_path = Some(d.clone());
6158                }
6159                None => {
6160                    return Err(GitError::InvalidFormat(format!(
6161                        "git diff header lacks filename information when removing {} \
6162                         leading pathname components",
6163                        self.p_value
6164                    )));
6165                }
6166            }
6167        }
6168
6169        // Binary patches carry no `@@` hunks.
6170        if !binary_seen {
6171            self.parse_hunks(&mut patch)?;
6172        }
6173        Ok(patch)
6174    }
6175
6176    /// Parse a `index <old>..<new>[ <mode>]` line, capturing the blob OIDs and,
6177    /// when present, the unchanged-file mode (git's `gitdiff_index`).
6178    fn parse_index_line(&self, rest: &[u8], patch: &mut FilePatch) -> Result<()> {
6179        let Some(dotdot) = find_subslice(rest, b"..") else {
6180            return Ok(());
6181        };
6182        let old = &rest[..dotdot];
6183        let after = &rest[dotdot + 2..];
6184        // `new` runs to the first space (mode) or end of line.
6185        let (new, mode_part) = match after.iter().position(|&b| b == b' ') {
6186            Some(space) => (&after[..space], Some(&after[space + 1..])),
6187            None => (after, None),
6188        };
6189        if !old.is_empty() {
6190            patch.old_oid_hex = Some(old.to_vec());
6191        }
6192        if !new.is_empty() {
6193            patch.new_oid_hex = Some(new.to_vec());
6194        }
6195        if let Some(mode) = mode_part
6196            && !mode.is_empty()
6197        {
6198            patch.old_mode = Some(self.parse_mode_line(mode)?);
6199        }
6200        Ok(())
6201    }
6202
6203    /// Parse a `<octal mode>` field, mirroring git's `parse_mode_line`: leading
6204    /// octal digits terminated by whitespace or end of line. Errors otherwise.
6205    fn parse_mode_line(&self, rest: &[u8]) -> Result<u32> {
6206        let mut value: u32 = 0;
6207        let mut i = 0;
6208        while i < rest.len() && (b'0'..=b'7').contains(&rest[i]) {
6209            value = value
6210                .checked_mul(8)
6211                .and_then(|value| value.checked_add((rest[i] - b'0') as u32))
6212                .ok_or_else(|| self.invalid_mode_error(rest))?;
6213            i += 1;
6214        }
6215        if i == 0 || (i < rest.len() && !rest[i].is_ascii_whitespace()) {
6216            return Err(self.invalid_mode_error(rest));
6217        }
6218        Ok(value)
6219    }
6220
6221    fn invalid_mode_error(&self, rest: &[u8]) -> GitError {
6222        GitError::InvalidFormat(format!(
6223            "invalid mode on line {}: {}",
6224            self.index + 1,
6225            lossy(rest)
6226        ))
6227    }
6228
6229    /// Parse a `GIT binary patch` body: a mandatory forward hunk and an optional
6230    /// reverse hunk, each base85-encoded over zlib-deflated data. Mirrors git's
6231    /// `parse_binary`. `gitbin_line` is the 1-based line of the `GIT binary patch`
6232    /// marker (used in the "unrecognized binary patch" message).
6233    fn parse_binary_block(&mut self, gitbin_line: usize) -> Result<BinaryPatch> {
6234        // self.index points at "GIT binary patch"; advance past it.
6235        self.index += 1;
6236        let forward = match self.parse_binary_hunk()? {
6237            Some(hunk) => hunk,
6238            None => {
6239                return Err(GitError::InvalidFormat(format!(
6240                    "binary-unrecognized:{gitbin_line}"
6241                )));
6242            }
6243        };
6244        let reverse = self.parse_binary_hunk()?;
6245        Ok(BinaryPatch { forward, reverse })
6246    }
6247
    /// Parse one binary hunk (method line + base85 data lines + blank terminator),
    /// or `Ok(None)` when the current line is not a `literal`/`delta` method line.
    ///
    /// `Ok(None)` is also returned when the cursor is already at the end of the
    /// input. On success the cursor is left just past the blank terminator line,
    /// and the returned hunk carries the method, the length declared on the
    /// method line (`origlen`), and the concatenated base85-decoded bytes of
    /// every data line (`deflated`); nothing is inflated here.
    ///
    /// # Errors
    /// Returns `corrupt_binary_error` when the input ends before the blank
    /// terminator, when a data line has an impossible length or length prefix,
    /// or when its base85 payload does not decode.
    fn parse_binary_hunk(&mut self) -> Result<Option<BinaryHunk>> {
        if self.index >= self.lines.len() {
            return Ok(None);
        }
        let line = self.lines[self.index];
        // The method line is `delta <len>` or `literal <len>`; anything else
        // means there is no hunk here and the cursor is left untouched.
        let (method, num) = if let Some(rest) = strip_prefix(line, b"delta ") {
            (BinaryMethod::Delta, rest)
        } else if let Some(rest) = strip_prefix(line, b"literal ") {
            (BinaryMethod::Literal, rest)
        } else {
            return Ok(None);
        };
        let origlen = parse_leading_usize(num);
        self.index += 1;

        let mut deflated = Vec::new();
        loop {
            if self.index >= self.lines.len() {
                // Ran out of input before the blank terminator (truncated patch).
                return Err(self.corrupt_binary_error());
            }
            let data = self.lines[self.index];
            if data.is_empty() {
                // Blank line terminates the hunk.
                self.index += 1;
                break;
            }
            // git counts the trailing newline in its line length; our split-off
            // lines do not carry it, so `git llen == data.len() + 1`.
            // A data line is one length character followed by whole 5-character
            // base85 groups, so the shortest valid line is 6 bytes.
            let len = data.len();
            if len < 6 || !(len - 1).is_multiple_of(5) {
                return Err(self.corrupt_binary_error());
            }
            // Each 5-character group carries up to 4 bytes.
            let max_byte_length = (len - 1) / 5 * 4;
            // The first character encodes the decoded byte count of this line:
            // `A`..=`Z` mean 1..=26 and `a`..=`z` mean 27..=52.
            let byte_length = match data[0] {
                b'A'..=b'Z' => (data[0] - b'A') as usize + 1,
                b'a'..=b'z' => (data[0] - b'a') as usize + 27,
                _ => return Err(self.corrupt_binary_error()),
            };
            // The declared count must fit in the groups present and must reach
            // into the final group (i.e. at most 3 bytes of filler).
            if max_byte_length < byte_length || byte_length <= max_byte_length.saturating_sub(4) {
                return Err(self.corrupt_binary_error());
            }
            let decoded = decode_base85(&data[1..], byte_length)
                .ok_or_else(|| self.corrupt_binary_error())?;
            deflated.extend_from_slice(&decoded);
            self.index += 1;
        }
        Ok(Some(BinaryHunk {
            method,
            origlen,
            deflated,
        }))
    }
6303
6304    fn corrupt_binary_error(&self) -> GitError {
6305        GitError::InvalidFormat(format!("binary-corrupt:{}", self.index + 1))
6306    }
6307
6308    fn parse_git_old_header(&self, rest: &[u8], patch: &mut FilePatch) {
6309        if name::is_dev_null(rest) {
6310            patch.is_new = true;
6311            patch.old_path = None;
6312        } else if patch.old_path.is_none() {
6313            patch.old_path = name::find_name(rest, None, self.p_value, name::TERM_TAB, &self.root);
6314        }
6315    }
6316
6317    fn parse_git_new_header(&self, rest: &[u8], patch: &mut FilePatch) {
6318        if name::is_dev_null(rest) {
6319            patch.is_delete = true;
6320            patch.new_path = None;
6321        } else if patch.new_path.is_none() {
6322            patch.new_path = name::find_name(rest, None, self.p_value, name::TERM_TAB, &self.root);
6323        }
6324    }
6325
    /// Parse a traditional (non-git) diff section, mirroring git's
    /// `parse_traditional_patch`: guess the strip count, recognise epoch
    /// timestamps as creation/deletion, and prefer the shorter of the two names.
    ///
    /// Expects the cursor to rest on the `--- ` line (the first four bytes are
    /// sliced off unconditionally). A directly following `+++ ` line is consumed
    /// as well; when it is missing, no names are resolved and only the hunks are
    /// parsed.
    ///
    /// # Errors
    /// Returns `unable to find filename in patch at line N` when both header
    /// lines are present but no file name could be resolved, and propagates any
    /// error from `parse_hunks`.
    fn parse_traditional_file(&mut self) -> Result<FilePatch> {
        let mut patch = empty_file_patch();
        let first_line = self.lines[self.index];
        // Text after `--- ` (old-side name plus whatever trails it).
        let first = first_line[b"--- ".len()..].to_vec();
        self.index += 1;
        // Text after `+++ `, when the next line is the new-side header.
        let second = if self.index < self.lines.len() && self.lines[self.index].starts_with(b"+++ ")
        {
            let s = self.lines[self.index][b"+++ ".len()..].to_vec();
            self.index += 1;
            Some(s)
        } else {
            None
        };

        if let Some(second) = &second {
            // Without a known strip count, adopt a guess only when the guess
            // for the first name (or, lacking one, the second) agrees with the
            // guess for the second name. Once adopted it is remembered on the
            // parser.
            if !self.p_value_known {
                let p0 = name::guess_p_value(&first, &self.root, &self.prefix);
                let q0 = name::guess_p_value(second, &self.root, &self.prefix);
                let p = if p0.is_none() { q0 } else { p0 };
                if let Some(pv) = p
                    && Some(pv) == q0
                {
                    self.p_value = pv;
                    self.p_value_known = true;
                }
            }

            let name = if name::is_dev_null(&first) {
                // Old side is /dev/null: creation, named by the new side.
                patch.is_new = true;
                let name = name::find_name_traditional(second, None, self.p_value, &self.root);
                patch.new_path = name.clone();
                name
            } else if name::is_dev_null(second) {
                // New side is /dev/null: deletion, named by the old side.
                patch.is_delete = true;
                let name = name::find_name_traditional(&first, None, self.p_value, &self.root);
                patch.old_path = name.clone();
                name
            } else {
                // Both sides carry a name: resolve the first, then resolve the
                // second with the first supplied as the fallback candidate.
                let first_name =
                    name::find_name_traditional(&first, None, self.p_value, &self.root);
                let name = name::find_name_traditional(
                    second,
                    first_name.as_deref(),
                    self.p_value,
                    &self.root,
                );
                // An epoch timestamp on one side stands in for /dev/null.
                if name::has_epoch_timestamp(&first) {
                    patch.is_new = true;
                    patch.new_path = name.clone();
                } else if name::has_epoch_timestamp(second) {
                    patch.is_delete = true;
                    patch.old_path = name.clone();
                } else {
                    patch.old_path = name.clone();
                    patch.new_path = name.clone();
                }
                name
            };
            // git's `parse_traditional_patch`: a name that strips away every
            // component (e.g. `-p2` against a one-component `file_in_root`) is a
            // hard error — the whole apply fails rather than silently skipping the
            // unresolved file.
            if name.is_none() {
                return Err(GitError::InvalidFormat(format!(
                    "unable to find filename in patch at line {}",
                    self.index
                )));
            }
        }

        self.parse_hunks(&mut patch)?;
        Ok(patch)
    }
6402
6403    /// Parse the hunk bodies that follow a file header, stopping at the next
6404    /// file header.
6405    fn parse_hunks(&mut self, patch: &mut FilePatch) -> Result<()> {
6406        while self.index < self.lines.len() {
6407            let line = self.lines[self.index];
6408            // git's `parse_single_patch` only treats a line as a fragment when it
6409            // begins with `@@ -` (old side first). A `@@ +…` line — e.g. the
6410            // malformed header a Subversion-generated diff emits — is not a hunk;
6411            // it (and the lines after it) are skipped as commentary, so a deletion
6412            // with no real hunk still applies from its metadata alone.
6413            if line.starts_with(b"@@ -") {
6414                let hunk = self.parse_hunk()?;
6415                patch.hunks.push(hunk);
6416            } else if line.starts_with(b"diff --git ") {
6417                break;
6418            } else if line.starts_with(b"--- ") {
6419                // Start of a subsequent bare diff.
6420                break;
6421            } else {
6422                // Trailing commentary between/after hunks.
6423                self.index += 1;
6424            }
6425        }
6426        Ok(())
6427    }
6428
    /// Parse a single hunk: the `@@` header under the cursor followed by its
    /// body lines.
    ///
    /// Body lines are classified by their first byte (` ` context, `+` insert,
    /// `-` delete, `\` no-newline marker); a completely empty line counts as an
    /// empty context line. For every stored body line the 1-based input line
    /// number is recorded in `line_input_lines`.
    ///
    /// Outside recount mode the body ends once both header counts are
    /// satisfied, and a mismatch between the header counts and the lines seen is
    /// an error. In recount mode the header counts are ignored, the body ends at
    /// the next hunk/file header, and the hunk's lengths are replaced by the
    /// counts actually seen.
    ///
    /// # Errors
    /// Propagates a malformed-header error from `parse_hunk_header`, and returns
    /// a `hunk body line counts mismatch` error as described above.
    fn parse_hunk(&mut self) -> Result<Hunk> {
        let header = self.lines[self.index];
        let (old_start, old_len, new_start, new_len) = parse_hunk_header(header)?;
        self.index += 1;

        let mut hunk = Hunk {
            old_start,
            old_len,
            new_start,
            new_len,
            lines: Vec::new(),
            old_no_newline: false,
            new_no_newline: false,
            line_input_lines: Vec::new(),
        };
        // Number of body lines seen so far that belong to each side.
        let mut old_seen = 0usize;
        let mut new_seen = 0usize;

        while self.index < self.lines.len() {
            // Stop when both sides are satisfied. In recount mode the header
            // counts are intentionally ignored; the next hunk/file header ends
            // the body.
            if !self.recount && old_seen >= old_len && new_seen >= new_len {
                break;
            }
            let line = self.lines[self.index];
            if self.recount
                && (line.starts_with(b"@@ ")
                    || line.starts_with(b"diff --git ")
                    || line.starts_with(b"diff a/")
                    || line.starts_with(b"--- "))
            {
                break;
            }
            if line.is_empty() {
                // A wholly empty line in a unified diff is a context line whose
                // content is the empty string (git emits a bare ` `, but some
                // tooling/email transport strips the trailing space).
                hunk.lines.push(HunkLine::Context(Vec::new()));
                hunk.line_input_lines.push(self.index + 1);
                old_seen += 1;
                new_seen += 1;
                self.index += 1;
                continue;
            }
            match line[0] {
                b' ' => {
                    hunk.lines.push(HunkLine::Context(line[1..].to_vec()));
                    hunk.line_input_lines.push(self.index + 1);
                    old_seen += 1;
                    new_seen += 1;
                }
                b'+' => {
                    hunk.lines.push(HunkLine::Insert(line[1..].to_vec()));
                    hunk.line_input_lines.push(self.index + 1);
                    new_seen += 1;
                }
                b'-' => {
                    hunk.lines.push(HunkLine::Delete(line[1..].to_vec()));
                    hunk.line_input_lines.push(self.index + 1);
                    old_seen += 1;
                }
                b'\\' => {
                    // `\ No newline at end of file` — applies to the line just
                    // emitted. Set the appropriate side flag(s).
                    self.mark_no_newline(&mut hunk);
                    self.index += 1;
                    continue;
                }
                _ => {
                    // Anything else terminates the hunk body.
                    break;
                }
            }
            self.index += 1;
        }

        // A trailing `\ No newline` may follow the final body line even after
        // the counts are satisfied; consume it.
        if self.index < self.lines.len() && self.lines[self.index].starts_with(b"\\") {
            self.mark_no_newline(&mut hunk);
            self.index += 1;
        }

        if self.recount {
            // Recount mode trusts the body over the header.
            hunk.old_len = old_seen;
            hunk.new_len = new_seen;
        } else if old_seen != old_len || new_seen != new_len {
            return Err(GitError::InvalidFormat(format!(
                "hunk body line counts mismatch: header declared -{old_len},+{new_len} \
                 but body had -{old_seen},+{new_seen}"
            )));
        }

        Ok(hunk)
    }
6525
6526    /// Set the no-newline flag based on the kind of the most recently pushed
6527    /// hunk line.
6528    fn mark_no_newline(&self, hunk: &mut Hunk) {
6529        match hunk.lines.last() {
6530            Some(HunkLine::Context(_)) => {
6531                hunk.old_no_newline = true;
6532                hunk.new_no_newline = true;
6533            }
6534            Some(HunkLine::Insert(_)) => hunk.new_no_newline = true,
6535            Some(HunkLine::Delete(_)) => hunk.old_no_newline = true,
6536            None => {}
6537        }
6538    }
6539}
6540
6541/// An all-empty [`FilePatch`] for the parser to fill in.
6542fn empty_file_patch() -> FilePatch {
6543    FilePatch {
6544        old_path: None,
6545        new_path: None,
6546        old_mode: None,
6547        new_mode: None,
6548        hunks: Vec::new(),
6549        is_new: false,
6550        is_delete: false,
6551        is_rename: false,
6552        is_copy: false,
6553        similarity: None,
6554        dissimilarity: None,
6555        old_oid_hex: None,
6556        new_oid_hex: None,
6557        is_binary: false,
6558        binary: None,
6559        is_toplevel_relative: false,
6560    }
6561}
6562
6563/// Parse an `@@ -l,s +l,s @@` header into `(old_start, old_len, new_start,
6564/// new_len)`. A missing `,s` means a length of 1.
6565fn parse_hunk_header(line: &[u8]) -> Result<(usize, usize, usize, usize)> {
6566    let err = || GitError::InvalidFormat(format!("malformed hunk header: {}", lossy(line)));
6567    let rest = strip_prefix(line, b"@@ ").ok_or_else(err)?;
6568    // Up to the closing ` @@`.
6569    let close = find_subslice(rest, b" @@").ok_or_else(err)?;
6570    let ranges = &rest[..close];
6571    let mut parts = ranges.split(|&b| b == b' ').filter(|p| !p.is_empty());
6572    let old = parts.next().ok_or_else(err)?;
6573    let new = parts.next().ok_or_else(err)?;
6574    let old = strip_prefix(old, b"-").ok_or_else(err)?;
6575    let new = strip_prefix(new, b"+").ok_or_else(err)?;
6576    let (old_start, old_len) = parse_range(old).ok_or_else(err)?;
6577    let (new_start, new_len) = parse_range(new).ok_or_else(err)?;
6578    Ok((old_start, old_len, new_start, new_len))
6579}
6580
6581/// Parse `start[,len]` into `(start, len)`, defaulting `len` to 1.
6582fn parse_range(range: &[u8]) -> Option<(usize, usize)> {
6583    match range.iter().position(|&b| b == b',') {
6584        Some(comma) => {
6585            let start = parse_usize(&range[..comma])?;
6586            let len = parse_usize(&range[comma + 1..])?;
6587            Some((start, len))
6588        }
6589        None => Some((parse_usize(range)?, 1)),
6590    }
6591}
6592
/// Parse `bytes` as a decimal `usize`.
///
/// The whole input must consist of ASCII digits: an empty input, a sign,
/// whitespace, or any other byte yields `None`, as does a value that does not
/// fit in a `usize`.
fn parse_usize(bytes: &[u8]) -> Option<usize> {
    if bytes.is_empty() {
        return None;
    }
    bytes.iter().try_fold(0usize, |total, &byte| {
        if !byte.is_ascii_digit() {
            return None;
        }
        total
            .checked_mul(10)?
            .checked_add(usize::from(byte - b'0'))
    })
}
6606
6607fn parse_percent(bytes: &[u8]) -> Option<u8> {
6608    let trimmed = trim_ascii_end(bytes)
6609        .strip_suffix(b"%")
6610        .unwrap_or(trim_ascii_end(bytes));
6611    let value = parse_usize(trimmed)?;
6612    u8::try_from(value).ok().filter(|value| *value <= 100)
6613}
6614
/// Return the part of `line` that follows `prefix`, or `None` when `line` does
/// not start with `prefix`.
///
/// An empty `prefix` always matches and yields `line` unchanged.
fn strip_prefix<'b>(line: &'b [u8], prefix: &[u8]) -> Option<&'b [u8]> {
    // Thin wrapper over the standard slice method, kept so call sites can pass
    // byte-string literals of any length.
    line.strip_prefix(prefix)
}
6622
/// Report whether `line` is a metadata-only binary marker, i.e. it ends with
/// ` differ` and starts with `Binary files ` or `Files ` (git's binhdr
/// detection).
fn apply_is_binary_files_differ(line: &[u8]) -> bool {
    const OPENERS: [&[u8]; 2] = [b"Binary files ", b"Files "];

    if !line.ends_with(b" differ") {
        return false;
    }
    OPENERS.iter().any(|&opener| line.starts_with(opener))
}
6629
/// Read the decimal number formed by the leading ASCII digits of `bytes`,
/// ignoring whatever follows (git uses `strtoul` here, which tolerates
/// trailing junk).
///
/// Returns 0 when `bytes` does not start with a digit. A value too large for
/// `usize` saturates at `usize::MAX` rather than wrapping.
fn parse_leading_usize(bytes: &[u8]) -> usize {
    bytes
        .iter()
        .take_while(|byte| byte.is_ascii_digit())
        .fold(0usize, |total, &digit| {
            total
                .saturating_mul(10)
                .saturating_add(usize::from(digit - b'0'))
        })
}
6642
/// git's base85 alphabet (`base85.c` `en85`).
const BASE85_ALPHABET: &[u8; 85] =
    b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";

/// Map one base85 character to its digit value (its position in
/// [`BASE85_ALPHABET`], `0..=84`), or `None` for a byte outside the alphabet.
///
/// The lookup goes through a reverse table built at compile time, so decoding
/// a character is a single array access instead of a scan of the alphabet.
fn base85_value(ch: u8) -> Option<u32> {
    /// Table entry for bytes that are not part of the alphabet.
    const INVALID: u8 = u8::MAX;
    /// Byte value -> alphabet position, or `INVALID`.
    const DECODE: [u8; 256] = {
        let mut table = [INVALID; 256];
        let mut digit = 0;
        while digit < BASE85_ALPHABET.len() {
            // `digit` is below 85, so the narrowing cast cannot truncate.
            table[BASE85_ALPHABET[digit] as usize] = digit as u8;
            digit += 1;
        }
        table
    };

    match DECODE[usize::from(ch)] {
        INVALID => None,
        digit => Some(u32::from(digit)),
    }
}
6653
6654/// Decode `len` bytes from a base85 buffer (5 chars → 4 bytes, big-endian), a
6655/// port of git's `decode_85`. Returns `None` on an invalid alphabet character or
6656/// an overflowing 5-char group. `buffer` must contain `ceil(len/4) * 5` chars.
6657fn decode_base85(buffer: &[u8], len: usize) -> Option<Vec<u8>> {
6658    let mut out = Vec::with_capacity(len);
6659    let mut pos = 0usize;
6660    let mut remaining = len;
6661    while remaining > 0 {
6662        let mut acc: u32 = 0;
6663        // First four characters never overflow a u32 (85^4 < 2^32).
6664        for _ in 0..4 {
6665            let de = base85_value(*buffer.get(pos)?)?;
6666            pos += 1;
6667            acc = acc * 85 + de;
6668        }
6669        let de = base85_value(*buffer.get(pos)?)?;
6670        pos += 1;
6671        // The fifth character can overflow; reject it as git does.
6672        if 0xffff_ffffu32 / 85 < acc {
6673            return None;
6674        }
6675        acc *= 85;
6676        if 0xffff_ffffu32 - de < acc {
6677            return None;
6678        }
6679        acc += de;
6680
6681        let cnt = remaining.min(4);
6682        remaining -= cnt;
6683        let bytes = acc.to_be_bytes();
6684        out.extend_from_slice(&bytes[..cnt]);
6685    }
6686    Some(out)
6687}
6688
/// Apply a git delta (`delta.c` `patch_delta`) to reconstruct the postimage from
/// `base`.
///
/// The delta starts with two little-endian base-128 varints: the expected size
/// of `base` and the size of the result. The remainder is a sequence of
/// opcodes:
///
/// * high bit set: copy from `base`. The low four bits select which offset
///   bytes follow and the next three bits select which size bytes follow; a
///   size of zero means `0x10000`.
/// * `1..=0x7f`: insert that many literal bytes taken from the delta itself.
/// * `0`: reserved, always rejected.
///
/// Returns `None` on any malformed or inconsistent delta: a truncated header
/// or opcode, a base-size mismatch, a copy outside `base`, output that would
/// exceed or fall short of the declared result size, or a declared result size
/// for which memory cannot be reserved.
pub fn git_patch_delta(base: &[u8], delta: &[u8]) -> Option<Vec<u8>> {
    let mut pos = 0usize;
    // Reads one header varint: 7 payload bits per byte, least significant group
    // first, high bit set on every byte but the last.
    let read_hdr_size = |pos: &mut usize| -> Option<usize> {
        let mut size = 0usize;
        let mut shift = 0u32;
        loop {
            let byte = *delta.get(*pos)?;
            *pos += 1;
            size |= usize::from(byte & 0x7f).checked_shl(shift)?;
            shift += 7;
            if byte & 0x80 == 0 {
                return Some(size);
            }
        }
    };

    if read_hdr_size(&mut pos)? != base.len() {
        return None;
    }
    let result_size = read_hdr_size(&mut pos)?;

    // The declared size comes straight from the (untrusted) delta, so reserve
    // fallibly: an absurd value must yield `None`, not a panic or an abort.
    let mut out = Vec::new();
    out.try_reserve_exact(result_size).ok()?;
    // Output bytes still owed. Checking every opcode against this budget (as
    // git does) keeps a hostile delta from growing `out` past `result_size`.
    let mut remaining = result_size;

    while let Some(&cmd) = delta.get(pos) {
        pos += 1;
        if cmd & 0x80 != 0 {
            // Copy from base: gather the offset and size bytes that are present.
            let mut cp_off = 0usize;
            for (bit, shift) in [(0x01u8, 0u32), (0x02, 8), (0x04, 16), (0x08, 24)] {
                if cmd & bit != 0 {
                    cp_off |= usize::from(*delta.get(pos)?) << shift;
                    pos += 1;
                }
            }
            let mut cp_size = 0usize;
            for (bit, shift) in [(0x10u8, 0u32), (0x20, 8), (0x40, 16)] {
                if cmd & bit != 0 {
                    cp_size |= usize::from(*delta.get(pos)?) << shift;
                    pos += 1;
                }
            }
            if cp_size == 0 {
                cp_size = 0x10000;
            }
            let end = cp_off.checked_add(cp_size)?;
            if end > base.len() || cp_size > remaining {
                return None;
            }
            out.extend_from_slice(&base[cp_off..end]);
            remaining -= cp_size;
        } else if cmd != 0 {
            // Insert literal bytes from the delta.
            let len = usize::from(cmd);
            let end = pos.checked_add(len)?;
            if len > remaining || end > delta.len() {
                return None;
            }
            out.extend_from_slice(&delta[pos..end]);
            pos = end;
            remaining -= len;
        } else {
            // Opcode 0 is reserved.
            return None;
        }
    }

    // The loop only ends with the delta fully consumed; the result must also be
    // exactly as long as the header promised.
    if remaining != 0 {
        return None;
    }
    Some(out)
}
6780
/// Locate the first occurrence of `needle` in `haystack` and return its
/// starting index.
///
/// An empty `needle` never matches (unlike `str::find`), and neither does a
/// `needle` longer than `haystack`.
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    match needle.len() {
        0 => None,
        width if width > haystack.len() => None,
        width => haystack
            .windows(width)
            .position(|candidate| candidate == needle),
    }
}
6789
/// Drop trailing spaces and carriage returns from `bytes`.
///
/// Only `b' '` and `b'\r'` are removed; other whitespace such as tabs or
/// newlines is kept.
fn trim_ascii_end(bytes: &[u8]) -> &[u8] {
    let kept = bytes
        .iter()
        .rposition(|&byte| byte != b' ' && byte != b'\r')
        .map_or(0, |last| last + 1);
    &bytes[..kept]
}
6797
/// Render `bytes` as an owned `String`, substituting U+FFFD for any invalid
/// UTF-8 sequences.
fn lossy(bytes: &[u8]) -> String {
    match String::from_utf8_lossy(bytes) {
        std::borrow::Cow::Borrowed(valid) => valid.to_owned(),
        std::borrow::Cow::Owned(repaired) => repaired,
    }
}
6801
6802// ===========================================================================
6803// Library tree-merge seam (`merge_trees`).
6804//
6805// This is the single 3-way tree-merge engine that every merge porcelain calls.
6806// Before it existed the logic was duplicated across the CLI: `merge-tree
6807// --write-tree` had its own copy and `git merge` / `cherry-pick` / `revert`
6808// had a second copy. Both copies implemented the identical per-path diff3
6809// resolution; the only differences were *rendering* (write-tree emits a tree +
6810// stage list + messages; the porcelains stage an index + materialize a
6811// worktree). This seam computes the merge once and returns a per-path result
6812// rich enough for both renderings, so the resolution lives in exactly one
6813// place.
6814//
6815// The result is byte-identical to the old per-command copies on every cell
6816// they already handled (clean merges, content / add-add / modify-delete
6817// conflicts, mode merges). On top of that it adds rename-aware resolution: a
6818// file renamed on one side and modified on the other follows the rename,
6819// gated by [`MergeTreesOptions::detect_renames`] (the classic merge-ort
6820// non-recursive rename case).
6821// ===========================================================================
6822
/// Flattened tree: repository-relative path -> (mode, blob/symlink/gitlink oid).
///
/// Being a [`BTreeMap`] keyed by the raw path bytes, iteration visits paths in
/// bytewise-sorted order.
pub type MergeEntryMap = BTreeMap<Vec<u8>, (u32, ObjectId)>;
6825
/// How a textual conflict is resolved: leave conflict markers in place, favour
/// one side wholesale (`-Xours` / `-Xtheirs`), or keep both sides' lines
/// (`merge=union`).
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum MergeFavor {
    /// Leave conflict markers in place (the default).
    None,
    /// On a textual conflict, take ours' content wholesale.
    Ours,
    /// On a textual conflict, take theirs' content wholesale.
    Theirs,
    /// On a textual conflict, keep BOTH sides' lines (ours then theirs) with no
    /// markers — git's `merge=union` attribute / `--union` (`XDL_MERGE_FAVOR_UNION`).
    Union,
}
6840
/// Options controlling a [`merge_trees`] run.
///
/// The three labels are borrowed for `'a`. The [`Default`] implementation
/// supplies generic labels with rename detection, directory-rename detection
/// and side-favouring all switched off.
pub struct MergeTreesOptions<'a> {
    /// Conflict-marker label for ours (e.g. a branch name or `HEAD`).
    pub ours_label: &'a str,
    /// Conflict-marker label for theirs.
    pub theirs_label: &'a str,
    /// Diff3 ancestor label (the `|||||||` side); merge porcelains use
    /// `"merged common ancestors"`.
    pub ancestor_label: &'a str,
    /// How textual conflicts are resolved (`-Xours` / `-Xtheirs` / union), or
    /// [`MergeFavor::None`] to leave conflict markers.
    pub favor: MergeFavor,
    /// Enable rename-aware merging: a file renamed on one side and modified on
    /// the other follows the rename. When `false`, the merge is purely
    /// path-keyed (the historical behaviour).
    pub detect_renames: bool,
    /// Minimum similarity (`0..=100`) for inexact rename detection.
    pub rename_threshold: u8,
    /// Cap on the inexact rename matrix (`merge.renameLimit`/`diff.renameLimit`).
    /// `0` means unlimited; otherwise inexact detection is skipped when the
    /// candidate source × destination count exceeds `rename_limit²`.
    pub rename_limit: usize,
    /// Directory-rename detection mode. When [`DirectoryRenames::False`], a file
    /// added on one side under a directory that the *other* side renamed stays
    /// put. When enabled, such files are re-homed into the renamed directory,
    /// matching `merge.directoryRenames`. Requires `detect_renames` to have any
    /// effect (directory renames are inferred from the file renames it finds).
    pub directory_renames: DirectoryRenames,
    /// Conflict-marker style for textual conflicts (`merge.conflictStyle`).
    pub style: ConflictStyle,
    /// Whitespace-insensitivity for textual 3-way merges, mirroring
    /// `-Xignore-space-change`/`-Xignore-all-space`/`-Xignore-space-at-eol`.
    pub ws_ignore: WsIgnore,
}
6874
/// How directory-rename detection behaves, mirroring git's
/// `merge.directoryRenames` configuration.
///
/// Note that this type's [`Default`] is [`DirectoryRenames::False`] (detection
/// disabled), which is not the same as the git configuration default noted on
/// [`DirectoryRenames::Conflict`].
#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)]
pub enum DirectoryRenames {
    /// Disable directory-rename detection (`merge.directoryRenames=false`).
    /// This is the value produced by `DirectoryRenames::default()`.
    #[default]
    False,
    /// Apply directory renames silently (`merge.directoryRenames=true`).
    True,
    /// Detect directory renames but treat each re-homed path as a conflict
    /// requiring confirmation (`merge.directoryRenames=conflict`). git's default.
    Conflict,
}
6888
6889impl Default for MergeTreesOptions<'_> {
6890    fn default() -> Self {
6891        Self {
6892            ours_label: "ours",
6893            theirs_label: "theirs",
6894            ancestor_label: "merged common ancestors",
6895            favor: MergeFavor::None,
6896            detect_renames: false,
6897            rename_threshold: DEFAULT_RENAME_THRESHOLD,
6898            rename_limit: 0,
6899            directory_renames: DirectoryRenames::False,
6900            style: ConflictStyle::Merge,
6901            ws_ignore: WsIgnore::EMPTY,
6902        }
6903    }
6904}
6905
/// The kind of conflict recorded for a path, used to render the stable
/// conflict-type token and human message.
///
/// Paths are raw byte strings (`Vec<u8>`); side labels are the caller-supplied
/// branch/side names as owned `String`s.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeConflictKind {
    /// Both sides changed the file content differently (or both added it with
    /// differing content — an add/add).
    Content {
        /// True when the path has no base entry, i.e. both sides added it.
        add_add: bool,
    },
    /// The file was deleted on one side and modified on the other.
    ModifyDelete {
        /// The side label that deleted the path.
        deleted_in: String,
        /// The side label that modified (and thus kept) the path.
        modified_in: String,
    },
    /// A file renamed on one side, with a content conflict against the other
    /// side's change at the destination.
    RenameContent {
        /// The original (pre-rename) path.
        old_path: Vec<u8>,
    },
    /// Two paths were renamed to the same destination, producing a
    /// rename/rename(2to1) conflict.
    RenameRenameTwoToOne {
        /// Ours' pre-destination path.
        ours_path: Vec<u8>,
        /// Theirs' pre-destination path.
        theirs_path: Vec<u8>,
    },
    /// One source path was renamed to different destinations on each side,
    /// producing a rename/rename(1to2) conflict.
    RenameRenameOneToTwo {
        /// The pre-rename source path.
        old_path: Vec<u8>,
        /// Ours' destination path.
        ours_path: Vec<u8>,
        /// Theirs' destination path.
        theirs_path: Vec<u8>,
        /// The label for our side.
        ours_label: String,
        /// The label for their side.
        theirs_label: String,
    },
    /// An auxiliary higher-stage entry for a rename/rename(1to2) conflict. The
    /// user-facing message is emitted by
    /// [`MergeConflictKind::RenameRenameOneToTwo`].
    RenameRenameOneToTwoStage,
    /// A directory was split evenly across multiple destinations, so no
    /// directory rename could be applied for paths the other side left there.
    DirRenameSplit {
        /// The original directory with no unique destination.
        source_dir: Vec<u8>,
    },
    /// A file renamed on one side whose source was deleted on the other side.
    RenameDelete {
        /// The pre-rename source path.
        old_path: Vec<u8>,
        /// The side label that performed the rename.
        renamed_in: String,
        /// The side label that deleted the source.
        deleted_in: String,
    },
    /// A file collides with a directory at the same path in the merged result:
    /// the directory wins at the original path and the file is moved aside to
    /// `path~<branch>` (merge-ort's D/F conflict, `unique_path`). git emits
    /// `CONFLICT (file/directory): directory in the way of <old> from <branch>;
    /// moving it to <new> instead.`
    FileDirectory {
        /// The original (pre-move) path now occupied by the directory.
        original_path: Vec<u8>,
        /// The side label whose file was moved aside.
        moved_from: String,
    },
    /// A path was added/renamed under a directory the other side renamed, so the
    /// merge silently moved it into the renamed directory but, in
    /// `merge.directoryRenames=conflict` mode, flags it for the user to confirm.
    /// git emits `CONFLICT (file location): ... suggesting it should perhaps be
    /// moved to <new_path>.` The tree still contains the re-homed content.
    DirRenameLocation {
        /// The pre-re-home path (`old_path` in git's message): where the side
        /// placed the file before directory-rename detection moved it.
        old_path: Vec<u8>,
        /// `Some(source)` when the file was *renamed* into `old_path` by this
        /// side (git's "renamed to" wording, naming the original `source`);
        /// `None` when it was a fresh add (git's "added in" wording).
        renamed_from: Option<Vec<u8>>,
        /// The side label that added/renamed the file (`branch_with_new_path`).
        added_in: String,
        /// The side label that renamed the directory (`branch_with_dir_rename`).
        dir_renamed_in: String,
        /// True when the directory rename moved the file back onto its own base
        /// source path (rename-to-self) and the other side modified that path. The
        /// `CONFLICT (file location)` message is the same, but git records the
        /// path UNMERGED (stages 1/2/3) instead of staging the re-homed content
        /// cleanly: the index writers stage these 1/2/3, not at stage 0.
        back_to_self: bool,
    },
    /// A directory rename would have moved one or more paths onto this path, but
    /// it is already occupied (a file/dir in the way) or several sources map
    /// here. git emits `CONFLICT (implicit dir rename): Existing file/dir at
    /// <path> in the way of implicit directory rename(s) putting the following
    /// path(s) there: <sources>.` The path keeps its original content; the
    /// re-homed sources are left where they were.
    DirRenameImplicitCollision {
        /// The source path(s) the directory rename tried to move onto this path.
        sources: Vec<Vec<u8>>,
    },
    /// The two sides hold different object types at one path (regular↔symlink,
    /// regular↔gitlink, symlink↔gitlink). git's `process_entry` (merge-ort.c
    /// ~4220) renames each *regular-file* side to `path~<branch>` so each type
    /// can be recorded somewhere, ignoring `-Xours`/`-Xtheirs`, and emits a
    /// single `CONFLICT (distinct types)` line. (gitlink↔gitlink and
    /// symlink↔symlink share an `S_IFMT` and never reach this arm.) This kind is
    /// attached to the leaf that carries the message — the side left at
    /// `original_path` when only one side moved, else ours; the other renamed
    /// leaf carries [`MergeConflictKind::DistinctTypesStage`].
    DistinctTypes {
        /// The original colliding path (git's message subject and sort key).
        original_path: Vec<u8>,
        /// `Some(p)` when ours was renamed aside to `p`; `None` when ours stayed
        /// at `original_path`.
        ours_renamed: Option<Vec<u8>>,
        /// `Some(p)` when theirs was renamed aside to `p`; `None` when theirs
        /// stayed at `original_path`.
        theirs_renamed: Option<Vec<u8>>,
    },
    /// The non-message-carrying leaf of a [`MergeConflictKind::DistinctTypes`]
    /// conflict. The user-facing line is emitted once by the
    /// [`MergeConflictKind::DistinctTypes`] leaf.
    DistinctTypesStage,
}
7034
/// One resolved/conflicted path in the merged tree.
///
/// Tuples of the form `(u32, ObjectId)` are `(mode, oid)` pairs.
#[derive(Debug, Clone)]
pub struct MergedPath {
    /// Destination path in the merged tree. For a leaf that the merge moved
    /// aside (see [`MergeConflictKind::DistinctTypes`]) this is the aside name
    /// chosen by the merge, not the original colliding path.
    pub path: Vec<u8>,
    /// The per-stage (1=base, 2=ours, 3=theirs) entries when conflicted; all
    /// `None` for a clean resolution.
    pub stages: MergeStages,
    /// `Some((mode, oid))` is the final leaf written to the merged tree; `None`
    /// means the path is absent in the result (a clean delete).
    pub result: Option<(u32, ObjectId)>,
    /// When conflicted, the worktree `(mode, bytes)` to materialize (content
    /// with conflict markers, or the surviving side's bytes). `None` for a
    /// clean path.
    pub worktree: Option<(u32, Vec<u8>)>,
    /// `Some(..)` exactly when this path conflicted.
    pub conflict: Option<MergeConflictKind>,
    /// True when this path went through a textual 3-way content merge (both
    /// sides diverged and both were mergeable files). Drives the "Auto-merging
    /// <path>" informational message, which `git merge-tree` emits for every
    /// such path — clean or conflicted.
    pub auto_merged: bool,
}
7058
7059impl MergedPath {
7060    /// True when this path resolved cleanly (no conflict recorded).
7061    pub fn is_clean(&self) -> bool {
7062        self.conflict.is_none()
7063    }
7064}
7065
/// Per-stage higher-order index entries for a conflicted path.
///
/// Each field is a `(mode, oid)` pair, or `None` when that stage has no entry
/// for the path. The `Default` value has every stage empty.
#[derive(Debug, Clone, Default)]
pub struct MergeStages {
    /// Stage 1: the common-ancestor (base) entry.
    pub base: Option<(u32, ObjectId)>,
    /// Stage 2: our side's entry.
    pub ours: Option<(u32, ObjectId)>,
    /// Stage 3: their side's entry.
    pub theirs: Option<(u32, ObjectId)>,
}
7073
/// The outcome of a 3-way tree merge: the merged top-level tree plus per-path
/// detail and a clean/conflicted flag.
#[derive(Debug, Clone)]
pub struct MergeTreesResult {
    /// Object id of the merged top-level tree (always written, even on
    /// conflict — conflicted blobs go in with their marker content).
    pub tree: ObjectId,
    /// Per-path results, sorted by path. Use [`MergeTreesResult::conflicts`]
    /// to visit only the conflicted ones.
    pub paths: Vec<MergedPath>,
    /// False if any path conflicted.
    pub clean: bool,
    /// Original paths removed by rename or directory-rename rewrites. These are
    /// cleanup-only paths for porcelains materializing a conflicted merge; they
    /// are absent from the merged tree.
    pub cleanup_paths: Vec<Vec<u8>>,
    /// Non-conflict informational messages produced while detecting renames.
    /// Note that some [`MergeInfoMessage`] variants carry an *additional*
    /// `CONFLICT` line for a path whose primary conflict kind is recorded on
    /// the path itself.
    pub info_messages: Vec<MergeInfoMessage>,
}
7092
7093impl MergeTreesResult {
7094    /// Iterate over the paths that conflicted, in path order.
7095    pub fn conflicts(&self) -> impl Iterator<Item = &MergedPath> {
7096        self.paths.iter().filter(|entry| entry.conflict.is_some())
7097    }
7098}
7099
/// Non-conflict merge information that porcelain commands may print.
///
/// Paths are raw byte strings; `*_in` fields are side labels.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeInfoMessage {
    /// A directory rename was skipped because the suggested target directory was
    /// itself renamed away on this side.
    DirRenameSkippedDueToRerename {
        /// NOTE(review): presumably the directory whose rename was being
        /// followed — confirm against the producer.
        old_dir: Vec<u8>,
        /// NOTE(review): presumably the path that was left where it was —
        /// confirm against the producer.
        path: Vec<u8>,
        /// NOTE(review): presumably the suggested target directory that was
        /// itself renamed away — confirm against the producer.
        new_dir: Vec<u8>,
    },
    /// A path was updated due to a directory rename in
    /// `merge.directoryRenames=true` mode.
    DirRenameApplied {
        /// The path before directory-rename detection re-homed it.
        old_path: Vec<u8>,
        /// The re-homed destination path.
        new_path: Vec<u8>,
        /// The re-home record's `renamed_from` value; see
        /// [`MergeConflictKind::DirRenameLocation`] for the meaning of
        /// `Some`/`None`.
        renamed_from: Option<Vec<u8>>,
        /// The side label that added/renamed the file.
        added_in: String,
        /// The label of the other side, which renamed the directory.
        dir_renamed_in: String,
    },
    /// A directory-rename location conflict that overlaps another conflict at
    /// the same final path, such as a content conflict. The path's primary
    /// conflict kind remains attached to the path; this carries git's extra
    /// `CONFLICT (file location)` line.
    ///
    /// NOTE(review): the fields presumably mirror
    /// [`MergeInfoMessage::DirRenameApplied`] — confirm against the producer.
    DirRenameLocationConflict {
        old_path: Vec<u8>,
        new_path: Vec<u8>,
        renamed_from: Option<Vec<u8>>,
        added_in: String,
        dir_renamed_in: String,
    },
    /// A rename/delete conflict whose conflicted destination was later moved
    /// aside by directory/file conflict handling. The primary per-path conflict
    /// remains `FileDirectory`; this preserves git's extra rename/delete line.
    ///
    /// NOTE(review): `old_path`, `renamed_in` and `deleted_in` presumably match
    /// the same-named fields of [`MergeConflictKind::RenameDelete`], with
    /// `new_path` being the rename destination — confirm against the producer.
    RenameDeleteConflict {
        old_path: Vec<u8>,
        new_path: Vec<u8>,
        renamed_in: String,
        deleted_in: String,
    },
}
7140
7141/// Read a tree object (by oid) into a flattened path -> (mode, oid) map,
7142/// descending into subtrees. The canonical empty tree yields an empty map.
7143pub fn flatten_tree(
7144    reader: &impl ObjectReader,
7145    format: ObjectFormat,
7146    tree_oid: &ObjectId,
7147) -> Result<MergeEntryMap> {
7148    let mut entries = BTreeMap::new();
7149    if *tree_oid == empty_tree_oid(format)? {
7150        return Ok(entries);
7151    }
7152    collect_flat_tree(reader, format, tree_oid, Vec::new(), &mut entries)?;
7153    Ok(entries)
7154}
7155
7156fn collect_flat_tree(
7157    reader: &impl ObjectReader,
7158    format: ObjectFormat,
7159    tree_oid: &ObjectId,
7160    prefix: Vec<u8>,
7161    entries: &mut MergeEntryMap,
7162) -> Result<()> {
7163    let object = reader.read_object(tree_oid)?;
7164    if object.object_type != ObjectType::Tree {
7165        return Err(GitError::InvalidObject(format!(
7166            "expected tree {}, found {}",
7167            tree_oid,
7168            object.object_type.as_str()
7169        )));
7170    }
7171    for entry in TreeEntries::new(format, &object.body) {
7172        let entry = entry?;
7173        let mut path = prefix.clone();
7174        if !path.is_empty() {
7175            path.push(b'/');
7176        }
7177        path.extend_from_slice(entry.name);
7178        if entry.mode == 0o040000 {
7179            collect_flat_tree(reader, format, &entry.oid, path, entries)?;
7180        } else {
7181            entries.insert(path, (entry.mode, entry.oid));
7182        }
7183    }
7184    Ok(())
7185}
7186
/// Reports whether `mode` is a plain file blob mode — regular (`100644`) or
/// executable (`100755`) — the only modes whose content can be textually
/// 3-way merged. Every other mode, including symlinks and gitlinks, yields
/// `false`.
pub fn is_mergeable_file_mode(mode: u32) -> bool {
    matches!(mode, 0o100644 | 0o100755)
}
7192
7193/// 3-way merge of three trees into a single merged tree.
7194///
7195/// `base` is the common-ancestor tree (`None` for unrelated histories — every
7196/// path is then treated as added on both sides). `ours`/`theirs` are the two
7197/// sides. Cleanly-merged blob content and the resulting (sub)trees are written
7198/// to `db`; the returned [`MergeTreesResult`] carries the merged top-level tree
7199/// oid plus per-path detail.
7200///
7201/// This is the shared engine behind `git merge-tree --write-tree`, `git merge`,
7202/// `git cherry-pick`, and `git revert`. It is behaviour-preserving relative to
7203/// the per-command copies it replaced, and additionally resolves renames when
7204/// [`MergeTreesOptions::detect_renames`] is set.
7205pub fn merge_trees(
7206    db: &FileObjectDatabase,
7207    format: ObjectFormat,
7208    base: Option<&ObjectId>,
7209    ours: &ObjectId,
7210    theirs: &ObjectId,
7211    options: &MergeTreesOptions<'_>,
7212) -> Result<MergeTreesResult> {
7213    let base_map = match base {
7214        Some(tree) => flatten_tree(db, format, tree)?,
7215        None => MergeEntryMap::new(),
7216    };
7217    let ours_map = flatten_tree(db, format, ours)?;
7218    let theirs_map = flatten_tree(db, format, theirs)?;
7219    merge_entry_maps(db, format, &base_map, &ours_map, &theirs_map, options)
7220}
7221
7222/// [`merge_trees`] operating on already-flattened entry maps. The merge
7223/// porcelains often hold the flattened maps already (e.g. cherry-pick builds
7224/// `theirs` from a picked commit's tree), so this avoids re-reading them.
7225pub fn merge_entry_maps(
7226    db: &FileObjectDatabase,
7227    format: ObjectFormat,
7228    base_map: &MergeEntryMap,
7229    ours_map: &MergeEntryMap,
7230    theirs_map: &MergeEntryMap,
7231    options: &MergeTreesOptions<'_>,
7232) -> Result<MergeTreesResult> {
7233    // Rename-aware step: detect files renamed on exactly one side relative to
7234    // base, so a modification on the other side follows the rename. This is the
7235    // non-recursive merge-ort rename case. We compute a rewrite map that, for a
7236    // one-sided rename old->new, presents the *other* side's `old` content at
7237    // `new` (and drops `old`), letting the path-keyed core below do the 3-way
7238    // content merge at the destination.
7239    let (mut renames, side_renames) = if options.detect_renames {
7240        let (renames, ours_side, theirs_side) =
7241            detect_merge_renames(db, format, base_map, ours_map, theirs_map, options)?;
7242        (renames, Some((ours_side, theirs_side)))
7243    } else {
7244        (MergeRenames::default(), None)
7245    };
7246
7247    // Build the effective per-side maps with file renames applied.
7248    let (mut eff_base, mut eff_ours, mut eff_theirs) =
7249        apply_merge_renames(base_map, ours_map, theirs_map, &renames);
7250
7251    // Directory-rename detection: when one side renamed a whole directory and
7252    // the other side added a file under (or renamed a file into) the old
7253    // directory, re-home that path into the renamed directory — including
7254    // transitive renames (a file the other side renamed into a directory this
7255    // side renamed follows on into the final directory). This is the
7256    // merge.directoryRenames behaviour, applied as a rewrite of the rename/add
7257    // destination paths so every merged path consults directory renames.
7258    let mut dir_rename_dirty = false;
7259    let mut rehomed_paths: BTreeMap<Vec<u8>, RehomeSides> = BTreeMap::new();
7260    let mut dir_rename_two_to_one: Vec<DirRenameTwoToOne> = Vec::new();
7261    let mut dir_rename_collisions: Vec<DirRenameCollision> = Vec::new();
7262    let mut dir_rename_splits: BTreeSet<Vec<u8>> = BTreeSet::new();
7263    let mut dir_rename_back_to_self: BTreeSet<Vec<u8>> = BTreeSet::new();
7264    let mut info_messages = Vec::new();
7265    let mut cleanup_paths: BTreeSet<Vec<u8>> = renames
7266        .dest_to_source
7267        .values()
7268        .map(|rename| rename.source.clone())
7269        .collect();
7270    if options.directory_renames != DirectoryRenames::False
7271        && let Some((ours_side, theirs_side)) = &side_renames
7272    {
7273        let dir_renames = compute_directory_renames(ours_map, theirs_map, ours_side, theirs_side);
7274        let outcome = apply_directory_renames(
7275            base_map,
7276            &eff_base,
7277            &eff_ours,
7278            &eff_theirs,
7279            ours_side,
7280            theirs_side,
7281            &dir_renames,
7282            &renames.dest_to_source,
7283        );
7284        eff_base = outcome.base;
7285        eff_ours = outcome.ours;
7286        eff_theirs = outcome.theirs;
7287        rehomed_paths = outcome.rehomed;
7288        dir_rename_collisions = outcome.collisions;
7289        dir_rename_splits = outcome.splits;
7290        dir_rename_back_to_self = outcome.back_to_self;
7291        info_messages = outcome.info_messages;
7292        dir_rename_dirty = outcome.dirty;
7293        remap_rename_destinations(&mut renames, &rehomed_paths);
7294        drop_collapsed_rename_rename_conflicts(&mut renames);
7295        dir_rename_two_to_one = collect_dir_rename_two_to_one(&renames, &rehomed_paths);
7296    }
7297    for info in rehomed_paths
7298        .values()
7299        .flat_map(|sides| [&sides.ours, &sides.theirs])
7300        .flatten()
7301    {
7302        cleanup_paths.insert(info.old_path.clone());
7303    }
7304    if options.directory_renames == DirectoryRenames::True {
7305        for (dest, sides) in &rehomed_paths {
7306            for info in [&sides.ours, &sides.theirs].into_iter().flatten() {
7307                let (added_in, dir_renamed_in) = if info.added_on_ours {
7308                    (
7309                        options.ours_label.to_string(),
7310                        options.theirs_label.to_string(),
7311                    )
7312                } else {
7313                    (
7314                        options.theirs_label.to_string(),
7315                        options.ours_label.to_string(),
7316                    )
7317                };
7318                info_messages.push(MergeInfoMessage::DirRenameApplied {
7319                    old_path: info.old_path.clone(),
7320                    new_path: dest.clone(),
7321                    renamed_from: info.renamed_from.clone(),
7322                    added_in,
7323                    dir_renamed_in,
7324                });
7325            }
7326        }
7327    }
7328    // In =conflict mode, every re-homed path is reported as a location conflict
7329    // (the tree still gets the re-homed content, but the merge is marked dirty).
7330    let dir_rename_conflict_paths: BTreeMap<Vec<u8>, RehomeSides> =
7331        if options.directory_renames == DirectoryRenames::Conflict {
7332            rehomed_paths.clone()
7333        } else {
7334            BTreeMap::new()
7335        };
7336
7337    let mut all_paths = BTreeSet::new();
7338    all_paths.extend(eff_base.keys().cloned());
7339    all_paths.extend(eff_ours.keys().cloned());
7340    all_paths.extend(eff_theirs.keys().cloned());
7341
7342    let mut paths: Vec<MergedPath> = Vec::new();
7343    let mut leaves: MergeEntryMap = BTreeMap::new();
7344    let mut clean = true;
7345
7346    for path in all_paths {
7347        let base = eff_base.get(&path).cloned();
7348        let ours = eff_ours.get(&path).cloned();
7349        let theirs = eff_theirs.get(&path).cloned();
7350        let rename = renames.dest_to_source.get(&path);
7351        let old_path = rename.map(|r| r.source.clone());
7352
7353        // Trivial resolutions (identical to the historical per-command logic).
7354        if ours == theirs {
7355            if let Some(entry) = ours {
7356                leaves.insert(path.clone(), entry);
7357            }
7358            paths.push(clean_path(path, ours));
7359            continue;
7360        }
7361        if ours == base {
7362            if let Some(entry) = &theirs {
7363                leaves.insert(path.clone(), *entry);
7364            }
7365            paths.push(clean_path(path, theirs));
7366            continue;
7367        }
7368        if theirs == base {
7369            if let Some(entry) = &ours {
7370                leaves.insert(path.clone(), *entry);
7371            }
7372            paths.push(clean_path(path, ours));
7373            continue;
7374        }
7375
7376        // Both sides diverged. Decide how to combine.
7377        let content_mergeable = matches!(&ours, Some((mode, _)) if is_mergeable_file_mode(*mode))
7378            && matches!(&theirs, Some((mode, _)) if is_mergeable_file_mode(*mode))
7379            && match &base {
7380                Some((mode, _)) => is_mergeable_file_mode(*mode),
7381                None => true,
7382            };
7383
7384        if let (true, Some((ours_mode, ours_oid)), Some((theirs_mode, theirs_oid))) =
7385            (content_mergeable, &ours, &theirs)
7386        {
7387            let add_add = base.is_none();
7388            let base_bytes = match &base {
7389                Some((_, oid)) => merge_blob_bytes(db, oid)?,
7390                None => Vec::new(),
7391            };
7392            let ours_bytes = merge_blob_bytes(db, ours_oid)?;
7393            let theirs_bytes = merge_blob_bytes(db, theirs_oid)?;
7394            // When this destination came from a one-sided rename, git qualifies
7395            // the conflict-marker labels with the per-side path (the renaming
7396            // side shows the new path, the other side the old path), e.g.
7397            // `<<<<<<< HEAD:old.txt` / `>>>>>>> feature:new.txt`.
7398            let rehome = rehomed_paths.get(&path);
7399            // git's `merge_3way` qualifies all three labels with their per-side
7400            // path (`<name>:<path>`) whenever the three paths are not identical —
7401            // pathnames[0] is the base/ancestor path (the rename source). When
7402            // they are identical (no rename), it uses the bare names.
7403            let (base_label_owned, ours_label, theirs_label) = match rename {
7404                Some(MergeRename { source, side }) => {
7405                    let (ours_path, theirs_path) = match side {
7406                        // theirs renamed -> ours kept the source path.
7407                        RenameSide::Theirs => (source.as_slice(), path.as_slice()),
7408                        // ours renamed -> theirs kept the source path.
7409                        RenameSide::Ours => (path.as_slice(), source.as_slice()),
7410                    };
7411                    (
7412                        qualify_label(options.ancestor_label, source.as_slice()),
7413                        qualify_label(options.ours_label, ours_path),
7414                        qualify_label(options.theirs_label, theirs_path),
7415                    )
7416                }
7417                None => {
7418                    let ours_path = rehome
7419                        .and_then(|info| info.ours.as_ref())
7420                        .map_or(path.as_slice(), |info| info.old_path.as_slice());
7421                    let theirs_path = rehome
7422                        .and_then(|info| info.theirs.as_ref())
7423                        .map_or(path.as_slice(), |info| info.old_path.as_slice());
7424                    if ours_path != path.as_slice() || theirs_path != path.as_slice() {
7425                        (
7426                            qualify_label(options.ancestor_label, path.as_slice()),
7427                            qualify_label(options.ours_label, ours_path),
7428                            qualify_label(options.theirs_label, theirs_path),
7429                        )
7430                    } else {
7431                        (
7432                            options.ancestor_label.to_string(),
7433                            options.ours_label.to_string(),
7434                            options.theirs_label.to_string(),
7435                        )
7436                    }
7437                }
7438            };
7439            let result = merge_blobs(
7440                &base_bytes,
7441                &ours_bytes,
7442                &theirs_bytes,
7443                &MergeBlobOptions {
7444                    ours_label: &ours_label,
7445                    theirs_label: &theirs_label,
7446                    base_label: &base_label_owned,
7447                    style: options.style,
7448                    favor: options.favor,
7449                    ws_ignore: options.ws_ignore,
7450                },
7451            );
7452
7453            let base_mode = base.as_ref().map(|(mode, _)| *mode);
7454            let (resolved_mode, mode_conflict) =
7455                merge_file_modes(base_mode, *ours_mode, *theirs_mode);
7456
7457            if !result.conflicted && !mode_conflict {
7458                let oid = db.write_object(EncodedObject::new(ObjectType::Blob, result.content))?;
7459                leaves.insert(path.clone(), (resolved_mode, oid));
7460                paths.push(clean_path_auto(path, Some((resolved_mode, oid)), true));
7461            } else if options.favor != MergeFavor::None && !mode_conflict {
7462                let chosen = if options.favor == MergeFavor::Ours {
7463                    ours
7464                } else {
7465                    theirs
7466                };
7467                if let Some(entry) = chosen {
7468                    leaves.insert(path.clone(), entry);
7469                }
7470                paths.push(clean_path_auto(path, chosen, true));
7471            } else {
7472                clean = false;
7473                let oid =
7474                    db.write_object(EncodedObject::new(ObjectType::Blob, result.content.clone()))?;
7475                leaves.insert(path.clone(), (resolved_mode, oid));
7476                let worktree_mode = if *ours_mode == *theirs_mode {
7477                    *ours_mode
7478                } else {
7479                    0o100644
7480                };
7481                let conflict = if let Some(old) = &old_path {
7482                    MergeConflictKind::RenameContent {
7483                        old_path: old.clone(),
7484                    }
7485                } else if add_add {
7486                    match rehome.and_then(|info| Some((info.ours.as_ref()?, info.theirs.as_ref()?)))
7487                    {
7488                        Some((ours_info, theirs_info)) => MergeConflictKind::RenameRenameTwoToOne {
7489                            ours_path: ours_info.old_path.clone(),
7490                            theirs_path: theirs_info.old_path.clone(),
7491                        },
7492                        None => MergeConflictKind::Content { add_add },
7493                    }
7494                } else {
7495                    MergeConflictKind::Content { add_add }
7496                };
7497                paths.push(MergedPath {
7498                    path: path.clone(),
7499                    stages: stages_for(&base, &ours, &theirs),
7500                    result: Some((resolved_mode, oid)),
7501                    worktree: Some((worktree_mode, result.content)),
7502                    conflict: Some(conflict),
7503                    auto_merged: true,
7504                });
7505            }
7506        } else if base.is_some() && (ours.is_none() || theirs.is_none()) {
7507            // modify/delete.
7508            clean = false;
7509            let (deleted_in, modified_in, surviving) = if ours.is_none() {
7510                (
7511                    options.ours_label.to_string(),
7512                    options.theirs_label.to_string(),
7513                    theirs,
7514                )
7515            } else {
7516                (
7517                    options.theirs_label.to_string(),
7518                    options.ours_label.to_string(),
7519                    ours,
7520                )
7521            };
7522            let worktree = match &surviving {
7523                Some((mode, oid)) => Some((*mode, merge_worktree_bytes(db, *mode, oid)?)),
7524                None => None,
7525            };
7526            if let Some(entry) = surviving {
7527                leaves.insert(path.clone(), entry);
7528            }
7529            paths.push(MergedPath {
7530                path: path.clone(),
7531                stages: stages_for(&base, &ours, &theirs),
7532                result: surviving,
7533                worktree,
7534                conflict: Some(MergeConflictKind::ModifyDelete {
7535                    deleted_in,
7536                    modified_in,
7537                }),
7538                auto_merged: false,
7539            });
7540        } else if let (Some(&(ours_mode, ours_oid)), Some(&(theirs_mode, theirs_oid))) =
7541            (ours.as_ref(), theirs.as_ref())
7542            && sley_index::is_symlink_mode(ours_mode)
7543            && sley_index::is_symlink_mode(theirs_mode)
7544        {
7545            // Both sides are symlinks that diverged from the base and from each
7546            // other (the trivial oid resolutions above already took the agreeing
7547            // cases). A symlink is never textually merged; git's
7548            // `handle_content_merge` symlink arm (merge-ort.c) resolves CLEAN to
7549            // a side under `-Xours`/`-Xtheirs`, and otherwise records a CONFLICT
7550            // carrying ours' target.
7551            match options.favor {
7552                MergeFavor::Ours => {
7553                    leaves.insert(path.clone(), (ours_mode, ours_oid));
7554                    paths.push(clean_path_auto(
7555                        path.clone(),
7556                        Some((ours_mode, ours_oid)),
7557                        false,
7558                    ));
7559                }
7560                MergeFavor::Theirs => {
7561                    leaves.insert(path.clone(), (theirs_mode, theirs_oid));
7562                    paths.push(clean_path_auto(
7563                        path.clone(),
7564                        Some((theirs_mode, theirs_oid)),
7565                        false,
7566                    ));
7567                }
7568                MergeFavor::None | MergeFavor::Union => {
7569                    clean = false;
7570                    leaves.insert(path.clone(), (ours_mode, ours_oid));
7571                    let worktree =
7572                        Some((ours_mode, merge_worktree_bytes(db, ours_mode, &ours_oid)?));
7573                    paths.push(MergedPath {
7574                        path: path.clone(),
7575                        stages: stages_for(&base, &ours, &theirs),
7576                        result: Some((ours_mode, ours_oid)),
7577                        worktree,
7578                        conflict: Some(MergeConflictKind::Content {
7579                            add_add: base.is_none(),
7580                        }),
7581                        auto_merged: false,
7582                    });
7583                }
7584            }
7585        } else if let (Some((ours_mode, ours_oid)), Some((theirs_mode, theirs_oid))) =
7586            (ours, theirs)
7587            && is_type_change(ours_mode, theirs_mode)
7588        {
7589            // Distinct types at one path: both sides present with different
7590            // `S_IFMT` (regular↔symlink, regular↔gitlink, symlink↔gitlink).
7591            // Mirror merge-ort's `process_entry`: rename each regular-file side
7592            // to `path~<branch>` so each type is recorded somewhere; ignore
7593            // `-Xours`/`-Xtheirs`. gitlink↔gitlink and symlink↔symlink share an
7594            // `S_IFMT` and are handled by the arms above.
7595            clean = false;
7596            // git renames the regular-file side(s): only the regular side when
7597            // exactly one is regular, both when neither is (symlink↔gitlink).
7598            let (rename_ours, rename_theirs) = if is_mergeable_file_mode(ours_mode) {
7599                (true, false)
7600            } else if is_mergeable_file_mode(theirs_mode) {
7601                (false, true)
7602            } else {
7603                (true, true)
7604            };
7605            // git keeps the base stage (index stage 1) for a side only when that
7606            // side shares the base's file type.
7607            let ours_base = base.filter(|(mode, _)| !is_type_change(*mode, ours_mode));
7608            let theirs_base = base.filter(|(mode, _)| !is_type_change(*mode, theirs_mode));
7609            // Name and reserve ours' aside-path first so the two renamed paths
7610            // can never collide (`unique_df_path` consults `leaves`/`paths`).
7611            let ours_path = if rename_ours {
7612                unique_df_path(&path, options.ours_label, &leaves, &paths)
7613            } else {
7614                path.clone()
7615            };
7616            leaves.insert(ours_path.clone(), (ours_mode, ours_oid));
7617            let theirs_path = if rename_theirs {
7618                unique_df_path(&path, options.theirs_label, &leaves, &paths)
7619            } else {
7620                path.clone()
7621            };
7622            leaves.insert(theirs_path.clone(), (theirs_mode, theirs_oid));
7623
7624            // The message is emitted once, by the leaf left at `original_path`
7625            // when only one side moved (matching git's keying), else by ours.
7626            let ours_carries_message = !rename_ours || rename_theirs;
7627            let distinct = MergeConflictKind::DistinctTypes {
7628                original_path: path.clone(),
7629                ours_renamed: rename_ours.then(|| ours_path.clone()),
7630                theirs_renamed: rename_theirs.then(|| theirs_path.clone()),
7631            };
7632            let ours_worktree = Some((ours_mode, merge_worktree_bytes(db, ours_mode, &ours_oid)?));
7633            paths.push(MergedPath {
7634                path: ours_path,
7635                stages: MergeStages {
7636                    base: ours_base,
7637                    ours: Some((ours_mode, ours_oid)),
7638                    theirs: None,
7639                },
7640                result: Some((ours_mode, ours_oid)),
7641                worktree: ours_worktree,
7642                conflict: Some(if ours_carries_message {
7643                    distinct.clone()
7644                } else {
7645                    MergeConflictKind::DistinctTypesStage
7646                }),
7647                auto_merged: false,
7648            });
7649            let theirs_worktree = Some((
7650                theirs_mode,
7651                merge_worktree_bytes(db, theirs_mode, &theirs_oid)?,
7652            ));
7653            paths.push(MergedPath {
7654                path: theirs_path,
7655                stages: MergeStages {
7656                    base: theirs_base,
7657                    ours: None,
7658                    theirs: Some((theirs_mode, theirs_oid)),
7659                },
7660                result: Some((theirs_mode, theirs_oid)),
7661                worktree: theirs_worktree,
7662                conflict: Some(if ours_carries_message {
7663                    MergeConflictKind::DistinctTypesStage
7664                } else {
7665                    distinct
7666                }),
7667                auto_merged: false,
7668            });
7669        } else {
7670            // add/add of non-files, mode changes on same-type entries, etc. Keep
7671            // the surviving side's content and record a generic content conflict.
7672            clean = false;
7673            let add_add = base.is_none();
7674            let surviving = ours.or(theirs);
7675            let worktree = match &surviving {
7676                Some((mode, oid)) => Some((*mode, merge_worktree_bytes(db, *mode, oid)?)),
7677                None => None,
7678            };
7679            if let Some(entry) = surviving {
7680                leaves.insert(path.clone(), entry);
7681            }
7682            paths.push(MergedPath {
7683                path: path.clone(),
7684                stages: stages_for(&base, &ours, &theirs),
7685                result: surviving,
7686                worktree,
7687                conflict: Some(MergeConflictKind::Content { add_add }),
7688                auto_merged: false,
7689            });
7690        }
7691    }
7692
7693    if !renames.rename_rename_one_to_two.is_empty() {
7694        apply_rename_rename_one_to_two_conflicts(
7695            db,
7696            base_map,
7697            &eff_ours,
7698            &eff_theirs,
7699            &renames.rename_rename_one_to_two,
7700            &mut paths,
7701            &mut leaves,
7702            options,
7703        )?;
7704        clean = false;
7705    }
7706
7707    if !dir_rename_two_to_one.is_empty() {
7708        apply_dir_rename_two_to_one_conflicts(
7709            db,
7710            &eff_ours,
7711            &eff_theirs,
7712            &dir_rename_two_to_one,
7713            &mut paths,
7714            &mut leaves,
7715            options,
7716        )?;
7717        clean = false;
7718    }
7719
7720    // Rename/rename(2to1) and rename/add: two distinct contents collide on one
7721    // destination (and the rename source(s) are consumed). Detected from the full
7722    // per-side rename sets, applied here so the destination carries both sides'
7723    // content-merged stages instead of the path-keyed core's raw add/add.
7724    if !renames.rename_rename_two_to_one.is_empty() || !renames.rename_adds.is_empty() {
7725        apply_rename_two_to_one_and_add_conflicts(
7726            db,
7727            base_map,
7728            ours_map,
7729            theirs_map,
7730            &renames,
7731            &mut paths,
7732            &mut leaves,
7733            options,
7734        )?;
7735        clean = false;
7736    }
7737
7738    // Rename/delete conflicts: a file renamed on one side whose source the other
7739    // side deleted. The merge core resolved the destination cleanly (only the
7740    // renaming side has it), but git flags this as a conflict — keep the renamed
7741    // content in the tree, record higher-order stages, and mark the merge dirty.
7742    if !renames.rename_deletes.is_empty() {
7743        for (dest, rd) in &renames.rename_deletes {
7744            // Skip if another conflict already claimed this destination.
7745            let Some(slot) = paths.iter_mut().find(|p| &p.path == dest) else {
7746                continue;
7747            };
7748            if slot.conflict.is_some() {
7749                continue;
7750            }
7751            let base_entry = base_map.get(&rd.source).copied();
7752            let renamed_entry = slot.result;
7753            // The renamed content sits on the renaming side; the deleting side
7754            // contributes no stage at the destination.
7755            let (ours_stage, theirs_stage) = match rd.side {
7756                RenameSide::Ours => (renamed_entry, None),
7757                RenameSide::Theirs => (None, renamed_entry),
7758            };
7759            let (renamed_in, deleted_in) = match rd.side {
7760                RenameSide::Ours => (
7761                    options.ours_label.to_string(),
7762                    options.theirs_label.to_string(),
7763                ),
7764                RenameSide::Theirs => (
7765                    options.theirs_label.to_string(),
7766                    options.ours_label.to_string(),
7767                ),
7768            };
7769            let worktree = match &renamed_entry {
7770                Some((mode, oid)) => Some((*mode, merge_worktree_bytes(db, *mode, oid)?)),
7771                None => None,
7772            };
7773            slot.stages = MergeStages {
7774                base: base_entry,
7775                ours: ours_stage,
7776                theirs: theirs_stage,
7777            };
7778            slot.worktree = worktree;
7779            slot.conflict = Some(MergeConflictKind::RenameDelete {
7780                old_path: rd.source.clone(),
7781                renamed_in,
7782                deleted_in,
7783            });
7784            clean = false;
7785        }
7786    }
7787
7788    // Directory-rename outcomes that make the merge dirty. A collision/split
7789    // detected while re-homing (two paths onto one destination, an ambiguous
7790    // split source, or a file in the way) marks the merge unclean regardless of
7791    // mode. In =conflict mode, every silently re-homed path is *also* reported
7792    // as a location conflict: the tree keeps the re-homed content but git wants
7793    // the user to confirm the suggested move.
7794    if dir_rename_dirty {
7795        clean = false;
7796    }
7797    // Implicit-directory-rename collisions (a directory rename would put a path
7798    // onto an existing file/dir, or N paths onto one destination). git emits
7799    // `CONFLICT (implicit dir rename): Existing file/dir at <dest> in the way ...`
7800    // regardless of mode, and the merge is unclean. Attach the conflict to the
7801    // blocked destination path (which keeps its original content).
7802    for collision in &dir_rename_collisions {
7803        clean = false;
7804        if let Some(slot) = paths.iter_mut().find(|p| p.path == collision.dest)
7805            && slot.conflict.is_none()
7806        {
7807            slot.conflict = Some(MergeConflictKind::DirRenameImplicitCollision {
7808                sources: collision.sources.clone(),
7809            });
7810        } else if !paths.iter().any(|p| p.path == collision.dest) {
7811            paths.push(MergedPath {
7812                path: collision.dest.clone(),
7813                stages: MergeStages::default(),
7814                result: None,
7815                worktree: None,
7816                conflict: Some(MergeConflictKind::DirRenameImplicitCollision {
7817                    sources: collision.sources.clone(),
7818                }),
7819                auto_merged: false,
7820            });
7821        }
7822    }
7823    for source_dir in &dir_rename_splits {
7824        clean = false;
7825        paths.push(MergedPath {
7826            path: source_dir.clone(),
7827            stages: MergeStages::default(),
7828            result: None,
7829            worktree: None,
7830            conflict: Some(MergeConflictKind::DirRenameSplit {
7831                source_dir: source_dir.clone(),
7832            }),
7833            auto_merged: false,
7834        });
7835    }
7836    if !dir_rename_conflict_paths.is_empty() {
7837        clean = false;
7838        for (dest, infos) in &dir_rename_conflict_paths {
7839            for info in [&infos.ours, &infos.theirs].into_iter().flatten() {
7840                let (added_in, dir_renamed_in) = if info.added_on_ours {
7841                    // The path was added/renamed by ours, into a dir theirs renamed.
7842                    (
7843                        options.ours_label.to_string(),
7844                        options.theirs_label.to_string(),
7845                    )
7846                } else {
7847                    (
7848                        options.theirs_label.to_string(),
7849                        options.ours_label.to_string(),
7850                    )
7851                };
7852                // Rename-to-self via a directory rename (merge-ort 12i2): the
7853                // re-home landed the file back on its own base source path where
7854                // the other side modified it. git records this UNMERGED (UU) even
7855                // though the trivial 3-way at the destination resolves cleanly
7856                // (the renamed side's content equals base). Stage the three
7857                // versions so the index carries the conflict.
7858                let back_to_self = dir_rename_back_to_self.contains(dest);
7859                if let Some(slot) = paths.iter_mut().find(|p| &p.path == dest)
7860                    && slot.conflict.is_none()
7861                {
7862                    if back_to_self {
7863                        slot.stages = MergeStages {
7864                            base: eff_base.get(dest).copied(),
7865                            ours: eff_ours.get(dest).copied(),
7866                            theirs: eff_theirs.get(dest).copied(),
7867                        };
7868                        slot.worktree = match &slot.result {
7869                            Some((mode, oid)) => {
7870                                Some((*mode, merge_worktree_bytes(db, *mode, oid)?))
7871                            }
7872                            None => slot.worktree.clone(),
7873                        };
7874                    }
7875                    slot.conflict = Some(MergeConflictKind::DirRenameLocation {
7876                        old_path: info.old_path.clone(),
7877                        renamed_from: info.renamed_from.clone(),
7878                        added_in,
7879                        dir_renamed_in,
7880                        back_to_self,
7881                    });
7882                } else {
7883                    info_messages.push(MergeInfoMessage::DirRenameLocationConflict {
7884                        old_path: info.old_path.clone(),
7885                        new_path: dest.clone(),
7886                        renamed_from: info.renamed_from.clone(),
7887                        added_in,
7888                        dir_renamed_in,
7889                    });
7890                }
7891            }
7892        }
7893    }
7894
7895    // Directory/file (D/F) conflict resolution (merge-ort `process_entry`): a
7896    // path that ends up as a *file* in the merged result while another result
7897    // path lives *under* it (so the path is simultaneously a directory) cannot
7898    // coexist. git keeps the directory at the original path and moves the file
7899    // aside to `path~<branch>` via `unique_path`, where `<branch>` is the side
7900    // that contributed the file. We resolve this on the flattened `leaves` after
7901    // every per-path decision is made, so renames/dir-renames have settled first.
7902    resolve_directory_file_conflicts(
7903        db,
7904        &mut paths,
7905        &mut leaves,
7906        &mut clean,
7907        &eff_ours,
7908        &eff_theirs,
7909        options,
7910        &mut info_messages,
7911    )?;
7912
7913    let tree = write_merged_tree(db, &leaves)?;
7914
7915    cleanup_paths.retain(|path| !leaves.contains_key(path));
7916
7917    Ok(MergeTreesResult {
7918        tree,
7919        paths,
7920        clean,
7921        cleanup_paths: cleanup_paths.into_iter().collect(),
7922        info_messages,
7923    })
7924}
7925
/// Make a branch label usable inside a synthesized `path~branch` name.
///
/// Mirrors what git's `add_flattened_path` does for `unique_path`: every `/`
/// in the label is rewritten to `_`; all other characters are kept unchanged,
/// so the label never introduces extra path components.
fn flatten_branch_label(branch: &str) -> String {
    branch
        .chars()
        .map(|ch| if ch == '/' { '_' } else { ch })
        .collect()
}
7932
7933/// Pick a `path~<branch>` name not already present in `leaves` (or claimed by an
7934/// existing `paths` entry), mirroring merge-ort's `unique_path`: start from
7935/// `path~branch`, then append `_0`, `_1`, … on collision.
7936fn unique_df_path(
7937    path: &[u8],
7938    branch: &str,
7939    leaves: &MergeEntryMap,
7940    paths: &[MergedPath],
7941) -> Vec<u8> {
7942    let mut base = path.to_vec();
7943    base.push(b'~');
7944    base.extend_from_slice(flatten_branch_label(branch).as_bytes());
7945    let taken = |candidate: &[u8]| {
7946        leaves.contains_key(candidate) || paths.iter().any(|p| p.path == candidate)
7947    };
7948    if !taken(&base) {
7949        return base;
7950    }
7951    let mut suffix = 0usize;
7952    loop {
7953        let mut candidate = base.clone();
7954        candidate.push(b'_');
7955        candidate.extend_from_slice(suffix.to_string().as_bytes());
7956        if !taken(&candidate) {
7957            return candidate;
7958        }
7959        suffix += 1;
7960    }
7961}
7962
/// Resolve directory/file collisions in the merged leaf set. For every file leaf
/// whose path is also a directory (some other leaf lives under `path/`), move the
/// file to `path~<branch>` and record a [`MergeConflictKind::FileDirectory`].
///
/// Arguments, as used by this function:
/// * `db` — object store the moved file's worktree bytes are read from.
/// * `paths` — per-path merge records; the colliding path's record is relocated
///   in place (or a new one is pushed), and the vector is sorted by path before
///   the normal return.
/// * `leaves` — flat `path -> (mode, oid)` result map; the colliding key is
///   removed and re-inserted under the moved-aside name.
/// * `clean` — set to `false` for every collision that is resolved.
/// * `eff_ours` / `eff_theirs` — consulted only to decide which side's label
///   names the moved-aside file.
/// * `options` — supplies `ours_label` / `theirs_label`.
/// * `info_messages` — receives a `RenameDeleteConflict` message when the
///   relocated record previously carried a rename/delete conflict.
///
/// Errors are propagated from reading the moved entry's bytes.
///
/// NOTE(review): when no leaf key contains a `/`, the function returns early
/// and `paths` is *not* sorted; the sort at the end only runs on the normal
/// path. Confirm callers do not rely on sorted order for flat results.
#[allow(clippy::too_many_arguments)]
fn resolve_directory_file_conflicts(
    db: &FileObjectDatabase,
    paths: &mut Vec<MergedPath>,
    leaves: &mut MergeEntryMap,
    clean: &mut bool,
    eff_ours: &MergeEntryMap,
    eff_theirs: &MergeEntryMap,
    options: &MergeTreesOptions<'_>,
    info_messages: &mut Vec<MergeInfoMessage>,
) -> Result<()> {
    // A path is a "directory" in the result iff some leaf key has it as a strict
    // `path/` prefix. Collect every such directory prefix once.
    let mut directory_prefixes: BTreeSet<Vec<u8>> = BTreeSet::new();
    for key in leaves.keys() {
        // Walk the key slash by slash: each `key[..end]` that stops just before
        // a `/` is an ancestor directory of this leaf.
        let mut idx = 0;
        while let Some(pos) = key[idx..].iter().position(|b| *b == b'/') {
            let end = idx + pos;
            directory_prefixes.insert(key[..end].to_vec());
            idx = end + 1;
        }
    }
    // No leaf lives inside any directory, so no D/F collision is possible.
    if directory_prefixes.is_empty() {
        return Ok(());
    }

    // File leaves that collide with a directory of the same name. Collected up
    // front (as owned keys) because `leaves` is mutated in the loop below.
    let colliding: Vec<Vec<u8>> = leaves
        .keys()
        .filter(|key| directory_prefixes.contains(*key))
        .cloned()
        .collect();

    for original in colliding {
        let Some(entry) = leaves.remove(&original) else {
            continue;
        };
        // The moved-aside file must be materialized in the worktree at its new
        // path; read its blob bytes once so the porcelain has worktree content.
        let moved_bytes = merge_worktree_bytes(db, entry.0, &entry.1)?;
        // Which side contributed the file? git keys off `dirmask`: the file lives
        // on the side that is NOT the directory. We read it off the effective side
        // maps — whichever side has this path as a plain file. When only theirs has
        // it, use the theirs label; otherwise (ours has it, or both do) ours wins,
        // matching git's index-1 bias for the moved-aside name.
        // (If neither map has the path, the expression below also picks ours.)
        let ours_has_file = eff_ours.contains_key(&original);
        let theirs_has_file = eff_theirs.contains_key(&original);
        let from_ours = ours_has_file || !theirs_has_file;
        let branch = if from_ours {
            options.ours_label
        } else {
            options.theirs_label
        };
        // The old key was removed above, so the new name is picked against the
        // remaining leaves plus every path already recorded in `paths`.
        let new_path = unique_df_path(&original, branch, leaves, paths);
        leaves.insert(new_path.clone(), entry);
        *clean = false;

        // Relocate the path's MergedPath: update its destination and stamp the D/F
        // conflict. If the path had no MergedPath (defensive), synthesize one.
        if let Some(slot) = paths.iter_mut().find(|p| p.path == original) {
            // The slot's conflict is overwritten with `FileDirectory` below; a
            // pre-existing rename/delete conflict is kept as an info message
            // (reported against the original, pre-move path) instead.
            if let Some(MergeConflictKind::RenameDelete {
                old_path,
                renamed_in,
                deleted_in,
            }) = &slot.conflict
            {
                info_messages.push(MergeInfoMessage::RenameDeleteConflict {
                    old_path: old_path.clone(),
                    new_path: original.clone(),
                    renamed_in: renamed_in.clone(),
                    deleted_in: deleted_in.clone(),
                });
            }
            slot.path = new_path.clone();
            slot.result = Some(entry);
            // Preserve any pre-existing higher-order stages; a clean file leaf has
            // none, so seed the contributing side's stage (ours or theirs, per
            // `from_ours`) with the moved entry for `ls-files -u`.
            if slot.stages.base.is_none()
                && slot.stages.ours.is_none()
                && slot.stages.theirs.is_none()
            {
                slot.stages = MergeStages {
                    base: None,
                    ours: if from_ours { Some(entry) } else { None },
                    theirs: if from_ours { None } else { Some(entry) },
                };
            }
            // Keep the slot's existing `auto_merged`: git only emits
            // `Auto-merging <new_path>` for the moved file when a real content
            // merge ran (a rename or both-sides change drives filemask>=6 through
            // handle_content_merge). A plain one-sided add (filemask 2/4) is moved
            // aside silently, so we must NOT force the flag on here.
            slot.worktree = Some((entry.0, moved_bytes));
            slot.conflict = Some(MergeConflictKind::FileDirectory {
                original_path: original.clone(),
                moved_from: branch.to_string(),
            });
        } else {
            // No existing record for this path: create one directly at the
            // moved-aside name, staged on the contributing side only.
            paths.push(MergedPath {
                path: new_path.clone(),
                stages: MergeStages {
                    base: None,
                    ours: if from_ours { Some(entry) } else { None },
                    theirs: if from_ours { None } else { Some(entry) },
                },
                result: Some(entry),
                worktree: Some((entry.0, moved_bytes)),
                conflict: Some(MergeConflictKind::FileDirectory {
                    original_path: original.clone(),
                    moved_from: branch.to_string(),
                }),
                auto_merged: false,
            });
        }
    }

    // Keep `paths` sorted by destination path (callers and tests assume order).
    paths.sort_by(|a, b| a.path.cmp(&b.path));
    Ok(())
}
8086
8087/// Construct a clean (non-conflicted) [`MergedPath`].
8088fn clean_path(path: Vec<u8>, result: Option<(u32, ObjectId)>) -> MergedPath {
8089    clean_path_auto(path, result, false)
8090}
8091
8092/// Like [`clean_path`] but records whether the path went through a textual
8093/// 3-way content merge (for the "Auto-merging" message).
8094fn clean_path_auto(
8095    path: Vec<u8>,
8096    result: Option<(u32, ObjectId)>,
8097    auto_merged: bool,
8098) -> MergedPath {
8099    MergedPath {
8100        path,
8101        stages: MergeStages::default(),
8102        result,
8103        worktree: None,
8104        conflict: None,
8105        auto_merged,
8106    }
8107}
8108
8109/// Snapshot the present stages for a conflicted path.
8110fn stages_for(
8111    base: &Option<(u32, ObjectId)>,
8112    ours: &Option<(u32, ObjectId)>,
8113    theirs: &Option<(u32, ObjectId)>,
8114) -> MergeStages {
8115    MergeStages {
8116        base: *base,
8117        ours: *ours,
8118        theirs: *theirs,
8119    }
8120}
8121
8122/// Read a blob's raw bytes, requiring it to be a blob object.
8123fn merge_blob_bytes(reader: &impl ObjectReader, oid: &ObjectId) -> Result<Vec<u8>> {
8124    let object = reader.read_object(oid)?;
8125    if object.object_type != ObjectType::Blob {
8126        return Err(GitError::InvalidObject(format!(
8127            "expected blob {}, found {}",
8128            oid,
8129            object.object_type.as_str()
8130        )));
8131    }
8132    Ok(object.body.clone())
8133}
8134
8135fn merge_worktree_bytes(reader: &impl ObjectReader, mode: u32, oid: &ObjectId) -> Result<Vec<u8>> {
8136    if sley_index::is_gitlink(mode) {
8137        Ok(Vec::new())
8138    } else {
8139        merge_blob_bytes(reader, oid)
8140    }
8141}
8142
/// Three-way merge of a file mode.
///
/// Returns `(mode, conflict)`:
/// * both sides agree → that mode, no conflict;
/// * exactly one side still equals `base` → the other side's mode, no conflict;
/// * otherwise (both sides differ from each other and from `base`, or there is
///   no base) → ours' mode, with `conflict` set.
fn merge_file_modes(base: Option<u32>, ours: u32, theirs: u32) -> (u32, bool) {
    if ours == theirs {
        return (ours, false);
    }
    if base == Some(ours) {
        // Only theirs changed the mode.
        return (theirs, false);
    }
    if base == Some(theirs) {
        // Only ours changed the mode.
        return (ours, false);
    }
    (ours, true)
}
8155
8156/// Build a top-level tree object from a flat map of `path -> (mode, oid)`
8157/// leaves, writing every (sub)tree object to `db`.
8158fn write_merged_tree(db: &FileObjectDatabase, leaves: &MergeEntryMap) -> Result<ObjectId> {
8159    let mut root = MergeTreeNode::default();
8160    for (path, (mode, oid)) in leaves {
8161        root.insert(path, *mode, *oid);
8162    }
8163    root.write(db)
8164}
8165
/// One directory level of the merged tree while it is being assembled in
/// memory. A default (empty) node is the starting point; leaves are added with
/// `insert` and the finished hierarchy is serialized with `write`.
#[derive(Default)]
struct MergeTreeNode {
    /// Leaf entries directly inside this directory, keyed by entry name (a
    /// single path component) and carrying the leaf's `(mode, oid)`.
    blobs: BTreeMap<Vec<u8>, (u32, ObjectId)>,
    /// Child directories, keyed by directory name (a single path component).
    subtrees: BTreeMap<Vec<u8>, MergeTreeNode>,
}
8171
8172impl MergeTreeNode {
8173    fn insert(&mut self, path: &[u8], mode: u32, oid: ObjectId) {
8174        match path.iter().position(|byte| *byte == b'/') {
8175            Some(slash) => {
8176                let component = path[..slash].to_vec();
8177                let rest = &path[slash + 1..];
8178                self.subtrees
8179                    .entry(component)
8180                    .or_default()
8181                    .insert(rest, mode, oid);
8182            }
8183            None => {
8184                self.blobs.insert(path.to_vec(), (mode, oid));
8185            }
8186        }
8187    }
8188
8189    fn write(&self, db: &FileObjectDatabase) -> Result<ObjectId> {
8190        let mut entries: Vec<TreeEntry> = Vec::new();
8191        for (name, (mode, oid)) in &self.blobs {
8192            entries.push(TreeEntry {
8193                mode: *mode,
8194                name: BString::from(name.clone()),
8195                oid: *oid,
8196            });
8197        }
8198        for (name, subtree) in &self.subtrees {
8199            let oid = subtree.write(db)?;
8200            entries.push(TreeEntry {
8201                mode: 0o040000,
8202                name: BString::from(name.clone()),
8203                oid,
8204            });
8205        }
8206        entries.sort_by_key(merge_tree_sort_key);
8207        let tree = Tree { entries };
8208        db.write_object(EncodedObject::new(ObjectType::Tree, tree.write()))
8209    }
8210}
8211
8212fn merge_tree_sort_key(entry: &TreeEntry) -> Vec<u8> {
8213    let mut key = entry.name.as_bytes().to_vec();
8214    if entry.mode == 0o040000 {
8215        key.push(b'/');
8216    }
8217    key
8218}
8219
8220// --- Rename-aware non-recursive merge -------------------------------------
8221
/// Which side of the merge performed a rename.
#[derive(Clone, Copy, PartialEq, Eq)]
enum RenameSide {
    /// The rename was made on the "ours" side.
    Ours,
    /// The rename was made on the "theirs" side.
    Theirs,
}
8228
/// One detected one-sided rename: its source path and which side renamed it.
/// The destination path is not stored here; it is the key under which this
/// value sits in `MergeRenames::dest_to_source`.
#[derive(Clone)]
struct MergeRename {
    /// Path the file had before the rename.
    source: Vec<u8>,
    /// Side that performed the rename.
    side: RenameSide,
}
8235
/// A file renamed on one side whose source was *deleted* on the other side — a
/// rename/delete conflict. git keeps the renamed content at the destination but
/// flags the merge as conflicted.
///
/// The destination path is not stored here; it is the key under which this
/// value sits in `MergeRenames::rename_deletes`.
#[derive(Clone)]
struct RenameDelete {
    /// The pre-rename source path (deleted on the other side).
    source: Vec<u8>,
    /// Which side performed the rename (the other side deleted the source).
    side: RenameSide,
}
8246
/// The rename pairings discovered for one merge: which destination paths came
/// from which source path, and which side renamed (so the other side's change
/// can follow the rename and conflict labels can be path-qualified like git).
///
/// All maps are ordered (`BTreeMap`) and keyed by raw path bytes; which path
/// (source or destination) serves as the key differs per field, see below.
#[derive(Default)]
struct MergeRenames {
    /// One-sided renames keyed by *destination* path. Only renames where the
    /// OTHER side kept/modified the source in place are recorded (the case
    /// where the modification must follow the rename).
    dest_to_source: BTreeMap<Vec<u8>, MergeRename>,
    /// Rename/delete conflicts: a file renamed on one side whose source the
    /// other side deleted. Keyed by destination path.
    rename_deletes: BTreeMap<Vec<u8>, RenameDelete>,
    /// Rename/rename(1to2) conflicts keyed by source path; the value holds the
    /// destination each side chose.
    rename_rename_one_to_two: BTreeMap<Vec<u8>, RenameRenameOneToTwo>,
    /// Rename/rename(2to1) conflicts keyed by the shared *destination* path:
    /// ours renamed `ours_source`->dest and theirs renamed `theirs_source`->dest.
    rename_rename_two_to_one: BTreeMap<Vec<u8>, RenameRenameTwoToOne>,
    /// Rename/add conflicts keyed by *destination*: one side renamed a file to
    /// `dest` while the other side added a different file at the same `dest`.
    rename_adds: BTreeMap<Vec<u8>, RenameAdd>,
}
8268
/// A rename/rename(1to2): one source path that ended up at a different
/// destination on each side. The source path is not stored here; it is the key
/// under which this value sits in `MergeRenames::rename_rename_one_to_two`.
#[derive(Clone)]
struct RenameRenameOneToTwo {
    /// Destination the source was renamed to on ours.
    ours_dest: Vec<u8>,
    /// Destination the source was renamed to on theirs.
    theirs_dest: Vec<u8>,
}
8274
/// A rename/rename(2to1): two distinct sources renamed onto one destination, one
/// rename per side. Each side's content at the destination is the 3-way merge of
/// its rename (the other side's change to that source follows the rename).
///
/// The shared destination is not stored here; it is the key under which this
/// value sits in `MergeRenames::rename_rename_two_to_one`.
#[derive(Clone)]
struct RenameRenameTwoToOne {
    /// The source ours renamed onto the destination.
    ours_source: Vec<u8>,
    /// The source theirs renamed onto the destination.
    theirs_source: Vec<u8>,
}
8285
/// A rename/add: one side renamed a file onto `dest`, the other side added an
/// unrelated file at `dest`. The renaming side's content is the 3-way merge of
/// its rename; the adding side contributes its added blob verbatim.
///
/// `dest` is not stored here; it is the key under which this value sits in
/// `MergeRenames::rename_adds`.
#[derive(Clone)]
struct RenameAdd {
    /// The pre-rename source path on the renaming side.
    source: Vec<u8>,
    /// Which side performed the rename (the other side added at `dest`).
    side: RenameSide,
}
8296
/// Every file rename observed on one side (base->side), as `(old, new)` pairs.
/// Unlike [`MergeRenames`] this is the *complete* rename set on a side — it is
/// the input to directory-rename inference, which needs to see all the per-file
/// moves between directories, not just the ones the other side kept in place.
struct SideRenames {
    /// One `(old_path, new_path)` tuple per rename, as raw path bytes.
    pairs: Vec<(Vec<u8>, Vec<u8>)>,
}
8304
8305/// Detect one-sided renames usable for a non-recursive merge: a path present in
8306/// `base`, deleted on one side and present (renamed) at a new path on that same
8307/// side, while the OTHER side still has the original path (modified or
8308/// unchanged). Such a rename lets the other side's change move to the
8309/// destination.
8310///
8311/// Also returns the complete per-side rename set so the caller can infer
8312/// directory renames (which need every file move, not just the merge-relevant
8313/// ones).
8314fn detect_merge_renames(
8315    db: &FileObjectDatabase,
8316    format: ObjectFormat,
8317    base_map: &MergeEntryMap,
8318    ours_map: &MergeEntryMap,
8319    theirs_map: &MergeEntryMap,
8320    options: &MergeTreesOptions<'_>,
8321) -> Result<(MergeRenames, SideRenames, SideRenames)> {
8322    let mut renames = MergeRenames::default();
8323
8324    // Renames on ours: the other side that must carry its change is theirs.
8325    let ours_side = collect_side_renames(
8326        db,
8327        format,
8328        base_map,
8329        ours_map,
8330        theirs_map,
8331        RenameSide::Ours,
8332        options.rename_threshold,
8333        options.rename_limit,
8334        &mut renames,
8335    )?;
8336    // Renames on theirs: the other side that carries its change is ours.
8337    let theirs_side = collect_side_renames(
8338        db,
8339        format,
8340        base_map,
8341        theirs_map,
8342        ours_map,
8343        RenameSide::Theirs,
8344        options.rename_threshold,
8345        options.rename_limit,
8346        &mut renames,
8347    )?;
8348
8349    collect_rename_rename_one_to_two(&mut renames, &ours_side, &theirs_side);
8350    collect_rename_rename_two_to_one_and_adds(
8351        &mut renames,
8352        &ours_side,
8353        &theirs_side,
8354        base_map,
8355        ours_map,
8356        theirs_map,
8357    );
8358
8359    Ok((renames, ours_side, theirs_side))
8360}
8361
8362/// Detect rename/rename(2to1) and rename/add conflicts from the complete per-side
8363/// rename sets. Both arise when a one-sided rename's destination is *occupied* on
8364/// the other side (so [`collect_side_renames`] left it out of `dest_to_source`):
8365///
8366/// * 2to1 — both sides renamed (distinct sources) onto the same destination.
8367/// * rename/add — one side renamed onto a path the other side *added* fresh
8368///   (the destination is new to the other side, not a base path it kept and not
8369///   itself a rename destination on that side).
8370fn collect_rename_rename_two_to_one_and_adds(
8371    renames: &mut MergeRenames,
8372    ours_side: &SideRenames,
8373    theirs_side: &SideRenames,
8374    base_map: &MergeEntryMap,
8375    ours_map: &MergeEntryMap,
8376    theirs_map: &MergeEntryMap,
8377) {
8378    let ours_by_dest: BTreeMap<&[u8], &[u8]> = ours_side
8379        .pairs
8380        .iter()
8381        .map(|(old, new)| (new.as_slice(), old.as_slice()))
8382        .collect();
8383    let theirs_by_dest: BTreeMap<&[u8], &[u8]> = theirs_side
8384        .pairs
8385        .iter()
8386        .map(|(old, new)| (new.as_slice(), old.as_slice()))
8387        .collect();
8388
8389    // 2to1: a destination that is a rename target on BOTH sides from different
8390    // sources. (Same source on both sides is a rename/rename(1to1), handled by
8391    // the path-keyed core; same source to two dests is the 1to2 case above.)
8392    for (dest, ours_src) in &ours_by_dest {
8393        let Some(theirs_src) = theirs_by_dest.get(dest) else {
8394            continue;
8395        };
8396        if ours_src == theirs_src {
8397            continue;
8398        }
8399        // Don't disturb a destination the 1to2 pass already claimed.
8400        if renames.rename_rename_one_to_two.contains_key(*dest) {
8401            continue;
8402        }
8403        renames.rename_rename_two_to_one.insert(
8404            dest.to_vec(),
8405            RenameRenameTwoToOne {
8406                ours_source: ours_src.to_vec(),
8407                theirs_source: theirs_src.to_vec(),
8408            },
8409        );
8410    }
8411
8412    // rename/add on ours: ours renamed onto `dest`, which theirs added (present
8413    // on theirs, absent from base, and not a theirs rename target).
8414    for (dest, ours_src) in &ours_by_dest {
8415        if renames.rename_rename_two_to_one.contains_key(*dest)
8416            || renames.rename_rename_one_to_two.contains_key(*dest)
8417        {
8418            continue;
8419        }
8420        if theirs_map.contains_key(*dest)
8421            && !base_map.contains_key(*dest)
8422            && !theirs_by_dest.contains_key(dest)
8423        {
8424            renames.rename_adds.insert(
8425                dest.to_vec(),
8426                RenameAdd {
8427                    source: ours_src.to_vec(),
8428                    side: RenameSide::Ours,
8429                },
8430            );
8431        }
8432    }
8433    // rename/add on theirs: symmetric.
8434    for (dest, theirs_src) in &theirs_by_dest {
8435        if renames.rename_rename_two_to_one.contains_key(*dest)
8436            || renames.rename_rename_one_to_two.contains_key(*dest)
8437            || renames.rename_adds.contains_key(*dest)
8438        {
8439            continue;
8440        }
8441        if ours_map.contains_key(*dest)
8442            && !base_map.contains_key(*dest)
8443            && !ours_by_dest.contains_key(dest)
8444        {
8445            renames.rename_adds.insert(
8446                dest.to_vec(),
8447                RenameAdd {
8448                    source: theirs_src.to_vec(),
8449                    side: RenameSide::Theirs,
8450                },
8451            );
8452        }
8453    }
8454}
8455
8456fn collect_rename_rename_one_to_two(
8457    renames: &mut MergeRenames,
8458    ours_side: &SideRenames,
8459    theirs_side: &SideRenames,
8460) {
8461    let ours_by_source: BTreeMap<&[u8], &[u8]> = ours_side
8462        .pairs
8463        .iter()
8464        .map(|(old, new)| (old.as_slice(), new.as_slice()))
8465        .collect();
8466    for (old, theirs_new) in &theirs_side.pairs {
8467        let Some(ours_new) = ours_by_source.get(old.as_slice()) else {
8468            continue;
8469        };
8470        if *ours_new == theirs_new.as_slice() {
8471            continue;
8472        }
8473        renames.rename_deletes.remove(*ours_new);
8474        renames.rename_deletes.remove(theirs_new);
8475        renames.dest_to_source.remove(*ours_new);
8476        renames.dest_to_source.remove(theirs_new);
8477        renames.rename_rename_one_to_two.insert(
8478            old.clone(),
8479            RenameRenameOneToTwo {
8480                ours_dest: (*ours_new).to_vec(),
8481                theirs_dest: theirs_new.clone(),
8482            },
8483        );
8484    }
8485}
8486
8487/// Collect renames that occurred on `side` (relative to `base`). Records the
8488/// merge-relevant subset (renames the `other` side still references) into
8489/// `renames`, and returns the *complete* per-side rename set for directory-rename
8490/// inference. `db`/`format` resolve blob bytes for similarity scoring.
8491#[allow(clippy::too_many_arguments)]
8492fn collect_side_renames(
8493    db: &FileObjectDatabase,
8494    format: ObjectFormat,
8495    base_map: &MergeEntryMap,
8496    side_map: &MergeEntryMap,
8497    other_map: &MergeEntryMap,
8498    side: RenameSide,
8499    threshold: u8,
8500    rename_limit: usize,
8501    renames: &mut MergeRenames,
8502) -> Result<SideRenames> {
8503    // Diff base->side with inexact rename detection; the resulting `Renamed`
8504    // entries name (old_path -> new_path) pairs on this side.
8505    let base_tree = entry_map_as_tracked(base_map);
8506    let side_tree = entry_map_as_tracked(side_map);
8507    let options = RenameDetectionOptions {
8508        base: DiffNameStatusOptions {
8509            detect_renames: true,
8510            detect_copies: false,
8511            find_copies_harder: false,
8512            rename_empty: false,
8513        },
8514        detect_inexact: true,
8515        rename_threshold: threshold,
8516        copy_threshold: threshold,
8517        rename_limit,
8518    };
8519    let changes = diff_name_status_maps_with_renames(
8520        &base_tree,
8521        &side_tree,
8522        base_tree.keys().chain(side_tree.keys()),
8523        options,
8524        |oid| merge_blob_bytes(db, oid).ok(),
8525    )?;
8526
8527    let mut pairs = Vec::new();
8528    for change in changes {
8529        let NameStatus::Renamed(_) = change.status else {
8530            continue;
8531        };
8532        let Some(old_path) = change.old_path.as_ref() else {
8533            continue;
8534        };
8535        let old = old_path.as_bytes().to_vec();
8536        let new = change.path.as_bytes().to_vec();
8537        // Complete rename set, fed to directory-rename inference.
8538        pairs.push((old.clone(), new.clone()));
8539
8540        // Only act when the destination is genuinely new (not already present
8541        // in either side from a different origin) and the OTHER side still
8542        // references the source path — i.e. the other side modified/kept `old`,
8543        // and its change should follow the rename to `new`.
8544        if !other_map.contains_key(&old) {
8545            // The source path is gone on the other side. If it existed in base
8546            // (so the other side *deleted* it) and the other side did not also
8547            // produce `new`, this is a rename/delete conflict: this side renamed
8548            // the file, the other side deleted its source.
8549            if base_map.contains_key(&old) && !other_map.contains_key(&new) {
8550                renames
8551                    .rename_deletes
8552                    .entry(new.clone())
8553                    .or_insert(RenameDelete {
8554                        source: old.clone(),
8555                        side,
8556                    });
8557            }
8558            continue;
8559        }
8560        // If the other side ALSO renamed/created `new`, that is a rename/rename
8561        // or rename/add corner case we leave to the path-keyed core (stage-b).
8562        if other_map.contains_key(&new) {
8563            continue;
8564        }
8565        // Skip if both sides renamed the same source to the same dest (already
8566        // recorded) or to anything (first writer wins; the path-keyed core then
8567        // sees identical dest entries and resolves trivially).
8568        renames
8569            .dest_to_source
8570            .entry(new)
8571            .or_insert(MergeRename { source: old, side });
8572    }
8573
8574    let _ = format;
8575    Ok(SideRenames { pairs })
8576}
8577
8578/// Rewrite the three side maps so that each detected one-sided rename old->new
8579/// presents the OTHER side's `old` entry at `new`, and removes `old` from
8580/// every side. The path-keyed merge core then performs the 3-way content merge
8581/// at `new` with base=base[old], one side = the renaming side's new content,
8582/// the other side = the modifying side's old content.
8583fn apply_merge_renames(
8584    base_map: &MergeEntryMap,
8585    ours_map: &MergeEntryMap,
8586    theirs_map: &MergeEntryMap,
8587    renames: &MergeRenames,
8588) -> (MergeEntryMap, MergeEntryMap, MergeEntryMap) {
8589    if renames.dest_to_source.is_empty() {
8590        return (base_map.clone(), ours_map.clone(), theirs_map.clone());
8591    }
8592    let mut base = base_map.clone();
8593    let mut ours = ours_map.clone();
8594    let mut theirs = theirs_map.clone();
8595
8596    for (new, rename) in &renames.dest_to_source {
8597        let old = &rename.source;
8598        // Move base[old] to base[new] so the destination has a proper ancestor.
8599        if let Some(entry) = base.remove(old) {
8600            base.entry(new.clone()).or_insert(entry);
8601        }
8602        // For each side, if it still has `old`, move that entry to `new`.
8603        for side in [&mut ours, &mut theirs] {
8604            if let Some(entry) = side.remove(old) {
8605                side.entry(new.clone()).or_insert(entry);
8606            }
8607        }
8608    }
8609    (base, ours, theirs)
8610}
8611
8612// --- Directory-rename detection -------------------------------------------
8613
/// Everything in `path` before its last `/`, or `None` when `path` contains no
/// `/` at all (a top-level name).
fn parent_dir(path: &[u8]) -> Option<&[u8]> {
    let slash = path.iter().rposition(|&byte| byte == b'/')?;
    Some(&path[..slash])
}
8618
/// Rewrite `path` according to the directory rename `old_dir -> new_dir`.
///
/// `path` must live under `old_dir`; the function slices `path` at a fixed
/// offset and panics if it is shorter than that. The result is `new_dir`
/// followed by the part of `path` after `old_dir` (which still starts with
/// `/`). When `new_dir` is empty (a rename into the repository root) that
/// leading `/` is skipped too, so `z -> ""` maps `z/d` to `d`.
fn apply_dir_rename(old_dir: &[u8], new_dir: &[u8], path: &[u8]) -> Vec<u8> {
    // One extra byte is skipped for a root target so the result has no
    // leading separator.
    let skip = old_dir.len() + usize::from(new_dir.is_empty());
    let tail = &path[skip..];

    let mut rehomed = Vec::with_capacity(new_dir.len() + tail.len());
    rehomed.extend_from_slice(new_dir);
    rehomed.extend_from_slice(tail);
    rehomed
}
8634
8635/// Find the longest renamed ancestor directory of `path`: walk parent dirs from
8636/// the deepest up and return the first one present in `dir_renames`. Mirrors
8637/// merge-ort's `check_dir_renamed`.
8638fn check_dir_renamed<'a>(
8639    path: &[u8],
8640    dir_renames: &'a BTreeMap<Vec<u8>, Vec<u8>>,
8641) -> Option<(&'a [u8], &'a [u8])> {
8642    let mut cur = parent_dir(path);
8643    while let Some(dir) = cur {
8644        if let Some((old_dir, new_dir)) = dir_renames.get_key_value(dir) {
8645            return Some((old_dir.as_slice(), new_dir.as_slice()));
8646        }
8647        cur = parent_dir(dir);
8648    }
8649    None
8650}
8651
/// The provisional directory renames computed for both sides, plus the source
/// directories whose rename was ambiguous (a "split"). Built by
/// `compute_directory_renames`, which removes any source directory present in
/// both sides' maps before filling in `ours` and `theirs`.
struct DirectoryRenameMaps {
    /// `old_dir -> new_dir` directory renames detected on ours' side. A path
    /// added/renamed by theirs under `old_dir` re-homes into `new_dir`.
    ours: BTreeMap<Vec<u8>, Vec<u8>>,
    /// `old_dir -> new_dir` directory renames detected on theirs' side. A path
    /// added/renamed by ours under `old_dir` re-homes into `new_dir`.
    theirs: BTreeMap<Vec<u8>, Vec<u8>>,
    /// Source directories whose split was unclear on ours' side (no unique
    /// majority target); paths on theirs that would need to follow such a rename
    /// are a conflict, not silent.
    ours_split: BTreeSet<Vec<u8>>,
    /// Source directories whose split was unclear on theirs' side (no unique
    /// majority target); the counterpart of `ours_split`.
    theirs_split: BTreeSet<Vec<u8>>,
}
8667
8668/// Infer directory renames from the complete per-side file-rename sets, mirroring
8669/// merge-ort's `get_provisional_directory_renames` + `handle_directory_level_conflicts`.
8670/// For every file moved `.../old_dir/x -> .../new_dir/x`, the ancestor pairs are
8671/// tallied (`dir_rename_count`) and collapsed to `old_dir -> best_new_dir` where
8672/// `best` is the unique highest count. A tie marks the source directory as a
8673/// "split". A rename is only kept if the source directory was *entirely removed*
8674/// on that side (the `dirs_removed` gate). A directory renamed on BOTH sides is
8675/// dropped from both maps (ambiguous).
8676fn compute_directory_renames(
8677    ours_map: &MergeEntryMap,
8678    theirs_map: &MergeEntryMap,
8679    ours_side: &SideRenames,
8680    theirs_side: &SideRenames,
8681) -> DirectoryRenameMaps {
8682    let ours = compute_side_dir_renames(&ours_side.pairs, ours_map);
8683    let theirs = compute_side_dir_renames(&theirs_side.pairs, theirs_map);
8684
8685    // A directory renamed on BOTH sides (to whatever target) is ambiguous;
8686    // git's handle_directory_level_conflicts drops it from both maps so neither
8687    // side's directory rename is applied.
8688    let mut ours_map_out = ours.renames;
8689    let mut theirs_map_out = theirs.renames;
8690    let dup: Vec<Vec<u8>> = ours_map_out
8691        .keys()
8692        .filter(|k| theirs_map_out.contains_key(*k))
8693        .cloned()
8694        .collect();
8695    for k in dup {
8696        ours_map_out.remove(&k);
8697        theirs_map_out.remove(&k);
8698    }
8699
8700    DirectoryRenameMaps {
8701        ours: ours_map_out,
8702        theirs: theirs_map_out,
8703        ours_split: ours.split,
8704        theirs_split: theirs.split,
8705    }
8706}
8707
/// Per-side directory-rename computation result, as returned by
/// `compute_side_dir_renames`.
struct SideDirRenames {
    /// `old_dir -> new_dir` for every source directory that had a unique
    /// most-frequent target and passed the "fully removed on this side" check.
    renames: BTreeMap<Vec<u8>, Vec<u8>>,
    /// Source directories whose highest target count was shared by more than
    /// one target (no unique winner).
    split: BTreeSet<Vec<u8>>,
}
8713
8714/// Compute one side's `old_dir -> new_dir` map from its file renames, gated on
8715/// the source directory being fully removed on that side.
8716fn compute_side_dir_renames(
8717    pairs: &[(Vec<u8>, Vec<u8>)],
8718    side_map: &MergeEntryMap,
8719) -> SideDirRenames {
8720    // dir_rename_count: count[old_dir][new_dir]. Built by walking every rename's
8721    // ancestor directories while the *trailing* path components match, exactly
8722    // as merge-ort's update_dir_rename_counts does. For
8723    //   a/b/c/d/e/foo.c -> a/b/some/thing/else/e/foo.c
8724    // this records both
8725    //   a/b/c/d/e => a/b/some/thing/else/e   AND   a/b/c/d => a/b/some/thing/else
8726    // but stops once the trailing components diverge.
8727    let mut counts: BTreeMap<Vec<u8>, BTreeMap<Vec<u8>, usize>> = BTreeMap::new();
8728    for (old, new) in pairs {
8729        update_dir_rename_counts(&mut counts, old, new);
8730    }
8731
8732    let mut renames = BTreeMap::new();
8733    let mut split = BTreeSet::new();
8734    for (old_dir, targets) in counts {
8735        let mut max = 0usize;
8736        let mut bad_max = 0usize;
8737        let mut best: Option<Vec<u8>> = None;
8738        for (target, count) in &targets {
8739            if *count == max {
8740                bad_max = max;
8741            } else if *count > max {
8742                max = *count;
8743                best = Some(target.clone());
8744            }
8745        }
8746        if max == 0 {
8747            continue;
8748        }
8749        if bad_max == max {
8750            split.insert(old_dir);
8751            continue;
8752        }
8753        // dirs_removed gate: the source directory must be entirely gone on this
8754        // side. New files that recreate the old directory count too; otherwise
8755        // cases like "both sides renamed z/ -> y/, but one side added z/d"
8756        // incorrectly look like both sides performed a whole-directory rename.
8757        if let Some(best) = best
8758            && directory_fully_removed(&old_dir, side_map)
8759        {
8760            renames.insert(old_dir, best);
8761        }
8762    }
8763
8764    SideDirRenames { renames, split }
8765}
8766
8767/// Tally the ancestor directory-rename pairs implied by a single file rename
8768/// `old -> new`, mirroring merge-ort's `update_dir_rename_counts`. Starting from
8769/// the immediate parent dirs, we strip one trailing component at a time and
8770/// record `old_ancestor -> new_ancestor` as long as the *remaining* trailing
8771/// suffix still matches between the two paths.
8772fn update_dir_rename_counts(
8773    counts: &mut BTreeMap<Vec<u8>, BTreeMap<Vec<u8>, usize>>,
8774    old: &[u8],
8775    new: &[u8],
8776) {
8777    // Work on owned copies we progressively truncate at each '/'.
8778    let mut old_dir = old.to_vec();
8779    let mut new_dir = new.to_vec();
8780    let mut first = true;
8781    loop {
8782        // Strip the trailing component (basename on the first pass, then a dir
8783        // each pass) to ascend one level.
8784        let old_has = dir_munge(&mut old_dir);
8785        let new_has = dir_munge(&mut new_dir);
8786
8787        // On the first pass we only stripped the basename; the dirs need not
8788        // match. On later passes the *trailing* components must agree, otherwise
8789        // the rename no longer implies this ancestor pairing.
8790        if !first {
8791            let old_sub = trailing_component(old, &old_dir);
8792            let new_sub = trailing_component(new, &new_dir);
8793            if old_sub != new_sub {
8794                break;
8795            }
8796        }
8797
8798        if old_dir == new_dir {
8799            // Same directory at this level — no rename implied, and no deeper
8800            // ancestor can differ usefully either.
8801            break;
8802        }
8803        *counts
8804            .entry(old_dir.clone())
8805            .or_default()
8806            .entry(new_dir.clone())
8807            .or_default() += 1;
8808
8809        first = false;
8810        // Hitting the toplevel ("") on either side ends the ascent.
8811        if old_dir.is_empty() || new_dir.is_empty() {
8812            break;
8813        }
8814        // If the two ancestors are identical from here up, stop (git stops once
8815        // the suffix-equal walk reaches a common prefix).
8816        if !old_has || !new_has {
8817            break;
8818        }
8819    }
8820}
8821
/// Cut `buf` back to its parent directory: everything from the last `/`
/// onwards is dropped, and a buffer without any `/` becomes empty. The return
/// value tells whether a `/` was found.
fn dir_munge(buf: &mut Vec<u8>) -> bool {
    let last_slash = buf.iter().rposition(|&byte| byte == b'/');
    // No separator means a toplevel name, whose parent is the empty path.
    buf.truncate(last_slash.unwrap_or(0));
    last_slash.is_some()
}
8837
/// The part of `full` that follows `dir/`. For an empty `dir` (the top level)
/// there is no separator to skip and all of `full` is returned. `dir` is
/// expected to be a proper directory prefix of `full`; only its length is
/// used, and the slice panics if `full` is shorter than `dir.len() + 1`.
fn trailing_component<'a>(full: &'a [u8], dir: &[u8]) -> &'a [u8] {
    // Skip `dir` plus the '/' that separates it from the rest.
    let start = if dir.is_empty() { 0 } else { dir.len() + 1 };
    &full[start..]
}
8849
8850/// True when no path under `dir/` exists on `side` (the directory was entirely
8851/// removed there). Mirrors merge-ort's `dirs_removed` precondition.
8852fn directory_fully_removed(dir: &[u8], side_map: &MergeEntryMap) -> bool {
8853    let mut prefix = dir.to_vec();
8854    prefix.push(b'/');
8855    for path in side_map.keys() {
8856        if path.starts_with(&prefix) {
8857            return false;
8858        }
8859    }
8860    true
8861}
8862
/// A path on one side whose location is rewritten by a directory rename the
/// *other* side performed. The rewrite applies equally to a freshly added file
/// and to a file the side itself renamed (a transitive rename). Produced by
/// `plan_rehome`, one per destination that exactly one path maps to.
struct DirRenameMove {
    /// The path as it currently sits in the side's effective map (the side's own
    /// rename, if any, already applied).
    from: Vec<u8>,
    /// The re-homed destination, after applying the other side's directory rename.
    to: Vec<u8>,
    /// `Some(source)` when `from` is a rename destination produced by this side
    /// (transitive rename); `None` for a fresh add. Drives git's
    /// "renamed to"/"added in" message wording.
    renamed_from: Option<Vec<u8>>,
}
8877
/// Record of two sources, one per side, that end up at the same destination.
///
/// NOTE(review): this type is not constructed or read in the part of the file
/// reviewed here; the field descriptions below are inferred from the names
/// alone and should be confirmed against the code that uses it.
struct DirRenameTwoToOne {
    /// The shared destination path.
    dest: Vec<u8>,
    /// Presumably the path ours moved onto `dest` — confirm.
    ours_source: Vec<u8>,
    /// Presumably the path theirs moved onto `dest` — confirm.
    theirs_source: Vec<u8>,
    /// Presumably the path shown for ours' side in conflict output — confirm.
    ours_label_path: Vec<u8>,
    /// Presumably the path shown for theirs' side in conflict output — confirm.
    theirs_label_path: Vec<u8>,
}
8885
/// Provenance of a re-homed path, for `=conflict`-mode `CONFLICT (file location)`
/// reporting: where the path was before a directory rename moved it, and which
/// side put it there.
#[derive(Clone)]
struct RehomeInfo {
    /// The pre-re-home path on the adding/renaming side.
    old_path: Vec<u8>,
    /// `Some(source)` for a transitive rename, `None` for a fresh add.
    renamed_from: Option<Vec<u8>>,
    /// Whether the *adding/renaming* side was ours (true) or theirs (false). The
    /// caller resolves this to a branch label.
    added_on_ours: bool,
}
8898
/// Per-side provenance for a destination created by directory-rename rehoming.
/// Either field is `None` when that side did not re-home anything onto the
/// destination; `Default` yields both as `None`.
#[derive(Clone, Default)]
struct RehomeSides {
    /// Provenance of the path ours re-homed here, if any.
    ours: Option<RehomeInfo>,
    /// Provenance of the path theirs re-homed here, if any.
    theirs: Option<RehomeInfo>,
}
8905
/// An implicit-directory-rename collision: one or more paths a directory rename
/// would re-home onto `dest`, which is blocked because `dest` is already
/// occupied (a file in the way) or because multiple sources map to it. git emits
/// `CONFLICT (implicit dir rename): Existing file/dir at <dest> in the way ...`.
///
/// `plan_rehome` records one of these whenever more than one candidate path
/// on a side maps to the same destination.
struct DirRenameCollision {
    /// The blocked destination path (the file/dir already there).
    dest: Vec<u8>,
    /// The source path(s) the directory rename tried to move onto `dest`.
    sources: Vec<Vec<u8>>,
}
8916
/// Outcome of applying directory renames to all three effective maps, as
/// returned by `apply_directory_renames`.
struct DirRenameOutcome {
    /// Rewritten base/ours/theirs maps with re-homed paths moved to their
    /// destinations. `base` moves too so a re-homed content-merge keeps its
    /// ancestor at the new location.
    base: MergeEntryMap,
    /// Rewritten ours map (see `base`).
    ours: MergeEntryMap,
    /// Rewritten theirs map (see `base`).
    theirs: MergeEntryMap,
    /// Re-homed destination path -> provenance (for `=conflict`-mode reporting).
    rehomed: BTreeMap<Vec<u8>, RehomeSides>,
    /// Implicit-dir-rename collisions (file in the way / N-to-1), for the
    /// `CONFLICT (implicit dir rename)` message; always conflicts regardless of
    /// mode.
    collisions: Vec<DirRenameCollision>,
    /// Split source dirs that were relevant to a path on the other side.
    splits: BTreeSet<Vec<u8>>,
    /// Destinations where a directory rename moved a file back onto its own base
    /// source path (rename-to-self) and the other side modified that path. git
    /// records these as an unmerged file-location conflict (`UU`) rather than a
    /// clean auto-resolution; the trivial 3-way at the destination would
    /// otherwise resolve cleanly because the renamed side's content equals base.
    back_to_self: BTreeSet<Vec<u8>>,
    /// True if a directory-level collision or split made the merge dirty even in
    /// `=true` mode (e.g. two paths re-homed onto one destination).
    dirty: bool,
    /// Informational messages gathered while planning the moves, e.g. a
    /// directory rename that was skipped because its target directory was
    /// itself renamed.
    info_messages: Vec<MergeInfoMessage>,
}
8944
8945/// Apply directory renames to both sides' effective maps.
8946///
8947/// This mirrors merge-ort's `collect_renames` + `check_for_directory_rename` +
8948/// `apply_directory_rename_modifications`: every path a side *added* or *renamed*
8949/// that lives under a directory the OTHER side renamed has its destination
8950/// rewritten to follow that rename — making the directory rename a property of
8951/// the rename-detection pass that every path consults, not a per-file special
8952/// case. Handles:
8953///   - transitive renames (a file the side renamed into a dir the other side
8954///     renamed follows on into the final directory),
8955///   - `dir_rename_exclusions` (never re-home into a directory THIS side itself
8956///     renamed — that would create a spurious rename/rename(1to2)),
8957///   - collisions (N paths mapping to one destination -> conflict),
8958///   - splits (a source dir with no majority target -> conflict, leave in place).
8959#[allow(clippy::too_many_arguments)]
8960fn apply_directory_renames(
8961    base_map: &MergeEntryMap,
8962    eff_base: &MergeEntryMap,
8963    eff_ours: &MergeEntryMap,
8964    eff_theirs: &MergeEntryMap,
8965    ours_side: &SideRenames,
8966    theirs_side: &SideRenames,
8967    dir_renames: &DirectoryRenameMaps,
8968    file_rename_dests: &BTreeMap<Vec<u8>, MergeRename>,
8969) -> DirRenameOutcome {
8970    let mut base = eff_base.clone();
8971    let mut ours = eff_ours.clone();
8972    let mut theirs = eff_theirs.clone();
8973    let mut rehomed = BTreeMap::new();
8974    let mut collisions = Vec::new();
8975    let mut splits = BTreeSet::new();
8976    let mut back_to_self = BTreeSet::new();
8977    let mut info_messages = Vec::new();
8978    let mut dirty = false;
8979
8980    // Ours' paths follow THEIRS' directory renames; the exclusions are OURS' own
8981    // renamed-into dirs (never re-home a path into a directory this same side
8982    // renamed). Symmetrically for theirs.
8983    let ours_excl = exclusion_dirs(&dir_renames.ours);
8984    let theirs_excl = exclusion_dirs(&dir_renames.theirs);
8985
8986    // Plan ours' moves (following theirs' dir-renames) and theirs' moves
8987    // (following ours' dir-renames). Planning before applying lets us detect
8988    // collisions (N paths onto one destination) across the whole side.
8989    let ours_moves = plan_rehome(
8990        base_map,
8991        &ours,
8992        ours_side,
8993        &dir_renames.theirs,
8994        &ours_excl,
8995        &dir_renames.theirs_split,
8996        &mut collisions,
8997        &mut splits,
8998        &mut info_messages,
8999        &mut dirty,
9000    );
9001    let theirs_moves = plan_rehome(
9002        base_map,
9003        &theirs,
9004        theirs_side,
9005        &dir_renames.ours,
9006        &theirs_excl,
9007        &dir_renames.ours_split,
9008        &mut collisions,
9009        &mut splits,
9010        &mut info_messages,
9011        &mut dirty,
9012    );
9013
9014    apply_rehome_moves(
9015        base_map,
9016        file_rename_dests,
9017        &mut base,
9018        &mut ours,
9019        &mut theirs,
9020        ours_moves,
9021        true,
9022        &mut rehomed,
9023        &mut collisions,
9024        &mut back_to_self,
9025        &mut dirty,
9026    );
9027    apply_rehome_moves(
9028        base_map,
9029        file_rename_dests,
9030        &mut base,
9031        &mut ours,
9032        &mut theirs,
9033        theirs_moves,
9034        false,
9035        &mut rehomed,
9036        &mut collisions,
9037        &mut back_to_self,
9038        &mut dirty,
9039    );
9040
9041    DirRenameOutcome {
9042        base,
9043        ours,
9044        theirs,
9045        rehomed,
9046        collisions,
9047        splits,
9048        back_to_self,
9049        dirty,
9050        info_messages,
9051    }
9052}
9053
/// Collect the *source* directories (the map's keys) of one side's directory
/// renames. A directory rename from the other side whose target is one of
/// these directories is skipped, since applying it would produce a spurious
/// rename/rename(1to2); this is git's `dir_rename_exclusions`.
fn exclusion_dirs(side_dir_renames: &BTreeMap<Vec<u8>, Vec<u8>>) -> BTreeSet<Vec<u8>> {
    let mut sources = BTreeSet::new();
    for old_dir in side_dir_renames.keys() {
        sources.insert(old_dir.clone());
    }
    sources
}
9060
/// Plan the directory-rename moves for one side: which of its added/renamed
/// paths re-home where, following `renamer_dirs` (the OTHER side's
/// `old_dir -> new_dir` directory renames).
///
/// Candidates are the paths in `side` that are either absent from `base_map`
/// (freshly added files) or a destination of one of this side's own renames
/// in `side_renames` (the latter give the transitive-rename behaviour). For
/// each candidate, in order:
///
/// * If it lies under a directory in `split_dirs`, that directory is added to
///   `splits`, `dirty` is set, and the path is left in place.
/// * Otherwise its nearest renamed ancestor in `renamer_dirs` is looked up;
///   with none, the path is left in place.
/// * If the rename's target directory is in `exclusions`, or lies strictly
///   inside an excluded directory while `side_has_pure_add_under_dir` reports
///   false for the source directory, the rename is skipped and a
///   `DirRenameSkippedDueToRerename` message is pushed to `info_messages`.
///   (`apply_directory_renames` passes this side's own renamed source
///   directories as `exclusions`.)
/// * If applying the rename would leave the path unchanged, nothing happens.
/// * Otherwise the move is planned, grouped by destination.
///
/// A destination wanted by more than one path becomes a `DirRenameCollision`
/// (and sets `dirty`) and yields no move. Returns the surviving moves, one per
/// destination. When both `renamer_dirs` and `split_dirs` are empty, nothing
/// is examined and the result is empty.
#[allow(clippy::too_many_arguments)]
fn plan_rehome(
    base_map: &MergeEntryMap,
    side: &MergeEntryMap,
    side_renames: &SideRenames,
    renamer_dirs: &BTreeMap<Vec<u8>, Vec<u8>>,
    exclusions: &BTreeSet<Vec<u8>>,
    split_dirs: &BTreeSet<Vec<u8>>,
    collisions: &mut Vec<DirRenameCollision>,
    splits: &mut BTreeSet<Vec<u8>>,
    info_messages: &mut Vec<MergeInfoMessage>,
    dirty: &mut bool,
) -> Vec<DirRenameMove> {
    // Nothing the other side did at directory level can affect this side.
    if renamer_dirs.is_empty() && split_dirs.is_empty() {
        return Vec::new();
    }

    // This side's rename destinations -> sources; eligible for a transitive
    // rewrite and carry the original source for message wording.
    let side_rename_src: BTreeMap<&[u8], &[u8]> = side_renames
        .pairs
        .iter()
        .map(|(o, n)| (n.as_slice(), o.as_slice()))
        .collect();

    // Paths new relative to base, plus this side's own rename destinations
    // (which may coincide with a path that exists in base).
    let candidates: Vec<Vec<u8>> = side
        .keys()
        .filter(|p| !base_map.contains_key(*p) || side_rename_src.contains_key(p.as_slice()))
        .cloned()
        .collect();

    // dest -> the moves wanting to land there (collision detection).
    let mut planned: BTreeMap<Vec<u8>, Vec<DirRenameMove>> = BTreeMap::new();
    for path in candidates {
        // A split takes precedence over any rename lookup: the path stays put
        // and the merge is flagged dirty.
        if let Some(split_dir) = check_dir_split(&path, split_dirs) {
            splits.insert(split_dir.to_vec());
            *dirty = true;
            continue;
        }
        let Some((old_dir, new_dir)) = check_dir_renamed(&path, renamer_dirs) else {
            continue;
        };
        // dir_rename_exclusions: don't apply a rename INTO a directory this side
        // itself renamed; that would cause a spurious rename/rename(1to2). The
        // file instead follows this side's own rename, so leave it.
        let new_dir_is_exclusion = exclusions.contains(new_dir);
        let new_dir_inside_exclusion = exclusions
            .iter()
            .any(|dir| directory_contains_proper(dir, new_dir));
        if new_dir_is_exclusion
            || (new_dir_inside_exclusion
                && !side_has_pure_add_under_dir(side, base_map, &side_rename_src, old_dir))
        {
            info_messages.push(MergeInfoMessage::DirRenameSkippedDueToRerename {
                old_dir: old_dir.to_vec(),
                path: path.clone(),
                new_dir: new_dir.to_vec(),
            });
            continue;
        }
        let dest = apply_dir_rename(old_dir, new_dir, &path);
        if dest == path {
            // Directory rename causes a rename-to-self: already in place.
            continue;
        }
        // `Some(source)` marks a transitive rename, `None` a fresh add.
        let renamed_from = side_rename_src.get(path.as_slice()).map(|s| s.to_vec());
        planned
            .entry(dest.clone())
            .or_default()
            .push(DirRenameMove {
                from: path,
                to: dest,
                renamed_from,
            });
    }

    let mut moves = Vec::new();
    for (dest, group) in planned {
        if group.len() > 1 {
            // Multiple paths map to one destination: an implicit-dir-rename
            // collision. git leaves all of them in place and conflicts.
            *dirty = true;
            collisions.push(DirRenameCollision {
                dest,
                sources: group.into_iter().map(|m| m.from).collect(),
            });
            continue;
        }
        // Groups are only created by pushing a move, so one element exists.
        moves.push(group.into_iter().next().expect("non-empty"));
    }
    moves
}
9167
9168fn check_dir_split<'a>(path: &[u8], split_dirs: &'a BTreeSet<Vec<u8>>) -> Option<&'a [u8]> {
9169    let mut dir = parent_dir(path)?;
9170    loop {
9171        if let Some(split_dir) = split_dirs.get(dir) {
9172            return Some(split_dir);
9173        }
9174        dir = parent_dir(dir)?;
9175    }
9176}
9177
/// Whether `child` lies strictly below the directory `parent`.
///
/// True only when `parent` is non-empty and `child` is `parent` followed by a
/// `/` (and possibly more bytes). A path never properly contains itself, and a
/// sibling that merely shares a byte prefix (`ab` vs `a`) does not count.
fn directory_contains_proper(parent: &[u8], child: &[u8]) -> bool {
    if parent.is_empty() {
        return false;
    }
    match child.strip_prefix(parent) {
        // The byte right after the shared prefix must be the separator.
        Some(rest) => rest.first() == Some(&b'/'),
        None => false,
    }
}
9184
9185fn side_has_pure_add_under_dir(
9186    side: &MergeEntryMap,
9187    base_map: &MergeEntryMap,
9188    side_rename_src: &BTreeMap<&[u8], &[u8]>,
9189    dir: &[u8],
9190) -> bool {
9191    side.keys().any(|path| {
9192        path_is_under_dir(path, dir)
9193            && !base_map.contains_key(path)
9194            && !side_rename_src.contains_key(path.as_slice())
9195    })
9196}
9197
/// Whether `path` is located below the non-empty directory `dir`, i.e. `path`
/// starts with `dir` immediately followed by a `/`.
fn path_is_under_dir(path: &[u8], dir: &[u8]) -> bool {
    let split = dir.len();
    // `get(split)` being `Some` already guarantees `path` is longer than `dir`,
    // so the subslice below cannot go out of bounds.
    split != 0 && path.get(split) == Some(&b'/') && path[..split] == *dir
}
9201
9202/// Apply a side's planned re-home moves to all three effective maps.
9203///
9204/// `side_is_ours` says whether the moves originate from ours' (true) or theirs'
9205/// (false) paths — used both for `=conflict`-mode provenance and to decide which
9206/// side's entry the move primarily belongs to. A move whose source is a
9207/// content-merge path (present on the other side and in base too) re-homes
9208/// across `base`/`ours`/`theirs` together, so the 3-way merge follows it to the
9209/// new location; a pure add re-homes only its own side.
9210#[allow(clippy::too_many_arguments)]
9211fn apply_rehome_moves(
9212    original_base: &MergeEntryMap,
9213    file_rename_dests: &BTreeMap<Vec<u8>, MergeRename>,
9214    base: &mut MergeEntryMap,
9215    ours: &mut MergeEntryMap,
9216    theirs: &mut MergeEntryMap,
9217    moves: Vec<DirRenameMove>,
9218    side_is_ours: bool,
9219    rehomed: &mut BTreeMap<Vec<u8>, RehomeSides>,
9220    collisions: &mut Vec<DirRenameCollision>,
9221    back_to_self: &mut BTreeSet<Vec<u8>>,
9222    dirty: &mut bool,
9223) {
9224    for mv in moves {
9225        // A file in the way at the destination is only a blocker when it is
9226        // present on this same side (or in base). If the other side already
9227        // occupies the destination, applying this move produces the normal
9228        // two-sided conflict at that path (e.g. t6423 1d's rename/rename(2to1)).
9229        let occupied_on_this_side = if side_is_ours {
9230            ours.contains_key(&mv.to) || map_has_directory_at(ours, &mv.to)
9231        } else {
9232            theirs.contains_key(&mv.to) || map_has_directory_at(theirs, &mv.to)
9233        };
9234        let occupied_by_cross_rename =
9235            file_rename_dests
9236                .get(&mv.to)
9237                .is_some_and(|rename| match (side_is_ours, rename.side) {
9238                    (true, RenameSide::Theirs) | (false, RenameSide::Ours) => true,
9239                    (true, RenameSide::Ours) | (false, RenameSide::Theirs) => false,
9240                });
9241        let base_entry_at_dest = original_base.get(&mv.to).copied();
9242        let base_entry_at_source = original_base.get(&mv.from).copied();
9243        let other_side_entry_at_dest = if side_is_ours {
9244            theirs.get(&mv.to).copied()
9245        } else {
9246            ours.get(&mv.to).copied()
9247        };
9248        let other_side_entry_at_source = if side_is_ours {
9249            theirs.get(&mv.from).copied()
9250        } else {
9251            ours.get(&mv.from).copied()
9252        };
9253        let base_entry_for_shifted_source = base_entry_at_source.or(base_entry_at_dest);
9254        let rename_back_to_modified_source = mv
9255            .renamed_from
9256            .as_ref()
9257            .is_some_and(|source| source == &mv.to)
9258            && base_entry_at_dest.is_some()
9259            && (other_side_entry_at_dest.is_some_and(|entry| Some(entry) != base_entry_at_dest)
9260                || other_side_entry_at_source
9261                    .is_some_and(|entry| Some(entry) != base_entry_for_shifted_source));
9262        if ((base_entry_at_dest.is_some() && !rename_back_to_modified_source)
9263            || (occupied_on_this_side && !occupied_by_cross_rename))
9264            && mv.to != mv.from
9265        {
9266            *dirty = true;
9267            collisions.push(DirRenameCollision {
9268                dest: mv.to.clone(),
9269                sources: vec![mv.from.clone()],
9270            });
9271            continue;
9272        }
9273        let mut moved = false;
9274        if occupied_by_cross_rename {
9275            base.remove(&mv.from);
9276            if side_is_ours {
9277                if let Some(entry) = ours.remove(&mv.from) {
9278                    ours.insert(mv.to.clone(), entry);
9279                    moved = true;
9280                }
9281                theirs.remove(&mv.from);
9282            } else {
9283                ours.remove(&mv.from);
9284                if let Some(entry) = theirs.remove(&mv.from) {
9285                    theirs.insert(mv.to.clone(), entry);
9286                    moved = true;
9287                }
9288            }
9289        } else {
9290            // Move the path on every map that holds it (base for the ancestor,
9291            // and whichever sides carry content at the path). This keeps a
9292            // content-merge keyed consistently at the re-homed destination.
9293            for m in [&mut *base, &mut *ours, &mut *theirs] {
9294                if let Some(entry) = m.remove(&mv.from) {
9295                    m.insert(mv.to.clone(), entry);
9296                    moved = true;
9297                }
9298            }
9299        }
9300        if moved {
9301            if rename_back_to_modified_source {
9302                back_to_self.insert(mv.to.clone());
9303            }
9304            let info = RehomeInfo {
9305                old_path: mv.from.clone(),
9306                renamed_from: mv.renamed_from.clone(),
9307                added_on_ours: side_is_ours,
9308            };
9309            let entry = rehomed.entry(mv.to.clone()).or_default();
9310            if side_is_ours {
9311                entry.ours = Some(info);
9312            } else {
9313                entry.theirs = Some(info);
9314            }
9315        }
9316    }
9317}
9318
9319fn collect_dir_rename_two_to_one(
9320    renames: &MergeRenames,
9321    rehomed: &BTreeMap<Vec<u8>, RehomeSides>,
9322) -> Vec<DirRenameTwoToOne> {
9323    let mut conflicts = Vec::new();
9324    for (dest, sides) in rehomed {
9325        let Some(file_rename) = renames.dest_to_source.get(dest) else {
9326            continue;
9327        };
9328        match file_rename.side {
9329            RenameSide::Ours => {
9330                let Some(info) = sides.theirs.as_ref() else {
9331                    continue;
9332                };
9333                let Some(theirs_source) = info.renamed_from.as_ref() else {
9334                    continue;
9335                };
9336                conflicts.push(DirRenameTwoToOne {
9337                    dest: dest.clone(),
9338                    ours_source: file_rename.source.clone(),
9339                    theirs_source: theirs_source.clone(),
9340                    ours_label_path: dest.clone(),
9341                    theirs_label_path: info.old_path.clone(),
9342                });
9343            }
9344            RenameSide::Theirs => {
9345                let Some(info) = sides.ours.as_ref() else {
9346                    continue;
9347                };
9348                let Some(ours_source) = info.renamed_from.as_ref() else {
9349                    continue;
9350                };
9351                conflicts.push(DirRenameTwoToOne {
9352                    dest: dest.clone(),
9353                    ours_source: ours_source.clone(),
9354                    theirs_source: file_rename.source.clone(),
9355                    ours_label_path: info.old_path.clone(),
9356                    theirs_label_path: dest.clone(),
9357                });
9358            }
9359        }
9360    }
9361    conflicts
9362}
9363
9364fn map_has_directory_at(map: &MergeEntryMap, path: &[u8]) -> bool {
9365    let mut prefix = path.to_vec();
9366    prefix.push(b'/');
9367    map.keys().any(|candidate| candidate.starts_with(&prefix))
9368}
9369
9370fn remap_rename_destinations(renames: &mut MergeRenames, rehomed: &BTreeMap<Vec<u8>, RehomeSides>) {
9371    if rehomed.is_empty() {
9372        return;
9373    }
9374    let mut remapped_deletes = BTreeMap::new();
9375    for (dest, rd) in std::mem::take(&mut renames.rename_deletes) {
9376        let new_dest = rehomed
9377            .iter()
9378            .find_map(|(new_dest, sides)| {
9379                let moved = sides
9380                    .ours
9381                    .as_ref()
9382                    .is_some_and(|info| info.old_path == dest)
9383                    || sides
9384                        .theirs
9385                        .as_ref()
9386                        .is_some_and(|info| info.old_path == dest);
9387                moved.then(|| new_dest.clone())
9388            })
9389            .unwrap_or(dest);
9390        remapped_deletes.insert(new_dest, rd);
9391    }
9392    renames.rename_deletes = remapped_deletes;
9393
9394    for rename in renames.rename_rename_one_to_two.values_mut() {
9395        for (dest, sides) in rehomed {
9396            if sides
9397                .ours
9398                .as_ref()
9399                .is_some_and(|info| info.old_path == rename.ours_dest)
9400            {
9401                rename.ours_dest = dest.clone();
9402            }
9403            if sides
9404                .theirs
9405                .as_ref()
9406                .is_some_and(|info| info.old_path == rename.theirs_dest)
9407            {
9408                rename.theirs_dest = dest.clone();
9409            }
9410        }
9411    }
9412}
9413
9414fn drop_collapsed_rename_rename_conflicts(renames: &mut MergeRenames) {
9415    renames
9416        .rename_rename_one_to_two
9417        .retain(|_, rename| rename.ours_dest != rename.theirs_dest);
9418}
9419
9420fn apply_dir_rename_two_to_one_conflicts(
9421    db: &FileObjectDatabase,
9422    eff_ours: &MergeEntryMap,
9423    eff_theirs: &MergeEntryMap,
9424    conflicts: &[DirRenameTwoToOne],
9425    paths: &mut [MergedPath],
9426    leaves: &mut MergeEntryMap,
9427    options: &MergeTreesOptions<'_>,
9428) -> Result<()> {
9429    for conflict in conflicts {
9430        let Some(slot) = paths.iter_mut().find(|path| path.path == conflict.dest) else {
9431            continue;
9432        };
9433        let ours_entry = eff_ours.get(&conflict.dest).copied();
9434        let theirs_entry = eff_theirs.get(&conflict.dest).copied();
9435        let (Some((ours_mode, ours_oid)), Some((theirs_mode, theirs_oid))) =
9436            (ours_entry, theirs_entry)
9437        else {
9438            continue;
9439        };
9440        let ours_bytes = merge_blob_bytes(db, &ours_oid)?;
9441        let theirs_bytes = merge_blob_bytes(db, &theirs_oid)?;
9442        let (resolved_mode, mode_conflict) = merge_file_modes(None, ours_mode, theirs_mode);
9443        let result = if is_mergeable_file_mode(ours_mode) && is_mergeable_file_mode(theirs_mode) {
9444            merge_blobs(
9445                &[],
9446                &ours_bytes,
9447                &theirs_bytes,
9448                &MergeBlobOptions {
9449                    ours_label: &qualify_label(options.ours_label, &conflict.ours_label_path),
9450                    theirs_label: &qualify_label(options.theirs_label, &conflict.theirs_label_path),
9451                    base_label: options.ancestor_label,
9452                    style: options.style,
9453                    favor: options.favor,
9454                    ws_ignore: options.ws_ignore,
9455                },
9456            )
9457        } else {
9458            MergeBlobResult {
9459                content: ours_bytes.clone(),
9460                conflicted: true,
9461            }
9462        };
9463        let oid = db.write_object(EncodedObject::new(ObjectType::Blob, result.content.clone()))?;
9464        leaves.insert(conflict.dest.clone(), (resolved_mode, oid));
9465        slot.stages = MergeStages {
9466            base: None,
9467            ours: ours_entry,
9468            theirs: theirs_entry,
9469        };
9470        slot.result = Some((resolved_mode, oid));
9471        slot.worktree = Some((
9472            if ours_mode == theirs_mode {
9473                ours_mode
9474            } else {
9475                0o100644
9476            },
9477            result.content,
9478        ));
9479        slot.conflict = Some(MergeConflictKind::RenameRenameTwoToOne {
9480            ours_path: conflict.ours_source.clone(),
9481            theirs_path: conflict.theirs_source.clone(),
9482        });
9483        slot.auto_merged = !mode_conflict;
9484    }
9485    Ok(())
9486}
9487
9488/// 3-way merge one rename's content into a single leaf entry: `base` is the
9489/// source's ancestor blob, `ours`/`theirs` the two sides' content (one of which
9490/// is the renamed file, the other the other side's change to the source). Both
9491/// present and differing → a real content merge; otherwise the surviving side's
9492/// entry is carried as-is.
9493fn rename_merged_leaf(
9494    db: &FileObjectDatabase,
9495    base: Option<(u32, ObjectId)>,
9496    ours: Option<(u32, ObjectId)>,
9497    theirs: Option<(u32, ObjectId)>,
9498    options: &MergeTreesOptions<'_>,
9499) -> Result<Option<(u32, ObjectId)>> {
9500    match (ours, theirs) {
9501        (None, None) => Ok(None),
9502        (Some(entry), None) | (None, Some(entry)) => Ok(Some(entry)),
9503        (Some((ours_mode, ours_oid)), Some((theirs_mode, theirs_oid))) => {
9504            if (ours_mode, ours_oid) == (theirs_mode, theirs_oid) {
9505                return Ok(Some((ours_mode, ours_oid)));
9506            }
9507            if !is_mergeable_file_mode(ours_mode) || !is_mergeable_file_mode(theirs_mode) {
9508                return Ok(Some((ours_mode, ours_oid)));
9509            }
9510            let base_bytes = match base {
9511                Some((_, oid)) => merge_blob_bytes(db, &oid)?,
9512                None => Vec::new(),
9513            };
9514            let result = merge_blobs(
9515                &base_bytes,
9516                &merge_blob_bytes(db, &ours_oid)?,
9517                &merge_blob_bytes(db, &theirs_oid)?,
9518                &MergeBlobOptions {
9519                    ours_label: options.ours_label,
9520                    theirs_label: options.theirs_label,
9521                    base_label: options.ancestor_label,
9522                    style: options.style,
9523                    favor: options.favor,
9524                    ws_ignore: options.ws_ignore,
9525                },
9526            );
9527            let (mode, _) = merge_file_modes(base.map(|(mode, _)| mode), ours_mode, theirs_mode);
9528            let oid = db.write_object(EncodedObject::new(ObjectType::Blob, result.content))?;
9529            Ok(Some((mode, oid)))
9530        }
9531    }
9532}
9533
9534/// Apply rename/rename(2to1) and rename/add conflicts: two distinct contents
9535/// land on one destination path. Each side's content at the destination is the
9536/// 3-way merge of its own rename (so the other side's change to the renamed
9537/// source follows the rename); the two results become stages 2 and 3 with no
9538/// common ancestor, and the worktree holds their two-way merge. The rename
9539/// source paths are consumed (removed from the path set) so they don't surface as
9540/// a spurious modify/delete.
9541#[allow(clippy::too_many_arguments)]
9542fn apply_rename_two_to_one_and_add_conflicts(
9543    db: &FileObjectDatabase,
9544    base_map: &MergeEntryMap,
9545    ours_map: &MergeEntryMap,
9546    theirs_map: &MergeEntryMap,
9547    renames: &MergeRenames,
9548    paths: &mut Vec<MergedPath>,
9549    leaves: &mut MergeEntryMap,
9550    options: &MergeTreesOptions<'_>,
9551) -> Result<()> {
9552    let mut consumed_sources: Vec<Vec<u8>> = Vec::new();
9553
9554    for (dest, conflict) in &renames.rename_rename_two_to_one {
9555        // Ours renamed `ours_source`->dest; theirs' change to `ours_source`
9556        // follows the rename. Symmetric for theirs.
9557        let ours_leaf = rename_merged_leaf(
9558            db,
9559            base_map.get(&conflict.ours_source).copied(),
9560            ours_map.get(dest).copied(),
9561            theirs_map.get(&conflict.ours_source).copied(),
9562            options,
9563        )?;
9564        let theirs_leaf = rename_merged_leaf(
9565            db,
9566            base_map.get(&conflict.theirs_source).copied(),
9567            ours_map.get(&conflict.theirs_source).copied(),
9568            theirs_map.get(dest).copied(),
9569            options,
9570        )?;
9571        write_two_sided_dest_conflict(
9572            db,
9573            dest,
9574            ours_leaf,
9575            theirs_leaf,
9576            MergeConflictKind::RenameRenameTwoToOne {
9577                ours_path: conflict.ours_source.clone(),
9578                theirs_path: conflict.theirs_source.clone(),
9579            },
9580            options,
9581            paths,
9582            leaves,
9583        )?;
9584        consumed_sources.push(conflict.ours_source.clone());
9585        consumed_sources.push(conflict.theirs_source.clone());
9586    }
9587
9588    for (dest, add) in &renames.rename_adds {
9589        let (ours_leaf, theirs_leaf) = match add.side {
9590            RenameSide::Ours => (
9591                rename_merged_leaf(
9592                    db,
9593                    base_map.get(&add.source).copied(),
9594                    ours_map.get(dest).copied(),
9595                    theirs_map.get(&add.source).copied(),
9596                    options,
9597                )?,
9598                theirs_map.get(dest).copied(),
9599            ),
9600            RenameSide::Theirs => (
9601                ours_map.get(dest).copied(),
9602                rename_merged_leaf(
9603                    db,
9604                    base_map.get(&add.source).copied(),
9605                    ours_map.get(&add.source).copied(),
9606                    theirs_map.get(dest).copied(),
9607                    options,
9608                )?,
9609            ),
9610        };
9611        write_two_sided_dest_conflict(
9612            db,
9613            dest,
9614            ours_leaf,
9615            theirs_leaf,
9616            MergeConflictKind::Content { add_add: true },
9617            options,
9618            paths,
9619            leaves,
9620        )?;
9621        consumed_sources.push(add.source.clone());
9622    }
9623
9624    // The rename source paths are consumed by the rename: the other side's
9625    // change to them followed the rename to the destination, so they resolve to
9626    // a clean deletion (not the path-keyed core's modify/delete). Marking them
9627    // `Resolved(None)` lets the worktree writer remove the now-stale source file
9628    // rather than leaving it as a stray untracked file.
9629    for source in &consumed_sources {
9630        leaves.remove(source);
9631        if let Some(slot) = paths.iter_mut().find(|path| &path.path == source) {
9632            slot.stages = MergeStages::default();
9633            slot.result = None;
9634            slot.worktree = None;
9635            slot.conflict = None;
9636            slot.auto_merged = false;
9637        } else {
9638            paths.push(MergedPath {
9639                path: source.clone(),
9640                stages: MergeStages::default(),
9641                result: None,
9642                worktree: None,
9643                conflict: None,
9644                auto_merged: false,
9645            });
9646        }
9647    }
9648    Ok(())
9649}
9650
9651/// Record a destination path that holds two unmerged contents (rename/rename
9652/// 2to1 or rename/add): stage 2 = `ours_leaf`, stage 3 = `theirs_leaf`, no
9653/// common ancestor, worktree = their two-way merge. Replaces any existing slot
9654/// (the path-keyed core's add/add result) for the destination.
9655#[allow(clippy::too_many_arguments)]
9656fn write_two_sided_dest_conflict(
9657    db: &FileObjectDatabase,
9658    dest: &[u8],
9659    ours_leaf: Option<(u32, ObjectId)>,
9660    theirs_leaf: Option<(u32, ObjectId)>,
9661    kind: MergeConflictKind,
9662    options: &MergeTreesOptions<'_>,
9663    paths: &mut Vec<MergedPath>,
9664    leaves: &mut MergeEntryMap,
9665) -> Result<()> {
9666    let ours_bytes = match ours_leaf {
9667        Some((mode, oid)) => Some((mode, merge_worktree_bytes(db, mode, &oid)?)),
9668        None => None,
9669    };
9670    let theirs_bytes = match theirs_leaf {
9671        Some((mode, oid)) => Some((mode, merge_worktree_bytes(db, mode, &oid)?)),
9672        None => None,
9673    };
9674    let (worktree_mode, worktree_content, result_leaf) = match (&ours_bytes, &theirs_bytes) {
9675        (Some((ours_mode, ours_content)), Some((theirs_mode, theirs_content))) => {
9676            let merged = merge_blobs(
9677                &[],
9678                ours_content,
9679                theirs_content,
9680                &MergeBlobOptions {
9681                    ours_label: options.ours_label,
9682                    theirs_label: options.theirs_label,
9683                    base_label: options.ancestor_label,
9684                    style: options.style,
9685                    favor: options.favor,
9686                    ws_ignore: options.ws_ignore,
9687                },
9688            );
9689            let mode = if ours_mode == theirs_mode {
9690                *ours_mode
9691            } else {
9692                0o100644
9693            };
9694            let oid =
9695                db.write_object(EncodedObject::new(ObjectType::Blob, merged.content.clone()))?;
9696            (mode, merged.content, Some((mode, oid)))
9697        }
9698        (Some((mode, content)), None) | (None, Some((mode, content))) => {
9699            (*mode, content.clone(), ours_leaf.or(theirs_leaf))
9700        }
9701        (None, None) => (0o100644, Vec::new(), None),
9702    };
9703
9704    let slot = MergedPath {
9705        path: dest.to_vec(),
9706        stages: MergeStages {
9707            base: None,
9708            ours: ours_leaf,
9709            theirs: theirs_leaf,
9710        },
9711        result: result_leaf,
9712        worktree: Some((worktree_mode, worktree_content)),
9713        conflict: Some(kind),
9714        auto_merged: true,
9715    };
9716    if let Some(existing) = paths.iter_mut().find(|path| path.path == dest) {
9717        *existing = slot;
9718    } else {
9719        paths.push(slot);
9720    }
9721    if let Some(leaf) = result_leaf {
9722        leaves.insert(dest.to_vec(), leaf);
9723    } else {
9724        leaves.remove(dest);
9725    }
9726    Ok(())
9727}
9728
9729#[allow(clippy::too_many_arguments)]
9730fn apply_rename_rename_one_to_two_conflicts(
9731    db: &FileObjectDatabase,
9732    base_map: &MergeEntryMap,
9733    eff_ours: &MergeEntryMap,
9734    eff_theirs: &MergeEntryMap,
9735    conflicts: &BTreeMap<Vec<u8>, RenameRenameOneToTwo>,
9736    paths: &mut Vec<MergedPath>,
9737    leaves: &mut MergeEntryMap,
9738    options: &MergeTreesOptions<'_>,
9739) -> Result<()> {
9740    for (old_path, conflict) in conflicts {
9741        let base_entry = base_map.get(old_path).copied();
9742        let ours_entry = eff_ours.get(&conflict.ours_dest).copied();
9743        let theirs_entry = eff_theirs.get(&conflict.theirs_dest).copied();
9744        let theirs_add_at_ours_dest = eff_theirs.get(&conflict.ours_dest).copied();
9745        let ours_add_at_theirs_dest = eff_ours.get(&conflict.theirs_dest).copied();
9746
9747        leaves.remove(old_path);
9748        leaves.remove(&conflict.ours_dest);
9749        leaves.remove(&conflict.theirs_dest);
9750        paths.retain(|path| {
9751            path.path != *old_path
9752                && path.path != conflict.ours_dest
9753                && path.path != conflict.theirs_dest
9754        });
9755
9756        paths.push(MergedPath {
9757            path: old_path.clone(),
9758            stages: MergeStages {
9759                base: base_entry,
9760                ours: None,
9761                theirs: None,
9762            },
9763            result: None,
9764            worktree: None,
9765            conflict: Some(MergeConflictKind::RenameRenameOneToTwo {
9766                old_path: old_path.clone(),
9767                ours_path: conflict.ours_dest.clone(),
9768                theirs_path: conflict.theirs_dest.clone(),
9769                ours_label: options.ours_label.to_string(),
9770                theirs_label: options.theirs_label.to_string(),
9771            }),
9772            auto_merged: false,
9773        });
9774
9775        let ours_worktree = match ours_entry {
9776            Some((mode, oid)) => Some((mode, merge_worktree_bytes(db, mode, &oid)?)),
9777            None => None,
9778        };
9779        paths.push(MergedPath {
9780            path: conflict.ours_dest.clone(),
9781            stages: MergeStages {
9782                base: None,
9783                ours: ours_entry,
9784                theirs: theirs_add_at_ours_dest,
9785            },
9786            result: None,
9787            worktree: ours_worktree,
9788            conflict: Some(MergeConflictKind::RenameRenameOneToTwoStage),
9789            auto_merged: false,
9790        });
9791
9792        let theirs_worktree = match theirs_entry {
9793            Some((mode, oid)) => Some((mode, merge_worktree_bytes(db, mode, &oid)?)),
9794            None => None,
9795        };
9796        paths.push(MergedPath {
9797            path: conflict.theirs_dest.clone(),
9798            stages: MergeStages {
9799                base: None,
9800                ours: ours_add_at_theirs_dest,
9801                theirs: theirs_entry,
9802            },
9803            result: None,
9804            worktree: theirs_worktree,
9805            conflict: Some(MergeConflictKind::RenameRenameOneToTwoStage),
9806            auto_merged: false,
9807        });
9808    }
9809    Ok(())
9810}
9811
/// Produce the path-qualified conflict-marker label `"<label>:<path>"`, the
/// form git uses for renamed files so each side of a conflict names its own
/// path. Bytes of `path` that are not valid UTF-8 are rendered lossily.
fn qualify_label(label: &str, path: &[u8]) -> String {
    let shown = String::from_utf8_lossy(path);
    let mut qualified = String::with_capacity(label.len() + 1 + shown.len());
    qualified.push_str(label);
    qualified.push(':');
    qualified.push_str(&shown);
    qualified
}
9817
9818/// Adapt a flat `path -> (mode, oid)` map into the `TrackedEntry` map the
9819/// name-status diff core consumes.
9820fn entry_map_as_tracked(map: &MergeEntryMap) -> BTreeMap<Vec<u8>, TrackedEntry> {
9821    map.iter()
9822        .map(|(path, (mode, oid))| {
9823            (
9824                path.clone(),
9825                TrackedEntry {
9826                    mode: *mode,
9827                    oid: *oid,
9828                },
9829            )
9830        })
9831        .collect()
9832}
9833
9834#[cfg(test)]
9835mod tests {
9836    use super::*;
9837    use sley_formats::RepositoryLayout;
9838    use sley_object::TreeEntry;
9839    use sley_odb::ObjectWriter;
9840    use std::path::PathBuf;
9841    use std::sync::atomic::{AtomicU64, Ordering};
9842
9843    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
9844
9845    #[test]
9846    fn name_status_reports_added_from_index() {
9847        let root = temp_root();
9848        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
9849            .expect("test operation should succeed");
9850        let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
9851        let oid = db
9852            .write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
9853            .expect("test operation should succeed");
9854        let index = Index {
9855            version: 2,
9856            entries: vec![sley_index::IndexEntry {
9857                ctime_seconds: 0,
9858                ctime_nanoseconds: 0,
9859                mtime_seconds: 0,
9860                mtime_nanoseconds: 0,
9861                dev: 0,
9862                ino: 0,
9863                mode: 0o100644,
9864                uid: 0,
9865                gid: 0,
9866                size: 6,
9867                oid,
9868                flags: "hello.txt".len() as u16,
9869                flags_extended: 0,
9870                path: BString::from(b"hello.txt"),
9871            }],
9872            extensions: Vec::new(),
9873            checksum: None,
9874        };
9875        fs::write(
9876            layout.git_dir.join("index"),
9877            index
9878                .write_v2_sha1()
9879                .expect("test operation should succeed"),
9880        )
9881        .expect("test operation should succeed");
9882        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
9883        let changes = diff_name_status_head_worktree(&root, &layout.git_dir, ObjectFormat::Sha1)
9884            .expect("test operation should succeed");
9885        assert_eq!(changes[0].line(), "A\thello.txt");
9886        fs::remove_dir_all(root).expect("test operation should succeed");
9887    }
9888
9889    #[test]
9890    fn index_worktree_diff_returns_staged_gitlinks() {
9891        let root = temp_root();
9892        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
9893            .expect("test operation should succeed");
9894        let oid = ObjectId::from_hex(
9895            ObjectFormat::Sha1,
9896            "1111111111111111111111111111111111111111",
9897        )
9898        .expect("test operation should succeed");
9899        let index = Index {
9900            version: 2,
9901            entries: vec![sley_index::IndexEntry {
9902                ctime_seconds: 0,
9903                ctime_nanoseconds: 0,
9904                mtime_seconds: 0,
9905                mtime_nanoseconds: 0,
9906                dev: 0,
9907                ino: 0,
9908                mode: sley_index::GITLINK_MODE,
9909                uid: 0,
9910                gid: 0,
9911                size: 0,
9912                oid,
9913                flags: "deps/sub".len() as u16,
9914                flags_extended: 0,
9915                path: BString::from(b"deps/sub"),
9916            }],
9917            extensions: Vec::new(),
9918            checksum: None,
9919        };
9920        fs::write(
9921            layout.git_dir.join("index"),
9922            index
9923                .write_v2_sha1()
9924                .expect("test operation should succeed"),
9925        )
9926        .expect("test operation should succeed");
9927
9928        let diff = diff_name_status_index_worktree_with_options_and_gitlinks(
9929            &root,
9930            &layout.git_dir,
9931            ObjectFormat::Sha1,
9932            DiffNameStatusOptions::default(),
9933        )
9934        .expect("test operation should succeed");
9935
9936        assert_eq!(diff.entries.len(), 1);
9937        let gitlinks = diff.staged_gitlinks;
9938        assert_eq!(gitlinks.len(), 1);
9939        assert_eq!(gitlinks[0].path.as_bytes(), b"deps/sub");
9940        assert_eq!(gitlinks[0].oid, oid);
9941        fs::remove_dir_all(root).expect("test operation should succeed");
9942    }
9943
9944    #[cfg(unix)]
9945    #[test]
9946    fn index_worktree_diff_ignores_untracked_dangling_symlink() {
9947        use std::os::unix::fs::symlink;
9948
9949        let root = temp_root();
9950        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
9951            .expect("test operation should succeed");
9952        let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
9953        let oid = db
9954            .write_object(EncodedObject::new(ObjectType::Blob, b"clean\n".to_vec()))
9955            .expect("test operation should succeed");
9956        let index = Index {
9957            version: 2,
9958            entries: vec![sley_index::IndexEntry {
9959                ctime_seconds: 0,
9960                ctime_nanoseconds: 0,
9961                mtime_seconds: 0,
9962                mtime_nanoseconds: 0,
9963                dev: 0,
9964                ino: 0,
9965                mode: 0o100644,
9966                uid: 0,
9967                gid: 0,
9968                size: 6,
9969                oid,
9970                flags: "tracked.txt".len() as u16,
9971                flags_extended: 0,
9972                path: BString::from(b"tracked.txt"),
9973            }],
9974            extensions: Vec::new(),
9975            checksum: None,
9976        };
9977        fs::write(
9978            layout.git_dir.join("index"),
9979            index
9980                .write_v2_sha1()
9981                .expect("test operation should succeed"),
9982        )
9983        .expect("test operation should succeed");
9984        fs::write(root.join("tracked.txt"), b"clean\n").expect("test operation should succeed");
9985        symlink("missing-target", root.join("untracked-link"))
9986            .expect("test operation should succeed");
9987
9988        let changes = diff_name_status_index_worktree_with_options(
9989            &root,
9990            &layout.git_dir,
9991            ObjectFormat::Sha1,
9992            DiffNameStatusOptions {
9993                detect_renames: false,
9994                detect_copies: false,
9995                find_copies_harder: false,
9996                rename_empty: true,
9997            },
9998        )
9999        .expect("untracked dangling symlink should be ignored");
10000        assert!(changes.is_empty());
10001        fs::remove_dir_all(root).expect("test operation should succeed");
10002    }
10003
10004    #[test]
10005    fn index_worktree_diff_trusts_non_racy_stat_cache() {
10006        let root = temp_root();
10007        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
10008            .expect("test operation should succeed");
10009        let worktree_path = root.join("tracked.txt");
10010        fs::write(&worktree_path, b"clean\n").expect("test operation should succeed");
10011        let metadata = fs::symlink_metadata(&worktree_path).expect("test operation should succeed");
10012        let (mtime_seconds, mtime_nanoseconds) =
10013            sley_index::file_mtime_parts(&metadata).expect("test operation should succeed");
10014        let bogus_oid = ObjectId::from_hex(
10015            ObjectFormat::Sha1,
10016            "1111111111111111111111111111111111111111",
10017        )
10018        .expect("test operation should succeed");
10019        let index = Index {
10020            version: 2,
10021            entries: vec![sley_index::IndexEntry {
10022                ctime_seconds: 0,
10023                ctime_nanoseconds: 0,
10024                mtime_seconds: mtime_seconds as u32,
10025                mtime_nanoseconds: mtime_nanoseconds as u32,
10026                dev: 0,
10027                ino: 0,
10028                mode: sley_index::worktree_metadata_mode(&metadata),
10029                uid: 0,
10030                gid: 0,
10031                size: metadata.len() as u32,
10032                oid: bogus_oid,
10033                flags: "tracked.txt".len() as u16,
10034                flags_extended: 0,
10035                path: BString::from(b"tracked.txt"),
10036            }],
10037            extensions: Vec::new(),
10038            checksum: None,
10039        };
10040        std::thread::sleep(std::time::Duration::from_millis(1100));
10041        fs::write(
10042            layout.git_dir.join("index"),
10043            index
10044                .write_v2_sha1()
10045                .expect("test operation should succeed"),
10046        )
10047        .expect("test operation should succeed");
10048
10049        let changes = diff_name_status_index_worktree(&root, &layout.git_dir, ObjectFormat::Sha1)
10050            .expect("test operation should succeed");
10051        assert!(
10052            changes.is_empty(),
10053            "a clean non-racy stat match must reuse the cached index oid"
10054        );
10055        fs::remove_dir_all(root).expect("test operation should succeed");
10056    }
10057
10058    fn temp_root() -> PathBuf {
10059        let path = std::env::temp_dir().join(format!(
10060            "sley-diff-{}-{}",
10061            std::process::id(),
10062            TEMP_COUNTER.fetch_add(1, Ordering::Relaxed)
10063        ));
10064        fs::create_dir_all(&path).expect("test operation should succeed");
10065        path
10066    }
10067
10068    // ---- line diff / blob merge tests ---------------------------------------
10069
10070    fn merge_opts() -> MergeBlobOptions<'static> {
10071        MergeBlobOptions {
10072            ours_label: "ours",
10073            theirs_label: "theirs",
10074            base_label: "base",
10075            style: ConflictStyle::Merge,
10076            favor: MergeFavor::None,
10077            ws_ignore: WsIgnore::EMPTY,
10078        }
10079    }
10080
10081    #[test]
10082    fn split_lines_preserves_content_and_newlines() {
10083        let lines = split_lines(b"a\nb\nc\n");
10084        assert_eq!(lines.len(), 3);
10085        assert_eq!(lines[0].content, b"a\n");
10086        assert!(lines[0].has_newline);
10087        assert_eq!(lines[2].content, b"c\n");
10088        assert!(lines[2].has_newline);
10089        assert!(split_lines(b"").is_empty());
10090    }
10091
10092    #[test]
10093    fn split_lines_tracks_missing_final_newline() {
10094        let lines = split_lines(b"a\nb");
10095        assert_eq!(lines.len(), 2);
10096        assert!(lines[0].has_newline);
10097        assert!(!lines[1].has_newline);
10098        assert_eq!(lines[1].content, b"b");
10099        assert_eq!(lines[1].bytes_without_newline(), b"b");
10100        // A line that lost its newline must not compare equal to one that has it.
10101        let with_nl = split_lines(b"b\n");
10102        assert_ne!(lines[1], with_nl[0]);
10103    }
10104
10105    #[test]
10106    fn myers_replace_single_line() {
10107        let old = split_lines(b"a\nb\nc\n");
10108        let new = split_lines(b"a\nx\nc\n");
10109        assert_eq!(
10110            myers_diff_lines(&old, &new),
10111            vec![
10112                DiffOp::Equal(1),
10113                DiffOp::Delete(1),
10114                DiffOp::Insert(1),
10115                DiffOp::Equal(1),
10116            ]
10117        );
10118    }
10119
10120    #[test]
10121    fn myers_identical_is_single_equal() {
10122        let old = split_lines(b"a\nb\nc\n");
10123        let new = split_lines(b"a\nb\nc\n");
10124        assert_eq!(myers_diff_lines(&old, &new), vec![DiffOp::Equal(3)]);
10125    }
10126
10127    #[test]
10128    fn myers_pure_insert_and_delete() {
10129        let empty = split_lines(b"");
10130        let two = split_lines(b"a\nb\n");
10131        assert_eq!(myers_diff_lines(&empty, &two), vec![DiffOp::Insert(2)]);
10132        assert_eq!(myers_diff_lines(&two, &empty), vec![DiffOp::Delete(2)]);
10133
10134        let old = split_lines(b"a\nb\nc\nd\n");
10135        let new = split_lines(b"a\nc\nd\n");
10136        assert_eq!(
10137            myers_diff_lines(&old, &new),
10138            vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Equal(2)]
10139        );
10140    }
10141
10142    #[test]
10143    fn myers_reconstructs_new_and_is_minimal() {
10144        // Apply the script to `old` and confirm it yields `new`; also count edits.
10145        let old = split_lines(b"the\nquick\nbrown\nfox\n");
10146        let new = split_lines(b"the\nlazy\nbrown\ncat\n");
10147        let ops = myers_diff_lines(&old, &new);
10148        let mut oi = 0usize;
10149        let mut ni = 0usize;
10150        let mut edits = 0usize;
10151        let mut rebuilt: Vec<u8> = Vec::new();
10152        for op in &ops {
10153            match *op {
10154                DiffOp::Equal(n) => {
10155                    for _ in 0..n {
10156                        assert_eq!(old[oi], new[ni]);
10157                        rebuilt.extend_from_slice(old[oi].content);
10158                        oi += 1;
10159                        ni += 1;
10160                    }
10161                }
10162                DiffOp::Delete(n) => {
10163                    oi += n;
10164                    edits += n;
10165                }
10166                DiffOp::Insert(n) => {
10167                    for _ in 0..n {
10168                        rebuilt.extend_from_slice(new[ni].content);
10169                        ni += 1;
10170                    }
10171                    edits += n;
10172                }
10173            }
10174        }
10175        assert_eq!(rebuilt, b"the\nlazy\nbrown\ncat\n");
10176        // Two lines changed -> 2 deletes + 2 inserts is the minimal SES here.
10177        assert_eq!(edits, 4);
10178    }
10179
10180    #[test]
10181    fn merge_non_overlapping_changes_is_clean() {
10182        let base = b"a\nb\nc\nd\ne\n";
10183        let ours = b"A\nb\nc\nd\ne\n";
10184        let theirs = b"a\nb\nc\nd\nE\n";
10185        let result = merge_blobs(base, ours, theirs, &merge_opts());
10186        assert!(!result.conflicted);
10187        assert_eq!(result.content, b"A\nb\nc\nd\nE\n");
10188    }
10189
10190    #[test]
10191    fn merge_identical_changes_no_conflict() {
10192        let base = b"a\nb\nc\n";
10193        let ours = b"a\nX\nc\n";
10194        let theirs = b"a\nX\nc\n";
10195        let result = merge_blobs(base, ours, theirs, &merge_opts());
10196        assert!(!result.conflicted);
10197        assert_eq!(result.content, b"a\nX\nc\n");
10198    }
10199
10200    #[test]
10201    fn merge_overlapping_change_emits_exact_markers() {
10202        let base = b"a\nb\nc\n";
10203        let ours = b"a\nOURS\nc\n";
10204        let theirs = b"a\nTHEIRS\nc\n";
10205        let result = merge_blobs(base, ours, theirs, &merge_opts());
10206        assert!(result.conflicted);
10207        assert_eq!(
10208            result.content,
10209            b"a\n<<<<<<< ours\nOURS\n=======\nTHEIRS\n>>>>>>> theirs\nc\n".to_vec(),
10210        );
10211    }
10212
10213    #[test]
10214    fn merge_diff3_style_includes_base_section() {
10215        let base = b"a\nb\nc\n";
10216        let ours = b"a\nOURS\nc\n";
10217        let theirs = b"a\nTHEIRS\nc\n";
10218        let options = MergeBlobOptions {
10219            style: ConflictStyle::Diff3,
10220            ..merge_opts()
10221        };
10222        let result = merge_blobs(base, ours, theirs, &options);
10223        assert!(result.conflicted);
10224        assert_eq!(
10225            result.content,
10226            b"a\n<<<<<<< ours\nOURS\n||||||| base\nb\n=======\nTHEIRS\n>>>>>>> theirs\nc\n"
10227                .to_vec(),
10228        );
10229    }
10230
10231    #[test]
10232    fn merge_empty_label_omits_trailing_space() {
10233        let base = b"a\nb\nc\n";
10234        let ours = b"a\nOURS\nc\n";
10235        let theirs = b"a\nTHEIRS\nc\n";
10236        let options = MergeBlobOptions {
10237            ours_label: "",
10238            theirs_label: "",
10239            base_label: "",
10240            style: ConflictStyle::Merge,
10241            favor: MergeFavor::None,
10242            ws_ignore: WsIgnore::EMPTY,
10243        };
10244        let result = merge_blobs(base, ours, theirs, &options);
10245        assert!(result.conflicted);
10246        // No trailing space after the 7 marker chars when the label is empty.
10247        assert_eq!(
10248            result.content,
10249            b"a\n<<<<<<<\nOURS\n=======\nTHEIRS\n>>>>>>>\nc\n".to_vec(),
10250        );
10251    }
10252
10253    #[test]
10254    fn merge_add_add_empty_base_conflicts() {
10255        let result = merge_blobs(b"", b"x\ny\n", b"p\nq\n", &merge_opts());
10256        assert!(result.conflicted);
10257        assert_eq!(
10258            result.content,
10259            b"<<<<<<< ours\nx\ny\n=======\np\nq\n>>>>>>> theirs\n".to_vec(),
10260        );
10261    }
10262
10263    #[test]
10264    fn merge_ignore_space_change_resolves_clean_keeping_ours() {
10265        // ours: only-whitespace change (collapsed run); theirs: real change.
10266        // Under -Xignore-space-change the whitespace-only line is not a conflict
10267        // and ours' actual bytes survive (xdl_merge copies common spans from
10268        // file1); theirs' real change to a different line wins on its own line.
10269        let base = b"alpha   beta\nsecond line\n";
10270        let ours = b"alpha beta\nsecond line\n"; // collapsed the run
10271        let theirs = b"alpha   beta\nsecond CHANGED\n"; // real change on line 2
10272        let options = MergeBlobOptions {
10273            ws_ignore: WsIgnore {
10274                space_change: true,
10275                ..WsIgnore::EMPTY
10276            },
10277            ..merge_opts()
10278        };
10279        let result = merge_blobs(base, ours, theirs, &options);
10280        assert!(
10281            !result.conflicted,
10282            "whitespace-only divergence is not a conflict"
10283        );
10284        assert_eq!(result.content, b"alpha beta\nsecond CHANGED\n".to_vec());
10285    }
10286
10287    #[test]
10288    fn merge_ignore_space_change_still_conflicts_on_real_divergence() {
10289        // Both sides make a real (non-whitespace) change to the same line: still
10290        // a conflict even under -Xignore-space-change.
10291        let base = b"one\n";
10292        let ours = b"OURS\n";
10293        let theirs = b"THEIRS\n";
10294        let options = MergeBlobOptions {
10295            ws_ignore: WsIgnore {
10296                space_change: true,
10297                ..WsIgnore::EMPTY
10298            },
10299            ..merge_opts()
10300        };
10301        let result = merge_blobs(base, ours, theirs, &options);
10302        assert!(result.conflicted);
10303    }
10304
10305    #[test]
10306    fn merge_add_add_empty_base_identical_is_clean() {
10307        let result = merge_blobs(b"", b"x\ny\n", b"x\ny\n", &merge_opts());
10308        assert!(!result.conflicted);
10309        assert_eq!(result.content, b"x\ny\n");
10310    }
10311
10312    #[test]
10313    fn merge_deletion_one_side_takes_deletion() {
10314        // ours deletes line b; theirs leaves it -> clean, deletion wins.
10315        let result = merge_blobs(b"a\nb\nc\n", b"a\nc\n", b"a\nb\nc\n", &merge_opts());
10316        assert!(!result.conflicted);
10317        assert_eq!(result.content, b"a\nc\n");
10318    }
10319
10320    #[test]
10321    fn merge_deletion_vs_modification_conflicts() {
10322        // ours deletes b; theirs modifies b -> conflict.
10323        let result = merge_blobs(b"a\nb\nc\n", b"a\nc\n", b"a\nB!\nc\n", &merge_opts());
10324        assert!(result.conflicted);
10325        // ours side of the conflict is empty (the line was deleted).
10326        assert_eq!(
10327            result.content,
10328            b"a\n<<<<<<< ours\n=======\nB!\n>>>>>>> theirs\nc\n".to_vec(),
10329        );
10330    }
10331
10332    #[test]
10333    fn merge_missing_final_newline_marker_starts_on_own_line() {
10334        // Both sides drop the trailing newline AND conflict at the end. The
10335        // closing marker section must still begin on its own line.
10336        let base = b"a\nb";
10337        let ours = b"a\nOURS";
10338        let theirs = b"a\nTHEIRS";
10339        let result = merge_blobs(base, ours, theirs, &merge_opts());
10340        assert!(result.conflicted);
10341        assert_eq!(
10342            result.content,
10343            b"a\n<<<<<<< ours\nOURS\n=======\nTHEIRS\n>>>>>>> theirs\n".to_vec(),
10344        );
10345    }
10346
10347    #[test]
10348    fn merge_clean_preserves_missing_final_newline() {
10349        // ours removes the trailing newline; theirs is unchanged -> ours wins,
10350        // and the result keeps the missing newline.
10351        let result = merge_blobs(b"a\nb\n", b"a\nb", b"a\nb\n", &merge_opts());
10352        assert!(!result.conflicted);
10353        assert_eq!(result.content, b"a\nb");
10354    }
10355
10356    #[test]
10357    fn merge_both_append_identical_tail_is_clean() {
10358        let result = merge_blobs(b"a\n", b"a\nz\n", b"a\nz\n", &merge_opts());
10359        assert!(!result.conflicted);
10360        assert_eq!(result.content, b"a\nz\n");
10361    }
10362
10363    #[test]
10364    fn merge_when_ours_equals_base_yields_theirs() {
10365        // Regression: a side that did not change must not suppress the other
10366        // side's edits anywhere in the file.
10367        let base = b"b\na\n";
10368        let theirs = b"b\nb\nc\na\nc\n";
10369        let result = merge_blobs(base, base, theirs, &merge_opts());
10370        assert!(!result.conflicted);
10371        assert_eq!(result.content, theirs.to_vec());
10372    }
10373    fn applied(outcome: ApplyOutcome) -> Vec<u8> {
10374        match outcome {
10375            ApplyOutcome::Applied(bytes) => bytes,
10376            ApplyOutcome::Rejected => panic!("expected Applied, got Rejected"),
10377        }
10378    }
10379
10380    #[test]
10381    fn parse_multi_file_patch() {
10382        let patch = b"\
10383diff --git a/one.txt b/one.txt
10384index aaaaaaa..bbbbbbb 100644
10385--- a/one.txt
10386+++ b/one.txt
10387@@ -1,3 +1,3 @@
10388 alpha
10389-beta
10390+BETA
10391 gamma
10392diff --git a/two.txt b/two.txt
10393index ccccccc..ddddddd 100644
10394--- a/two.txt
10395+++ b/two.txt
10396@@ -1,2 +1,3 @@
10397 first
10398+inserted
10399 second
10400";
10401        let patches = parse_unified_patch(patch).expect("test operation should succeed");
10402        assert_eq!(patches.len(), 2);
10403
10404        assert_eq!(patches[0].old_path.as_deref(), Some(b"one.txt".as_slice()));
10405        assert_eq!(patches[0].new_path.as_deref(), Some(b"one.txt".as_slice()));
10406        // The `index <a>..<b> 100644` line carries the unchanged-file mode, which
10407        // git's gitdiff_index records as old_mode.
10408        assert_eq!(patches[0].old_mode, Some(0o100644));
10409        assert_eq!(
10410            patches[0].old_oid_hex.as_deref(),
10411            Some(b"aaaaaaa".as_slice())
10412        );
10413        assert_eq!(
10414            patches[0].new_oid_hex.as_deref(),
10415            Some(b"bbbbbbb".as_slice())
10416        );
10417        assert_eq!(patches[0].hunks.len(), 1);
10418        let h = &patches[0].hunks[0];
10419        assert_eq!(
10420            (h.old_start, h.old_len, h.new_start, h.new_len),
10421            (1, 3, 1, 3)
10422        );
10423        assert_eq!(
10424            h.lines,
10425            vec![
10426                HunkLine::Context(b"alpha".to_vec()),
10427                HunkLine::Delete(b"beta".to_vec()),
10428                HunkLine::Insert(b"BETA".to_vec()),
10429                HunkLine::Context(b"gamma".to_vec()),
10430            ]
10431        );
10432
10433        assert_eq!(patches[1].new_path.as_deref(), Some(b"two.txt".as_slice()));
10434        assert_eq!(patches[1].hunks[0].new_len, 3);
10435    }
10436
10437    #[test]
10438    fn parse_default_hunk_range_length() {
10439        // `@@ -1 +1,2 @@` (no comma) means a length of 1 on the old side.
10440        let patch = b"\
10441--- a/x
10442+++ b/x
10443@@ -1 +1,2 @@
10444 line
10445+added
10446";
10447        let patches = parse_unified_patch(patch).expect("test operation should succeed");
10448        let h = &patches[0].hunks[0];
10449        assert_eq!(
10450            (h.old_start, h.old_len, h.new_start, h.new_len),
10451            (1, 1, 1, 2)
10452        );
10453    }
10454
10455    #[test]
10456    fn parse_hunk_header_before_file_errors() {
10457        let patch = b"@@ -1,1 +1,1 @@\n context\n";
10458        assert!(parse_unified_patch(patch).is_err());
10459    }
10460
10461    #[test]
10462    fn parse_mismatched_counts_errors() {
10463        // Header promises two old lines but only one is present.
10464        let patch = b"--- a/x\n+++ b/x\n@@ -1,2 +1,2 @@\n only\n+new\n";
10465        assert!(parse_unified_patch(patch).is_err());
10466    }
10467
10468    #[test]
10469    fn apply_clean_hunk() {
10470        let base = b"alpha\nbeta\ngamma\n";
10471        let patch = parse_unified_patch(
10472            b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
10473        )
10474        .expect("test operation should succeed");
10475        let out = applied(apply_file_patch(base, &patch[0]));
10476        assert_eq!(out, b"alpha\nBETA\ngamma\n");
10477    }
10478
10479    #[test]
10480    fn apply_with_line_offset() {
10481        // The hunk's recorded position (line 2) is a couple of lines above where
10482        // the matching context actually lives (line 4); the outward search must
10483        // find it. The hunk is NOT anchored at the file start (old_start > 1, so
10484        // no match_beginning) and has trailing context (`tail`, so no
10485        // match_end), which is exactly the shape a real drifted patch takes —
10486        // verified against `git apply` ("Hunk #1 succeeded at 4 (offset 2)").
10487        let base = b"pre1\npre2\npre3\nalpha\nbeta\ngamma\ntail\n";
10488        let patch = parse_unified_patch(
10489            b"--- a/x\n+++ b/x\n@@ -2,4 +2,4 @@\n alpha\n-beta\n+BETA\n gamma\n tail\n",
10490        )
10491        .expect("test operation should succeed");
10492        let out = applied(apply_file_patch(base, &patch[0]));
10493        assert_eq!(out, b"pre1\npre2\npre3\nalpha\nBETA\ngamma\ntail\n");
10494    }
10495
10496    #[test]
10497    fn apply_with_negative_line_offset() {
10498        // Recorded position is well past the real location; search backward.
10499        let base = b"alpha\nbeta\ngamma\n";
10500        let patch = parse_unified_patch(
10501            b"--- a/x\n+++ b/x\n@@ -50,3 +50,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
10502        )
10503        .expect("test operation should succeed");
10504        let out = applied(apply_file_patch(base, &patch[0]));
10505        assert_eq!(out, b"alpha\nBETA\ngamma\n");
10506    }
10507
10508    #[test]
10509    fn apply_multiple_hunks() {
10510        let base = b"a\nb\nc\nd\ne\nf\ng\nh\n";
10511        let patch = parse_unified_patch(
10512            b"--- a/x\n+++ b/x\n\
10513@@ -1,3 +1,3 @@\n a\n-b\n+B\n c\n\
10514@@ -6,3 +6,3 @@\n f\n-g\n+G\n h\n",
10515        )
10516        .expect("test operation should succeed");
10517        let out = applied(apply_file_patch(base, &patch[0]));
10518        assert_eq!(out, b"a\nB\nc\nd\ne\nf\nG\nh\n");
10519    }
10520
10521    #[test]
10522    fn reject_on_context_mismatch() {
10523        let base = b"alpha\nDIFFERENT\ngamma\n";
10524        let patch = parse_unified_patch(
10525            b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
10526        )
10527        .expect("test operation should succeed");
10528        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
10529    }
10530
10531    #[test]
10532    fn reject_when_match_end_required_but_not_at_eof() {
10533        // git's `apply.c`: a hunk with NO trailing context must match the END of
10534        // the file (`match_end`). Here the leading context (`tail`/`anchor`)
10535        // matches at the middle of the base, but there are further lines after
10536        // it, so the preimage does not reach EOF. git rejects this; the old
10537        // sley matcher wrongly applied it (duplicating the appended block). This
10538        // is the t4150-am cell-34 lever: rejection forces `am -3`'s 3-way path.
10539        let base = b"one\ntwo\nanchor\nalready\nappended\n";
10540        // Hunk: context `anchor`, then append `added1`/`added2`. No trailing
10541        // context => match_end. At line 3 (`anchor`) the preimage is just one
10542        // line and does not reach EOF, so it must be rejected.
10543        let patch =
10544            parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -3,1 +3,3 @@\n anchor\n+added1\n+added2\n")
10545                .expect("test operation should succeed");
10546        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
10547    }
10548
10549    #[test]
10550    fn append_at_eof_matches_when_context_reaches_end() {
10551        // The mirror of the rejection case: the same shape applies cleanly when
10552        // the matching context IS the last line of the file (preimage reaches
10553        // EOF), so `match_end` is satisfied.
10554        let base = b"one\ntwo\nanchor\n";
10555        let patch =
10556            parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -3,1 +3,3 @@\n anchor\n+added1\n+added2\n")
10557                .expect("test operation should succeed");
10558        let out = applied(apply_file_patch(base, &patch[0]));
10559        assert_eq!(out, b"one\ntwo\nanchor\nadded1\nadded2\n");
10560    }
10561
10562    #[test]
10563    fn reject_when_match_beginning_required_but_not_at_start() {
10564        // A hunk anchored at line 1 (`old_start <= 1`) must match the START of
10565        // the file (`match_beginning`). If the matching context only appears
10566        // later, git rejects rather than wandering to it.
10567        let base = b"junk\nalpha\nbeta\ngamma\n";
10568        let patch =
10569            parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,2 +1,3 @@\n alpha\n+INSERT\n beta\n")
10570                .expect("test operation should succeed");
10571        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
10572    }
10573
10574    #[test]
10575    fn no_default_fuzz_rejects_on_trailing_context_mismatch() {
10576        // `git apply` / `git am` keep `p_context = UINT_MAX` by default, so they
10577        // do NOT fuzz a hunk in by dropping context. Here the trailing context
10578        // line (`gamma`) differs from the base (`DIVERGED`), and because the
10579        // anchor is line 1 the hunk must match the beginning with its FULL
10580        // preimage. Verified against real `git apply`: this is rejected.
10581        let base = b"alpha\nbeta\nDIVERGED\n";
10582        let patch = parse_unified_patch(
10583            b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
10584        )
10585        .expect("test operation should succeed");
10586        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
10587    }
10588
10589    #[test]
10590    fn parse_and_apply_new_file() {
10591        let patch = parse_unified_patch(
10592            b"\
10593diff --git a/new.txt b/new.txt
10594new file mode 100644
10595index 0000000..1111111
10596--- /dev/null
10597+++ b/new.txt
10598@@ -0,0 +1,2 @@
10599+hello
10600+world
10601",
10602        )
10603        .expect("test operation should succeed");
10604        assert!(patches_first_is_new(&patch));
10605        assert_eq!(patch[0].old_path, None);
10606        assert_eq!(patch[0].new_path.as_deref(), Some(b"new.txt".as_slice()));
10607        assert_eq!(patch[0].new_mode, Some(0o100644));
10608        // Base is ignored for a new file.
10609        let out = applied(apply_file_patch(b"garbage that is ignored", &patch[0]));
10610        assert_eq!(out, b"hello\nworld\n");
10611    }
10612
10613    fn patches_first_is_new(patches: &[FilePatch]) -> bool {
10614        patches.first().map(|p| p.is_new).unwrap_or(false)
10615    }
10616
10617    #[test]
10618    fn parse_and_apply_delete_file() {
10619        let patch = parse_unified_patch(
10620            b"\
10621diff --git a/gone.txt b/gone.txt
10622deleted file mode 100644
10623index 1111111..0000000
10624--- a/gone.txt
10625+++ /dev/null
10626@@ -1,2 +0,0 @@
10627-hello
10628-world
10629",
10630        )
10631        .expect("test operation should succeed");
10632        assert!(patch[0].is_delete);
10633        assert_eq!(patch[0].old_path.as_deref(), Some(b"gone.txt".as_slice()));
10634        assert_eq!(patch[0].new_path, None);
10635        assert_eq!(patch[0].old_mode, Some(0o100644));
10636        let out = applied(apply_file_patch(b"hello\nworld\n", &patch[0]));
10637        assert_eq!(out, b"");
10638    }
10639
10640    #[test]
10641    fn parse_rename_headers() {
10642        let patch = parse_unified_patch(
10643            b"\
10644diff --git a/old/name.txt b/new/name.txt
10645similarity index 100%
10646rename from old/name.txt
10647rename to new/name.txt
10648",
10649        )
10650        .expect("test operation should succeed");
10651        assert!(patch[0].is_rename);
10652        assert_eq!(
10653            patch[0].old_path.as_deref(),
10654            Some(b"old/name.txt".as_slice())
10655        );
10656        assert_eq!(
10657            patch[0].new_path.as_deref(),
10658            Some(b"new/name.txt".as_slice())
10659        );
10660        assert!(patch[0].hunks.is_empty());
10661    }
10662
10663    #[test]
10664    fn parse_mode_change_headers() {
10665        let patch = parse_unified_patch(
10666            b"\
10667diff --git a/script.sh b/script.sh
10668old mode 100644
10669new mode 100755
10670",
10671        )
10672        .expect("test operation should succeed");
10673        assert_eq!(patch[0].old_mode, Some(0o100644));
10674        assert_eq!(patch[0].new_mode, Some(0o100755));
10675        assert!(!patch[0].is_new);
10676        assert!(!patch[0].is_delete);
10677    }
10678
#[test]
fn no_final_newline_base_preserved_when_untouched() {
    // Only line 1 is edited. The last line ("notail") has no terminating
    // newline and is left alone, so the result must stay newline-free too.
    //
    // The hunk has the shape real `git diff` produces for this edit:
    // `@@ -1,3 +1,3 @@` with both unchanged lines as trailing context and the
    // `\ No newline` marker attached to the final context line. A hand-written
    // `@@ -1,1 +1,1 @@` with no trailing context would (correctly) be rejected
    // by git: a hunk anchored at line 1 without trailing context has to cover
    // the whole file (`match_beginning` && `match_end`).
    let original = b"alpha\nbeta\nnotail";
    let diff_text =
        b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n-alpha\n+ALPHA\n beta\n notail\n\\ No newline at end of file\n";
    let parsed = parse_unified_patch(diff_text).expect("test operation should succeed");
    let result = applied(apply_file_patch(original, &parsed[0]));
    assert_eq!(result, b"ALPHA\nbeta\nnotail");
}
10697
#[test]
fn no_final_newline_added_by_patch() {
    // The base is newline-terminated; the patch replaces its last line with
    // one that is not. Only the *new* side of the hunk may carry the
    // no-newline flag, and the output must end without a newline.
    let original = b"alpha\nbeta\n";
    let diff_text =
        b"--- a/x\n+++ b/x\n@@ -2,1 +2,1 @@\n-beta\n+beta-notail\n\\ No newline at end of file\n";
    let parsed = parse_unified_patch(diff_text).expect("test operation should succeed");

    let hunk = &parsed[0].hunks[0];
    assert!(hunk.new_no_newline);
    assert!(!hunk.old_no_newline);

    let result = applied(apply_file_patch(original, &parsed[0]));
    assert_eq!(result, b"alpha\nbeta-notail");
}
10712
#[test]
fn no_final_newline_in_base_matched_and_kept() {
    // Neither side ends with a newline. The marker follows a context line, so
    // it applies to old and new alike; matching that context requires the
    // base's last line to be newline-free as well.
    let original = b"alpha\nbeta";
    let diff_text =
        b"--- a/x\n+++ b/x\n@@ -1,2 +1,2 @@\n-alpha\n+ALPHA\n beta\n\\ No newline at end of file\n";
    let parsed = parse_unified_patch(diff_text).expect("test operation should succeed");

    let hunk = &parsed[0].hunks[0];
    assert!(hunk.old_no_newline);
    assert!(hunk.new_no_newline);

    let result = applied(apply_file_patch(original, &parsed[0]));
    assert_eq!(result, b"ALPHA\nbeta");
}
10727
#[test]
fn no_final_newline_mismatch_rejected() {
    // The patch claims the old file's last line has no newline, but the base
    // is newline-terminated. Applying it must be refused instead of silently
    // producing a wrong result.
    let original = b"alpha\nbeta\n";
    let diff_text =
        b"--- a/x\n+++ b/x\n@@ -2,1 +2,1 @@\n-beta\n\\ No newline at end of file\n+beta2\n";
    let parsed = parse_unified_patch(diff_text).expect("test operation should succeed");
    assert!(parsed[0].hunks[0].old_no_newline);

    let outcome = apply_file_patch(original, &parsed[0]);
    assert_eq!(outcome, ApplyOutcome::Rejected);
}
10740
#[test]
fn delete_with_no_final_newline() {
    // A deletion patch (`+++ /dev/null`) that removes the single,
    // newline-less line of the file; the result is empty content.
    let original = b"only line no newline";
    let diff_text =
        b"--- a/x\n+++ /dev/null\n@@ -1,1 +0,0 @@\n-only line no newline\n\\ No newline at end of file\n";
    let parsed = parse_unified_patch(diff_text).expect("test operation should succeed");
    let file = &parsed[0];
    assert!(file.is_delete);

    let result = applied(apply_file_patch(original, file));
    assert_eq!(result, b"");
}
10753
#[test]
fn apply_pure_insertion_hunk() {
    // The hunk only adds a line ("middle") between two context lines.
    let original = b"first\nsecond\n";
    let diff_text = b"--- a/x\n+++ b/x\n@@ -1,2 +1,3 @@\n first\n+middle\n second\n";
    let parsed = parse_unified_patch(diff_text).expect("test operation should succeed");
    let result = applied(apply_file_patch(original, &parsed[0]));
    assert_eq!(result, b"first\nmiddle\nsecond\n");
}
10763
#[test]
fn apply_pure_deletion_hunk() {
    // The hunk only removes a line ("middle") from between two context lines.
    let original = b"first\nmiddle\nsecond\n";
    let diff_text = b"--- a/x\n+++ b/x\n@@ -1,3 +1,2 @@\n first\n-middle\n second\n";
    let parsed = parse_unified_patch(diff_text).expect("test operation should succeed");
    let result = applied(apply_file_patch(original, &parsed[0]));
    assert_eq!(result, b"first\nsecond\n");
}
10773
#[test]
fn apply_then_reparse_round_trip() {
    // Two properties of one hand-written diff:
    //   * parsing is idempotent: parsing the same bytes twice yields equal
    //     structures;
    //   * applying it produces exactly the new content the diff describes.
    let original = b"l1\nl2\nl3\nl4\nl5\n";
    let diff_text = b"--- a/f\n+++ b/f\n@@ -2,3 +2,4 @@\n l2\n-l3\n+L3\n+L3b\n l4\n";

    let first_parse = parse_unified_patch(diff_text).expect("test operation should succeed");
    let second_parse = parse_unified_patch(diff_text).expect("test operation should succeed");
    assert_eq!(first_parse, second_parse);

    let result = applied(apply_file_patch(original, &first_parse[0]));
    assert_eq!(result, b"l1\nl2\nL3\nL3b\nl4\nl5\n");
}
10787
#[test]
fn empty_context_line_without_trailing_space() {
    // A blank context line is normally a lone space. Some transports strip
    // that space, leaving a completely empty hunk body line; the parser treats
    // such a line as blank context (second line of the hunk below).
    let original = b"a\n\nb\n";
    let diff_text = b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n a\n\n-b\n+B\n";
    let parsed = parse_unified_patch(diff_text).expect("test operation should succeed");

    let hunk = &parsed[0].hunks[0];
    assert_eq!(hunk.lines[1], HunkLine::Context(Vec::new()));

    let result = applied(apply_file_patch(original, &parsed[0]));
    assert_eq!(result, b"a\n\nB\n");
}
10799
#[test]
fn split_blob_lines_handles_edge_cases() {
    // Empty input produces no lines at all.
    assert!(split_blob_lines(b"").is_empty());

    // A single unterminated line: one entry, flagged as lacking a newline.
    let unterminated = split_blob_lines(b"abc");
    assert_eq!(unterminated.len(), 1);
    assert!(unterminated[0].no_newline);

    // A single terminated line: one entry, flag clear.
    let with_newline = split_blob_lines(b"abc\n");
    assert_eq!(with_newline.len(), 1);
    assert!(!with_newline[0].no_newline);

    // The final newline terminates the last line; it does not count as an
    // additional (empty) line after it.
    let trailing_newline = split_blob_lines(b"x\n");
    assert_eq!(trailing_newline.len(), 1);
}
10812
10813    // ---- content similarity & inexact rename/copy detection -----------------
10814
#[test]
fn similarity_identical_and_empty_conventions() {
    // Identical content scores 100 ...
    let text = b"hello\nworld\n";
    assert_eq!(blob_similarity(text, text), 100);
    // ... and that includes the degenerate case of two empty blobs.
    assert_eq!(blob_similarity(b"", b""), 100);

    // An empty blob has nothing in common with a non-empty one, whichever
    // side it is on.
    assert_eq!(blob_similarity(b"", b"hello\n"), 0);
    assert_eq!(blob_similarity(b"hello\n", b""), 0);
}
10825
#[test]
fn similarity_one_changed_line_is_75_and_symmetric() {
    // original: one/two/three/four/five, 4+4+6+5+5 = 24 bytes.
    // edited:   "three\n" becomes "THREE\n", so the size is still 24.
    // Shared lines are one, two, four, five: 4+4+5+5 = 18 bytes.
    // score = 18 * 100 / max(24, 24) = 75, the value `git diff -M` prints as
    // "similarity index 75%".
    let original = b"one\ntwo\nthree\nfour\nfive\n";
    let edited = b"one\ntwo\nTHREE\nfour\nfive\n";

    let forward = blob_similarity(original, edited);
    assert_eq!(forward, 75);
    // Swapping the arguments must not change the score.
    let backward = blob_similarity(edited, original);
    assert_eq!(backward, 75);
}
10839
#[test]
fn similarity_one_edited_line_of_three_is_66_not_67() {
    // One of three 2-byte lines is edited, so 4 of 6 bytes are shared. git
    // shows this as `R066` / "similarity index 66%" because it scores in two
    // truncating integer steps: `4 * 60000 / 6 = 40000`, then
    // `40000 * 100 / 60000 = 66`. A single rounded `4 * 100 / 6` would be 67.
    // This test pins the MAX_SCORE-based arithmetic so the result keeps
    // matching diffcore-rename.
    let before = b"a\nb\nc\n";
    let after = b"a\nB\nc\n";
    assert_eq!(blob_similarity(before, after), 66);
    assert_eq!(blob_similarity(after, before), 66);
}
10850
#[test]
fn similarity_small_append_is_88() {
    // shorter: eight lines, 46 bytes in total.
    // longer:  the same eight lines plus "ADDED\n" (6 bytes), 52 bytes.
    // All 46 original bytes are shared; 46 * 100 / 52 comes out at 88, the
    // value `git diff -M` reports as "similarity index 88%".
    let shorter = b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\n";
    let longer = b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\nADDED\n";
    let score = blob_similarity(shorter, longer);
    assert_eq!(score, 88);
}
10860
#[test]
fn similarity_half_rewrite_is_50() {
    // Six 3-byte lines (18 bytes per side); the last three are rewritten, so
    // l1..l3 (9 bytes) are shared: 9 * 100 / 18 = 50, matching `git diff -M`.
    let before = b"l1\nl2\nl3\nl4\nl5\nl6\n";
    let after = b"l1\nl2\nl3\nX4\nX5\nX6\n";
    let score = blob_similarity(before, after);
    assert_eq!(score, 50);
}
10869
10870    // ---- tree-diff based inexact detection ----------------------------------
10871
10872    /// Write a blob and return its oid.
10873    fn write_blob(db: &mut FileObjectDatabase, bytes: &[u8]) -> ObjectId {
10874        db.write_object(EncodedObject::new(ObjectType::Blob, bytes.to_vec()))
10875            .expect("test operation should succeed")
10876    }
10877
10878    /// Write a tree from `(name, mode, oid)` entries (sorted by name as git
10879    /// requires) and return its oid.
10880    fn write_tree(db: &mut FileObjectDatabase, entries: &[(&[u8], u32, ObjectId)]) -> ObjectId {
10881        let mut tree_entries: Vec<TreeEntry> = entries
10882            .iter()
10883            .map(|(name, mode, oid)| TreeEntry {
10884                mode: *mode,
10885                name: BString::from(*name),
10886                oid: *oid,
10887            })
10888            .collect();
10889        tree_entries.sort_by(|a, b| a.name.cmp(&b.name));
10890        let tree = Tree {
10891            entries: tree_entries,
10892        };
10893        db.write_object(EncodedObject::new(ObjectType::Tree, tree.write()))
10894            .expect("test operation should succeed")
10895    }
10896
#[test]
fn inexact_rename_detected_with_plausible_score() {
    // `a.txt` disappears and `b.txt` appears with one line changed. With
    // inexact detection on, that pair must be reported as a single rename
    // scoring 75 (the same blobs as
    // `similarity_one_changed_line_is_75_and_symmetric`).
    let root = temp_root();
    let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
        .expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);

    let before = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
    let after = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
    let left = write_tree(&mut db, &[(b"a.txt", 0o100644, before)]);
    let right = write_tree(&mut db, &[(b"b.txt", 0o100644, after)]);

    let base = DiffNameStatusOptions {
        detect_renames: true,
        detect_copies: false,
        find_copies_harder: false,
        rename_empty: true,
    };
    let opts = RenameDetectionOptions {
        base,
        detect_inexact: true,
        rename_threshold: DEFAULT_RENAME_THRESHOLD,
        copy_threshold: DEFAULT_RENAME_THRESHOLD,
        rename_limit: 0,
    };
    let entries =
        diff_name_status_trees_with_rename_options(&db, ObjectFormat::Sha1, &left, &right, opts)
            .expect("test operation should succeed");

    assert_eq!(
        entries.len(),
        1,
        "expected a single rename entry: {entries:?}"
    );
    let renamed = &entries[0];
    assert_eq!(renamed.status, NameStatus::Renamed(75));
    assert_eq!(
        renamed.old_path.as_ref().map(|p| p.as_bytes()),
        Some(b"a.txt".as_slice())
    );
    assert_eq!(renamed.path, b"b.txt");
    // Name-status line: zero-padded score, then old and new path.
    assert_eq!(renamed.line(), "R075\ta.txt\tb.txt");
    fs::remove_dir_all(root).expect("test operation should succeed");
}
10946
#[test]
fn inexact_rename_below_threshold_not_detected() {
    // Rewriting the last three of six lines leaves the blobs 50% similar.
    // Under a 60% threshold the pair must NOT be reported as a rename; it has
    // to show up as an independent Add and Delete.
    let root = temp_root();
    let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
        .expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);

    let before = write_blob(&mut db, b"l1\nl2\nl3\nl4\nl5\nl6\n");
    let after = write_blob(&mut db, b"l1\nl2\nl3\nX4\nX5\nX6\n");
    let left = write_tree(&mut db, &[(b"a.txt", 0o100644, before)]);
    let right = write_tree(&mut db, &[(b"b.txt", 0o100644, after)]);

    let base = DiffNameStatusOptions {
        detect_renames: true,
        detect_copies: false,
        find_copies_harder: false,
        rename_empty: true,
    };
    let opts = RenameDetectionOptions {
        base,
        detect_inexact: true,
        rename_threshold: 60,
        copy_threshold: 60,
        rename_limit: 0,
    };
    let entries =
        diff_name_status_trees_with_rename_options(&db, ObjectFormat::Sha1, &left, &right, opts)
            .expect("test operation should succeed");

    let statuses: Vec<_> = entries.iter().map(|entry| entry.status).collect();
    let has_add = statuses.contains(&NameStatus::Added);
    let has_delete = statuses.contains(&NameStatus::Deleted);
    assert!(
        has_add && has_delete,
        "expected separate add/delete below threshold, got {entries:?}"
    );
    let no_rename = statuses
        .iter()
        .all(|status| !matches!(status, NameStatus::Renamed(_)));
    assert!(
        no_rename,
        "no rename should be reported below threshold: {entries:?}"
    );

    // Sanity check: with the rename threshold lowered to exactly 50 the pair
    // IS detected (the boundary is inclusive) and scores exactly 50.
    let opts_low = RenameDetectionOptions {
        rename_threshold: 50,
        ..opts
    };
    let entries_low = diff_name_status_trees_with_rename_options(
        &db,
        ObjectFormat::Sha1,
        &left,
        &right,
        opts_low,
    )
    .expect("test operation should succeed");
    assert_eq!(entries_low.len(), 1);
    assert_eq!(entries_low[0].status, NameStatus::Renamed(50));
    fs::remove_dir_all(root).expect("test operation should succeed");
}
11010
#[test]
fn exact_rename_scores_100_and_takes_priority() {
    // The same blob under a new path is an exact rename. It must be reported
    // with score 100 both when inexact detection is off and when it is on.
    let root = temp_root();
    let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
        .expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);

    let blob = write_blob(&mut db, b"identical\ncontent\nhere\n");
    let left = write_tree(&mut db, &[(b"old.txt", 0o100644, blob)]);
    let right = write_tree(&mut db, &[(b"new.txt", 0o100644, blob)]);

    for inexact in [false, true] {
        let base = DiffNameStatusOptions {
            detect_renames: true,
            detect_copies: false,
            find_copies_harder: false,
            rename_empty: true,
        };
        let opts = RenameDetectionOptions {
            base,
            detect_inexact: inexact,
            rename_threshold: DEFAULT_RENAME_THRESHOLD,
            copy_threshold: DEFAULT_RENAME_THRESHOLD,
            rename_limit: 0,
        };
        let entries = diff_name_status_trees_with_rename_options(
            &db,
            ObjectFormat::Sha1,
            &left,
            &right,
            opts,
        )
        .expect("test operation should succeed");

        assert_eq!(entries.len(), 1, "inexact={inexact}: {entries:?}");
        let renamed = &entries[0];
        assert_eq!(renamed.status, NameStatus::Renamed(100));
        assert_eq!(
            renamed.old_path.as_ref().map(|p| p.as_bytes()),
            Some(b"old.txt".as_slice())
        );
        assert_eq!(renamed.path, b"new.txt");
    }
    fs::remove_dir_all(root).expect("test operation should succeed");
}
11055
#[test]
fn inexact_copy_detected_with_score() {
    // `orig.txt` is unchanged and a near-copy of it is added: one of five
    // 4-byte lines differs, so the blobs are 80% similar. With copy detection,
    // `find_copies_harder` and inexact matching all enabled, the new file must
    // be reported as a copy of `orig.txt` with score 80 (matches
    // `git diff -C --find-copies-harder`).
    let root = temp_root();
    let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
        .expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);

    let orig = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\neee\n");
    let copy = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\nEEE\n");
    // `ObjectId` is `Copy`, so the same id is passed to both trees by value;
    // no `.clone()` is needed.
    let left = write_tree(&mut db, &[(b"orig.txt", 0o100644, orig)]);
    let right = write_tree(
        &mut db,
        &[(b"orig.txt", 0o100644, orig), (b"copy.txt", 0o100644, copy)],
    );

    let opts = RenameDetectionOptions {
        base: DiffNameStatusOptions {
            detect_renames: true,
            detect_copies: true,
            find_copies_harder: true,
            rename_empty: true,
        },
        detect_inexact: true,
        rename_threshold: DEFAULT_RENAME_THRESHOLD,
        copy_threshold: DEFAULT_RENAME_THRESHOLD,
        rename_limit: 0,
    };
    let entries = diff_name_status_trees_with_rename_options(
        &db,
        ObjectFormat::Sha1,
        &left,
        &right,
        opts,
    )
    .expect("test operation should succeed");

    let copy_entry = entries
        .iter()
        .find(|e| e.path == b"copy.txt")
        .unwrap_or_else(|| panic!("no copy.txt entry: {entries:?}"));
    assert_eq!(copy_entry.status, NameStatus::Copied(80));
    assert_eq!(
        copy_entry.old_path.as_ref().map(|p| p.as_bytes()),
        Some(b"orig.txt".as_slice())
    );
    // A copy leaves its source in place: nothing may be reported as deleted.
    assert!(
        entries.iter().all(|e| e.status != NameStatus::Deleted),
        "copy must not delete the source: {entries:?}"
    );
    fs::remove_dir_all(root).expect("test operation should succeed");
}
11112
#[test]
fn inexact_copy_skipped_over_rename_limit() {
    // Mirrors git's `too_many_rename_candidates`: when the copy matrix
    // (sources × dests) exceeds `rename_limit²`, inexact copy detection is
    // skipped wholesale and the new file is reported as a plain Add. Real git
    // does the same: `git diff -C --find-copies-harder -l1` warns "rename
    // detection was skipped" and shows `A copy.txt`. A `rename_limit`
    // comfortably above the matrix size still detects the copy, which proves
    // the gate fires *only* when over the limit, not for any positive limit.
    let root = temp_root();
    let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
        .expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);

    let orig = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\neee\n");
    let extra = write_blob(&mut db, b"111\n222\n333\n444\n555\n");
    let copy = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\nEEE\n");
    // Two unchanged left files: under `--find-copies-harder` both are copy
    // sources, so the matrix is 2 (sources) × 1 (dest) = 2.
    // `ObjectId` is `Copy`, so the ids are shared between the two trees by
    // value; no `.clone()` is needed.
    let left = write_tree(
        &mut db,
        &[
            (b"orig.txt", 0o100644, orig),
            (b"extra.txt", 0o100644, extra),
        ],
    );
    let right = write_tree(
        &mut db,
        &[
            (b"orig.txt", 0o100644, orig),
            (b"extra.txt", 0o100644, extra),
            (b"copy.txt", 0o100644, copy),
        ],
    );

    // Identical options for both runs; only `rename_limit` varies.
    let opts_for = |rename_limit| RenameDetectionOptions {
        base: DiffNameStatusOptions {
            detect_renames: true,
            detect_copies: true,
            find_copies_harder: true,
            rename_empty: true,
        },
        detect_inexact: true,
        rename_threshold: DEFAULT_RENAME_THRESHOLD,
        copy_threshold: DEFAULT_RENAME_THRESHOLD,
        rename_limit,
    };

    // Over limit: 2 × 1 = 2 > 1² ⇒ copy detection skipped, copy.txt is Added.
    let over = diff_name_status_trees_with_rename_options(
        &db,
        ObjectFormat::Sha1,
        &left,
        &right,
        opts_for(1),
    )
    .expect("test operation should succeed");
    let copy_over = over
        .iter()
        .find(|e| e.path == b"copy.txt")
        .unwrap_or_else(|| panic!("no copy.txt entry: {over:?}"));
    assert_eq!(
        copy_over.status,
        NameStatus::Added,
        "over rename_limit, copy must degrade to a plain Add: {over:?}"
    );

    // Under limit: 2 × 1 = 2 ≤ 4² ⇒ copy still detected (score 80).
    let under = diff_name_status_trees_with_rename_options(
        &db,
        ObjectFormat::Sha1,
        &left,
        &right,
        opts_for(4),
    )
    .expect("test operation should succeed");
    let copy_under = under
        .iter()
        .find(|e| e.path == b"copy.txt")
        .unwrap_or_else(|| panic!("no copy.txt entry: {under:?}"));
    assert_eq!(
        copy_under.status,
        NameStatus::Copied(80),
        "below rename_limit, copy detection is unaffected: {under:?}"
    );

    fs::remove_dir_all(root).expect("test operation should succeed");
}
11201
#[test]
fn inexact_rename_with_small_edit_scores_88() {
    // A rename whose destination also gains one appended line. The two blobs
    // are the ones from `similarity_small_append_is_88`, so the rename must
    // be reported with score 88.
    let root = temp_root();
    let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
        .expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);

    let before = write_blob(
        &mut db,
        b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\n",
    );
    let after = write_blob(
        &mut db,
        b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\nADDED\n",
    );
    let left = write_tree(&mut db, &[(b"src.txt", 0o100644, before)]);
    let right = write_tree(&mut db, &[(b"dst.txt", 0o100644, after)]);

    let base = DiffNameStatusOptions {
        detect_renames: true,
        detect_copies: false,
        find_copies_harder: false,
        rename_empty: true,
    };
    let opts = RenameDetectionOptions::inexact(base);
    let entries =
        diff_name_status_trees_with_rename_options(&db, ObjectFormat::Sha1, &left, &right, opts)
            .expect("test operation should succeed");

    assert_eq!(entries.len(), 1, "{entries:?}");
    let renamed = &entries[0];
    assert_eq!(renamed.status, NameStatus::Renamed(88));
    assert_eq!(
        renamed.old_path.as_ref().map(|p| p.as_bytes()),
        Some(b"src.txt".as_slice())
    );
    assert_eq!(renamed.path, b"dst.txt");
    fs::remove_dir_all(root).expect("test operation should succeed");
}
11246
#[test]
fn inexact_disabled_default_preserves_exact_only_behavior() {
    // `RenameDetectionOptions::default()` must leave inexact detection off
    // and use the default rename threshold. With those defaults a
    // similar-but-not-identical pair is NOT a rename: it is reported as an
    // Add plus a Delete, exactly like the legacy exact-only path.
    let defaults = RenameDetectionOptions::default();
    assert!(!defaults.detect_inexact);
    assert_eq!(defaults.rename_threshold, DEFAULT_RENAME_THRESHOLD);

    let root = temp_root();
    let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
        .expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);

    let before = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
    let after = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
    let left = write_tree(&mut db, &[(b"a.txt", 0o100644, before)]);
    let right = write_tree(&mut db, &[(b"b.txt", 0o100644, after)]);

    let entries = diff_name_status_trees_with_rename_options(
        &db,
        ObjectFormat::Sha1,
        &left,
        &right,
        RenameDetectionOptions::default(),
    )
    .expect("test operation should succeed");

    let statuses: Vec<_> = entries.iter().map(|entry| entry.status).collect();
    assert!(statuses.contains(&NameStatus::Added));
    assert!(statuses.contains(&NameStatus::Deleted));
    let no_rename = statuses
        .iter()
        .all(|status| !matches!(status, NameStatus::Renamed(_)));
    assert!(no_rename);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
11282
11283    // ---- patience / histogram diff tests ------------------------------------
11284
11285    /// Apply an edit script to `old` and return the reconstructed `new` bytes.
11286    ///
11287    /// Panics (test-only) if the script ever references a line out of range or
11288    /// claims a line is `Equal` when the corresponding `old`/`new` lines differ
11289    /// — that is exactly the invariant a correct LCS diff must uphold.
11290    fn apply_ops(old: &[DiffLine<'_>], new: &[DiffLine<'_>], ops: &[DiffOp]) -> Vec<u8> {
11291        let mut oi = 0usize;
11292        let mut ni = 0usize;
11293        let mut rebuilt: Vec<u8> = Vec::new();
11294        for op in ops {
11295            match *op {
11296                DiffOp::Equal(n) => {
11297                    for _ in 0..n {
11298                        // Equal must mean genuinely-equal lines (LCS-correct).
11299                        assert_eq!(old[oi], new[ni], "Equal op covered unequal lines");
11300                        rebuilt.extend_from_slice(old[oi].content);
11301                        oi += 1;
11302                        ni += 1;
11303                    }
11304                }
11305                DiffOp::Delete(n) => oi += n,
11306                DiffOp::Insert(n) => {
11307                    for _ in 0..n {
11308                        rebuilt.extend_from_slice(new[ni].content);
11309                        ni += 1;
11310                    }
11311                }
11312            }
11313        }
11314        // The script must consume every line of both sides exactly once.
11315        assert_eq!(oi, old.len(), "script did not consume all of old");
11316        assert_eq!(ni, new.len(), "script did not consume all of new");
11317        rebuilt
11318    }
11319
11320    /// Assert that `ops` is a valid LCS-correct script: it reconstructs `new`
11321    /// from `old`, and consecutive ops are coalesced (no two same-kind in a row).
11322    fn assert_valid_script(old_bytes: &[u8], new_bytes: &[u8], ops: &[DiffOp]) {
11323        let old = split_lines(old_bytes);
11324        let new = split_lines(new_bytes);
11325        let rebuilt = apply_ops(&old, &new, ops);
11326        assert_eq!(rebuilt, new_bytes, "script did not rebuild new");
11327        for pair in ops.windows(2) {
11328            let same_kind = matches!(
11329                (pair[0], pair[1]),
11330                (DiffOp::Equal(_), DiffOp::Equal(_))
11331                    | (DiffOp::Delete(_), DiffOp::Delete(_))
11332                    | (DiffOp::Insert(_), DiffOp::Insert(_))
11333            );
11334            assert!(!same_kind, "ops not coalesced: {:?}", ops);
11335        }
11336    }
11337
11338    /// Run all three real algorithms over a byte pair and assert each produces a
11339    /// valid, coalesced, LCS-correct script.
11340    fn check_all_algorithms(old_bytes: &[u8], new_bytes: &[u8]) {
11341        let old = split_lines(old_bytes);
11342        let new = split_lines(new_bytes);
11343        for algo in [
11344            DiffAlgorithm::Myers,
11345            DiffAlgorithm::Minimal,
11346            DiffAlgorithm::Patience,
11347            DiffAlgorithm::Histogram,
11348        ] {
11349            let ops = diff_lines_with_algorithm(&old, &new, algo);
11350            assert_valid_script(old_bytes, new_bytes, &ops);
11351        }
11352    }
11353
#[test]
fn patience_and_histogram_match_myers_on_simple_cases() {
    // With small, localized edits and no repeated lines there is nothing for
    // the algorithms to disagree about: patience, histogram and Myers must
    // all return the same canonical script.
    let cases: &[(&[u8], &[u8], Vec<DiffOp>)] = &[
        // One line replaced in the middle.
        (
            b"a\nb\nc\n",
            b"a\nx\nc\n",
            vec![
                DiffOp::Equal(1),
                DiffOp::Delete(1),
                DiffOp::Insert(1),
                DiffOp::Equal(1),
            ],
        ),
        // Identical inputs.
        (b"a\nb\nc\n", b"a\nb\nc\n", vec![DiffOp::Equal(3)]),
        // Everything inserted / everything deleted.
        (b"", b"a\nb\n", vec![DiffOp::Insert(2)]),
        (b"a\nb\n", b"", vec![DiffOp::Delete(2)]),
        // One line removed from the middle.
        (
            b"a\nb\nc\nd\n",
            b"a\nc\nd\n",
            vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Equal(2)],
        ),
    ];

    for case in cases {
        let (old_bytes, new_bytes, expected) = case;
        let old_lines = split_lines(old_bytes);
        let new_lines = split_lines(new_bytes);
        assert_eq!(&patience_diff_lines(&old_lines, &new_lines), expected);
        assert_eq!(&histogram_diff_lines(&old_lines, &new_lines), expected);
        assert_eq!(&myers_diff_lines(&old_lines, &new_lines), expected);
    }
}
11386
11387    #[test]
11388    fn patience_handles_both_empty() {
11389        let empty = split_lines(b"");
11390        assert!(patience_diff_lines(&empty, &empty).is_empty());
11391        assert!(histogram_diff_lines(&empty, &empty).is_empty());
11392    }
11393
11394    #[test]
11395    fn patience_aligns_unique_anchors_across_moved_block() {
11396        // Reordering two unique blocks: patience anchors on the unique lines and
11397        // produces a delete-then-insert (or insert-then-delete) that still
11398        // reconstructs `new`. Validity is the contract; exact shape may differ
11399        // from Myers, so we only assert reconstruction here.
11400        check_all_algorithms(
11401            b"alpha\nbeta\ngamma\ndelta\n",
11402            b"gamma\ndelta\nalpha\nbeta\n",
11403        );
11404    }
11405
11406    #[test]
11407    fn histogram_differs_from_myers_keeping_block_contiguous() {
11408        // A case where histogram diverges from Myers. With old = "b a" and a new
11409        // that surrounds an intact "b a" with inserted "b" lines, Myers splits
11410        // the common run into two single-line Equals (matching the leading and
11411        // trailing `b`/`a` separately), while histogram anchors on the rare line
11412        // and keeps the original two lines together as one Equal(2) block.
11413        let old = b"b\na\n";
11414        let new = b"a\nb\nb\na\nb\n";
11415        let old_l = split_lines(old);
11416        let new_l = split_lines(new);
11417
11418        let myers = myers_diff_lines(&old_l, &new_l);
11419        let histogram = histogram_diff_lines(&old_l, &new_l);
11420
11421        // All variants must reconstruct `new`.
11422        assert_valid_script(old, new, &myers);
11423        assert_valid_script(old, new, &histogram);
11424
11425        // Exact, pinned shapes: Myers interleaves single-line equals; histogram
11426        // keeps "b\na\n" contiguous.
11427        assert_eq!(
11428            myers,
11429            vec![
11430                DiffOp::Insert(1),
11431                DiffOp::Equal(1),
11432                DiffOp::Insert(1),
11433                DiffOp::Equal(1),
11434                DiffOp::Insert(1),
11435            ]
11436        );
11437        assert_eq!(
11438            histogram,
11439            vec![DiffOp::Insert(2), DiffOp::Equal(2), DiffOp::Insert(1)]
11440        );
11441        // The contract the task calls out: histogram differs from Myers here.
11442        assert_ne!(myers, histogram);
11443    }
11444
11445    #[test]
11446    fn patience_differs_from_myers_on_repeated_lines() {
11447        // A case where patience diverges from Myers. old = "b a", new = "a a b".
11448        // Myers deletes the leading `b` and appends; patience anchors on the
11449        // single unique-in-both line `a`... but `a` occurs twice in `new`, so it
11450        // is NOT unique there; patience instead falls through to its recursive
11451        // structure and produces the mirror script. Both reconstruct `new`.
11452        let old = b"b\na\n";
11453        let new = b"a\na\nb\n";
11454        let old_l = split_lines(old);
11455        let new_l = split_lines(new);
11456
11457        let myers = myers_diff_lines(&old_l, &new_l);
11458        let patience = patience_diff_lines(&old_l, &new_l);
11459
11460        assert_valid_script(old, new, &myers);
11461        assert_valid_script(old, new, &patience);
11462
11463        assert_eq!(
11464            myers,
11465            vec![DiffOp::Delete(1), DiffOp::Equal(1), DiffOp::Insert(2)]
11466        );
11467        assert_eq!(
11468            patience,
11469            vec![DiffOp::Insert(2), DiffOp::Equal(1), DiffOp::Delete(1)]
11470        );
11471        assert_ne!(myers, patience);
11472    }
11473
11474    #[test]
11475    fn realistic_function_insertion_all_valid() {
11476        // A more lifelike example: a new function is inserted ahead of an
11477        // existing one that shares structural lines ("}", blank line). We don't
11478        // pin exact shapes (they depend on trim interactions) but every
11479        // algorithm must produce a valid LCS-correct script.
11480        let old = b"int f() {\n    return 1;\n}\n";
11481        let new = b"int g() {\n    return 2;\n}\n\nint f() {\n    return 1;\n}\n";
11482        check_all_algorithms(old, new);
11483    }
11484
11485    #[test]
11486    fn histogram_anchors_on_rare_line_when_no_unique_line_exists() {
11487        // No line is globally unique on both sides (every distinct line repeats
11488        // on at least one side), so plain patience would fall straight to Myers.
11489        // Histogram still anchors on the least-frequent shared line. We assert
11490        // both produce valid, reconstructing scripts.
11491        check_all_algorithms(b"x\nx\nmid\nx\nx\n", b"x\nmid\nx\nx\nx\n");
11492        check_all_algorithms(
11493            b"dup\ndup\nrare\ndup\ndup\n",
11494            b"dup\nrare\ndup\ndup\ndup\ndup\n",
11495        );
11496    }
11497
11498    #[test]
11499    fn all_algorithms_treat_missing_final_newline_as_change() {
11500        // "b" (no newline) vs "b\n" is a real change for every algorithm.
11501        let old = split_lines(b"a\nb");
11502        let new = split_lines(b"a\nb\n");
11503        for algo in [
11504            DiffAlgorithm::Myers,
11505            DiffAlgorithm::Minimal,
11506            DiffAlgorithm::Patience,
11507            DiffAlgorithm::Histogram,
11508        ] {
11509            let ops = diff_lines_with_algorithm(&old, &new, algo);
11510            assert_eq!(
11511                ops,
11512                vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Insert(1)],
11513                "algorithm {:?} mishandled missing final newline",
11514                algo
11515            );
11516        }
11517    }
11518
11519    #[test]
11520    fn dispatcher_routes_each_variant() {
11521        let old = split_lines(b"a\nb\nc\n");
11522        let new = split_lines(b"a\nx\nc\n");
11523        assert_eq!(
11524            diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Myers),
11525            myers_diff_lines(&old, &new)
11526        );
11527        // Minimal aliases Myers (the Myers search is already a minimal SES).
11528        assert_eq!(
11529            diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Minimal),
11530            myers_diff_lines(&old, &new)
11531        );
11532        assert_eq!(
11533            diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Patience),
11534            patience_diff_lines(&old, &new)
11535        );
11536        assert_eq!(
11537            diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Histogram),
11538            histogram_diff_lines(&old, &new)
11539        );
11540    }
11541
11542    #[test]
11543    fn patience_recurses_into_gaps_between_anchors() {
11544        // Unique anchors `head`/`tail` bracket an inner edit; patience must
11545        // recurse into the middle gap and diff `mid1`->`MID` there.
11546        let old = b"head\nmid1\nmid2\ntail\n";
11547        let new = b"head\nMID\nmid2\ntail\n";
11548        let old_l = split_lines(old);
11549        let new_l = split_lines(new);
11550        let ops = patience_diff_lines(&old_l, &new_l);
11551        assert_eq!(
11552            ops,
11553            vec![
11554                DiffOp::Equal(1),
11555                DiffOp::Delete(1),
11556                DiffOp::Insert(1),
11557                DiffOp::Equal(2),
11558            ]
11559        );
11560        assert_valid_script(old, new, &ops);
11561    }
11562
11563    #[test]
11564    fn patience_falls_back_to_myers_with_no_unique_lines() {
11565        // Every line is duplicated within its own side, so there are no
11566        // unique-in-both anchors; patience must defer to Myers but still return
11567        // a valid script.
11568        let old = b"a\na\nb\nb\n";
11569        let new = b"a\na\na\nb\n";
11570        let old_l = split_lines(old);
11571        let new_l = split_lines(new);
11572        let ops = patience_diff_lines(&old_l, &new_l);
11573        // The contract for the fallback path is validity, not minimality: after
11574        // the greedy prefix/suffix trim (which git's patience does too) the
11575        // leftover block is handed to Myers, and the whole script must still
11576        // reconstruct `new`.
11577        assert_valid_script(old, new, &ops);
11578    }
11579
11580    #[test]
11581    fn algorithms_agree_with_myers_when_all_lines_distinct() {
11582        // When every line is globally unique, patience's anchor set is the full
11583        // LCS, so patience and histogram must produce exactly the Myers script.
11584        let cases: &[(&[u8], &[u8])] = &[
11585            (b"a\nb\nc\nd\ne\n", b"a\nc\nd\nf\ne\n"),
11586            (b"1\n2\n3\n4\n5\n6\n", b"1\n3\n2\n4\n6\n5\n"),
11587            (b"q\nw\ne\nr\nt\ny\n", b"q\nw\nx\nr\nt\nz\n"),
11588        ];
11589        for (old_bytes, new_bytes) in cases {
11590            let old = split_lines(old_bytes);
11591            let new = split_lines(new_bytes);
11592            let myers = myers_diff_lines(&old, &new);
11593            assert_eq!(
11594                patience_diff_lines(&old, &new),
11595                myers,
11596                "patience must equal Myers when all lines are distinct: {:?}",
11597                old_bytes
11598            );
11599            assert_eq!(
11600                histogram_diff_lines(&old, &new),
11601                myers,
11602                "histogram must equal Myers when all lines are distinct: {:?}",
11603                old_bytes
11604            );
11605        }
11606    }
11607
11608    #[test]
11609    fn fuzz_all_algorithms_reconstruct_new() {
11610        // A small deterministic LCG drives many random small inputs over a tiny
11611        // alphabet (so lines repeat and exercise the anchor/fallback paths).
11612        // Every algorithm must produce a valid LCS-correct script for each pair.
11613        let mut state: u64 = 0x9E37_79B9_7F4A_7C15;
11614        let mut next = || {
11615            state = state
11616                .wrapping_mul(6364136223846793005)
11617                .wrapping_add(1442695040888963407);
11618            (state >> 33) as u32
11619        };
11620        let alphabet = [b"a\n", b"b\n", b"c\n", b"d\n"];
11621        let build = |rng: &mut dyn FnMut() -> u32| -> Vec<u8> {
11622            let len = (rng() % 9) as usize; // 0..=8 lines
11623            let mut buf = Vec::new();
11624            for _ in 0..len {
11625                let pick = (rng() % alphabet.len() as u32) as usize;
11626                buf.extend_from_slice(alphabet[pick]);
11627            }
11628            // Occasionally drop the trailing newline to exercise that path.
11629            if !buf.is_empty() && rng().is_multiple_of(4) {
11630                buf.pop();
11631            }
11632            buf
11633        };
11634        for _ in 0..400 {
11635            let old_bytes = build(&mut next);
11636            let new_bytes = build(&mut next);
11637            check_all_algorithms(&old_bytes, &new_bytes);
11638        }
11639    }
11640
11641    #[test]
11642    fn exhaustive_small_inputs_all_algorithms_reconstruct() {
11643        // Brute force over a 3-symbol alphabet up to 5 lines per side: every
11644        // algorithm must produce a valid LCS-correct script for *every* pair.
11645        // This is the strongest correctness net for the recursion/fallback
11646        // paths; apply_ops asserts both reconstruction and Equal-correctness.
11647        let syms = [b"a\n".to_vec(), b"b\n".to_vec(), b"c\n".to_vec()];
11648        let make = |n: usize, mut code: usize| -> Vec<u8> {
11649            let mut v = Vec::new();
11650            for _ in 0..n {
11651                v.extend_from_slice(&syms[code % 3]);
11652                code /= 3;
11653            }
11654            v
11655        };
11656        for la in 0..=5usize {
11657            for lb in 0..=5usize {
11658                for ca in 0..3usize.pow(la as u32) {
11659                    for cb in 0..3usize.pow(lb as u32) {
11660                        let ob = make(la, ca);
11661                        let nb = make(lb, cb);
11662                        let ol = split_lines(&ob);
11663                        let nl = split_lines(&nb);
11664                        assert_eq!(apply_ops(&ol, &nl, &myers_diff_lines(&ol, &nl)), nb);
11665                        assert_eq!(apply_ops(&ol, &nl, &patience_diff_lines(&ol, &nl)), nb);
11666                        assert_eq!(apply_ops(&ol, &nl, &histogram_diff_lines(&ol, &nl)), nb);
11667                    }
11668                }
11669            }
11670        }
11671    }
11672
11673    #[test]
11674    fn fuzz_distinct_lines_patience_histogram_equal_myers() {
11675        // When inputs are permutations/subsequences of globally-unique lines,
11676        // patience and histogram must match Myers exactly. We generate sequences
11677        // of distinct tokens to guarantee global uniqueness on both sides.
11678        let mut state: u64 = 0x1234_5678_9ABC_DEF0;
11679        let mut next = || {
11680            state = state
11681                .wrapping_mul(6364136223846793005)
11682                .wrapping_add(1442695040888963407);
11683            (state >> 33) as u32
11684        };
11685        for _ in 0..200 {
11686            // Random subset+order of tokens "0\n".."9\n" for each side; tokens
11687            // are globally unique, so any common line is unique in both.
11688            let pick_subseq = |rng: &mut dyn FnMut() -> u32| -> Vec<u8> {
11689                let mut buf = Vec::new();
11690                for t in 0..10u32 {
11691                    if rng().is_multiple_of(2) {
11692                        buf.extend_from_slice(format!("{t}\n").as_bytes());
11693                    }
11694                }
11695                buf
11696            };
11697            let old_bytes = pick_subseq(&mut next);
11698            let new_bytes = pick_subseq(&mut next);
11699            let old = split_lines(&old_bytes);
11700            let new = split_lines(&new_bytes);
11701            let myers = myers_diff_lines(&old, &new);
11702            assert_eq!(patience_diff_lines(&old, &new), myers);
11703            assert_eq!(histogram_diff_lines(&old, &new), myers);
11704        }
11705    }
11706
11707    // ===================================================================
11708    // Subtree-skip-by-OID tree-diff optimization: the pruned simultaneous
11709    // walk (`changed_tree_entries`) must produce byte-identical name-status
11710    // output to the legacy "flatten both sides fully" walk
11711    // (`collect_full_tree_pair`) on every representative diff shape.
11712    // ===================================================================
11713
11714    /// Format a name-status result into stable, comparable lines.
11715    fn status_lines(entries: &[NameStatusEntry]) -> Vec<String> {
11716        entries.iter().map(|entry| entry.line()).collect()
11717    }
11718
11719    /// Assert the pruned walk and the full flatten agree, both as raw map diffs
11720    /// and through the public tree-diff entry points, for the given options.
11721    fn assert_tree_diff_matches_full(
11722        db: &FileObjectDatabase,
11723        left: &ObjectId,
11724        right: &ObjectId,
11725        options: DiffNameStatusOptions,
11726    ) {
11727        // Reference ("old") behaviour: fully flatten both trees, then diff.
11728        let (full_left, full_right) = collect_full_tree_pair(db, ObjectFormat::Sha1, left, right)
11729            .expect("test operation should succeed");
11730        let reference = diff_name_status_maps(
11731            &full_left,
11732            &full_right,
11733            full_left.keys().chain(full_right.keys()),
11734            options,
11735        )
11736        .expect("test operation should succeed");
11737
11738        // Optimized ("new") behaviour: prune identical subtrees, then diff.
11739        let (pruned_left, pruned_right) = changed_tree_entries(db, ObjectFormat::Sha1, left, right)
11740            .expect("test operation should succeed");
11741        let pruned = diff_name_status_maps(
11742            &pruned_left,
11743            &pruned_right,
11744            pruned_left.keys().chain(pruned_right.keys()),
11745            options,
11746        )
11747        .expect("test operation should succeed");
11748
11749        assert_eq!(
11750            status_lines(&reference),
11751            status_lines(&pruned),
11752            "pruned map diff diverged from full map diff for {options:?}"
11753        );
11754
11755        // And the public entry point (which itself selects pruned vs full) must
11756        // match the reference too.
11757        let public =
11758            diff_name_status_trees_with_options(db, ObjectFormat::Sha1, left, right, options)
11759                .expect("test operation should succeed");
11760        assert_eq!(
11761            status_lines(&reference),
11762            status_lines(&public),
11763            "public tree diff diverged from full map diff for {options:?}"
11764        );
11765
11766        // The pruned maps must be a subset of the full maps and must contain
11767        // exactly the paths that actually changed (no identical entries leak in,
11768        // no changed entries get dropped).
11769        for (path, tracked) in &pruned_left {
11770            assert_eq!(
11771                full_left.get(path),
11772                Some(tracked),
11773                "pruned left entry not present (or differs) in full left map: {:?}",
11774                String::from_utf8_lossy(path)
11775            );
11776        }
11777        for (path, tracked) in &pruned_right {
11778            assert_eq!(
11779                full_right.get(path),
11780                Some(tracked),
11781                "pruned right entry not present (or differs) in full right map: {:?}",
11782                String::from_utf8_lossy(path)
11783            );
11784        }
11785        // Every path the full diff reports as changed must survive pruning on
11786        // whichever side(s) it lives.
11787        for entry in &reference {
11788            let path = entry.path.as_bytes();
11789            match entry.status {
11790                NameStatus::Added => assert!(
11791                    pruned_right.contains_key(path),
11792                    "added path dropped by pruning: {:?}",
11793                    String::from_utf8_lossy(path)
11794                ),
11795                NameStatus::Deleted => assert!(
11796                    pruned_left.contains_key(path),
11797                    "deleted path dropped by pruning: {:?}",
11798                    String::from_utf8_lossy(path)
11799                ),
11800                NameStatus::Modified => {
11801                    assert!(
11802                        pruned_left.contains_key(path) && pruned_right.contains_key(path),
11803                        "modified path dropped by pruning: {:?}",
11804                        String::from_utf8_lossy(path)
11805                    );
11806                }
11807                _ => {}
11808            }
11809        }
11810    }
11811
11812    /// Run the equivalence assertion across the option matrix that the pruned
11813    /// path serves (everything except `--find-copies-harder`, which uses the
11814    /// full maps and is checked separately).
11815    fn assert_tree_diff_matches_full_all_modes(
11816        db: &FileObjectDatabase,
11817        left: &ObjectId,
11818        right: &ObjectId,
11819    ) {
11820        for detect_renames in [false, true] {
11821            for detect_copies in [false, true] {
11822                let options = DiffNameStatusOptions {
11823                    detect_renames,
11824                    detect_copies,
11825                    find_copies_harder: false,
11826                    rename_empty: true,
11827                };
11828                assert_tree_diff_matches_full(db, left, right, options);
11829            }
11830        }
11831    }
11832
11833    /// Build a DB pre-seeded with a fixed bank of blobs for the structural tests.
11834    fn structural_db() -> (PathBuf, FileObjectDatabase) {
11835        let root = temp_root();
11836        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
11837            .expect("test operation should succeed");
11838        let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
11839        (root, db)
11840    }
11841
11842    #[test]
11843    fn pruned_walk_skips_identical_subtree_and_matches_full() {
11844        // A large shared subtree (`shared/`) is byte-identical on both sides; the
11845        // only change lives in `app/`. The pruned walk must skip `shared/`
11846        // entirely yet still produce the exact same diff as flattening it.
11847        let (root, mut db) = structural_db();
11848
11849        // shared/ — identical on both sides, several nested files.
11850        let s1 = write_blob(&mut db, b"shared one\n");
11851        let s2 = write_blob(&mut db, b"shared two\n");
11852        let s3 = write_blob(&mut db, b"deep nested\n");
11853        let shared_inner = write_tree(&mut db, &[(b"c.txt", 0o100644, s3.clone())]);
11854        let shared = write_tree(
11855            &mut db,
11856            &[
11857                (b"a.txt", 0o100644, s1.clone()),
11858                (b"b.txt", 0o100644, s2.clone()),
11859                (b"inner", 0o040000, shared_inner.clone()),
11860            ],
11861        );
11862
11863        // app/ — one file modified between sides.
11864        let app_old = write_blob(&mut db, b"version 1\n");
11865        let app_new = write_blob(&mut db, b"version 2\n");
11866        let app_left = write_tree(&mut db, &[(b"main.rs", 0o100644, app_old)]);
11867        let app_right = write_tree(&mut db, &[(b"main.rs", 0o100644, app_new)]);
11868
11869        let left = write_tree(
11870            &mut db,
11871            &[
11872                (b"app", 0o040000, app_left),
11873                (b"shared", 0o040000, shared.clone()),
11874            ],
11875        );
11876        let right = write_tree(
11877            &mut db,
11878            &[(b"app", 0o040000, app_right), (b"shared", 0o040000, shared)],
11879        );
11880
11881        // Sanity: the only change is the nested app/main.rs modification.
11882        let (pruned_left, pruned_right) =
11883            changed_tree_entries(&db, ObjectFormat::Sha1, &left, &right)
11884                .expect("test operation should succeed");
11885        assert_eq!(
11886            pruned_left.keys().collect::<Vec<_>>(),
11887            vec![&b"app/main.rs".to_vec()],
11888            "pruning should leave only the changed path on the left"
11889        );
11890        assert_eq!(
11891            pruned_right.keys().collect::<Vec<_>>(),
11892            vec![&b"app/main.rs".to_vec()],
11893            "pruning should leave only the changed path on the right"
11894        );
11895        assert!(
11896            !pruned_left.contains_key(b"shared/a.txt".as_slice()),
11897            "identical shared subtree must not appear in pruned maps"
11898        );
11899
11900        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
11901        fs::remove_dir_all(root).expect("test operation should succeed");
11902    }
11903
11904    #[test]
11905    fn pruned_walk_matches_full_for_add_delete_modify_nested() {
11906        // Mixed shape: a top-level add, a top-level delete, a nested modify, and
11907        // an untouched nested subtree that must be skipped.
11908        let (root, mut db) = structural_db();
11909
11910        let keep = write_blob(&mut db, b"unchanged\n");
11911        let untouched_dir = write_tree(&mut db, &[(b"keep.txt", 0o100644, keep.clone())]);
11912
11913        let nested_old = write_blob(&mut db, b"nested old\n");
11914        let nested_new = write_blob(&mut db, b"nested new\n");
11915        let dir_left = write_tree(
11916            &mut db,
11917            &[
11918                (b"changed.txt", 0o100644, nested_old),
11919                (b"stable.txt", 0o100644, keep.clone()),
11920            ],
11921        );
11922        let dir_right = write_tree(
11923            &mut db,
11924            &[
11925                (b"changed.txt", 0o100644, nested_new),
11926                (b"stable.txt", 0o100644, keep.clone()),
11927            ],
11928        );
11929
11930        let only_left = write_blob(&mut db, b"will be deleted\n");
11931        let only_right = write_blob(&mut db, b"freshly added\n");
11932
11933        let left = write_tree(
11934            &mut db,
11935            &[
11936                (b"dir", 0o040000, dir_left),
11937                (b"gone.txt", 0o100644, only_left),
11938                (b"untouched", 0o040000, untouched_dir.clone()),
11939            ],
11940        );
11941        let right = write_tree(
11942            &mut db,
11943            &[
11944                (b"dir", 0o040000, dir_right),
11945                (b"new.txt", 0o100644, only_right),
11946                (b"untouched", 0o040000, untouched_dir),
11947            ],
11948        );
11949
11950        let entries = diff_name_status_trees_with_options(
11951            &db,
11952            ObjectFormat::Sha1,
11953            &left,
11954            &right,
11955            DiffNameStatusOptions {
11956                detect_renames: false,
11957                detect_copies: false,
11958                find_copies_harder: false,
11959                rename_empty: true,
11960            },
11961        )
11962        .expect("test operation should succeed");
11963        assert_eq!(
11964            status_lines(&entries),
11965            vec![
11966                "M\tdir/changed.txt".to_string(),
11967                "D\tgone.txt".to_string(),
11968                "A\tnew.txt".to_string(),
11969            ],
11970            "unexpected raw status for mixed nested diff"
11971        );
11972
11973        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
11974        fs::remove_dir_all(root).expect("test operation should succeed");
11975    }
11976
11977    #[test]
11978    fn pruned_walk_matches_full_for_rename_across_dirs() {
11979        // An exact rename (same blob oid) moving between directories. Rename
11980        // detection runs on the pruned add/delete set and must match the full
11981        // walk's result exactly.
11982        let (root, mut db) = structural_db();
11983
11984        let moved = write_blob(&mut db, b"i get moved across directories\n");
11985        let companion = write_blob(&mut db, b"i stay put\n");
11986        let stable_dir = write_tree(&mut db, &[(b"keep.txt", 0o100644, companion.clone())]);
11987
11988        let src_dir = write_tree(&mut db, &[(b"file.txt", 0o100644, moved.clone())]);
11989        let dst_dir = write_tree(&mut db, &[(b"renamed.txt", 0o100644, moved.clone())]);
11990
11991        let left = write_tree(
11992            &mut db,
11993            &[
11994                (b"src", 0o040000, src_dir),
11995                (b"stable", 0o040000, stable_dir.clone()),
11996            ],
11997        );
11998        let right = write_tree(
11999            &mut db,
12000            &[
12001                (b"dst", 0o040000, dst_dir),
12002                (b"stable", 0o040000, stable_dir),
12003            ],
12004        );
12005
12006        let entries = diff_name_status_trees_with_options(
12007            &db,
12008            ObjectFormat::Sha1,
12009            &left,
12010            &right,
12011            DiffNameStatusOptions {
12012                detect_renames: true,
12013                detect_copies: false,
12014                find_copies_harder: false,
12015                rename_empty: true,
12016            },
12017        )
12018        .expect("test operation should succeed");
12019        assert_eq!(
12020            status_lines(&entries),
12021            vec!["R100\tsrc/file.txt\tdst/renamed.txt".to_string()],
12022            "rename across dirs should be detected on pruned set"
12023        );
12024
12025        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
12026        fs::remove_dir_all(root).expect("test operation should succeed");
12027    }
12028
12029    #[test]
12030    fn pruned_walk_matches_full_for_binary_and_mode_change() {
12031        // Binary blob modification plus an executable-bit (mode) change on an
12032        // otherwise-identical blob. Mode-only changes must still register as a
12033        // Modify (the pruned walk compares mode + oid, like the full map).
12034        let (root, mut db) = structural_db();
12035
12036        let bin_old = write_blob(&mut db, &[0u8, 159, 146, 150, 0, 255, 1, 2, 3]);
12037        let bin_new = write_blob(&mut db, &[0u8, 159, 146, 150, 0, 254, 9, 8, 7]);
12038        let script = write_blob(&mut db, b"#!/bin/sh\necho hi\n");
12039
12040        let left = write_tree(
12041            &mut db,
12042            &[
12043                (b"image.bin", 0o100644, bin_old),
12044                (b"run.sh", 0o100644, script.clone()),
12045            ],
12046        );
12047        let right = write_tree(
12048            &mut db,
12049            &[
12050                (b"image.bin", 0o100644, bin_new),
12051                // same blob oid, executable bit flipped on
12052                (b"run.sh", 0o100755, script),
12053            ],
12054        );
12055
12056        let entries = diff_name_status_trees_with_options(
12057            &db,
12058            ObjectFormat::Sha1,
12059            &left,
12060            &right,
12061            DiffNameStatusOptions {
12062                detect_renames: false,
12063                detect_copies: false,
12064                find_copies_harder: false,
12065                rename_empty: true,
12066            },
12067        )
12068        .expect("test operation should succeed");
12069        assert_eq!(
12070            status_lines(&entries),
12071            vec!["M\timage.bin".to_string(), "M\trun.sh".to_string()],
12072            "binary edit and mode-only change should both be Modify"
12073        );
12074
12075        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
12076        fs::remove_dir_all(root).expect("test operation should succeed");
12077    }
12078
12079    #[test]
12080    fn pruned_walk_matches_full_for_dir_replaced_by_file() {
12081        // A name that is a directory on the left and a regular file on the right
12082        // (and vice versa). The flattened paths differ (`thing/...` vs `thing`),
12083        // so the pruned walk must treat them as unrelated add/delete pairs,
12084        // exactly as the full flatten does.
12085        let (root, mut db) = structural_db();
12086
12087        let inner_a = write_blob(&mut db, b"inner a\n");
12088        let inner_b = write_blob(&mut db, b"inner b\n");
12089        let thing_dir = write_tree(
12090            &mut db,
12091            &[(b"a.txt", 0o100644, inner_a), (b"b.txt", 0o100644, inner_b)],
12092        );
12093        let thing_file = write_blob(&mut db, b"now i am a file\n");
12094
12095        // other/ is a file on the left, a directory on the right.
12096        let other_file = write_blob(&mut db, b"i was a file\n");
12097        let other_inner = write_blob(&mut db, b"now nested\n");
12098        let other_dir = write_tree(&mut db, &[(b"x.txt", 0o100644, other_inner)]);
12099
12100        let left = write_tree(
12101            &mut db,
12102            &[
12103                (b"other", 0o100644, other_file),
12104                (b"thing", 0o040000, thing_dir),
12105            ],
12106        );
12107        let right = write_tree(
12108            &mut db,
12109            &[
12110                (b"other", 0o040000, other_dir),
12111                (b"thing", 0o100644, thing_file),
12112            ],
12113        );
12114
12115        let entries = diff_name_status_trees_with_options(
12116            &db,
12117            ObjectFormat::Sha1,
12118            &left,
12119            &right,
12120            DiffNameStatusOptions {
12121                detect_renames: false,
12122                detect_copies: false,
12123                find_copies_harder: false,
12124                rename_empty: true,
12125            },
12126        )
12127        .expect("test operation should succeed");
12128        assert_eq!(
12129            status_lines(&entries),
12130            vec![
12131                "D\tother".to_string(),
12132                "A\tother/x.txt".to_string(),
12133                "A\tthing".to_string(),
12134                "D\tthing/a.txt".to_string(),
12135                "D\tthing/b.txt".to_string(),
12136            ],
12137            "dir<->file swap should flatten to independent adds/deletes"
12138        );
12139
12140        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
12141        fs::remove_dir_all(root).expect("test operation should succeed");
12142    }
12143
12144    #[test]
12145    fn pruned_walk_matches_full_for_identical_trees() {
12146        // Two identical root trees: zero changes, and the root must be skipped
12147        // without reading anything below it.
12148        let (root, mut db) = structural_db();
12149
12150        let blob = write_blob(&mut db, b"same\n");
12151        let sub = write_tree(&mut db, &[(b"f.txt", 0o100644, blob.clone())]);
12152        let tree = write_tree(
12153            &mut db,
12154            &[(b"sub", 0o040000, sub), (b"top.txt", 0o100644, blob)],
12155        );
12156
12157        let (pruned_left, pruned_right) =
12158            changed_tree_entries(&db, ObjectFormat::Sha1, &tree, &tree)
12159                .expect("test operation should succeed");
12160        assert!(
12161            pruned_left.is_empty() && pruned_right.is_empty(),
12162            "identical trees must produce no changed entries"
12163        );
12164
12165        let entries = diff_name_status_trees_with_options(
12166            &db,
12167            ObjectFormat::Sha1,
12168            &tree,
12169            &tree,
12170            DiffNameStatusOptions::default(),
12171        )
12172        .expect("test operation should succeed");
12173        assert!(entries.is_empty(), "identical trees must produce no diff");
12174
12175        assert_tree_diff_matches_full_all_modes(&db, &tree, &tree);
12176        fs::remove_dir_all(root).expect("test operation should succeed");
12177    }
12178
12179    #[test]
12180    fn find_copies_harder_uses_full_left_map_and_finds_unchanged_source() {
12181        // `--find-copies-harder` must still see an *unchanged* file as a copy
12182        // source. This is the case where the public entry point deliberately
12183        // falls back to the full flatten; verify the full-map fallback both
12184        // behaves correctly and matches a direct full-map computation.
12185        let (root, mut db) = structural_db();
12186
12187        // `template.txt` is unchanged between sides (lives in an untouched
12188        // subtree), and `copy.txt` is added on the right with the same content.
12189        let template = write_blob(&mut db, b"reusable boilerplate content\n");
12190        let lib_dir = write_tree(&mut db, &[(b"template.txt", 0o100644, template.clone())]);
12191
12192        let trigger_old = write_blob(&mut db, b"trigger old\n");
12193        let trigger_new = write_blob(&mut db, b"trigger new\n");
12194
12195        let left = write_tree(
12196            &mut db,
12197            &[
12198                (b"lib", 0o040000, lib_dir.clone()),
12199                (b"trigger.txt", 0o100644, trigger_old),
12200            ],
12201        );
12202        let right = write_tree(
12203            &mut db,
12204            &[
12205                (b"copy.txt", 0o100644, template.clone()),
12206                (b"lib", 0o040000, lib_dir),
12207                (b"trigger.txt", 0o100644, trigger_new),
12208            ],
12209        );
12210
12211        let options = DiffNameStatusOptions {
12212            detect_renames: true,
12213            detect_copies: true,
12214            find_copies_harder: true,
12215            rename_empty: true,
12216        };
12217
12218        // Reference via the full flatten (the old algorithm).
12219        let (full_left, full_right) =
12220            collect_full_tree_pair(&db, ObjectFormat::Sha1, &left, &right)
12221                .expect("test operation should succeed");
12222        let reference = diff_name_status_maps(
12223            &full_left,
12224            &full_right,
12225            full_left.keys().chain(full_right.keys()),
12226            options,
12227        )
12228        .expect("test operation should succeed");
12229
12230        let public =
12231            diff_name_status_trees_with_options(&db, ObjectFormat::Sha1, &left, &right, options)
12232                .expect("test operation should succeed");
12233        assert_eq!(
12234            status_lines(&reference),
12235            status_lines(&public),
12236            "find-copies-harder public diff must match full-map reference"
12237        );
12238        // The copy must be detected from the unchanged template source.
12239        assert!(
12240            public
12241                .iter()
12242                .any(|entry| matches!(entry.status, NameStatus::Copied(_))
12243                    && entry.old_path.as_ref().map(|p| p.as_bytes())
12244                        == Some(b"lib/template.txt".as_slice())
12245                    && entry.path == b"copy.txt"),
12246            "copy from unchanged source must be found with find_copies_harder: {public:?}"
12247        );
12248        fs::remove_dir_all(root).expect("test operation should succeed");
12249    }
12250
12251    #[test]
12252    fn pruned_walk_matches_full_with_inexact_rename_options() {
12253        // Exercise the rename-options entry point (which also selects pruned vs
12254        // full) with inexact detection enabled, across an untouched subtree.
12255        let (root, mut db) = structural_db();
12256
12257        let untouched = write_blob(&mut db, b"untouched file\n");
12258        let untouched_dir = write_tree(&mut db, &[(b"u.txt", 0o100644, untouched.clone())]);
12259
12260        // a.txt -> b.txt with one changed line (a 75% inexact rename).
12261        let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
12262        let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
12263
12264        let left = write_tree(
12265            &mut db,
12266            &[
12267                (b"a.txt", 0o100644, old),
12268                (b"keep", 0o040000, untouched_dir.clone()),
12269            ],
12270        );
12271        let right = write_tree(
12272            &mut db,
12273            &[
12274                (b"b.txt", 0o100644, new),
12275                (b"keep", 0o040000, untouched_dir),
12276            ],
12277        );
12278
12279        let options = RenameDetectionOptions {
12280            base: DiffNameStatusOptions {
12281                detect_renames: true,
12282                detect_copies: false,
12283                find_copies_harder: false,
12284                rename_empty: true,
12285            },
12286            detect_inexact: true,
12287            rename_threshold: DEFAULT_RENAME_THRESHOLD,
12288            copy_threshold: DEFAULT_RENAME_THRESHOLD,
12289            rename_limit: 0,
12290        };
12291
12292        // Reference: full flatten + same detection.
12293        let (full_left, full_right) =
12294            collect_full_tree_pair(&db, ObjectFormat::Sha1, &left, &right)
12295                .expect("test operation should succeed");
12296        let reference = diff_name_status_maps_with_renames(
12297            &full_left,
12298            &full_right,
12299            full_left.keys().chain(full_right.keys()),
12300            options,
12301            |oid| read_blob_bytes(&db, oid),
12302        )
12303        .expect("test operation should succeed");
12304
12305        let public = diff_name_status_trees_with_rename_options(
12306            &db,
12307            ObjectFormat::Sha1,
12308            &left,
12309            &right,
12310            options,
12311        )
12312        .expect("test operation should succeed");
12313
12314        assert_eq!(
12315            status_lines(&reference),
12316            status_lines(&public),
12317            "inexact rename via pruned walk must match full-map reference"
12318        );
12319        assert_eq!(
12320            status_lines(&public),
12321            vec!["R075\ta.txt\tb.txt".to_string()],
12322            "expected a 75% inexact rename"
12323        );
12324        fs::remove_dir_all(root).expect("test operation should succeed");
12325    }
12326}